Stop the war!
Остановите войну!
for scientists:
default search action
BibTeX records: Shangtong Zhang
@article{DBLP:journals/corr/abs-2401-07844,
  author     = {Shuze Liu and Shuhang Chen and Shangtong Zhang},
  title      = {The {ODE} Method for Stochastic Approximation and Reinforcement Learning with Markovian Noise},
  journal    = {CoRR},
  volume     = {abs/2401.07844},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2401.07844},
  doi        = {10.48550/ARXIV.2401.07844},
  eprinttype = {arXiv},
  eprint     = {2401.07844},
  timestamp  = {Thu, 01 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2401-07844.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/tcad/WeiWZYJWQZ23,
  author     = {Yuntao Wei and Xueyan Wang and Shangtong Zhang and Jianlei Yang and Xiaotao Jia and Zhaohao Wang and Gang Qu and Weisheng Zhao},
  title      = {{IMGA:} Efficient In-Memory Graph Convolution Network Aggregation With Data Flow Optimizations},
  journal    = {{IEEE} Trans. Comput. Aided Des. Integr. Circuits Syst.},
  volume     = {42},
  number     = {12},
  pages      = {4695--4705},
  year       = {2023},
  url        = {https://doi.org/10.1109/TCAD.2023.3288509},
  doi        = {10.1109/TCAD.2023.3288509},
  timestamp  = {Sun, 10 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tcad/WeiWZYJWQZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/Zhang23c,
  author     = {Shangtong Zhang},
  editor     = {Brian Williams and Yiling Chen and Jennifer Neville},
  title      = {A New Challenge in Policy Evaluation},
  booktitle  = {Thirty-Seventh {AAAI} Conference on Artificial Intelligence, {AAAI} 2023, Thirty-Fifth Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2023, Thirteenth Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2023, Washington, DC, USA, February 7-14, 2023},
  pages      = {15465},
  publisher  = {{AAAI} Press},
  year       = {2023},
  url        = {https://doi.org/10.1609/aaai.v37i13.26832},
  doi        = {10.1609/AAAI.V37I13.26832},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/Zhang23c.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname is braced and given in "Last, First" form so BibTeX
% keeps it as one last-name unit instead of splitting off a von part.
@inproceedings{DBLP:conf/icml/ZhangCL23,
  author     = {Shangtong Zhang and {Tachet des Combes}, Remi and Romain Laroche},
  editor     = {Andreas Krause and Emma Brunskill and Kyunghyun Cho and Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett},
  title      = {On the Convergence of {SARSA} with Linear Function Approximation},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023, 23-29 July 2023, Honolulu, Hawaii, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {41613--41646},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/zhang23al.html},
  timestamp  = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/ZhangCL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2301-13734,
  author     = {Shuze Liu and Shangtong Zhang},
  title      = {Improving Monte Carlo Evaluation with Offline Data},
  journal    = {CoRR},
  volume     = {abs/2301.13734},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.13734},
  doi        = {10.48550/ARXIV.2301.13734},
  eprinttype = {arXiv},
  eprint     = {2301.13734},
  timestamp  = {Thu, 02 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-13734.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2308-01170,
  author     = {Xiaochi Qian and Shangtong Zhang},
  title      = {Direct Gradient Temporal Difference Learning},
  journal    = {CoRR},
  volume     = {abs/2308.01170},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2308.01170},
  doi        = {10.48550/ARXIV.2308.01170},
  eprinttype = {arXiv},
  eprint     = {2308.01170},
  timestamp  = {Mon, 21 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2308-01170.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2308-03526,
  author     = {Micha{\"{e}}l Mathieu and Sherjil Ozair and Srivatsan Srinivasan and {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Shangtong Zhang and Ray Jiang and Tom Le Paine and Richard Powell and Konrad Zolna and Julian Schrittwieser and David H. Choi and Petko Georgiev and Daniel Toyama and Aja Huang and Roman Ring and Igor Babuschkin and Timo Ewalds and Mahyar Bordbar and Sarah Henderson and Sergio G{\'{o}}mez Colmenarejo and A{\"{a}}ron van den Oord and Wojciech Marian Czarnecki and Nando de Freitas and Oriol Vinyals},
  title      = {AlphaStar Unplugged: Large-Scale Offline Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2308.03526},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2308.03526},
  doi        = {10.48550/ARXIV.2308.03526},
  eprinttype = {arXiv},
  eprint     = {2308.03526},
  timestamp  = {Tue, 22 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2308-03526.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/jmlr/ZhangW22,
  author     = {Shangtong Zhang and Shimon Whiteson},
  title      = {Truncated Emphatic Temporal Difference Methods for Prediction and Control},
  journal    = {J. Mach. Learn. Res.},
  volume     = {23},
  pages      = {153:1--153:59},
  year       = {2022},
  url        = {http://jmlr.org/papers/v23/21-0934.html},
  timestamp  = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/ZhangW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname is braced and given in "Last, First" form so BibTeX
% keeps it as one last-name unit instead of splitting off a von part.
@article{DBLP:journals/jmlr/ZhangCL22,
  author     = {Shangtong Zhang and {Tachet des Combes}, Remi and Romain Laroche},
  title      = {Global Optimality and Finite Sample Analysis of Softmax Off-Policy Actor Critic under State Distribution Mismatch},
  journal    = {J. Mach. Learn. Res.},
  volume     = {23},
  pages      = {343:1--343:91},
  year       = {2022},
  url        = {http://jmlr.org/papers/v23/21-1306.html},
  timestamp  = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/ZhangCL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% booktitle: ordinal spelling ("Twelfth") and list punctuation before
% "Virtual Event" normalized relative to the upstream record.
@inproceedings{DBLP:conf/aaai/JiangZC0H22,
  author     = {Ray Jiang and Shangtong Zhang and Veronica Chelu and Adam White and Hado van Hasselt},
  title      = {Learning Expected Emphatic Traces for Deep {RL}},
  booktitle  = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI} 2022, Thirty-Fourth Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22 - March 1, 2022},
  pages      = {7015--7023},
  publisher  = {{AAAI} Press},
  year       = {2022},
  url        = {https://doi.org/10.1609/aaai.v36i6.20660},
  doi        = {10.1609/AAAI.V36I6.20660},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/JiangZC0H22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname is braced and given in "Last, First" form so BibTeX
% keeps it as one last-name unit instead of splitting off a von part.
@inproceedings{DBLP:conf/atal/ZhangLSWC22,
  author     = {Shangtong Zhang and Romain Laroche and Harm van Seijen and Shimon Whiteson and {Tachet des Combes}, Remi},
  editor     = {Piotr Faliszewski and Viviana Mascardi and Catherine Pelachaud and Matthew E. Taylor},
  title      = {A Deeper Look at Discounting Mismatch in Actor-Critic Algorithms},
  booktitle  = {21st International Conference on Autonomous Agents and Multiagent Systems, {AAMAS} 2022, Auckland, New Zealand, May 9-13, 2022},
  pages      = {1491--1499},
  publisher  = {International Foundation for Autonomous Agents and Multiagent Systems {(IFAAMAS)}},
  year       = {2022},
  url        = {https://www.ifaamas.org/Proceedings/aamas2022/pdfs/p1491.pdf},
  doi        = {10.5555/3535850.3536016},
  timestamp  = {Mon, 18 Jul 2022 17:13:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/ZhangLSWC22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname is braced and given in "Last, First" form so BibTeX
% keeps it as one last-name unit instead of splitting off a von part.
@article{DBLP:journals/corr/abs-2202-06828,
  author     = {Shangtong Zhang and {Tachet des Combes}, Remi and Romain Laroche},
  title      = {On the Chattering of {SARSA} with Linear Function Approximation},
  journal    = {CoRR},
  volume     = {abs/2202.06828},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.06828},
  eprinttype = {arXiv},
  eprint     = {2202.06828},
  timestamp  = {Fri, 18 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-06828.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/ZhangLW21,
  author     = {Shangtong Zhang and Bo Liu and Shimon Whiteson},
  title      = {Mean-Variance Policy Iteration for Risk-Averse Reinforcement Learning},
  booktitle  = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9, 2021},
  pages      = {10905--10913},
  publisher  = {{AAAI} Press},
  year       = {2021},
  url        = {https://doi.org/10.1609/aaai.v35i12.17302},
  doi        = {10.1609/AAAI.V35I12.17302},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/ZhangLW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZhangWSW21,
  author     = {Shangtong Zhang and Yi Wan and Richard S. Sutton and Shimon Whiteson},
  editor     = {Marina Meila and Tong Zhang},
  title      = {Average-Reward Off-Policy Policy Evaluation with Function Approximation},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {12578--12588},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/zhang21u.html},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/ZhangWSW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZhangYW21,
  author     = {Shangtong Zhang and Hengshuai Yao and Shimon Whiteson},
  editor     = {Marina Meila and Tong Zhang},
  title      = {Breaking the Deadly Triad with a Target Network},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {12621--12631},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/zhang21y.html},
  timestamp  = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/ZhangYW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ijcai/ZhangBW21,
  author     = {Shangtong Zhang and Wendelin Boehmer and Shimon Whiteson},
  editor     = {Zhi{-}Hua Zhou},
  title      = {Deep Residual Reinforcement Learning (Extended Abstract)},
  booktitle  = {Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, {IJCAI} 2021, Virtual Event / Montreal, Canada, 19-27 August 2021},
  pages      = {4869--4873},
  publisher  = {ijcai.org},
  year       = {2021},
  url        = {https://doi.org/10.24963/ijcai.2021/668},
  doi        = {10.24963/IJCAI.2021/668},
  timestamp  = {Wed, 25 Aug 2021 17:11:16 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/ZhangBW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2101-02808,
  author     = {Shangtong Zhang and Yi Wan and Richard S. Sutton and Shimon Whiteson},
  title      = {Average-Reward Off-Policy Policy Evaluation with Function Approximation},
  journal    = {CoRR},
  volume     = {abs/2101.02808},
  year       = {2021},
  url        = {https://arxiv.org/abs/2101.02808},
  eprinttype = {arXiv},
  eprint     = {2101.02808},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-02808.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2101-08862,
  author     = {Shangtong Zhang and Hengshuai Yao and Shimon Whiteson},
  title      = {Breaking the Deadly Triad with a Target Network},
  journal    = {CoRR},
  volume     = {abs/2101.08862},
  year       = {2021},
  url        = {https://arxiv.org/abs/2101.08862},
  eprinttype = {arXiv},
  eprint     = {2101.08862},
  timestamp  = {Sat, 30 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-08862.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2107-05405,
  author     = {Ray Jiang and Shangtong Zhang and Veronica Chelu and Adam White and Hado van Hasselt},
  title      = {Learning Expected Emphatic Traces for Deep {RL}},
  journal    = {CoRR},
  volume     = {abs/2107.05405},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.05405},
  eprinttype = {arXiv},
  eprint     = {2107.05405},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2107-05405.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2108-05338,
  author     = {Shangtong Zhang and Shimon Whiteson},
  title      = {Truncated Emphatic Temporal Difference Methods for Prediction and Control},
  journal    = {CoRR},
  volume     = {abs/2108.05338},
  year       = {2021},
  url        = {https://arxiv.org/abs/2108.05338},
  eprinttype = {arXiv},
  eprint     = {2108.05338},
  timestamp  = {Wed, 18 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2108-05338.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname is braced and given in "Last, First" form so BibTeX
% keeps it as one last-name unit instead of splitting off a von part.
@article{DBLP:journals/corr/abs-2111-02997,
  author     = {Shangtong Zhang and {Tachet des Combes}, Remi and Romain Laroche},
  title      = {Global Optimality and Finite Sample Analysis of Softmax Off-Policy Actor Critic under State Distribution Mismatch},
  journal    = {CoRR},
  volume     = {abs/2111.02997},
  year       = {2021},
  url        = {https://arxiv.org/abs/2111.02997},
  eprinttype = {arXiv},
  eprint     = {2111.02997},
  timestamp  = {Fri, 05 Nov 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2111-02997.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/0001WLXZWX20,
  author     = {Yuhang Song and Jianyi Wang and Thomas Lukasiewicz and Zhenghua Xu and Shangtong Zhang and Andrzej Wojcicki and Mai Xu},
  title      = {Mega-Reward: Achieving Human-Level Play without Extrinsic Rewards},
  booktitle  = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages      = {5826--5833},
  publisher  = {{AAAI} Press},
  year       = {2020},
  url        = {https://doi.org/10.1609/aaai.v34i04.6040},
  doi        = {10.1609/AAAI.V34I04.6040},
  timestamp  = {Fri, 08 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/0001WLXZWX20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/atal/ZhangBW20,
  author     = {Shangtong Zhang and Wendelin Boehmer and Shimon Whiteson},
  editor     = {Amal El Fallah Seghrouchni and Gita Sukthankar and Bo An and Neil Yorke{-}Smith},
  title      = {Deep Residual Reinforcement Learning},
  booktitle  = {Proceedings of the 19th International Conference on Autonomous Agents and Multiagent Systems, {AAMAS} '20, Auckland, New Zealand, May 9-13, 2020},
  pages      = {1611--1619},
  publisher  = {International Foundation for Autonomous Agents and Multiagent Systems},
  year       = {2020},
  url        = {https://dl.acm.org/doi/10.5555/3398761.3398946},
  doi        = {10.5555/3398761.3398946},
  timestamp  = {Tue, 26 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/ZhangBW20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZhangLW20,
  author     = {Shangtong Zhang and Bo Liu and Shimon Whiteson},
  title      = {GradientDICE: Rethinking Generalized Offline Estimation of Stationary Values},
  booktitle  = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {119},
  pages      = {11194--11203},
  publisher  = {{PMLR}},
  year       = {2020},
  url        = {http://proceedings.mlr.press/v119/zhang20r.html},
  timestamp  = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/ZhangLW20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZhangLYW20,
  author     = {Shangtong Zhang and Bo Liu and Hengshuai Yao and Shimon Whiteson},
  title      = {Provably Convergent Two-Timescale Off-Policy Actor-Critic with Function Approximation},
  booktitle  = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {119},
  pages      = {11204--11213},
  publisher  = {{PMLR}},
  year       = {2020},
  url        = {http://proceedings.mlr.press/v119/zhang20s.html},
  timestamp  = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/ZhangLYW20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/ZhangVW20,
  author     = {Shangtong Zhang and Vivek Veeriah and Shimon Whiteson},
  editor     = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title      = {Learning Retrospective Knowledge with Reverse Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/e6cbc650cd5798a05dfd0f51d14cde5c-Abstract.html},
  timestamp  = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/ZhangVW20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2001-11113,
  author     = {Shangtong Zhang and Bo Liu and Shimon Whiteson},
  title      = {GradientDICE: Rethinking Generalized Offline Estimation of Stationary Values},
  journal    = {CoRR},
  volume     = {abs/2001.11113},
  year       = {2020},
  url        = {https://arxiv.org/abs/2001.11113},
  eprinttype = {arXiv},
  eprint     = {2001.11113},
  timestamp  = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2001-11113.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2004-10888,
  author     = {Shangtong Zhang and Bo Liu and Shimon Whiteson},
  title      = {Per-Step Reward: {A} New Perspective for Risk-Averse Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2004.10888},
  year       = {2020},
  url        = {https://arxiv.org/abs/2004.10888},
  eprinttype = {arXiv},
  eprint     = {2004.10888},
  timestamp  = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2004-10888.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2007-06703,
  author     = {Shangtong Zhang and Vivek Veeriah and Shimon Whiteson},
  title      = {Learning Retrospective Knowledge with Reverse Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2007.06703},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.06703},
  eprinttype = {arXiv},
  eprint     = {2007.06703},
  timestamp  = {Tue, 21 Jul 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-06703.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname is braced and given in "Last, First" form so BibTeX
% keeps it as one last-name unit instead of splitting off a von part.
@article{DBLP:journals/corr/abs-2010-01069,
  author     = {Shangtong Zhang and Romain Laroche and Harm van Seijen and Shimon Whiteson and {Tachet des Combes}, Remi},
  title      = {A Deeper Look at Discounting Mismatch in Actor-Critic Algorithms},
  journal    = {CoRR},
  volume     = {abs/2010.01069},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.01069},
  eprinttype = {arXiv},
  eprint     = {2010.01069},
  timestamp  = {Mon, 12 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-01069.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/ZhangY19,
  author     = {Shangtong Zhang and Hengshuai Yao},
  title      = {{ACE:} An Actor Ensemble Algorithm for Continuous Control with Tree Search},
  booktitle  = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019},
  pages      = {5789--5796},
  publisher  = {{AAAI} Press},
  year       = {2019},
  url        = {https://doi.org/10.1609/aaai.v33i01.33015789},
  doi        = {10.1609/AAAI.V33I01.33015789},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/ZhangY19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/ZhangY19a,
  author     = {Shangtong Zhang and Hengshuai Yao},
  title      = {{QUOTA:} The Quantile Option Architecture for Reinforcement Learning},
  booktitle  = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019},
  pages      = {5797--5804},
  publisher  = {{AAAI} Press},
  year       = {2019},
  url        = {https://doi.org/10.1609/aaai.v33i01.33015797},
  doi        = {10.1609/AAAI.V33I01.33015797},
  timestamp  = {Tue, 02 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/aaai/ZhangY19a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/atal/MavrinZYK19,
  author     = {Borislav Mavrin and Shangtong Zhang and Hengshuai Yao and Linglong Kong},
  editor     = {Edith Elkind and Manuela Veloso and Noa Agmon and Matthew E. Taylor},
  title      = {Exploration in the Face of Parametric and Intrinsic Uncertainties},
  booktitle  = {Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems, {AAMAS} '19, Montreal, QC, Canada, May 13-17, 2019},
  pages      = {2117--2119},
  publisher  = {International Foundation for Autonomous Agents and Multiagent Systems},
  year       = {2019},
  url        = {http://dl.acm.org/citation.cfm?id=3332029},
  timestamp  = {Wed, 29 May 2019 16:36:58 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/MavrinZYK19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/ZhangBW19,
  author     = {Shangtong Zhang and Wendelin Boehmer and Shimon Whiteson},
  editor     = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title      = {Generalized Off-Policy Actor-Critic},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages      = {1999--2009},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/0e095e054ee94774d6a496099eb1cf6a-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/ZhangBW19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/ZhangW19a,
  author     = {Shangtong Zhang and Shimon Whiteson},
  editor     = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title      = {{DAC:} The Double Actor-Critic Architecture for Learning Options},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages      = {2010--2020},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/4f284803bd0966cc24fa8683a34afc6e-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/ZhangW19a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1903-11329,
  author     = {Shangtong Zhang and Wendelin Boehmer and Shimon Whiteson},
  title      = {Generalized Off-Policy Actor-Critic},
  journal    = {CoRR},
  volume     = {abs/1903.11329},
  year       = {2019},
  url        = {http://arxiv.org/abs/1903.11329},
  eprinttype = {arXiv},
  eprint     = {1903.11329},
  timestamp  = {Tue, 02 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1903-11329.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1904-12691,
  author     = {Shangtong Zhang and Shimon Whiteson},
  title      = {{DAC:} The Double Actor-Critic Architecture for Learning Options},
  journal    = {CoRR},
  volume     = {abs/1904.12691},
  year       = {2019},
  url        = {http://arxiv.org/abs/1904.12691},
  eprinttype = {arXiv},
  eprint     = {1904.12691},
  timestamp  = {Thu, 02 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-12691.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1905-01072,
  author     = {Shangtong Zhang and Wendelin Boehmer and Shimon Whiteson},
  title      = {Deep Residual Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1905.01072},
  year       = {2019},
  url        = {http://arxiv.org/abs/1905.01072},
  eprinttype = {arXiv},
  eprint     = {1905.01072},
  timestamp  = {Mon, 27 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1905-01072.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1905-04640,
  author     = {Yuhang Song and Jianyi Wang and Thomas Lukasiewicz and Zhenghua Xu and Shangtong Zhang and Mai Xu},
  title      = {Mega-Reward: Achieving Human-Level Play without Extrinsic Rewards},
  journal    = {CoRR},
  volume     = {abs/1905.04640},
  year       = {2019},
  url        = {http://arxiv.org/abs/1905.04640},
  eprinttype = {arXiv},
  eprint     = {1905.04640},
  timestamp  = {Wed, 20 Nov 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1905-04640.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1905-06125,
  author     = {Borislav Mavrin and Shangtong Zhang and Hengshuai Yao and Linglong Kong and Kaiwen Wu and Yaoliang Yu},
  title      = {Distributional Reinforcement Learning for Efficient Exploration},
  journal    = {CoRR},
  volume     = {abs/1905.06125},
  year       = {2019},
  url        = {http://arxiv.org/abs/1905.06125},
  eprinttype = {arXiv},
  eprint     = {1905.06125},
  timestamp  = {Tue, 28 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1905-06125.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1911-04384,
  author     = {Shangtong Zhang and Bo Liu and Hengshuai Yao and Shimon Whiteson},
  title      = {Provably Convergent Off-Policy Actor-Critic with Function Approximation},
  journal    = {CoRR},
  volume     = {abs/1911.04384},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.04384},
  eprinttype = {arXiv},
  eprint     = {1911.04384},
  timestamp  = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-04384.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/jossw/CurtinELMGZ18,
  author     = {Ryan R. Curtin and Marcus Edel and Mikhail Lozhnikov and Yannis Mentekidis and Sumedh Ghaisas and Shangtong Zhang},
  title      = {mlpack 3: a fast, flexible machine learning library},
  journal    = {J. Open Source Softw.},
  volume     = {3},
  number     = {26},
  pages      = {726},
  year       = {2018},
  url        = {https://doi.org/10.21105/joss.00726},
  doi        = {10.21105/JOSS.00726},
  timestamp  = {Tue, 16 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jossw/CurtinELMGZ18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1811-02073,
  author     = {Shangtong Zhang and Borislav Mavrin and Linglong Kong and Bo Liu and Hengshuai Yao},
  title      = {{QUOTA:} The Quantile Option Architecture for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1811.02073},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.02073},
  eprinttype = {arXiv},
  eprint     = {1811.02073},
  timestamp  = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-02073.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1811-02696,
  author     = {Shangtong Zhang and Hao Chen and Hengshuai Yao},
  title      = {{ACE:} An Actor Ensemble Algorithm for Continuous Control with Tree Search},
  journal    = {CoRR},
  volume     = {abs/1811.02696},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.02696},
  eprinttype = {arXiv},
  eprint     = {1811.02696},
  timestamp  = {Thu, 22 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-02696.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The doi and url values are stored raw (underscore not LaTeX-escaped) so that
% processors treating these fields verbatim emit a resolvable link.
@inproceedings{DBLP:conf/pkdd/VeeriahZS17,
  author     = {Vivek Veeriah and Shangtong Zhang and Richard S. Sutton},
  editor     = {Michelangelo Ceci and Jaakko Hollm{\'{e}}n and Ljupco Todorovski and Celine Vens and Saso Dzeroski},
  title      = {Crossprop: Learning Representations by Stochastic Meta-Gradient Descent in Neural Networks},
  booktitle  = {Machine Learning and Knowledge Discovery in Databases - European Conference, {ECML} {PKDD} 2017, Skopje, Macedonia, September 18-22, 2017, Proceedings, Part {I}},
  series     = {Lecture Notes in Computer Science},
  volume     = {10534},
  pages      = {445--459},
  publisher  = {Springer},
  year       = {2017},
  url        = {https://doi.org/10.1007/978-3-319-71249-9_27},
  doi        = {10.1007/978-3-319-71249-9_27},
  timestamp  = {Tue, 14 May 2019 10:00:47 +0200},
  biburl     = {https://dblp.org/rec/conf/pkdd/VeeriahZS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1712-00006,
  author     = {Shangtong Zhang and Osmar R. Za{\"{\i}}ane},
  title      = {Comparing Deep Reinforcement Learning and Evolutionary Methods in Continuous Control},
  journal    = {CoRR},
  volume     = {abs/1712.00006},
  year       = {2017},
  url        = {http://arxiv.org/abs/1712.00006},
  eprinttype = {arXiv},
  eprint     = {1712.00006},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1712-00006.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1712-01275,
  author     = {Shangtong Zhang and Richard S. Sutton},
  title      = {A Deeper Look at Experience Replay},
  journal    = {CoRR},
  volume     = {abs/1712.01275},
  year       = {2017},
  url        = {http://arxiv.org/abs/1712.01275},
  eprinttype = {arXiv},
  eprint     = {1712.01275},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1712-01275.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/mir/ZhangZZLQHZW15,
  author     = {Pengjing Zhang and Xiaoqing Zheng and Wenqiang Zhang and Siyan Li and Sheng Qian and Wenqi He and Shangtong Zhang and Ziyuan Wang},
  editor     = {Alexander G. Hauptmann and Chong{-}Wah Ngo and Xiangyang Xue and Yu{-}Gang Jiang and Cees Snoek and Nuno Vasconcelos},
  title      = {A Deep Neural Network for Modeling Music},
  booktitle  = {Proceedings of the 5th {ACM} on International Conference on Multimedia Retrieval, Shanghai, China, June 23-26, 2015},
  pages      = {379--386},
  publisher  = {{ACM}},
  year       = {2015},
  url        = {https://doi.org/10.1145/2671188.2749367},
  doi        = {10.1145/2671188.2749367},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/mir/ZhangZZLQHZW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.