BibTeX records: Shangtong Zhang

download as .bib file

@article{DBLP:journals/corr/abs-2401-07844,
  author       = {Liu, Shuze and
                  Chen, Shuhang and
                  Zhang, Shangtong},
  title        = {The {ODE} Method for Stochastic Approximation and Reinforcement Learning
                  with Markovian Noise},
  journal      = {CoRR},
  volume       = {abs/2401.07844},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2401.07844},
  doi          = {10.48550/ARXIV.2401.07844},
  eprinttype   = {arXiv},
  eprint       = {2401.07844},
  timestamp    = {Thu, 01 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2401-07844.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/tcad/WeiWZYJWQZ23,
  author       = {Wei, Yuntao and
                  Wang, Xueyan and
                  Zhang, Shangtong and
                  Yang, Jianlei and
                  Jia, Xiaotao and
                  Wang, Zhaohao and
                  Qu, Gang and
                  Zhao, Weisheng},
  title        = {{IMGA:} Efficient In-Memory Graph Convolution Network Aggregation
                  With Data Flow Optimizations},
  journal      = {{IEEE} Trans. Comput. Aided Des. Integr. Circuits Syst.},
  volume       = {42},
  number       = {12},
  pages        = {4695--4705},
  year         = {2023},
  url          = {https://doi.org/10.1109/TCAD.2023.3288509},
  doi          = {10.1109/TCAD.2023.3288509},
  timestamp    = {Sun, 10 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tcad/WeiWZYJWQZ23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/Zhang23c,
  author       = {Zhang, Shangtong},
  editor       = {Williams, Brian and
                  Chen, Yiling and
                  Neville, Jennifer},
  title        = {A New Challenge in Policy Evaluation},
  booktitle    = {Thirty-Seventh {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2023, Thirty-Fifth Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2023, Thirteenth Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2023, Washington, DC, USA, February
                  7-14, 2023},
  pages        = {15465},
  publisher    = {{AAAI} Press},
  year         = {2023},
  url          = {https://doi.org/10.1609/aaai.v37i13.26832},
  doi          = {10.1609/AAAI.V37I13.26832},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/Zhang23c.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname "Tachet des Combes" is braced so that BibTeX does not
% treat the lowercase "des" as a von-particle and reduce the surname to "Combes".
@inproceedings{DBLP:conf/icml/ZhangCL23,
  author       = {Zhang, Shangtong and
                  {Tachet des Combes}, Remi and
                  Laroche, Romain},
  editor       = {Krause, Andreas and
                  Brunskill, Emma and
                  Cho, Kyunghyun and
                  Engelhardt, Barbara and
                  Sabato, Sivan and
                  Scarlett, Jonathan},
  title        = {On the Convergence of {SARSA} with Linear Function Approximation},
  booktitle    = {International Conference on Machine Learning, {ICML} 2023, 23-29 July
                  2023, Honolulu, Hawaii, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {202},
  pages        = {41613--41646},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v202/zhang23al.html},
  timestamp    = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/ZhangCL23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2301-13734,
  author       = {Liu, Shuze and
                  Zhang, Shangtong},
  title        = {Improving Monte Carlo Evaluation with Offline Data},
  journal      = {CoRR},
  volume       = {abs/2301.13734},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2301.13734},
  doi          = {10.48550/ARXIV.2301.13734},
  eprinttype   = {arXiv},
  eprint       = {2301.13734},
  timestamp    = {Thu, 02 Feb 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2301-13734.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2308-01170,
  author       = {Qian, Xiaochi and
                  Zhang, Shangtong},
  title        = {Direct Gradient Temporal Difference Learning},
  journal      = {CoRR},
  volume       = {abs/2308.01170},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2308.01170},
  doi          = {10.48550/ARXIV.2308.01170},
  eprinttype   = {arXiv},
  eprint       = {2308.01170},
  timestamp    = {Mon, 21 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2308-01170.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% NOTE(review): "Gomez Colmenarejo" is entered as a two-word surname (Spanish
% double-surname convention) instead of being split into a middle name plus
% "Colmenarejo" - confirm against the author's preferred citation form.
@article{DBLP:journals/corr/abs-2308-03526,
  author       = {Mathieu, Micha{\"{e}}l and
                  Ozair, Sherjil and
                  Srinivasan, Srivatsan and
                  G{\"{u}}l{\c{c}}ehre, {\c{C}}aglar and
                  Zhang, Shangtong and
                  Jiang, Ray and
                  Paine, Tom Le and
                  Powell, Richard and
                  Zolna, Konrad and
                  Schrittwieser, Julian and
                  Choi, David H. and
                  Georgiev, Petko and
                  Toyama, Daniel and
                  Huang, Aja and
                  Ring, Roman and
                  Babuschkin, Igor and
                  Ewalds, Timo and
                  Bordbar, Mahyar and
                  Henderson, Sarah and
                  G{\'{o}}mez Colmenarejo, Sergio and
                  van den Oord, A{\"{a}}ron and
                  Czarnecki, Wojciech Marian and
                  de Freitas, Nando and
                  Vinyals, Oriol},
  title        = {AlphaStar Unplugged: Large-Scale Offline Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2308.03526},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2308.03526},
  doi          = {10.48550/ARXIV.2308.03526},
  eprinttype   = {arXiv},
  eprint       = {2308.03526},
  timestamp    = {Tue, 22 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2308-03526.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/jmlr/ZhangW22,
  author       = {Zhang, Shangtong and
                  Whiteson, Shimon},
  title        = {Truncated Emphatic Temporal Difference Methods for Prediction and
                  Control},
  journal      = {J. Mach. Learn. Res.},
  volume       = {23},
  pages        = {153:1--153:59},
  year         = {2022},
  url          = {http://jmlr.org/papers/v23/21-0934.html},
  timestamp    = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/ZhangW22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname "Tachet des Combes" is braced so that BibTeX does not
% treat the lowercase "des" as a von-particle and reduce the surname to "Combes".
@article{DBLP:journals/jmlr/ZhangCL22,
  author       = {Zhang, Shangtong and
                  {Tachet des Combes}, Remi and
                  Laroche, Romain},
  title        = {Global Optimality and Finite Sample Analysis of Softmax Off-Policy
                  Actor Critic under State Distribution Mismatch},
  journal      = {J. Mach. Learn. Res.},
  volume       = {23},
  pages        = {343:1--343:91},
  year         = {2022},
  url          = {http://jmlr.org/papers/v23/21-1306.html},
  timestamp    = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/ZhangCL22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% Booktitle corrected relative to the upstream export: "Twelveth" is spelled
% "Twelfth", and a comma separates "EAAI 2022" from "Virtual Event".
@inproceedings{DBLP:conf/aaai/JiangZC0H22,
  author       = {Jiang, Ray and
                  Zhang, Shangtong and
                  Chelu, Veronica and
                  White, Adam and
                  van Hasselt, Hado},
  title        = {Learning Expected Emphatic Traces for Deep {RL}},
  booktitle    = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2022, Thirty-Fourth Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22
                  - March 1, 2022},
  pages        = {7015--7023},
  publisher    = {{AAAI} Press},
  year         = {2022},
  url          = {https://doi.org/10.1609/aaai.v36i6.20660},
  doi          = {10.1609/AAAI.V36I6.20660},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/JiangZC0H22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname "Tachet des Combes" is braced so that BibTeX does not
% treat the lowercase "des" as a von-particle and reduce the surname to "Combes".
@inproceedings{DBLP:conf/atal/ZhangLSWC22,
  author       = {Zhang, Shangtong and
                  Laroche, Romain and
                  van Seijen, Harm and
                  Whiteson, Shimon and
                  {Tachet des Combes}, Remi},
  editor       = {Faliszewski, Piotr and
                  Mascardi, Viviana and
                  Pelachaud, Catherine and
                  Taylor, Matthew E.},
  title        = {A Deeper Look at Discounting Mismatch in Actor-Critic Algorithms},
  booktitle    = {21st International Conference on Autonomous Agents and Multiagent
                  Systems, {AAMAS} 2022, Auckland, New Zealand, May 9-13, 2022},
  pages        = {1491--1499},
  publisher    = {International Foundation for Autonomous Agents and Multiagent Systems
                  {(IFAAMAS)}},
  year         = {2022},
  url          = {https://www.ifaamas.org/Proceedings/aamas2022/pdfs/p1491.pdf},
  doi          = {10.5555/3535850.3536016},
  timestamp    = {Mon, 18 Jul 2022 17:13:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/ZhangLSWC22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname "Tachet des Combes" is braced so that BibTeX does not
% treat the lowercase "des" as a von-particle and reduce the surname to "Combes".
@article{DBLP:journals/corr/abs-2202-06828,
  author       = {Zhang, Shangtong and
                  {Tachet des Combes}, Remi and
                  Laroche, Romain},
  title        = {On the Chattering of {SARSA} with Linear Function Approximation},
  journal      = {CoRR},
  volume       = {abs/2202.06828},
  year         = {2022},
  url          = {https://arxiv.org/abs/2202.06828},
  eprinttype   = {arXiv},
  eprint       = {2202.06828},
  timestamp    = {Fri, 18 Feb 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2202-06828.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/ZhangLW21,
  author       = {Zhang, Shangtong and
                  Liu, Bo and
                  Whiteson, Shimon},
  title        = {Mean-Variance Policy Iteration for Risk-Averse Reinforcement Learning},
  booktitle    = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2021, Thirty-Third Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9,
                  2021},
  pages        = {10905--10913},
  publisher    = {{AAAI} Press},
  year         = {2021},
  url          = {https://doi.org/10.1609/aaai.v35i12.17302},
  doi          = {10.1609/AAAI.V35I12.17302},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/ZhangLW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZhangWSW21,
  author       = {Zhang, Shangtong and
                  Wan, Yi and
                  Sutton, Richard S. and
                  Whiteson, Shimon},
  editor       = {Meila, Marina and
                  Zhang, Tong},
  title        = {Average-Reward Off-Policy Policy Evaluation with Function Approximation},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning,
                  {ICML} 2021, 18-24 July 2021, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {139},
  pages        = {12578--12588},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {http://proceedings.mlr.press/v139/zhang21u.html},
  timestamp    = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/ZhangWSW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZhangYW21,
  author       = {Zhang, Shangtong and
                  Yao, Hengshuai and
                  Whiteson, Shimon},
  editor       = {Meila, Marina and
                  Zhang, Tong},
  title        = {Breaking the Deadly Triad with a Target Network},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning,
                  {ICML} 2021, 18-24 July 2021, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {139},
  pages        = {12621--12631},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {http://proceedings.mlr.press/v139/zhang21y.html},
  timestamp    = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/ZhangYW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ijcai/ZhangBW21,
  author       = {Zhang, Shangtong and
                  Boehmer, Wendelin and
                  Whiteson, Shimon},
  editor       = {Zhou, Zhi{-}Hua},
  title        = {Deep Residual Reinforcement Learning (Extended Abstract)},
  booktitle    = {Proceedings of the Thirtieth International Joint Conference on Artificial
                  Intelligence, {IJCAI} 2021, Virtual Event / Montreal, Canada, 19-27
                  August 2021},
  pages        = {4869--4873},
  publisher    = {ijcai.org},
  year         = {2021},
  url          = {https://doi.org/10.24963/ijcai.2021/668},
  doi          = {10.24963/IJCAI.2021/668},
  timestamp    = {Wed, 25 Aug 2021 17:11:16 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/ZhangBW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2101-02808,
  author       = {Zhang, Shangtong and
                  Wan, Yi and
                  Sutton, Richard S. and
                  Whiteson, Shimon},
  title        = {Average-Reward Off-Policy Policy Evaluation with Function Approximation},
  journal      = {CoRR},
  volume       = {abs/2101.02808},
  year         = {2021},
  url          = {https://arxiv.org/abs/2101.02808},
  eprinttype   = {arXiv},
  eprint       = {2101.02808},
  timestamp    = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2101-02808.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2101-08862,
  author       = {Zhang, Shangtong and
                  Yao, Hengshuai and
                  Whiteson, Shimon},
  title        = {Breaking the Deadly Triad with a Target Network},
  journal      = {CoRR},
  volume       = {abs/2101.08862},
  year         = {2021},
  url          = {https://arxiv.org/abs/2101.08862},
  eprinttype   = {arXiv},
  eprint       = {2101.08862},
  timestamp    = {Sat, 30 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2101-08862.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2107-05405,
  author       = {Jiang, Ray and
                  Zhang, Shangtong and
                  Chelu, Veronica and
                  White, Adam and
                  van Hasselt, Hado},
  title        = {Learning Expected Emphatic Traces for Deep {RL}},
  journal      = {CoRR},
  volume       = {abs/2107.05405},
  year         = {2021},
  url          = {https://arxiv.org/abs/2107.05405},
  eprinttype   = {arXiv},
  eprint       = {2107.05405},
  timestamp    = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2107-05405.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2108-05338,
  author       = {Zhang, Shangtong and
                  Whiteson, Shimon},
  title        = {Truncated Emphatic Temporal Difference Methods for Prediction and
                  Control},
  journal      = {CoRR},
  volume       = {abs/2108.05338},
  year         = {2021},
  url          = {https://arxiv.org/abs/2108.05338},
  eprinttype   = {arXiv},
  eprint       = {2108.05338},
  timestamp    = {Wed, 18 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2108-05338.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname "Tachet des Combes" is braced so that BibTeX does not
% treat the lowercase "des" as a von-particle and reduce the surname to "Combes".
@article{DBLP:journals/corr/abs-2111-02997,
  author       = {Zhang, Shangtong and
                  {Tachet des Combes}, Remi and
                  Laroche, Romain},
  title        = {Global Optimality and Finite Sample Analysis of Softmax Off-Policy
                  Actor Critic under State Distribution Mismatch},
  journal      = {CoRR},
  volume       = {abs/2111.02997},
  year         = {2021},
  url          = {https://arxiv.org/abs/2111.02997},
  eprinttype   = {arXiv},
  eprint       = {2111.02997},
  timestamp    = {Fri, 05 Nov 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2111-02997.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/0001WLXZWX20,
  author       = {Song, Yuhang and
                  Wang, Jianyi and
                  Lukasiewicz, Thomas and
                  Xu, Zhenghua and
                  Zhang, Shangtong and
                  Wojcicki, Andrzej and
                  Xu, Mai},
  title        = {Mega-Reward: Achieving Human-Level Play without Extrinsic Rewards},
  booktitle    = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2020, The Thirty-Second Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA,
                  February 7-12, 2020},
  pages        = {5826--5833},
  publisher    = {{AAAI} Press},
  year         = {2020},
  url          = {https://doi.org/10.1609/aaai.v34i04.6040},
  doi          = {10.1609/AAAI.V34I04.6040},
  timestamp    = {Fri, 08 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/0001WLXZWX20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% NOTE(review): the first editor is entered with the compound surname
% "El Fallah Seghrouchni" (comma form keeps all three words in the last name)
% instead of letting BibTeX take only "Seghrouchni" - confirm preferred form.
@inproceedings{DBLP:conf/atal/ZhangBW20,
  author       = {Zhang, Shangtong and
                  Boehmer, Wendelin and
                  Whiteson, Shimon},
  editor       = {El Fallah Seghrouchni, Amal and
                  Sukthankar, Gita and
                  An, Bo and
                  Yorke{-}Smith, Neil},
  title        = {Deep Residual Reinforcement Learning},
  booktitle    = {Proceedings of the 19th International Conference on Autonomous Agents
                  and Multiagent Systems, {AAMAS} '20, Auckland, New Zealand, May 9-13,
                  2020},
  pages        = {1611--1619},
  publisher    = {International Foundation for Autonomous Agents and Multiagent Systems},
  year         = {2020},
  url          = {https://dl.acm.org/doi/10.5555/3398761.3398946},
  doi          = {10.5555/3398761.3398946},
  timestamp    = {Tue, 26 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/ZhangBW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZhangLW20,
  author       = {Zhang, Shangtong and
                  Liu, Bo and
                  Whiteson, Shimon},
  title        = {GradientDICE: Rethinking Generalized Offline Estimation of Stationary
                  Values},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {11194--11203},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/zhang20r.html},
  timestamp    = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/ZhangLW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZhangLYW20,
  author       = {Zhang, Shangtong and
                  Liu, Bo and
                  Yao, Hengshuai and
                  Whiteson, Shimon},
  title        = {Provably Convergent Two-Timescale Off-Policy Actor-Critic with Function
                  Approximation},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {11204--11213},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/zhang20s.html},
  timestamp    = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/ZhangLYW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/ZhangVW20,
  author       = {Zhang, Shangtong and
                  Veeriah, Vivek and
                  Whiteson, Shimon},
  editor       = {Larochelle, Hugo and
                  Ranzato, Marc'Aurelio and
                  Hadsell, Raia and
                  Balcan, Maria{-}Florina and
                  Lin, Hsuan{-}Tien},
  title        = {Learning Retrospective Knowledge with Reverse Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/e6cbc650cd5798a05dfd0f51d14cde5c-Abstract.html},
  timestamp    = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/ZhangVW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2001-11113,
  author       = {Zhang, Shangtong and
                  Liu, Bo and
                  Whiteson, Shimon},
  title        = {GradientDICE: Rethinking Generalized Offline Estimation of Stationary
                  Values},
  journal      = {CoRR},
  volume       = {abs/2001.11113},
  year         = {2020},
  url          = {https://arxiv.org/abs/2001.11113},
  eprinttype   = {arXiv},
  eprint       = {2001.11113},
  timestamp    = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2001-11113.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2004-10888,
  author       = {Zhang, Shangtong and
                  Liu, Bo and
                  Whiteson, Shimon},
  title        = {Per-Step Reward: {A} New Perspective for Risk-Averse Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/2004.10888},
  year         = {2020},
  url          = {https://arxiv.org/abs/2004.10888},
  eprinttype   = {arXiv},
  eprint       = {2004.10888},
  timestamp    = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2004-10888.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2007-06703,
  author       = {Zhang, Shangtong and
                  Veeriah, Vivek and
                  Whiteson, Shimon},
  title        = {Learning Retrospective Knowledge with Reverse Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2007.06703},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.06703},
  eprinttype   = {arXiv},
  eprint       = {2007.06703},
  timestamp    = {Tue, 21 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-06703.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% The compound surname "Tachet des Combes" is braced so that BibTeX does not
% treat the lowercase "des" as a von-particle and reduce the surname to "Combes".
@article{DBLP:journals/corr/abs-2010-01069,
  author       = {Zhang, Shangtong and
                  Laroche, Romain and
                  van Seijen, Harm and
                  Whiteson, Shimon and
                  {Tachet des Combes}, Remi},
  title        = {A Deeper Look at Discounting Mismatch in Actor-Critic Algorithms},
  journal      = {CoRR},
  volume       = {abs/2010.01069},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.01069},
  eprinttype   = {arXiv},
  eprint       = {2010.01069},
  timestamp    = {Mon, 12 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-01069.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/ZhangY19,
  author       = {Zhang, Shangtong and
                  Yao, Hengshuai},
  title        = {{ACE:} An Actor Ensemble Algorithm for Continuous Control with Tree
                  Search},
  booktitle    = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2019, The Thirty-First Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii,
                  USA, January 27 - February 1, 2019},
  pages        = {5789--5796},
  publisher    = {{AAAI} Press},
  year         = {2019},
  url          = {https://doi.org/10.1609/aaai.v33i01.33015789},
  doi          = {10.1609/AAAI.V33I01.33015789},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/ZhangY19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/ZhangY19a,
  author       = {Zhang, Shangtong and
                  Yao, Hengshuai},
  title        = {{QUOTA:} The Quantile Option Architecture for Reinforcement Learning},
  booktitle    = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2019, The Thirty-First Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii,
                  USA, January 27 - February 1, 2019},
  pages        = {5797--5804},
  publisher    = {{AAAI} Press},
  year         = {2019},
  url          = {https://doi.org/10.1609/aaai.v33i01.33015797},
  doi          = {10.1609/AAAI.V33I01.33015797},
  timestamp    = {Tue, 02 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/aaai/ZhangY19a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/atal/MavrinZYK19,
  author       = {Mavrin, Borislav and
                  Zhang, Shangtong and
                  Yao, Hengshuai and
                  Kong, Linglong},
  editor       = {Elkind, Edith and
                  Veloso, Manuela and
                  Agmon, Noa and
                  Taylor, Matthew E.},
  title        = {Exploration in the Face of Parametric and Intrinsic Uncertainties},
  booktitle    = {Proceedings of the 18th International Conference on Autonomous Agents
                  and MultiAgent Systems, {AAMAS} '19, Montreal, QC, Canada, May 13-17,
                  2019},
  pages        = {2117--2119},
  publisher    = {International Foundation for Autonomous Agents and Multiagent Systems},
  year         = {2019},
  url          = {http://dl.acm.org/citation.cfm?id=3332029},
  timestamp    = {Wed, 29 May 2019 16:36:58 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/MavrinZYK19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/ZhangBW19,
  author       = {Zhang, Shangtong and
                  Boehmer, Wendelin and
                  Whiteson, Shimon},
  editor       = {Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Beygelzimer, Alina and
                  d'Alch{\'{e}}{-}Buc, Florence and
                  Fox, Emily B. and
                  Garnett, Roman},
  title        = {Generalized Off-Policy Actor-Critic},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, NeurIPS 2019, December
                  8-14, 2019, Vancouver, BC, Canada},
  pages        = {1999--2009},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/0e095e054ee94774d6a496099eb1cf6a-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ZhangBW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/ZhangW19a,
  author       = {Zhang, Shangtong and
                  Whiteson, Shimon},
  editor       = {Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Beygelzimer, Alina and
                  d'Alch{\'{e}}{-}Buc, Florence and
                  Fox, Emily B. and
                  Garnett, Roman},
  title        = {{DAC:} The Double Actor-Critic Architecture for Learning Options},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, NeurIPS 2019, December
                  8-14, 2019, Vancouver, BC, Canada},
  pages        = {2010--2020},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/4f284803bd0966cc24fa8683a34afc6e-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/ZhangW19a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1903-11329,
  author       = {Zhang, Shangtong and
                  Boehmer, Wendelin and
                  Whiteson, Shimon},
  title        = {Generalized Off-Policy Actor-Critic},
  journal      = {CoRR},
  volume       = {abs/1903.11329},
  year         = {2019},
  url          = {http://arxiv.org/abs/1903.11329},
  eprinttype   = {arXiv},
  eprint       = {1903.11329},
  timestamp    = {Tue, 02 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1903-11329.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1904-12691,
  author       = {Zhang, Shangtong and
                  Whiteson, Shimon},
  title        = {{DAC:} The Double Actor-Critic Architecture for Learning Options},
  journal      = {CoRR},
  volume       = {abs/1904.12691},
  year         = {2019},
  url          = {http://arxiv.org/abs/1904.12691},
  eprinttype   = {arXiv},
  eprint       = {1904.12691},
  timestamp    = {Thu, 02 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1904-12691.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1905-01072,
  author       = {Zhang, Shangtong and
                  Boehmer, Wendelin and
                  Whiteson, Shimon},
  title        = {Deep Residual Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1905.01072},
  year         = {2019},
  url          = {http://arxiv.org/abs/1905.01072},
  eprinttype   = {arXiv},
  eprint       = {1905.01072},
  timestamp    = {Mon, 27 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1905-01072.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1905.04640, listed by dblp under CoRR (2019).
@article{DBLP:journals/corr/abs-1905-04640,
  author     = {Song, Yuhang and Wang, Jianyi and Lukasiewicz, Thomas and
                Xu, Zhenghua and Zhang, Shangtong and Xu, Mai},
  title      = {Mega-Reward: Achieving Human-Level Play without Extrinsic Rewards},
  journal    = {CoRR},
  volume     = {abs/1905.04640},
  year       = {2019},
  eprinttype = {arXiv},
  eprint     = {1905.04640},
  url        = {http://arxiv.org/abs/1905.04640},
  timestamp  = {Wed, 20 Nov 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1905-04640.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1905.06125, listed by dblp under CoRR (2019).
@article{DBLP:journals/corr/abs-1905-06125,
  author     = {Mavrin, Borislav and Zhang, Shangtong and Yao, Hengshuai and
                Kong, Linglong and Wu, Kaiwen and Yu, Yaoliang},
  title      = {Distributional Reinforcement Learning for Efficient Exploration},
  journal    = {CoRR},
  volume     = {abs/1905.06125},
  year       = {2019},
  eprinttype = {arXiv},
  eprint     = {1905.06125},
  url        = {http://arxiv.org/abs/1905.06125},
  timestamp  = {Tue, 28 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1905-06125.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1911.04384, listed by dblp under CoRR (2019).
@article{DBLP:journals/corr/abs-1911-04384,
  author     = {Zhang, Shangtong and Liu, Bo and Yao, Hengshuai and
                Whiteson, Shimon},
  title      = {Provably Convergent Off-Policy Actor-Critic with Function Approximation},
  journal    = {CoRR},
  volume     = {abs/1911.04384},
  year       = {2019},
  eprinttype = {arXiv},
  eprint     = {1911.04384},
  url        = {http://arxiv.org/abs/1911.04384},
  timestamp  = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-04384.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: J. Open Source Softw., volume 3, number 26 (2018).
@article{DBLP:journals/jossw/CurtinELMGZ18,
  author    = {Curtin, Ryan R. and Edel, Marcus and Lozhnikov, Mikhail and
               Mentekidis, Yannis and Ghaisas, Sumedh and Zhang, Shangtong},
  title     = {mlpack 3: a fast, flexible machine learning library},
  journal   = {J. Open Source Softw.},
  volume    = {3},
  number    = {26},
  pages     = {726},
  year      = {2018},
  doi       = {10.21105/JOSS.00726},
  url       = {https://doi.org/10.21105/joss.00726},
  timestamp = {Tue, 16 Aug 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jossw/CurtinELMGZ18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1811.02073, listed by dblp under CoRR (2018).
% Title: only the acronym is brace-protected; the colon sits outside the
% group. BibTeX's sentence-casing detects a colon only at brace depth zero,
% so this keeps the capital on the first word of the subtitle.
@article{DBLP:journals/corr/abs-1811-02073,
  author     = {Zhang, Shangtong and Mavrin, Borislav and Kong, Linglong and
                Liu, Bo and Yao, Hengshuai},
  title      = {{QUOTA}: The Quantile Option Architecture for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1811.02073},
  year       = {2018},
  eprinttype = {arXiv},
  eprint     = {1811.02073},
  url        = {http://arxiv.org/abs/1811.02073},
  timestamp  = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-02073.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1811.02696, listed by dblp under CoRR (2018).
% Title: only the acronym is brace-protected; the colon sits outside the
% group. BibTeX's sentence-casing detects a colon only at brace depth zero,
% so this keeps the capital on the first word of the subtitle.
@article{DBLP:journals/corr/abs-1811-02696,
  author     = {Zhang, Shangtong and Chen, Hao and Yao, Hengshuai},
  title      = {{ACE}: An Actor Ensemble Algorithm for Continuous Control with
                Tree Search},
  journal    = {CoRR},
  volume     = {abs/1811.02696},
  year       = {2018},
  eprinttype = {arXiv},
  eprint     = {1811.02696},
  url        = {http://arxiv.org/abs/1811.02696},
  timestamp  = {Thu, 22 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-02696.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ECML PKDD 2017 paper, Lecture Notes in Computer Science volume 10534.
% The doi and url fields hold the raw identifier: the underscore is not
% backslash-escaped, because these fields are meant to be consumed verbatim
% and an escape would leave a literal backslash inside the resolved link.
% NOTE(review): a legacy style that typesets the doi field as ordinary text
% would need the escape restored -- confirm against the style in use.
@inproceedings{DBLP:conf/pkdd/VeeriahZS17,
  author    = {Veeriah, Vivek and Zhang, Shangtong and Sutton, Richard S.},
  editor    = {Ceci, Michelangelo and Hollm{\'{e}}n, Jaakko and
               Todorovski, Ljupco and Vens, Celine and Dzeroski, Saso},
  title     = {Crossprop: Learning Representations by Stochastic Meta-Gradient
               Descent in Neural Networks},
  booktitle = {Machine Learning and Knowledge Discovery in Databases - European
               Conference, {ECML} {PKDD} 2017, Skopje, Macedonia, September 18-22,
               2017, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {10534},
  pages     = {445--459},
  publisher = {Springer},
  year      = {2017},
  doi       = {10.1007/978-3-319-71249-9_27},
  url       = {https://doi.org/10.1007/978-3-319-71249-9_27},
  timestamp = {Tue, 14 May 2019 10:00:47 +0200},
  biburl    = {https://dblp.org/rec/conf/pkdd/VeeriahZS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1712.00006, listed by dblp under CoRR (2017).
@article{DBLP:journals/corr/abs-1712-00006,
  author     = {Zhang, Shangtong and Za{\"{\i}}ane, Osmar R.},
  title      = {Comparing Deep Reinforcement Learning and Evolutionary Methods
                in Continuous Control},
  journal    = {CoRR},
  volume     = {abs/1712.00006},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1712.00006},
  url        = {http://arxiv.org/abs/1712.00006},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1712-00006.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1712.01275, listed by dblp under CoRR (2017).
@article{DBLP:journals/corr/abs-1712-01275,
  author     = {Zhang, Shangtong and Sutton, Richard S.},
  title      = {A Deeper Look at Experience Replay},
  journal    = {CoRR},
  volume     = {abs/1712.01275},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1712.01275},
  url        = {http://arxiv.org/abs/1712.01275},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1712-01275.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper in the 2015 ACM multimedia retrieval proceedings
% (Shanghai), pages 379-386.
@inproceedings{DBLP:conf/mir/ZhangZZLQHZW15,
  author    = {Zhang, Pengjing and Zheng, Xiaoqing and Zhang, Wenqiang and
               Li, Siyan and Qian, Sheng and He, Wenqi and
               Zhang, Shangtong and Wang, Ziyuan},
  editor    = {Hauptmann, Alexander G. and Ngo, Chong{-}Wah and
               Xue, Xiangyang and Jiang, Yu{-}Gang and Snoek, Cees and
               Vasconcelos, Nuno},
  title     = {A Deep Neural Network for Modeling Music},
  booktitle = {Proceedings of the 5th {ACM} on International Conference on
               Multimedia Retrieval, Shanghai, China, June 23-26, 2015},
  pages     = {379--386},
  publisher = {{ACM}},
  year      = {2015},
  doi       = {10.1145/2671188.2749367},
  url       = {https://doi.org/10.1145/2671188.2749367},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/mir/ZhangZZLQHZW15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics