BibTeX records: Abbas Abdolmaleki

download as .bib file

% RoboCat, TMLR 2024 journal article. The camel-case system name is braced so
% sentence-casing styles keep it intact; the journal is stored under its full name.
@article{DBLP:journals/tmlr/BousmalisVRDLVD24,
  author       = {Konstantinos Bousmalis and Giulia Vezzani and Dushyant Rao and
                  Coline Manon Devin and Alex X. Lee and
                  Maria Bauz{\'{a}} Villalonga and Todor Davchev and
                  Yuxiang Zhou and Agrim Gupta and Akhil Raju and
                  Antoine Laurens and Claudio Fantacci and Valentin Dalibard and
                  Martina Zambelli and Murilo Fernandes Martins and
                  Rugile Pevceviciute and Michiel Blokzijl and Misha Denil and
                  Nathan Batchelor and Thomas Lampe and Emilio Parisotto and
                  Konrad Zolna and Scott E. Reed and
                  Sergio G{\'{o}}mez Colmenarejo and Jon Scholz and
                  Abbas Abdolmaleki and Oliver Groth and
                  Jean{-}Baptiste Regli and Oleg Sushkov and
                  Thomas Roth{\"{o}}rl and Jos{\'{e}} Enrique Chen and
                  Yusuf Aytar and Dave Barker and Joy Ortiz and
                  Martin A. Riedmiller and Jost Tobias Springenberg and
                  Raia Hadsell and Francesco Nori and Nicolas Heess},
  title        = {{RoboCat}: {A} Self-Improving Generalist Agent for Robotic
                  Manipulation},
  year         = {2024},
  journal      = {Transactions on Machine Learning Research},
  volume       = {2024},
  url          = {https://openreview.net/forum?id=vsCpILiWHu},
  biburl       = {https://dblp.org/rec/journals/tmlr/BousmalisVRDLVD24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Thu, 08 Aug 2024 01:00:00 +0200}
}
% ICML 2024 conference paper (OpenReview); the CoRR entry abs-2402-05546 lists
% the same title and authors.
@inproceedings{DBLP:conf/icml/SpringenbergA0G24,
  author       = {Jost Tobias Springenberg and Abbas Abdolmaleki and
                  Jingwei Zhang and Oliver Groth and Michael Bloesch and
                  Thomas Lampe and Philemon Brakel and Sarah Bechtle and
                  Steven Kapturowski and Roland Hafner and Nicolas Heess and
                  Martin A. Riedmiller},
  title        = {Offline Actor-Critic Reinforcement Learning Scales to Large
                  Models},
  year         = {2024},
  booktitle    = {Forty-first International Conference on Machine Learning,
                  {ICML} 2024, Vienna, Austria, July 21-27, 2024},
  publisher    = {OpenReview.net},
  url          = {https://openreview.net/forum?id=tl2qmO5kpD},
  biburl       = {https://dblp.org/rec/conf/icml/SpringenbergA0G24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Mon, 02 Sep 2024 16:45:29 +0200}
}
% ICRA 2024 conference paper; the CoRR entry abs-2312-11374 lists the same
% title and authors.
@inproceedings{DBLP:conf/icra/LampeABHSBGHHNW24,
  author       = {Thomas Lampe and Abbas Abdolmaleki and Sarah Bechtle and
                  Sandy H. Huang and Jost Tobias Springenberg and
                  Michael Bloesch and Oliver Groth and Roland Hafner and
                  Tim Hertweck and Michael Neunert and Markus Wulfmeier and
                  Jingwei Zhang and Francesco Nori and Nicolas Heess and
                  Martin A. Riedmiller},
  title        = {Mastering Stacking of Diverse Shapes with Large-Scale
                  Iterative Reinforcement Learning on Real Robots},
  year         = {2024},
  booktitle    = {{IEEE} International Conference on Robotics and Automation,
                  {ICRA} 2024, Yokohama, Japan, May 13-17, 2024},
  pages        = {7772--7779},
  publisher    = {{IEEE}},
  doi          = {10.1109/ICRA57147.2024.10610297},
  url          = {https://doi.org/10.1109/ICRA57147.2024.10610297},
  biburl       = {https://dblp.org/rec/conf/icra/LampeABHSBGHHNW24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Mon, 19 Aug 2024 15:58:53 +0200}
}
% L4DC 2024 conference paper, published in PMLR volume 242.
@inproceedings{DBLP:conf/l4dc/BhardwajLNRABWR24,
  author       = {Mohak Bhardwaj and Thomas Lampe and Michael Neunert and
                  Francesco Romano and Abbas Abdolmaleki and
                  Arunkumar Byravan and Markus Wulfmeier and
                  Martin A. Riedmiller and Jonas Buchli},
  title        = {Real-world fluid directed rigid body control via deep
                  reinforcement learning},
  year         = {2024},
  booktitle    = {6th Annual Learning for Dynamics {\&} Control Conference,
                  15-17 July 2024, University of Oxford, Oxford, {UK}},
  editor       = {Alessandro Abate and Mark Cannon and Kostas Margellos and
                  Antonis Papachristodoulou},
  series       = {Proceedings of Machine Learning Research},
  volume       = {242},
  pages        = {414--427},
  publisher    = {{PMLR}},
  url          = {https://proceedings.mlr.press/v242/bhardwaj24a.html},
  biburl       = {https://dblp.org/rec/conf/l4dc/BhardwajLNRABWR24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Fri, 05 Jul 2024 01:00:00 +0200}
}
% arXiv preprint 2402.05546, recorded by DBLP under CoRR.
@article{DBLP:journals/corr/abs-2402-05546,
  author       = {Jost Tobias Springenberg and Abbas Abdolmaleki and
                  Jingwei Zhang and Oliver Groth and Michael Bloesch and
                  Thomas Lampe and Philemon Brakel and Sarah Bechtle and
                  Steven Kapturowski and Roland Hafner and Nicolas Heess and
                  Martin A. Riedmiller},
  title        = {Offline Actor-Critic Reinforcement Learning Scales to Large
                  Models},
  year         = {2024},
  journal      = {CoRR},
  volume       = {abs/2402.05546},
  doi          = {10.48550/ARXIV.2402.05546},
  url          = {https://doi.org/10.48550/arXiv.2402.05546},
  eprint       = {2402.05546},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2402-05546.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Wed, 14 Feb 2024 00:00:00 +0100}
}
% arXiv preprint 2402.06102, recorded by DBLP under CoRR.
@article{DBLP:journals/corr/abs-2402-06102,
  author       = {Mohak Bhardwaj and Thomas Lampe and Michael Neunert and
                  Francesco Romano and Abbas Abdolmaleki and
                  Arunkumar Byravan and Markus Wulfmeier and
                  Martin A. Riedmiller and Jonas Buchli},
  title        = {Real-World Fluid Directed Rigid Body Control via Deep
                  Reinforcement Learning},
  year         = {2024},
  journal      = {CoRR},
  volume       = {abs/2402.06102},
  doi          = {10.48550/ARXIV.2402.06102},
  url          = {https://doi.org/10.48550/arXiv.2402.06102},
  eprint       = {2402.06102},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2402-06102.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Fri, 16 Feb 2024 00:00:00 +0100}
}
% SkillS, TMLR 2023 journal article. The mixed-case method name is braced so
% sentence-casing styles keep it intact; the journal is stored under its full name.
@article{DBLP:journals/tmlr/VezzaniTWRAMHHH23,
  author       = {Giulia Vezzani and Dhruva Tirumala and Markus Wulfmeier and
                  Dushyant Rao and Abbas Abdolmaleki and Ben Moran and
                  Tuomas Haarnoja and Jan Humplik and Roland Hafner and
                  Michael Neunert and Claudio Fantacci and Tim Hertweck and
                  Thomas Lampe and Fereshteh Sadeghi and Nicolas Heess and
                  Martin A. Riedmiller},
  title        = {{SkillS}: Adaptive Skill Sequencing for Efficient
                  Temporally-Extended Exploration},
  year         = {2023},
  journal      = {Transactions on Machine Learning Research},
  volume       = {2023},
  url          = {https://openreview.net/forum?id=JwGKVpRfVD},
  biburl       = {https://dblp.org/rec/journals/tmlr/VezzaniTWRAMHHH23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Thu, 01 Aug 2024 01:00:00 +0200}
}
% arXiv preprint 2302.12617, recorded by DBLP under CoRR.
@article{DBLP:journals/corr/abs-2302-12617,
  author       = {Jingwei Zhang and Jost Tobias Springenberg and
                  Arunkumar Byravan and Leonard Hasenclever and
                  Abbas Abdolmaleki and Dushyant Rao and Nicolas Heess and
                  Martin A. Riedmiller},
  title        = {Leveraging Jumpy Models for Planning and Fast Learning in
                  Robotic Domains},
  year         = {2023},
  journal      = {CoRR},
  volume       = {abs/2302.12617},
  doi          = {10.48550/ARXIV.2302.12617},
  url          = {https://doi.org/10.48550/arXiv.2302.12617},
  eprint       = {2302.12617},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2302-12617.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Tue, 28 Feb 2023 00:00:00 +0100}
}
% RoboCat, arXiv preprint 2306.11706 (CoRR). The camel-case system name is
% braced so sentence-casing styles do not lowercase its inner capital.
@article{DBLP:journals/corr/abs-2306-11706,
  author       = {Konstantinos Bousmalis and Giulia Vezzani and Dushyant Rao and
                  Coline Devin and Alex X. Lee and Maria Bauz{\'{a}} and
                  Todor Davchev and Yuxiang Zhou and Agrim Gupta and
                  Akhil Raju and Antoine Laurens and Claudio Fantacci and
                  Valentin Dalibard and Martina Zambelli and
                  Murilo F. Martins and Rugile Pevceviciute and
                  Michiel Blokzijl and Misha Denil and Nathan Batchelor and
                  Thomas Lampe and Emilio Parisotto and Konrad Zolna and
                  Scott E. Reed and Sergio G{\'{o}}mez Colmenarejo and
                  Jon Scholz and Abbas Abdolmaleki and Oliver Groth and
                  Jean{-}Baptiste Regli and Oleg Sushkov and
                  Thomas Roth{\"{o}}rl and Jos{\'{e}} Enrique Chen and
                  Yusuf Aytar and Dave Barker and Joy Ortiz and
                  Martin A. Riedmiller and Jost Tobias Springenberg and
                  Raia Hadsell and Francesco Nori and Nicolas Heess},
  title        = {{RoboCat}: {A} Self-Improving Foundation Agent for Robotic
                  Manipulation},
  year         = {2023},
  journal      = {CoRR},
  volume       = {abs/2306.11706},
  doi          = {10.48550/ARXIV.2306.11706},
  url          = {https://doi.org/10.48550/arXiv.2306.11706},
  eprint       = {2306.11706},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2306-11706.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Thu, 08 Aug 2024 01:00:00 +0200}
}
% arXiv preprint 2308.15470, recorded by DBLP under CoRR.
@article{DBLP:journals/corr/abs-2308-15470,
  author       = {Shruti Mishra and Ankit Anand and Jordan Hoffmann and
                  Nicolas Heess and Martin A. Riedmiller and
                  Abbas Abdolmaleki and Doina Precup},
  title        = {Policy composition in reinforcement learning via
                  multi-objective policy optimization},
  year         = {2023},
  journal      = {CoRR},
  volume       = {abs/2308.15470},
  doi          = {10.48550/ARXIV.2308.15470},
  url          = {https://doi.org/10.48550/arXiv.2308.15470},
  eprint       = {2308.15470},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2308-15470.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200}
}
% arXiv preprint 2312.11374, recorded by DBLP under CoRR.
@article{DBLP:journals/corr/abs-2312-11374,
  author       = {Thomas Lampe and Abbas Abdolmaleki and Sarah Bechtle and
                  Sandy H. Huang and Jost Tobias Springenberg and
                  Michael Bloesch and Oliver Groth and Roland Hafner and
                  Tim Hertweck and Michael Neunert and Markus Wulfmeier and
                  Jingwei Zhang and Francesco Nori and Nicolas Heess and
                  Martin A. Riedmiller},
  title        = {Mastering Stacking of Diverse Shapes with Large-Scale
                  Iterative Reinforcement Learning on Real Robots},
  year         = {2023},
  journal      = {CoRR},
  volume       = {abs/2312.11374},
  doi          = {10.48550/ARXIV.2312.11374},
  url          = {https://doi.org/10.48550/arXiv.2312.11374},
  eprint       = {2312.11374},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2312-11374.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Tue, 16 Jan 2024 00:00:00 +0100}
}
% Nature 602(7897), 2022. The journal is stored under its full name rather than
% the abbreviation, leaving any abbreviating to the bibliography style.
@article{DBLP:journals/nature/DegraveFBNTCEHA22,
  author       = {Jonas Degrave and Federico Felici and Jonas Buchli and
                  Michael Neunert and Brendan D. Tracey and
                  Francesco Carpanese and Timo Ewalds and Roland Hafner and
                  Abbas Abdolmaleki and Diego de Las Casas and Craig Donner and
                  Leslie Fritz and Cristian Galperti and Andrea Huber and
                  James Keeling and Maria Tsimpoukelli and Jackie Kay and
                  Antoine Merle and Jean{-}Marc Moret and Seb Noury and
                  Federico Pesamosca and David Pfau and Olivier Sauter and
                  Cristian Sommariva and Stefano Coda and Basil Duval and
                  Ambrogio Fasoli and Pushmeet Kohli and Koray Kavukcuoglu and
                  Demis Hassabis and Martin A. Riedmiller},
  title        = {Magnetic control of tokamak plasmas through deep reinforcement
                  learning},
  year         = {2022},
  journal      = {Nature},
  volume       = {602},
  number       = {7897},
  pages        = {414--419},
  doi          = {10.1038/S41586-021-04301-9},
  url          = {https://doi.org/10.1038/s41586-021-04301-9},
  biburl       = {https://dblp.org/rec/journals/nature/DegraveFBNTCEHA22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200}
}
% Science Robotics 7(69), 2022. The journal is stored under its full name
% rather than the abbreviation. No pages field is present in the DBLP record.
@article{DBLP:journals/scirobotics/LiuLWMEHCTOASHM22,
  author       = {Siqi Liu and Guy Lever and Zhe Wang and Josh Merel and
                  S. M. Ali Eslami and Daniel Hennes and
                  Wojciech M. Czarnecki and Yuval Tassa and
                  Shayegan Omidshafiei and Abbas Abdolmaleki and
                  Noah Y. Siegel and Leonard Hasenclever and Luke Marris and
                  Saran Tunyasuvunakool and H. Francis Song and
                  Markus Wulfmeier and Paul Muller and Tuomas Haarnoja and
                  Brendan D. Tracey and Karl Tuyls and Thore Graepel and
                  Nicolas Heess},
  title        = {From motor control to team play in simulated humanoid football},
  year         = {2022},
  journal      = {Science Robotics},
  volume       = {7},
  number       = {69},
  doi          = {10.1126/SCIROBOTICS.ABO0235},
  url          = {https://doi.org/10.1126/scirobotics.abo0235},
  biburl       = {https://dblp.org/rec/journals/scirobotics/LiuLWMEHCTOASHM22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200}
}
% CoLLAs 2022 conference paper, PMLR volume 199. The venue acronym is braced in
% booktitle, matching how the other conference acronyms in this file are protected.
@inproceedings{DBLP:conf/collas/ZhouBHHARWH22,
  author       = {Wenxuan Zhou and Steven Bohez and Jan Humplik and
                  Nicolas Heess and Abbas Abdolmaleki and Dushyant Rao and
                  Markus Wulfmeier and Tuomas Haarnoja},
  title        = {Forgetting and Imbalance in Robot Lifelong Learning with
                  Off-policy Data},
  year         = {2022},
  booktitle    = {Conference on Lifelong Learning Agents, {CoLLAs} 2022, 22-24
                  August 2022, McGill University, Montr{\'{e}}al, Qu{\'{e}}bec,
                  Canada},
  editor       = {Sarath Chandar and Razvan Pascanu and Doina Precup},
  series       = {Proceedings of Machine Learning Research},
  volume       = {199},
  pages        = {294--309},
  publisher    = {{PMLR}},
  url          = {https://proceedings.mlr.press/v199/zhou22a.html},
  biburl       = {https://dblp.org/rec/conf/collas/ZhouBHHARWH22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Fri, 17 Feb 2023 16:29:10 +0100}
}
% ICLR 2022 conference paper (OpenReview).
@inproceedings{DBLP:conf/iclr/ByravanHTMITSAH22,
  author       = {Arunkumar Byravan and Leonard Hasenclever and Piotr Trochim and
                  Mehdi Mirza and Alessandro Davide Ialongo and Yuval Tassa and
                  Jost Tobias Springenberg and Abbas Abdolmaleki and
                  Nicolas Heess and Josh Merel and Martin A. Riedmiller},
  title        = {Evaluating Model-Based Planning and Planner Amortization for
                  Continuous Control},
  year         = {2022},
  booktitle    = {The Tenth International Conference on Learning
                  Representations, {ICLR} 2022, Virtual Event, April 25-29,
                  2022},
  publisher    = {OpenReview.net},
  url          = {https://openreview.net/forum?id=SS8F6tFX3-},
  biburl       = {https://dblp.org/rec/conf/iclr/ByravanHTMITSAH22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Sat, 20 Aug 2022 01:00:00 +0200}
}
% IROS 2022 conference paper; the CoRR entry abs-2205-03353 lists the same
% title and authors.
@inproceedings{DBLP:conf/iros/LeeDSZLAB22,
  author       = {Alex X. Lee and Coline Devin and Jost Tobias Springenberg and
                  Yuxiang Zhou and Thomas Lampe and Abbas Abdolmaleki and
                  Konstantinos Bousmalis},
  title        = {How to Spend Your Robot Time: Bridging Kickstarting and
                  Offline Reinforcement Learning for Vision-based Robotic
                  Manipulation},
  year         = {2022},
  booktitle    = {{IEEE/RSJ} International Conference on Intelligent Robots and
                  Systems, {IROS} 2022, Kyoto, Japan, October 23-27, 2022},
  pages        = {2468--2475},
  publisher    = {{IEEE}},
  doi          = {10.1109/IROS47612.2022.9981126},
  url          = {https://doi.org/10.1109/IROS47612.2022.9981126},
  biburl       = {https://dblp.org/rec/conf/iros/LeeDSZLAB22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Tue, 03 Jan 2023 14:18:21 +0100}
}
% arXiv preprint 2204.05893, recorded by DBLP under CoRR.
@article{DBLP:journals/corr/abs-2204-05893,
  author       = {Wenxuan Zhou and Steven Bohez and Jan Humplik and
                  Abbas Abdolmaleki and Dushyant Rao and Markus Wulfmeier and
                  Tuomas Haarnoja and Nicolas Heess},
  title        = {Offline Distillation for Robot Lifelong Learning with
                  Imbalanced Experience},
  year         = {2022},
  journal      = {CoRR},
  volume       = {abs/2204.05893},
  doi          = {10.48550/ARXIV.2204.05893},
  url          = {https://doi.org/10.48550/arXiv.2204.05893},
  eprint       = {2204.05893},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-05893.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200}
}
% arXiv preprint 2204.10256 (CoRR). The proper noun Gaussian is braced so
% sentence-casing styles cannot lowercase it.
@article{DBLP:journals/corr/abs-2204-10256,
  author       = {Bobak Shahriari and Abbas Abdolmaleki and Arunkumar Byravan and
                  Abe Friesen and Siqi Liu and Jost Tobias Springenberg and
                  Nicolas Heess and Matt Hoffman and Martin A. Riedmiller},
  title        = {Revisiting {Gaussian} mixture critics in off-policy
                  reinforcement learning: a sample-based approach},
  year         = {2022},
  journal      = {CoRR},
  volume       = {abs/2204.10256},
  doi          = {10.48550/ARXIV.2204.10256},
  url          = {https://doi.org/10.48550/arXiv.2204.10256},
  eprint       = {2204.10256},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-10256.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Thu, 11 May 2023 01:00:00 +0200}
}
% arXiv preprint 2205.03353, recorded by DBLP under CoRR.
@article{DBLP:journals/corr/abs-2205-03353,
  author       = {Alex X. Lee and Coline Devin and Jost Tobias Springenberg and
                  Yuxiang Zhou and Thomas Lampe and Abbas Abdolmaleki and
                  Konstantinos Bousmalis},
  title        = {How to Spend Your Robot Time: Bridging Kickstarting and
                  Offline Reinforcement Learning for Vision-based Robotic
                  Manipulation},
  year         = {2022},
  journal      = {CoRR},
  volume       = {abs/2205.03353},
  doi          = {10.48550/ARXIV.2205.03353},
  url          = {https://doi.org/10.48550/arXiv.2205.03353},
  eprint       = {2205.03353},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2205-03353.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Wed, 11 May 2022 01:00:00 +0200}
}
% SkillS, arXiv preprint 2211.13743 (CoRR). The mixed-case method name is
% braced so sentence-casing styles do not lowercase its trailing capital.
@article{DBLP:journals/corr/abs-2211-13743,
  author       = {Giulia Vezzani and Dhruva Tirumala and Markus Wulfmeier and
                  Dushyant Rao and Abbas Abdolmaleki and Ben Moran and
                  Tuomas Haarnoja and Jan Humplik and Roland Hafner and
                  Michael Neunert and Claudio Fantacci and Tim Hertweck and
                  Thomas Lampe and Fereshteh Sadeghi and Nicolas Heess and
                  Martin A. Riedmiller},
  title        = {{SkillS}: Adaptive Skill Sequencing for Efficient
                  Temporally-Extended Exploration},
  year         = {2022},
  journal      = {CoRR},
  volume       = {abs/2211.13743},
  doi          = {10.48550/ARXIV.2211.13743},
  url          = {https://doi.org/10.48550/arXiv.2211.13743},
  eprint       = {2211.13743},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-13743.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Tue, 29 Nov 2022 00:00:00 +0100}
}
% Conference on Robot Learning 2021 paper, PMLR volume 164.
@inproceedings{DBLP:conf/corl/HuangAVBMNBTHRH21,
  author       = {Sandy H. Huang and Abbas Abdolmaleki and Giulia Vezzani and
                  Philemon Brakel and Daniel J. Mankowitz and
                  Michael Neunert and Steven Bohez and Yuval Tassa and
                  Nicolas Heess and Martin A. Riedmiller and Raia Hadsell},
  title        = {A Constrained Multi-Objective Reinforcement Learning Framework},
  year         = {2021},
  booktitle    = {Conference on Robot Learning, 8-11 November 2021, London,
                  {UK}},
  editor       = {Aleksandra Faust and David Hsu and Gerhard Neumann},
  series       = {Proceedings of Machine Learning Research},
  volume       = {164},
  pages        = {883--893},
  publisher    = {{PMLR}},
  url          = {https://proceedings.mlr.press/v164/huang22a.html},
  biburl       = {https://dblp.org/rec/conf/corl/HuangAVBMNBTHRH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Wed, 19 Jan 2022 17:10:33 +0100}
}
% Conference on Robot Learning 2021 paper, PMLR volume 164.
@inproceedings{DBLP:conf/corl/LeeDZLBSBAGKFCR21,
  author       = {Alex X. Lee and Coline Manon Devin and Yuxiang Zhou and
                  Thomas Lampe and Konstantinos Bousmalis and
                  Jost Tobias Springenberg and Arunkumar Byravan and
                  Abbas Abdolmaleki and Nimrod Gileadi and David Khosid and
                  Claudio Fantacci and Jos{\'{e}} Enrique Chen and
                  Akhil Raju and Rae Jeong and Michael Neunert and
                  Antoine Laurens and Stefano Saliceti and Federico Casarini and
                  Martin A. Riedmiller and Raia Hadsell and Francesco Nori},
  title        = {Beyond Pick-and-Place: Tackling Robotic Stacking of Diverse
                  Shapes},
  year         = {2021},
  booktitle    = {Conference on Robot Learning, 8-11 November 2021, London,
                  {UK}},
  editor       = {Aleksandra Faust and David Hsu and Gerhard Neumann},
  series       = {Proceedings of Machine Learning Research},
  volume       = {164},
  pages        = {1089--1131},
  publisher    = {{PMLR}},
  url          = {https://proceedings.mlr.press/v164/lee22b.html},
  biburl       = {https://dblp.org/rec/conf/corl/LeeDZLBSBAGKFCR21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Thu, 08 Aug 2024 01:00:00 +0200}
}
% ICML 2021 conference paper, PMLR volume 139. The URL uses the https scheme,
% consistent with the other proceedings.mlr.press links in this file.
@inproceedings{DBLP:conf/icml/WulfmeierRHLAHN21,
  author       = {Markus Wulfmeier and Dushyant Rao and Roland Hafner and
                  Thomas Lampe and Abbas Abdolmaleki and Tim Hertweck and
                  Michael Neunert and Dhruva Tirumala and Noah Y. Siegel and
                  Nicolas Heess and Martin A. Riedmiller},
  title        = {Data-efficient Hindsight Off-policy Option Learning},
  year         = {2021},
  booktitle    = {Proceedings of the 38th International Conference on Machine
                  Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  editor       = {Marina Meila and Tong Zhang},
  series       = {Proceedings of Machine Learning Research},
  volume       = {139},
  pages        = {11340--11350},
  publisher    = {{PMLR}},
  url          = {https://proceedings.mlr.press/v139/wulfmeier21a.html},
  biburl       = {https://dblp.org/rec/conf/icml/WulfmeierRHLAHN21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Wed, 25 Aug 2021 01:00:00 +0200}
}
% arXiv preprint 2101.09458, recorded by DBLP under CoRR; the record has no doi field.
@article{DBLP:journals/corr/abs-2101-09458,
  author       = {William F. Whitney and Michael Bloesch and
                  Jost Tobias Springenberg and Abbas Abdolmaleki and
                  Martin A. Riedmiller},
  title        = {Rethinking Exploration for Sample-Efficient Policy Learning},
  year         = {2021},
  journal      = {CoRR},
  volume       = {abs/2101.09458},
  url          = {https://arxiv.org/abs/2101.09458},
  eprint       = {2101.09458},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2101-09458.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Sat, 30 Jan 2021 00:00:00 +0100}
}
% arXiv preprint 2105.12196, recorded by DBLP under CoRR; the record has no doi field.
@article{DBLP:journals/corr/abs-2105-12196,
  author       = {Siqi Liu and Guy Lever and Zhe Wang and Josh Merel and
                  S. M. Ali Eslami and Daniel Hennes and
                  Wojciech M. Czarnecki and Yuval Tassa and
                  Shayegan Omidshafiei and Abbas Abdolmaleki and
                  Noah Y. Siegel and Leonard Hasenclever and Luke Marris and
                  Saran Tunyasuvunakool and H. Francis Song and
                  Markus Wulfmeier and Paul Muller and Tuomas Haarnoja and
                  Brendan D. Tracey and Karl Tuyls and Thore Graepel and
                  Nicolas Heess},
  title        = {From Motor Control to Team Play in Simulated Humanoid Football},
  year         = {2021},
  journal      = {CoRR},
  volume       = {abs/2105.12196},
  url          = {https://arxiv.org/abs/2105.12196},
  eprint       = {2105.12196},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2105-12196.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Thu, 11 May 2023 01:00:00 +0200}
}
% arXiv preprint 2106.08199, recorded by DBLP under CoRR; the record has no doi field.
@article{DBLP:journals/corr/abs-2106-08199,
  author       = {Abbas Abdolmaleki and Sandy H. Huang and Giulia Vezzani and
                  Bobak Shahriari and Jost Tobias Springenberg and
                  Shruti Mishra and Dhruva TB and Arunkumar Byravan and
                  Konstantinos Bousmalis and Andr{\'{a}}s Gy{\"{o}}rgy and
                  Csaba Szepesv{\'{a}}ri and Raia Hadsell and Nicolas Heess and
                  Martin A. Riedmiller},
  title        = {On Multi-objective Policy Optimization as a Tool for
                  Reinforcement Learning},
  year         = {2021},
  journal      = {CoRR},
  volume       = {abs/2106.08199},
  url          = {https://arxiv.org/abs/2106.08199},
  eprint       = {2106.08199},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-08199.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200}
}
% arXiv preprint 2110.03363, recorded by DBLP under CoRR; the record has no doi field.
@article{DBLP:journals/corr/abs-2110-03363,
  author       = {Arunkumar Byravan and Leonard Hasenclever and Piotr Trochim and
                  Mehdi Mirza and Alessandro Davide Ialongo and Yuval Tassa and
                  Jost Tobias Springenberg and Abbas Abdolmaleki and
                  Nicolas Heess and Josh Merel and Martin A. Riedmiller},
  title        = {Evaluating model-based planning and planner amortization for
                  continuous control},
  year         = {2021},
  journal      = {CoRR},
  volume       = {abs/2110.03363},
  url          = {https://arxiv.org/abs/2110.03363},
  eprint       = {2110.03363},
  eprinttype   = {arXiv},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-03363.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  timestamp    = {Thu, 21 Oct 2021 01:00:00 +0200}
}
@article{DBLP:journals/corr/abs-2110-06192,
  author     = {Alex X. Lee and Coline Devin and Yuxiang Zhou and Thomas Lampe and
                Konstantinos Bousmalis and Jost Tobias Springenberg and
                Arunkumar Byravan and Abbas Abdolmaleki and Nimrod Gileadi and
                David Khosid and Claudio Fantacci and Jos{\'{e}} Enrique Chen and
                Akhil Raju and Rae Jeong and Michael Neunert and Antoine Laurens and
                Stefano Saliceti and Federico Casarini and Martin A. Riedmiller and
                Raia Hadsell and Francesco Nori},
  title      = {Beyond Pick-and-Place: Tackling Robotic Stacking of Diverse Shapes},
  journal    = {CoRR},
  volume     = {abs/2110.06192},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.06192},
  eprinttype = {arXiv},
  eprint     = {2110.06192},
  timestamp  = {Thu, 08 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-06192.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/MankowitzLJASSK20,
  author     = {Daniel J. Mankowitz and Nir Levine and Rae Jeong and
                Abbas Abdolmaleki and Jost Tobias Springenberg and Yuanyuan Shi and
                Jackie Kay and Todd Hester and Timothy A. Mann and
                Martin A. Riedmiller},
  title      = {Robust Reinforcement Learning for Continuous Control with Model Misspecification},
  booktitle  = {8th International Conference on Learning Representations, {ICLR} 2020,
                Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher  = {OpenReview.net},
  year       = {2020},
  url        = {https://openreview.net/forum?id=HJgC60EtwB},
  timestamp  = {Thu, 07 May 2020 17:11:47 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/MankowitzLJASSK20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/SiegelSBANLHHR20,
  author     = {Noah Y. Siegel and Jost Tobias Springenberg and Felix Berkenkamp and
                Abbas Abdolmaleki and Michael Neunert and Thomas Lampe and
                Roland Hafner and Nicolas Heess and Martin A. Riedmiller},
  title      = {Keep Doing What Worked: Behavior Modelling Priors for Offline
                Reinforcement Learning},
  booktitle  = {8th International Conference on Learning Representations, {ICLR} 2020,
                Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher  = {OpenReview.net},
  year       = {2020},
  url        = {https://openreview.net/forum?id=rke7geHtwH},
  timestamp  = {Thu, 07 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/SiegelSBANLHHR20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/SongASCSRNALTHB20,
  author     = {H. Francis Song and Abbas Abdolmaleki and Jost Tobias Springenberg and
                Aidan Clark and Hubert Soyer and Jack W. Rae and Seb Noury and
                Arun Ahuja and Siqi Liu and Dhruva Tirumala and Nicolas Heess and
                Dan Belov and Martin A. Riedmiller and Matthew M. Botvinick},
  title      = {{V-MPO:} On-Policy Maximum a Posteriori Policy Optimization for
                Discrete and Continuous Control},
  booktitle  = {8th International Conference on Learning Representations, {ICLR} 2020,
                Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher  = {OpenReview.net},
  year       = {2020},
  url        = {https://openreview.net/forum?id=SylOlp4FvH},
  timestamp  = {Thu, 11 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/SongASCSRNALTHB20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/AbdolmalekiHNS20,
  author     = {Abbas Abdolmaleki and Sandy H. Huang and Leonard Hasenclever and
                Michael Neunert and H. Francis Song and Martina Zambelli and
                Murilo F. Martins and Nicolas Heess and Raia Hadsell and
                Martin A. Riedmiller},
  title      = {A distributional view on multi-objective policy optimization},
  booktitle  = {Proceedings of the 37th International Conference on Machine Learning,
                {ICML} 2020, 13-18 July 2020, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {119},
  pages      = {11--22},
  publisher  = {{PMLR}},
  year       = {2020},
  url        = {http://proceedings.mlr.press/v119/abdolmaleki20a.html},
  timestamp  = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/AbdolmalekiHNS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/rss/WulfmeierAHSNSH20,
  author     = {Markus Wulfmeier and Abbas Abdolmaleki and Roland Hafner and
                Jost Tobias Springenberg and Michael Neunert and Noah Y. Siegel and
                Tim Hertweck and Thomas Lampe and Nicolas Heess and
                Martin A. Riedmiller},
  editor     = {Marc Toussaint and Antonio Bicchi and Tucker Hermans},
  title      = {Compositional Transfer in Hierarchical Reinforcement Learning},
  booktitle  = {Robotics: Science and Systems XVI, Virtual Event / Corvallis, Oregon,
                USA, July 12-16, 2020},
  year       = {2020},
  url        = {https://doi.org/10.15607/RSS.2020.XVI.054},
  doi        = {10.15607/RSS.2020.XVI.054},
  timestamp  = {Thu, 15 Jul 2021 18:53:52 +0200},
  biburl     = {https://dblp.org/rec/conf/rss/WulfmeierAHSNSH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2001-00449,
  author     = {Michael Neunert and Abbas Abdolmaleki and Markus Wulfmeier and
                Thomas Lampe and Jost Tobias Springenberg and Roland Hafner and
                Francesco Romano and Jonas Buchli and Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Continuous-Discrete Reinforcement Learning for Hybrid Control in Robotics},
  journal    = {CoRR},
  volume     = {abs/2001.00449},
  year       = {2020},
  url        = {http://arxiv.org/abs/2001.00449},
  eprinttype = {arXiv},
  eprint     = {2001.00449},
  timestamp  = {Fri, 10 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2001-00449.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-08396,
  author     = {Noah Y. Siegel and Jost Tobias Springenberg and Felix Berkenkamp and
                Abbas Abdolmaleki and Michael Neunert and Thomas Lampe and
                Roland Hafner and Nicolas Heess and Martin A. Riedmiller},
  title      = {Keep Doing What Worked: Behavioral Modelling Priors for Offline
                Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2002.08396},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.08396},
  eprinttype = {arXiv},
  eprint     = {2002.08396},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-08396.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2005-07513,
  author     = {Abbas Abdolmaleki and Sandy H. Huang and Leonard Hasenclever and
                Michael Neunert and H. Francis Song and Martina Zambelli and
                Murilo F. Martins and Nicolas Heess and Raia Hadsell and
                Martin A. Riedmiller},
  title      = {A Distributional View on Multi-Objective Policy Optimization},
  journal    = {CoRR},
  volume     = {abs/2005.07513},
  year       = {2020},
  url        = {https://arxiv.org/abs/2005.07513},
  eprinttype = {arXiv},
  eprint     = {2005.07513},
  timestamp  = {Fri, 22 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2005-07513.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2006-00979,
  author     = {Matt Hoffman and Bobak Shahriari and John Aslanides and
                Gabriel Barth{-}Maron and Feryal M. P. Behbahani and Tamara Norman and
                Abbas Abdolmaleki and Albin Cassirer and Fan Yang and Kate Baumli and
                Sarah Henderson and Alexander Novikov and
                Sergio G{\'{o}}mez Colmenarejo and Serkan Cabi and
                {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Tom Le Paine and Andrew Cowie and
                Ziyu Wang and Bilal Piot and Nando de Freitas},
  title      = {Acme: {A} Research Framework for Distributed Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2006.00979},
  year       = {2020},
  url        = {https://arxiv.org/abs/2006.00979},
  eprinttype = {arXiv},
  eprint     = {2006.00979},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2006-00979.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-15588,
  author     = {Markus Wulfmeier and Dushyant Rao and Roland Hafner and Thomas Lampe and
                Abbas Abdolmaleki and Tim Hertweck and Michael Neunert and
                Dhruva Tirumala and Noah Y. Siegel and Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Data-efficient Hindsight Off-policy Option Learning},
  journal    = {CoRR},
  volume     = {abs/2007.15588},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.15588},
  eprinttype = {arXiv},
  eprint     = {2007.15588},
  timestamp  = {Mon, 03 Aug 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-15588.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-05545,
  author     = {Jost Tobias Springenberg and Nicolas Heess and Daniel J. Mankowitz and
                Josh Merel and Arunkumar Byravan and Abbas Abdolmaleki and
                Jackie Kay and Jonas Degrave and Julian Schrittwieser and
                Yuval Tassa and Jonas Buchli and Dan Belov and Martin A. Riedmiller},
  title      = {Local Search for Policy Iteration in Continuous Control},
  journal    = {CoRR},
  volume     = {abs/2010.05545},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.05545},
  eprinttype = {arXiv},
  eprint     = {2010.05545},
  timestamp  = {Tue, 20 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-05545.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-15492,
  author     = {Giulia Vezzani and Michael Neunert and Markus Wulfmeier and Rae Jeong and
                Thomas Lampe and Noah Y. Siegel and Roland Hafner and
                Abbas Abdolmaleki and Martin A. Riedmiller and Francesco Nori},
  title      = {``What, not how'': Solving an under-actuated insertion task
                from scratch},
  journal    = {CoRR},
  volume     = {abs/2010.15492},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.15492},
  eprinttype = {arXiv},
  eprint     = {2010.15492},
  timestamp  = {Tue, 03 Nov 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-15492.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jirs/AbdolmalekiSLRN19,
  author     = {Abbas Abdolmaleki and David Sim{\~{o}}es and Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and Gerhard Neumann},
  title      = {Contextual Direct Policy Search - With Regularized Covariance
                Matrix Estimation},
  journal    = {J. Intell. Robotic Syst.},
  volume     = {96},
  number     = {2},
  pages      = {141--157},
  year       = {2019},
  url        = {https://doi.org/10.1007/s10846-018-0968-4},
  doi        = {10.1007/S10846-018-0968-4},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jirs/AbdolmalekiSLRN19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/corl/ByravanSAHNLSHR19,
  author     = {Arunkumar Byravan and Jost Tobias Springenberg and Abbas Abdolmaleki and
                Roland Hafner and Michael Neunert and Thomas Lampe and
                Noah Y. Siegel and Nicolas Heess and Martin A. Riedmiller},
  editor     = {Leslie Pack Kaelbling and Danica Kragic and Komei Sugiura},
  title      = {Imagined Value Gradients: Model-Based Policy Optimization with
                Transferable Latent Dynamics Models},
  booktitle  = {3rd Annual Conference on Robot Learning, CoRL 2019, Osaka, Japan,
                October 30 - November 1, 2019, Proceedings},
  series     = {Proceedings of Machine Learning Research},
  volume     = {100},
  pages      = {566--589},
  publisher  = {{PMLR}},
  year       = {2019},
  url        = {http://proceedings.mlr.press/v100/byravan20a.html},
  timestamp  = {Tue, 26 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/corl/ByravanSAHNLSHR19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/corl/NeunertAWLSHRBH19,
  author     = {Michael Neunert and Abbas Abdolmaleki and Markus Wulfmeier and
                Thomas Lampe and Jost Tobias Springenberg and Roland Hafner and
                Francesco Romano and Jonas Buchli and Nicolas Heess and
                Martin A. Riedmiller},
  editor     = {Leslie Pack Kaelbling and Danica Kragic and Komei Sugiura},
  title      = {Continuous-Discrete Reinforcement Learning for Hybrid Control in Robotics},
  booktitle  = {3rd Annual Conference on Robot Learning, CoRL 2019, Osaka, Japan,
                October 30 - November 1, 2019, Proceedings},
  series     = {Proceedings of Machine Learning Research},
  volume     = {100},
  pages      = {735--751},
  publisher  = {{PMLR}},
  year       = {2019},
  url        = {http://proceedings.mlr.press/v100/neunert20a.html},
  timestamp  = {Mon, 25 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/corl/NeunertAWLSHRBH19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/rss/SchwabSMNLAHHNR19,
  author     = {Devin Schwab and Jost Tobias Springenberg and
                Murilo Fernandes Martins and Michael Neunert and Thomas Lampe and
                Abbas Abdolmaleki and Tim Hertweck and Roland Hafner and
                Francesco Nori and Martin A. Riedmiller},
  editor     = {Antonio Bicchi and Hadas Kress{-}Gazit and Seth Hutchinson},
  title      = {Simultaneously Learning Vision and Feature-Based Control
                Policies for Real-World Ball-In-A-Cup},
  booktitle  = {Robotics: Science and Systems XV, University of Freiburg,
                Freiburg im Breisgau, Germany, June 22-26, 2019},
  year       = {2019},
  url        = {https://doi.org/10.15607/RSS.2019.XV.027},
  doi        = {10.15607/RSS.2019.XV.027},
  timestamp  = {Thu, 01 Apr 2021 15:25:13 +0200},
  biburl     = {https://dblp.org/rec/conf/rss/SchwabSMNLAHHNR19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1902-04623,
  author     = {Steven Bohez and Abbas Abdolmaleki and Michael Neunert and
                Jonas Buchli and Nicolas Heess and Raia Hadsell},
  title      = {Value constrained model-free continuous control},
  journal    = {CoRR},
  volume     = {abs/1902.04623},
  year       = {2019},
  url        = {http://arxiv.org/abs/1902.04623},
  eprinttype = {arXiv},
  eprint     = {1902.04623},
  timestamp  = {Tue, 21 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1902-04623.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1902-04706,
  author     = {Devin Schwab and Jost Tobias Springenberg and Murilo F. Martins and
                Thomas Lampe and Michael Neunert and Abbas Abdolmaleki and
                Tim Hertweck and Roland Hafner and Francesco Nori and
                Martin A. Riedmiller},
  title      = {Simultaneously Learning Vision and Feature-based Control
                Policies for Real-world Ball-in-a-Cup},
  journal    = {CoRR},
  volume     = {abs/1902.04706},
  year       = {2019},
  url        = {http://arxiv.org/abs/1902.04706},
  eprinttype = {arXiv},
  eprint     = {1902.04706},
  timestamp  = {Tue, 21 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1902-04706.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1906-07516,
  author     = {Daniel J. Mankowitz and Nir Levine and Rae Jeong and
                Abbas Abdolmaleki and Jost Tobias Springenberg and
                Timothy A. Mann and Todd Hester and Martin A. Riedmiller},
  title      = {Robust Reinforcement Learning for Continuous Control with Model Misspecification},
  journal    = {CoRR},
  volume     = {abs/1906.07516},
  year       = {2019},
  url        = {http://arxiv.org/abs/1906.07516},
  eprinttype = {arXiv},
  eprint     = {1906.07516},
  timestamp  = {Mon, 24 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1906-07516.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1906-11228,
  author     = {Markus Wulfmeier and Abbas Abdolmaleki and Roland Hafner and
                Jost Tobias Springenberg and Michael Neunert and Tim Hertweck and
                Thomas Lampe and Noah Y. Siegel and Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Regularized Hierarchical Policies for Compositional Transfer in Robotics},
  journal    = {CoRR},
  volume     = {abs/1906.11228},
  year       = {2019},
  url        = {http://arxiv.org/abs/1906.11228},
  eprinttype = {arXiv},
  eprint     = {1906.11228},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1906-11228.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1909-12238,
  author     = {H. Francis Song and Abbas Abdolmaleki and Jost Tobias Springenberg and
                Aidan Clark and Hubert Soyer and Jack W. Rae and Seb Noury and
                Arun Ahuja and Siqi Liu and Dhruva Tirumala and Nicolas Heess and
                Dan Belov and Martin A. Riedmiller and Matthew M. Botvinick},
  title      = {{V-MPO:} On-Policy Maximum a Posteriori Policy Optimization for
                Discrete and Continuous Control},
  journal    = {CoRR},
  volume     = {abs/1909.12238},
  year       = {2019},
  url        = {http://arxiv.org/abs/1909.12238},
  eprinttype = {arXiv},
  eprint     = {1909.12238},
  timestamp  = {Thu, 11 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1909-12238.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-00528,
  author     = {Shruti Mishra and Abbas Abdolmaleki and Arthur Guez and
                Piotr Trochim and Doina Precup},
  title      = {Augmenting learning using symmetry in a biologically-inspired domain},
  journal    = {CoRR},
  volume     = {abs/1910.00528},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.00528},
  eprinttype = {arXiv},
  eprint     = {1910.00528},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-00528.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-04142,
  author     = {Arunkumar Byravan and Jost Tobias Springenberg and Abbas Abdolmaleki and
                Roland Hafner and Michael Neunert and Thomas Lampe and
                Noah Y. Siegel and Nicolas Heess and Martin A. Riedmiller},
  title      = {Imagined Value Gradients: Model-Based Policy Optimization with
                Transferable Latent Dynamics Models},
  journal    = {CoRR},
  volume     = {abs/1910.04142},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.04142},
  eprinttype = {arXiv},
  eprint     = {1910.04142},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-04142.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-09471,
  author     = {Rae Jeong and Jackie Kay and Francesco Romano and Thomas Lampe and
                Thomas Roth{\"{o}}rl and Abbas Abdolmaleki and Tom Erez and
                Yuval Tassa and Francesco Nori},
  title      = {Modelling Generalized Forces with Reinforcement Learning for
                Sim-to-Real Transfer},
  journal    = {CoRR},
  volume     = {abs/1910.09471},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.09471},
  eprinttype = {arXiv},
  eprint     = {1910.09471},
  timestamp  = {Tue, 22 Oct 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-09471.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1911-01831,
  author     = {Jonas Degrave and Abbas Abdolmaleki and Jost Tobias Springenberg and
                Nicolas Heess and Martin A. Riedmiller},
  title      = {Quinoa: a {Q-function} You Infer Normalized Over Actions},
  journal    = {CoRR},
  volume     = {abs/1911.01831},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.01831},
  eprinttype = {arXiv},
  eprint     = {1911.01831},
  timestamp  = {Mon, 11 Nov 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-01831.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{DBLP:phd/pt/Abdolmaleki18,
  author     = {Abbas Abdolmaleki},
  title      = {Information theoretic stochastic search},
  school     = {University of Minho, Portugal},
  year       = {2018},
  url        = {https://hdl.handle.net/1822/59005},
  timestamp  = {Thu, 08 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/phd/pt/Abdolmaleki18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/AkrourAAPN18,
  author     = {Riad Akrour and Abbas Abdolmaleki and Hany Abdulsamad and
                Jan Peters and Gerhard Neumann},
  title      = {Model-Free Trajectory-based Policy Optimization with Monotonic Improvement},
  journal    = {J. Mach. Learn. Res.},
  volume     = {19},
  pages      = {14:1--14:25},
  year       = {2018},
  url        = {https://jmlr.org/papers/v19/17-329.html},
  timestamp  = {Wed, 11 Sep 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/AkrourAAPN18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/AbdolmalekiSTMH18,
  author     = {Abbas Abdolmaleki and Jost Tobias Springenberg and Yuval Tassa and
                R{\'{e}}mi Munos and Nicolas Heess and Martin A. Riedmiller},
  title      = {Maximum a Posteriori Policy Optimisation},
  booktitle  = {6th International Conference on Learning Representations, {ICLR} 2018,
                Vancouver, BC, Canada, April 30 - May 3, 2018,
                Conference Track Proceedings},
  publisher  = {OpenReview.net},
  year       = {2018},
  url        = {https://openreview.net/forum?id=S1ANxQW0b},
  timestamp  = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/AbdolmalekiSTMH18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/TangkarattAS18,
  author     = {Voot Tangkaratt and Abbas Abdolmaleki and Masashi Sugiyama},
  title      = {Guide Actor-Critic for Continuous Control},
  booktitle  = {6th International Conference on Learning Representations, {ICLR} 2018,
                Vancouver, BC, Canada, April 30 - May 3, 2018,
                Conference Track Proceedings},
  publisher  = {OpenReview.net},
  year       = {2018},
  url        = {https://openreview.net/forum?id=BJk59JZ0b},
  timestamp  = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/TangkarattAS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icra/BarbarosHAM18,
  author     = {Victor Barbaros and Herke van Hoof and Abbas Abdolmaleki and
                David Meger},
  title      = {Eager and Memory-Based Non-Parametric Stochastic Search Methods
                for Learning Control},
  booktitle  = {2018 {IEEE} International Conference on Robotics and Automation,
                {ICRA} 2018, Brisbane, Australia, May 21-25, 2018},
  pages      = {1--9},
  publisher  = {{IEEE}},
  year       = {2018},
  url        = {https://doi.org/10.1109/ICRA.2018.8460633},
  doi        = {10.1109/ICRA.2018.8460633},
  timestamp  = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl     = {https://dblp.org/rec/conf/icra/BarbarosHAM18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1801-00690,
  author     = {Yuval Tassa and Yotam Doron and Alistair Muldal and Tom Erez and
                Yazhe Li and Diego de Las Casas and David Budden and
                Abbas Abdolmaleki and Josh Merel and Andrew Lefrancq and
                Timothy P. Lillicrap and Martin A. Riedmiller},
  title      = {{DeepMind} Control Suite},
  journal    = {CoRR},
  volume     = {abs/1801.00690},
  year       = {2018},
  url        = {http://arxiv.org/abs/1801.00690},
  eprinttype = {arXiv},
  eprint     = {1801.00690},
  timestamp  = {Mon, 22 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1801-00690.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-06920,
  author     = {Abbas Abdolmaleki and Jost Tobias Springenberg and Yuval Tassa and
                R{\'{e}}mi Munos and Nicolas Heess and Martin A. Riedmiller},
  title      = {Maximum a Posteriori Policy Optimisation},
  journal    = {CoRR},
  volume     = {abs/1806.06920},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.06920},
  eprinttype = {arXiv},
  eprint     = {1806.06920},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-06920.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1812-02256,
  author     = {Abbas Abdolmaleki and Jost Tobias Springenberg and Jonas Degrave and
                Steven Bohez and Yuval Tassa and Dan Belov and Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Relative Entropy Regularized Policy Iteration},
  journal    = {CoRR},
  volume     = {abs/1812.02256},
  year       = {2018},
  url        = {http://arxiv.org/abs/1812.02256},
  eprinttype = {arXiv},
  eprint     = {1812.02256},
  timestamp  = {Tue, 01 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-02256.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/AbdolmalekiSLRP17,
  author     = {Abdolmaleki, Abbas and
                Sim{\~{o}}es, David and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Price, Bob and
                Neumann, Gerhard},
  title      = {Stochastic Search In Changing Situations},
  booktitle  = {The Workshops of the Thirty-First {AAAI} Conference on Artificial
                Intelligence, Saturday, February 4-9, 2017, San Francisco, California,
                {USA}},
  series     = {{AAAI} Technical Report},
  volume     = {{WS-17}},
  publisher  = {{AAAI} Press},
  year       = {2017},
  url        = {http://aaai.org/ocs/index.php/WS/AAAIW17/paper/view/15204},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/AbdolmalekiSLRP17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/gecco/AbdolmalekiPLRN17,
  author     = {Abdolmaleki, Abbas and
                Price, Bob and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Neumann, Gerhard},
  editor     = {Bosman, Peter A. N.},
  title      = {Deriving and improving {CMA-ES} with information geometric trust regions},
  booktitle  = {Proceedings of the Genetic and Evolutionary Computation Conference,
                {GECCO} 2017, Berlin, Germany, July 15-19, 2017},
  pages      = {657--664},
  publisher  = {{ACM}},
  year       = {2017},
  url        = {https://doi.org/10.1145/3071178.3071252},
  doi        = {10.1145/3071178.3071252},
  timestamp  = {Sun, 02 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/gecco/AbdolmalekiPLRN17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/AbdolmalekiPLRN17,
  author     = {Abdolmaleki, Abbas and
                Price, Bob and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Neumann, Gerhard},
  editor     = {Sierra, Carles},
  title      = {Contextual Covariance Matrix Adaptation Evolutionary Strategies},
  booktitle  = {Proceedings of the Twenty-Sixth International Joint Conference on
                Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August
                19-25, 2017},
  pages      = {1378--1385},
  publisher  = {ijcai.org},
  year       = {2017},
  url        = {https://doi.org/10.24963/ijcai.2017/191},
  doi        = {10.24963/IJCAI.2017/191},
  timestamp  = {Tue, 20 Aug 2019 16:16:54 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/AbdolmalekiPLRN17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jirs/AbdolmalekiLR0N16,
  author     = {Abdolmaleki, Abbas and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Peters, Jan and
                Neumann, Gerhard},
  title      = {Contextual Policy Search for Linear and Nonlinear Generalization of
                a Humanoid Walking Controller},
  journal    = {J. Intell. Robotic Syst.},
  volume     = {83},
  number     = {3-4},
  pages      = {393--408},
  year       = {2016},
  url        = {https://doi.org/10.1007/s10846-016-0347-y},
  doi        = {10.1007/S10846-016-0347-Y},
  timestamp  = {Tue, 07 Apr 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jirs/AbdolmalekiLR0N16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/gecco/AbdolmalekiLRN16,
  author     = {Abdolmaleki, Abbas and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Neumann, Gerhard},
  editor     = {Friedrich, Tobias and
                Neumann, Frank and
                Sutton, Andrew M.},
  title      = {Contextual Stochastic Search},
  booktitle  = {Genetic and Evolutionary Computation Conference, {GECCO} 2016, Denver,
                CO, USA, July 20-24, 2016, Companion Material Proceedings},
  pages      = {29--30},
  publisher  = {{ACM}},
  year       = {2016},
  url        = {https://doi.org/10.1145/2908961.2909012},
  doi        = {10.1145/2908961.2909012},
  timestamp  = {Sun, 02 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/gecco/AbdolmalekiLRN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/gecco/AbdolmalekiLLR016,
  author     = {Abdolmaleki, Abbas and
                Lioutikov, Rudolf and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Peters, Jan and
                Neumann, Gerhard},
  editor     = {Friedrich, Tobias and
                Neumann, Frank and
                Sutton, Andrew M.},
  title      = {Model-Based Relative Entropy Stochastic Search},
  booktitle  = {Genetic and Evolutionary Computation Conference, {GECCO} 2016, Denver,
                CO, USA, July 20-24, 2016, Companion Material Proceedings},
  pages      = {153--154},
  publisher  = {{ACM}},
  year       = {2016},
  url        = {https://doi.org/10.1145/2908961.2930952},
  doi        = {10.1145/2908961.2930952},
  timestamp  = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/gecco/AbdolmalekiLLR016.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icarsc/AbdolmalekiSLRN16,
  author     = {Abdolmaleki, Abbas and
                Sim{\~{o}}es, David and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Neumann, Gerhard},
  editor     = {Cunha, Bernardo and
                Lima, Jos{\'{e}} and
                Silva, Manuel F. and
                Leit{\~{a}}o, Paulo},
  title      = {Contextual Relative Entropy Policy Search with Covariance Matrix Adaptation},
  booktitle  = {2016 International Conference on Autonomous Robot Systems and Competitions,
                {ICARSC} 2016, Bragan{\c{c}}a, Portugal, May 4-6, 2016},
  pages      = {94--99},
  publisher  = {{IEEE}},
  year       = {2016},
  url        = {https://doi.org/10.1109/ICARSC.2016.31},
  doi        = {10.1109/ICARSC.2016.31},
  timestamp  = {Tue, 05 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icarsc/AbdolmalekiSLRN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/AkrourNAA16,
  author     = {Akrour, Riad and
                Neumann, Gerhard and
                Abdulsamad, Hany and
                Abdolmaleki, Abbas},
  editor     = {Balcan, Maria{-}Florina and
                Weinberger, Kilian Q.},
  title      = {Model-Free Trajectory Optimization for Reinforcement Learning},
  booktitle  = {Proceedings of the 33rd International Conference on Machine Learning,
                {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {48},
  pages      = {2961--2970},
  publisher  = {JMLR.org},
  year       = {2016},
  url        = {http://proceedings.mlr.press/v48/akrour16.html},
  timestamp  = {Wed, 29 May 2019 08:41:46 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/AkrourNAA16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/AbdolmalekiLRN16,
  author     = {Abdolmaleki, Abbas and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Neumann, Gerhard},
  title      = {Non-parametric contextual stochastic search},
  booktitle  = {2016 {IEEE/RSJ} International Conference on Intelligent Robots and
                Systems, {IROS} 2016, Daejeon, South Korea, October 9-14, 2016},
  pages      = {2643--2648},
  publisher  = {{IEEE}},
  year       = {2016},
  url        = {https://doi.org/10.1109/IROS.2016.7759411},
  doi        = {10.1109/IROS.2016.7759411},
  timestamp  = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl     = {https://dblp.org/rec/conf/iros/AbdolmalekiLRN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/robocup/AbdolmalekiSLRN16,
  author     = {Abdolmaleki, Abbas and
                Sim{\~{o}}es, David and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Neumann, Gerhard},
  editor     = {Behnke, Sven and
                Sheh, Raymond and
                Sariel, Sanem and
                Lee, Daniel D.},
  title      = {Learning a Humanoid Kick with Controlled Distance},
  booktitle  = {RoboCup 2016: Robot World Cup {XX} [Leipzig, Germany, June 30 - July
                4, 2016]},
  series     = {Lecture Notes in Computer Science},
  volume     = {9776},
  pages      = {45--57},
  publisher  = {Springer},
  year       = {2016},
  url        = {https://doi.org/10.1007/978-3-319-68792-6\_4},
  doi        = {10.1007/978-3-319-68792-6\_4},
  timestamp  = {Tue, 05 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/robocup/AbdolmalekiSLRN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/AkrourAAN16,
  author     = {Akrour, Riad and
                Abdolmaleki, Abbas and
                Abdulsamad, Hany and
                Neumann, Gerhard},
  title      = {Model-free Trajectory Optimization for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1606.09197},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.09197},
  eprinttype = {arXiv},
  eprint     = {1606.09197},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/AkrourAAN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/humanoids/AbdolmalekiLRN15,
  author     = {Abdolmaleki, Abbas and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Neumann, Gerhard},
  title      = {Regularized covariance estimation for weighted maximum likelihood
                policy search methods},
  booktitle  = {15th {IEEE-RAS} International Conference on Humanoid Robots, Humanoids
                2015, Seoul, South Korea, November 3-5, 2015},
  pages      = {154--159},
  publisher  = {{IEEE}},
  year       = {2015},
  url        = {https://doi.org/10.1109/HUMANOIDS.2015.7363529},
  doi        = {10.1109/HUMANOIDS.2015.7363529},
  timestamp  = {Wed, 16 Oct 2019 14:14:50 +0200},
  biburl     = {https://dblp.org/rec/conf/humanoids/AbdolmalekiLRN15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icarsc/AbdolmalekiLR0N15,
  author     = {Abdolmaleki, Abbas and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Peters, Jan and
                Neumann, Gerhard},
  title      = {Contextual Policy Search for Generalizing a Parameterized Biped Walking
                Controller},
  booktitle  = {2015 {IEEE} International Conference on Autonomous Robot Systems and
                Competitions, {ICARSC} 2015, Vila Real, Portugal, April 8-10, 2015},
  pages      = {17--22},
  publisher  = {{IEEE}},
  year       = {2015},
  url        = {https://doi.org/10.1109/ICARSC.2015.43},
  doi        = {10.1109/ICARSC.2015.43},
  timestamp  = {Wed, 16 Oct 2019 14:14:57 +0200},
  biburl     = {https://dblp.org/rec/conf/icarsc/AbdolmalekiLR0N15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/AbdolmalekiLPLR15,
  author     = {Abdolmaleki, Abbas and
                Lioutikov, Rudolf and
                Peters, Jan and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo and
                Neumann, Gerhard},
  editor     = {Cortes, Corinna and
                Lawrence, Neil D. and
                Lee, Daniel D. and
                Sugiyama, Masashi and
                Garnett, Roman},
  title      = {Model-Based Relative Entropy Stochastic Search},
  booktitle  = {Advances in Neural Information Processing Systems 28: Annual Conference
                on Neural Information Processing Systems 2015, December 7-12, 2015,
                Montreal, Quebec, Canada},
  pages      = {3537--3545},
  year       = {2015},
  url        = {https://proceedings.neurips.cc/paper/2015/hash/36ac8e558ac7690b6f44e2cb5ef93322-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/AbdolmalekiLPLR15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iberamia/AbdolmalekiSRLPN14,
  author     = {Abdolmaleki, Abbas and
                Shafii, Nima and
                Reis, Lu{\'{\i}}s Paulo and
                Lau, Nuno and
                Peters, Jan and
                Neumann, Gerhard},
  editor     = {Bazzan, Ana L. C. and
                Pichara, Karim},
  title      = {Omnidirectional Walking with a Compliant Inverted Pendulum Model},
  booktitle  = {Advances in Artificial Intelligence - {IBERAMIA} 2014 - 14th Ibero-American
                Conference on AI, Santiago de Chile, Chile, November 24-27, 2014,
                Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {8864},
  pages      = {481--493},
  publisher  = {Springer},
  year       = {2014},
  url        = {https://doi.org/10.1007/978-3-319-12027-0\_39},
  doi        = {10.1007/978-3-319-12027-0\_39},
  timestamp  = {Sun, 02 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iberamia/AbdolmalekiSRLPN14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/epia/ShafiiAFLR13,
  author     = {Shafii, Nima and
                Abdolmaleki, Abbas and
                Ferreira, Rui and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo},
  editor     = {Correia, Lu{\'{\i}}s and
                Reis, Lu{\'{\i}}s Paulo and
                Cascalho, Jos{\'{e}}},
  title      = {Omnidirectional Walking and Active Balance for Soccer Humanoid Robot},
  booktitle  = {Progress in Artificial Intelligence - 16th Portuguese Conference on
                Artificial Intelligence, {EPIA} 2013, Angra do Hero{\'{\i}}smo,
                Azores, Portugal, September 9-12, 2013. Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {8154},
  pages      = {283--294},
  publisher  = {Springer},
  year       = {2013},
  url        = {https://doi.org/10.1007/978-3-642-40669-0\_25},
  doi        = {10.1007/978-3-642-40669-0\_25},
  timestamp  = {Thu, 20 Jan 2022 14:32:12 +0100},
  biburl     = {https://dblp.org/rec/conf/epia/ShafiiAFLR13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jtaer/AbediNA12,
  author     = {Abedi, Leila and
                Nematbakhsh, Mohammad Ali and
                Abdolmaleki, Abbas},
  title      = {A Model for Context Aware Mobile Payment},
  journal    = {J. Theor. Appl. Electron. Commer. Res.},
  volume     = {7},
  number     = {3},
  pages      = {1--10},
  year       = {2012},
  url        = {https://doi.org/10.4067/s0718-18762012000300002},
  doi        = {10.4067/S0718-18762012000300002},
  timestamp  = {Fri, 10 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jtaer/AbediNA12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/robocup/AbdolmalekiMLR12,
  author     = {Abdolmaleki, Abbas and
                Movahedi, Mostafa and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo},
  editor     = {Chen, Xiaoping and
                Stone, Peter and
                Sucar, Luis Enrique and
                van der Zant, Tijn},
  title      = {A Distributed Cooperative Reinforcement Learning Method for Decision
                Making in Fire Brigade Teams},
  booktitle  = {RoboCup 2012: Robot Soccer World Cup {XVI} [papers from the 16th Annual
                RoboCup International Symposium, Mexico City, Mexico, June 18-24,
                2012]},
  series     = {Lecture Notes in Computer Science},
  volume     = {7500},
  pages      = {237--248},
  publisher  = {Springer},
  year       = {2012},
  url        = {https://doi.org/10.1007/978-3-642-39250-4\_22},
  doi        = {10.1007/978-3-642-39250-4\_22},
  timestamp  = {Tue, 20 Aug 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/robocup/AbdolmalekiMLR12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/epia/AbdolmalekiMSLR11,
  author     = {Abdolmaleki, Abbas and
                Movahedi, Mostafa and
                Salehi, Sajjad and
                Lau, Nuno and
                Reis, Lu{\'{\i}}s Paulo},
  editor     = {Antunes, Luis and
                Pinto, Helena Sofia},
  title      = {A Reinforcement Learning Based Method for Optimizing the Process of
                Decision Making in Fire Brigade Agents},
  booktitle  = {Progress in Artificial Intelligence, 15th Portuguese Conference on
                Artificial Intelligence, {EPIA} 2011, Lisbon, Portugal, October 10-13,
                2011. Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {7026},
  pages      = {340--351},
  publisher  = {Springer},
  year       = {2011},
  url        = {https://doi.org/10.1007/978-3-642-24769-9\_25},
  doi        = {10.1007/978-3-642-24769-9\_25},
  timestamp  = {Sun, 02 Oct 2022 16:00:30 +0200},
  biburl     = {https://dblp.org/rec/conf/epia/AbdolmalekiMSLR11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}