% BibTeX records exported from dblp for author: Abbas Abdolmaleki
% Journal article, Trans. Mach. Learn. Res. 2024.
% "RoboCat" is brace-protected so sentence-casing styles keep its internal capital.
@article{DBLP:journals/tmlr/BousmalisVRDLVD24,
  author     = {Konstantinos Bousmalis and
                Giulia Vezzani and
                Dushyant Rao and
                Coline Manon Devin and
                Alex X. Lee and
                Maria Bauz{\'{a}} Villalonga and
                Todor Davchev and
                Yuxiang Zhou and
                Agrim Gupta and
                Akhil Raju and
                Antoine Laurens and
                Claudio Fantacci and
                Valentin Dalibard and
                Martina Zambelli and
                Murilo Fernandes Martins and
                Rugile Pevceviciute and
                Michiel Blokzijl and
                Misha Denil and
                Nathan Batchelor and
                Thomas Lampe and
                Emilio Parisotto and
                Konrad Zolna and
                Scott E. Reed and
                Sergio G{\'{o}}mez Colmenarejo and
                Jon Scholz and
                Abbas Abdolmaleki and
                Oliver Groth and
                Jean{-}Baptiste Regli and
                Oleg Sushkov and
                Thomas Roth{\"{o}}rl and
                Jos{\'{e}} Enrique Chen and
                Yusuf Aytar and
                Dave Barker and
                Joy Ortiz and
                Martin A. Riedmiller and
                Jost Tobias Springenberg and
                Raia Hadsell and
                Francesco Nori and
                Nicolas Heess},
  title      = {{RoboCat}: {A} Self-Improving Generalist Agent for Robotic Manipulation},
  journal    = {Trans. Mach. Learn. Res.},
  volume     = {2024},
  year       = {2024},
  url        = {https://openreview.net/forum?id=vsCpILiWHu},
  timestamp  = {Thu, 08 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmlr/BousmalisVRDLVD24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICML 2024 (publisher field: OpenReview.net).
@inproceedings{DBLP:conf/icml/SpringenbergA0G24,
  author     = {Jost Tobias Springenberg and
                Abbas Abdolmaleki and
                Jingwei Zhang and
                Oliver Groth and
                Michael Bloesch and
                Thomas Lampe and
                Philemon Brakel and
                Sarah Bechtle and
                Steven Kapturowski and
                Roland Hafner and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Offline Actor-Critic Reinforcement Learning Scales to Large Models},
  booktitle  = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024},
  publisher  = {OpenReview.net},
  year       = {2024},
  url        = {https://openreview.net/forum?id=tl2qmO5kpD},
  timestamp  = {Mon, 02 Sep 2024 16:45:29 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SpringenbergA0G24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICRA 2024 (IEEE), pages 7772--7779.
@inproceedings{DBLP:conf/icra/LampeABHSBGHHNW24,
  author     = {Thomas Lampe and
                Abbas Abdolmaleki and
                Sarah Bechtle and
                Sandy H. Huang and
                Jost Tobias Springenberg and
                Michael Bloesch and
                Oliver Groth and
                Roland Hafner and
                Tim Hertweck and
                Michael Neunert and
                Markus Wulfmeier and
                Jingwei Zhang and
                Francesco Nori and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Mastering Stacking of Diverse Shapes with Large-Scale Iterative Reinforcement Learning on Real Robots},
  booktitle  = {{IEEE} International Conference on Robotics and Automation, {ICRA} 2024, Yokohama, Japan, May 13-17, 2024},
  pages      = {7772--7779},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/ICRA57147.2024.10610297},
  doi        = {10.1109/ICRA57147.2024.10610297},
  timestamp  = {Mon, 19 Aug 2024 15:58:53 +0200},
  biburl     = {https://dblp.org/rec/conf/icra/LampeABHSBGHHNW24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, 6th Learning for Dynamics and Control Conference, 2024 (PMLR volume 242).
@inproceedings{DBLP:conf/l4dc/BhardwajLNRABWR24,
  author     = {Mohak Bhardwaj and
                Thomas Lampe and
                Michael Neunert and
                Francesco Romano and
                Abbas Abdolmaleki and
                Arunkumar Byravan and
                Markus Wulfmeier and
                Martin A. Riedmiller and
                Jonas Buchli},
  editor     = {Alessandro Abate and
                Mark Cannon and
                Kostas Margellos and
                Antonis Papachristodoulou},
  title      = {Real-world fluid directed rigid body control via deep reinforcement learning},
  booktitle  = {6th Annual Learning for Dynamics {\&} Control Conference, 15-17 July 2024, University of Oxford, Oxford, {UK}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {242},
  pages      = {414--427},
  publisher  = {{PMLR}},
  year       = {2024},
  url        = {https://proceedings.mlr.press/v242/bhardwaj24a.html},
  timestamp  = {Fri, 05 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/l4dc/BhardwajLNRABWR24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2402.05546), 2024.
@article{DBLP:journals/corr/abs-2402-05546,
  author     = {Jost Tobias Springenberg and
                Abbas Abdolmaleki and
                Jingwei Zhang and
                Oliver Groth and
                Michael Bloesch and
                Thomas Lampe and
                Philemon Brakel and
                Sarah Bechtle and
                Steven Kapturowski and
                Roland Hafner and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Offline Actor-Critic Reinforcement Learning Scales to Large Models},
  journal    = {CoRR},
  volume     = {abs/2402.05546},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.05546},
  doi        = {10.48550/ARXIV.2402.05546},
  eprinttype = {arXiv},
  eprint     = {2402.05546},
  timestamp  = {Wed, 14 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-05546.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2402.06102), 2024.
@article{DBLP:journals/corr/abs-2402-06102,
  author     = {Mohak Bhardwaj and
                Thomas Lampe and
                Michael Neunert and
                Francesco Romano and
                Abbas Abdolmaleki and
                Arunkumar Byravan and
                Markus Wulfmeier and
                Martin A. Riedmiller and
                Jonas Buchli},
  title      = {Real-World Fluid Directed Rigid Body Control via Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2402.06102},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.06102},
  doi        = {10.48550/ARXIV.2402.06102},
  eprinttype = {arXiv},
  eprint     = {2402.06102},
  timestamp  = {Fri, 16 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-06102.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Trans. Mach. Learn. Res. 2023.
% "SkillS" is brace-protected so sentence-casing styles keep its trailing capital S.
@article{DBLP:journals/tmlr/VezzaniTWRAMHHH23,
  author     = {Giulia Vezzani and
                Dhruva Tirumala and
                Markus Wulfmeier and
                Dushyant Rao and
                Abbas Abdolmaleki and
                Ben Moran and
                Tuomas Haarnoja and
                Jan Humplik and
                Roland Hafner and
                Michael Neunert and
                Claudio Fantacci and
                Tim Hertweck and
                Thomas Lampe and
                Fereshteh Sadeghi and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {{SkillS}: Adaptive Skill Sequencing for Efficient Temporally-Extended Exploration},
  journal    = {Trans. Mach. Learn. Res.},
  volume     = {2023},
  year       = {2023},
  url        = {https://openreview.net/forum?id=JwGKVpRfVD},
  timestamp  = {Thu, 01 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmlr/VezzaniTWRAMHHH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2302.12617), 2023.
@article{DBLP:journals/corr/abs-2302-12617,
  author     = {Jingwei Zhang and
                Jost Tobias Springenberg and
                Arunkumar Byravan and
                Leonard Hasenclever and
                Abbas Abdolmaleki and
                Dushyant Rao and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Leveraging Jumpy Models for Planning and Fast Learning in Robotic Domains},
  journal    = {CoRR},
  volume     = {abs/2302.12617},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.12617},
  doi        = {10.48550/ARXIV.2302.12617},
  eprinttype = {arXiv},
  eprint     = {2302.12617},
  timestamp  = {Tue, 28 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-12617.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2306.11706), 2023.
% "RoboCat" is brace-protected so sentence-casing styles keep its internal capital.
@article{DBLP:journals/corr/abs-2306-11706,
  author     = {Konstantinos Bousmalis and
                Giulia Vezzani and
                Dushyant Rao and
                Coline Devin and
                Alex X. Lee and
                Maria Bauz{\'{a}} and
                Todor Davchev and
                Yuxiang Zhou and
                Agrim Gupta and
                Akhil Raju and
                Antoine Laurens and
                Claudio Fantacci and
                Valentin Dalibard and
                Martina Zambelli and
                Murilo F. Martins and
                Rugile Pevceviciute and
                Michiel Blokzijl and
                Misha Denil and
                Nathan Batchelor and
                Thomas Lampe and
                Emilio Parisotto and
                Konrad Zolna and
                Scott E. Reed and
                Sergio G{\'{o}}mez Colmenarejo and
                Jon Scholz and
                Abbas Abdolmaleki and
                Oliver Groth and
                Jean{-}Baptiste Regli and
                Oleg Sushkov and
                Thomas Roth{\"{o}}rl and
                Jos{\'{e}} Enrique Chen and
                Yusuf Aytar and
                Dave Barker and
                Joy Ortiz and
                Martin A. Riedmiller and
                Jost Tobias Springenberg and
                Raia Hadsell and
                Francesco Nori and
                Nicolas Heess},
  title      = {{RoboCat}: {A} Self-Improving Foundation Agent for Robotic Manipulation},
  journal    = {CoRR},
  volume     = {abs/2306.11706},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.11706},
  doi        = {10.48550/ARXIV.2306.11706},
  eprinttype = {arXiv},
  eprint     = {2306.11706},
  timestamp  = {Thu, 08 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-11706.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2308.15470), 2023.
@article{DBLP:journals/corr/abs-2308-15470,
  author     = {Shruti Mishra and
                Ankit Anand and
                Jordan Hoffmann and
                Nicolas Heess and
                Martin A. Riedmiller and
                Abbas Abdolmaleki and
                Doina Precup},
  title      = {Policy composition in reinforcement learning via multi-objective policy optimization},
  journal    = {CoRR},
  volume     = {abs/2308.15470},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2308.15470},
  doi        = {10.48550/ARXIV.2308.15470},
  eprinttype = {arXiv},
  eprint     = {2308.15470},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2308-15470.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2312.11374), 2023.
@article{DBLP:journals/corr/abs-2312-11374,
  author     = {Thomas Lampe and
                Abbas Abdolmaleki and
                Sarah Bechtle and
                Sandy H. Huang and
                Jost Tobias Springenberg and
                Michael Bloesch and
                Oliver Groth and
                Roland Hafner and
                Tim Hertweck and
                Michael Neunert and
                Markus Wulfmeier and
                Jingwei Zhang and
                Francesco Nori and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Mastering Stacking of Diverse Shapes with Large-Scale Iterative Reinforcement Learning on Real Robots},
  journal    = {CoRR},
  volume     = {abs/2312.11374},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.11374},
  doi        = {10.48550/ARXIV.2312.11374},
  eprinttype = {arXiv},
  eprint     = {2312.11374},
  timestamp  = {Tue, 16 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-11374.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Nature 602(7897), 2022, pages 414--419.
% The journal name is stored in full: single-word journal titles are not abbreviated.
@article{DBLP:journals/nature/DegraveFBNTCEHA22,
  author     = {Jonas Degrave and
                Federico Felici and
                Jonas Buchli and
                Michael Neunert and
                Brendan D. Tracey and
                Francesco Carpanese and
                Timo Ewalds and
                Roland Hafner and
                Abbas Abdolmaleki and
                Diego de Las Casas and
                Craig Donner and
                Leslie Fritz and
                Cristian Galperti and
                Andrea Huber and
                James Keeling and
                Maria Tsimpoukelli and
                Jackie Kay and
                Antoine Merle and
                Jean{-}Marc Moret and
                Seb Noury and
                Federico Pesamosca and
                David Pfau and
                Olivier Sauter and
                Cristian Sommariva and
                Stefano Coda and
                Basil Duval and
                Ambrogio Fasoli and
                Pushmeet Kohli and
                Koray Kavukcuoglu and
                Demis Hassabis and
                Martin A. Riedmiller},
  title      = {Magnetic control of tokamak plasmas through deep reinforcement learning},
  journal    = {Nature},
  volume     = {602},
  number     = {7897},
  pages      = {414--419},
  year       = {2022},
  url        = {https://doi.org/10.1038/s41586-021-04301-9},
  doi        = {10.1038/S41586-021-04301-9},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nature/DegraveFBNTCEHA22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Sci. Robotics 7(69), 2022.
@article{DBLP:journals/scirobotics/LiuLWMEHCTOASHM22,
  author     = {Siqi Liu and
                Guy Lever and
                Zhe Wang and
                Josh Merel and
                S. M. Ali Eslami and
                Daniel Hennes and
                Wojciech M. Czarnecki and
                Yuval Tassa and
                Shayegan Omidshafiei and
                Abbas Abdolmaleki and
                Noah Y. Siegel and
                Leonard Hasenclever and
                Luke Marris and
                Saran Tunyasuvunakool and
                H. Francis Song and
                Markus Wulfmeier and
                Paul Muller and
                Tuomas Haarnoja and
                Brendan D. Tracey and
                Karl Tuyls and
                Thore Graepel and
                Nicolas Heess},
  title      = {From motor control to team play in simulated humanoid football},
  journal    = {Sci. Robotics},
  volume     = {7},
  number     = {69},
  year       = {2022},
  url        = {https://doi.org/10.1126/scirobotics.abo0235},
  doi        = {10.1126/SCIROBOTICS.ABO0235},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/scirobotics/LiuLWMEHCTOASHM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, CoLLAs 2022 (PMLR volume 199), pages 294--309.
@inproceedings{DBLP:conf/collas/ZhouBHHARWH22,
  author     = {Wenxuan Zhou and
                Steven Bohez and
                Jan Humplik and
                Nicolas Heess and
                Abbas Abdolmaleki and
                Dushyant Rao and
                Markus Wulfmeier and
                Tuomas Haarnoja},
  editor     = {Sarath Chandar and
                Razvan Pascanu and
                Doina Precup},
  title      = {Forgetting and Imbalance in Robot Lifelong Learning with Off-policy Data},
  booktitle  = {Conference on Lifelong Learning Agents, CoLLAs 2022, 22-24 August 2022, McGill University, Montr{\'{e}}al, Qu{\'{e}}bec, Canada},
  series     = {Proceedings of Machine Learning Research},
  volume     = {199},
  pages      = {294--309},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v199/zhou22a.html},
  timestamp  = {Fri, 17 Feb 2023 16:29:10 +0100},
  biburl     = {https://dblp.org/rec/conf/collas/ZhouBHHARWH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICLR 2022 (publisher field: OpenReview.net).
@inproceedings{DBLP:conf/iclr/ByravanHTMITSAH22,
  author     = {Arunkumar Byravan and
                Leonard Hasenclever and
                Piotr Trochim and
                Mehdi Mirza and
                Alessandro Davide Ialongo and
                Yuval Tassa and
                Jost Tobias Springenberg and
                Abbas Abdolmaleki and
                Nicolas Heess and
                Josh Merel and
                Martin A. Riedmiller},
  title      = {Evaluating Model-Based Planning and Planner Amortization for Continuous Control},
  booktitle  = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=SS8F6tFX3-},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/ByravanHTMITSAH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, IROS 2022 (IEEE), pages 2468--2475.
@inproceedings{DBLP:conf/iros/LeeDSZLAB22,
  author     = {Alex X. Lee and
                Coline Devin and
                Jost Tobias Springenberg and
                Yuxiang Zhou and
                Thomas Lampe and
                Abbas Abdolmaleki and
                Konstantinos Bousmalis},
  title      = {How to Spend Your Robot Time: Bridging Kickstarting and Offline Reinforcement Learning for Vision-based Robotic Manipulation},
  booktitle  = {{IEEE/RSJ} International Conference on Intelligent Robots and Systems, {IROS} 2022, Kyoto, Japan, October 23-27, 2022},
  pages      = {2468--2475},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/IROS47612.2022.9981126},
  doi        = {10.1109/IROS47612.2022.9981126},
  timestamp  = {Tue, 03 Jan 2023 14:18:21 +0100},
  biburl     = {https://dblp.org/rec/conf/iros/LeeDSZLAB22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2204.05893), 2022.
@article{DBLP:journals/corr/abs-2204-05893,
  author     = {Wenxuan Zhou and
                Steven Bohez and
                Jan Humplik and
                Abbas Abdolmaleki and
                Dushyant Rao and
                Markus Wulfmeier and
                Tuomas Haarnoja and
                Nicolas Heess},
  title      = {Offline Distillation for Robot Lifelong Learning with Imbalanced Experience},
  journal    = {CoRR},
  volume     = {abs/2204.05893},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.05893},
  doi        = {10.48550/ARXIV.2204.05893},
  eprinttype = {arXiv},
  eprint     = {2204.05893},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-05893.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2204.10256), 2022.
% "Gaussian" is brace-protected so sentence-casing styles do not lowercase the proper adjective.
@article{DBLP:journals/corr/abs-2204-10256,
  author     = {Bobak Shahriari and
                Abbas Abdolmaleki and
                Arunkumar Byravan and
                Abe Friesen and
                Siqi Liu and
                Jost Tobias Springenberg and
                Nicolas Heess and
                Matt Hoffman and
                Martin A. Riedmiller},
  title      = {Revisiting {Gaussian} mixture critics in off-policy reinforcement learning: a sample-based approach},
  journal    = {CoRR},
  volume     = {abs/2204.10256},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.10256},
  doi        = {10.48550/ARXIV.2204.10256},
  eprinttype = {arXiv},
  eprint     = {2204.10256},
  timestamp  = {Thu, 11 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-10256.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2205.03353), 2022.
@article{DBLP:journals/corr/abs-2205-03353,
  author     = {Alex X. Lee and
                Coline Devin and
                Jost Tobias Springenberg and
                Yuxiang Zhou and
                Thomas Lampe and
                Abbas Abdolmaleki and
                Konstantinos Bousmalis},
  title      = {How to Spend Your Robot Time: Bridging Kickstarting and Offline Reinforcement Learning for Vision-based Robotic Manipulation},
  journal    = {CoRR},
  volume     = {abs/2205.03353},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.03353},
  doi        = {10.48550/ARXIV.2205.03353},
  eprinttype = {arXiv},
  eprint     = {2205.03353},
  timestamp  = {Wed, 11 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-03353.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2211.13743), 2022.
% "SkillS" is brace-protected so sentence-casing styles keep its trailing capital S.
@article{DBLP:journals/corr/abs-2211-13743,
  author     = {Giulia Vezzani and
                Dhruva Tirumala and
                Markus Wulfmeier and
                Dushyant Rao and
                Abbas Abdolmaleki and
                Ben Moran and
                Tuomas Haarnoja and
                Jan Humplik and
                Roland Hafner and
                Michael Neunert and
                Claudio Fantacci and
                Tim Hertweck and
                Thomas Lampe and
                Fereshteh Sadeghi and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {{SkillS}: Adaptive Skill Sequencing for Efficient Temporally-Extended Exploration},
  journal    = {CoRR},
  volume     = {abs/2211.13743},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2211.13743},
  doi        = {10.48550/ARXIV.2211.13743},
  eprinttype = {arXiv},
  eprint     = {2211.13743},
  timestamp  = {Tue, 29 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2211-13743.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, Conference on Robot Learning 2021 (PMLR volume 164), pages 883--893.
@inproceedings{DBLP:conf/corl/HuangAVBMNBTHRH21,
  author     = {Sandy H. Huang and
                Abbas Abdolmaleki and
                Giulia Vezzani and
                Philemon Brakel and
                Daniel J. Mankowitz and
                Michael Neunert and
                Steven Bohez and
                Yuval Tassa and
                Nicolas Heess and
                Martin A. Riedmiller and
                Raia Hadsell},
  editor     = {Aleksandra Faust and
                David Hsu and
                Gerhard Neumann},
  title      = {A Constrained Multi-Objective Reinforcement Learning Framework},
  booktitle  = {Conference on Robot Learning, 8-11 November 2021, London, {UK}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {164},
  pages      = {883--893},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {https://proceedings.mlr.press/v164/huang22a.html},
  timestamp  = {Wed, 19 Jan 2022 17:10:33 +0100},
  biburl     = {https://dblp.org/rec/conf/corl/HuangAVBMNBTHRH21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, Conference on Robot Learning 2021 (PMLR volume 164), pages 1089--1131.
@inproceedings{DBLP:conf/corl/LeeDZLBSBAGKFCR21,
  author     = {Alex X. Lee and
                Coline Manon Devin and
                Yuxiang Zhou and
                Thomas Lampe and
                Konstantinos Bousmalis and
                Jost Tobias Springenberg and
                Arunkumar Byravan and
                Abbas Abdolmaleki and
                Nimrod Gileadi and
                David Khosid and
                Claudio Fantacci and
                Jos{\'{e}} Enrique Chen and
                Akhil Raju and
                Rae Jeong and
                Michael Neunert and
                Antoine Laurens and
                Stefano Saliceti and
                Federico Casarini and
                Martin A. Riedmiller and
                Raia Hadsell and
                Francesco Nori},
  editor     = {Aleksandra Faust and
                David Hsu and
                Gerhard Neumann},
  title      = {Beyond Pick-and-Place: Tackling Robotic Stacking of Diverse Shapes},
  booktitle  = {Conference on Robot Learning, 8-11 November 2021, London, {UK}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {164},
  pages      = {1089--1131},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {https://proceedings.mlr.press/v164/lee22b.html},
  timestamp  = {Thu, 08 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/corl/LeeDZLBSBAGKFCR21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICML 2021 (PMLR volume 139), pages 11340--11350.
% The url uses https, matching the other proceedings.mlr.press links in this file.
@inproceedings{DBLP:conf/icml/WulfmeierRHLAHN21,
  author     = {Markus Wulfmeier and
                Dushyant Rao and
                Roland Hafner and
                Thomas Lampe and
                Abbas Abdolmaleki and
                Tim Hertweck and
                Michael Neunert and
                Dhruva Tirumala and
                Noah Y. Siegel and
                Nicolas Heess and
                Martin A. Riedmiller},
  editor     = {Marina Meila and
                Tong Zhang},
  title      = {Data-efficient Hindsight Off-policy Option Learning},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {11340--11350},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {https://proceedings.mlr.press/v139/wulfmeier21a.html},
  timestamp  = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/WulfmeierRHLAHN21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2101.09458), 2021.
@article{DBLP:journals/corr/abs-2101-09458,
  author     = {William F. Whitney and
                Michael Bloesch and
                Jost Tobias Springenberg and
                Abbas Abdolmaleki and
                Martin A. Riedmiller},
  title      = {Rethinking Exploration for Sample-Efficient Policy Learning},
  journal    = {CoRR},
  volume     = {abs/2101.09458},
  year       = {2021},
  url        = {https://arxiv.org/abs/2101.09458},
  eprinttype = {arXiv},
  eprint     = {2101.09458},
  timestamp  = {Sat, 30 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-09458.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2105.12196), 2021.
@article{DBLP:journals/corr/abs-2105-12196,
  author     = {Siqi Liu and
                Guy Lever and
                Zhe Wang and
                Josh Merel and
                S. M. Ali Eslami and
                Daniel Hennes and
                Wojciech M. Czarnecki and
                Yuval Tassa and
                Shayegan Omidshafiei and
                Abbas Abdolmaleki and
                Noah Y. Siegel and
                Leonard Hasenclever and
                Luke Marris and
                Saran Tunyasuvunakool and
                H. Francis Song and
                Markus Wulfmeier and
                Paul Muller and
                Tuomas Haarnoja and
                Brendan D. Tracey and
                Karl Tuyls and
                Thore Graepel and
                Nicolas Heess},
  title      = {From Motor Control to Team Play in Simulated Humanoid Football},
  journal    = {CoRR},
  volume     = {abs/2105.12196},
  year       = {2021},
  url        = {https://arxiv.org/abs/2105.12196},
  eprinttype = {arXiv},
  eprint     = {2105.12196},
  timestamp  = {Thu, 11 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2105-12196.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2106.08199), 2021.
@article{DBLP:journals/corr/abs-2106-08199,
  author     = {Abbas Abdolmaleki and
                Sandy H. Huang and
                Giulia Vezzani and
                Bobak Shahriari and
                Jost Tobias Springenberg and
                Shruti Mishra and
                Dhruva TB and
                Arunkumar Byravan and
                Konstantinos Bousmalis and
                Andr{\'{a}}s Gy{\"{o}}rgy and
                Csaba Szepesv{\'{a}}ri and
                Raia Hadsell and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {On Multi-objective Policy Optimization as a Tool for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2106.08199},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.08199},
  eprinttype = {arXiv},
  eprint     = {2106.08199},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-08199.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2110.03363), 2021.
@article{DBLP:journals/corr/abs-2110-03363,
  author     = {Arunkumar Byravan and
                Leonard Hasenclever and
                Piotr Trochim and
                Mehdi Mirza and
                Alessandro Davide Ialongo and
                Yuval Tassa and
                Jost Tobias Springenberg and
                Abbas Abdolmaleki and
                Nicolas Heess and
                Josh Merel and
                Martin A. Riedmiller},
  title      = {Evaluating model-based planning and planner amortization for continuous control},
  journal    = {CoRR},
  volume     = {abs/2110.03363},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.03363},
  eprinttype = {arXiv},
  eprint     = {2110.03363},
  timestamp  = {Thu, 21 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-03363.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2110.06192), 2021.
@article{DBLP:journals/corr/abs-2110-06192,
  author     = {Alex X. Lee and
                Coline Devin and
                Yuxiang Zhou and
                Thomas Lampe and
                Konstantinos Bousmalis and
                Jost Tobias Springenberg and
                Arunkumar Byravan and
                Abbas Abdolmaleki and
                Nimrod Gileadi and
                David Khosid and
                Claudio Fantacci and
                Jos{\'{e}} Enrique Chen and
                Akhil Raju and
                Rae Jeong and
                Michael Neunert and
                Antoine Laurens and
                Stefano Saliceti and
                Federico Casarini and
                Martin A. Riedmiller and
                Raia Hadsell and
                Francesco Nori},
  title      = {Beyond Pick-and-Place: Tackling Robotic Stacking of Diverse Shapes},
  journal    = {CoRR},
  volume     = {abs/2110.06192},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.06192},
  eprinttype = {arXiv},
  eprint     = {2110.06192},
  timestamp  = {Thu, 08 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-06192.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICLR 2020 (publisher field: OpenReview.net).
@inproceedings{DBLP:conf/iclr/MankowitzLJASSK20,
  author     = {Daniel J. Mankowitz and
                Nir Levine and
                Rae Jeong and
                Abbas Abdolmaleki and
                Jost Tobias Springenberg and
                Yuanyuan Shi and
                Jackie Kay and
                Todd Hester and
                Timothy A. Mann and
                Martin A. Riedmiller},
  title      = {Robust Reinforcement Learning for Continuous Control with Model Misspecification},
  booktitle  = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher  = {OpenReview.net},
  year       = {2020},
  url        = {https://openreview.net/forum?id=HJgC60EtwB},
  timestamp  = {Thu, 07 May 2020 17:11:47 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/MankowitzLJASSK20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICLR 2020 (publisher field: OpenReview.net).
@inproceedings{DBLP:conf/iclr/SiegelSBANLHHR20,
  author     = {Noah Y. Siegel and
                Jost Tobias Springenberg and
                Felix Berkenkamp and
                Abbas Abdolmaleki and
                Michael Neunert and
                Thomas Lampe and
                Roland Hafner and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Keep Doing What Worked: Behavior Modelling Priors for Offline Reinforcement Learning},
  booktitle  = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher  = {OpenReview.net},
  year       = {2020},
  url        = {https://openreview.net/forum?id=rke7geHtwH},
  timestamp  = {Thu, 07 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/SiegelSBANLHHR20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICLR 2020 (publisher field: OpenReview.net).
% The colon sits outside the protected acronym: BibTeX only honours a colon at
% brace level 0 when deciding to keep the capital of the word that follows it.
@inproceedings{DBLP:conf/iclr/SongASCSRNALTHB20,
  author     = {H. Francis Song and
                Abbas Abdolmaleki and
                Jost Tobias Springenberg and
                Aidan Clark and
                Hubert Soyer and
                Jack W. Rae and
                Seb Noury and
                Arun Ahuja and
                Siqi Liu and
                Dhruva Tirumala and
                Nicolas Heess and
                Dan Belov and
                Martin A. Riedmiller and
                Matthew M. Botvinick},
  title      = {{V-MPO}: On-Policy Maximum a Posteriori Policy Optimization for Discrete and Continuous Control},
  booktitle  = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher  = {OpenReview.net},
  year       = {2020},
  url        = {https://openreview.net/forum?id=SylOlp4FvH},
  timestamp  = {Thu, 11 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/SongASCSRNALTHB20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICML 2020 (PMLR volume 119), pages 11--22.
% The url uses https, matching the other proceedings.mlr.press links in this file.
@inproceedings{DBLP:conf/icml/AbdolmalekiHNS20,
  author     = {Abbas Abdolmaleki and
                Sandy H. Huang and
                Leonard Hasenclever and
                Michael Neunert and
                H. Francis Song and
                Martina Zambelli and
                Murilo F. Martins and
                Nicolas Heess and
                Raia Hadsell and
                Martin A. Riedmiller},
  title      = {A distributional view on multi-objective policy optimization},
  booktitle  = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {119},
  pages      = {11--22},
  publisher  = {{PMLR}},
  year       = {2020},
  url        = {https://proceedings.mlr.press/v119/abdolmaleki20a.html},
  timestamp  = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/AbdolmalekiHNS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, Robotics: Science and Systems XVI, 2020.
% The venue city in booktitle is spelled "Corvallis" (the record had "Corvalis").
@inproceedings{DBLP:conf/rss/WulfmeierAHSNSH20,
  author     = {Markus Wulfmeier and
                Abbas Abdolmaleki and
                Roland Hafner and
                Jost Tobias Springenberg and
                Michael Neunert and
                Noah Y. Siegel and
                Tim Hertweck and
                Thomas Lampe and
                Nicolas Heess and
                Martin A. Riedmiller},
  editor     = {Marc Toussaint and
                Antonio Bicchi and
                Tucker Hermans},
  title      = {Compositional Transfer in Hierarchical Reinforcement Learning},
  booktitle  = {Robotics: Science and Systems XVI, Virtual Event / Corvallis, Oregon, USA, July 12-16, 2020},
  year       = {2020},
  url        = {https://doi.org/10.15607/RSS.2020.XVI.054},
  doi        = {10.15607/RSS.2020.XVI.054},
  timestamp  = {Thu, 15 Jul 2021 18:53:52 +0200},
  biburl     = {https://dblp.org/rec/conf/rss/WulfmeierAHSNSH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2001.00449), 2020.
% The url uses https, matching the other arxiv.org links in this file.
@article{DBLP:journals/corr/abs-2001-00449,
  author     = {Michael Neunert and
                Abbas Abdolmaleki and
                Markus Wulfmeier and
                Thomas Lampe and
                Jost Tobias Springenberg and
                Roland Hafner and
                Francesco Romano and
                Jonas Buchli and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Continuous-Discrete Reinforcement Learning for Hybrid Control in Robotics},
  journal    = {CoRR},
  volume     = {abs/2001.00449},
  year       = {2020},
  url        = {https://arxiv.org/abs/2001.00449},
  eprinttype = {arXiv},
  eprint     = {2001.00449},
  timestamp  = {Fri, 10 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2001-00449.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2002.08396), 2020.
@article{DBLP:journals/corr/abs-2002-08396,
  author     = {Noah Y. Siegel and
                Jost Tobias Springenberg and
                Felix Berkenkamp and
                Abbas Abdolmaleki and
                Michael Neunert and
                Thomas Lampe and
                Roland Hafner and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Keep Doing What Worked: Behavioral Modelling Priors for Offline Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2002.08396},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.08396},
  eprinttype = {arXiv},
  eprint     = {2002.08396},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-08396.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2005.07513), 2020.
@article{DBLP:journals/corr/abs-2005-07513,
  author     = {Abbas Abdolmaleki and
                Sandy H. Huang and
                Leonard Hasenclever and
                Michael Neunert and
                H. Francis Song and
                Martina Zambelli and
                Murilo F. Martins and
                Nicolas Heess and
                Raia Hadsell and
                Martin A. Riedmiller},
  title      = {A Distributional View on Multi-Objective Policy Optimization},
  journal    = {CoRR},
  volume     = {abs/2005.07513},
  year       = {2020},
  url        = {https://arxiv.org/abs/2005.07513},
  eprinttype = {arXiv},
  eprint     = {2005.07513},
  timestamp  = {Fri, 22 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2005-07513.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2006.00979), 2020.
@article{DBLP:journals/corr/abs-2006-00979,
  author     = {Matt Hoffman and
                Bobak Shahriari and
                John Aslanides and
                Gabriel Barth{-}Maron and
                Feryal M. P. Behbahani and
                Tamara Norman and
                Abbas Abdolmaleki and
                Albin Cassirer and
                Fan Yang and
                Kate Baumli and
                Sarah Henderson and
                Alexander Novikov and
                Sergio G{\'{o}}mez Colmenarejo and
                Serkan Cabi and
                {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
                Tom Le Paine and
                Andrew Cowie and
                Ziyu Wang and
                Bilal Piot and
                Nando de Freitas},
  title      = {Acme: {A} Research Framework for Distributed Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2006.00979},
  year       = {2020},
  url        = {https://arxiv.org/abs/2006.00979},
  eprinttype = {arXiv},
  eprint     = {2006.00979},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2006-00979.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2007.15588), 2020.
@article{DBLP:journals/corr/abs-2007-15588,
  author     = {Markus Wulfmeier and
                Dushyant Rao and
                Roland Hafner and
                Thomas Lampe and
                Abbas Abdolmaleki and
                Tim Hertweck and
                Michael Neunert and
                Dhruva Tirumala and
                Noah Y. Siegel and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Data-efficient Hindsight Off-policy Option Learning},
  journal    = {CoRR},
  volume     = {abs/2007.15588},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.15588},
  eprinttype = {arXiv},
  eprint     = {2007.15588},
  timestamp  = {Mon, 03 Aug 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-15588.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2010.05545), 2020.
@article{DBLP:journals/corr/abs-2010-05545,
  author     = {Jost Tobias Springenberg and
                Nicolas Heess and
                Daniel J. Mankowitz and
                Josh Merel and
                Arunkumar Byravan and
                Abbas Abdolmaleki and
                Jackie Kay and
                Jonas Degrave and
                Julian Schrittwieser and
                Yuval Tassa and
                Jonas Buchli and
                Dan Belov and
                Martin A. Riedmiller},
  title      = {Local Search for Policy Iteration in Continuous Control},
  journal    = {CoRR},
  volume     = {abs/2010.05545},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.05545},
  eprinttype = {arXiv},
  eprint     = {2010.05545},
  timestamp  = {Tue, 20 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-05545.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2010.15492), 2020.
% The quoted phrase uses LaTeX quote ligatures instead of straight ASCII quotes and is
% brace-protected so sentence-casing styles keep the capital W.
@article{DBLP:journals/corr/abs-2010-15492,
  author     = {Giulia Vezzani and
                Michael Neunert and
                Markus Wulfmeier and
                Rae Jeong and
                Thomas Lampe and
                Noah Y. Siegel and
                Roland Hafner and
                Abbas Abdolmaleki and
                Martin A. Riedmiller and
                Francesco Nori},
  title      = {{``What, not how''}: Solving an under-actuated insertion task from scratch},
  journal    = {CoRR},
  volume     = {abs/2010.15492},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.15492},
  eprinttype = {arXiv},
  eprint     = {2010.15492},
  timestamp  = {Tue, 03 Nov 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-15492.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: J. Intell. Robotic Syst. 96(2), 2019.
@article{DBLP:journals/jirs/AbdolmalekiSLRN19,
  author     = {Abbas Abdolmaleki and
                David Sim{\~{o}}es and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Gerhard Neumann},
  title      = {Contextual Direct Policy Search - With Regularized Covariance Matrix Estimation},
  journal    = {J. Intell. Robotic Syst.},
  volume     = {96},
  number     = {2},
  pages      = {141--157},
  year       = {2019},
  url        = {https://doi.org/10.1007/s10846-018-0968-4},
  doi        = {10.1007/S10846-018-0968-4},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jirs/AbdolmalekiSLRN19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRL 2019 paper (PMLR vol. 100).
% Title typo "Tranferable" corrected to "Transferable", matching the arXiv
% record DBLP:journals/corr/abs-1910-04142 in this file.
@inproceedings{DBLP:conf/corl/ByravanSAHNLSHR19,
  author     = {Arunkumar Byravan and
                Jost Tobias Springenberg and
                Abbas Abdolmaleki and
                Roland Hafner and
                Michael Neunert and
                Thomas Lampe and
                Noah Y. Siegel and
                Nicolas Heess and
                Martin A. Riedmiller},
  editor     = {Leslie Pack Kaelbling and
                Danica Kragic and
                Komei Sugiura},
  title      = {Imagined Value Gradients: Model-Based Policy Optimization with Transferable Latent Dynamics Models},
  booktitle  = {3rd Annual Conference on Robot Learning, CoRL 2019, Osaka, Japan, October 30 - November 1, 2019, Proceedings},
  series     = {Proceedings of Machine Learning Research},
  volume     = {100},
  pages      = {566--589},
  publisher  = {{PMLR}},
  year       = {2019},
  url        = {http://proceedings.mlr.press/v100/byravan20a.html},
  timestamp  = {Tue, 26 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/corl/ByravanSAHNLSHR19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRL 2019 paper (PMLR vol. 100).
@inproceedings{DBLP:conf/corl/NeunertAWLSHRBH19,
  author     = {Michael Neunert and
                Abbas Abdolmaleki and
                Markus Wulfmeier and
                Thomas Lampe and
                Jost Tobias Springenberg and
                Roland Hafner and
                Francesco Romano and
                Jonas Buchli and
                Nicolas Heess and
                Martin A. Riedmiller},
  editor     = {Leslie Pack Kaelbling and
                Danica Kragic and
                Komei Sugiura},
  title      = {Continuous-Discrete Reinforcement Learning for Hybrid Control in Robotics},
  booktitle  = {3rd Annual Conference on Robot Learning, CoRL 2019, Osaka, Japan, October 30 - November 1, 2019, Proceedings},
  series     = {Proceedings of Machine Learning Research},
  volume     = {100},
  pages      = {735--751},
  publisher  = {{PMLR}},
  year       = {2019},
  url        = {http://proceedings.mlr.press/v100/neunert20a.html},
  timestamp  = {Mon, 25 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/corl/NeunertAWLSHRBH19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Robotics: Science and Systems XV paper, 2019.
@inproceedings{DBLP:conf/rss/SchwabSMNLAHHNR19,
  author     = {Devin Schwab and
                Jost Tobias Springenberg and
                Murilo Fernandes Martins and
                Michael Neunert and
                Thomas Lampe and
                Abbas Abdolmaleki and
                Tim Hertweck and
                Roland Hafner and
                Francesco Nori and
                Martin A. Riedmiller},
  editor     = {Antonio Bicchi and
                Hadas Kress{-}Gazit and
                Seth Hutchinson},
  title      = {Simultaneously Learning Vision and Feature-Based Control Policies for Real-World Ball-In-A-Cup},
  booktitle  = {Robotics: Science and Systems XV, University of Freiburg, Freiburg im Breisgau, Germany, June 22-26, 2019},
  year       = {2019},
  url        = {https://doi.org/10.15607/RSS.2019.XV.027},
  doi        = {10.15607/RSS.2019.XV.027},
  timestamp  = {Thu, 01 Apr 2021 15:25:13 +0200},
  biburl     = {https://dblp.org/rec/conf/rss/SchwabSMNLAHHNR19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1902.04623), 2019.
@article{DBLP:journals/corr/abs-1902-04623,
  author     = {Steven Bohez and
                Abbas Abdolmaleki and
                Michael Neunert and
                Jonas Buchli and
                Nicolas Heess and
                Raia Hadsell},
  title      = {Value constrained model-free continuous control},
  journal    = {CoRR},
  volume     = {abs/1902.04623},
  year       = {2019},
  url        = {http://arxiv.org/abs/1902.04623},
  eprinttype = {arXiv},
  eprint     = {1902.04623},
  timestamp  = {Tue, 21 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1902-04623.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1902.04706), 2019.
% Title matches DBLP:conf/rss/SchwabSMNLAHHNR19 up to capitalisation;
% presumably the preprint of that paper.
@article{DBLP:journals/corr/abs-1902-04706,
  author     = {Devin Schwab and
                Jost Tobias Springenberg and
                Murilo F. Martins and
                Thomas Lampe and
                Michael Neunert and
                Abbas Abdolmaleki and
                Tim Hertweck and
                Roland Hafner and
                Francesco Nori and
                Martin A. Riedmiller},
  title      = {Simultaneously Learning Vision and Feature-based Control Policies for Real-world Ball-in-a-Cup},
  journal    = {CoRR},
  volume     = {abs/1902.04706},
  year       = {2019},
  url        = {http://arxiv.org/abs/1902.04706},
  eprinttype = {arXiv},
  eprint     = {1902.04706},
  timestamp  = {Tue, 21 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1902-04706.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1906.07516), 2019.
@article{DBLP:journals/corr/abs-1906-07516,
  author     = {Daniel J. Mankowitz and
                Nir Levine and
                Rae Jeong and
                Abbas Abdolmaleki and
                Jost Tobias Springenberg and
                Timothy A. Mann and
                Todd Hester and
                Martin A. Riedmiller},
  title      = {Robust Reinforcement Learning for Continuous Control with Model Misspecification},
  journal    = {CoRR},
  volume     = {abs/1906.07516},
  year       = {2019},
  url        = {http://arxiv.org/abs/1906.07516},
  eprinttype = {arXiv},
  eprint     = {1906.07516},
  timestamp  = {Mon, 24 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1906-07516.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1906.11228), 2019.
@article{DBLP:journals/corr/abs-1906-11228,
  author     = {Markus Wulfmeier and
                Abbas Abdolmaleki and
                Roland Hafner and
                Jost Tobias Springenberg and
                Michael Neunert and
                Tim Hertweck and
                Thomas Lampe and
                Noah Y. Siegel and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Regularized Hierarchical Policies for Compositional Transfer in Robotics},
  journal    = {CoRR},
  volume     = {abs/1906.11228},
  year       = {2019},
  url        = {http://arxiv.org/abs/1906.11228},
  eprinttype = {arXiv},
  eprint     = {1906.11228},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1906-11228.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1909.12238), 2019.
@article{DBLP:journals/corr/abs-1909-12238,
  author     = {H. Francis Song and
                Abbas Abdolmaleki and
                Jost Tobias Springenberg and
                Aidan Clark and
                Hubert Soyer and
                Jack W. Rae and
                Seb Noury and
                Arun Ahuja and
                Siqi Liu and
                Dhruva Tirumala and
                Nicolas Heess and
                Dan Belov and
                Martin A. Riedmiller and
                Matthew M. Botvinick},
  title      = {{V-MPO:} On-Policy Maximum a Posteriori Policy Optimization for Discrete and Continuous Control},
  journal    = {CoRR},
  volume     = {abs/1909.12238},
  year       = {2019},
  url        = {http://arxiv.org/abs/1909.12238},
  eprinttype = {arXiv},
  eprint     = {1909.12238},
  timestamp  = {Thu, 11 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1909-12238.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1910.00528), 2019.
@article{DBLP:journals/corr/abs-1910-00528,
  author     = {Shruti Mishra and
                Abbas Abdolmaleki and
                Arthur Guez and
                Piotr Trochim and
                Doina Precup},
  title      = {Augmenting learning using symmetry in a biologically-inspired domain},
  journal    = {CoRR},
  volume     = {abs/1910.00528},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.00528},
  eprinttype = {arXiv},
  eprint     = {1910.00528},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-00528.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1910.04142), 2019.
% Same author list as DBLP:conf/corl/ByravanSAHNLSHR19; presumably the
% preprint of that conference paper.
@article{DBLP:journals/corr/abs-1910-04142,
  author     = {Arunkumar Byravan and
                Jost Tobias Springenberg and
                Abbas Abdolmaleki and
                Roland Hafner and
                Michael Neunert and
                Thomas Lampe and
                Noah Y. Siegel and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Imagined Value Gradients: Model-Based Policy Optimization with Transferable Latent Dynamics Models},
  journal    = {CoRR},
  volume     = {abs/1910.04142},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.04142},
  eprinttype = {arXiv},
  eprint     = {1910.04142},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-04142.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1910.09471), 2019.
@article{DBLP:journals/corr/abs-1910-09471,
  author     = {Rae Jeong and
                Jackie Kay and
                Francesco Romano and
                Thomas Lampe and
                Thomas Roth{\"{o}}rl and
                Abbas Abdolmaleki and
                Tom Erez and
                Yuval Tassa and
                Francesco Nori},
  title      = {Modelling Generalized Forces with Reinforcement Learning for Sim-to-Real Transfer},
  journal    = {CoRR},
  volume     = {abs/1910.09471},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.09471},
  eprinttype = {arXiv},
  eprint     = {1910.09471},
  timestamp  = {Tue, 22 Oct 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-09471.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1911.01831), 2019.
% "Q-function" is brace-protected so sentence-casing bibliography styles
% do not lowercase the "Q".
@article{DBLP:journals/corr/abs-1911-01831,
  author     = {Jonas Degrave and
                Abbas Abdolmaleki and
                Jost Tobias Springenberg and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Quinoa: a {Q-function} You Infer Normalized Over Actions},
  journal    = {CoRR},
  volume     = {abs/1911.01831},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.01831},
  eprinttype = {arXiv},
  eprint     = {1911.01831},
  timestamp  = {Mon, 11 Nov 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-01831.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% PhD thesis, University of Minho, 2018.
@phdthesis{DBLP:phd/pt/Abdolmaleki18,
  author     = {Abbas Abdolmaleki},
  title      = {Information theoretic stochastic search},
  school     = {University of Minho, Portugal},
  year       = {2018},
  url        = {https://hdl.handle.net/1822/59005},
  timestamp  = {Thu, 08 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/phd/pt/Abdolmaleki18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: J. Mach. Learn. Res. 19, 2018.
@article{DBLP:journals/jmlr/AkrourAAPN18,
  author     = {Riad Akrour and
                Abbas Abdolmaleki and
                Hany Abdulsamad and
                Jan Peters and
                Gerhard Neumann},
  title      = {Model-Free Trajectory-based Policy Optimization with Monotonic Improvement},
  journal    = {J. Mach. Learn. Res.},
  volume     = {19},
  pages      = {14:1--14:25},
  year       = {2018},
  url        = {https://jmlr.org/papers/v19/17-329.html},
  timestamp  = {Wed, 11 Sep 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/AkrourAAPN18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2018 conference-track paper.
@inproceedings{DBLP:conf/iclr/AbdolmalekiSTMH18,
  author     = {Abbas Abdolmaleki and
                Jost Tobias Springenberg and
                Yuval Tassa and
                R{\'{e}}mi Munos and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Maximum a Posteriori Policy Optimisation},
  booktitle  = {6th International Conference on Learning Representations, {ICLR} 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher  = {OpenReview.net},
  year       = {2018},
  url        = {https://openreview.net/forum?id=S1ANxQW0b},
  timestamp  = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/AbdolmalekiSTMH18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2018 conference-track paper.
@inproceedings{DBLP:conf/iclr/TangkarattAS18,
  author     = {Voot Tangkaratt and
                Abbas Abdolmaleki and
                Masashi Sugiyama},
  title      = {Guide Actor-Critic for Continuous Control},
  booktitle  = {6th International Conference on Learning Representations, {ICLR} 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher  = {OpenReview.net},
  year       = {2018},
  url        = {https://openreview.net/forum?id=BJk59JZ0b},
  timestamp  = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/TangkarattAS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICRA 2018 paper (IEEE).
@inproceedings{DBLP:conf/icra/BarbarosHAM18,
  author     = {Victor Barbaros and
                Herke van Hoof and
                Abbas Abdolmaleki and
                David Meger},
  title      = {Eager and Memory-Based Non-Parametric Stochastic Search Methods for Learning Control},
  booktitle  = {2018 {IEEE} International Conference on Robotics and Automation, {ICRA} 2018, Brisbane, Australia, May 21-25, 2018},
  pages      = {1--9},
  publisher  = {{IEEE}},
  year       = {2018},
  url        = {https://doi.org/10.1109/ICRA.2018.8460633},
  doi        = {10.1109/ICRA.2018.8460633},
  timestamp  = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl     = {https://dblp.org/rec/conf/icra/BarbarosHAM18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1801.00690), 2018.
% "DeepMind" is brace-protected so sentence-casing bibliography styles
% keep its internal capital.
@article{DBLP:journals/corr/abs-1801-00690,
  author     = {Yuval Tassa and
                Yotam Doron and
                Alistair Muldal and
                Tom Erez and
                Yazhe Li and
                Diego de Las Casas and
                David Budden and
                Abbas Abdolmaleki and
                Josh Merel and
                Andrew Lefrancq and
                Timothy P. Lillicrap and
                Martin A. Riedmiller},
  title      = {{DeepMind} Control Suite},
  journal    = {CoRR},
  volume     = {abs/1801.00690},
  year       = {2018},
  url        = {http://arxiv.org/abs/1801.00690},
  eprinttype = {arXiv},
  eprint     = {1801.00690},
  timestamp  = {Mon, 22 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1801-00690.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1806.06920), 2018.
% Same title and author list as DBLP:conf/iclr/AbdolmalekiSTMH18;
% presumably the preprint of that paper.
@article{DBLP:journals/corr/abs-1806-06920,
  author     = {Abbas Abdolmaleki and
                Jost Tobias Springenberg and
                Yuval Tassa and
                R{\'{e}}mi Munos and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Maximum a Posteriori Policy Optimisation},
  journal    = {CoRR},
  volume     = {abs/1806.06920},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.06920},
  eprinttype = {arXiv},
  eprint     = {1806.06920},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-06920.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1812.02256), 2018.
@article{DBLP:journals/corr/abs-1812-02256,
  author     = {Abbas Abdolmaleki and
                Jost Tobias Springenberg and
                Jonas Degrave and
                Steven Bohez and
                Yuval Tassa and
                Dan Belov and
                Nicolas Heess and
                Martin A. Riedmiller},
  title      = {Relative Entropy Regularized Policy Iteration},
  journal    = {CoRR},
  volume     = {abs/1812.02256},
  year       = {2018},
  url        = {http://arxiv.org/abs/1812.02256},
  eprinttype = {arXiv},
  eprint     = {1812.02256},
  timestamp  = {Tue, 01 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-02256.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2017 workshop paper (AAAI Technical Report WS-17).
% Booktitle: the duplicated article in "of the The Thirty-First" was removed.
@inproceedings{DBLP:conf/aaai/AbdolmalekiSLRP17,
  author     = {Abbas Abdolmaleki and
                David Sim{\~{o}}es and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Bob Price and
                Gerhard Neumann},
  title      = {Stochastic Search In Changing Situations},
  booktitle  = {The Workshops of the Thirty-First {AAAI} Conference on Artificial Intelligence, Saturday, February 4-9, 2017, San Francisco, California, {USA}},
  series     = {{AAAI} Technical Report},
  volume     = {{WS-17}},
  publisher  = {{AAAI} Press},
  year       = {2017},
  url        = {http://aaai.org/ocs/index.php/WS/AAAIW17/paper/view/15204},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/AbdolmalekiSLRP17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% GECCO 2017 paper (ACM).
@inproceedings{DBLP:conf/gecco/AbdolmalekiPLRN17,
  author     = {Abbas Abdolmaleki and
                Bob Price and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Gerhard Neumann},
  editor     = {Peter A. N. Bosman},
  title      = {Deriving and improving {CMA-ES} with information geometric trust regions},
  booktitle  = {Proceedings of the Genetic and Evolutionary Computation Conference, {GECCO} 2017, Berlin, Germany, July 15-19, 2017},
  pages      = {657--664},
  publisher  = {{ACM}},
  year       = {2017},
  url        = {https://doi.org/10.1145/3071178.3071252},
  doi        = {10.1145/3071178.3071252},
  timestamp  = {Sun, 02 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/gecco/AbdolmalekiPLRN17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2017 paper.
@inproceedings{DBLP:conf/ijcai/AbdolmalekiPLRN17,
  author     = {Abbas Abdolmaleki and
                Bob Price and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Gerhard Neumann},
  editor     = {Carles Sierra},
  title      = {Contextual Covariance Matrix Adaptation Evolutionary Strategies},
  booktitle  = {Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August 19-25, 2017},
  pages      = {1378--1385},
  publisher  = {ijcai.org},
  year       = {2017},
  url        = {https://doi.org/10.24963/ijcai.2017/191},
  doi        = {10.24963/IJCAI.2017/191},
  timestamp  = {Tue, 20 Aug 2019 16:16:54 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/AbdolmalekiPLRN17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: J. Intell. Robotic Syst. 83(3-4), 2016.
@article{DBLP:journals/jirs/AbdolmalekiLR0N16,
  author     = {Abbas Abdolmaleki and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Jan Peters and
                Gerhard Neumann},
  title      = {Contextual Policy Search for Linear and Nonlinear Generalization of a Humanoid Walking Controller},
  journal    = {J. Intell. Robotic Syst.},
  volume     = {83},
  number     = {3-4},
  pages      = {393--408},
  year       = {2016},
  url        = {https://doi.org/10.1007/s10846-016-0347-y},
  doi        = {10.1007/S10846-016-0347-Y},
  timestamp  = {Tue, 07 Apr 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jirs/AbdolmalekiLR0N16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% GECCO 2016 companion-proceedings paper (ACM).
@inproceedings{DBLP:conf/gecco/AbdolmalekiLRN16,
  author     = {Abbas Abdolmaleki and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Gerhard Neumann},
  editor     = {Tobias Friedrich and
                Frank Neumann and
                Andrew M. Sutton},
  title      = {Contextual Stochastic Search},
  booktitle  = {Genetic and Evolutionary Computation Conference, {GECCO} 2016, Denver, CO, USA, July 20-24, 2016, Companion Material Proceedings},
  pages      = {29--30},
  publisher  = {{ACM}},
  year       = {2016},
  url        = {https://doi.org/10.1145/2908961.2909012},
  doi        = {10.1145/2908961.2909012},
  timestamp  = {Sun, 02 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/gecco/AbdolmalekiLRN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% GECCO 2016 companion-proceedings paper (ACM).
% Shares its title with DBLP:conf/nips/AbdolmalekiLPLR15 in this file.
@inproceedings{DBLP:conf/gecco/AbdolmalekiLLR016,
  author     = {Abbas Abdolmaleki and
                Rudolf Lioutikov and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Jan Peters and
                Gerhard Neumann},
  editor     = {Tobias Friedrich and
                Frank Neumann and
                Andrew M. Sutton},
  title      = {Model-Based Relative Entropy Stochastic Search},
  booktitle  = {Genetic and Evolutionary Computation Conference, {GECCO} 2016, Denver, CO, USA, July 20-24, 2016, Companion Material Proceedings},
  pages      = {153--154},
  publisher  = {{ACM}},
  year       = {2016},
  url        = {https://doi.org/10.1145/2908961.2930952},
  doi        = {10.1145/2908961.2930952},
  timestamp  = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/gecco/AbdolmalekiLLR016.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICARSC 2016 paper (IEEE).
@inproceedings{DBLP:conf/icarsc/AbdolmalekiSLRN16,
  author     = {Abbas Abdolmaleki and
                David Sim{\~{o}}es and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Gerhard Neumann},
  editor     = {Bernardo Cunha and
                Jos{\'{e}} Lima and
                Manuel F. Silva and
                Paulo Leit{\~{a}}o},
  title      = {Contextual Relative Entropy Policy Search with Covariance Matrix Adaptation},
  booktitle  = {2016 International Conference on Autonomous Robot Systems and Competitions, {ICARSC} 2016, Bragan{\c{c}}a, Portugal, May 4-6, 2016},
  pages      = {94--99},
  publisher  = {{IEEE}},
  year       = {2016},
  url        = {https://doi.org/10.1109/ICARSC.2016.31},
  doi        = {10.1109/ICARSC.2016.31},
  timestamp  = {Tue, 05 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icarsc/AbdolmalekiSLRN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2016 paper (JMLR Workshop and Conference Proceedings vol. 48).
% Booktitle ordinal corrected from "33nd" to "33rd".
@inproceedings{DBLP:conf/icml/AkrourNAA16,
  author     = {Riad Akrour and
                Gerhard Neumann and
                Hany Abdulsamad and
                Abbas Abdolmaleki},
  editor     = {Maria{-}Florina Balcan and
                Kilian Q. Weinberger},
  title      = {Model-Free Trajectory Optimization for Reinforcement Learning},
  booktitle  = {Proceedings of the 33rd International Conference on Machine Learning, {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {48},
  pages      = {2961--2970},
  publisher  = {JMLR.org},
  year       = {2016},
  url        = {http://proceedings.mlr.press/v48/akrour16.html},
  timestamp  = {Wed, 29 May 2019 08:41:46 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/AkrourNAA16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IROS 2016 paper (IEEE).
@inproceedings{DBLP:conf/iros/AbdolmalekiLRN16,
  author     = {Abbas Abdolmaleki and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Gerhard Neumann},
  title      = {Non-parametric contextual stochastic search},
  booktitle  = {2016 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, {IROS} 2016, Daejeon, South Korea, October 9-14, 2016},
  pages      = {2643--2648},
  publisher  = {{IEEE}},
  year       = {2016},
  url        = {https://doi.org/10.1109/IROS.2016.7759411},
  doi        = {10.1109/IROS.2016.7759411},
  timestamp  = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl     = {https://dblp.org/rec/conf/iros/AbdolmalekiLRN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% RoboCup 2016 paper (Springer LNCS vol. 9776).
@inproceedings{DBLP:conf/robocup/AbdolmalekiSLRN16,
  author     = {Abbas Abdolmaleki and
                David Sim{\~{o}}es and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Gerhard Neumann},
  editor     = {Sven Behnke and
                Raymond Sheh and
                Sanem Sariel and
                Daniel D. Lee},
  title      = {Learning a Humanoid Kick with Controlled Distance},
  booktitle  = {RoboCup 2016: Robot World Cup {XX} [Leipzig, Germany, June 30 - July 4, 2016]},
  series     = {Lecture Notes in Computer Science},
  volume     = {9776},
  pages      = {45--57},
  publisher  = {Springer},
  year       = {2016},
  url        = {https://doi.org/10.1007/978-3-319-68792-6\_4},
  doi        = {10.1007/978-3-319-68792-6\_4},
  timestamp  = {Tue, 05 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/robocup/AbdolmalekiSLRN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1606.09197), 2016.
% Title matches DBLP:conf/icml/AkrourNAA16 up to capitalisation;
% presumably the preprint of that paper.
@article{DBLP:journals/corr/AkrourAAN16,
  author     = {Riad Akrour and
                Abbas Abdolmaleki and
                Hany Abdulsamad and
                Gerhard Neumann},
  title      = {Model-free Trajectory Optimization for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1606.09197},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.09197},
  eprinttype = {arXiv},
  eprint     = {1606.09197},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/AkrourAAN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Humanoids 2015 paper (IEEE).
@inproceedings{DBLP:conf/humanoids/AbdolmalekiLRN15,
  author     = {Abbas Abdolmaleki and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Gerhard Neumann},
  title      = {Regularized covariance estimation for weighted maximum likelihood policy search methods},
  booktitle  = {15th {IEEE-RAS} International Conference on Humanoid Robots, Humanoids 2015, Seoul, South Korea, November 3-5, 2015},
  pages      = {154--159},
  publisher  = {{IEEE}},
  year       = {2015},
  url        = {https://doi.org/10.1109/HUMANOIDS.2015.7363529},
  doi        = {10.1109/HUMANOIDS.2015.7363529},
  timestamp  = {Wed, 16 Oct 2019 14:14:50 +0200},
  biburl     = {https://dblp.org/rec/conf/humanoids/AbdolmalekiLRN15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICARSC 2015 paper (IEEE).
@inproceedings{DBLP:conf/icarsc/AbdolmalekiLR0N15,
  author     = {Abbas Abdolmaleki and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Jan Peters and
                Gerhard Neumann},
  title      = {Contextual Policy Search for Generalizing a Parameterized Biped Walking Controller},
  booktitle  = {2015 {IEEE} International Conference on Autonomous Robot Systems and Competitions, {ICARSC} 2015, Vila Real, Portugal, April 8-10, 2015},
  pages      = {17--22},
  publisher  = {{IEEE}},
  year       = {2015},
  url        = {https://doi.org/10.1109/ICARSC.2015.43},
  doi        = {10.1109/ICARSC.2015.43},
  timestamp  = {Wed, 16 Oct 2019 14:14:57 +0200},
  biburl     = {https://dblp.org/rec/conf/icarsc/AbdolmalekiLR0N15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2015 paper (Advances in Neural Information Processing Systems 28).
@inproceedings{DBLP:conf/nips/AbdolmalekiLPLR15,
  author     = {Abbas Abdolmaleki and
                Rudolf Lioutikov and
                Jan Peters and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis and
                Gerhard Neumann},
  editor     = {Corinna Cortes and
                Neil D. Lawrence and
                Daniel D. Lee and
                Masashi Sugiyama and
                Roman Garnett},
  title      = {Model-Based Relative Entropy Stochastic Search},
  booktitle  = {Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada},
  pages      = {3537--3545},
  year       = {2015},
  url        = {https://proceedings.neurips.cc/paper/2015/hash/36ac8e558ac7690b6f44e2cb5ef93322-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/AbdolmalekiLPLR15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IBERAMIA 2014 paper (Springer LNCS vol. 8864).
@inproceedings{DBLP:conf/iberamia/AbdolmalekiSRLPN14,
  author     = {Abbas Abdolmaleki and
                Nima Shafii and
                Lu{\'{\i}}s Paulo Reis and
                Nuno Lau and
                Jan Peters and
                Gerhard Neumann},
  editor     = {Ana L. C. Bazzan and
                Karim Pichara},
  title      = {Omnidirectional Walking with a Compliant Inverted Pendulum Model},
  booktitle  = {Advances in Artificial Intelligence - {IBERAMIA} 2014 - 14th Ibero-American Conference on AI, Santiago de Chile, Chile, November 24-27, 2014, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {8864},
  pages      = {481--493},
  publisher  = {Springer},
  year       = {2014},
  url        = {https://doi.org/10.1007/978-3-319-12027-0\_39},
  doi        = {10.1007/978-3-319-12027-0\_39},
  timestamp  = {Sun, 02 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iberamia/AbdolmalekiSRLPN14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% EPIA 2013 paper (Springer LNCS vol. 8154).
@inproceedings{DBLP:conf/epia/ShafiiAFLR13,
  author     = {Nima Shafii and
                Abbas Abdolmaleki and
                Rui Ferreira and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis},
  editor     = {Lu{\'{\i}}s Correia and
                Lu{\'{\i}}s Paulo Reis and
                Jos{\'{e}} Cascalho},
  title      = {Omnidirectional Walking and Active Balance for Soccer Humanoid Robot},
  booktitle  = {Progress in Artificial Intelligence - 16th Portuguese Conference on Artificial Intelligence, {EPIA} 2013, Angra do Hero{\'{\i}}smo, Azores, Portugal, September 9-12, 2013. Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {8154},
  pages      = {283--294},
  publisher  = {Springer},
  year       = {2013},
  url        = {https://doi.org/10.1007/978-3-642-40669-0\_25},
  doi        = {10.1007/978-3-642-40669-0\_25},
  timestamp  = {Thu, 20 Jan 2022 14:32:12 +0100},
  biburl     = {https://dblp.org/rec/conf/epia/ShafiiAFLR13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: J. Theor. Appl. Electron. Commer. Res. 7(3), 2012.
@article{DBLP:journals/jtaer/AbediNA12,
  author     = {Leila Abedi and
                Mohammad Ali Nematbakhsh and
                Abbas Abdolmaleki},
  title      = {A Model for Context Aware Mobile Payment},
  journal    = {J. Theor. Appl. Electron. Commer. Res.},
  volume     = {7},
  number     = {3},
  pages      = {1--10},
  year       = {2012},
  url        = {https://doi.org/10.4067/s0718-18762012000300002},
  doi        = {10.4067/S0718-18762012000300002},
  timestamp  = {Fri, 10 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jtaer/AbediNA12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% RoboCup 2012 paper (Springer LNCS vol. 7500).
@inproceedings{DBLP:conf/robocup/AbdolmalekiMLR12,
  author     = {Abbas Abdolmaleki and
                Mostafa Movahedi and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis},
  editor     = {Xiaoping Chen and
                Peter Stone and
                Luis Enrique Sucar and
                Tijn van der Zant},
  title      = {A Distributed Cooperative Reinforcement Learning Method for Decision Making in Fire Brigade Teams},
  booktitle  = {RoboCup 2012: Robot Soccer World Cup {XVI} [papers from the 16th Annual RoboCup International Symposium, Mexico City, Mexico, June 18-24, 2012]},
  series     = {Lecture Notes in Computer Science},
  volume     = {7500},
  pages      = {237--248},
  publisher  = {Springer},
  year       = {2012},
  url        = {https://doi.org/10.1007/978-3-642-39250-4\_22},
  doi        = {10.1007/978-3-642-39250-4\_22},
  timestamp  = {Tue, 20 Aug 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/robocup/AbdolmalekiMLR12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% EPIA 2011 paper (Springer LNCS vol. 7026).
@inproceedings{DBLP:conf/epia/AbdolmalekiMSLR11,
  author     = {Abbas Abdolmaleki and
                Mostafa Movahedi and
                Sajjad Salehi and
                Nuno Lau and
                Lu{\'{\i}}s Paulo Reis},
  editor     = {Luis Antunes and
                Helena Sofia Pinto},
  title      = {A Reinforcement Learning Based Method for Optimizing the Process of Decision Making in Fire Brigade Agents},
  booktitle  = {Progress in Artificial Intelligence, 15th Portuguese Conference on Artificial Intelligence, {EPIA} 2011, Lisbon, Portugal, October 10-13, 2011. Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {7026},
  pages      = {340--351},
  publisher  = {Springer},
  year       = {2011},
  url        = {https://doi.org/10.1007/978-3-642-24769-9\_25},
  doi        = {10.1007/978-3-642-24769-9\_25},
  timestamp  = {Sun, 02 Oct 2022 16:00:30 +0200},
  biburl     = {https://dblp.org/rec/conf/epia/AbdolmalekiMSLR11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.