BibTeX records: Tal Ben-Nun

download as .bib file

% PPoPP 2024 conference paper. The CoRR record DBLP:journals/corr/abs-2402-19364
% carries the same title and author list.
@inproceedings{DBLP:conf/ppopp/GianinazziZHLAS24,
  author       = {Lukas Gianinazzi and
                  Alexandros Nikolaos Ziogas and
                  Langwen Huang and
                  Piotr Luczynski and
                  Saleh Ashkboos and
                  Florian Scheidl and
                  Armon Carigiet and
                  Chio Ge and
                  Nabil Abubaker and
                  Maciej Besta and
                  Tal Ben{-}Nun and
                  Torsten Hoefler},
  editor       = {Michel Steuwer and
                  I{-}Ting Angelina Lee and
                  Milind Chabbi},
  title        = {Arrow Matrix Decomposition: {A} Novel Approach for Communication-Efficient
                  Sparse Matrix Multiplication},
  booktitle    = {Proceedings of the 29th {ACM} {SIGPLAN} Annual Symposium on Principles
                  and Practice of Parallel Programming, {PPoPP} 2024, Edinburgh, United
                  Kingdom, March 2-6, 2024},
  pages        = {404--416},
  publisher    = {{ACM}},
  year         = {2024},
  url          = {https://doi.org/10.1145/3627535.3638496},
  doi          = {10.1145/3627535.3638496},
  timestamp    = {Mon, 01 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ppopp/GianinazziZHLAS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2402.19364) record; title matches DBLP:conf/ppopp/GianinazziZHLAS24.
@article{DBLP:journals/corr/abs-2402-19364,
  author     = {Lukas Gianinazzi and
                Alexandros Nikolaos Ziogas and
                Langwen Huang and
                Piotr Luczynski and
                Saleh Ashkboos and
                Florian Scheidl and
                Armon Carigiet and
                Chio Ge and
                Nabil Abubaker and
                Maciej Besta and
                Tal Ben{-}Nun and
                Torsten Hoefler},
  title      = {Arrow Matrix Decomposition: {A} Novel Approach for Communication-Efficient
                Sparse Matrix Multiplication},
  journal    = {CoRR},
  volume     = {abs/2402.19364},
  year       = {2024},
  doi        = {10.48550/ARXIV.2402.19364},
  url        = {https://doi.org/10.48550/arXiv.2402.19364},
  eprinttype = {arXiv},
  eprint     = {2402.19364},
  timestamp  = {Tue, 26 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-19364.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article, Comput. Sci. Eng. 25(5), 2023.
@article{DBLP:journals/cse/DubeyBCSR23,
  author    = {Anshu Dubey and
               Tal Ben{-}Nun and
               Bradford L. Chamberlain and
               Bronis R. de Supinski and
               Damian W. I. Rouson},
  title     = {Performance on {HPC} Platforms Is Possible Without {C++}},
  journal   = {Comput. Sci. Eng.},
  volume    = {25},
  number    = {5},
  pages     = {48--52},
  year      = {2023},
  doi       = {10.1109/MCSE.2023.3329330},
  url       = {https://doi.org/10.1109/MCSE.2023.3329330},
  timestamp = {Sat, 04 May 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/cse/DubeyBCSR23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% CGO 2023 conference paper. The CoRR record DBLP:journals/corr/abs-2306-00366
% carries the same title and author list.
@inproceedings{DBLP:conf/cgo/Ben-NunACH23,
  author       = {Tal Ben{-}Nun and
                  Berke Ates and
                  Alexandru Calotoiu and
                  Torsten Hoefler},
  editor       = {Christophe Dubach and
                  Derek Bruening and
                  Ben Hardekopf},
  title        = {Bridging Control-Centric and Data-Centric Optimization},
  booktitle    = {Proceedings of the 21st {ACM/IEEE} International Symposium on Code
                  Generation and Optimization, {CGO} 2023, Montr{\'{e}}al, QC,
                  Canada, 25 February 2023 - 1 March 2023},
  pages        = {173--185},
  publisher    = {{ACM}},
  year         = {2023},
  url          = {https://doi.org/10.1145/3579990.3580018},
  doi          = {10.1145/3579990.3580018},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cgo/Ben-NunACH23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CIAC 2023 paper (LNCS 13898). The CoRR record DBLP:journals/corr/abs-2307-08420
% carries the same title and author list.
% The doi and url fields hold the raw identifier (plain underscore): url-aware
% styles and biblatex treat them verbatim, so an escaped underscore would leak a
% backslash into the printed link.
@inproceedings{DBLP:conf/ciac/BenNunGHO23,
  author       = {Tal Ben{-}Nun and
                  Lukas Gianinazzi and
                  Torsten Hoefler and
                  Yishai Oltchik},
  editor       = {Marios Mavronicolas},
  title        = {Maximum Flows in Parametric Graph Templates},
  booktitle    = {Algorithms and Complexity - 13th International Conference, {CIAC}
                  2023, Larnaca, Cyprus, June 13-16, 2023, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {13898},
  pages        = {97--111},
  publisher    = {Springer},
  year         = {2023},
  url          = {https://doi.org/10.1007/978-3-031-30448-4_8},
  doi          = {10.1007/978-3-031-30448-4_8},
  timestamp    = {Wed, 17 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ciac/BenNunGHO23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICS 2023 conference paper; same author list as DBLP:journals/corr/abs-2303-08142.
@inproceedings{DBLP:conf/ics/TrumperBSCH23,
  author    = {Lukas Tr{\"{u}}mper and
               Tal Ben{-}Nun and
               Philipp Schaad and
               Alexandru Calotoiu and
               Torsten Hoefler},
  editor    = {Kyle A. Gallivan and
               Efstratios Gallopoulos and
               Dimitrios S. Nikolopoulos and
               Ram{\'{o}}n Beivide},
  title     = {Performance Embeddings: {A} Similarity-Based Transfer Tuning Approach
               to Performance Optimization},
  booktitle = {Proceedings of the 37th International Conference on Supercomputing,
               {ICS} 2023, Orlando, FL, USA, June 21-23, 2023},
  pages     = {50--62},
  publisher = {{ACM}},
  year      = {2023},
  doi       = {10.1145/3577193.3593714},
  url       = {https://doi.org/10.1145/3577193.3593714},
  timestamp = {Fri, 07 Jul 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ics/TrumperBSCH23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% SC 2023 conference paper. The CoRR record DBLP:journals/corr/abs-2310-02065
% carries the same title and author list.
% "N:M" is kept as one braced token so it is neither split nor lower-cased.
@inproceedings{DBLP:conf/sc/CastroIABFH23,
  author       = {Roberto L. Castro and
                  Andrei Ivanov and
                  Diego Andrade and
                  Tal Ben{-}Nun and
                  Basilio B. Fraguela and
                  Torsten Hoefler},
  editor       = {Dorian Arnold and
                  Rosa M. Badia and
                  Kathryn M. Mohror},
  title        = {{VENOM:} {A} Vectorized {N:M} Format for Unleashing the Power of
                  Sparse Tensor Cores},
  booktitle    = {Proceedings of the International Conference for High Performance Computing,
                  Networking, Storage and Analysis, {SC} 2023, Denver, CO, USA, November
                  12-17, 2023},
  pages        = {72:1--72:14},
  publisher    = {{ACM}},
  year         = {2023},
  url          = {https://doi.org/10.1145/3581784.3607087},
  doi          = {10.1145/3581784.3607087},
  timestamp    = {Tue, 28 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/CastroIABFH23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% SC 2023 conference paper. The CoRR record DBLP:journals/corr/abs-2306-16178
% carries the same title and author list.
% The camel-case name is braced so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/sc/SchaadSBCZH23,
  author       = {Philipp Schaad and
                  Timo Schneider and
                  Tal Ben{-}Nun and
                  Alexandru Calotoiu and
                  Alexandros Nikolaos Ziogas and
                  Torsten Hoefler},
  editor       = {Dorian Arnold and
                  Rosa M. Badia and
                  Kathryn M. Mohror},
  title        = {{FuzzyFlow}: Leveraging Dataflow To Find and Squash Program Optimization
                  Bugs},
  booktitle    = {Proceedings of the International Conference for High Performance Computing,
                  Networking, Storage and Analysis, {SC} 2023, Denver, CO, USA, November
                  12-17, 2023},
  pages        = {88:1--88:15},
  publisher    = {{ACM}},
  year         = {2023},
  url          = {https://doi.org/10.1145/3581784.3613214},
  doi          = {10.1145/3581784.3613214},
  timestamp    = {Thu, 09 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/SchaadSBCZH23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2301.01048) record.
% "I/O" is braced so sentence-casing styles do not turn it into "i/o".
@article{DBLP:journals/corr/abs-2301-01048,
  author       = {Niels Gleinig and
                  Tal Ben{-}Nun and
                  Torsten Hoefler},
  title        = {A Theory of {I/O}-Efficient Sparse Neural Network Inference},
  journal      = {CoRR},
  volume       = {abs/2301.01048},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2301.01048},
  doi          = {10.48550/ARXIV.2301.01048},
  eprinttype   = {arXiv},
  eprint       = {2301.01048},
  timestamp    = {Tue, 10 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2301-01048.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2303.08142) record; same author list as DBLP:conf/ics/TrumperBSCH23.
@article{DBLP:journals/corr/abs-2303-08142,
  author     = {Lukas Tr{\"{u}}mper and
                Tal Ben{-}Nun and
                Philipp Schaad and
                Alexandru Calotoiu and
                Torsten Hoefler},
  title      = {Performance Embeddings: {A} Similarity-based Approach to Automatic
                Performance Optimization},
  journal    = {CoRR},
  volume     = {abs/2303.08142},
  year       = {2023},
  doi        = {10.48550/ARXIV.2303.08142},
  url        = {https://doi.org/10.48550/arXiv.2303.08142},
  eprinttype = {arXiv},
  eprint     = {2303.08142},
  timestamp  = {Mon, 20 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-08142.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2304.07613) record.
% The mixed-case names are braced so sentence-casing styles keep their capitals.
@article{DBLP:journals/corr/abs-2304-07613,
  author       = {Andrei Ivanov and
                  Nikoli Dryden and
                  Tal Ben{-}Nun and
                  Saleh Ashkboos and
                  Torsten Hoefler},
  title        = {{STen}: Productive and Efficient Sparsity in {PyTorch}},
  journal      = {CoRR},
  volume       = {abs/2304.07613},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2304.07613},
  doi          = {10.48550/ARXIV.2304.07613},
  eprinttype   = {arXiv},
  eprint       = {2304.07613},
  timestamp    = {Fri, 21 Apr 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2304-07613.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2306.00366) record; same title and authors as DBLP:conf/cgo/Ben-NunACH23.
@article{DBLP:journals/corr/abs-2306-00366,
  author     = {Tal Ben{-}Nun and
                Berke Ates and
                Alexandru Calotoiu and
                Torsten Hoefler},
  title      = {Bridging Control-Centric and Data-Centric Optimization},
  journal    = {CoRR},
  volume     = {abs/2306.00366},
  year       = {2023},
  doi        = {10.48550/ARXIV.2306.00366},
  url        = {https://doi.org/10.48550/arXiv.2306.00366},
  eprinttype = {arXiv},
  eprint     = {2306.00366},
  timestamp  = {Mon, 12 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-00366.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2306.16178) record; same title and authors as DBLP:conf/sc/SchaadSBCZH23.
% The camel-case name is braced so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2306-16178,
  author       = {Philipp Schaad and
                  Timo Schneider and
                  Tal Ben{-}Nun and
                  Alexandru Calotoiu and
                  Alexandros Nikolaos Ziogas and
                  Torsten Hoefler},
  title        = {{FuzzyFlow}: Leveraging Dataflow To Find and Squash Program Optimization
                  Bugs},
  journal      = {CoRR},
  volume       = {abs/2306.16178},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2306.16178},
  doi          = {10.48550/ARXIV.2306.16178},
  eprinttype   = {arXiv},
  eprint       = {2306.16178},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2306-16178.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2307.08420) record; same title and authors as DBLP:conf/ciac/BenNunGHO23.
@article{DBLP:journals/corr/abs-2307-08420,
  author     = {Tal Ben{-}Nun and
                Lukas Gianinazzi and
                Torsten Hoefler and
                Yishai Oltchik},
  title      = {Maximum Flows in Parametric Graph Templates},
  journal    = {CoRR},
  volume     = {abs/2307.08420},
  year       = {2023},
  doi        = {10.48550/ARXIV.2307.08420},
  url        = {https://doi.org/10.48550/arXiv.2307.08420},
  eprinttype = {arXiv},
  eprint     = {2307.08420},
  timestamp  = {Tue, 25 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-08420.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2308.12093) record.
@article{DBLP:journals/corr/abs-2308-12093,
  author     = {Julia Bazinska and
                Andrei Ivanov and
                Tal Ben{-}Nun and
                Nikoli Dryden and
                Maciej Besta and
                Siyuan Shen and
                Torsten Hoefler},
  title      = {Cached Operator Reordering: {A} Unified View for Fast {GNN} Training},
  journal    = {CoRR},
  volume     = {abs/2308.12093},
  year       = {2023},
  doi        = {10.48550/ARXIV.2308.12093},
  url        = {https://doi.org/10.48550/arXiv.2308.12093},
  eprinttype = {arXiv},
  eprint     = {2308.12093},
  timestamp  = {Wed, 30 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2308-12093.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2309.15432) record.
% "ComPile" is braced so sentence-casing styles do not render it as "Compile".
% NOTE(review): the family name is assumed to be the compound "Monsalve Diaz";
% the comma form stops BibTeX from taking only "Diaz" as the surname. Confirm.
@article{DBLP:journals/corr/abs-2309-15432,
  author       = {Aiden Grossman and
                  Ludger Paehler and
                  Konstantinos Parasyris and
                  Tal Ben{-}Nun and
                  Jacob Hegna and
                  William S. Moses and
                  Monsalve Diaz, Jose Manuel and
                  Mircea Trofin and
                  Johannes Doerfert},
  title        = {{ComPile}: {A} Large {IR} Dataset from Production Sources},
  journal      = {CoRR},
  volume       = {abs/2309.15432},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2309.15432},
  doi          = {10.48550/ARXIV.2309.15432},
  eprinttype   = {arXiv},
  eprint       = {2309.15432},
  timestamp    = {Tue, 17 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2309-15432.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2310.02065) record; same authors as DBLP:conf/sc/CastroIABFH23.
% "N:M" is kept as one braced token so it is neither split nor lower-cased.
@article{DBLP:journals/corr/abs-2310-02065,
  author       = {Roberto L. Castro and
                  Andrei Ivanov and
                  Diego Andrade and
                  Tal Ben{-}Nun and
                  Basilio B. Fraguela and
                  Torsten Hoefler},
  title        = {{VENOM:} {A} Vectorized {N:M} Format for Unleashing the Power of
                  Sparse Tensor Cores},
  journal      = {CoRR},
  volume       = {abs/2310.02065},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2310.02065},
  doi          = {10.48550/ARXIV.2310.02065},
  eprinttype   = {arXiv},
  eprint       = {2310.02065},
  timestamp    = {Thu, 19 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-02065.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICCAD 2022 conference paper. The CoRR record DBLP:journals/corr/abs-2210-04598
% carries the same title and author list.
% "De Matteis" is written in "Last, First" form: in "First Last" order BibTeX
% would take only "Matteis" as the surname because "De" is capitalised.
@inproceedings{DBLP:conf/iccad/JohnsenMBLH22,
  author       = {Carl{-}Johannes Johnsen and
                  De Matteis, Tiziano and
                  Tal Ben{-}Nun and
                  Johannes de Fine Licht and
                  Torsten Hoefler},
  editor       = {Tulika Mitra and
                  Evangeline F. Y. Young and
                  Jinjun Xiong},
  title        = {Temporal Vectorization: {A} Compiler Approach to Automatic Multi-Pumping},
  booktitle    = {Proceedings of the 41st {IEEE/ACM} International Conference on Computer-Aided
                  Design, {ICCAD} 2022, San Diego, California, USA, 30 October 2022
                  - 3 November 2022},
  pages        = {85:1--85:9},
  publisher    = {{ACM}},
  year         = {2022},
  url          = {https://doi.org/10.1145/3508352.3549374},
  doi          = {10.1145/3508352.3549374},
  timestamp    = {Tue, 06 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iccad/JohnsenMBLH22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICS '22 conference paper.
@inproceedings{DBLP:conf/ics/CalotoiuBKLSSH22,
  author    = {Alexandru Calotoiu and
               Tal Ben{-}Nun and
               Grzegorz Kwasniewski and
               Johannes de Fine Licht and
               Timo Schneider and
               Philipp Schaad and
               Torsten Hoefler},
  editor    = {Lawrence Rauchwerger and
               Kirk W. Cameron and
               Dimitrios S. Nikolopoulos and
               Dionisios N. Pnevmatikatos},
  title     = {Lifting {C} semantics for dataflow optimization},
  booktitle = {{ICS} '22: 2022 International Conference on Supercomputing, Virtual
               Event, June 28 - 30, 2022},
  pages     = {17:1--17:13},
  publisher = {{ACM}},
  year      = {2022},
  doi       = {10.1145/3524059.3532389},
  url       = {https://doi.org/10.1145/3524059.3532389},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ics/CalotoiuBKLSSH22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% ICS '22 conference paper.
@inproceedings{DBLP:conf/ics/RauschBDI0H22,
  author    = {Oliver Rausch and
               Tal Ben{-}Nun and
               Nikoli Dryden and
               Andrei Ivanov and
               Shigang Li and
               Torsten Hoefler},
  editor    = {Lawrence Rauchwerger and
               Kirk W. Cameron and
               Dimitrios S. Nikolopoulos and
               Dionisios N. Pnevmatikatos},
  title     = {A data-centric optimization framework for machine learning},
  booktitle = {{ICS} '22: 2022 International Conference on Supercomputing, Virtual
               Event, June 28 - 30, 2022},
  pages     = {36:1--36:13},
  publisher = {{ACM}},
  year      = {2022},
  doi       = {10.1145/3524059.3532364},
  url       = {https://doi.org/10.1145/3524059.3532364},
  timestamp = {Wed, 22 Jun 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ics/RauschBDI0H22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% NeurIPS 2022 paper; same author list as DBLP:journals/corr/abs-2206-14786.
% The url field holds the raw address (plain underscores): url-aware styles and
% biblatex treat it verbatim, so escaped underscores would leak backslashes into
% the printed link.
@inproceedings{DBLP:conf/nips/AshkboosHDBDGKH22,
  author       = {Saleh Ashkboos and
                  Langwen Huang and
                  Nikoli Dryden and
                  Tal Ben{-}Nun and
                  Peter Dueben and
                  Lukas Gianinazzi and
                  Luca Kummer and
                  Torsten Hoefler},
  editor       = {Sanmi Koyejo and
                  S. Mohamed and
                  A. Agarwal and
                  Danielle Belgrave and
                  K. Cho and
                  A. Oh},
  title        = {{ENS-10:} {A} Dataset For Post-Processing Ensemble Weather Forecasts},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, {NeurIPS} 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper_files/paper/2022/hash/89e44582fd28ddfea1ea4dcb0ebbf4b0-Abstract-Datasets_and_Benchmarks.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/AshkboosHDBDGKH22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% SC22 conference paper; same author list as DBLP:journals/corr/abs-2206-08301.
@inproceedings{DBLP:conf/sc/ZiogasKBSH22,
  author    = {Alexandros Nikolaos Ziogas and
               Grzegorz Kwasniewski and
               Tal Ben{-}Nun and
               Timo Schneider and
               Torsten Hoefler},
  editor    = {Felix Wolf and
               Sameer Shende and
               Candace Culhane and
               Sadaf R. Alam and
               Heike Jagode},
  title     = {Deinsum: Practically {I/O} Optimal Multi-Linear Algebra},
  booktitle = {{SC22:} International Conference for High Performance Computing, Networking,
               Storage and Analysis, Dallas, TX, USA, November 13-18, 2022},
  pages     = {25:1--25:15},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/SC41404.2022.00030},
  url       = {https://doi.org/10.1109/SC41404.2022.00030},
  timestamp = {Tue, 07 May 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/sc/ZiogasKBSH22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% SC22 conference paper; same title and authors as DBLP:journals/corr/abs-2207-07433.
@inproceedings{DBLP:conf/sc/SchaadBH22,
  author    = {Philipp Schaad and
               Tal Ben{-}Nun and
               Torsten Hoefler},
  editor    = {Felix Wolf and
               Sameer Shende and
               Candace Culhane and
               Sadaf R. Alam and
               Heike Jagode},
  title     = {Boosting Performance Optimization with Interactive Data Movement Visualization},
  booktitle = {{SC22:} International Conference for High Performance Computing, Networking,
               Storage and Analysis, Dallas, TX, USA, November 13-18, 2022},
  pages     = {64:1--64:16},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/SC41404.2022.00069},
  url       = {https://doi.org/10.1109/SC41404.2022.00069},
  timestamp = {Wed, 24 May 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/sc/SchaadBH22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% SC22 conference paper. The CoRR record DBLP:journals/corr/abs-2205-04148
% carries the same title and author list.
% "Python" is braced so sentence-casing styles keep the capital.
@inproceedings{DBLP:conf/sc/BenNunGDWDDEGMTWFSH22,
  author       = {Tal Ben{-}Nun and
                  Linus Groner and
                  Florian Deconinck and
                  Tobias Wicky and
                  Eddie Davis and
                  Johann Dahm and
                  Oliver Elbert and
                  Rhea George and
                  Jeremy McGibbon and
                  Lukas Tr{\"{u}}mper and
                  Elynn Wu and
                  Oliver Fuhrer and
                  Thomas C. Schulthess and
                  Torsten Hoefler},
  editor       = {Felix Wolf and
                  Sameer Shende and
                  Candace Culhane and
                  Sadaf R. Alam and
                  Heike Jagode},
  title        = {Productive Performance Engineering for Weather and Climate Modeling
                  with {Python}},
  booktitle    = {{SC22:} International Conference for High Performance Computing, Networking,
                  Storage and Analysis, Dallas, TX, USA, November 13-18, 2022},
  pages        = {73:1--73:14},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/SC41404.2022.00078},
  doi          = {10.1109/SC41404.2022.00078},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sc/BenNunGDWDDEGMTWFSH22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2205.04148) record; same title and authors as
% DBLP:conf/sc/BenNunGDWDDEGMTWFSH22.
% "Python" is braced so sentence-casing styles keep the capital.
@article{DBLP:journals/corr/abs-2205-04148,
  author       = {Tal Ben{-}Nun and
                  Linus Groner and
                  Florian Deconinck and
                  Tobias Wicky and
                  Eddie Davis and
                  Johann Dahm and
                  Oliver Elbert and
                  Rhea George and
                  Jeremy McGibbon and
                  Lukas Tr{\"{u}}mper and
                  Elynn Wu and
                  Oliver Fuhrer and
                  Thomas C. Schulthess and
                  Torsten Hoefler},
  title        = {Productive Performance Engineering for Weather and Climate Modeling
                  with {Python}},
  journal      = {CoRR},
  volume       = {abs/2205.04148},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2205.04148},
  doi          = {10.48550/ARXIV.2205.04148},
  eprinttype   = {arXiv},
  eprint       = {2205.04148},
  timestamp    = {Wed, 11 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2205-04148.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2205.04934) record.
@article{DBLP:journals/corr/abs-2205-04934,
  author     = {Lukas Gianinazzi and
                Tal Ben{-}Nun and
                Saleh Ashkboos and
                Yves Baumann and
                Piotr Luczynski and
                Torsten Hoefler},
  title      = {The spatial computer: {A} model for energy-efficient parallel computation},
  journal    = {CoRR},
  volume     = {abs/2205.04934},
  year       = {2022},
  doi        = {10.48550/ARXIV.2205.04934},
  url        = {https://doi.org/10.48550/arXiv.2205.04934},
  eprinttype = {arXiv},
  eprint     = {2205.04934},
  timestamp  = {Wed, 11 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-04934.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2206.08301) record; same author list as DBLP:conf/sc/ZiogasKBSH22.
@article{DBLP:journals/corr/abs-2206-08301,
  author     = {Alexandros Nikolaos Ziogas and
                Grzegorz Kwasniewski and
                Tal Ben{-}Nun and
                Timo Schneider and
                Torsten Hoefler},
  title      = {Deinsum: Practically {I/O} Optimal Multilinear Algebra},
  journal    = {CoRR},
  volume     = {abs/2206.08301},
  year       = {2022},
  doi        = {10.48550/ARXIV.2206.08301},
  url        = {https://doi.org/10.48550/arXiv.2206.08301},
  eprinttype = {arXiv},
  eprint     = {2206.08301},
  timestamp  = {Tue, 21 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-08301.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2206.14786) record; same author list as DBLP:conf/nips/AshkboosHDBDGKH22.
@article{DBLP:journals/corr/abs-2206-14786,
  author     = {Saleh Ashkboos and
                Langwen Huang and
                Nikoli Dryden and
                Tal Ben{-}Nun and
                Peter Dueben and
                Lukas Gianinazzi and
                Luca Kummer and
                Torsten Hoefler},
  title      = {{ENS-10:} {A} Dataset For Post-Processing Ensemble Weather Forecast},
  journal    = {CoRR},
  volume     = {abs/2206.14786},
  year       = {2022},
  doi        = {10.48550/ARXIV.2206.14786},
  url        = {https://doi.org/10.48550/arXiv.2206.14786},
  eprinttype = {arXiv},
  eprint     = {2206.14786},
  timestamp  = {Mon, 04 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-14786.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2207.07433) record; same title and authors as DBLP:conf/sc/SchaadBH22.
@article{DBLP:journals/corr/abs-2207-07433,
  author     = {Philipp Schaad and
                Tal Ben{-}Nun and
                Torsten Hoefler},
  title      = {Boosting Performance Optimization with Interactive Data Movement Visualization},
  journal    = {CoRR},
  volume     = {abs/2207.07433},
  year       = {2022},
  doi        = {10.48550/ARXIV.2207.07433},
  url        = {https://doi.org/10.48550/arXiv.2207.07433},
  eprinttype = {arXiv},
  eprint     = {2207.07433},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2207-07433.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2210.04598) record; same title as DBLP:conf/iccad/JohnsenMBLH22.
% "De Matteis" is written in "Last, First" form: in "First Last" order BibTeX
% would take only "Matteis" as the surname because "De" is capitalised.
@article{DBLP:journals/corr/abs-2210-04598,
  author       = {Carl{-}Johannes Johnsen and
                  De Matteis, Tiziano and
                  Tal Ben{-}Nun and
                  Johannes de Fine Licht and
                  Torsten Hoefler},
  title        = {Temporal Vectorization: {A} Compiler Approach to Automatic Multi-Pumping},
  journal      = {CoRR},
  volume       = {abs/2210.04598},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2210.04598},
  doi          = {10.48550/ARXIV.2210.04598},
  eprinttype   = {arXiv},
  eprint       = {2210.04598},
  timestamp    = {Thu, 13 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-04598.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2212.13768) record.
% "De Matteis" is written in "Last, First" form: in "First Last" order BibTeX
% would take only "Matteis" as the surname because "De" is capitalised.
@article{DBLP:journals/corr/abs-2212-13768,
  author       = {Johannes de Fine Licht and
                  De Matteis, Tiziano and
                  Tal Ben{-}Nun and
                  Andreas Kuster and
                  Oliver Rausch and
                  Manuel Burger and
                  Carl{-}Johannes Johnsen and
                  Torsten Hoefler},
  title        = {Python {FPGA} Programming with Data-Centric Multi-Level Design},
  journal      = {CoRR},
  volume       = {abs/2212.13768},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2212.13768},
  doi          = {10.48550/ARXIV.2212.13768},
  eprinttype   = {arXiv},
  eprint       = {2212.13768},
  timestamp    = {Tue, 07 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-13768.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, J. Mach. Learn. Res. 22, 2021 (no DOI in this record).
@article{DBLP:journals/jmlr/HoeflerABDP21,
  author    = {Torsten Hoefler and
               Dan Alistarh and
               Tal Ben{-}Nun and
               Nikoli Dryden and
               Alexandra Peste},
  title     = {Sparsity in Deep Learning: Pruning and growth for efficient inference
               and training in neural networks},
  journal   = {J. Mach. Learn. Res.},
  volume    = {22},
  pages     = {241:1--241:124},
  year      = {2021},
  url       = {http://jmlr.org/papers/v22/21-0366.html},
  timestamp = {Mon, 31 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/jmlr/HoeflerABDP21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article, IEEE Trans. Parallel Distributed Syst. 32(7), 2021.
% "Di Girolamo" is written in "Last, First" form: in "First Last" order BibTeX
% would take only "Girolamo" as the surname because "Di" is capitalised.
@article{DBLP:journals/tpds/LiBNGDAH21,
  author       = {Shigang Li and
                  Tal Ben{-}Nun and
                  Giorgi Nadiradze and
                  Di Girolamo, Salvatore and
                  Nikoli Dryden and
                  Dan Alistarh and
                  Torsten Hoefler},
  title        = {Breaking (Global) Barriers in Parallel Stochastic Optimization With
                  Wait-Avoiding Group Averaging},
  journal      = {{IEEE} Trans. Parallel Distributed Syst.},
  volume       = {32},
  number       = {7},
  pages        = {1725--1739},
  year         = {2021},
  url          = {https://doi.org/10.1109/TPDS.2020.3040606},
  doi          = {10.1109/TPDS.2020.3040606},
  timestamp    = {Tue, 02 Mar 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tpds/LiBNGDAH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CGO 2021 conference paper.
% "De Matteis" is written in "Last, First" form: in "First Last" order BibTeX
% would take only "Matteis" as the surname because "De" is capitalised.
% "StencilFlow" is braced so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/cgo/LichtKMBHH21,
  author       = {Johannes de Fine Licht and
                  Andreas Kuster and
                  De Matteis, Tiziano and
                  Tal Ben{-}Nun and
                  Dominic Hofer and
                  Torsten Hoefler},
  editor       = {Jae W. Lee and
                  Mary Lou Soffa and
                  Ayal Zaks},
  title        = {{StencilFlow}: Mapping Large Stencil Programs to Distributed Spatial
                  Computing Systems},
  booktitle    = {{IEEE/ACM} International Symposium on Code Generation and Optimization,
                  {CGO} 2021, Seoul, South Korea, February 27 - March 3, 2021},
  pages        = {315--326},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/CGO51591.2021.9370315},
  doi          = {10.1109/CGO51591.2021.9370315},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cgo/LichtKMBHH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Cummins et al., ICML 2021 (PMLR volume 139). The mixed-case name "ProGraML"
% is brace-protected so that sentence-casing styles keep its capitalisation.
@inproceedings{DBLP:conf/icml/CumminsFBHOL21,
  author       = {Chris Cummins and
                  Zacharias V. Fisches and
                  Tal Ben{-}Nun and
                  Torsten Hoefler and
                  Michael F. P. O'Boyle and
                  Hugh Leather},
  editor       = {Marina Meila and
                  Tong Zhang},
  title        = {{ProGraML}: {A} Graph-based Program Representation for Data Flow Analysis
                  and Compiler Optimizations},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning,
                  {ICML} 2021, 18-24 July 2021, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {139},
  pages        = {2244--2253},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {https://proceedings.mlr.press/v139/cummins21a.html},
  timestamp    = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/CumminsFBHOL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Ziogas et al., ICS 2021. "NPBench" and "NumPy" are brace-protected so that
% sentence-casing bibliography styles do not flatten their capitalisation.
@inproceedings{DBLP:conf/ics/ZiogasBSH21,
  author       = {Alexandros Nikolaos Ziogas and
                  Tal Ben{-}Nun and
                  Timo Schneider and
                  Torsten Hoefler},
  editor       = {Huiyang Zhou and
                  Jose Moreira and
                  Frank Mueller and
                  Yoav Etsion},
  title        = {{NPBench}: a benchmarking suite for high-performance {NumPy}},
  booktitle    = {{ICS} '21: 2021 International Conference on Supercomputing, Virtual
                  Event, USA, June 14-17, 2021},
  pages        = {63--74},
  publisher    = {{ACM}},
  year         = {2021},
  url          = {https://doi.org/10.1145/3447818.3460360},
  doi          = {10.1145/3447818.3460360},
  timestamp    = {Tue, 07 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ics/ZiogasBSH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Ivanov et al., MLSys 2021: case study on optimizing transformers.
@inproceedings{DBLP:conf/mlsys/IvanovDB0H21,
  author     = {Andrei Ivanov
                and Nikoli Dryden
                and Tal Ben{-}Nun
                and Shigang Li
                and Torsten Hoefler},
  editor     = {Alex Smola
                and Alex Dimakis
                and Ion Stoica},
  title      = {Data Movement Is All You Need: {A} Case Study on Optimizing Transformers},
  year       = {2021},
  booktitle  = {Proceedings of Machine Learning and Systems 2021, MLSys 2021, virtual,
                April 5-9, 2021},
  publisher  = {mlsys.org},
  url        = {https://proceedings.mlsys.org/paper\_files/paper/2021/hash/bc86e95606a6392f51f95a8de106728d-Abstract.html},
  timestamp  = {Mon, 13 May 2024 16:08:52 +0200},
  biburl     = {https://dblp.org/rec/conf/mlsys/IvanovDB0H21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Kwasniewski et al., PPoPP 2021 (pages 463--464): near-optimal LU factorization.
% The conference date range in booktitle is spaced as "27 - March 3".
@inproceedings{DBLP:conf/ppopp/KwasniewskiBZSB21,
  author       = {Grzegorz Kwasniewski and
                  Tal Ben{-}Nun and
                  Alexandros Nikolaos Ziogas and
                  Timo Schneider and
                  Maciej Besta and
                  Torsten Hoefler},
  editor       = {Jaejin Lee and
                  Erez Petrank},
  title        = {On the parallel {I/O} optimality of linear algebra kernels: near-optimal
                  {LU} factorization},
  booktitle    = {PPoPP '21: 26th {ACM} {SIGPLAN} Symposium on Principles and Practice
                  of Parallel Programming, Virtual Event, Republic of Korea, February
                  27 - March 3, 2021},
  pages        = {463--464},
  publisher    = {{ACM}},
  year         = {2021},
  url          = {https://doi.org/10.1145/3437801.3441590},
  doi          = {10.1145/3437801.3441590},
  timestamp    = {Tue, 07 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ppopp/KwasniewskiBZSB21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Kwasniewski et al., SC 2021 (article 70): near-optimal matrix factorizations.
@inproceedings{DBLP:conf/sc/KwasniewskiKBZS21,
  author     = {Grzegorz Kwasniewski
                and Marko Kabic
                and Tal Ben{-}Nun
                and Alexandros Nikolaos Ziogas
                and Jens Eirik Saethre
                and Andr{\'{e}} Gaillard
                and Timo Schneider
                and Maciej Besta
                and Anton Kozhevnikov
                and Joost VandeVondele
                and Torsten Hoefler},
  editor     = {Bronis R. de Supinski
                and Mary W. Hall
                and Todd Gamblin},
  title      = {On the parallel {I/O} optimality of linear algebra kernels: near-optimal
                matrix factorizations},
  year       = {2021},
  booktitle  = {International Conference for High Performance Computing, Networking,
                Storage and Analysis, {SC} 2021, St. Louis, Missouri, USA, November
                14-19, 2021},
  pages      = {70},
  publisher  = {{ACM}},
  doi        = {10.1145/3458817.3476167},
  url        = {https://doi.org/10.1145/3458817.3476167},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/KwasniewskiKBZS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Dryden et al., SC 2021 (article 92): clairvoyant prefetching for ML I/O.
@inproceedings{DBLP:conf/sc/DrydenBBH21,
  author     = {Nikoli Dryden
                and Roman B{\"{o}}hringer
                and Tal Ben{-}Nun
                and Torsten Hoefler},
  editor     = {Bronis R. de Supinski
                and Mary W. Hall
                and Todd Gamblin},
  title      = {Clairvoyant prefetching for distributed machine learning {I/O}},
  year       = {2021},
  booktitle  = {International Conference for High Performance Computing, Networking,
                Storage and Analysis, {SC} 2021, St. Louis, Missouri, USA, November
                14-19, 2021},
  pages      = {92},
  publisher  = {{ACM}},
  doi        = {10.1145/3458817.3476181},
  url        = {https://doi.org/10.1145/3458817.3476181},
  timestamp  = {Tue, 08 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/sc/DrydenBBH21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Ziogas et al., SC 2021 (article 95). "Python" is brace-protected so that
% sentence-casing bibliography styles keep the proper noun capitalised.
@inproceedings{DBLP:conf/sc/ZiogasSBCMLLH21,
  author       = {Alexandros Nikolaos Ziogas and
                  Timo Schneider and
                  Tal Ben{-}Nun and
                  Alexandru Calotoiu and
                  Tiziano De Matteis and
                  Johannes de Fine Licht and
                  Luca Lavarini and
                  Torsten Hoefler},
  editor       = {Bronis R. de Supinski and
                  Mary W. Hall and
                  Todd Gamblin},
  title        = {Productivity, portability, performance: data-centric {Python}},
  booktitle    = {International Conference for High Performance Computing, Networking,
                  Storage and Analysis, {SC} 2021, St. Louis, Missouri, USA, November
                  14-19, 2021},
  pages        = {95},
  publisher    = {{ACM}},
  year         = {2021},
  url          = {https://doi.org/10.1145/3458817.3476176},
  doi          = {10.1145/3458817.3476176},
  timestamp    = {Tue, 07 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sc/ZiogasSBCMLLH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Kwasniewski et al., SPAA 2021: I/O lower bounds for statically analyzable programs.
@inproceedings{DBLP:conf/spaa/KwasniewskiBGCS21,
  author     = {Grzegorz Kwasniewski
                and Tal Ben{-}Nun
                and Lukas Gianinazzi
                and Alexandru Calotoiu
                and Timo Schneider
                and Alexandros Nikolaos Ziogas
                and Maciej Besta
                and Torsten Hoefler},
  editor     = {Kunal Agrawal
                and Yossi Azar},
  title      = {Pebbles, Graphs, and a Pinch of Combinatorics: Towards Tight {I/O}
                Lower Bounds for Statically Analyzable Programs},
  year       = {2021},
  booktitle  = {{SPAA} '21: 33rd {ACM} Symposium on Parallelism in Algorithms and
                Architectures, Virtual Event, USA, 6-8 July, 2021},
  pages      = {328--339},
  publisher  = {{ACM}},
  doi        = {10.1145/3409964.3461796},
  url        = {https://doi.org/10.1145/3409964.3461796},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/spaa/KwasniewskiBGCS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2101.08734 (listed under CoRR): clairvoyant prefetching for ML I/O.
@article{DBLP:journals/corr/abs-2101-08734,
  author     = {Roman B{\"{o}}hringer
                and Nikoli Dryden
                and Tal Ben{-}Nun
                and Torsten Hoefler},
  title      = {Clairvoyant Prefetching for Distributed Machine Learning {I/O}},
  year       = {2021},
  journal    = {CoRR},
  volume     = {abs/2101.08734},
  eprinttype = {arXiv},
  eprint     = {2101.08734},
  url        = {https://arxiv.org/abs/2101.08734},
  timestamp  = {Sat, 30 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-08734.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2102.00554 (listed under CoRR): sparsity in deep learning.
@article{DBLP:journals/corr/abs-2102-00554,
  author     = {Torsten Hoefler
                and Dan Alistarh
                and Tal Ben{-}Nun
                and Nikoli Dryden
                and Alexandra Peste},
  title      = {Sparsity in Deep Learning: Pruning and growth for efficient inference
                and training in neural networks},
  year       = {2021},
  journal    = {CoRR},
  volume     = {abs/2102.00554},
  eprinttype = {arXiv},
  eprint     = {2102.00554},
  url        = {https://arxiv.org/abs/2102.00554},
  timestamp  = {Tue, 09 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-00554.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2105.07203 (listed under CoRR): I/O lower bounds for statically analyzable programs.
@article{DBLP:journals/corr/abs-2105-07203,
  author     = {Grzegorz Kwasniewski
                and Tal Ben{-}Nun
                and Lukas Gianinazzi
                and Alexandru Calotoiu
                and Timo Schneider
                and Alexandros Nikolaos Ziogas
                and Maciej Besta
                and Torsten Hoefler},
  title      = {Pebbles, Graphs, and a Pinch of Combinatorics: Towards Tight {I/O}
                Lower Bounds for Statically Analyzable Programs},
  year       = {2021},
  journal    = {CoRR},
  volume     = {abs/2105.07203},
  eprinttype = {arXiv},
  eprint     = {2105.07203},
  url        = {https://arxiv.org/abs/2105.07203},
  timestamp  = {Tue, 18 May 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2105-07203.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2106.03594 (listed under CoRR): learning combinatorial node labeling algorithms.
@article{DBLP:journals/corr/abs-2106-03594,
  author     = {Lukas Gianinazzi
                and Maximilian Fries
                and Nikoli Dryden
                and Tal Ben{-}Nun
                and Maciej Besta
                and Torsten Hoefler},
  title      = {Learning Combinatorial Node Labeling Algorithms},
  year       = {2021},
  journal    = {CoRR},
  volume     = {abs/2106.03594},
  eprinttype = {arXiv},
  eprint     = {2106.03594},
  url        = {https://arxiv.org/abs/2106.03594},
  timestamp  = {Thu, 15 Jul 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-03594.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2107.00555 (listed under CoRR). "Python" is brace-protected so that
% sentence-casing bibliography styles keep the proper noun capitalised.
@article{DBLP:journals/corr/abs-2107-00555,
  author       = {Alexandros Nikolaos Ziogas and
                  Timo Schneider and
                  Tal Ben{-}Nun and
                  Alexandru Calotoiu and
                  Tiziano De Matteis and
                  Johannes de Fine Licht and
                  Luca Lavarini and
                  Torsten Hoefler},
  title        = {Productivity, Portability, Performance: Data-Centric {Python}},
  journal      = {CoRR},
  volume       = {abs/2107.00555},
  year         = {2021},
  url          = {https://arxiv.org/abs/2107.00555},
  eprinttype   = {arXiv},
  eprint       = {2107.00555},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2107-00555.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv 2108.09337 (listed under CoRR): near-optimal matrix factorizations.
@article{DBLP:journals/corr/abs-2108-09337,
  author     = {Grzegorz Kwasniewski
                and Marko Kabic
                and Tal Ben{-}Nun
                and Alexandros Nikolaos Ziogas
                and Jens Eirik Saethre
                and Andr{\'{e}} Gaillard
                and Timo Schneider
                and Maciej Besta
                and Anton Kozhevnikov
                and Joost VandeVondele
                and Torsten Hoefler},
  title      = {On the Parallel {I/O} Optimality of Linear Algebra Kernels: Near-Optimal
                Matrix Factorizations},
  year       = {2021},
  journal    = {CoRR},
  volume     = {abs/2108.09337},
  eprinttype = {arXiv},
  eprint     = {2108.09337},
  url        = {https://arxiv.org/abs/2108.09337},
  timestamp  = {Fri, 27 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2108-09337.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2110.10802 (listed under CoRR): data-centric optimization framework for ML.
@article{DBLP:journals/corr/abs-2110-10802,
  author     = {Oliver Rausch
                and Tal Ben{-}Nun
                and Nikoli Dryden
                and Andrei Ivanov
                and Shigang Li
                and Torsten Hoefler},
  title      = {A Data-Centric Optimization Framework for Machine Learning},
  year       = {2021},
  journal    = {CoRR},
  volume     = {abs/2110.10802},
  eprinttype = {arXiv},
  eprint     = {2110.10802},
  url        = {https://arxiv.org/abs/2110.10802},
  timestamp  = {Thu, 28 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-10802.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2112.11879 (listed under CoRR): lifting C semantics for dataflow optimization.
@article{DBLP:journals/corr/abs-2112-11879,
  author     = {Alexandru Calotoiu
                and Tal Ben{-}Nun
                and Grzegorz Kwasniewski
                and Johannes de Fine Licht
                and Timo Schneider
                and Philipp Schaad
                and Torsten Hoefler},
  title      = {Lifting {C} Semantics for Dataflow Optimization},
  year       = {2021},
  journal    = {CoRR},
  volume     = {abs/2112.11879},
  eprinttype = {arXiv},
  eprint     = {2112.11879},
  url        = {https://arxiv.org/abs/2112.11879},
  timestamp  = {Tue, 04 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2112-11879.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Ben-Nun et al., ACM Trans. Parallel Comput. 7(3), 2020. The system name
% "Groute" and the acronym "GPU" are brace-protected so that sentence-casing
% bibliography styles do not lowercase them.
@article{DBLP:journals/topc/Ben-NunSPP20,
  author       = {Tal Ben{-}Nun and
                  Michael Sutton and
                  Sreepathi Pai and
                  Keshav Pingali},
  title        = {{Groute}: Asynchronous Multi-{GPU} Programming Model with Applications
                  to Large-scale Graph Processing},
  journal      = {{ACM} Trans. Parallel Comput.},
  volume       = {7},
  number       = {3},
  pages        = {18:1--18:27},
  year         = {2020},
  url          = {https://doi.org/10.1145/3399730},
  doi          = {10.1145/3399730},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/topc/Ben-NunSPP20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Besta et al., ACM Trans. Reconfigurable Technol. Syst. 13(2), 2020: maximum matchings on FPGA.
@article{DBLP:journals/trets/BestaFBSLH20,
  author     = {Maciej Besta
                and Marc Fischer
                and Tal Ben{-}Nun
                and Dimitri Stanojevic
                and Johannes de Fine Licht
                and Torsten Hoefler},
  title      = {Substream-Centric Maximum Matchings on {FPGA}},
  year       = {2020},
  journal    = {{ACM} Trans. Reconfigurable Technol. Syst.},
  volume     = {13},
  number     = {2},
  pages      = {8:1--8:33},
  doi        = {10.1145/3377871},
  url        = {https://doi.org/10.1145/3377871},
  timestamp  = {Fri, 10 Jul 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/trets/BestaFBSLH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Hoffer et al., CVPR 2020: batch augmentation through instance repetition.
@inproceedings{DBLP:conf/cvpr/HofferBHGHS20,
  author     = {Elad Hoffer
                and Tal Ben{-}Nun
                and Itay Hubara
                and Niv Giladi
                and Torsten Hoefler
                and Daniel Soudry},
  title      = {Augment Your Batch: Improving Generalization Through Instance Repetition},
  year       = {2020},
  booktitle  = {2020 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                {CVPR} 2020, Seattle, WA, USA, June 13-19, 2020},
  pages      = {8126--8135},
  publisher  = {Computer Vision Foundation / {IEEE}},
  doi        = {10.1109/CVPR42600.2020.00815},
  url        = {https://openaccess.thecvf.com/content\_CVPR\_2020/html/Hoffer\_Augment\_Your\_Batch\_Improving\_Generalization\_Through\_Instance\_Repetition\_CVPR\_2020\_paper.html},
  timestamp  = {Tue, 31 Aug 2021 14:00:04 +0200},
  biburl     = {https://dblp.org/rec/conf/cvpr/HofferBHGHS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Li et al., PPoPP 2020: partial collective operations for unbalanced training workloads.
@inproceedings{DBLP:conf/ppopp/LiBGAH20,
  author     = {Shigang Li
                and Tal Ben{-}Nun
                and Salvatore Di Girolamo
                and Dan Alistarh
                and Torsten Hoefler},
  editor     = {Rajiv Gupta
                and Xipeng Shen},
  title      = {Taming unbalanced training workloads in deep learning with partial
                collective operations},
  year       = {2020},
  booktitle  = {PPoPP '20: 25th {ACM} {SIGPLAN} Symposium on Principles and Practice
                of Parallel Programming, San Diego, California, USA, February 22-26,
                2020},
  pages      = {45--61},
  publisher  = {{ACM}},
  doi        = {10.1145/3332466.3374528},
  url        = {https://doi.org/10.1145/3332466.3374528},
  timestamp  = {Sun, 12 Jun 2022 19:46:08 +0200},
  biburl     = {https://dblp.org/rec/conf/ppopp/LiBGAH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Ben-Nun et al., P3HPC workshop at SC 2020: workflows as the new applications.
@inproceedings{DBLP:conf/sc/Ben-NunGHKN20,
  author     = {Tal Ben{-}Nun
                and Todd Gamblin
                and Daisy S. Hollman
                and Hari Krishnan
                and Chris J. Newburn},
  title      = {Workflows are the New Applications: Challenges in Performance, Portability,
                and Productivity},
  year       = {2020},
  booktitle  = {{IEEE/ACM} International Workshop on Performance, Portability and
                Productivity in HPC, P3HPC@SC 2020, Atlanta, GA, USA, November 13,
                2020},
  pages      = {57--69},
  publisher  = {{IEEE}},
  doi        = {10.1109/P3HPC51967.2020.00011},
  url        = {https://doi.org/10.1109/P3HPC51967.2020.00011},
  timestamp  = {Wed, 20 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/Ben-NunGHKN20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2003.10536 (listed under CoRR). The mixed-case name "ProGraML" is
% brace-protected so that sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2003-10536,
  author       = {Chris Cummins and
                  Zacharias V. Fisches and
                  Tal Ben{-}Nun and
                  Torsten Hoefler and
                  Hugh Leather},
  title        = {{ProGraML}: Graph-based Deep Learning for Program Optimization and Analysis},
  journal      = {CoRR},
  volume       = {abs/2003.10536},
  year         = {2020},
  url          = {https://arxiv.org/abs/2003.10536},
  eprinttype   = {arXiv},
  eprint       = {2003.10536},
  timestamp    = {Wed, 01 Apr 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2003-10536.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv 2005.00124 (listed under CoRR): wait-avoiding group averaging.
@article{DBLP:journals/corr/abs-2005-00124,
  author     = {Shigang Li
                and Tal Ben{-}Nun
                and Dan Alistarh
                and Salvatore Di Girolamo
                and Nikoli Dryden
                and Torsten Hoefler},
  title      = {Breaking (Global) Barriers in Parallel Stochastic Optimization with
                Wait-Avoiding Group Averaging},
  year       = {2020},
  journal    = {CoRR},
  volume     = {abs/2005.00124},
  eprinttype = {arXiv},
  eprint     = {2005.00124},
  url        = {https://arxiv.org/abs/2005.00124},
  timestamp  = {Fri, 08 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2005-00124.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2005.08748 (listed under CoRR): deep learning for ensemble weather forecast post-processing.
@article{DBLP:journals/corr/abs-2005-08748,
  author     = {Peter Gr{\"{o}}nquist
                and Chengyuan Yao
                and Tal Ben{-}Nun
                and Nikoli Dryden
                and Peter Dueben
                and Shigang Li
                and Torsten Hoefler},
  title      = {Deep Learning for Post-Processing Ensemble Weather Forecasts},
  year       = {2020},
  journal    = {CoRR},
  volume     = {abs/2005.08748},
  eprinttype = {arXiv},
  eprint     = {2005.08748},
  url        = {https://arxiv.org/abs/2005.08748},
  timestamp  = {Fri, 22 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2005-08748.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2007.00072 (listed under CoRR): case study on optimizing transformers.
@article{DBLP:journals/corr/abs-2007-00072,
  author     = {Andrei Ivanov
                and Nikoli Dryden
                and Tal Ben{-}Nun
                and Shigang Li
                and Torsten Hoefler},
  title      = {Data Movement Is All You Need: {A} Case Study on Optimizing Transformers},
  year       = {2020},
  journal    = {CoRR},
  volume     = {abs/2007.00072},
  eprinttype = {arXiv},
  eprint     = {2007.00072},
  url        = {https://arxiv.org/abs/2007.00072},
  timestamp  = {Mon, 06 Jul 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-00072.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2010.05975 (listed under CoRR): near-optimal LU factorization.
@article{DBLP:journals/corr/abs-2010-05975,
  author     = {Grzegorz Kwasniewski
                and Tal Ben{-}Nun
                and Alexandros Nikolaos Ziogas
                and Timo Schneider
                and Maciej Besta
                and Torsten Hoefler},
  title      = {On the Parallel {I/O} Optimality of Linear Algebra Kernels: Near-Optimal
                {LU} Factorization},
  year       = {2020},
  journal    = {CoRR},
  volume     = {abs/2010.05975},
  eprinttype = {arXiv},
  eprint     = {2010.05975},
  url        = {https://arxiv.org/abs/2010.05975},
  timestamp  = {Tue, 20 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-05975.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2010.14684 (listed under CoRR): substream-centric maximum matchings on FPGA.
@article{DBLP:journals/corr/abs-2010-14684,
  author     = {Maciej Besta
                and Marc Fischer
                and Tal Ben{-}Nun
                and Dimitri Stanojevic
                and Johannes de Fine Licht
                and Torsten Hoefler},
  title      = {Substream-Centric Maximum Matchings on {FPGA}},
  year       = {2020},
  journal    = {CoRR},
  volume     = {abs/2010.14684},
  eprinttype = {arXiv},
  eprint     = {2010.14684},
  url        = {https://arxiv.org/abs/2010.14684},
  timestamp  = {Mon, 02 Nov 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-14684.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2010.15218 (listed under CoRR). The coined name "StencilFlow" is
% brace-protected so that sentence-casing bibliography styles cannot lowercase it.
@article{DBLP:journals/corr/abs-2010-15218,
  author       = {Johannes de Fine Licht and
                  Andreas Kuster and
                  Tiziano De Matteis and
                  Tal Ben{-}Nun and
                  Dominic Hofer and
                  Torsten Hoefler},
  title        = {{StencilFlow}: Mapping Large Stencil Programs to Distributed Spatial
                  Computing Systems},
  journal      = {CoRR},
  volume       = {abs/2010.15218},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.15218},
  eprinttype   = {arXiv},
  eprint       = {2010.15218},
  timestamp    = {Tue, 03 Nov 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-15218.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv 2011.07001 (listed under CoRR): parametric graph templates.
@article{DBLP:journals/corr/abs-2011-07001,
  author     = {Tal Ben{-}Nun
                and Lukas Gianinazzi
                and Torsten Hoefler
                and Yishai Oltchik},
  title      = {Parametric Graph Templates: Properties and Algorithms},
  year       = {2020},
  journal    = {CoRR},
  volume     = {abs/2011.07001},
  eprinttype = {arXiv},
  eprint     = {2011.07001},
  url        = {https://arxiv.org/abs/2011.07001},
  timestamp  = {Wed, 18 Nov 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-07001.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv 2012.01470 (listed under CoRR): deep data flow analysis.
@article{DBLP:journals/corr/abs-2012-01470,
  author     = {Chris Cummins
                and Hugh Leather
                and Zacharias V. Fisches
                and Tal Ben{-}Nun
                and Torsten Hoefler
                and Michael F. P. O'Boyle},
  title      = {Deep Data Flow Analysis},
  year       = {2020},
  journal    = {CoRR},
  volume     = {abs/2012.01470},
  eprinttype = {arXiv},
  eprint     = {2012.01470},
  url        = {https://arxiv.org/abs/2012.01470},
  timestamp  = {Fri, 04 Dec 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2012-01470.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Ben-Nun and Hoefler, ACM Comput. Surv. 52(4), 2019: parallel and distributed deep learning.
@article{DBLP:journals/csur/Ben-NunH19,
  author     = {Tal Ben{-}Nun
                and Torsten Hoefler},
  title      = {Demystifying Parallel and Distributed Deep Learning: An In-depth Concurrency
                Analysis},
  year       = {2019},
  journal    = {{ACM} Comput. Surv.},
  volume     = {52},
  number     = {4},
  pages      = {65:1--65:43},
  doi        = {10.1145/3320060},
  url        = {https://doi.org/10.1145/3320060},
  timestamp  = {Sat, 08 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/csur/Ben-NunH19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Besta et al., FPGA 2019: substream-centric maximum matchings on FPGA.
@inproceedings{DBLP:conf/fpga/BestaFBLH19,
  author     = {Maciej Besta
                and Marc Fischer
                and Tal Ben{-}Nun
                and Johannes de Fine Licht
                and Torsten Hoefler},
  editor     = {Kia Bazargan
                and Stephen Neuendorffer},
  title      = {Substream-Centric Maximum Matchings on {FPGA}},
  year       = {2019},
  booktitle  = {Proceedings of the 2019 {ACM/SIGDA} International Symposium on Field-Programmable
                Gate Arrays, {FPGA} 2019, Seaside, CA, USA, February 24-26, 2019},
  pages      = {152--161},
  publisher  = {{ACM}},
  doi        = {10.1145/3289602.3293916},
  url        = {https://doi.org/10.1145/3289602.3293916},
  timestamp  = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/fpga/BestaFBLH19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Ben-Nun et al., IPDPS 2019: modular benchmarking infrastructure for deep learning.
@inproceedings{DBLP:conf/ipps/Ben-NunBHZPH19,
  author     = {Tal Ben{-}Nun
                and Maciej Besta
                and Simon Huber
                and Alexandros Nikolaos Ziogas
                and Daniel Peter
                and Torsten Hoefler},
  title      = {A Modular Benchmarking Infrastructure for High-Performance and Reproducible
                Deep Learning},
  year       = {2019},
  booktitle  = {2019 {IEEE} International Parallel and Distributed Processing Symposium,
                {IPDPS} 2019, Rio de Janeiro, Brazil, May 20-24, 2019},
  pages      = {66--77},
  publisher  = {{IEEE}},
  doi        = {10.1109/IPDPS.2019.00018},
  url        = {https://doi.org/10.1109/IPDPS.2019.00018},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ipps/Ben-NunBHZPH19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Ziogas et al., SC 2019 (pages 1:1--1:13): ab initio dissipative quantum transport simulations.
@inproceedings{DBLP:conf/sc/ZiogasBFSLH19,
  author     = {Alexandros Nikolaos Ziogas
                and Tal Ben{-}Nun
                and Guillermo Indalecio Fern{\'{a}}ndez
                and Timo Schneider
                and Mathieu Luisier
                and Torsten Hoefler},
  editor     = {Michela Taufer
                and Pavan Balaji
                and Antonio J. Pe{\~{n}}a},
  title      = {A data-centric approach to extreme-scale \emph{ab initio} dissipative
                quantum transport simulations},
  year       = {2019},
  booktitle  = {Proceedings of the International Conference for High Performance Computing,
                Networking, Storage and Analysis, {SC} 2019, Denver, Colorado, USA,
                November 17-19, 2019},
  pages      = {1:1--1:13},
  publisher  = {{ACM}},
  doi        = {10.1145/3295500.3357156},
  url        = {https://doi.org/10.1145/3295500.3357156},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/ZiogasBFSLH19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Ziogas et al., SC 2019 (pages 78:1--78:17): data movement in quantum transport simulations.
@inproceedings{DBLP:conf/sc/ZiogasBFSLH19a,
  author     = {Alexandros Nikolaos Ziogas
                and Tal Ben{-}Nun
                and Guillermo Indalecio Fern{\'{a}}ndez
                and Timo Schneider
                and Mathieu Luisier
                and Torsten Hoefler},
  editor     = {Michela Taufer
                and Pavan Balaji
                and Antonio J. Pe{\~{n}}a},
  title      = {Optimizing the data movement in quantum transport simulations via
                data-centric parallel programming},
  year       = {2019},
  booktitle  = {Proceedings of the International Conference for High Performance Computing,
                Networking, Storage and Analysis, {SC} 2019, Denver, Colorado, USA,
                November 17-19, 2019},
  pages      = {78:1--78:17},
  publisher  = {{ACM}},
  doi        = {10.1145/3295500.3356200},
  url        = {https://doi.org/10.1145/3295500.3356200},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/ZiogasBFSLH19a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Ben-Nun et al., SC 2019 (pages 81:1--81:14): stateful dataflow multigraphs.
@inproceedings{DBLP:conf/sc/Ben-NunLZSH19,
  author     = {Tal Ben{-}Nun
                and Johannes de Fine Licht
                and Alexandros Nikolaos Ziogas
                and Timo Schneider
                and Torsten Hoefler},
  editor     = {Michela Taufer
                and Pavan Balaji
                and Antonio J. Pe{\~{n}}a},
  title      = {Stateful dataflow multigraphs: a data-centric model for performance
                portability on heterogeneous architectures},
  year       = {2019},
  booktitle  = {Proceedings of the International Conference for High Performance Computing,
                Networking, Storage and Analysis, {SC} 2019, Denver, Colorado, USA,
                November 17-19, 2019},
  pages      = {81:1--81:14},
  publisher  = {{ACM}},
  doi        = {10.1145/3295500.3356173},
  url        = {https://doi.org/10.1145/3295500.3356173},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/Ben-NunLZSH19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1901-09335,
  author     = {Hoffer, Elad and
                Ben{-}Nun, Tal and
                Hubara, Itay and
                Giladi, Niv and
                Hoefler, Torsten and
                Soudry, Daniel},
  title      = {Augment your batch: better training with larger batches},
  journal    = {CoRR},
  volume     = {abs/1901.09335},
  year       = {2019},
  url        = {http://arxiv.org/abs/1901.09335},
  eprinttype = {arXiv},
  eprint     = {1901.09335},
  timestamp  = {Sat, 02 Feb 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-09335.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1901-10183,
  author     = {Ben{-}Nun, Tal and
                Besta, Maciej and
                Huber, Simon and
                Ziogas, Alexandros Nikolaos and
                Peter, Daniel and
                Hoefler, Torsten},
  title      = {A Modular Benchmarking Infrastructure for High-Performance and Reproducible
                Deep Learning},
  journal    = {CoRR},
  volume     = {abs/1901.10183},
  year       = {2019},
  url        = {http://arxiv.org/abs/1901.10183},
  eprinttype = {arXiv},
  eprint     = {1901.10183},
  timestamp  = {Sat, 02 Feb 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-10183.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1902-10345,
  author     = {Ben{-}Nun, Tal and
                de Fine Licht, Johannes and
                Ziogas, Alexandros Nikolaos and
                Schneider, Timo and
                Hoefler, Torsten},
  title      = {Stateful Dataflow Multigraphs: {A} Data-Centric Model for High-Performance
                Parallel Programs},
  journal    = {CoRR},
  volume     = {abs/1902.10345},
  year       = {2019},
  url        = {http://arxiv.org/abs/1902.10345},
  eprinttype = {arXiv},
  eprint     = {1902.10345},
  timestamp  = {Tue, 21 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1902-10345.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The acronym {FPGAs} is brace-protected so that sentence-casing bibliography
% styles do not render it as "fpgas".
@article{DBLP:journals/corr/abs-1903-06697,
  author     = {Besta, Maciej and
                Stanojevic, Dimitri and
                de Fine Licht, Johannes and
                Ben{-}Nun, Tal and
                Hoefler, Torsten},
  title      = {Graph Processing on {FPGAs}: Taxonomy, Survey, Challenges},
  journal    = {CoRR},
  volume     = {abs/1903.06697},
  year       = {2019},
  url        = {http://arxiv.org/abs/1903.06697},
  eprinttype = {arXiv},
  eprint     = {1903.06697},
  timestamp  = {Mon, 01 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1903-06697.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Names use the "Last, First" form: written as "Salvatore Di Girolamo", BibTeX
% would take "Girolamo" alone as the surname because "Di" is capitalised.
@article{DBLP:journals/corr/abs-1908-04207,
  author     = {Li, Shigang and
                Ben{-}Nun, Tal and
                Di Girolamo, Salvatore and
                Alistarh, Dan and
                Hoefler, Torsten},
  title      = {Taming Unbalanced Training Workloads in Deep Learning with Partial
                Collective Operations},
  journal    = {CoRR},
  volume     = {abs/1908.04207},
  year       = {2019},
  url        = {http://arxiv.org/abs/1908.04207},
  eprinttype = {arXiv},
  eprint     = {1908.04207},
  timestamp  = {Mon, 19 Aug 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1908-04207.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1908-08986,
  author     = {Hoffer, Elad and
                Weinstein, Berry and
                Hubara, Itay and
                Ben{-}Nun, Tal and
                Hoefler, Torsten and
                Soudry, Daniel},
  title      = {Mix {\&} Match: training convnets with mixed image sizes for improved
                accuracy, speed and scale resiliency},
  journal    = {CoRR},
  volume     = {abs/1908.08986},
  year       = {2019},
  url        = {http://arxiv.org/abs/1908.08986},
  eprinttype = {arXiv},
  eprint     = {1908.08986},
  timestamp  = {Thu, 29 Aug 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1908-08986.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1911-00630,
  author     = {Gr{\"{o}}nquist, Peter and
                Ben{-}Nun, Tal and
                Dryden, Nikoli and
                Dueben, Peter and
                Lavarini, Luca and
                Li, Shigang and
                Hoefler, Torsten},
  title      = {Predicting Weather Uncertainty with Deep Convnets},
  journal    = {CoRR},
  volume     = {abs/1911.00630},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.00630},
  eprinttype = {arXiv},
  eprint     = {1911.00630},
  timestamp  = {Mon, 11 Nov 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-00630.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-08810,
  author     = {Ziogas, Alexandros Nikolaos and
                Ben{-}Nun, Tal and
                Fern{\'{a}}ndez, Guillermo Indalecio and
                Schneider, Timo and
                Luisier, Mathieu and
                Hoefler, Torsten},
  title      = {Optimizing the Data Movement in Quantum Transport Simulations via
                Data-Centric Parallel Programming},
  journal    = {CoRR},
  volume     = {abs/1912.08810},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.08810},
  eprinttype = {arXiv},
  eprint     = {1912.08810},
  timestamp  = {Fri, 03 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-08810.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-10024,
  author     = {Ziogas, Alexandros Nikolaos and
                Ben{-}Nun, Tal and
                Fern{\'{a}}ndez, Guillermo Indalecio and
                Schneider, Timo and
                Luisier, Mathieu and
                Hoefler, Torsten},
  title      = {A Data-Centric Approach to Extreme-Scale Ab initio Dissipative Quantum
                Transport Simulations},
  journal    = {CoRR},
  volume     = {abs/1912.10024},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.10024},
  eprinttype = {arXiv},
  eprint     = {1912.10024},
  timestamp  = {Fri, 03 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-10024.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cluster/OyamaBHM18,
  author     = {Oyama, Yosuke and
                Ben{-}Nun, Tal and
                Hoefler, Torsten and
                Matsuoka, Satoshi},
  title      = {Accelerating Deep Learning Frameworks with Micro-Batches},
  booktitle  = {{IEEE} International Conference on Cluster Computing, {CLUSTER} 2018,
                Belfast, UK, September 10-13, 2018},
  pages      = {402--412},
  publisher  = {{IEEE} Computer Society},
  year       = {2018},
  doi        = {10.1109/CLUSTER.2018.00058},
  url        = {https://doi.org/10.1109/CLUSTER.2018.00058},
  timestamp  = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/cluster/OyamaBHM18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/0001BB18,
  author     = {Sutton, Michael and
                Ben{-}Nun, Tal and
                Barak, Amnon},
  title      = {Optimizing Parallel Graph Connectivity Computation via Subgraph Sampling},
  booktitle  = {2018 {IEEE} International Parallel and Distributed Processing Symposium,
                {IPDPS} 2018, Vancouver, BC, Canada, May 21-25, 2018},
  pages      = {12--21},
  publisher  = {{IEEE} Computer Society},
  year       = {2018},
  doi        = {10.1109/IPDPS.2018.00012},
  url        = {https://doi.org/10.1109/IPDPS.2018.00012},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/ipps/0001BB18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/Ben-NunJH18,
  author     = {Ben{-}Nun, Tal and
                Jakobovits, Alice Shoshana and
                Hoefler, Torsten},
  editor     = {Bengio, Samy and
                Wallach, Hanna M. and
                Larochelle, Hugo and
                Grauman, Kristen and
                Cesa{-}Bianchi, Nicol{\`{o}} and
                Garnett, Roman},
  title      = {Neural Code Comprehension: {A} Learnable Representation of Code Semantics},
  booktitle  = {Advances in Neural Information Processing Systems 31: Annual Conference
                on Neural Information Processing Systems 2018, NeurIPS 2018, December
                3-8, 2018, Montr{\'{e}}al, Canada},
  pages      = {3589--3601},
  year       = {2018},
  url        = {https://proceedings.neurips.cc/paper/2018/hash/17c3433fecc21b57000debdf7ad5c930-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/Ben-NunJH18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1802-09941,
  author     = {Ben{-}Nun, Tal and
                Hoefler, Torsten},
  title      = {Demystifying Parallel and Distributed Deep Learning: An In-Depth Concurrency
                Analysis},
  journal    = {CoRR},
  volume     = {abs/1802.09941},
  year       = {2018},
  url        = {http://arxiv.org/abs/1802.09941},
  eprinttype = {arXiv},
  eprint     = {1802.09941},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1802-09941.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% {cuDNN} gets its own brace group so sentence-casing styles keep its mixed
% capitals. It is kept separate from the math group because a brace group that
% begins with a backslash is still case-converted by BibTeX.
@article{DBLP:journals/corr/abs-1804-04806,
  author     = {Oyama, Yosuke and
                Ben{-}Nun, Tal and
                Hoefler, Torsten and
                Matsuoka, Satoshi},
  title      = {{\(\mu\)}-{cuDNN}: Accelerating Deep Learning Frameworks with Micro-Batching},
  journal    = {CoRR},
  volume     = {abs/1804.04806},
  year       = {2018},
  url        = {http://arxiv.org/abs/1804.04806},
  eprinttype = {arXiv},
  eprint     = {1804.04806},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1804-04806.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-07336,
  author     = {Ben{-}Nun, Tal and
                Jakobovits, Alice Shoshana and
                Hoefler, Torsten},
  title      = {Neural Code Comprehension: {A} Learnable Representation of Code Semantics},
  journal    = {CoRR},
  volume     = {abs/1806.07336},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.07336},
  eprinttype = {arXiv},
  eprint     = {1806.07336},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-07336.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/damon/KarnagelBWHL17,
  author     = {Karnagel, Tomas and
                Ben{-}Nun, Tal and
                Werner, Matthias and
                Habich, Dirk and
                Lehner, Wolfgang},
  title      = {Big data causing big {(TLB)} problems: taming random memory accesses
                on the {GPU}},
  booktitle  = {Proceedings of the 13th International Workshop on Data Management
                on New Hardware, DaMoN 2017, Chicago, IL, USA, May 15, 2017},
  pages      = {6:1--6:10},
  publisher  = {{ACM}},
  year       = {2017},
  doi        = {10.1145/3076113.3076115},
  url        = {https://doi.org/10.1145/3076113.3076115},
  timestamp  = {Tue, 06 Nov 2018 16:58:57 +0100},
  biburl     = {https://dblp.org/rec/conf/damon/KarnagelBWHL17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The acronym {GPU} is brace-protected so that sentence-casing bibliography
% styles do not render "Multi-GPU" as "multi-gpu".
@inproceedings{DBLP:conf/ppopp/Ben-NunSPP17,
  author     = {Ben{-}Nun, Tal and
                Sutton, Michael and
                Pai, Sreepathi and
                Pingali, Keshav},
  editor     = {Sarkar, Vivek and
                Rauchwerger, Lawrence},
  title      = {Groute: An Asynchronous Multi-{GPU} Programming Model for Irregular
                Computations},
  booktitle  = {Proceedings of the 22nd {ACM} {SIGPLAN} Symposium on Principles and
                Practice of Parallel Programming, Austin, TX, USA, February 4-8, 2017},
  pages      = {235--248},
  publisher  = {{ACM}},
  year       = {2017},
  doi        = {10.1145/3018743.3018756},
  url        = {https://doi.org/10.1145/3018743.3018756},
  timestamp  = {Sun, 12 Jun 2022 19:46:08 +0200},
  biburl     = {https://dblp.org/rec/conf/ppopp/Ben-NunSPP17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% PhD thesis record. The parenthesised tail of the title is written entirely as
% \unicode{N} macros with decimal code points in the range 1488-1514.
% NOTE(review): those code points appear to be Hebrew letters, and \unicode is
% not a LaTeX kernel command - confirm the citing document defines it.
@phdthesis{DBLP:phd/il/Ben-Nun16,
  author       = {Tal Ben{-}Nun},
  title        = {Memory-Oriented Programming : {A} Data-Centric Programming Model for
                  Systems with Multiple Parallel Accelerators ({\unicode{1513}}{\unicode{1506}}{\unicode{1512}}
                  {\unicode{1504}}{\unicode{1493}}{\unicode{1505}}{\unicode{1507}} {\unicode{1489}}{\unicode{1506}}{\unicode{1489}}{\unicode{1512}}{\unicode{1497}}{\unicode{1514}}:
                  {\unicode{1514}}{\unicode{1499}}{\unicode{1504}}{\unicode{1493}}{\unicode{1514}}
                  {\unicode{1502}}{\unicode{1493}}{\unicode{1504}}{\unicode{1495}}{\unicode{1492}}
                  {\unicode{1494}}{\unicode{1497}}{\unicode{1499}}{\unicode{1512}}{\unicode{1493}}{\unicode{1503}}
                  : {\unicode{1502}}{\unicode{1493}}{\unicode{1491}}{\unicode{1500}}
                  {\unicode{1514}}{\unicode{1499}}{\unicode{1504}}{\unicode{1493}}{\unicode{1514}}
                  {\unicode{1506}}{\unicode{1489}}{\unicode{1493}}{\unicode{1512}} {\unicode{1502}}{\unicode{1506}}{\unicode{1512}}{\unicode{1499}}{\unicode{1493}}{\unicode{1514}}
                  {\unicode{1502}}{\unicode{1512}}{\unicode{1493}}{\unicode{1489}}{\unicode{1493}}{\unicode{1514}}
                  {\unicode{1502}}{\unicode{1488}}{\unicode{1497}}{\unicode{1510}}{\unicode{1497}}{\unicode{1501}}
                  {\unicode{1502}}{\unicode{1511}}{\unicode{1489}}{\unicode{1497}}{\unicode{1500}}{\unicode{1497}}{\unicode{1497}}{\unicode{1501}}.)},
  school       = {Hebrew University of Jerusalem, Israel},
  year         = {2016},
  url          = {https://huji-primo.hosted.exlibrisgroup.com/permalink/f/13ns5ae/972HUJI\_ALMA21220818810003701},
  timestamp    = {Wed, 14 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/phd/il/Ben-Nun16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% {X-ray} is brace-protected so that sentence-casing bibliography styles do not
% render it as "x-ray".
@article{DBLP:journals/jcisd/GinsburgBASRR16,
  author     = {Ginsburg, Avi and
                Ben{-}Nun, Tal and
                Asor, Roi and
                Shemesh, Asaf and
                Ringel, Israel and
                Raviv, Uri},
  title      = {Reciprocal Grids: {A} Hierarchical Algorithm for Computing Solution
                {X-ray} Scattering Curves from Supramolecular Complexes at High Resolution},
  journal    = {J. Chem. Inf. Model.},
  volume     = {56},
  number     = {8},
  pages      = {1518--1527},
  year       = {2016},
  url        = {https://doi.org/10.1021/acs.jcim.6b00159},
  doi        = {10.1021/ACS.JCIM.6B00159},
  timestamp  = {Fri, 06 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jcisd/GinsburgBASRR16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jpdc/Ben-NunBR16,
  author     = {Ben{-}Nun, Tal and
                Barak, Amnon and
                Raviv, Uri},
  title      = {Spline-based parallel nonlinear optimization of function sequences},
  journal    = {J. Parallel Distributed Comput.},
  volume     = {93-94},
  pages      = {132--145},
  year       = {2016},
  doi        = {10.1016/J.JPDC.2016.04.011},
  url        = {https://doi.org/10.1016/j.jpdc.2016.04.011},
  timestamp  = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jpdc/Ben-NunBR16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:series/lncse/WeinholdLBKPHSLBBSSFRLN16,
  author     = {Weinhold, Carsten and
                Lackorzynski, Adam and
                Bierbaum, Jan and
                K{\"{u}}ttler, Martin and
                Planeta, Maksym and
                H{\"{a}}rtig, Hermann and
                Shiloh, Amnon and
                Levy, Ely and
                Ben{-}Nun, Tal and
                Barak, Amnon and
                Steinke, Thomas and
                Sch{\"{u}}tt, Thorsten and
                Fajerski, Jan and
                Reinefeld, Alexander and
                Lieber, Matthias and
                Nagel, Wolfgang E.},
  editor     = {Bungartz, Hans{-}Joachim and
                Neumann, Philipp and
                Nagel, Wolfgang E.},
  title      = {{FFMK:} {A} Fast and Fault-Tolerant Microkernel-Based System for Exascale
                Computing},
  booktitle  = {Software for Exascale Computing - {SPPEXA} 2013-2015},
  series     = {Lecture Notes in Computational Science and Engineering},
  volume     = {113},
  pages      = {405--426},
  publisher  = {Springer},
  year       = {2016},
  doi        = {10.1007/978-3-319-40528-5\_18},
  url        = {https://doi.org/10.1007/978-3-319-40528-5\_18},
  timestamp  = {Thu, 14 Oct 2021 08:45:21 +0200},
  biburl     = {https://dblp.org/rec/series/lncse/WeinholdLBKPHSLBBSSFRLN16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SuttonBBPP16,
  author     = {Sutton, Michael and
                Ben{-}Nun, Tal and
                Barak, Amnon and
                Pai, Sreepathi and
                Pingali, Keshav},
  title      = {Adaptive Work-Efficient Connected Components on the {GPU}},
  journal    = {CoRR},
  volume     = {abs/1612.01178},
  year       = {2016},
  url        = {http://arxiv.org/abs/1612.01178},
  eprinttype = {arXiv},
  eprint     = {1612.01178},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SuttonBBPP16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The acronym {GPU} is brace-protected so that sentence-casing bibliography
% styles do not render "multi-GPU" as "multi-gpu".
@inproceedings{DBLP:conf/sc/Ben-NunLBR15,
  author     = {Ben{-}Nun, Tal and
                Levy, Ely and
                Barak, Amnon and
                Rubin, Eri},
  editor     = {Kern, Jackie and
                Vetter, Jeffrey S.},
  title      = {Memory access patterns: the missing piece of the multi-{GPU} puzzle},
  booktitle  = {Proceedings of the International Conference for High Performance Computing,
                Networking, Storage and Analysis, {SC} 2015, Austin, TX, USA, November
                15-20, 2015},
  pages      = {19:1--19:12},
  publisher  = {{ACM}},
  year       = {2015},
  doi        = {10.1145/2807591.2807611},
  url        = {https://doi.org/10.1145/2807591.2807611},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/sc/Ben-NunLBR15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/taco/RubinLBB14,
  author     = {Rubin, Eri and
                Levy, Ely and
                Barak, Amnon and
                Ben{-}Nun, Tal},
  title      = {{MAPS:} Optimizing Massively Parallel Applications Using Device-Level
                Memory Abstraction},
  journal    = {{ACM} Trans. Archit. Code Optim.},
  volume     = {11},
  number     = {4},
  pages      = {44:1--44:22},
  year       = {2014},
  doi        = {10.1145/2680544},
  url        = {https://doi.org/10.1145/2680544},
  timestamp  = {Wed, 17 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/taco/RubinLBB14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The booktitle reads "Proceedings of" (a duplicated "of" was removed).
% Names use the "Last, First" form: written as "Dilma Da Silva", BibTeX would
% take "Silva" alone as the surname because "Da" is capitalised.
@inproceedings{DBLP:conf/systor/Ben-NunEF10,
  author     = {Ben{-}Nun, Tal and
                Etsion, Yoav and
                Feitelson, Dror G.},
  editor     = {Haber, Gadi and
                Da Silva, Dilma and
                Miller, Ethan L.},
  title      = {Design and implementation of a generic resource sharing virtual time
                dispatcher},
  booktitle  = {Proceedings of {SYSTOR} 2010: The 3rd Annual Haifa Experimental
                Systems Conference, Haifa, Israel, May 24-26, 2010},
  series     = {{ACM} International Conference Proceeding Series},
  publisher  = {{ACM}},
  year       = {2010},
  doi        = {10.1145/1815695.1815700},
  url        = {https://doi.org/10.1145/1815695.1815700},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/systor/Ben-NunEF10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/EtsionBF09,
  author     = {Etsion, Yoav and
                Ben{-}Nun, Tal and
                Feitelson, Dror G.},
  title      = {A global scheduling framework for virtualization environments},
  booktitle  = {23rd {IEEE} International Symposium on Parallel and Distributed Processing,
                {IPDPS} 2009, Rome, Italy, May 23-29, 2009},
  pages      = {1--8},
  publisher  = {{IEEE}},
  year       = {2009},
  doi        = {10.1109/IPDPS.2009.5161228},
  url        = {https://doi.org/10.1109/IPDPS.2009.5161228},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/ipps/EtsionBF09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics