BibTeX records: Georg Hager

download as .bib file

% Journal article; same authors and title (different capitalization) as the CoRR entry DBLP:journals/corr/abs-2302-12164.
@article{DBLP:journals/fgcs/AfzalHMW23,
  author     = {Ayesha Afzal and Georg Hager and Stefano Markidis and Gerhard Wellein},
  title      = {Making applications faster by asynchronous execution:
                Slowing down processes or relaxing {MPI} collectives},
  journal    = {Future Gener. Comput. Syst.},
  volume     = {148},
  pages      = {472--487},
  year       = {2023},
  doi        = {10.1016/J.FUTURE.2023.06.017},
  url        = {https://doi.org/10.1016/j.future.2023.06.017},
  timestamp  = {Thu, 31 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/fgcs/AfzalHMW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article. {MD-Bench} is brace-protected so sentence-casing styles do not render it as "Md-bench".
@article{DBLP:journals/fgcs/MachadoELHKW23,
  author     = {Rafael Ravedutti Lucio Machado and Jan Eitzinger and Jan Laukemann and
                Georg Hager and Harald K{\"{o}}stler and Gerhard Wellein},
  title      = {{MD-Bench}: {A} performance-focused prototyping harness for state-of-the-art
                short-range molecular dynamics algorithms},
  journal    = {Future Gener. Comput. Syst.},
  volume     = {149},
  pages      = {25--38},
  year       = {2023},
  doi        = {10.1016/J.FUTURE.2023.06.023},
  url        = {https://doi.org/10.1016/j.future.2023.06.023},
  timestamp  = {Thu, 26 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/fgcs/MachadoELHKW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article; same authors and title as the CoRR entry DBLP:journals/corr/abs-2204-14242.
% {GPUs} is brace-protected so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/jpdc/ErnstHHKW23,
  author     = {Dominik Ernst and Markus Holzer and Georg Hager and Matthias Knorr and
                Gerhard Wellein},
  title      = {Analytical performance estimation during code generation on modern {GPUs}},
  journal    = {J. Parallel Distributed Comput.},
  volume     = {173},
  pages      = {152--167},
  year       = {2023},
  doi        = {10.1016/J.JPDC.2022.11.003},
  url        = {https://doi.org/10.1016/j.jpdc.2022.11.003},
  timestamp  = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jpdc/ErnstHHKW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article; same authors and title (different capitalization) as the CoRR entry DBLP:journals/corr/abs-2209-01974.
@article{DBLP:journals/topc/AlvermannHF23,
  author     = {Andreas Alvermann and Georg Hager and Holger Fehske},
  title      = {Orthogonal Layers of Parallelism in
                Large-Scale Eigenvalue Computations},
  journal    = {{ACM} Trans. Parallel Comput.},
  volume     = {10},
  number     = {3},
  pages      = {16:1--16:31},
  year       = {2023},
  doi        = {10.1145/3614444},
  url        = {https://doi.org/10.1145/3614444},
  timestamp  = {Fri, 27 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/topc/AlvermannHF23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article; same authors and title (different capitalization) as the CoRR entry DBLP:journals/corr/abs-2205-01598.
@article{DBLP:journals/tpds/AlappatHSW23,
  author     = {Christie L. Alappat and Georg Hager and Olaf Schenk and Gerhard Wellein},
  title      = {Level-Based Blocking for Sparse Matrices:
                Sparse Matrix-Power-Vector Multiplication},
  journal    = {{IEEE} Trans. Parallel Distributed Syst.},
  volume     = {34},
  number     = {2},
  pages      = {581--597},
  year       = {2023},
  doi        = {10.1109/TPDS.2022.3223512},
  url        = {https://doi.org/10.1109/TPDS.2022.3223512},
  timestamp  = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tpds/AlappatHSW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article; same authors and title as the CoRR entry DBLP:journals/corr/abs-2205-04190.
@article{DBLP:journals/tpds/AfzalHW23,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {The Role of Idle Waves, Desynchronization, and Bottleneck
                Evasion in the Performance of Parallel Programs},
  journal    = {{IEEE} Trans. Parallel Distributed Syst.},
  volume     = {34},
  number     = {2},
  pages      = {623--638},
  year       = {2023},
  doi        = {10.1109/TPDS.2022.3221085},
  url        = {https://doi.org/10.1109/TPDS.2022.3221085},
  timestamp  = {Fri, 10 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tpds/AfzalHW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Workshop paper; same authors and title as the CoRR entry DBLP:journals/corr/abs-2310-05701.
@inproceedings{DBLP:conf/sc/AfzalHW23,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {Physical Oscillator Model for Supercomputing},
  booktitle  = {Proceedings of the {SC} '23 Workshops of The International
                Conference on High Performance Computing, Network, Storage,
                and Analysis, {SC-W} 2023, Denver, CO, USA, November 12-17, 2023},
  pages      = {1229--1235},
  publisher  = {{ACM}},
  year       = {2023},
  doi        = {10.1145/3624062.3625535},
  url        = {https://doi.org/10.1145/3624062.3625535},
  timestamp  = {Tue, 28 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/sc/AfzalHW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Workshop paper; same authors and title as the CoRR entry DBLP:journals/corr/abs-2309-05373.
% Benchmark and product names are brace-protected so sentence-casing styles keep their capitalization.
@inproceedings{DBLP:conf/sc/AfzalHW23a,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {{SPEChpc} 2021 Benchmarks on {Ice Lake} and {Sapphire Rapids} {Infiniband}
                Clusters: {A} Performance and Energy Case Study},
  booktitle  = {Proceedings of the {SC} '23 Workshops of The International
                Conference on High Performance Computing, Network, Storage,
                and Analysis, {SC-W} 2023, Denver, CO, USA, November 12-17, 2023},
  pages      = {1245--1254},
  publisher  = {{ACM}},
  year       = {2023},
  doi        = {10.1145/3624062.3624197},
  url        = {https://doi.org/10.1145/3624062.3624197},
  timestamp  = {Tue, 28 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/sc/AfzalHW23a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference contribution (ICPE 2023 proceedings).
% Editor "Di Marco, Antinisca" uses the comma form: written as "Antinisca Di Marco", BibTeX would
% take only "Marco" as the surname because the capitalized "Di" is not recognized as a von-part.
@inproceedings{DBLP:conf/wosp/Hager23,
  author     = {Georg Hager},
  editor     = {Marco Vieira and Valeria Cardellini and Di Marco, Antinisca and Petr Tuma},
  title      = {Application Knowledge Required: Performance Modeling for Fun and Profit},
  booktitle  = {Proceedings of the 2023 {ACM/SPEC} International Conference on Performance
                Engineering, {ICPE} 2023, Coimbra, Portugal, April 15-19, 2023},
  pages      = {5},
  publisher  = {{ACM}},
  year       = {2023},
  doi        = {10.1145/3578244.3585384},
  url        = {https://doi.org/10.1145/3578244.3585384},
  timestamp  = {Sat, 29 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/wosp/Hager23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference contribution (ICPE 2023 companion proceedings).
% Editor "Di Marco, Antinisca" uses the comma form so BibTeX keeps "Di Marco" as the surname.
% The tool names in the title are brace-protected against sentence-casing styles.
@inproceedings{DBLP:conf/wosp/LaukemannH23,
  author     = {Jan Laukemann and Georg Hager},
  editor     = {Marco Vieira and Valeria Cardellini and Di Marco, Antinisca and Petr Tuma},
  title      = {Core-Level Performance Engineering with the
                {Open-Source Architecture Code Analyzer} {(OSACA)}
                and the {Compiler Explorer}},
  booktitle  = {Companion of the 2023 {ACM/SPEC} International Conference on Performance
                Engineering, {ICPE} 2023, Coimbra, Portugal, April 15-19, 2023},
  pages      = {127--131},
  publisher  = {{ACM}},
  year       = {2023},
  doi        = {10.1145/3578245.3583716},
  url        = {https://doi.org/10.1145/3578245.3583716},
  timestamp  = {Sat, 29 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/wosp/LaukemannH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry; same authors and title (different capitalization) as the journal entry DBLP:journals/fgcs/AfzalHMW23.
@article{DBLP:journals/corr/abs-2302-12164,
  author     = {Ayesha Afzal and Georg Hager and Stefano Markidis and Gerhard Wellein},
  title      = {Making Applications Faster by Asynchronous Execution:
                Slowing Down Processes or Relaxing {MPI} Collectives},
  journal    = {CoRR},
  volume     = {abs/2302.12164},
  year       = {2023},
  doi        = {10.48550/ARXIV.2302.12164},
  url        = {https://doi.org/10.48550/arXiv.2302.12164},
  eprinttype = {arXiv},
  eprint     = {2302.12164},
  timestamp  = {Tue, 28 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-12164.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry. {MD-Bench} is brace-protected so sentence-casing styles do not render it as "Md-bench".
@article{DBLP:journals/corr/abs-2302-14660,
  author     = {Rafael Ravedutti Lucio Machado and Jan Eitzinger and Jan Laukemann and
                Georg Hager and Harald K{\"{o}}stler and Gerhard Wellein},
  title      = {{MD-Bench}: Engineering the in-core performance of short-range molecular
                dynamics kernels from state-of-the-art simulation packages},
  journal    = {CoRR},
  volume     = {abs/2302.14660},
  year       = {2023},
  doi        = {10.48550/ARXIV.2302.14660},
  url        = {https://doi.org/10.48550/arXiv.2302.14660},
  eprinttype = {arXiv},
  eprint     = {2302.14660},
  timestamp  = {Thu, 02 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-14660.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry. {CPUs} is brace-protected so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2309-02228,
  author     = {Christie L. Alappat and Jonas Thies and Georg Hager and Holger Fehske and
                Gerhard Wellein},
  title      = {Algebraic Temporal Blocking for Sparse Iterative Solvers on Multi-Core
                {CPUs}},
  journal    = {CoRR},
  volume     = {abs/2309.02228},
  year       = {2023},
  doi        = {10.48550/ARXIV.2309.02228},
  url        = {https://doi.org/10.48550/arXiv.2309.02228},
  eprinttype = {arXiv},
  eprint     = {2309.02228},
  timestamp  = {Mon, 11 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-02228.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry; same authors and title as the workshop entry DBLP:conf/sc/AfzalHW23a.
% Benchmark and product names are brace-protected so sentence-casing styles keep their capitalization.
@article{DBLP:journals/corr/abs-2309-05373,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {{SPEChpc} 2021 Benchmarks on {Ice Lake} and {Sapphire Rapids} {Infiniband}
                Clusters: {A} Performance and Energy Case Study},
  journal    = {CoRR},
  volume     = {abs/2309.05373},
  year       = {2023},
  doi        = {10.48550/ARXIV.2309.05373},
  url        = {https://doi.org/10.48550/arXiv.2309.05373},
  eprinttype = {arXiv},
  eprint     = {2309.05373},
  timestamp  = {Fri, 15 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-05373.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry; same authors and title as the workshop entry DBLP:conf/sc/AfzalHW23.
@article{DBLP:journals/corr/abs-2310-05701,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {Physical Oscillator Model for Supercomputing},
  journal    = {CoRR},
  volume     = {abs/2310.05701},
  year       = {2023},
  doi        = {10.48550/ARXIV.2310.05701},
  url        = {https://doi.org/10.48550/arXiv.2310.05701},
  eprinttype = {arXiv},
  eprint     = {2310.05701},
  timestamp  = {Tue, 24 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-05701.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry. {CloverLeaf}, {Intel} and {CPUs} are brace-protected so sentence-casing
% styles keep their capitalization.
@article{DBLP:journals/corr/abs-2311-04797,
  author     = {Jan Laukemann and Thomas Gruber and Georg Hager and Dossay Oryspayev and
                Gerhard Wellein},
  title      = {{CloverLeaf} on {Intel} Multi-Core {CPUs}: {A} Case Study in Write-Allocate
                Evasion},
  journal    = {CoRR},
  volume     = {abs/2311.04797},
  year       = {2023},
  doi        = {10.48550/ARXIV.2311.04797},
  url        = {https://doi.org/10.48550/arXiv.2311.04797},
  eprinttype = {arXiv},
  eprint     = {2311.04797},
  timestamp  = {Tue, 14 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2311-04797.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article. NOTE(review): the record carries no pages field; standard styles may warn about it.
@article{DBLP:journals/concurrency/AfzalHW22,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {Analytic performance model for parallel
                overlapping memory-bound kernels},
  journal    = {Concurr. Comput. Pract. Exp.},
  volume     = {34},
  number     = {10},
  year       = {2022},
  doi        = {10.1002/CPE.6816},
  url        = {https://doi.org/10.1002/cpe.6816},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/concurrency/AfzalHW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article. The model name {Execution-Cache-Memory} is brace-protected so sentence-casing
% styles do not reduce it to "Execution-cache-memory".
@article{DBLP:journals/concurrency/AlappatMLGHWW22,
  author     = {Christie L. Alappat and Nils Meyer and Jan Laukemann and Thomas Gruber and
                Georg Hager and Gerhard Wellein and Tilo Wettig},
  title      = {{Execution-Cache-Memory} modeling and performance tuning of sparse
                matrix-vector multiplication and Lattice quantum chromodynamics on {A64FX}},
  journal    = {Concurr. Comput. Pract. Exp.},
  volume     = {34},
  number     = {20},
  year       = {2022},
  doi        = {10.1002/CPE.6512},
  url        = {https://doi.org/10.1002/cpe.6512},
  timestamp  = {Tue, 12 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/concurrency/AlappatMLGHWW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference contribution (SIGSIM-PADS '22 proceedings).
@inproceedings{DBLP:conf/pads/AfzalWH22,
  author     = {Ayesha Afzal and Gerhard Wellein and Georg Hager},
  editor     = {Kalyan Perumalla and Margaret Loper and Dong (Kevin) Jin and
                Christopher D. Carothers},
  title      = {Addressing White-box Modeling and Simulation
                Challenges in Parallel Computing},
  booktitle  = {{SIGSIM-PADS} '22: {SIGSIM} Conference on Principles of Advanced
                Discrete Simulation, Atlanta, GA, USA, June 8 - 10, 2022},
  pages      = {25--26},
  publisher  = {{ACM}},
  year       = {2022},
  doi        = {10.1145/3518997.3534986},
  url        = {https://doi.org/10.1145/3518997.3534986},
  timestamp  = {Tue, 28 Jun 2022 11:47:06 +0200},
  biburl     = {https://dblp.org/rec/conf/pads/AfzalWH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper (LNCS volume); same authors and title as the CoRR entry DBLP:journals/corr/abs-2205-13963.
% {MPI} is brace-protected so sentence-casing styles do not lowercase the acronym.
@inproceedings{DBLP:conf/ppam/AfzalHWM22,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein and Stefano Markidis},
  editor     = {Roman Wyrzykowski and Jack J. Dongarra and Ewa Deelman and
                Konrad Karczewski},
  title      = {Exploring Techniques for the Analysis of Spontaneous Asynchronicity
                in {MPI}-Parallel Applications},
  booktitle  = {Parallel Processing and Applied Mathematics - 14th International Conference,
                {PPAM} 2022, Gdansk, Poland, September 11-14, 2022, Revised Selected
                Papers, Part {I}},
  series     = {Lecture Notes in Computer Science},
  volume     = {13826},
  pages      = {155--170},
  publisher  = {Springer},
  year       = {2022},
  doi        = {10.1007/978-3-031-30442-2\_12},
  url        = {https://doi.org/10.1007/978-3-031-30442-2\_12},
  timestamp  = {Wed, 17 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ppam/AfzalHWM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry; same authors and title as the journal entry DBLP:journals/jpdc/ErnstHHKW23.
% {GPUs} is brace-protected so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2204-14242,
  author     = {Dominik Ernst and Markus Holzer and Georg Hager and Matthias Knorr and
                Gerhard Wellein},
  title      = {Analytical Performance Estimation during Code Generation on Modern
                {GPUs}},
  journal    = {CoRR},
  volume     = {abs/2204.14242},
  year       = {2022},
  doi        = {10.48550/ARXIV.2204.14242},
  url        = {https://doi.org/10.48550/arXiv.2204.14242},
  eprinttype = {arXiv},
  eprint     = {2204.14242},
  timestamp  = {Mon, 02 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-14242.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry; same authors and title (different capitalization) as the journal entry DBLP:journals/tpds/AlappatHSW23.
@article{DBLP:journals/corr/abs-2205-01598,
  author     = {Christie L. Alappat and Georg Hager and Olaf Schenk and Gerhard Wellein},
  title      = {Level-based Blocking for Sparse Matrices:
                Sparse Matrix-Power-Vector Multiplication},
  journal    = {CoRR},
  volume     = {abs/2205.01598},
  year       = {2022},
  doi        = {10.48550/ARXIV.2205.01598},
  url        = {https://doi.org/10.48550/arXiv.2205.01598},
  eprinttype = {arXiv},
  eprint     = {2205.01598},
  timestamp  = {Thu, 05 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-01598.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry; same authors and title as the journal entry DBLP:journals/tpds/AfzalHW23.
@article{DBLP:journals/corr/abs-2205-04190,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {The Role of Idle Waves, Desynchronization, and Bottleneck
                Evasion in the Performance of Parallel Programs},
  journal    = {CoRR},
  volume     = {abs/2205.04190},
  year       = {2022},
  doi        = {10.48550/ARXIV.2205.04190},
  url        = {https://doi.org/10.48550/arXiv.2205.04190},
  eprinttype = {arXiv},
  eprint     = {2205.04190},
  timestamp  = {Wed, 11 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-04190.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry; same authors and title as the conference entry DBLP:conf/ppam/AfzalHWM22.
% {MPI} is brace-protected so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2205-13963,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein and Stefano Markidis},
  title      = {Exploring Techniques for the Analysis of Spontaneous Asynchronicity
                in {MPI}-Parallel Applications},
  journal    = {CoRR},
  volume     = {abs/2205.13963},
  year       = {2022},
  doi        = {10.48550/ARXIV.2205.13963},
  url        = {https://doi.org/10.48550/arXiv.2205.13963},
  eprinttype = {arXiv},
  eprint     = {2205.13963},
  timestamp  = {Tue, 31 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-13963.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry; same authors and title (different capitalization) as the journal entry DBLP:journals/topc/AlvermannHF23.
@article{DBLP:journals/corr/abs-2209-01974,
  author     = {Andreas Alvermann and Georg Hager and Holger Fehske},
  title      = {Orthogonal layers of parallelism in
                large-scale eigenvalue computations},
  journal    = {CoRR},
  volume     = {abs/2209.01974},
  year       = {2022},
  doi        = {10.48550/ARXIV.2209.01974},
  url        = {https://doi.org/10.48550/arXiv.2209.01974},
  eprinttype = {arXiv},
  eprint     = {2209.01974},
  timestamp  = {Mon, 26 Sep 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-01974.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article. {GPUs} is brace-protected so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/ijhpca/ErnstHTW21,
  author     = {Dominik Ernst and Georg Hager and Jonas Thies and Gerhard Wellein},
  title      = {Performance engineering for real and complex tall {\&} skinny
                matrix multiplication kernels on {GPUs}},
  journal    = {Int. J. High Perform. Comput. Appl.},
  volume     = {35},
  number     = {1},
  year       = {2021},
  doi        = {10.1177/1094342020965661},
  url        = {https://doi.org/10.1177/1094342020965661},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijhpca/ErnstHTW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article. The proper noun {Dirac} is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/ijhpca/PieperHF21,
  author     = {Andreas Pieper and Georg Hager and Holger Fehske},
  title      = {A domain-specific language and matrix-free stencil code for investigating
                electronic properties of {Dirac} and topological materials},
  journal    = {Int. J. High Perform. Comput. Appl.},
  volume     = {35},
  number     = {1},
  year       = {2021},
  doi        = {10.1177/1094342020959423},
  url        = {https://doi.org/10.1177/1094342020959423},
  timestamp  = {Thu, 29 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijhpca/PieperHF21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper (CGO 2021). {YaskSite} is brace-protected so sentence-casing styles do not
% render it as "Yasksite".
@inproceedings{DBLP:conf/cgo/AlappatSHKRW21,
  author     = {Christie L. Alappat and Johannes Seiferth and Georg Hager and
                Matthias Korch and Thomas Rauber and Gerhard Wellein},
  editor     = {Jae W. Lee and Mary Lou Soffa and Ayal Zaks},
  title      = {{YaskSite}: Stencil Optimization Techniques Applied to Explicit {ODE}
                Methods on Modern Architectures},
  booktitle  = {{IEEE/ACM} International Symposium on Code Generation and Optimization,
                {CGO} 2021, Seoul, South Korea, February 27 - March 3, 2021},
  pages      = {174--186},
  publisher  = {{IEEE}},
  year       = {2021},
  doi        = {10.1109/CGO51591.2021.9370316},
  url        = {https://doi.org/10.1109/CGO51591.2021.9370316},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/cgo/AlappatSHKRW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper (SBAC-PAD 2021); same title and author set (different order) as the CoRR entry
% DBLP:journals/corr/abs-2107-01143.
% {GPUs} is brace-protected so sentence-casing styles do not lowercase the acronym.
@inproceedings{DBLP:conf/sbac-pad/ErnstHKWH21,
  author     = {Dominik Ernst and Georg Hager and Matthias Knorr and Gerhard Wellein and
                Markus Holzer},
  title      = {Opening the Black Box: Performance Estimation during Code Generation
                for {GPUs}},
  booktitle  = {33rd {IEEE} International Symposium on Computer Architecture and High
                Performance Computing, {SBAC-PAD} 2021, Belo Horizonte, Brazil, October
                26-29, 2021},
  pages      = {22--32},
  publisher  = {{IEEE}},
  year       = {2021},
  doi        = {10.1109/SBAC-PAD53543.2021.00014},
  url        = {https://doi.org/10.1109/SBAC-PAD53543.2021.00014},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sbac-pad/ErnstHKWH21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper (LNCS volume); same authors and title as the CoRR entry DBLP:journals/corr/abs-2103-03175.
@inproceedings{DBLP:conf/supercomputer/AfzalHW21,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  editor     = {Bradford L. Chamberlain and Ana Lucia Varbanescu and Hatem Ltaief and
                Piotr Luszczek},
  title      = {Analytic Modeling of Idle Waves in Parallel Programs:
                Communication, Cluster Topology, and Noise Impact},
  booktitle  = {High Performance Computing - 36th International Conference,
                {ISC} High Performance 2021, Virtual Event, June 24 - July 2, 2021,
                Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {12728},
  pages      = {351--371},
  publisher  = {Springer},
  year       = {2021},
  doi        = {10.1007/978-3-030-78713-4\_19},
  url        = {https://doi.org/10.1007/978-3-030-78713-4\_19},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/supercomputer/AfzalHW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry. {SpMV} is brace-protected so sentence-casing styles do not lowercase it to "spmv".
@article{DBLP:journals/corr/abs-2103-03013,
  author     = {Christie L. Alappat and Nils Meyer and Jan Laukemann and Thomas Gruber and
                Georg Hager and Gerhard Wellein and Tilo Wettig},
  title      = {{ECM} modeling and performance tuning of {SpMV} and Lattice {QCD} on
                {A64FX}},
  journal    = {CoRR},
  volume     = {abs/2103.03013},
  year       = {2021},
  url        = {https://arxiv.org/abs/2103.03013},
  eprinttype = {arXiv},
  eprint     = {2103.03013},
  timestamp  = {Mon, 15 Mar 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-03013.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry; same authors and title as the conference entry DBLP:conf/supercomputer/AfzalHW21.
@article{DBLP:journals/corr/abs-2103-03175,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {Analytic Modeling of Idle Waves in Parallel Programs:
                Communication, Cluster Topology, and Noise Impact},
  journal    = {CoRR},
  volume     = {abs/2103.03175},
  year       = {2021},
  url        = {https://arxiv.org/abs/2103.03175},
  eprinttype = {arXiv},
  eprint     = {2103.03175},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-03175.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv) entry; same title and author set (different order) as the conference entry
% DBLP:conf/sbac-pad/ErnstHKWH21.
% {GPUs} is brace-protected so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2107-01143,
  author     = {Dominik Ernst and Georg Hager and Markus Holzer and Matthias Knorr and
                Gerhard Wellein},
  title      = {Opening the Black Box: Performance Estimation during Code Generation
                for {GPUs}},
  journal    = {CoRR},
  volume     = {abs/2107.01143},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.01143},
  eprinttype = {arXiv},
  eprint     = {2107.01143},
  timestamp  = {Wed, 23 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2107-01143.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article. NOTE(review): the record carries no pages field; standard styles may warn about it.
@article{DBLP:journals/ijhpca/CremonesiHWS20,
  author     = {Francesco Cremonesi and Georg Hager and Gerhard Wellein and
                Felix Sch{\"{u}}rmann},
  title      = {Analytic performance modeling and analysis
                of detailed neuron simulations},
  journal    = {Int. J. High Perform. Comput. Appl.},
  volume     = {34},
  number     = {4},
  year       = {2020},
  doi        = {10.1177/1094342020912528},
  url        = {https://doi.org/10.1177/1094342020912528},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijhpca/CremonesiHWS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article (Supercomput. Front. Innov., volume 7, number 2).
@article{DBLP:journals/superfri/0001AHFW20,
  author     = {Johannes Hofmann and Christie L. Alappat and Georg Hager and Dietmar Fey and
                Gerhard Wellein},
  title      = {Bridging the Architecture Gap: Abstracting Performance-Relevant
                Properties of Modern Server Processors},
  journal    = {Supercomput. Front. Innov.},
  volume     = {7},
  number     = {2},
  pages      = {54--78},
  year       = {2020},
  doi        = {10.14529/JSFI200204},
  url        = {https://doi.org/10.14529/jsfi200204},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/superfri/0001AHFW20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article (ACM Trans. Math. Softw., volume 46, number 4).
@article{DBLP:journals/toms/ThiesROBEHW20,
  author     = {Jonas Thies and Melven R{\"{o}}hrig{-}Z{\"{o}}llner and Nigel Overmars and
                Achim Basermann and Dominik Ernst and Georg Hager and Gerhard Wellein},
  title      = {{PHIST:} {A} Pipelined, Hybrid-Parallel
                Iterative Solver Toolkit},
  journal    = {{ACM} Trans. Math. Softw.},
  volume     = {46},
  number     = {4},
  pages      = {31:1--31:26},
  year       = {2020},
  doi        = {10.1145/3402227},
  url        = {https://doi.org/10.1145/3402227},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/toms/ThiesROBEHW20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article (ACM Trans. Parallel Comput., volume 7, number 3).
@article{DBLP:journals/topc/AlappatBBFHSTW20,
  author     = {Christie L. Alappat and Achim Basermann and Alan R. Bishop and
                Holger Fehske and Georg Hager and Olaf Schenk and Jonas Thies and
                Gerhard Wellein},
  title      = {A Recursive Algebraic Coloring Technique for Hardware-efficient
                Symmetric Sparse Matrix-vector Multiplication},
  journal    = {{ACM} Trans. Parallel Comput.},
  volume     = {7},
  number     = {3},
  pages      = {19:1--19:37},
  year       = {2020},
  doi        = {10.1145/3399732},
  url        = {https://doi.org/10.1145/3399732},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/topc/AlappatBBFHSTW20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Workshop paper (PMBS 2020 proceedings, published by IEEE).
@inproceedings{DBLP:conf/pmbs-ws/AlappatLGHWMW20,
  author     = {Christie L. Alappat and Jan Laukemann and Thomas Gruber and Georg Hager and
                Gerhard Wellein and Nils Meyer and Tilo Wettig},
  title      = {Performance Modeling of Streaming Kernels and
                Sparse Matrix-Vector Multiplication on {A64FX}},
  booktitle  = {2020 {IEEE/ACM} Performance Modeling, Benchmarking and Simulation of
                High Performance Computer Systems, PMBS@SC 2020, Atlanta, GA, USA,
                November 12, 2020},
  pages      = {1--7},
  publisher  = {{IEEE}},
  year       = {2020},
  doi        = {10.1109/PMBS51919.2020.00006},
  url        = {https://doi.org/10.1109/PMBS51919.2020.00006},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/pmbs-ws/AlappatLGHWMW20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ISC High Performance 2020 paper (LNCS 12151). An arXiv entry with the same
% title is recorded as DBLP:journals/corr/abs-2002-02989.
% The acronym MPI is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/supercomputer/AfzalHW20,
  author       = {Ayesha Afzal and
                  Georg Hager and
                  Gerhard Wellein},
  editor       = {Ponnuswamy Sadayappan and
                  Bradford L. Chamberlain and
                  Guido Juckeland and
                  Hatem Ltaief},
  title        = {Desynchronization and Wave Pattern Formation in {MPI}-Parallel and Hybrid
                  Memory-Bound Programs},
  booktitle    = {High Performance Computing - 35th International Conference, {ISC}
                  High Performance 2020, Frankfurt/Main, Germany, June 22-25, 2020,
                  Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {12151},
  pages        = {391--411},
  publisher    = {Springer},
  year         = {2020},
  url          = {https://doi.org/10.1007/978-3-030-50743-5\_20},
  doi          = {10.1007/978-3-030-50743-5\_20},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/supercomputer/AfzalHW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ISC High Performance 2020 paper (LNCS 12151). An arXiv entry with the same
% title is recorded as DBLP:journals/corr/abs-2002-03344.
% Vendor and product names in the title are brace-protected against sentence casing.
@inproceedings{DBLP:conf/supercomputer/Alappat0HFBW20,
  author       = {Christie L. Alappat and
                  Johannes Hofmann and
                  Georg Hager and
                  Holger Fehske and
                  Alan R. Bishop and
                  Gerhard Wellein},
  editor       = {Ponnuswamy Sadayappan and
                  Bradford L. Chamberlain and
                  Guido Juckeland and
                  Hatem Ltaief},
  title        = {Understanding {HPC} Benchmark Performance on {Intel} {Broadwell} and
                  {Cascade Lake} Processors},
  booktitle    = {High Performance Computing - 35th International Conference, {ISC}
                  High Performance 2020, Frankfurt/Main, Germany, June 22-25, 2020,
                  Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {12151},
  pages        = {412--433},
  publisher    = {Springer},
  year         = {2020},
  url          = {https://doi.org/10.1007/978-3-030-50743-5\_21},
  doi          = {10.1007/978-3-030-50743-5\_21},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/supercomputer/Alappat0HFBW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Book chapter in the SPPEXA 2016-2019 volume (Springer LNCSE 136, 2020).
@incollection{DBLP:series/lncse/AlappatABFFGHHIKKLNRSSTW20,
  author       = {Alappat, Christie L. and
                  Alvermann, Andreas and
                  Basermann, Achim and
                  Fehske, Holger and
                  Futamura, Yasunori and
                  Galgon, Martin and
                  Hager, Georg and
                  Huber, Sarah and
                  Imakura, Akira and
                  Kawai, Masatoshi and
                  Kreutzer, Moritz and
                  Lang, Bruno and
                  Nakajima, Kengo and
                  R{\"{o}}hrig{-}Z{\"{o}}llner, Melven and
                  Sakurai, Tetsuya and
                  Shahzad, Faisal and
                  Thies, Jonas and
                  Wellein, Gerhard},
  editor       = {Bungartz, Hans{-}Joachim and
                  Reiz, Severin and
                  Uekermann, Benjamin and
                  Neumann, Philipp and
                  Nagel, Wolfgang E.},
  title        = {{ESSEX:} Equipping Sparse Solvers For Exascale},
  booktitle    = {Software for Exascale Computing - {SPPEXA} 2016-2019},
  series       = {Lecture Notes in Computational Science and Engineering},
  volume       = {136},
  pages        = {143--187},
  publisher    = {Springer},
  year         = {2020},
  doi          = {10.1007/978-3-030-47956-5\_7},
  url          = {https://doi.org/10.1007/978-3-030-47956-5\_7},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/series/lncse/AlappatABFFGHHIKKLNRSSTW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2002.02989. A conference entry with the same title is recorded
% as DBLP:conf/supercomputer/AfzalHW20.
% The acronym MPI is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-2002-02989,
  author       = {Ayesha Afzal and
                  Georg Hager and
                  Gerhard Wellein},
  title        = {Desynchronization and Wave Pattern Formation in {MPI}-Parallel and Hybrid
                  Memory-Bound Programs},
  journal      = {CoRR},
  volume       = {abs/2002.02989},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.02989},
  eprinttype   = {arXiv},
  eprint       = {2002.02989},
  timestamp    = {Wed, 12 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-02989.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2002.03344. A conference entry with the same title is recorded
% as DBLP:conf/supercomputer/Alappat0HFBW20.
% Vendor and product names in the title are brace-protected against sentence casing.
@article{DBLP:journals/corr/abs-2002-03344,
  author       = {Christie L. Alappat and
                  Johannes Hofmann and
                  Georg Hager and
                  Holger Fehske and
                  Alan R. Bishop and
                  Gerhard Wellein},
  title        = {Understanding {HPC} Benchmark Performance on {Intel} {Broadwell} and
                  {Cascade Lake} Processors},
  journal      = {CoRR},
  volume       = {abs/2002.03344},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.03344},
  eprinttype   = {arXiv},
  eprint       = {2002.03344},
  timestamp    = {Wed, 12 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-03344.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2009.13903. A workshop entry with the same title is recorded
% as DBLP:conf/pmbs-ws/AlappatLGHWMW20.
@article{DBLP:journals/corr/abs-2009-13903,
  author       = {Alappat, Christie L. and
                  Laukemann, Jan and
                  Gruber, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard and
                  Meyer, Nils and
                  Wettig, Tilo},
  title        = {Performance Modeling of Streaming Kernels and Sparse Matrix-Vector
                  Multiplication on {A64FX}},
  journal      = {CoRR},
  volume       = {abs/2009.13903},
  year         = {2020},
  eprinttype   = {arXiv},
  eprint       = {2009.13903},
  url          = {https://arxiv.org/abs/2009.13903},
  timestamp    = {Wed, 30 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2009-13903.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2011.00243.
% The acronym CPUs is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-2011-00243,
  author       = {Ayesha Afzal and
                  Georg Hager and
                  Gerhard Wellein},
  title        = {An analytic performance model for overlapping execution of memory-bound
                  loop kernels on multicore {CPUs}},
  journal      = {CoRR},
  volume       = {abs/2011.00243},
  year         = {2020},
  url          = {https://arxiv.org/abs/2011.00243},
  eprinttype   = {arXiv},
  eprint       = {2011.00243},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2011-00243.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Supercomputing Frontiers and Innovations article, vol. 6 no. 3 (2019). An
% arXiv entry with the same title is recorded as DBLP:journals/corr/abs-1906-08138.
@article{DBLP:journals/superfri/HornichHHGW19,
  author       = {Hornich, Julian and
                  Hammer, Julian and
                  Hager, Georg and
                  Gruber, Thomas and
                  Wellein, Gerhard},
  title        = {Collecting and Presenting Reproducible Intranode Stencil Performance:
                  {INSPECT}},
  journal      = {Supercomput. Front. Innov.},
  volume       = {6},
  number       = {3},
  pages        = {4--25},
  year         = {2019},
  doi          = {10.14529/JSFI190301},
  url          = {https://doi.org/10.14529/jsfi190301},
  timestamp    = {Fri, 11 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/superfri/HornichHHGW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE TPDS journal article, vol. 30 no. 3 (2019). An arXiv entry with the same
% title is recorded as DBLP:journals/corr/abs-1708-02030.
@article{DBLP:journals/tpds/ShahzadTKZHW19,
  author       = {Shahzad, Faisal and
                  Thies, Jonas and
                  Kreutzer, Moritz and
                  Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {{CRAFT:} {A} Library for Easier Application-Level Checkpoint/Restart
                  and Automatic Fault Tolerance},
  journal      = {{IEEE} Trans. Parallel Distributed Syst.},
  volume       = {30},
  number       = {3},
  pages        = {501--514},
  year         = {2019},
  doi          = {10.1109/TPDS.2018.2866794},
  url          = {https://doi.org/10.1109/TPDS.2018.2866794},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tpds/ShahzadTKZHW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE CLUSTER 2019 conference paper.
@inproceedings{DBLP:conf/cluster/AfzalHW19,
  author       = {Afzal, Ayesha and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Propagation and Decay of Injected One-Off Delays on Clusters: {A}
                  Case Study},
  booktitle    = {2019 {IEEE} International Conference on Cluster Computing, {CLUSTER}
                  2019, Albuquerque, NM, USA, September 23-26, 2019},
  pages        = {1--10},
  publisher    = {{IEEE}},
  year         = {2019},
  doi          = {10.1109/CLUSTER.2019.8890995},
  url          = {https://doi.org/10.1109/CLUSTER.2019.8890995},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cluster/AfzalHW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% PPAM 2019 conference paper (LNCS 12043). A closely titled arXiv entry is
% recorded as DBLP:journals/corr/abs-1905-03136.
% The acronym GPUs is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/ppam/ErnstHTW19,
  author       = {Dominik Ernst and
                  Georg Hager and
                  Jonas Thies and
                  Gerhard Wellein},
  editor       = {Roman Wyrzykowski and
                  Ewa Deelman and
                  Jack J. Dongarra and
                  Konrad Karczewski},
  title        = {Performance Engineering for a Tall {\&} Skinny Matrix Multiplication
                  Kernels on {GPUs}},
  booktitle    = {Parallel Processing and Applied Mathematics - 13th International Conference,
                  {PPAM} 2019, Bialystok, Poland, September 8-11, 2019, Revised Selected
                  Papers, Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {12043},
  pages        = {505--515},
  publisher    = {Springer},
  year         = {2019},
  url          = {https://doi.org/10.1007/978-3-030-43229-4\_43},
  doi          = {10.1007/978-3-030-43229-4\_43},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ppam/ErnstHTW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% PMBS workshop paper at SC 2019 (IEEE). An arXiv entry with the same title is
% recorded as DBLP:journals/corr/abs-1910-00214.
@inproceedings{DBLP:conf/sc/LaukemannHHW19,
  author       = {Laukemann, Jan and
                  Hammer, Julian and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Automatic Throughput and Critical Path Analysis of x86 and {ARM} Assembly
                  Kernels},
  booktitle    = {2019 {IEEE/ACM} Performance Modeling, Benchmarking and Simulation
                  of High Performance Computer Systems, PMBS@SC 2019, Denver, CO, USA,
                  November 18, 2019},
  pages        = {1--6},
  publisher    = {{IEEE}},
  year         = {2019},
  doi          = {10.1109/PMBS49563.2019.00006},
  url          = {https://doi.org/10.1109/PMBS49563.2019.00006},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sc/LaukemannHHW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1901.05344. The URL uses the https scheme, matching the 2020
% arXiv entries in this file.
@article{DBLP:journals/corr/abs-1901-05344,
  author       = {Francesco Cremonesi and
                  Georg Hager and
                  Gerhard Wellein and
                  Felix Sch{\"{u}}rmann},
  title        = {Analytic Performance Modeling and Analysis of Detailed Neuron Simulations},
  journal      = {CoRR},
  volume       = {abs/1901.05344},
  year         = {2019},
  url          = {https://arxiv.org/abs/1901.05344},
  eprinttype   = {arXiv},
  eprint       = {1901.05344},
  timestamp    = {Fri, 01 Feb 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1901-05344.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1905.03136. A closely titled conference entry is recorded as
% DBLP:conf/ppam/ErnstHTW19.
% The acronym GPUs is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-1905-03136,
  author       = {Dominik Ernst and
                  Georg Hager and
                  Jonas Thies and
                  Gerhard Wellein},
  title        = {Performance Engineering for a Tall {\&} Skinny Matrix Multiplication
                  Kernel on {GPUs}},
  journal      = {CoRR},
  volume       = {abs/1905.03136},
  year         = {2019},
  url          = {https://arxiv.org/abs/1905.03136},
  eprinttype   = {arXiv},
  eprint       = {1905.03136},
  timestamp    = {Mon, 27 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1905-03136.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1905.10603. The URL uses the https scheme, matching the 2020
% arXiv entries in this file.
@article{DBLP:journals/corr/abs-1905-10603,
  author       = {Ayesha Afzal and
                  Georg Hager and
                  Gerhard Wellein},
  title        = {Delay Propagation and Overlapping Mechanisms on Clusters: {A} Case
                  Study of Idle Periods based on Workload, Communication, and Delay
                  Granularity},
  journal      = {CoRR},
  volume       = {abs/1905.10603},
  year         = {2019},
  url          = {https://arxiv.org/abs/1905.10603},
  eprinttype   = {arXiv},
  eprint       = {1905.10603},
  timestamp    = {Mon, 03 Jun 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1905-10603.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1906.08138. A journal entry with the same title is recorded as
% DBLP:journals/superfri/HornichHHGW19. The URL uses the https scheme.
@article{DBLP:journals/corr/abs-1906-08138,
  author       = {Julian Hornich and
                  Julian Hammer and
                  Georg Hager and
                  Thomas Gruber and
                  Gerhard Wellein},
  title        = {Collecting and Presenting Reproducible Intranode Stencil Performance:
                  {INSPECT}},
  journal      = {CoRR},
  volume       = {abs/1906.08138},
  year         = {2019},
  url          = {https://arxiv.org/abs/1906.08138},
  eprinttype   = {arXiv},
  eprint       = {1906.08138},
  timestamp    = {Mon, 24 Jun 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1906-08138.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1907.00048. The URL uses the https scheme, matching the 2020
% arXiv entries in this file.
@article{DBLP:journals/corr/abs-1907-00048,
  author       = {Johannes Hofmann and
                  Christie L. Alappat and
                  Georg Hager and
                  Dietmar Fey and
                  Gerhard Wellein},
  title        = {Bridging the Architecture Gap: Abstracting Performance-Relevant Properties
                  of Modern Server Processors},
  journal      = {CoRR},
  volume       = {abs/1907.00048},
  year         = {2019},
  url          = {https://arxiv.org/abs/1907.00048},
  eprinttype   = {arXiv},
  eprint       = {1907.00048},
  timestamp    = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1907-00048.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1907.06487. A journal entry with the same title is recorded as
% DBLP:journals/topc/AlappatBBFHSTW20. The URL uses the https scheme.
@article{DBLP:journals/corr/abs-1907-06487,
  author       = {Christie L. Alappat and
                  Georg Hager and
                  Olaf Schenk and
                  Jonas Thies and
                  Achim Basermann and
                  Alan R. Bishop and
                  Holger Fehske and
                  Gerhard Wellein},
  title        = {A Recursive Algebraic Coloring Technique for Hardware-Efficient Symmetric
                  Sparse Matrix-Vector Multiplication},
  journal      = {CoRR},
  volume       = {abs/1907.06487},
  year         = {2019},
  url          = {https://arxiv.org/abs/1907.06487},
  eprinttype   = {arXiv},
  eprint       = {1907.06487},
  timestamp    = {Wed, 17 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1907-06487.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1910.00214. A workshop entry with the same title is recorded as
% DBLP:conf/sc/LaukemannHHW19. The URL uses the https scheme.
@article{DBLP:journals/corr/abs-1910-00214,
  author       = {Jan Laukemann and
                  Julian Hammer and
                  Georg Hager and
                  Gerhard Wellein},
  title        = {Automatic Throughput and Critical Path Analysis of x86 and {ARM} Assembly
                  Kernels},
  journal      = {CoRR},
  volume       = {abs/1910.00214},
  year         = {2019},
  url          = {https://arxiv.org/abs/1910.00214},
  eprinttype   = {arXiv},
  eprint       = {1910.00214},
  timestamp    = {Fri, 04 Oct 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-00214.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJHPCA journal article, vol. 32 no. 5 (2018).
@article{DBLP:journals/ijhpca/ShahzadKZMPHW18,
  author       = {Shahzad, Faisal and
                  Kreutzer, Moritz and
                  Zeiser, Thomas and
                  Machado, Rui and
                  Pieper, Andreas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Building and utilizing fault tolerance support tools for the {GASPI}
                  applications},
  journal      = {Int. J. High Perform. Comput. Appl.},
  volume       = {32},
  number       = {5},
  pages        = {613--626},
  year         = {2018},
  doi          = {10.1177/1094342016677085},
  url          = {https://doi.org/10.1177/1094342016677085},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijhpca/ShahzadKZMPHW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Informatik Spektrum article, vol. 41 no. 5 (2018).
@article{DBLP:journals/insk/HagerW18,
  author       = {Hager, Georg and
                  Wellein, Gerhard},
  title        = {Performance Engineering},
  journal      = {Inform. Spektrum},
  volume       = {41},
  number       = {5},
  pages        = {323--327},
  year         = {2018},
  doi          = {10.1007/S00287-018-1122-1},
  url          = {https://doi.org/10.1007/s00287-018-1122-1},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/insk/HagerW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ACM TOPC journal article, vol. 4 no. 3 (2018).
@article{DBLP:journals/topc/MalasHLK18,
  author       = {Malas, Tareq M. and
                  Hager, Georg and
                  Ltaief, Hatem and
                  Keyes, David E.},
  title        = {Multidimensional Intratile Parallelization for Memory-Starved Stencil
                  Computations},
  journal      = {{ACM} Trans. Parallel Comput.},
  volume       = {4},
  number       = {3},
  pages        = {12:1--12:32},
  year         = {2018},
  doi          = {10.1145/3155290},
  url          = {https://doi.org/10.1145/3155290},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/topc/MalasHLK18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% SBAC-PAD 2018 conference paper (IEEE).
@inproceedings{DBLP:conf/sbac-pad/WittmannHJLKRSW18,
  author       = {Wittmann, Markus and
                  Hager, Georg and
                  Janal{\'{\i}}k, Radim and
                  Lanser, Martin and
                  Klawonn, Axel and
                  Rheinbach, Oliver and
                  Schenk, Olaf and
                  Wellein, Gerhard},
  title        = {Multicore Performance Engineering of Sparse Triangular Solves Using
                  a Modified Roofline Model},
  booktitle    = {30th International Symposium on Computer Architecture and High Performance
                  Computing, {SBAC-PAD} 2018, Lyon, France, September 24-27, 2018},
  pages        = {233--241},
  publisher    = {{IEEE}},
  year         = {2018},
  doi          = {10.1109/CAHPC.2018.8645938},
  url          = {https://doi.org/10.1109/CAHPC.2018.8645938},
  timestamp    = {Fri, 09 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sbac-pad/WittmannHJLKRSW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% PMBS workshop paper at SC 2018 (IEEE). An arXiv entry with the same title is
% recorded as DBLP:journals/corr/abs-1809-00912.
% The vendor name Intel is brace-protected like AMD so sentence casing keeps it.
@inproceedings{DBLP:conf/sc/LaukemannHHHW18,
  author       = {Jan Laukemann and
                  Julian Hammer and
                  Johannes Hofmann and
                  Georg Hager and
                  Gerhard Wellein},
  title        = {Automated Instruction Stream Throughput Prediction for {Intel} and {AMD}
                  Microarchitectures},
  booktitle    = {2018 {IEEE/ACM} Performance Modeling, Benchmarking and Simulation
                  of High Performance Computer Systems, PMBS@SC 2018, Dallas, TX, USA,
                  November 12, 2018},
  pages        = {121--131},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/PMBS.2018.8641578},
  doi          = {10.1109/PMBS.2018.8641578},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sc/LaukemannHHHW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ISC High Performance 2018 paper (LNCS 10876). An arXiv entry with the same
% title is recorded as DBLP:journals/corr/abs-1803-01618.
@inproceedings{DBLP:conf/supercomputer/HofmannHF18,
  author       = {Hofmann, Johannes and
                  Hager, Georg and
                  Fey, Dietmar},
  editor       = {Yokota, Rio and
                  Weiland, Mich{\`{e}}le and
                  Keyes, David E. and
                  Trinitis, Carsten},
  title        = {On the Accuracy and Usefulness of Analytic Energy Models for Contemporary
                  Multicore Processors},
  booktitle    = {High Performance Computing - 33rd International Conference, {ISC}
                  High Performance 2018, Frankfurt, Germany, June 24-28, 2018, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {10876},
  pages        = {22--43},
  publisher    = {Springer},
  year         = {2018},
  doi          = {10.1007/978-3-319-92040-5\_2},
  url          = {https://doi.org/10.1007/978-3-319-92040-5\_2},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/supercomputer/HofmannHF18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ISC High Performance 2018 paper (LNCS 10876). An arXiv entry with the same
% title is recorded as DBLP:journals/corr/abs-1803-02156.
% The name Chebyshev and the acronym GPGPUs are brace-protected against sentence casing.
@inproceedings{DBLP:conf/supercomputer/KreutzerEBFHNW18,
  author       = {Moritz Kreutzer and
                  Dominik Ernst and
                  Alan R. Bishop and
                  Holger Fehske and
                  Georg Hager and
                  Kengo Nakajima and
                  Gerhard Wellein},
  editor       = {Rio Yokota and
                  Mich{\`{e}}le Weiland and
                  David E. Keyes and
                  Carsten Trinitis},
  title        = {{Chebyshev} Filter Diagonalization on Modern Manycore Processors and
                  {GPGPUs}},
  booktitle    = {High Performance Computing - 33rd International Conference, {ISC}
                  High Performance 2018, Frankfurt, Germany, June 24-28, 2018, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {10876},
  pages        = {329--349},
  publisher    = {Springer},
  year         = {2018},
  url          = {https://doi.org/10.1007/978-3-319-92040-5\_17},
  doi          = {10.1007/978-3-319-92040-5\_17},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/supercomputer/KreutzerEBFHNW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1803.01618. A conference entry with the same title is recorded
% as DBLP:conf/supercomputer/HofmannHF18. The URL uses the https scheme.
@article{DBLP:journals/corr/abs-1803-01618,
  author       = {Johannes Hofmann and
                  Georg Hager and
                  Dietmar Fey},
  title        = {On the accuracy and usefulness of analytic energy models for contemporary
                  multicore processors},
  journal      = {CoRR},
  volume       = {abs/1803.01618},
  year         = {2018},
  url          = {https://arxiv.org/abs/1803.01618},
  eprinttype   = {arXiv},
  eprint       = {1803.01618},
  timestamp    = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1803-01618.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1803.02156. A conference entry with the same title is recorded
% as DBLP:conf/supercomputer/KreutzerEBFHNW18. The URL uses the https scheme.
% The name Chebyshev and the acronym GPGPUs are brace-protected against sentence casing.
@article{DBLP:journals/corr/abs-1803-02156,
  author       = {Moritz Kreutzer and
                  Georg Hager and
                  Dominik Ernst and
                  Holger Fehske and
                  Alan R. Bishop and
                  Gerhard Wellein},
  title        = {{Chebyshev} Filter Diagonalization on Modern Manycore Processors and
                  {GPGPUs}},
  journal      = {CoRR},
  volume       = {abs/1803.02156},
  year         = {2018},
  url          = {https://arxiv.org/abs/1803.02156},
  eprinttype   = {arXiv},
  eprint       = {1803.02156},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1803-02156.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1809.00912. A workshop entry with the same title is recorded as
% DBLP:conf/sc/LaukemannHHHW18. The URL uses the https scheme.
% The vendor name Intel is brace-protected like AMD so sentence casing keeps it.
@article{DBLP:journals/corr/abs-1809-00912,
  author       = {Jan Laukemann and
                  Julian Hammer and
                  Johannes Hofmann and
                  Georg Hager and
                  Gerhard Wellein},
  title        = {Automated Instruction Stream Throughput Prediction for {Intel} and {AMD}
                  Microarchitectures},
  journal      = {CoRR},
  volume       = {abs/1809.00912},
  year         = {2018},
  url          = {https://arxiv.org/abs/1809.00912},
  eprinttype   = {arXiv},
  eprint       = {1809.00912},
  timestamp    = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1809-00912.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Concurrency and Computation: Practice and Experience, vol. 29 no. 9 (2017).
% The surname Kahan is brace-protected so sentence-casing styles keep the capital.
% NOTE(review): no pages field is recorded; presumably the venue uses article
% identifiers, confirm and add one if a style warns about missing pages.
@article{DBLP:journals/concurrency/HofmannFREHW17,
  author       = {Johannes Hofmann and
                  Dietmar Fey and
                  Michael Riedmann and
                  Jan Eitzinger and
                  Georg Hager and
                  Gerhard Wellein},
  title        = {Performance analysis of the {Kahan}-enhanced scalar product on current
                  multi-core and many-core processors},
  journal      = {Concurr. Comput. Pract. Exp.},
  volume       = {29},
  number       = {9},
  year         = {2017},
  url          = {https://doi.org/10.1002/cpe.3921},
  doi          = {10.1002/CPE.3921},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/concurrency/HofmannFREHW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% International Journal of Parallel Programming article, vol. 45 no. 5 (2017).
@article{DBLP:journals/ijpp/KreutzerTRPSGBF17,
  author       = {Kreutzer, Moritz and
                  Thies, Jonas and
                  R{\"{o}}hrig{-}Z{\"{o}}llner, Melven and
                  Pieper, Andreas and
                  Shahzad, Faisal and
                  Galgon, Martin and
                  Basermann, Achim and
                  Fehske, Holger and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {{GHOST:} Building Blocks for High Performance Sparse Linear Algebra
                  on Heterogeneous Systems},
  journal      = {Int. J. Parallel Program.},
  volume       = {45},
  number       = {5},
  pages        = {1046--1072},
  year         = {2017},
  doi          = {10.1007/S10766-016-0464-Z},
  url          = {https://doi.org/10.1007/s10766-016-0464-z},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijpp/KreutzerTRPSGBF17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE CLUSTER 2017 conference paper. An arXiv entry with the same title is
% recorded as DBLP:journals/corr/abs-1708-01476.
@inproceedings{DBLP:conf/cluster/RohlEHW17,
  author       = {R{\"{o}}hl, Thomas and
                  Eitzinger, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {{LIKWID} Monitoring Stack: {A} Flexible Framework Enabling Job Specific
                  Performance monitoring for the masses},
  booktitle    = {2017 {IEEE} International Conference on Cluster Computing, {CLUSTER}
                  2017, Honolulu, HI, USA, September 5-8, 2017},
  pages        = {781--784},
  publisher    = {{IEEE} Computer Society},
  year         = {2017},
  doi          = {10.1109/CLUSTER.2017.115},
  url          = {https://doi.org/10.1109/CLUSTER.2017.115},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/cluster/RohlEHW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ISC High Performance 2017 paper (LNCS 10266). An arXiv entry with the same
% title is recorded as DBLP:journals/corr/HofmannHWF17.
% The vendor name Intel is brace-protected so sentence-casing styles keep the capital.
@inproceedings{DBLP:conf/supercomputer/HofmannHWF17,
  author       = {Johannes Hofmann and
                  Georg Hager and
                  Gerhard Wellein and
                  Dietmar Fey},
  editor       = {Julian M. Kunkel and
                  Rio Yokota and
                  Pavan Balaji and
                  David E. Keyes},
  title        = {An Analysis of Core- and Chip-Level Architectural Features in Four
                  Generations of {Intel} Server Processors},
  booktitle    = {High Performance Computing - 32nd International Conference, {ISC}
                  High Performance 2017, Frankfurt, Germany, June 18-22, 2017, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {10266},
  pages        = {294--314},
  publisher    = {Springer},
  year         = {2017},
  url          = {https://doi.org/10.1007/978-3-319-58667-0\_16},
  doi          = {10.1007/978-3-319-58667-0\_16},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/supercomputer/HofmannHWF17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1702.04653. The URL uses the https scheme, matching the 2020
% arXiv entries in this file.
@article{DBLP:journals/corr/HammerEHW17,
  author       = {Julian Hammer and
                  Jan Eitzinger and
                  Georg Hager and
                  Gerhard Wellein},
  title        = {Kerncraft: {A} Tool for Analytic Performance Modeling of Loop Kernels},
  journal      = {CoRR},
  volume       = {abs/1702.04653},
  year         = {2017},
  url          = {https://arxiv.org/abs/1702.04653},
  eprinttype   = {arXiv},
  eprint       = {1702.04653},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HammerEHW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1702.07554. A conference entry with the same title is recorded
% as DBLP:conf/supercomputer/HofmannHWF17. The URL uses the https scheme.
% The vendor name Intel is brace-protected so sentence-casing styles keep the capital.
@article{DBLP:journals/corr/HofmannHWF17,
  author       = {Johannes Hofmann and
                  Georg Hager and
                  Gerhard Wellein and
                  Dietmar Fey},
  title        = {An analysis of core- and chip-level architectural features in four
                  generations of {Intel} server processors},
  journal      = {CoRR},
  volume       = {abs/1702.07554},
  year         = {2017},
  url          = {https://arxiv.org/abs/1702.07554},
  eprinttype   = {arXiv},
  eprint       = {1702.07554},
  timestamp    = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/HofmannHWF17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1708.01476. A conference entry with the same title is recorded
% as DBLP:conf/cluster/RohlEHW17. The URL uses the https scheme.
@article{DBLP:journals/corr/abs-1708-01476,
  author       = {Thomas R{\"{o}}hl and
                  Jan Eitzinger and
                  Georg Hager and
                  Gerhard Wellein},
  title        = {{LIKWID} Monitoring Stack: {A} flexible framework enabling job specific
                  performance monitoring for the masses},
  journal      = {CoRR},
  volume       = {abs/1708.01476},
  year         = {2017},
  url          = {https://arxiv.org/abs/1708.01476},
  eprinttype   = {arXiv},
  eprint       = {1708.01476},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1708-01476.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1708.02030. A journal entry with the same title is recorded as
% DBLP:journals/tpds/ShahzadTKZHW19. The URL uses the https scheme.
@article{DBLP:journals/corr/abs-1708-02030,
  author       = {Faisal Shahzad and
                  Jonas Thies and
                  Moritz Kreutzer and
                  Thomas Zeiser and
                  Georg Hager and
                  Gerhard Wellein},
  title        = {{CRAFT:} {A} library for easier application-level Checkpoint/Restart
                  and Automatic Fault Tolerance},
  journal      = {CoRR},
  volume       = {abs/1708.02030},
  year         = {2017},
  url          = {https://arxiv.org/abs/1708.02030},
  eprinttype   = {arXiv},
  eprint       = {1708.02030},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1708-02030.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1708-09689,
  author       = {Pieper, Andreas and
                  Hager, Georg and
                  Fehske, Holger},
  title        = {{PVSC-DTM:} {A} domain-specific language and matrix-free stencil code
                  for investigating electronic properties of {Dirac} and topological materials},
  journal      = {CoRR},
  volume       = {abs/1708.09689},
  year         = {2017},
  url          = {http://arxiv.org/abs/1708.09689},
  eprinttype   = {arXiv},
  eprint       = {1708.09689},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1708-09689.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1710-04094,
  author       = {R{\"{o}}hl, Thomas and
                  Eitzinger, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Validation of hardware events for successful performance pattern identification
                  in High Performance Computing},
  journal      = {CoRR},
  volume       = {abs/1710.04094},
  year         = {2017},
  url          = {http://arxiv.org/abs/1710.04094},
  eprinttype   = {arXiv},
  eprint       = {1710.04094},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1710-04094.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/concurrency/HagerKVW16,
  author       = {Hager, Georg and
                  Kerbyson, Darren J. and
                  Vishnu, Abhinav and
                  Wellein, Gerhard},
  title        = {Performance and power for highly parallel systems},
  journal      = {Concurr. Comput. Pract. Exp.},
  volume       = {28},
  number       = {2},
  pages        = {187--188},
  year         = {2016},
  url          = {https://doi.org/10.1002/cpe.3761},
  doi          = {10.1002/CPE.3761},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/concurrency/HagerKVW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/concurrency/HagerTHW16,
  author       = {Hager, Georg and
                  Treibig, Jan and
                  Habich, Johannes and
                  Wellein, Gerhard},
  title        = {Exploring performance and power properties of modern multi-core chips
                  via simple machine models},
  journal      = {Concurr. Comput. Pract. Exp.},
  volume       = {28},
  number       = {2},
  pages        = {189--210},
  year         = {2016},
  url          = {https://doi.org/10.1002/cpe.3180},
  doi          = {10.1002/CPE.3180},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/concurrency/HagerTHW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/concurrency/WittmannHZTW16,
  author       = {Wittmann, Markus and
                  Hager, Georg and
                  Zeiser, Thomas and
                  Treibig, Jan and
                  Wellein, Gerhard},
  title        = {Chip-level and multi-node analysis of energy-optimized lattice {Boltzmann}
                  {CFD} simulations},
  journal      = {Concurr. Comput. Pract. Exp.},
  volume       = {28},
  number       = {7},
  pages        = {2295--2315},
  year         = {2016},
  url          = {https://doi.org/10.1002/cpe.3489},
  doi          = {10.1002/CPE.3489},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/concurrency/WittmannHZTW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jcphy/PieperKAGFHLW16,
  author       = {Pieper, Andreas and
                  Kreutzer, Moritz and
                  Alvermann, Andreas and
                  Galgon, Martin and
                  Fehske, Holger and
                  Hager, Georg and
                  Lang, Bruno and
                  Wellein, Gerhard},
  title        = {High-performance implementation of {Chebyshev} filter diagonalization
                  for interior eigenvalue computations},
  journal      = {J. Comput. Phys.},
  volume       = {325},
  pages        = {226--243},
  year         = {2016},
  url          = {https://doi.org/10.1016/j.jcp.2016.08.027},
  doi          = {10.1016/J.JCP.2016.08.027},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jcphy/PieperKAGFHLW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/arcs/HofmannFEHW16,
  author       = {Hofmann, Johannes and
                  Fey, Dietmar and
                  Eitzinger, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  editor       = {Hannig, Frank and
                  Cardoso, Jo{\~{a}}o M. P. and
                  Pionteck, Thilo and
                  Fey, Dietmar and
                  Schr{\"{o}}der{-}Preikschat, Wolfgang and
                  Teich, J{\"{u}}rgen},
  title        = {Analysis of {Intel's} {Haswell} Microarchitecture Using the {ECM} Model
                  and Microbenchmarks},
  booktitle    = {Architecture of Computing Systems - {ARCS} 2016 - 29th International
                  Conference, Nuremberg, Germany, April 4-7, 2016, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {9637},
  pages        = {210--222},
  publisher    = {Springer},
  year         = {2016},
  url          = {https://doi.org/10.1007/978-3-319-30695-7\_16},
  doi          = {10.1007/978-3-319-30695-7\_16},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/arcs/HofmannFEHW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/MalasHHLPK16,
  author       = {Malas, Tareq M. and
                  Hornich, Julian and
                  Hager, Georg and
                  Ltaief, Hatem and
                  Pflaum, Christoph and
                  Keyes, David E.},
  title        = {Optimization of an Electromagnetics Code with Multicore Wavefront
                  Diamond Blocking and Multi-dimensional Intra-Tile Parallelization},
  booktitle    = {2016 {IEEE} International Parallel and Distributed Processing Symposium,
                  {IPDPS} 2016, Chicago, IL, USA, May 23-27, 2016},
  pages        = {142--151},
  publisher    = {{IEEE} Computer Society},
  year         = {2016},
  url          = {https://doi.org/10.1109/IPDPS.2016.87},
  doi          = {10.1109/IPDPS.2016.87},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ipps/MalasHHLPK16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:series/lncse/ThiesGSAKPRBFHLW16,
  author       = {Thies, Jonas and
                  Galgon, Martin and
                  Shahzad, Faisal and
                  Alvermann, Andreas and
                  Kreutzer, Moritz and
                  Pieper, Andreas and
                  R{\"{o}}hrig{-}Z{\"{o}}llner, Melven and
                  Basermann, Achim and
                  Fehske, Holger and
                  Hager, Georg and
                  Lang, Bruno and
                  Wellein, Gerhard},
  editor       = {Bungartz, Hans{-}Joachim and
                  Neumann, Philipp and
                  Nagel, Wolfgang E.},
  title        = {Towards an Exascale Enabled Sparse Solver Repository},
  booktitle    = {Software for Exascale Computing - {SPPEXA} 2013-2015},
  series       = {Lecture Notes in Computational Science and Engineering},
  volume       = {113},
  pages        = {295--316},
  publisher    = {Springer},
  year         = {2016},
  url          = {https://doi.org/10.1007/978-3-319-40528-5\_13},
  doi          = {10.1007/978-3-319-40528-5\_13},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/series/lncse/ThiesGSAKPRBFHLW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:series/lncse/KreutzerTPAGRSBBFHLW16,
  author       = {Kreutzer, Moritz and
                  Thies, Jonas and
                  Pieper, Andreas and
                  Alvermann, Andreas and
                  Galgon, Martin and
                  R{\"{o}}hrig{-}Z{\"{o}}llner, Melven and
                  Shahzad, Faisal and
                  Basermann, Achim and
                  Bishop, Alan R. and
                  Fehske, Holger and
                  Hager, Georg and
                  Lang, Bruno and
                  Wellein, Gerhard},
  editor       = {Bungartz, Hans{-}Joachim and
                  Neumann, Philipp and
                  Nagel, Wolfgang E.},
  title        = {Performance Engineering and Energy Efficiency of Building Blocks for
                  Large, Sparse Eigenvalue Computations on Heterogeneous Supercomputers},
  booktitle    = {Software for Exascale Computing - {SPPEXA} 2013-2015},
  series       = {Lecture Notes in Computational Science and Engineering},
  volume       = {113},
  pages        = {317--338},
  publisher    = {Springer},
  year         = {2016},
  url          = {https://doi.org/10.1007/978-3-319-40528-5\_14},
  doi          = {10.1007/978-3-319-40528-5\_14},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/series/lncse/KreutzerTPAGRSBBFHLW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HofmannFREHW16,
  author       = {Hofmann, Johannes and
                  Fey, Dietmar and
                  Riedmann, Michael and
                  Eitzinger, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Performance analysis of the {Kahan}-enhanced scalar product on current
                  multi- and manycore processors},
  journal      = {CoRR},
  volume       = {abs/1604.01890},
  year         = {2016},
  url          = {http://arxiv.org/abs/1604.01890},
  eprinttype   = {arXiv},
  eprint       = {1604.01890},
  timestamp    = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/HofmannFREHW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/siamsc/MalasHLSWK15,
  author       = {Malas, Tareq M. and
                  Hager, Georg and
                  Ltaief, Hatem and
                  Stengel, Holger and
                  Wellein, Gerhard and
                  Keyes, David E.},
  title        = {Multicore-Optimized Wavefront Diamond Blocking for Optimizing Stencil
                  Updates},
  journal      = {{SIAM} J. Sci. Comput.},
  volume       = {37},
  number       = {4},
  year         = {2015},
  url          = {https://doi.org/10.1137/140991133},
  doi          = {10.1137/140991133},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/siamsc/MalasHLSWK15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/siamsc/Rohrig-ZollnerT15,
  author       = {R{\"{o}}hrig{-}Z{\"{o}}llner, Melven and
                  Thies, Jonas and
                  Kreutzer, Moritz and
                  Alvermann, Andreas and
                  Pieper, Andreas and
                  Basermann, Achim and
                  Hager, Georg and
                  Wellein, Gerhard and
                  Fehske, Holger},
  title        = {Increasing the Performance of the {Jacobi-Davidson} Method by Blocking},
  journal      = {{SIAM} J. Sci. Comput.},
  volume       = {37},
  number       = {6},
  year         = {2015},
  url          = {https://doi.org/10.1137/140976017},
  doi          = {10.1137/140976017},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/siamsc/Rohrig-ZollnerT15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cluster/ShahzadKZMPHW15,
  author       = {Shahzad, Faisal and
                  Kreutzer, Moritz and
                  Zeiser, Thomas and
                  Machado, Rui and
                  Pieper, Andreas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Building a Fault Tolerant Application Using the {GASPI} Communication
                  Layer},
  booktitle    = {2015 {IEEE} International Conference on Cluster Computing, {CLUSTER}
                  2015, Chicago, IL, USA, September 8-11, 2015},
  pages        = {580--587},
  publisher    = {{IEEE} Computer Society},
  year         = {2015},
  url          = {https://doi.org/10.1109/CLUSTER.2015.106},
  doi          = {10.1109/CLUSTER.2015.106},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cluster/ShahzadKZMPHW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ics/StengelTHW15,
  author       = {Stengel, Holger and
                  Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  editor       = {Bhuyan, Laxmi N. and
                  Chong, Fred and
                  Sarkar, Vivek},
  title        = {Quantifying Performance Bottlenecks of Stencil Computations Using
                  the Execution-Cache-Memory Model},
  booktitle    = {Proceedings of the 29th {ACM} on International Conference on Supercomputing,
                  {ICS}'15, Newport Beach/Irvine, CA, USA, June 08 - 11, 2015},
  pages        = {207--216},
  publisher    = {{ACM}},
  year         = {2015},
  url          = {https://doi.org/10.1145/2751205.2751240},
  doi          = {10.1145/2751205.2751240},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ics/StengelTHW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/KreutzerPHWAF15,
  author       = {Kreutzer, Moritz and
                  Pieper, Andreas and
                  Hager, Georg and
                  Wellein, Gerhard and
                  Alvermann, Andreas and
                  Fehske, Holger},
  title        = {Performance Engineering of the Kernel Polynomial Method on Large-Scale
                  {CPU-GPU} Systems},
  booktitle    = {2015 {IEEE} International Parallel and Distributed Processing Symposium,
                  {IPDPS} 2015, Hyderabad, India, May 25-29, 2015},
  pages        = {417--426},
  publisher    = {{IEEE} Computer Society},
  year         = {2015},
  url          = {https://doi.org/10.1109/IPDPS.2015.76},
  doi          = {10.1109/IPDPS.2015.76},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ipps/KreutzerPHWAF15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ppam/HofmannFREHW15,
  author       = {Hofmann, Johannes and
                  Fey, Dietmar and
                  Riedmann, Michael and
                  Eitzinger, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  editor       = {Wyrzykowski, Roman and
                  Deelman, Ewa and
                  Dongarra, Jack J. and
                  Karczewski, Konrad and
                  Kitowski, Jacek and
                  Wiatr, Kazimierz},
  title        = {Performance Analysis of the {Kahan}-Enhanced Scalar Product on Current
                  Multicore Processors},
  booktitle    = {Parallel Processing and Applied Mathematics - 11th International Conference,
                  {PPAM} 2015, Krakow, Poland, September 6-9, 2015. Revised Selected
                  Papers, Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {9573},
  pages        = {63--73},
  publisher    = {Springer},
  year         = {2015},
  url          = {https://doi.org/10.1007/978-3-319-32149-3\_7},
  doi          = {10.1007/978-3-319-32149-3\_7},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ppam/HofmannFREHW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/HammerHEW15,
  author       = {Hammer, Julian and
                  Hager, Georg and
                  Eitzinger, Jan and
                  Wellein, Gerhard},
  editor       = {Jarvis, Stephen A. and
                  Wright, Steven A. and
                  Hammond, Simon D.},
  title        = {Automatic loop kernel analysis and performance modeling with {Kerncraft}},
  booktitle    = {Proceedings of the 6th International Workshop on Performance Modeling,
                  Benchmarking, and Simulation of High Performance Computing Systems,
                  {PMBS} 2015, Austin, Texas, USA, November 15, 2015},
  pages        = {4:1--4:11},
  publisher    = {{ACM}},
  year         = {2015},
  url          = {https://doi.org/10.1145/2832087.2832092},
  doi          = {10.1145/2832087.2832092},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sc/HammerHEW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HofmannFEHW15,
  author       = {Hofmann, Johannes and
                  Fey, Dietmar and
                  Eitzinger, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Performance analysis of the {Kahan}-enhanced scalar product on current
                  multicore processors},
  journal      = {CoRR},
  volume       = {abs/1505.02586},
  year         = {2015},
  url          = {http://arxiv.org/abs/1505.02586},
  eprinttype   = {arXiv},
  eprint       = {1505.02586},
  timestamp    = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/HofmannFEHW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/ShahzadKZMPHW15,
  author       = {Shahzad, Faisal and
                  Kreutzer, Moritz and
                  Zeiser, Thomas and
                  Machado, Rui and
                  Pieper, Andreas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Building a fault tolerant application using the {GASPI} communication
                  layer},
  journal      = {CoRR},
  volume       = {abs/1505.04628},
  year         = {2015},
  url          = {http://arxiv.org/abs/1505.04628},
  eprinttype   = {arXiv},
  eprint       = {1505.04628},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/ShahzadKZMPHW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/WittmannZHW15,
  author       = {Wittmann, Markus and
                  Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Short Note on Costs of Floating Point Operations on current x86-64
                  Architectures: Denormals, Overflow, Underflow, and Division by Zero},
  journal      = {CoRR},
  volume       = {abs/1506.03997},
  year         = {2015},
  url          = {http://arxiv.org/abs/1506.03997},
  eprinttype   = {arXiv},
  eprint       = {1506.03997},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/WittmannZHW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/KreutzerTRPSGBF15,
  author       = {Kreutzer, Moritz and
                  Thies, Jonas and
                  R{\"{o}}hrig{-}Z{\"{o}}llner, Melven and
                  Pieper, Andreas and
                  Shahzad, Faisal and
                  Galgon, Martin and
                  Basermann, Achim and
                  Fehske, Holger and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {{GHOST:} Building blocks for high performance sparse linear algebra
                  on heterogeneous systems},
  journal      = {CoRR},
  volume       = {abs/1507.08101},
  year         = {2015},
  url          = {http://arxiv.org/abs/1507.08101},
  eprinttype   = {arXiv},
  eprint       = {1507.08101},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/KreutzerTRPSGBF15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HammerHEW15,
  author       = {Hammer, Julian and
                  Hager, Georg and
                  Eitzinger, Jan and
                  Wellein, Gerhard},
  title        = {Automatic Loop Kernel Analysis and Performance Modeling With {Kerncraft}},
  journal      = {CoRR},
  volume       = {abs/1509.03778},
  year         = {2015},
  url          = {http://arxiv.org/abs/1509.03778},
  eprinttype   = {arXiv},
  eprint       = {1509.03778},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HammerHEW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/PieperKGAFHLW15,
  author       = {Pieper, Andreas and
                  Kreutzer, Moritz and
                  Galgon, Martin and
                  Alvermann, Andreas and
                  Fehske, Holger and
                  Hager, Georg and
                  Lang, Bruno and
                  Wellein, Gerhard},
  title        = {High-performance implementation of {Chebyshev} filter diagonalization
                  for interior eigenvalue computations},
  journal      = {CoRR},
  volume       = {abs/1510.04895},
  year         = {2015},
  url          = {http://arxiv.org/abs/1510.04895},
  eprinttype   = {arXiv},
  eprint       = {1510.04895},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/PieperKGAFHLW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/MalasHLK15,
  author       = {Malas, Tareq M. and
                  Hager, Georg and
                  Ltaief, Hatem and
                  Keyes, David E.},
  title        = {Multi-dimensional intra-tile parallelization for memory-starved stencil
                  computations},
  journal      = {CoRR},
  volume       = {abs/1510.04995},
  year         = {2015},
  url          = {http://arxiv.org/abs/1510.04995},
  eprinttype   = {arXiv},
  eprint       = {1510.04995},
  timestamp    = {Fri, 09 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MalasHLK15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/MalasHHLPK15,
  author       = {Malas, Tareq M. and
                  Hornich, Julian and
                  Hager, Georg and
                  Ltaief, Hatem and
                  Pflaum, Christoph and
                  Keyes, David E.},
  title        = {Optimization of an electromagnetics code with multicore wavefront
                  diamond blocking and multi-dimensional intra-tile parallelization},
  journal      = {CoRR},
  volume       = {abs/1510.05218},
  year         = {2015},
  url          = {http://arxiv.org/abs/1510.05218},
  eprinttype   = {arXiv},
  eprint       = {1510.05218},
  timestamp    = {Fri, 09 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MalasHHLPK15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HofmannFEHW15a,
  author       = {Hofmann, Johannes and
                  Fey, Dietmar and
                  Eitzinger, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Analysis of {Intel's} {Haswell} Microarchitecture Using The {ECM} Model
                  and Microbenchmarks},
  journal      = {CoRR},
  volume       = {abs/1511.03639},
  year         = {2015},
  url          = {http://arxiv.org/abs/1511.03639},
  eprinttype   = {arXiv},
  eprint       = {1511.03639},
  timestamp    = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/HofmannFEHW15a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ppl/KronawitterSHL14,
  author       = {Kronawitter, Stefan and
                  Stengel, Holger and
                  Hager, Georg and
                  Lengauer, Christian},
  title        = {Domain-Specific Optimization of Two {Jacobi} Smoother Kernels and Their
                  Evaluation in the {ECM} Performance Model},
  journal      = {Parallel Process. Lett.},
  volume       = {24},
  number       = {3},
  year         = {2014},
  url          = {https://doi.org/10.1142/S0129626414410047},
  doi          = {10.1142/S0129626414410047},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ppl/KronawitterSHL14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/siamsc/KreutzerHWFB14,
  author       = {Kreutzer, Moritz and
                  Hager, Georg and
                  Wellein, Gerhard and
                  Fehske, Holger and
                  Bishop, Alan R.},
  title        = {A Unified Sparse Matrix Data Format for Efficient General Sparse Matrix-Vector
                  Multiplication on Modern Processors with Wide {SIMD} Units},
  journal      = {{SIAM} J. Sci. Comput.},
  volume       = {36},
  number       = {5},
  year         = {2014},
  url          = {https://doi.org/10.1137/130930352},
  doi          = {10.1137/130930352},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/siamsc/KreutzerHWFB14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/arcs/HofmannTHW14,
  author       = {Hofmann, Johannes and
                  Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  editor       = {Stechele, Walter and
                  Wild, Thomas},
  title        = {Performance Engineering for a Medical Imaging Application on the {Intel}
                  {Xeon} {Phi} Accelerator},
  booktitle    = {{ARCS} 2014 - 27th International Conference on Architecture of Computing
                  Systems, Workshop Proceedings, February 25-28, 2014, Luebeck, Germany,
                  University of Luebeck, Institute of Computer Engineering},
  pages        = {1--8},
  publisher    = {{VDE} Verlag / {IEEE} Xplore},
  year         = {2014},
  url          = {https://ieeexplore.ieee.org/document/6775080/},
  timestamp    = {Sun, 08 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/arcs/HofmannTHW14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/AlvermannBFGHKKLPRSTW14,
  author       = {Alvermann, Andreas and
                  Basermann, Achim and
                  Fehske, Holger and
                  Galgon, Martin and
                  Hager, Georg and
                  Kreutzer, Moritz and
                  Kr{\"{a}}mer, Lukas and
                  Lang, Bruno and
                  Pieper, Andreas and
                  R{\"{o}}hrig{-}Z{\"{o}}llner, Melven and
                  Shahzad, Faisal and
                  Thies, Jonas and
                  Wellein, Gerhard},
  editor       = {Lopes, Lu{\'{\i}}s M. B. and
                  Zilinskas, Julius and
                  Costan, Alexandru and
                  Cascella, Roberto G. and
                  Kecskemeti, Gabor and
                  Jeannot, Emmanuel and
                  Cannataro, Mario and
                  Ricci, Laura and
                  Benkner, Siegfried and
                  Petit, Salvador and
                  Scarano, Vittorio and
                  Gracia, Jos{\'{e}} and
                  Hunold, Sascha and
                  Scott, Stephen L. and
                  Lankes, Stefan and
                  Lengauer, Christian and
                  Carretero, Jes{\'{u}}s and
                  Breitbart, Jens and
                  Alexander, Michael},
  title        = {{ESSEX:} Equipping Sparse Solvers for Exascale},
  booktitle    = {Euro-Par 2014: Parallel Processing Workshops - Euro-Par 2014 International
                  Workshops, Porto, Portugal, August 25-26, 2014, Revised Selected Papers,
                  Part {II}},
  series       = {Lecture Notes in Computer Science},
  volume       = {8806},
  pages        = {577--588},
  publisher    = {Springer},
  year         = {2014},
  url          = {https://doi.org/10.1007/978-3-319-14313-2\_49},
  doi          = {10.1007/978-3-319-14313-2\_49},
  timestamp    = {Sun, 12 Nov 2023 02:07:45 +0100},
  biburl       = {https://dblp.org/rec/conf/europar/AlvermannBFGHKKLPRSTW14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icppw/RoehlTHW14,
  author       = {Roehl, Thomas and
                  Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Overhead Analysis of Performance Counter Measurements},
  booktitle    = {43rd International Conference on Parallel Processing Workshops, {ICPPW}
                  2014, Minneapolis, MN, USA, September 9-12, 2014},
  pages        = {176--185},
  publisher    = {{IEEE} Computer Society},
  year         = {2014},
  url          = {https://doi.org/10.1109/ICPPW.2014.34},
  doi          = {10.1109/ICPPW.2014.34},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icppw/RoehlTHW14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ppopp/HofmannTHW14,
  author       = {Hofmann, Johannes and
                  Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  editor       = {Tanase, Gabriel and
                  Wu, Peng and
                  Falcou, Joel},
  title        = {Comparing the performance of different x86 {SIMD} instruction sets
                  for a medical imaging application on modern multi- and manycore chips},
  booktitle    = {Proceedings of the 2014 Workshop on Programming models for {SIMD}/Vector
                  processing, {WPMVP} 2014, Orlando, Florida, USA, February 16, 2014},
  pages        = {57--64},
  publisher    = {{ACM}},
  year         = {2014},
  url          = {https://doi.org/10.1145/2568058.2568068},
  doi          = {10.1145/2568058.2568068},
  timestamp    = {Sun, 12 Jun 2022 19:46:08 +0200},
  biburl       = {https://dblp.org/rec/conf/ppopp/HofmannTHW14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HofmannTHW14,
  author       = {Hofmann, Johannes and
                  Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Performance Engineering for a Medical Imaging Application on the {Intel}
                  {Xeon} {Phi} Accelerator},
  journal      = {CoRR},
  volume       = {abs/1401.3615},
  year         = {2014},
  url          = {http://arxiv.org/abs/1401.3615},
  eprinttype   = {arXiv},
  eprint       = {1401.3615},
  timestamp    = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/HofmannTHW14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HofmannTHW14a,
  author       = {Hofmann, Johannes and
                  Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Comparing the Performance of Different x86 {SIMD} Instruction Sets
                  for a Medical Imaging Application on Modern Multi- and Manycore Chips},
  journal      = {CoRR},
  volume       = {abs/1401.7494},
  year         = {2014},
  url          = {http://arxiv.org/abs/1401.7494},
  eprinttype   = {arXiv},
  eprint       = {1401.7494},
  timestamp    = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/HofmannTHW14a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/WittmannZHW14,
  author       = {Wittmann, Markus and
                  Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Modeling and analyzing performance for highly optimized propagation
                  steps of the lattice {Boltzmann} method on sparse lattices},
  journal      = {CoRR},
  volume       = {abs/1410.0412},
  year         = {2014},
  url          = {http://arxiv.org/abs/1410.0412},
  eprinttype   = {arXiv},
  eprint       = {1410.0412},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/WittmannZHW14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/MalasHLSWK14,
  author       = {Malas, Tareq M. and
                  Hager, Georg and
                  Ltaief, Hatem and
                  Stengel, Holger and
                  Wellein, Gerhard and
                  Keyes, David E.},
  title        = {Multicore-optimized wavefront diamond blocking for optimizing stencil
                  updates},
  journal      = {CoRR},
  volume       = {abs/1410.3060},
  year         = {2014},
  url          = {http://arxiv.org/abs/1410.3060},
  eprinttype   = {arXiv},
  eprint       = {1410.3060},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MalasHLSWK14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/StengelTHW14,
  author       = {Stengel, Holger and
                  Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Quantifying performance bottlenecks of stencil computations using
                  the {Execution-Cache-Memory} model},
  journal      = {CoRR},
  volume       = {abs/1410.5010},
  year         = {2014},
  url          = {http://arxiv.org/abs/1410.5010},
  eprinttype   = {arXiv},
  eprint       = {1410.5010},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/StengelTHW14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/KreutzerHWPAF14,
  author       = {Kreutzer, Moritz and
                  Hager, Georg and
                  Wellein, Gerhard and
                  Pieper, Andreas and
                  Alvermann, Andreas and
                  Fehske, Holger},
  title        = {Performance Engineering of the Kernel Polynomial Method on Large-Scale
                  {CPU-GPU} Systems},
  journal      = {CoRR},
  volume       = {abs/1410.5242},
  year         = {2014},
  url          = {http://arxiv.org/abs/1410.5242},
  eprinttype   = {arXiv},
  eprint       = {1410.5242},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/KreutzerHWPAF14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/MalasHLK14,
  author       = {Malas, Tareq M. and
                  Hager, Georg and
                  Ltaief, Hatem and
                  Keyes, David E.},
  title        = {Towards energy efficiency and maximum computational intensity for
                  stencil algorithms using wavefront diamond temporal blocking},
  journal      = {CoRR},
  volume       = {abs/1410.5561},
  year         = {2014},
  url          = {http://arxiv.org/abs/1410.5561},
  eprinttype   = {arXiv},
  eprint       = {1410.5561},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MalasHLK14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cma/WittmannZHW13,
  author       = {Wittmann, Markus and
                  Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Comparison of different propagation steps for lattice {Boltzmann} methods},
  journal      = {Comput. Math. Appl.},
  volume       = {65},
  number       = {6},
  pages        = {924--935},
  year         = {2013},
  url          = {https://doi.org/10.1016/j.camwa.2012.05.002},
  doi          = {10.1016/J.CAMWA.2012.05.002},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/cma/WittmannZHW13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijhpca/TreibigHHHW13,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Hofmann, Hannes G. and
                  Hornegger, Joachim and
                  Wellein, Gerhard},
  title        = {Pushing the limits for medical image reconstruction on recent standard
                  multicore processors},
  journal      = {Int. J. High Perform. Comput. Appl.},
  volume       = {27},
  number       = {2},
  pages        = {162--177},
  year         = {2013},
  url          = {https://doi.org/10.1177/1094342012442424},
  doi          = {10.1177/1094342012442424},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijhpca/TreibigHHHW13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ppl/ShahzadWKZHW13,
  author       = {Shahzad, Faisal and
                  Wittmann, Markus and
                  Kreutzer, Moritz and
                  Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {A Survey of Checkpoint/Restart Techniques on Distributed Memory Systems},
  journal      = {Parallel Process. Lett.},
  volume       = {23},
  number       = {4},
  year         = {2013},
  url          = {https://doi.org/10.1142/S0129626413400112},
  doi          = {10.1142/S0129626413400112},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ppl/ShahzadWKZHW13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ieeehpcs/ScharpffIHR13,
  author       = {Scharpff, Tobias and
                  Iglberger, Klaus and
                  Hager, Georg and
                  R{\"{u}}de, Ulrich},
  title        = {Model-guided performance analysis of the sparse matrix-matrix multiplication},
  booktitle    = {International Conference on High Performance Computing {\&} Simulation,
                  {HPCS} 2013, Helsinki, Finland, July 1-5, 2013},
  pages        = {445--452},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/HPCSim.2013.6641452},
  doi          = {10.1109/HPCSIM.2013.6641452},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ieeehpcs/ScharpffIHR13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/ShahzadWZHW13,
  author       = {Shahzad, Faisal and
                  Wittmann, Markus and
                  Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {An Evaluation of Different {I/O} Techniques for Checkpoint/Restart},
  booktitle    = {2013 {IEEE} International Symposium on Parallel {\&} Distributed
                  Processing, Workshops and Phd Forum, Cambridge, MA, USA, May 20-24,
                  2013},
  pages        = {1708--1716},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/IPDPSW.2013.145},
  doi          = {10.1109/IPDPSW.2013.145},
  timestamp    = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ipps/ShahzadWZHW13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1302-4280,
  author       = {Wittmann, Markus and
                  Hager, Georg and
                  Zeiser, Thomas and
                  Wellein, Gerhard},
  title        = {Asynchronous {MPI} for the Masses},
  journal      = {CoRR},
  volume       = {abs/1302.4280},
  year         = {2013},
  url          = {http://arxiv.org/abs/1302.4280},
  eprinttype   = {arXiv},
  eprint       = {1302.4280},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1302-4280.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1303-1651,
  author       = {Scharpff, Tobias and
                  Iglberger, Klaus and
                  Hager, Georg and
                  R{\"{u}}de, Ulrich},
  title        = {Model-guided Performance Analysis of the Sparse Matrix-Matrix Multiplication},
  journal      = {CoRR},
  volume       = {abs/1303.1651},
  year         = {2013},
  url          = {http://arxiv.org/abs/1303.1651},
  eprinttype   = {arXiv},
  eprint       = {1303.1651},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1303-1651.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1303-4538,
  author       = {Scheit, Christoph and
                  Hager, Georg and
                  Treibig, Jan and
                  Becker, Stefan and
                  Wellein, Gerhard},
  title        = {Optimization of {FASTEST-3D} for Modern Multicore Systems},
  journal      = {CoRR},
  volume       = {abs/1303.4538},
  year         = {2013},
  url          = {http://arxiv.org/abs/1303.4538},
  eprinttype   = {arXiv},
  eprint       = {1303.4538},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1303-4538.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1304-7664,
  author       = {Wittmann, Markus and
                  Hager, Georg and
                  Zeiser, Thomas and
                  Wellein, Gerhard},
  title        = {An analysis of energy-optimized lattice-{Boltzmann} {CFD} simulations
                  from the chip to the highly parallel level},
  journal      = {CoRR},
  volume       = {abs/1304.7664},
  year         = {2013},
  url          = {http://arxiv.org/abs/1304.7664},
  eprinttype   = {arXiv},
  eprint       = {1304.7664},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1304-7664.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/KreutzerHWFB13,
  author       = {Kreutzer, Moritz and
                  Hager, Georg and
                  Wellein, Gerhard and
                  Fehske, Holger and
                  Bishop, Alan R.},
  title        = {A unified sparse matrix data format for modern processors with wide
                  {SIMD} units},
  journal      = {CoRR},
  volume       = {abs/1307.6209},
  year         = {2013},
  url          = {http://arxiv.org/abs/1307.6209},
  eprinttype   = {arXiv},
  eprint       = {1307.6209},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/KreutzerHWFB13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/siamsc/IglbergerHTR12,
  author       = {Iglberger, Klaus and
                  Hager, Georg and
                  Treibig, Jan and
                  R{\"{u}}de, Ulrich},
  title        = {Expression Templates Revisited: {A} Performance Analysis of Current
                  Methodologies},
  journal      = {{SIAM} J. Sci. Comput.},
  volume       = {34},
  number       = {2},
  year         = {2012},
  url          = {https://doi.org/10.1137/110830125},
  doi          = {10.1137/110830125},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/siamsc/IglbergerHTR12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/Hager12,
  author       = {Hager, Georg},
  editor       = {Caragiannis, Ioannis and
                  Alexander, Michael and
                  Badia, Rosa M. and
                  Cannataro, Mario and
                  Costan, Alexandru and
                  Danelutto, Marco and
                  Desprez, Fr{\'{e}}d{\'{e}}ric and
                  Krammer, Bettina and
                  Sahuquillo, Julio and
                  Scott, Stephen L. and
                  Weidendorfer, Josef},
  title        = {Performance Engineering: From Numbers to Insight},
  booktitle    = {Euro-Par 2012: Parallel Processing Workshops - BDMC, CGWS, HeteroPar,
                  HiBB, OMHI, Paraphrase, PROPER, Resilience, UCHPC, VHPC, Rhodes Islands,
                  Greece, August 27-31, 2012. Revised Selected Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {7640},
  pages        = {393--394},
  publisher    = {Springer},
  year         = {2012},
  url          = {https://doi.org/10.1007/978-3-642-36949-0_44},
  doi          = {10.1007/978-3-642-36949-0_44},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/europar/Hager12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/TreibigHW12,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  editor       = {Caragiannis, Ioannis and
                  Alexander, Michael and
                  Badia, Rosa M. and
                  Cannataro, Mario and
                  Costan, Alexandru and
                  Danelutto, Marco and
                  Desprez, Fr{\'{e}}d{\'{e}}ric and
                  Krammer, Bettina and
                  Sahuquillo, Julio and
                  Scott, Stephen L. and
                  Weidendorfer, Josef},
  title        = {Performance Patterns and Hardware Metrics on Modern Multicore Processors:
                  Best Practices for Performance Engineering},
  booktitle    = {Euro-Par 2012: Parallel Processing Workshops - BDMC, CGWS, HeteroPar,
                  HiBB, OMHI, Paraphrase, PROPER, Resilience, UCHPC, VHPC, Rhodes Islands,
                  Greece, August 27-31, 2012. Revised Selected Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {7640},
  pages        = {451--460},
  publisher    = {Springer},
  year         = {2012},
  url          = {https://doi.org/10.1007/978-3-642-36949-0_50},
  doi          = {10.1007/978-3-642-36949-0_50},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/europar/TreibigHW12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ieeehpcs/IglbergerHTR12,
  author       = {Iglberger, Klaus and
                  Hager, Georg and
                  Treibig, Jan and
                  R{\"{u}}de, Ulrich},
  editor       = {Smari, Waleed W. and
                  Zeljkovic, Vesna},
  title        = {High performance smart expression template math libraries},
  booktitle    = {2012 International Conference on High Performance Computing {\&}
                  Simulation, {HPCS} 2012, Madrid, Spain, July 2-6, 2012},
  pages        = {367--373},
  publisher    = {{IEEE}},
  year         = {2012},
  url          = {https://doi.org/10.1109/HPCSim.2012.6266939},
  doi          = {10.1109/HPCSIM.2012.6266939},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ieeehpcs/IglbergerHTR12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/KreutzerHWFBB12,
  author       = {Kreutzer, Moritz and
                  Hager, Georg and
                  Wellein, Gerhard and
                  Fehske, Holger and
                  Basermann, Achim and
                  Bishop, Alan R.},
  title        = {Sparse Matrix-vector Multiplication on {GPGPU} Clusters: {A} New Storage
                  Format and a Scalable Implementation},
  booktitle    = {26th {IEEE} International Parallel and Distributed Processing Symposium
                  Workshops {\&} PhD Forum, {IPDPS} 2012, Shanghai, China, May 21-25,
                  2012},
  pages        = {1696--1702},
  publisher    = {{IEEE} Computer Society},
  year         = {2012},
  url          = {https://doi.org/10.1109/IPDPSW.2012.211},
  doi          = {10.1109/IPDPSW.2012.211},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ipps/KreutzerHWFBB12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1206-3738,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Best practices for {HPM}-assisted performance engineering on modern
                  multicore processors},
  journal      = {CoRR},
  volume       = {abs/1206.3738},
  year         = {2012},
  url          = {http://arxiv.org/abs/1206.3738},
  eprinttype   = {arXiv},
  eprint       = {1206.3738},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1206-3738.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1208-2908,
  author       = {Hager, Georg and
                  Treibig, Jan and
                  Habich, Johannes and
                  Wellein, Gerhard},
  title        = {Exploring performance and power properties of modern multicore chips
                  via simple machine models},
  journal      = {CoRR},
  volume       = {abs/1208.2908},
  year         = {2012},
  url          = {http://arxiv.org/abs/1208.2908},
  eprinttype   = {arXiv},
  eprint       = {1208.2908},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1208-2908.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@book{DBLP:books/daglib/0033670,
  author       = {Hager, Georg and
                  Wellein, Gerhard},
  title        = {Introduction to High Performance Computing for Scientists and Engineers},
  series       = {Chapman and Hall / {CRC} computational science series},
  publisher    = {{CRC} Press},
  year         = {2011},
  url          = {http://www.crcpress.com/product/isbn/9781439811924},
  isbn         = {978-1-439-81192-4},
  timestamp    = {Mon, 01 Sep 2014 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/books/daglib/0033670.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/aes/HabichZHW11,
  author       = {Habich, Johannes and
                  Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Performance analysis and optimization strategies for a {D3Q19} lattice
                  {Boltzmann} kernel on {nVIDIA} {GPUs} using {CUDA}},
  journal      = {Adv. Eng. Softw.},
  volume       = {42},
  number       = {5},
  pages        = {266--272},
  year         = {2011},
  url          = {https://doi.org/10.1016/j.advengsoft.2010.10.007},
  doi          = {10.1016/J.ADVENGSOFT.2010.10.007},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/aes/HabichZHW11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jocs/TreibigWH11,
  author       = {Treibig, Jan and
                  Wellein, Gerhard and
                  Hager, Georg},
  title        = {Efficient multicore-aware parallelization strategies for iterative
                  stencil computations},
  journal      = {J. Comput. Sci.},
  volume       = {2},
  number       = {2},
  pages        = {130--137},
  year         = {2011},
  url          = {https://doi.org/10.1016/j.jocs.2011.01.010},
  doi          = {10.1016/J.JOCS.2011.01.010},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jocs/TreibigWH11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/pc/FeichtingerHKHRW11,
  author       = {Feichtinger, Christian and
                  Habich, Johannes and
                  K{\"{o}}stler, Harald and
                  Hager, Georg and
                  R{\"{u}}de, Ulrich and
                  Wellein, Gerhard},
  title        = {A flexible Patch-based lattice {Boltzmann} parallelization approach
                  for heterogeneous {GPU-CPU} clusters},
  journal      = {Parallel Comput.},
  volume       = {37},
  number       = {9},
  pages        = {536--549},
  year         = {2011},
  url          = {https://doi.org/10.1016/j.parco.2011.03.005},
  doi          = {10.1016/J.PARCO.2011.03.005},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/pc/FeichtingerHKHRW11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ppl/SchubertFHW11,
  author       = {Schubert, Gerald and
                  Fehske, Holger and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Hybrid-Parallel Sparse Matrix-Vector Multiplication with Explicit
                  Communication Overlap on Current Multicore-Based Systems},
  journal      = {Parallel Process. Lett.},
  volume       = {21},
  number       = {3},
  pages        = {339--358},
  year         = {2011},
  url          = {https://doi.org/10.1142/S0129626411000254},
  doi          = {10.1142/S0129626411000254},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ppl/SchubertFHW11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/SchubertHFW11,
  author       = {Schubert, Gerald and
                  Hager, Georg and
                  Fehske, Holger and
                  Wellein, Gerhard},
  title        = {Parallel Sparse Matrix-Vector Multiplication as a Test Case for Hybrid
                  {MPI+OpenMP} Programming},
  booktitle    = {25th {IEEE} International Symposium on Parallel and Distributed Processing,
                  {IPDPS} 2011, Anchorage, Alaska, USA, 16-20 May 2011 - Workshop Proceedings},
  pages        = {1751--1758},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/IPDPS.2011.332},
  doi          = {10.1109/IPDPS.2011.332},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ipps/SchubertHFW11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ptw/TreibigHW11,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  editor       = {Brunst, Holger and
                  M{\"{u}}ller, Matthias S. and
                  Nagel, Wolfgang E. and
                  Resch, Michael M.},
  title        = {likwid-bench: An Extensible Microbenchmarking Platform for x86 Multicore
                  Compute Nodes},
  booktitle    = {Tools for High Performance Computing 2011 - Proceedings of the 5th
                  International Workshop on Parallel Tools for High Performance Computing,
                  ZIH, Dresden, September 2011},
  pages        = {27--36},
  publisher    = {Springer},
  year         = {2011},
  url          = {https://doi.org/10.1007/978-3-642-31476-6_3},
  doi          = {10.1007/978-3-642-31476-6_3},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ptw/TreibigHW11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/TreibigHWM11,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard and
                  Meier, Michael},
  editor       = {Lathrop, Scott A. and
                  Costa, Jim and
                  Kramer, William},
  title        = {Poster: {LIKWID:} lightweight performance tools},
  booktitle    = {Conference on High Performance Computing Networking, Storage and Analysis
                  - Companion Volume, {SC} 2011, Seattle, WA, USA, November 12-18, 2011},
  pages        = {29--30},
  publisher    = {{ACM}},
  year         = {2011},
  url          = {https://doi.org/10.1145/2148600.2148616},
  doi          = {10.1145/2148600.2148616},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sc/TreibigHWM11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1101-0091,
  author       = {Schubert, Gerald and
                  Hager, Georg and
                  Fehske, Holger and
                  Wellein, Gerhard},
  title        = {Parallel sparse matrix-vector multiplication as a test case for hybrid
                  {MPI+OpenMP} programming},
  journal      = {CoRR},
  volume       = {abs/1101.0091},
  year         = {2011},
  url          = {http://arxiv.org/abs/1101.0091},
  eprinttype   = {arXiv},
  eprint       = {1101.0091},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1101-0091.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1101-0093,
  author       = {Wittmann, Markus and
                  Hager, Georg},
  title        = {Optimizing {ccNUMA} locality for task-parallel execution under {OpenMP}
                  and {TBB} on multicore-based systems},
  journal      = {CoRR},
  volume       = {abs/1101.0093},
  year         = {2011},
  url          = {http://arxiv.org/abs/1101.0093},
  eprinttype   = {arXiv},
  eprint       = {1101.0093},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1101-0093.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1104-1729,
  author       = {Iglberger, Klaus and
                  Hager, Georg and
                  Treibig, Jan and
                  R{\"{u}}de, Ulrich},
  title        = {Expression Templates Revisited: {A} Performance Analysis of the Current
                  {ET} Methodology},
  journal      = {CoRR},
  volume       = {abs/1104.1729},
  year         = {2011},
  url          = {http://arxiv.org/abs/1104.1729},
  eprinttype   = {arXiv},
  eprint       = {1104.1729},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1104-1729.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1104-4874,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {{LIKWID:} Lightweight Performance Tools},
  journal      = {CoRR},
  volume       = {abs/1104.4874},
  year         = {2011},
  url          = {http://arxiv.org/abs/1104.4874},
  eprinttype   = {arXiv},
  eprint       = {1104.4874},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1104-4874.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1104-5243,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Hofmann, Hannes G. and
                  Hornegger, Joachim and
                  Wellein, Gerhard},
  title        = {Pushing the limits for medical image reconstruction on recent standard
                  multicore processors},
  journal      = {CoRR},
  volume       = {abs/1104.5243},
  year         = {2011},
  url          = {http://arxiv.org/abs/1104.5243},
  eprinttype   = {arXiv},
  eprint       = {1104.5243},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1104-5243.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1106-5908,
  author       = {Schubert, Gerald and
                  Fehske, Holger and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Hybrid-parallel sparse matrix-vector multiplication with explicit
                  communication overlap on current multicore-based systems},
  journal      = {CoRR},
  volume       = {abs/1106.5908},
  year         = {2011},
  url          = {http://arxiv.org/abs/1106.5908},
  eprinttype   = {arXiv},
  eprint       = {1106.5908},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1106-5908.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1111-0922,
  author       = {Wittmann, Markus and
                  Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Comparison of different Propagation Steps for the Lattice {Boltzmann}
                  Method},
  journal      = {CoRR},
  volume       = {abs/1111.0922},
  year         = {2011},
  url          = {http://arxiv.org/abs/1111.0922},
  eprinttype   = {arXiv},
  eprint       = {1111.0922},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1111-0922.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1111-1129,
  author       = {Wittmann, Markus and
                  Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Domain decomposition and locality optimization for large-scale lattice
                  {Boltzmann} simulations},
  journal      = {CoRR},
  volume       = {abs/1111.1129},
  year         = {2011},
  url          = {http://arxiv.org/abs/1111.1129},
  eprinttype   = {arXiv},
  eprint       = {1111.1129},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1111-1129.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1112-0850,
  author       = {Habich, Johannes and
                  Feichtinger, Christian and
                  K{\"{o}}stler, Harald and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Performance engineering for the Lattice {Boltzmann} method on {GPGPUs}:
                  Architectural requirements and performance results},
  journal      = {CoRR},
  volume       = {abs/1112.0850},
  year         = {2011},
  url          = {http://arxiv.org/abs/1112.0850},
  eprinttype   = {arXiv},
  eprint       = {1112.0850},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1112-0850.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1112-5588,
  author       = {Kreutzer, Moritz and
                  Hager, Georg and
                  Wellein, Gerhard and
                  Fehske, Holger and
                  Basermann, Achim and
                  Bishop, Alan R.},
  title        = {Sparse matrix-vector multiplication on {GPGPU} clusters: {A} new storage
                  format and a scalable implementation},
  journal      = {CoRR},
  volume       = {abs/1112.5588},
  year         = {2011},
  url          = {http://arxiv.org/abs/1112.5588},
  eprinttype   = {arXiv},
  eprint       = {1112.5588},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1112-5588.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ppl/WittmannHTW10,
  author       = {Wittmann, Markus and
                  Hager, Georg and
                  Treibig, Jan and
                  Wellein, Gerhard},
  title        = {Leveraging Shared Caches for Parallel Temporal Blocking of
                  Stencil Codes on Multicore Processors and Clusters},
  journal      = {Parallel Process. Lett.},
  volume       = {20},
  number       = {4},
  pages        = {359--376},
  year         = {2010},
  url          = {https://doi.org/10.1142/S0129626410000296},
  doi          = {10.1142/S0129626410000296},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ppl/WittmannHTW10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/chpc/TreibigHW10,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  editor       = {Bischof, Christian H. and
                  Hegering, Heinz{-}Gerd and
                  Nagel, Wolfgang E. and
                  Wittum, Gabriel},
  title        = {{LIKWID:} Lightweight Performance Tools},
  booktitle    = {Competence in High Performance Computing 2010 -
                  Proceedings of an International Conference on Competence in
                  High Performance Computing, Schloss Schwetzingen, Germany, June 2010},
  pages        = {165--175},
  publisher    = {Springer},
  year         = {2010},
  url          = {https://doi.org/10.1007/978-3-642-24025-6\_14},
  doi          = {10.1007/978-3-642-24025-6\_14},
  timestamp    = {Wed, 26 Jun 2019 16:38:15 +0200},
  biburl       = {https://dblp.org/rec/conf/chpc/TreibigHW10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icppw/TreibigHW10,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  editor       = {Lee, Wang{-}Chien and
                  Yuan, Xin},
  title        = {{LIKWID:} {A} Lightweight Performance-Oriented Tool Suite
                  for x86 Multicore Environments},
  booktitle    = {39th International Conference on Parallel Processing,
                  {ICPP} Workshops 2010, San Diego, California, USA,
                  13-16 September 2010},
  pages        = {207--216},
  publisher    = {{IEEE} Computer Society},
  year         = {2010},
  url          = {https://doi.org/10.1109/ICPPW.2010.38},
  doi          = {10.1109/ICPPW.2010.38},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icppw/TreibigHW10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/WittmannHW10,
  author       = {Wittmann, Markus and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Multicore-aware parallel temporal blocking of stencil codes
                  for shared and distributed memory},
  booktitle    = {24th {IEEE} International Symposium on Parallel and
                  Distributed Processing, {IPDPS} 2010, Atlanta, Georgia, USA,
                  19-23 April 2010 - Workshop Proceedings},
  pages        = {1--7},
  publisher    = {{IEEE}},
  year         = {2010},
  url          = {https://doi.org/10.1109/IPDPSW.2010.5470813},
  doi          = {10.1109/IPDPSW.2010.5470813},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ipps/WittmannHW10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1004-1741,
  author       = {Treibig, Jan and
                  Wellein, Gerhard and
                  Hager, Georg},
  title        = {Efficient multicore-aware parallelization strategies
                  for iterative stencil computations},
  journal      = {CoRR},
  volume       = {abs/1004.1741},
  year         = {2010},
  url          = {http://arxiv.org/abs/1004.1741},
  eprinttype   = {arXiv},
  eprint       = {1004.1741},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1004-1741.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1004-4431,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {{LIKWID:} {A} lightweight performance-oriented tool suite
                  for x86 multicore environments},
  journal      = {CoRR},
  volume       = {abs/1004.4431},
  year         = {2010},
  url          = {http://arxiv.org/abs/1004.4431},
  eprinttype   = {arXiv},
  eprint       = {1004.4431},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1004-4431.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1006-3148,
  author       = {Wittmann, Markus and
                  Hager, Georg and
                  Treibig, Jan and
                  Wellein, Gerhard},
  title        = {Leveraging shared caches for parallel temporal blocking of
                  stencil codes on multicore processors and clusters},
  journal      = {CoRR},
  volume       = {abs/1006.3148},
  year         = {2010},
  url          = {http://arxiv.org/abs/1006.3148},
  eprinttype   = {arXiv},
  eprint       = {1006.3148},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1006-3148.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/1007.1388. The braces around Boltzmann keep the
% proper noun capitalised under styles that convert titles to sentence case.
@article{DBLP:journals/corr/abs-1007-1388,
  author       = {Feichtinger, Christian and
                  Habich, Johannes and
                  K{\"{o}}stler, Harald and
                  Hager, Georg and
                  R{\"{u}}de, Ulrich and
                  Wellein, Gerhard},
  title        = {A Flexible Patch-Based Lattice {Boltzmann} Parallelization Approach
                  for Heterogeneous {GPU-CPU} Clusters},
  journal      = {CoRR},
  volume       = {abs/1007.1388},
  year         = {2010},
  url          = {http://arxiv.org/abs/1007.1388},
  eprinttype   = {arXiv},
  eprint       = {1007.1388},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1007-1388.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article. Braces around Boltzmann, Intel and Nehalem keep these proper
% nouns capitalised under styles that convert titles to sentence case.
@article{DBLP:journals/ppl/ZeiserHW09,
  author       = {Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Benchmark Analysis and Application Results for Lattice {Boltzmann} Simulations
                  on {NEC} {SX} Vector and {Intel} {Nehalem} Systems},
  journal      = {Parallel Process. Lett.},
  volume       = {19},
  number       = {4},
  pages        = {491--511},
  year         = {2009},
  url          = {https://doi.org/10.1142/S0129626409000389},
  doi          = {10.1142/S0129626409000389},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ppl/ZeiserHW09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/compsac/WelleinHZWF09,
  author       = {Wellein, Gerhard and
                  Hager, Georg and
                  Zeiser, Thomas and
                  Wittmann, Markus and
                  Fehske, Holger},
  editor       = {Ahamed, Sheikh Iqbal and
                  Bertino, Elisa and
                  Chang, Carl K. and
                  Getov, Vladimir and
                  Liu, Lin and
                  Hua, Ming and
                  Subramanyan, Rajesh},
  title        = {Efficient Temporal Blocking for Stencil Computations
                  by Multicore-Aware Wavefront Parallelization},
  booktitle    = {Proceedings of the 33rd Annual {IEEE} International Computer
                  Software and Applications Conference, {COMPSAC} 2009,
                  Seattle, Washington, USA, July 20-24, 2009. Volume 1},
  pages        = {579--586},
  publisher    = {{IEEE} Computer Society},
  year         = {2009},
  url          = {https://doi.org/10.1109/COMPSAC.2009.82},
  doi          = {10.1109/COMPSAC.2009.82},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/compsac/WelleinHZWF09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/ZeiserHW09,
  author       = {Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {The world's fastest {CPU} and {SMP} node:
                  Some performance results from the {NEC} {SX-9}},
  booktitle    = {23rd {IEEE} International Symposium on Parallel and
                  Distributed Processing, {IPDPS} 2009, Rome, Italy, May 23-29, 2009},
  pages        = {1--8},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/IPDPS.2009.5161089},
  doi          = {10.1109/IPDPS.2009.5161089},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ipps/ZeiserHW09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (PDP 2009). Title braces keep MPI/OpenMP capitalised under
% sentence-casing styles; the comma form gives editor El Baz his compound surname.
@inproceedings{DBLP:conf/pdp/RabenseifnerHJ09,
  author       = {Rabenseifner, Rolf and
                  Hager, Georg and
                  Jost, Gabriele},
  editor       = {El Baz, Didier and
                  Spies, Fran{\c{c}}ois and
                  Gross, Tom},
  title        = {Hybrid {MPI/OpenMP} Parallel Programming on Clusters of Multi-Core {SMP}
                  Nodes},
  booktitle    = {Proceedings of the 17th Euromicro International Conference on Parallel,
                  Distributed and Network-Based Processing, {PDP} 2009, Weimar, Germany,
                  18-20 February 2009},
  pages        = {427--436},
  publisher    = {{IEEE} Computer Society},
  year         = {2009},
  url          = {https://doi.org/10.1109/PDP.2009.43},
  doi          = {10.1109/PDP.2009.43},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/pdp/RabenseifnerHJ09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ppam/TreibigH09,
  author       = {Treibig, Jan and
                  Hager, Georg},
  editor       = {Wyrzykowski, Roman and
                  Dongarra, Jack J. and
                  Karczewski, Konrad and
                  Wasniewski, Jerzy},
  title        = {Introducing a Performance Model for Bandwidth-Limited Loop Kernels},
  booktitle    = {Parallel Processing and Applied Mathematics,
                  8th International Conference, {PPAM} 2009, Wroclaw, Poland,
                  September 13-16, 2009. Revised Selected Papers, Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {6067},
  pages        = {615--624},
  publisher    = {Springer},
  year         = {2009},
  url          = {https://doi.org/10.1007/978-3-642-14390-8\_64},
  doi          = {10.1007/978-3-642-14390-8\_64},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ppam/TreibigH09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-0902-1884,
  author       = {Wittmann, Markus and
                  Hager, Georg},
  title        = {A Proof of Concept for Optimizing Task Parallelism
                  by Locality Queues},
  journal      = {CoRR},
  volume       = {abs/0902.1884},
  year         = {2009},
  url          = {http://arxiv.org/abs/0902.1884},
  eprinttype   = {arXiv},
  eprint       = {0902.1884},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-0902-1884.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-0905-0792,
  author       = {Treibig, Jan and
                  Hager, Georg},
  title        = {Introducing a Performance Model for
                  Bandwidth-Limited Loop Kernels},
  journal      = {CoRR},
  volume       = {abs/0905.0792},
  year         = {2009},
  url          = {http://arxiv.org/abs/0905.0792},
  eprinttype   = {arXiv},
  eprint       = {0905.0792},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-0905-0792.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-0910-4836,
  author       = {Schubert, Gerald and
                  Hager, Georg and
                  Fehske, Holger},
  title        = {Performance limitations for sparse matrix-vector multiplications
                  on current multicore environments},
  journal      = {CoRR},
  volume       = {abs/0910.4836},
  year         = {2009},
  url          = {http://arxiv.org/abs/0910.4836},
  eprinttype   = {arXiv},
  eprint       = {0910.4836},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-0910-4836.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-0910-4865,
  author       = {Treibig, Jan and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Multi-core architectures: Complexities of performance prediction
                  and the impact of cache topology},
  journal      = {CoRR},
  volume       = {abs/0910.4865},
  year         = {2009},
  url          = {http://arxiv.org/abs/0910.4865},
  eprinttype   = {arXiv},
  eprint       = {0910.4865},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-0910-4865.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-0912-4506,
  author       = {Wittmann, Markus and
                  Hager, Georg and
                  Wellein, Gerhard},
  title        = {Multicore-aware parallel temporal blocking of stencil codes
                  for shared and distributed memory},
  journal      = {CoRR},
  volume       = {abs/0912.4506},
  year         = {2009},
  url          = {http://arxiv.org/abs/0912.4506},
  eprinttype   = {arXiv},
  eprint       = {0912.4506},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-0912-4506.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article. Braces around Sun and UltraSPARC keep the vendor and product
% names capitalised under styles that convert titles to sentence case.
@article{DBLP:journals/ppl/HagerZW08,
  author       = {Hager, Georg and
                  Zeiser, Thomas and
                  Wellein, Gerhard},
  title        = {Data Access Characteristics and Optimizations for {Sun} {UltraSPARC} {T2}
                  and {T2+} Systems},
  journal      = {Parallel Process. Lett.},
  volume       = {18},
  number       = {4},
  pages        = {471--490},
  year         = {2008},
  url          = {https://doi.org/10.1142/S0129626408003521},
  doi          = {10.1142/S0129626408003521},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ppl/HagerZW08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (IPDPS 2008). The braces around CPUs keep the acronym
% capitalised under styles that convert titles to sentence case.
@inproceedings{DBLP:conf/ipps/HagerZW08,
  author       = {Hager, Georg and
                  Zeiser, Thomas and
                  Wellein, Gerhard},
  title        = {Data access optimizations for highly threaded multi-core {CPUs} with
                  multiple memory controllers},
  booktitle    = {22nd {IEEE} International Symposium on Parallel and Distributed Processing,
                  {IPDPS} 2008, Miami, Florida USA, April 14-18, 2008},
  pages        = {1--7},
  publisher    = {{IEEE}},
  year         = {2008},
  url          = {https://doi.org/10.1109/IPDPS.2008.4536341},
  doi          = {10.1109/IPDPS.2008.4536341},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ipps/HagerZW08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Book chapter. Braces around CPUs and Boltzmann keep the acronym and the
% proper noun capitalised under styles that convert titles to sentence case.
@incollection{DBLP:books/sp/08/ZeiserHW08,
  author       = {Zeiser, Thomas and
                  Hager, Georg and
                  Wellein, Gerhard},
  editor       = {Nagel, Wolfgang E. and
                  Kr{\"{o}}ner, Dietmar B. and
                  Resch, Michael M.},
  title        = {Vector Computers in a World of Commodity Clusters, Massively Parallel
                  Systems and Many-Core Many-Threaded {CPUs}: Recent Experience Based
                  on an Advanced Lattice {Boltzmann} Flow Solver},
  booktitle    = {High Performance Computing in Science and Engineering '08 - Transactions
                  of the High Performance Computing Center, Stuttgart {(HLRS)} 2008},
  pages        = {333--347},
  publisher    = {Springer},
  year         = {2008},
  url          = {https://doi.org/10.1007/978-3-540-88303-6\_24},
  doi          = {10.1007/978-3-540-88303-6\_24},
  timestamp    = {Tue, 02 Jun 2020 16:36:27 +0200},
  biburl       = {https://dblp.org/rec/books/sp/08/ZeiserHW08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/0712.2302. The braces around CPUs keep the acronym
% capitalised under styles that convert titles to sentence case.
@article{DBLP:journals/corr/abs-0712-2302,
  author       = {Hager, Georg and
                  Zeiser, Thomas and
                  Wellein, Gerhard},
  title        = {Data access optimizations for highly threaded multi-core {CPUs} with
                  multiple memory controllers},
  journal      = {CoRR},
  volume       = {abs/0712.2302},
  year         = {2007},
  url          = {http://arxiv.org/abs/0712.2302},
  eprinttype   = {arXiv},
  eprint       = {0712.2302},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-0712-2302.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-0712-3389,
  author       = {Hager, Georg and
                  Stengel, Holger and
                  Zeiser, Thomas and
                  Wellein, Gerhard},
  title        = {{RZBENCH:} Performance evaluation of current {HPC} architectures
                  using low-level and application benchmarks},
  journal      = {CoRR},
  volume       = {abs/0712.3389},
  year         = {2007},
  url          = {http://arxiv.org/abs/0712.3389},
  eprinttype   = {arXiv},
  eprint       = {0712.3389},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-0712-3389.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference contribution (LNCS 4192). The braces around OpenMP keep its mixed-case
% spelling under styles that convert titles to sentence case.
@inproceedings{DBLP:conf/pvm/RabenseifnerHJK06,
  author       = {Rabenseifner, Rolf and
                  Hager, Georg and
                  Jost, Gabriele and
                  Keller, Rainer},
  editor       = {Mohr, Bernd and
                  Tr{\"{a}}ff, Jesper Larsson and
                  Worringen, Joachim and
                  Dongarra, Jack J.},
  title        = {Hybrid {MPI} and {OpenMP} Parallel Programming},
  booktitle    = {Recent Advances in Parallel Virtual Machine and Message Passing Interface,
                  13th European {PVM/MPI} User's Group Meeting, Bonn, Germany, September
                  17-20, 2006, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {4192},
  pages        = {11},
  publisher    = {Springer},
  year         = {2006},
  url          = {https://doi.org/10.1007/11846802\_10},
  doi          = {10.1007/11846802\_10},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/pvm/RabenseifnerHJK06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/hpsc/HagerJFW03,
  author       = {Hager, Georg and
                  Jeckelmann, Eric and
                  Fehske, Holger and
                  Wellein, Gerhard},
  editor       = {Bock, Hans Georg and
                  Phu, Hoang Xuan and
                  Kostina, Ekaterina A. and
                  Rannacher, Rolf},
  title        = {Exact Numerical Treatment of Finite Quantum Systems
                  Using Leading-Edge Supercomputers},
  booktitle    = {Modeling, Simulation and Optimization of Complex Processes,
                  Proceedings of the International Conference on
                  High Performance Scientific Computing,
                  March 10-14, 2003, Hanoi, Vietnam},
  pages        = {165--177},
  publisher    = {Springer},
  year         = {2003},
  url          = {https://doi.org/10.1007/3-540-27170-8\_13},
  doi          = {10.1007/3-540-27170-8\_13},
  timestamp    = {Fri, 26 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/hpsc/HagerJFW03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (VECPAR 2002, LNCS 2565). The braces around TeraFlop/s keep its
% mixed-case spelling under styles that convert titles to sentence case.
@inproceedings{DBLP:conf/vecpar/WelleinHBF02,
  author       = {Wellein, Gerhard and
                  Hager, Georg and
                  Basermann, Achim and
                  Fehske, Holger},
  editor       = {Palma, Jos{\'{e}} M. Laginha M. and
                  Dongarra, Jack J. and
                  Hern{\'{a}}ndez, Vicente and
                  de Sousa, Ant{\'{o}}nio Augusto},
  title        = {Fast Sparse Matrix-Vector Multiplication for {TeraFlop/s} Computers},
  booktitle    = {High Performance Computing for Computational Science - {VECPAR} 2002,
                  5th International Conference, Porto, Portugal, June 26-28, 2002, Selected
                  Papers and Invited Talks},
  series       = {Lecture Notes in Computer Science},
  volume       = {2565},
  pages        = {287--301},
  publisher    = {Springer},
  year         = {2002},
  url          = {https://doi.org/10.1007/3-540-36569-9\_18},
  doi          = {10.1007/3-540-36569-9\_18},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/vecpar/WelleinHBF02.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics