Stop the war!
Остановите войну!
for scientists:
default search action
BibTeX records: Georg Hager
% Journal article: Future Gener. Comput. Syst., volume 148 (2023).
@article{DBLP:journals/fgcs/AfzalHMW23,
  author    = {Ayesha Afzal and Georg Hager and Stefano Markidis and Gerhard Wellein},
  title     = {Making applications faster by asynchronous execution: Slowing down processes or relaxing {MPI} collectives},
  journal   = {Future Gener. Comput. Syst.},
  volume    = {148},
  pages     = {472--487},
  year      = {2023},
  url       = {https://doi.org/10.1016/j.future.2023.06.017},
  doi       = {10.1016/J.FUTURE.2023.06.017},
  timestamp = {Thu, 31 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/fgcs/AfzalHMW23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Future Gener. Comput. Syst., volume 149 (2023).
% The tool name "MD-Bench" is brace-protected so sentence-casing styles
% do not turn it into "Md-bench".
@article{DBLP:journals/fgcs/MachadoELHKW23,
  author    = {Rafael Ravedutti Lucio Machado and Jan Eitzinger and Jan Laukemann and Georg Hager and Harald K{\"{o}}stler and Gerhard Wellein},
  title     = {{MD-Bench}: {A} performance-focused prototyping harness for state-of-the-art short-range molecular dynamics algorithms},
  journal   = {Future Gener. Comput. Syst.},
  volume    = {149},
  pages     = {25--38},
  year      = {2023},
  url       = {https://doi.org/10.1016/j.future.2023.06.023},
  doi       = {10.1016/J.FUTURE.2023.06.023},
  timestamp = {Thu, 26 Oct 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/fgcs/MachadoELHKW23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: J. Parallel Distributed Comput., volume 173 (2023).
% "GPUs" is brace-protected so sentence-casing styles do not print "gpus".
@article{DBLP:journals/jpdc/ErnstHHKW23,
  author    = {Dominik Ernst and Markus Holzer and Georg Hager and Matthias Knorr and Gerhard Wellein},
  title     = {Analytical performance estimation during code generation on modern {GPUs}},
  journal   = {J. Parallel Distributed Comput.},
  volume    = {173},
  pages     = {152--167},
  year      = {2023},
  url       = {https://doi.org/10.1016/j.jpdc.2022.11.003},
  doi       = {10.1016/J.JPDC.2022.11.003},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/jpdc/ErnstHHKW23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: ACM Trans. Parallel Comput., volume 10, number 3 (2023).
@article{DBLP:journals/topc/AlvermannHF23,
  author    = {Andreas Alvermann and Georg Hager and Holger Fehske},
  title     = {Orthogonal Layers of Parallelism in Large-Scale Eigenvalue Computations},
  journal   = {{ACM} Trans. Parallel Comput.},
  volume    = {10},
  number    = {3},
  pages     = {16:1--16:31},
  year      = {2023},
  url       = {https://doi.org/10.1145/3614444},
  doi       = {10.1145/3614444},
  timestamp = {Fri, 27 Oct 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/topc/AlvermannHF23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: IEEE Trans. Parallel Distributed Syst., volume 34, number 2 (2023).
@article{DBLP:journals/tpds/AlappatHSW23,
  author    = {Christie L. Alappat and Georg Hager and Olaf Schenk and Gerhard Wellein},
  title     = {Level-Based Blocking for Sparse Matrices: Sparse Matrix-Power-Vector Multiplication},
  journal   = {{IEEE} Trans. Parallel Distributed Syst.},
  volume    = {34},
  number    = {2},
  pages     = {581--597},
  year      = {2023},
  url       = {https://doi.org/10.1109/TPDS.2022.3223512},
  doi       = {10.1109/TPDS.2022.3223512},
  timestamp = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/tpds/AlappatHSW23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: IEEE Trans. Parallel Distributed Syst., volume 34, number 2 (2023).
@article{DBLP:journals/tpds/AfzalHW23,
  author    = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title     = {The Role of Idle Waves, Desynchronization, and Bottleneck Evasion in the Performance of Parallel Programs},
  journal   = {{IEEE} Trans. Parallel Distributed Syst.},
  volume    = {34},
  number    = {2},
  pages     = {623--638},
  year      = {2023},
  url       = {https://doi.org/10.1109/TPDS.2022.3221085},
  doi       = {10.1109/TPDS.2022.3221085},
  timestamp = {Fri, 10 Feb 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/tpds/AfzalHW23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Workshop paper in the SC-W 2023 proceedings (ACM).
@inproceedings{DBLP:conf/sc/AfzalHW23,
  author    = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title     = {Physical Oscillator Model for Supercomputing},
  booktitle = {Proceedings of the {SC} '23 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis, {SC-W} 2023, Denver, CO, USA, November 12-17, 2023},
  pages     = {1229--1235},
  publisher = {{ACM}},
  year      = {2023},
  url       = {https://doi.org/10.1145/3624062.3625535},
  doi       = {10.1145/3624062.3625535},
  timestamp = {Tue, 28 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/AfzalHW23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Workshop paper in the SC-W 2023 proceedings (ACM).
% Benchmark and product names in the title are brace-protected so
% sentence-casing styles keep "SPEChpc", "Ice Lake", "Sapphire Rapids"
% and "Infiniband" capitalised as written.
@inproceedings{DBLP:conf/sc/AfzalHW23a,
  author    = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title     = {{SPEChpc} 2021 Benchmarks on {Ice Lake} and {Sapphire Rapids} {Infiniband} Clusters: {A} Performance and Energy Case Study},
  booktitle = {Proceedings of the {SC} '23 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis, {SC-W} 2023, Denver, CO, USA, November 12-17, 2023},
  pages     = {1245--1254},
  publisher = {{ACM}},
  year      = {2023},
  url       = {https://doi.org/10.1145/3624062.3624197},
  doi       = {10.1145/3624062.3624197},
  timestamp = {Tue, 28 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/AfzalHW23a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference contribution in the ICPE 2023 proceedings (ACM).
% The editor "Di Marco" is given in "Last, First" form: in "First Last"
% form BibTeX would take only "Marco" as the surname, because "Di" is
% capitalised and therefore not recognised as a von-part.
@inproceedings{DBLP:conf/wosp/Hager23,
  author    = {Georg Hager},
  editor    = {Marco Vieira and Valeria Cardellini and Di Marco, Antinisca and Petr Tuma},
  title     = {Application Knowledge Required: Performance Modeling for Fun and Profit},
  booktitle = {Proceedings of the 2023 {ACM/SPEC} International Conference on Performance Engineering, {ICPE} 2023, Coimbra, Portugal, April 15-19, 2023},
  pages     = {5},
  publisher = {{ACM}},
  year      = {2023},
  url       = {https://doi.org/10.1145/3578244.3585384},
  doi       = {10.1145/3578244.3585384},
  timestamp = {Sat, 29 Apr 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/wosp/Hager23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Paper in the ICPE 2023 companion proceedings (ACM).
% The editor "Di Marco" is given in "Last, First" form so BibTeX does not
% take only "Marco" as the surname. The tool names in the title are
% brace-protected against lowercasing by sentence-casing styles.
@inproceedings{DBLP:conf/wosp/LaukemannH23,
  author    = {Jan Laukemann and Georg Hager},
  editor    = {Marco Vieira and Valeria Cardellini and Di Marco, Antinisca and Petr Tuma},
  title     = {Core-Level Performance Engineering with the {Open-Source Architecture Code Analyzer} {(OSACA)} and the {Compiler Explorer}},
  booktitle = {Companion of the 2023 {ACM/SPEC} International Conference on Performance Engineering, {ICPE} 2023, Coimbra, Portugal, April 15-19, 2023},
  pages     = {127--131},
  publisher = {{ACM}},
  year      = {2023},
  url       = {https://doi.org/10.1145/3578245.3583716},
  doi       = {10.1145/3578245.3583716},
  timestamp = {Sat, 29 Apr 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/wosp/LaukemannH23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2302.12164, listed under CoRR (2023).
@article{DBLP:journals/corr/abs-2302-12164,
  author     = {Ayesha Afzal and Georg Hager and Stefano Markidis and Gerhard Wellein},
  title      = {Making Applications Faster by Asynchronous Execution: Slowing Down Processes or Relaxing {MPI} Collectives},
  journal    = {CoRR},
  volume     = {abs/2302.12164},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.12164},
  doi        = {10.48550/ARXIV.2302.12164},
  eprinttype = {arXiv},
  eprint     = {2302.12164},
  timestamp  = {Tue, 28 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-12164.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2302.14660, listed under CoRR (2023).
% The tool name "MD-Bench" is brace-protected so sentence-casing styles
% do not turn it into "Md-bench".
@article{DBLP:journals/corr/abs-2302-14660,
  author     = {Rafael Ravedutti Lucio Machado and Jan Eitzinger and Jan Laukemann and Georg Hager and Harald K{\"{o}}stler and Gerhard Wellein},
  title      = {{MD-Bench}: Engineering the in-core performance of short-range molecular dynamics kernels from state-of-the-art simulation packages},
  journal    = {CoRR},
  volume     = {abs/2302.14660},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.14660},
  doi        = {10.48550/ARXIV.2302.14660},
  eprinttype = {arXiv},
  eprint     = {2302.14660},
  timestamp  = {Thu, 02 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-14660.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2309.02228, listed under CoRR (2023).
% "CPUs" is brace-protected so sentence-casing styles do not print "cpus".
@article{DBLP:journals/corr/abs-2309-02228,
  author     = {Christie L. Alappat and Jonas Thies and Georg Hager and Holger Fehske and Gerhard Wellein},
  title      = {Algebraic Temporal Blocking for Sparse Iterative Solvers on Multi-Core {CPUs}},
  journal    = {CoRR},
  volume     = {abs/2309.02228},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.02228},
  doi        = {10.48550/ARXIV.2309.02228},
  eprinttype = {arXiv},
  eprint     = {2309.02228},
  timestamp  = {Mon, 11 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-02228.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2309.05373, listed under CoRR (2023).
% Benchmark and product names in the title are brace-protected so
% sentence-casing styles keep "SPEChpc", "Ice Lake", "Sapphire Rapids"
% and "Infiniband" capitalised as written.
@article{DBLP:journals/corr/abs-2309-05373,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {{SPEChpc} 2021 Benchmarks on {Ice Lake} and {Sapphire Rapids} {Infiniband} Clusters: {A} Performance and Energy Case Study},
  journal    = {CoRR},
  volume     = {abs/2309.05373},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.05373},
  doi        = {10.48550/ARXIV.2309.05373},
  eprinttype = {arXiv},
  eprint     = {2309.05373},
  timestamp  = {Fri, 15 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-05373.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2310.05701, listed under CoRR (2023).
@article{DBLP:journals/corr/abs-2310-05701,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {Physical Oscillator Model for Supercomputing},
  journal    = {CoRR},
  volume     = {abs/2310.05701},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.05701},
  doi        = {10.48550/ARXIV.2310.05701},
  eprinttype = {arXiv},
  eprint     = {2310.05701},
  timestamp  = {Tue, 24 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-05701.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2311.04797, listed under CoRR (2023).
% "CloverLeaf", "Intel" and "CPUs" are brace-protected so sentence-casing
% styles do not lowercase them.
@article{DBLP:journals/corr/abs-2311-04797,
  author     = {Jan Laukemann and Thomas Gruber and Georg Hager and Dossay Oryspayev and Gerhard Wellein},
  title      = {{CloverLeaf} on {Intel} Multi-Core {CPUs}: {A} Case Study in Write-Allocate Evasion},
  journal    = {CoRR},
  volume     = {abs/2311.04797},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2311.04797},
  doi        = {10.48550/ARXIV.2311.04797},
  eprinttype = {arXiv},
  eprint     = {2311.04797},
  timestamp  = {Tue, 14 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2311-04797.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Concurr. Comput. Pract. Exp., volume 34, number 10 (2022).
% NOTE(review): the record carries no pages or article number.
@article{DBLP:journals/concurrency/AfzalHW22,
  author    = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title     = {Analytic performance model for parallel overlapping memory-bound kernels},
  journal   = {Concurr. Comput. Pract. Exp.},
  volume    = {34},
  number    = {10},
  year      = {2022},
  url       = {https://doi.org/10.1002/cpe.6816},
  doi       = {10.1002/CPE.6816},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/concurrency/AfzalHW22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Concurr. Comput. Pract. Exp., volume 34, number 20 (2022).
% NOTE(review): the record carries no pages or article number.
@article{DBLP:journals/concurrency/AlappatMLGHWW22,
  author    = {Christie L. Alappat and Nils Meyer and Jan Laukemann and Thomas Gruber and Georg Hager and Gerhard Wellein and Tilo Wettig},
  title     = {Execution-Cache-Memory modeling and performance tuning of sparse matrix-vector multiplication and Lattice quantum chromodynamics on {A64FX}},
  journal   = {Concurr. Comput. Pract. Exp.},
  volume    = {34},
  number    = {20},
  year      = {2022},
  url       = {https://doi.org/10.1002/cpe.6512},
  doi       = {10.1002/CPE.6512},
  timestamp = {Tue, 12 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/concurrency/AlappatMLGHWW22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper in the SIGSIM-PADS '22 proceedings (ACM).
@inproceedings{DBLP:conf/pads/AfzalWH22,
  author    = {Ayesha Afzal and Gerhard Wellein and Georg Hager},
  editor    = {Kalyan Perumalla and Margaret Loper and Dong (Kevin) Jin and Christopher D. Carothers},
  title     = {Addressing White-box Modeling and Simulation Challenges in Parallel Computing},
  booktitle = {{SIGSIM-PADS} '22: {SIGSIM} Conference on Principles of Advanced Discrete Simulation, Atlanta, GA, USA, June 8 - 10, 2022},
  pages     = {25--26},
  publisher = {{ACM}},
  year      = {2022},
  url       = {https://doi.org/10.1145/3518997.3534986},
  doi       = {10.1145/3518997.3534986},
  timestamp = {Tue, 28 Jun 2022 11:47:06 +0200},
  biburl    = {https://dblp.org/rec/conf/pads/AfzalWH22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper in the PPAM 2022 proceedings (LNCS 13826, Springer).
% "MPI" is brace-protected so sentence-casing styles do not print
% "mpi-parallel".
@inproceedings{DBLP:conf/ppam/AfzalHWM22,
  author    = {Ayesha Afzal and Georg Hager and Gerhard Wellein and Stefano Markidis},
  editor    = {Roman Wyrzykowski and Jack J. Dongarra and Ewa Deelman and Konrad Karczewski},
  title     = {Exploring Techniques for the Analysis of Spontaneous Asynchronicity in {MPI}-Parallel Applications},
  booktitle = {Parallel Processing and Applied Mathematics - 14th International Conference, {PPAM} 2022, Gdansk, Poland, September 11-14, 2022, Revised Selected Papers, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {13826},
  pages     = {155--170},
  publisher = {Springer},
  year      = {2022},
  url       = {https://doi.org/10.1007/978-3-031-30442-2\_12},
  doi       = {10.1007/978-3-031-30442-2\_12},
  timestamp = {Wed, 17 May 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ppam/AfzalHWM22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2204.14242, listed under CoRR (2022).
% "GPUs" is brace-protected so sentence-casing styles do not print "gpus".
@article{DBLP:journals/corr/abs-2204-14242,
  author     = {Dominik Ernst and Markus Holzer and Georg Hager and Matthias Knorr and Gerhard Wellein},
  title      = {Analytical Performance Estimation during Code Generation on Modern {GPUs}},
  journal    = {CoRR},
  volume     = {abs/2204.14242},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.14242},
  doi        = {10.48550/ARXIV.2204.14242},
  eprinttype = {arXiv},
  eprint     = {2204.14242},
  timestamp  = {Mon, 02 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-14242.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2205.01598, listed under CoRR (2022).
@article{DBLP:journals/corr/abs-2205-01598,
  author     = {Christie L. Alappat and Georg Hager and Olaf Schenk and Gerhard Wellein},
  title      = {Level-based Blocking for Sparse Matrices: Sparse Matrix-Power-Vector Multiplication},
  journal    = {CoRR},
  volume     = {abs/2205.01598},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.01598},
  doi        = {10.48550/ARXIV.2205.01598},
  eprinttype = {arXiv},
  eprint     = {2205.01598},
  timestamp  = {Thu, 05 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-01598.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2205.04190, listed under CoRR (2022).
@article{DBLP:journals/corr/abs-2205-04190,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {The Role of Idle Waves, Desynchronization, and Bottleneck Evasion in the Performance of Parallel Programs},
  journal    = {CoRR},
  volume     = {abs/2205.04190},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.04190},
  doi        = {10.48550/ARXIV.2205.04190},
  eprinttype = {arXiv},
  eprint     = {2205.04190},
  timestamp  = {Wed, 11 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-04190.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2205.13963, listed under CoRR (2022).
% "MPI" is brace-protected so sentence-casing styles do not print
% "mpi-parallel".
@article{DBLP:journals/corr/abs-2205-13963,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein and Stefano Markidis},
  title      = {Exploring Techniques for the Analysis of Spontaneous Asynchronicity in {MPI}-Parallel Applications},
  journal    = {CoRR},
  volume     = {abs/2205.13963},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.13963},
  doi        = {10.48550/ARXIV.2205.13963},
  eprinttype = {arXiv},
  eprint     = {2205.13963},
  timestamp  = {Tue, 31 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-13963.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2209.01974, listed under CoRR (2022).
@article{DBLP:journals/corr/abs-2209-01974,
  author     = {Andreas Alvermann and Georg Hager and Holger Fehske},
  title      = {Orthogonal layers of parallelism in large-scale eigenvalue computations},
  journal    = {CoRR},
  volume     = {abs/2209.01974},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.01974},
  doi        = {10.48550/ARXIV.2209.01974},
  eprinttype = {arXiv},
  eprint     = {2209.01974},
  timestamp  = {Mon, 26 Sep 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-01974.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Int. J. High Perform. Comput. Appl., volume 35, number 1 (2021).
% "GPUs" is brace-protected so sentence-casing styles do not print "gpus".
% NOTE(review): the record carries no pages field.
@article{DBLP:journals/ijhpca/ErnstHTW21,
  author    = {Dominik Ernst and Georg Hager and Jonas Thies and Gerhard Wellein},
  title     = {Performance engineering for real and complex tall {\&} skinny matrix multiplication kernels on {GPUs}},
  journal   = {Int. J. High Perform. Comput. Appl.},
  volume    = {35},
  number    = {1},
  year      = {2021},
  url       = {https://doi.org/10.1177/1094342020965661},
  doi       = {10.1177/1094342020965661},
  timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ijhpca/ErnstHTW21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Int. J. High Perform. Comput. Appl., volume 35, number 1 (2021).
% The proper noun "Dirac" is brace-protected so sentence-casing styles do
% not print "dirac".
% NOTE(review): the record carries no pages field.
@article{DBLP:journals/ijhpca/PieperHF21,
  author    = {Andreas Pieper and Georg Hager and Holger Fehske},
  title     = {A domain-specific language and matrix-free stencil code for investigating electronic properties of {Dirac} and topological materials},
  journal   = {Int. J. High Perform. Comput. Appl.},
  volume    = {35},
  number    = {1},
  year      = {2021},
  url       = {https://doi.org/10.1177/1094342020959423},
  doi       = {10.1177/1094342020959423},
  timestamp = {Thu, 29 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ijhpca/PieperHF21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper in the CGO 2021 proceedings (IEEE).
% The tool name "YaskSite" is brace-protected so sentence-casing styles
% do not turn it into "Yasksite".
@inproceedings{DBLP:conf/cgo/AlappatSHKRW21,
  author    = {Christie L. Alappat and Johannes Seiferth and Georg Hager and Matthias Korch and Thomas Rauber and Gerhard Wellein},
  editor    = {Jae W. Lee and Mary Lou Soffa and Ayal Zaks},
  title     = {{YaskSite}: Stencil Optimization Techniques Applied to Explicit {ODE} Methods on Modern Architectures},
  booktitle = {{IEEE/ACM} International Symposium on Code Generation and Optimization, {CGO} 2021, Seoul, South Korea, February 27 - March 3, 2021},
  pages     = {174--186},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/CGO51591.2021.9370316},
  doi       = {10.1109/CGO51591.2021.9370316},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/cgo/AlappatSHKRW21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper in the SBAC-PAD 2021 proceedings (IEEE).
% "GPUs" is brace-protected so sentence-casing styles do not print "gpus".
@inproceedings{DBLP:conf/sbac-pad/ErnstHKWH21,
  author    = {Dominik Ernst and Georg Hager and Matthias Knorr and Gerhard Wellein and Markus Holzer},
  title     = {Opening the Black Box: Performance Estimation during Code Generation for {GPUs}},
  booktitle = {33rd {IEEE} International Symposium on Computer Architecture and High Performance Computing, {SBAC-PAD} 2021, Belo Horizonte, Brazil, October 26-29, 2021},
  pages     = {22--32},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SBAC-PAD53543.2021.00014},
  doi       = {10.1109/SBAC-PAD53543.2021.00014},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/sbac-pad/ErnstHKWH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper in the ISC High Performance 2021 proceedings (LNCS 12728, Springer).
@inproceedings{DBLP:conf/supercomputer/AfzalHW21,
  author    = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  editor    = {Bradford L. Chamberlain and Ana Lucia Varbanescu and Hatem Ltaief and Piotr Luszczek},
  title     = {Analytic Modeling of Idle Waves in Parallel Programs: Communication, Cluster Topology, and Noise Impact},
  booktitle = {High Performance Computing - 36th International Conference, {ISC} High Performance 2021, Virtual Event, June 24 - July 2, 2021, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {12728},
  pages     = {351--371},
  publisher = {Springer},
  year      = {2021},
  url       = {https://doi.org/10.1007/978-3-030-78713-4\_19},
  doi       = {10.1007/978-3-030-78713-4\_19},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/supercomputer/AfzalHW21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2103.03013, listed under CoRR (2021).
% The abbreviation "SpMV" is brace-protected so sentence-casing styles
% do not print "spmv".
@article{DBLP:journals/corr/abs-2103-03013,
  author     = {Christie L. Alappat and Nils Meyer and Jan Laukemann and Thomas Gruber and Georg Hager and Gerhard Wellein and Tilo Wettig},
  title      = {{ECM} modeling and performance tuning of {SpMV} and Lattice {QCD} on {A64FX}},
  journal    = {CoRR},
  volume     = {abs/2103.03013},
  year       = {2021},
  url        = {https://arxiv.org/abs/2103.03013},
  eprinttype = {arXiv},
  eprint     = {2103.03013},
  timestamp  = {Mon, 15 Mar 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-03013.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2103.03175, listed under CoRR (2021).
@article{DBLP:journals/corr/abs-2103-03175,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {Analytic Modeling of Idle Waves in Parallel Programs: Communication, Cluster Topology, and Noise Impact},
  journal    = {CoRR},
  volume     = {abs/2103.03175},
  year       = {2021},
  url        = {https://arxiv.org/abs/2103.03175},
  eprinttype = {arXiv},
  eprint     = {2103.03175},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-03175.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2107.01143, listed under CoRR (2021).
% "GPUs" is brace-protected so sentence-casing styles do not print "gpus".
@article{DBLP:journals/corr/abs-2107-01143,
  author     = {Dominik Ernst and Georg Hager and Markus Holzer and Matthias Knorr and Gerhard Wellein},
  title      = {Opening the Black Box: Performance Estimation during Code Generation for {GPUs}},
  journal    = {CoRR},
  volume     = {abs/2107.01143},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.01143},
  eprinttype = {arXiv},
  eprint     = {2107.01143},
  timestamp  = {Wed, 23 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2107-01143.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Int. J. High Perform. Comput. Appl., volume 34, number 4 (2020).
% NOTE(review): the record carries no pages field.
@article{DBLP:journals/ijhpca/CremonesiHWS20,
  author    = {Francesco Cremonesi and Georg Hager and Gerhard Wellein and Felix Sch{\"{u}}rmann},
  title     = {Analytic performance modeling and analysis of detailed neuron simulations},
  journal   = {Int. J. High Perform. Comput. Appl.},
  volume    = {34},
  number    = {4},
  year      = {2020},
  url       = {https://doi.org/10.1177/1094342020912528},
  doi       = {10.1177/1094342020912528},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ijhpca/CremonesiHWS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Supercomput. Front. Innov., volume 7, number 2 (2020).
@article{DBLP:journals/superfri/0001AHFW20,
  author    = {Johannes Hofmann and Christie L. Alappat and Georg Hager and Dietmar Fey and Gerhard Wellein},
  title     = {Bridging the Architecture Gap: Abstracting Performance-Relevant Properties of Modern Server Processors},
  journal   = {Supercomput. Front. Innov.},
  volume    = {7},
  number    = {2},
  pages     = {54--78},
  year      = {2020},
  url       = {https://doi.org/10.14529/jsfi200204},
  doi       = {10.14529/JSFI200204},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/superfri/0001AHFW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: ACM Trans. Math. Softw., volume 46, number 4 (2020).
@article{DBLP:journals/toms/ThiesROBEHW20,
  author    = {Jonas Thies and Melven R{\"{o}}hrig{-}Z{\"{o}}llner and Nigel Overmars and Achim Basermann and Dominik Ernst and Georg Hager and Gerhard Wellein},
  title     = {{PHIST:} {A} Pipelined, Hybrid-Parallel Iterative Solver Toolkit},
  journal   = {{ACM} Trans. Math. Softw.},
  volume    = {46},
  number    = {4},
  pages     = {31:1--31:26},
  year      = {2020},
  url       = {https://doi.org/10.1145/3402227},
  doi       = {10.1145/3402227},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/toms/ThiesROBEHW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: ACM Trans. Parallel Comput., volume 7, number 3 (2020).
@article{DBLP:journals/topc/AlappatBBFHSTW20,
  author    = {Christie L. Alappat and Achim Basermann and Alan R. Bishop and Holger Fehske and Georg Hager and Olaf Schenk and Jonas Thies and Gerhard Wellein},
  title     = {A Recursive Algebraic Coloring Technique for Hardware-efficient Symmetric Sparse Matrix-vector Multiplication},
  journal   = {{ACM} Trans. Parallel Comput.},
  volume    = {7},
  number    = {3},
  pages     = {19:1--19:37},
  year      = {2020},
  url       = {https://doi.org/10.1145/3399732},
  doi       = {10.1145/3399732},
  timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/topc/AlappatBBFHSTW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Workshop paper in the PMBS 2020 proceedings (IEEE), held at SC 2020.
@inproceedings{DBLP:conf/pmbs-ws/AlappatLGHWMW20,
  author    = {Christie L. Alappat and Jan Laukemann and Thomas Gruber and Georg Hager and Gerhard Wellein and Nils Meyer and Tilo Wettig},
  title     = {Performance Modeling of Streaming Kernels and Sparse Matrix-Vector Multiplication on {A64FX}},
  booktitle = {2020 {IEEE/ACM} Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems, PMBS@SC 2020, Atlanta, GA, USA, November 12, 2020},
  pages     = {1--7},
  publisher = {{IEEE}},
  year      = {2020},
  url       = {https://doi.org/10.1109/PMBS51919.2020.00006},
  doi       = {10.1109/PMBS51919.2020.00006},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/pmbs-ws/AlappatLGHWMW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper in the ISC High Performance 2020 proceedings (LNCS 12151, Springer).
% "MPI" is brace-protected so sentence-casing styles do not print
% "mpi-parallel".
@inproceedings{DBLP:conf/supercomputer/AfzalHW20,
  author    = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  editor    = {Ponnuswamy Sadayappan and Bradford L. Chamberlain and Guido Juckeland and Hatem Ltaief},
  title     = {Desynchronization and Wave Pattern Formation in {MPI}-Parallel and Hybrid Memory-Bound Programs},
  booktitle = {High Performance Computing - 35th International Conference, {ISC} High Performance 2020, Frankfurt/Main, Germany, June 22-25, 2020, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {12151},
  pages     = {391--411},
  publisher = {Springer},
  year      = {2020},
  url       = {https://doi.org/10.1007/978-3-030-50743-5\_20},
  doi       = {10.1007/978-3-030-50743-5\_20},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/supercomputer/AfzalHW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper in the ISC High Performance 2020 proceedings (LNCS 12151, Springer).
% Vendor and processor names in the title are brace-protected so
% sentence-casing styles do not lowercase "Intel", "Broadwell" and
% "Cascade Lake".
@inproceedings{DBLP:conf/supercomputer/Alappat0HFBW20,
  author    = {Christie L. Alappat and Johannes Hofmann and Georg Hager and Holger Fehske and Alan R. Bishop and Gerhard Wellein},
  editor    = {Ponnuswamy Sadayappan and Bradford L. Chamberlain and Guido Juckeland and Hatem Ltaief},
  title     = {Understanding {HPC} Benchmark Performance on {Intel} {Broadwell} and {Cascade Lake} Processors},
  booktitle = {High Performance Computing - 35th International Conference, {ISC} High Performance 2020, Frankfurt/Main, Germany, June 22-25, 2020, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {12151},
  pages     = {412--433},
  publisher = {Springer},
  year      = {2020},
  url       = {https://doi.org/10.1007/978-3-030-50743-5\_21},
  doi       = {10.1007/978-3-030-50743-5\_21},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/supercomputer/Alappat0HFBW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Book chapter in "Software for Exascale Computing - SPPEXA 2016-2019"
% (Lecture Notes in Computational Science and Engineering 136, Springer).
@incollection{DBLP:series/lncse/AlappatABFFGHHIKKLNRSSTW20,
  author    = {Christie L. Alappat and Andreas Alvermann and Achim Basermann and Holger Fehske and Yasunori Futamura and Martin Galgon and Georg Hager and Sarah Huber and Akira Imakura and Masatoshi Kawai and Moritz Kreutzer and Bruno Lang and Kengo Nakajima and Melven R{\"{o}}hrig{-}Z{\"{o}}llner and Tetsuya Sakurai and Faisal Shahzad and Jonas Thies and Gerhard Wellein},
  editor    = {Hans{-}Joachim Bungartz and Severin Reiz and Benjamin Uekermann and Philipp Neumann and Wolfgang E. Nagel},
  title     = {{ESSEX:} Equipping Sparse Solvers For Exascale},
  booktitle = {Software for Exascale Computing - {SPPEXA} 2016-2019},
  series    = {Lecture Notes in Computational Science and Engineering},
  volume    = {136},
  pages     = {143--187},
  publisher = {Springer},
  year      = {2020},
  url       = {https://doi.org/10.1007/978-3-030-47956-5\_7},
  doi       = {10.1007/978-3-030-47956-5\_7},
  timestamp = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/series/lncse/AlappatABFFGHHIKKLNRSSTW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2002.02989, listed under CoRR (2020).
% "MPI" is brace-protected so sentence-casing styles do not print
% "mpi-parallel".
@article{DBLP:journals/corr/abs-2002-02989,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {Desynchronization and Wave Pattern Formation in {MPI}-Parallel and Hybrid Memory-Bound Programs},
  journal    = {CoRR},
  volume     = {abs/2002.02989},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.02989},
  eprinttype = {arXiv},
  eprint     = {2002.02989},
  timestamp  = {Wed, 12 Feb 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-02989.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2002.03344, listed under CoRR (2020).
% Vendor and processor names in the title are brace-protected so
% sentence-casing styles do not lowercase "Intel", "Broadwell" and
% "Cascade Lake".
@article{DBLP:journals/corr/abs-2002-03344,
  author     = {Christie L. Alappat and Johannes Hofmann and Georg Hager and Holger Fehske and Alan R. Bishop and Gerhard Wellein},
  title      = {Understanding {HPC} Benchmark Performance on {Intel} {Broadwell} and {Cascade Lake} Processors},
  journal    = {CoRR},
  volume     = {abs/2002.03344},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.03344},
  eprinttype = {arXiv},
  eprint     = {2002.03344},
  timestamp  = {Wed, 12 Feb 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-03344.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2009.13903, listed under CoRR (2020).
@article{DBLP:journals/corr/abs-2009-13903,
  author     = {Christie L. Alappat and Jan Laukemann and Thomas Gruber and Georg Hager and Gerhard Wellein and Nils Meyer and Tilo Wettig},
  title      = {Performance Modeling of Streaming Kernels and Sparse Matrix-Vector Multiplication on {A64FX}},
  journal    = {CoRR},
  volume     = {abs/2009.13903},
  year       = {2020},
  url        = {https://arxiv.org/abs/2009.13903},
  eprinttype = {arXiv},
  eprint     = {2009.13903},
  timestamp  = {Wed, 30 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2009-13903.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2011.00243, listed under CoRR (2020).
% "CPUs" is brace-protected so sentence-casing styles do not print "cpus".
@article{DBLP:journals/corr/abs-2011-00243,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {An analytic performance model for overlapping execution of memory-bound loop kernels on multicore {CPUs}},
  journal    = {CoRR},
  volume     = {abs/2011.00243},
  year       = {2020},
  url        = {https://arxiv.org/abs/2011.00243},
  eprinttype = {arXiv},
  eprint     = {2011.00243},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-00243.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Supercomput. Front. Innov., volume 6, number 3 (2019).
@article{DBLP:journals/superfri/HornichHHGW19,
  author    = {Julian Hornich and Julian Hammer and Georg Hager and Thomas Gruber and Gerhard Wellein},
  title     = {Collecting and Presenting Reproducible Intranode Stencil Performance: {INSPECT}},
  journal   = {Supercomput. Front. Innov.},
  volume    = {6},
  number    = {3},
  pages     = {4--25},
  year      = {2019},
  url       = {https://doi.org/10.14529/jsfi190301},
  doi       = {10.14529/JSFI190301},
  timestamp = {Fri, 11 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/superfri/HornichHHGW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: IEEE Trans. Parallel Distributed Syst., volume 30, number 3 (2019).
@article{DBLP:journals/tpds/ShahzadTKZHW19,
  author    = {Faisal Shahzad and Jonas Thies and Moritz Kreutzer and Thomas Zeiser and Georg Hager and Gerhard Wellein},
  title     = {{CRAFT:} {A} Library for Easier Application-Level Checkpoint/Restart and Automatic Fault Tolerance},
  journal   = {{IEEE} Trans. Parallel Distributed Syst.},
  volume    = {30},
  number    = {3},
  pages     = {501--514},
  year      = {2019},
  url       = {https://doi.org/10.1109/TPDS.2018.2866794},
  doi       = {10.1109/TPDS.2018.2866794},
  timestamp = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/tpds/ShahzadTKZHW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper in the IEEE CLUSTER 2019 proceedings.
@inproceedings{DBLP:conf/cluster/AfzalHW19,
  author    = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title     = {Propagation and Decay of Injected One-Off Delays on Clusters: {A} Case Study},
  booktitle = {2019 {IEEE} International Conference on Cluster Computing, {CLUSTER} 2019, Albuquerque, NM, USA, September 23-26, 2019},
  pages     = {1--10},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/CLUSTER.2019.8890995},
  doi       = {10.1109/CLUSTER.2019.8890995},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/cluster/AfzalHW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper in the PPAM 2019 proceedings (LNCS 12043, Springer).
% "GPUs" is brace-protected so sentence-casing styles do not print "gpus".
% NOTE(review): the title wording ("a ... Kernels") is kept exactly as
% recorded; confirm against the publisher page before changing it.
@inproceedings{DBLP:conf/ppam/ErnstHTW19,
  author    = {Dominik Ernst and Georg Hager and Jonas Thies and Gerhard Wellein},
  editor    = {Roman Wyrzykowski and Ewa Deelman and Jack J. Dongarra and Konrad Karczewski},
  title     = {Performance Engineering for a Tall {\&} Skinny Matrix Multiplication Kernels on {GPUs}},
  booktitle = {Parallel Processing and Applied Mathematics - 13th International Conference, {PPAM} 2019, Bialystok, Poland, September 8-11, 2019, Revised Selected Papers, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {12043},
  pages     = {505--515},
  publisher = {Springer},
  year      = {2019},
  url       = {https://doi.org/10.1007/978-3-030-43229-4\_43},
  doi       = {10.1007/978-3-030-43229-4\_43},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ppam/ErnstHTW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Workshop paper: PMBS 2019 (held with SC 2019).
@inproceedings{DBLP:conf/sc/LaukemannHHW19,
  author     = {Jan Laukemann and Julian Hammer and Georg Hager and Gerhard Wellein},
  title      = {Automatic Throughput and Critical Path Analysis of x86 and {ARM} Assembly Kernels},
  booktitle  = {2019 {IEEE/ACM} Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems, PMBS@SC 2019, Denver, CO, USA, November 18, 2019},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/PMBS49563.2019.00006},
  doi        = {10.1109/PMBS49563.2019.00006},
  timestamp  = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/LaukemannHHW19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1901.05344 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1901-05344,
  author     = {Francesco Cremonesi and Georg Hager and Gerhard Wellein and Felix Sch{\"{u}}rmann},
  title      = {Analytic Performance Modeling and Analysis of Detailed Neuron Simulations},
  journal    = {CoRR},
  volume     = {abs/1901.05344},
  year       = {2019},
  url        = {http://arxiv.org/abs/1901.05344},
  eprinttype = {arXiv},
  eprint     = {1901.05344},
  timestamp  = {Fri, 01 Feb 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-05344.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1905.03136 (CoRR), 2019.
% "GPUs" is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-1905-03136,
  author     = {Dominik Ernst and Georg Hager and Jonas Thies and Gerhard Wellein},
  title      = {Performance Engineering for a Tall {\&} Skinny Matrix Multiplication Kernel on {GPUs}},
  journal    = {CoRR},
  volume     = {abs/1905.03136},
  year       = {2019},
  url        = {http://arxiv.org/abs/1905.03136},
  eprinttype = {arXiv},
  eprint     = {1905.03136},
  timestamp  = {Mon, 27 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1905-03136.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1905.10603 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1905-10603,
  author     = {Ayesha Afzal and Georg Hager and Gerhard Wellein},
  title      = {Delay Propagation and Overlapping Mechanisms on Clusters: {A} Case Study of Idle Periods based on Workload, Communication, and Delay Granularity},
  journal    = {CoRR},
  volume     = {abs/1905.10603},
  year       = {2019},
  url        = {http://arxiv.org/abs/1905.10603},
  eprinttype = {arXiv},
  eprint     = {1905.10603},
  timestamp  = {Mon, 03 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1905-10603.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1906.08138 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1906-08138,
  author     = {Julian Hornich and Julian Hammer and Georg Hager and Thomas Gruber and Gerhard Wellein},
  title      = {Collecting and Presenting Reproducible Intranode Stencil Performance: {INSPECT}},
  journal    = {CoRR},
  volume     = {abs/1906.08138},
  year       = {2019},
  url        = {http://arxiv.org/abs/1906.08138},
  eprinttype = {arXiv},
  eprint     = {1906.08138},
  timestamp  = {Mon, 24 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1906-08138.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1907.00048 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1907-00048,
  author     = {Johannes Hofmann and Christie L. Alappat and Georg Hager and Dietmar Fey and Gerhard Wellein},
  title      = {Bridging the Architecture Gap: Abstracting Performance-Relevant Properties of Modern Server Processors},
  journal    = {CoRR},
  volume     = {abs/1907.00048},
  year       = {2019},
  url        = {http://arxiv.org/abs/1907.00048},
  eprinttype = {arXiv},
  eprint     = {1907.00048},
  timestamp  = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1907-00048.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1907.06487 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1907-06487,
  author     = {Christie L. Alappat and Georg Hager and Olaf Schenk and Jonas Thies and Achim Basermann and Alan R. Bishop and Holger Fehske and Gerhard Wellein},
  title      = {A Recursive Algebraic Coloring Technique for Hardware-Efficient Symmetric Sparse Matrix-Vector Multiplication},
  journal    = {CoRR},
  volume     = {abs/1907.06487},
  year       = {2019},
  url        = {http://arxiv.org/abs/1907.06487},
  eprinttype = {arXiv},
  eprint     = {1907.06487},
  timestamp  = {Wed, 17 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1907-06487.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1910.00214 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1910-00214,
  author     = {Jan Laukemann and Julian Hammer and Georg Hager and Gerhard Wellein},
  title      = {Automatic Throughput and Critical Path Analysis of x86 and {ARM} Assembly Kernels},
  journal    = {CoRR},
  volume     = {abs/1910.00214},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.00214},
  eprinttype = {arXiv},
  eprint     = {1910.00214},
  timestamp  = {Fri, 04 Oct 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-00214.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Int. J. High Perform. Comput. Appl. 32(5), 2018.
@article{DBLP:journals/ijhpca/ShahzadKZMPHW18,
  author     = {Faisal Shahzad and Moritz Kreutzer and Thomas Zeiser and Rui Machado and Andreas Pieper and Georg Hager and Gerhard Wellein},
  title      = {Building and utilizing fault tolerance support tools for the {GASPI} applications},
  journal    = {Int. J. High Perform. Comput. Appl.},
  volume     = {32},
  number     = {5},
  pages      = {613--626},
  year       = {2018},
  url        = {https://doi.org/10.1177/1094342016677085},
  doi        = {10.1177/1094342016677085},
  timestamp  = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijhpca/ShahzadKZMPHW18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Inform. Spektrum 41(5), 2018.
@article{DBLP:journals/insk/HagerW18,
  author     = {Georg Hager and Gerhard Wellein},
  title      = {Performance Engineering},
  journal    = {Inform. Spektrum},
  volume     = {41},
  number     = {5},
  pages      = {323--327},
  year       = {2018},
  url        = {https://doi.org/10.1007/s00287-018-1122-1},
  doi        = {10.1007/S00287-018-1122-1},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/insk/HagerW18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: ACM Trans. Parallel Comput. 4(3), 2018.
@article{DBLP:journals/topc/MalasHLK18,
  author     = {Tareq M. Malas and Georg Hager and Hatem Ltaief and David E. Keyes},
  title      = {Multidimensional Intratile Parallelization for Memory-Starved Stencil Computations},
  journal    = {{ACM} Trans. Parallel Comput.},
  volume     = {4},
  number     = {3},
  pages      = {12:1--12:32},
  year       = {2018},
  url        = {https://doi.org/10.1145/3155290},
  doi        = {10.1145/3155290},
  timestamp  = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/topc/MalasHLK18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: SBAC-PAD 2018.
@inproceedings{DBLP:conf/sbac-pad/WittmannHJLKRSW18,
  author     = {Markus Wittmann and Georg Hager and Radim Janal{\'{\i}}k and Martin Lanser and Axel Klawonn and Oliver Rheinbach and Olaf Schenk and Gerhard Wellein},
  title      = {Multicore Performance Engineering of Sparse Triangular Solves Using a Modified Roofline Model},
  booktitle  = {30th International Symposium on Computer Architecture and High Performance Computing, {SBAC-PAD} 2018, Lyon, France, September 24-27, 2018},
  pages      = {233--241},
  publisher  = {{IEEE}},
  year       = {2018},
  url        = {https://doi.org/10.1109/CAHPC.2018.8645938},
  doi        = {10.1109/CAHPC.2018.8645938},
  timestamp  = {Fri, 09 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sbac-pad/WittmannHJLKRSW18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Workshop paper: PMBS 2018 (held with SC 2018).
% "Intel" is brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/sc/LaukemannHHHW18,
  author     = {Jan Laukemann and Julian Hammer and Johannes Hofmann and Georg Hager and Gerhard Wellein},
  title      = {Automated Instruction Stream Throughput Prediction for {Intel} and {AMD} Microarchitectures},
  booktitle  = {2018 {IEEE/ACM} Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems, PMBS@SC 2018, Dallas, TX, USA, November 12, 2018},
  pages      = {121--131},
  publisher  = {{IEEE}},
  year       = {2018},
  url        = {https://doi.org/10.1109/PMBS.2018.8641578},
  doi        = {10.1109/PMBS.2018.8641578},
  timestamp  = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/LaukemannHHHW18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: ISC High Performance 2018 (LNCS 10876).
% doi/url carry a raw underscore because these fields are typeset verbatim.
@inproceedings{DBLP:conf/supercomputer/HofmannHF18,
  author     = {Johannes Hofmann and Georg Hager and Dietmar Fey},
  editor     = {Rio Yokota and Mich{\`{e}}le Weiland and David E. Keyes and Carsten Trinitis},
  title      = {On the Accuracy and Usefulness of Analytic Energy Models for Contemporary Multicore Processors},
  booktitle  = {High Performance Computing - 33rd International Conference, {ISC} High Performance 2018, Frankfurt, Germany, June 24-28, 2018, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {10876},
  pages      = {22--43},
  publisher  = {Springer},
  year       = {2018},
  url        = {https://doi.org/10.1007/978-3-319-92040-5_2},
  doi        = {10.1007/978-3-319-92040-5_2},
  timestamp  = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/supercomputer/HofmannHF18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: ISC High Performance 2018 (LNCS 10876).
% "Chebyshev" and "GPGPUs" are brace-protected against sentence-casing;
% doi/url carry a raw underscore because these fields are typeset verbatim.
@inproceedings{DBLP:conf/supercomputer/KreutzerEBFHNW18,
  author     = {Moritz Kreutzer and Dominik Ernst and Alan R. Bishop and Holger Fehske and Georg Hager and Kengo Nakajima and Gerhard Wellein},
  editor     = {Rio Yokota and Mich{\`{e}}le Weiland and David E. Keyes and Carsten Trinitis},
  title      = {{Chebyshev} Filter Diagonalization on Modern Manycore Processors and {GPGPUs}},
  booktitle  = {High Performance Computing - 33rd International Conference, {ISC} High Performance 2018, Frankfurt, Germany, June 24-28, 2018, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {10876},
  pages      = {329--349},
  publisher  = {Springer},
  year       = {2018},
  url        = {https://doi.org/10.1007/978-3-319-92040-5_17},
  doi        = {10.1007/978-3-319-92040-5_17},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/supercomputer/KreutzerEBFHNW18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1803.01618 (CoRR), 2018.
@article{DBLP:journals/corr/abs-1803-01618,
  author     = {Johannes Hofmann and Georg Hager and Dietmar Fey},
  title      = {On the accuracy and usefulness of analytic energy models for contemporary multicore processors},
  journal    = {CoRR},
  volume     = {abs/1803.01618},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.01618},
  eprinttype = {arXiv},
  eprint     = {1803.01618},
  timestamp  = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-01618.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1803.02156 (CoRR), 2018.
% "Chebyshev" and "GPGPUs" are brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-1803-02156,
  author     = {Moritz Kreutzer and Georg Hager and Dominik Ernst and Holger Fehske and Alan R. Bishop and Gerhard Wellein},
  title      = {{Chebyshev} Filter Diagonalization on Modern Manycore Processors and {GPGPUs}},
  journal    = {CoRR},
  volume     = {abs/1803.02156},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.02156},
  eprinttype = {arXiv},
  eprint     = {1803.02156},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-02156.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1809.00912 (CoRR), 2018.
% "Intel" is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-1809-00912,
  author     = {Jan Laukemann and Julian Hammer and Johannes Hofmann and Georg Hager and Gerhard Wellein},
  title      = {Automated Instruction Stream Throughput Prediction for {Intel} and {AMD} Microarchitectures},
  journal    = {CoRR},
  volume     = {abs/1809.00912},
  year       = {2018},
  url        = {http://arxiv.org/abs/1809.00912},
  eprinttype = {arXiv},
  eprint     = {1809.00912},
  timestamp  = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1809-00912.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Concurr. Comput. Pract. Exp. 29(9), 2017.
% "Kahan" is brace-protected so sentence-casing styles keep its capital.
% NOTE(review): the record has no pages/article number; confirm against the publisher.
@article{DBLP:journals/concurrency/HofmannFREHW17,
  author     = {Johannes Hofmann and Dietmar Fey and Michael Riedmann and Jan Eitzinger and Georg Hager and Gerhard Wellein},
  title      = {Performance analysis of the {Kahan}-enhanced scalar product on current multi-core and many-core processors},
  journal    = {Concurr. Comput. Pract. Exp.},
  volume     = {29},
  number     = {9},
  year       = {2017},
  url        = {https://doi.org/10.1002/cpe.3921},
  doi        = {10.1002/CPE.3921},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/concurrency/HofmannFREHW17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Int. J. Parallel Program. 45(5), 2017.
@article{DBLP:journals/ijpp/KreutzerTRPSGBF17,
  author     = {Moritz Kreutzer and Jonas Thies and Melven R{\"{o}}hrig{-}Z{\"{o}}llner and Andreas Pieper and Faisal Shahzad and Martin Galgon and Achim Basermann and Holger Fehske and Georg Hager and Gerhard Wellein},
  title      = {{GHOST:} Building Blocks for High Performance Sparse Linear Algebra on Heterogeneous Systems},
  journal    = {Int. J. Parallel Program.},
  volume     = {45},
  number     = {5},
  pages      = {1046--1072},
  year       = {2017},
  url        = {https://doi.org/10.1007/s10766-016-0464-z},
  doi        = {10.1007/S10766-016-0464-Z},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijpp/KreutzerTRPSGBF17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: IEEE CLUSTER 2017.
@inproceedings{DBLP:conf/cluster/RohlEHW17,
  author     = {Thomas R{\"{o}}hl and Jan Eitzinger and Georg Hager and Gerhard Wellein},
  title      = {{LIKWID} Monitoring Stack: {A} Flexible Framework Enabling Job Specific Performance monitoring for the masses},
  booktitle  = {2017 {IEEE} International Conference on Cluster Computing, {CLUSTER} 2017, Honolulu, HI, USA, September 5-8, 2017},
  pages      = {781--784},
  publisher  = {{IEEE} Computer Society},
  year       = {2017},
  url        = {https://doi.org/10.1109/CLUSTER.2017.115},
  doi        = {10.1109/CLUSTER.2017.115},
  timestamp  = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/cluster/RohlEHW17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: ISC High Performance 2017 (LNCS 10266).
% "Intel" is brace-protected against sentence-casing;
% doi/url carry a raw underscore because these fields are typeset verbatim.
@inproceedings{DBLP:conf/supercomputer/HofmannHWF17,
  author     = {Johannes Hofmann and Georg Hager and Gerhard Wellein and Dietmar Fey},
  editor     = {Julian M. Kunkel and Rio Yokota and Pavan Balaji and David E. Keyes},
  title      = {An Analysis of Core- and Chip-Level Architectural Features in Four Generations of {Intel} Server Processors},
  booktitle  = {High Performance Computing - 32nd International Conference, {ISC} High Performance 2017, Frankfurt, Germany, June 18-22, 2017, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {10266},
  pages      = {294--314},
  publisher  = {Springer},
  year       = {2017},
  url        = {https://doi.org/10.1007/978-3-319-58667-0_16},
  doi        = {10.1007/978-3-319-58667-0_16},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/supercomputer/HofmannHWF17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1702.04653 (CoRR), 2017.
@article{DBLP:journals/corr/HammerEHW17,
  author     = {Julian Hammer and Jan Eitzinger and Georg Hager and Gerhard Wellein},
  title      = {Kerncraft: {A} Tool for Analytic Performance Modeling of Loop Kernels},
  journal    = {CoRR},
  volume     = {abs/1702.04653},
  year       = {2017},
  url        = {http://arxiv.org/abs/1702.04653},
  eprinttype = {arXiv},
  eprint     = {1702.04653},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HammerEHW17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1702.07554 (CoRR), 2017.
% "Intel" is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/HofmannHWF17,
  author     = {Johannes Hofmann and Georg Hager and Gerhard Wellein and Dietmar Fey},
  title      = {An analysis of core- and chip-level architectural features in four generations of {Intel} server processors},
  journal    = {CoRR},
  volume     = {abs/1702.07554},
  year       = {2017},
  url        = {http://arxiv.org/abs/1702.07554},
  eprinttype = {arXiv},
  eprint     = {1702.07554},
  timestamp  = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/HofmannHWF17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1708.01476 (CoRR), 2017.
@article{DBLP:journals/corr/abs-1708-01476,
  author     = {Thomas R{\"{o}}hl and Jan Eitzinger and Georg Hager and Gerhard Wellein},
  title      = {{LIKWID} Monitoring Stack: {A} flexible framework enabling job specific performance monitoring for the masses},
  journal    = {CoRR},
  volume     = {abs/1708.01476},
  year       = {2017},
  url        = {http://arxiv.org/abs/1708.01476},
  eprinttype = {arXiv},
  eprint     = {1708.01476},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1708-01476.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1708.02030 (CoRR), 2017.
@article{DBLP:journals/corr/abs-1708-02030,
  author     = {Faisal Shahzad and Jonas Thies and Moritz Kreutzer and Thomas Zeiser and Georg Hager and Gerhard Wellein},
  title      = {{CRAFT:} {A} library for easier application-level Checkpoint/Restart and Automatic Fault Tolerance},
  journal    = {CoRR},
  volume     = {abs/1708.02030},
  year       = {2017},
  url        = {http://arxiv.org/abs/1708.02030},
  eprinttype = {arXiv},
  eprint     = {1708.02030},
  timestamp  = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1708-02030.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1708.09689 (CoRR), 2017.
% "Dirac" is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-1708-09689,
  author     = {Andreas Pieper and Georg Hager and Holger Fehske},
  title      = {{PVSC-DTM:} {A} domain-specific language and matrix-free stencil code for investigating electronic properties of {Dirac} and topological materials},
  journal    = {CoRR},
  volume     = {abs/1708.09689},
  year       = {2017},
  url        = {http://arxiv.org/abs/1708.09689},
  eprinttype = {arXiv},
  eprint     = {1708.09689},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1708-09689.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1710.04094 (CoRR), 2017.
@article{DBLP:journals/corr/abs-1710-04094,
  author     = {Thomas R{\"{o}}hl and Jan Eitzinger and Georg Hager and Gerhard Wellein},
  title      = {Validation of hardware events for successful performance pattern identification in High Performance Computing},
  journal    = {CoRR},
  volume     = {abs/1710.04094},
  year       = {2017},
  url        = {http://arxiv.org/abs/1710.04094},
  eprinttype = {arXiv},
  eprint     = {1710.04094},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1710-04094.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Concurr. Comput. Pract. Exp. 28(2), 2016.
@article{DBLP:journals/concurrency/HagerKVW16,
  author     = {Georg Hager and Darren J. Kerbyson and Abhinav Vishnu and Gerhard Wellein},
  title      = {Performance and power for highly parallel systems},
  journal    = {Concurr. Comput. Pract. Exp.},
  volume     = {28},
  number     = {2},
  pages      = {187--188},
  year       = {2016},
  url        = {https://doi.org/10.1002/cpe.3761},
  doi        = {10.1002/CPE.3761},
  timestamp  = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/concurrency/HagerKVW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Concurr. Comput. Pract. Exp. 28(2), 2016.
@article{DBLP:journals/concurrency/HagerTHW16,
  author     = {Georg Hager and Jan Treibig and Johannes Habich and Gerhard Wellein},
  title      = {Exploring performance and power properties of modern multi-core chips via simple machine models},
  journal    = {Concurr. Comput. Pract. Exp.},
  volume     = {28},
  number     = {2},
  pages      = {189--210},
  year       = {2016},
  url        = {https://doi.org/10.1002/cpe.3180},
  doi        = {10.1002/CPE.3180},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/concurrency/HagerTHW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Concurr. Comput. Pract. Exp. 28(7), 2016.
% "Boltzmann" is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/concurrency/WittmannHZTW16,
  author     = {Markus Wittmann and Georg Hager and Thomas Zeiser and Jan Treibig and Gerhard Wellein},
  title      = {Chip-level and multi-node analysis of energy-optimized lattice {Boltzmann} {CFD} simulations},
  journal    = {Concurr. Comput. Pract. Exp.},
  volume     = {28},
  number     = {7},
  pages      = {2295--2315},
  year       = {2016},
  url        = {https://doi.org/10.1002/cpe.3489},
  doi        = {10.1002/CPE.3489},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/concurrency/WittmannHZTW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: J. Comput. Phys. 325, 2016.
% "Chebyshev" is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/jcphy/PieperKAGFHLW16,
  author     = {Andreas Pieper and Moritz Kreutzer and Andreas Alvermann and Martin Galgon and Holger Fehske and Georg Hager and Bruno Lang and Gerhard Wellein},
  title      = {High-performance implementation of {Chebyshev} filter diagonalization for interior eigenvalue computations},
  journal    = {J. Comput. Phys.},
  volume     = {325},
  pages      = {226--243},
  year       = {2016},
  url        = {https://doi.org/10.1016/j.jcp.2016.08.027},
  doi        = {10.1016/J.JCP.2016.08.027},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jcphy/PieperKAGFHLW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: ARCS 2016 (LNCS 9637).
% "Intel's" and "Haswell" are brace-protected against sentence-casing;
% doi/url carry a raw underscore because these fields are typeset verbatim.
@inproceedings{DBLP:conf/arcs/HofmannFEHW16,
  author     = {Johannes Hofmann and Dietmar Fey and Jan Eitzinger and Georg Hager and Gerhard Wellein},
  editor     = {Frank Hannig and Jo{\~{a}}o M. P. Cardoso and Thilo Pionteck and Dietmar Fey and Wolfgang Schr{\"{o}}der{-}Preikschat and J{\"{u}}rgen Teich},
  title      = {Analysis of {Intel's} {Haswell} Microarchitecture Using the {ECM} Model and Microbenchmarks},
  booktitle  = {Architecture of Computing Systems - {ARCS} 2016 - 29th International Conference, Nuremberg, Germany, April 4-7, 2016, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {9637},
  pages      = {210--222},
  publisher  = {Springer},
  year       = {2016},
  url        = {https://doi.org/10.1007/978-3-319-30695-7_16},
  doi        = {10.1007/978-3-319-30695-7_16},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/arcs/HofmannFEHW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: IEEE IPDPS 2016.
@inproceedings{DBLP:conf/ipps/MalasHHLPK16,
  author     = {Tareq M. Malas and Julian Hornich and Georg Hager and Hatem Ltaief and Christoph Pflaum and David E. Keyes},
  title      = {Optimization of an Electromagnetics Code with Multicore Wavefront Diamond Blocking and Multi-dimensional Intra-Tile Parallelization},
  booktitle  = {2016 {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2016, Chicago, IL, USA, May 23-27, 2016},
  pages      = {142--151},
  publisher  = {{IEEE} Computer Society},
  year       = {2016},
  url        = {https://doi.org/10.1109/IPDPS.2016.87},
  doi        = {10.1109/IPDPS.2016.87},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ipps/MalasHHLPK16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Book chapter: Software for Exascale Computing - SPPEXA 2013-2015 (LNCSE 113).
% doi/url carry a raw underscore because these fields are typeset verbatim.
@incollection{DBLP:series/lncse/ThiesGSAKPRBFHLW16,
  author     = {Jonas Thies and Martin Galgon and Faisal Shahzad and Andreas Alvermann and Moritz Kreutzer and Andreas Pieper and Melven R{\"{o}}hrig{-}Z{\"{o}}llner and Achim Basermann and Holger Fehske and Georg Hager and Bruno Lang and Gerhard Wellein},
  editor     = {Hans{-}Joachim Bungartz and Philipp Neumann and Wolfgang E. Nagel},
  title      = {Towards an Exascale Enabled Sparse Solver Repository},
  booktitle  = {Software for Exascale Computing - {SPPEXA} 2013-2015},
  series     = {Lecture Notes in Computational Science and Engineering},
  volume     = {113},
  pages      = {295--316},
  publisher  = {Springer},
  year       = {2016},
  url        = {https://doi.org/10.1007/978-3-319-40528-5_13},
  doi        = {10.1007/978-3-319-40528-5_13},
  timestamp  = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/series/lncse/ThiesGSAKPRBFHLW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Book chapter: Software for Exascale Computing - SPPEXA 2013-2015 (LNCSE 113).
% doi/url carry a raw underscore because these fields are typeset verbatim.
@incollection{DBLP:series/lncse/KreutzerTPAGRSBBFHLW16,
  author     = {Moritz Kreutzer and Jonas Thies and Andreas Pieper and Andreas Alvermann and Martin Galgon and Melven R{\"{o}}hrig{-}Z{\"{o}}llner and Faisal Shahzad and Achim Basermann and Alan R. Bishop and Holger Fehske and Georg Hager and Bruno Lang and Gerhard Wellein},
  editor     = {Hans{-}Joachim Bungartz and Philipp Neumann and Wolfgang E. Nagel},
  title      = {Performance Engineering and Energy Efficiency of Building Blocks for Large, Sparse Eigenvalue Computations on Heterogeneous Supercomputers},
  booktitle  = {Software for Exascale Computing - {SPPEXA} 2013-2015},
  series     = {Lecture Notes in Computational Science and Engineering},
  volume     = {113},
  pages      = {317--338},
  publisher  = {Springer},
  year       = {2016},
  url        = {https://doi.org/10.1007/978-3-319-40528-5_14},
  doi        = {10.1007/978-3-319-40528-5_14},
  timestamp  = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/series/lncse/KreutzerTPAGRSBBFHLW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1604.01890 (CoRR), 2016.
% "Kahan" is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/HofmannFREHW16,
  author     = {Johannes Hofmann and Dietmar Fey and Michael Riedmann and Jan Eitzinger and Georg Hager and Gerhard Wellein},
  title      = {Performance analysis of the {Kahan}-enhanced scalar product on current multi- and manycore processors},
  journal    = {CoRR},
  volume     = {abs/1604.01890},
  year       = {2016},
  url        = {http://arxiv.org/abs/1604.01890},
  eprinttype = {arXiv},
  eprint     = {1604.01890},
  timestamp  = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/HofmannFREHW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: SIAM J. Sci. Comput. 37(4), 2015.
% NOTE(review): the record has no pages field; confirm against the publisher.
@article{DBLP:journals/siamsc/MalasHLSWK15,
  author     = {Tareq M. Malas and Georg Hager and Hatem Ltaief and Holger Stengel and Gerhard Wellein and David E. Keyes},
  title      = {Multicore-Optimized Wavefront Diamond Blocking for Optimizing Stencil Updates},
  journal    = {{SIAM} J. Sci. Comput.},
  volume     = {37},
  number     = {4},
  year       = {2015},
  url        = {https://doi.org/10.1137/140991133},
  doi        = {10.1137/140991133},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/siamsc/MalasHLSWK15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: SIAM J. Sci. Comput. 37(6), 2015.
% "Jacobi-Davidson" is brace-protected so sentence-casing styles keep its capitals.
% NOTE(review): the record has no pages field; confirm against the publisher.
@article{DBLP:journals/siamsc/Rohrig-ZollnerT15,
  author     = {Melven R{\"{o}}hrig{-}Z{\"{o}}llner and Jonas Thies and Moritz Kreutzer and Andreas Alvermann and Andreas Pieper and Achim Basermann and Georg Hager and Gerhard Wellein and Holger Fehske},
  title      = {Increasing the Performance of the {Jacobi-Davidson} Method by Blocking},
  journal    = {{SIAM} J. Sci. Comput.},
  volume     = {37},
  number     = {6},
  year       = {2015},
  url        = {https://doi.org/10.1137/140976017},
  doi        = {10.1137/140976017},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/siamsc/Rohrig-ZollnerT15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: IEEE CLUSTER 2015.
@inproceedings{DBLP:conf/cluster/ShahzadKZMPHW15,
  author     = {Faisal Shahzad and Moritz Kreutzer and Thomas Zeiser and Rui Machado and Andreas Pieper and Georg Hager and Gerhard Wellein},
  title      = {Building a Fault Tolerant Application Using the {GASPI} Communication Layer},
  booktitle  = {2015 {IEEE} International Conference on Cluster Computing, {CLUSTER} 2015, Chicago, IL, USA, September 8-11, 2015},
  pages      = {580--587},
  publisher  = {{IEEE} Computer Society},
  year       = {2015},
  url        = {https://doi.org/10.1109/CLUSTER.2015.106},
  doi        = {10.1109/CLUSTER.2015.106},
  timestamp  = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/cluster/ShahzadKZMPHW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: ACM ICS'15.
@inproceedings{DBLP:conf/ics/StengelTHW15,
  author     = {Holger Stengel and Jan Treibig and Georg Hager and Gerhard Wellein},
  editor     = {Laxmi N. Bhuyan and Fred Chong and Vivek Sarkar},
  title      = {Quantifying Performance Bottlenecks of Stencil Computations Using the Execution-Cache-Memory Model},
  booktitle  = {Proceedings of the 29th {ACM} on International Conference on Supercomputing, ICS'15, Newport Beach/Irvine, CA, USA, June 08 - 11, 2015},
  pages      = {207--216},
  publisher  = {{ACM}},
  year       = {2015},
  url        = {https://doi.org/10.1145/2751205.2751240},
  doi        = {10.1145/2751205.2751240},
  timestamp  = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ics/StengelTHW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: IEEE IPDPS 2015.
% NOTE(review): "Polynomal" is kept exactly as recorded; check the published title before changing it.
@inproceedings{DBLP:conf/ipps/KreutzerPHWAF15,
  author     = {Moritz Kreutzer and Andreas Pieper and Georg Hager and Gerhard Wellein and Andreas Alvermann and Holger Fehske},
  title      = {Performance Engineering of the Kernel Polynomal Method on Large-Scale {CPU-GPU} Systems},
  booktitle  = {2015 {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2015, Hyderabad, India, May 25-29, 2015},
  pages      = {417--426},
  publisher  = {{IEEE} Computer Society},
  year       = {2015},
  url        = {https://doi.org/10.1109/IPDPS.2015.76},
  doi        = {10.1109/IPDPS.2015.76},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/ipps/KreutzerPHWAF15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: PPAM 2015 (LNCS 9573).
% "Kahan" is brace-protected against sentence-casing;
% doi/url carry a raw underscore because these fields are typeset verbatim.
@inproceedings{DBLP:conf/ppam/HofmannFREHW15,
  author     = {Johannes Hofmann and Dietmar Fey and Michael Riedmann and Jan Eitzinger and Georg Hager and Gerhard Wellein},
  editor     = {Roman Wyrzykowski and Ewa Deelman and Jack J. Dongarra and Konrad Karczewski and Jacek Kitowski and Kazimierz Wiatr},
  title      = {Performance Analysis of the {Kahan}-Enhanced Scalar Product on Current Multicore Processors},
  booktitle  = {Parallel Processing and Applied Mathematics - 11th International Conference, {PPAM} 2015, Krakow, Poland, September 6-9, 2015. Revised Selected Papers, Part {I}},
  series     = {Lecture Notes in Computer Science},
  volume     = {9573},
  pages      = {63--73},
  publisher  = {Springer},
  year       = {2015},
  url        = {https://doi.org/10.1007/978-3-319-32149-3_7},
  doi        = {10.1007/978-3-319-32149-3_7},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ppam/HofmannFREHW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Workshop paper: PMBS 2015.
% "Kerncraft" (tool name) is brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/sc/HammerHEW15,
  author     = {Julian Hammer and Georg Hager and Jan Eitzinger and Gerhard Wellein},
  editor     = {Stephen A. Jarvis and Steven A. Wright and Simon D. Hammond},
  title      = {Automatic loop kernel analysis and performance modeling with {Kerncraft}},
  booktitle  = {Proceedings of the 6th International Workshop on Performance Modeling, Benchmarking, and Simulation of High Performance Computing Systems, {PMBS} 2015, Austin, Texas, USA, November 15, 2015},
  pages      = {4:1--4:11},
  publisher  = {{ACM}},
  year       = {2015},
  url        = {https://doi.org/10.1145/2832087.2832092},
  doi        = {10.1145/2832087.2832092},
  timestamp  = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/HammerHEW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1505.02586 (CoRR), 2015.
% "Kahan" is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/HofmannFEHW15,
  author     = {Johannes Hofmann and Dietmar Fey and Jan Eitzinger and Georg Hager and Gerhard Wellein},
  title      = {Performance analysis of the {Kahan}-enhanced scalar product on current multicore processors},
  journal    = {CoRR},
  volume     = {abs/1505.02586},
  year       = {2015},
  url        = {http://arxiv.org/abs/1505.02586},
  eprinttype = {arXiv},
  eprint     = {1505.02586},
  timestamp  = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/HofmannFEHW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1505.04628 (CoRR), 2015.
@article{DBLP:journals/corr/ShahzadKZMPHW15,
  author     = {Faisal Shahzad and Moritz Kreutzer and Thomas Zeiser and Rui Machado and Andreas Pieper and Georg Hager and Gerhard Wellein},
  title      = {Building a fault tolerant application using the {GASPI} communication layer},
  journal    = {CoRR},
  volume     = {abs/1505.04628},
  year       = {2015},
  url        = {http://arxiv.org/abs/1505.04628},
  eprinttype = {arXiv},
  eprint     = {1505.04628},
  timestamp  = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/ShahzadKZMPHW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1506.03997 (CoRR), 2015.
@article{DBLP:journals/corr/WittmannZHW15,
  author     = {Markus Wittmann and Thomas Zeiser and Georg Hager and Gerhard Wellein},
  title      = {Short Note on Costs of Floating Point Operations on current x86-64 Architectures: Denormals, Overflow, Underflow, and Division by Zero},
  journal    = {CoRR},
  volume     = {abs/1506.03997},
  year       = {2015},
  url        = {http://arxiv.org/abs/1506.03997},
  eprinttype = {arXiv},
  eprint     = {1506.03997},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/WittmannZHW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1507.08101 (CoRR), 2015.
@article{DBLP:journals/corr/KreutzerTRPSGBF15,
  author     = {Moritz Kreutzer and Jonas Thies and Melven R{\"{o}}hrig{-}Z{\"{o}}llner and Andreas Pieper and Faisal Shahzad and Martin Galgon and Achim Basermann and Holger Fehske and Georg Hager and Gerhard Wellein},
  title      = {{GHOST:} Building blocks for high performance sparse linear algebra on heterogeneous systems},
  journal    = {CoRR},
  volume     = {abs/1507.08101},
  year       = {2015},
  url        = {http://arxiv.org/abs/1507.08101},
  eprinttype = {arXiv},
  eprint     = {1507.08101},
  timestamp  = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/KreutzerTRPSGBF15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv e-print 1509.03778 (CoRR, 2015).
% Title: the tool name "Kerncraft" is braced so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/HammerHEW15,
  author     = {Julian Hammer and
                Georg Hager and
                Jan Eitzinger and
                Gerhard Wellein},
  title      = {Automatic Loop Kernel Analysis and Performance Modeling With {Kerncraft}},
  journal    = {CoRR},
  volume     = {abs/1509.03778},
  year       = {2015},
  url        = {http://arxiv.org/abs/1509.03778},
  eprinttype = {arXiv},
  eprint     = {1509.03778},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HammerHEW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1510.04895 (CoRR, 2015).
% Title: "Chebyshev" is braced so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/corr/PieperKGAFHLW15,
  author     = {Andreas Pieper and
                Moritz Kreutzer and
                Martin Galgon and
                Andreas Alvermann and
                Holger Fehske and
                Georg Hager and
                Bruno Lang and
                Gerhard Wellein},
  title      = {High-performance implementation of {Chebyshev} filter diagonalization for interior eigenvalue computations},
  journal    = {CoRR},
  volume     = {abs/1510.04895},
  year       = {2015},
  url        = {http://arxiv.org/abs/1510.04895},
  eprinttype = {arXiv},
  eprint     = {1510.04895},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/PieperKGAFHLW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1510.04995 (CoRR, 2015).
@article{DBLP:journals/corr/MalasHLK15,
  author     = {Tareq M. Malas and
                Georg Hager and
                Hatem Ltaief and
                David E. Keyes},
  title      = {Multi-dimensional intra-tile parallelization for memory-starved stencil computations},
  journal    = {CoRR},
  volume     = {abs/1510.04995},
  year       = {2015},
  url        = {http://arxiv.org/abs/1510.04995},
  eprinttype = {arXiv},
  eprint     = {1510.04995},
  timestamp  = {Fri, 09 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MalasHLK15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1510.05218 (CoRR, 2015).
@article{DBLP:journals/corr/MalasHHLPK15,
  author     = {Tareq M. Malas and
                Julian Hornich and
                Georg Hager and
                Hatem Ltaief and
                Christoph Pflaum and
                David E. Keyes},
  title      = {Optimization of an electromagnetics code with multicore wavefront diamond blocking and multi-dimensional intra-tile parallelization},
  journal    = {CoRR},
  volume     = {abs/1510.05218},
  year       = {2015},
  url        = {http://arxiv.org/abs/1510.05218},
  eprinttype = {arXiv},
  eprint     = {1510.05218},
  timestamp  = {Fri, 09 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MalasHHLPK15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1511.03639 (CoRR, 2015).
% Title: "Intel's" and "Haswell" are braced so sentence-casing styles keep the product names capitalised.
@article{DBLP:journals/corr/HofmannFEHW15a,
  author     = {Johannes Hofmann and
                Dietmar Fey and
                Jan Eitzinger and
                Georg Hager and
                Gerhard Wellein},
  title      = {Analysis of {Intel's} {Haswell} Microarchitecture Using The {ECM} Model and Microbenchmarks},
  journal    = {CoRR},
  volume     = {abs/1511.03639},
  year       = {2015},
  url        = {http://arxiv.org/abs/1511.03639},
  eprinttype = {arXiv},
  eprint     = {1511.03639},
  timestamp  = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/HofmannFEHW15a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Parallel Process. Lett. 24(3), 2014.
% Title: "Jacobi" is braced so sentence-casing styles keep the proper noun capitalised.
% NOTE(review): entry has volume/number but no pages or article-number field; confirm against the publisher record.
@article{DBLP:journals/ppl/KronawitterSHL14,
  author     = {Stefan Kronawitter and
                Holger Stengel and
                Georg Hager and
                Christian Lengauer},
  title      = {Domain-Specific Optimization of Two {Jacobi} Smoother Kernels and Their Evaluation in the {ECM} Performance Model},
  journal    = {Parallel Process. Lett.},
  volume     = {24},
  number     = {3},
  year       = {2014},
  url        = {https://doi.org/10.1142/S0129626414410047},
  doi        = {10.1142/S0129626414410047},
  timestamp  = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ppl/KronawitterSHL14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, SIAM J. Sci. Comput. 36(5), 2014.
% NOTE(review): entry has volume/number but no pages field; confirm against the publisher record.
@article{DBLP:journals/siamsc/KreutzerHWFB14,
  author     = {Moritz Kreutzer and
                Georg Hager and
                Gerhard Wellein and
                Holger Fehske and
                Alan R. Bishop},
  title      = {A Unified Sparse Matrix Data Format for Efficient General Sparse Matrix-Vector Multiplication on Modern Processors with Wide {SIMD} Units},
  journal    = {{SIAM} J. Sci. Comput.},
  volume     = {36},
  number     = {5},
  year       = {2014},
  url        = {https://doi.org/10.1137/130930352},
  doi        = {10.1137/130930352},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/siamsc/KreutzerHWFB14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, ARCS 2014, pp. 1-8.
% Title: "Intel Xeon Phi" is braced word by word so sentence-casing styles keep the product name capitalised.
@inproceedings{DBLP:conf/arcs/HofmannTHW14,
  author     = {Johannes Hofmann and
                Jan Treibig and
                Georg Hager and
                Gerhard Wellein},
  editor     = {Walter Stechele and
                Thomas Wild},
  title      = {Performance Engineering for a Medical Imaging Application on the {Intel} {Xeon} {Phi} Accelerator},
  booktitle  = {{ARCS} 2014 - 27th International Conference on Architecture of Computing Systems, Workshop Proceedings, February 25-28, 2014, Luebeck, Germany, University of Luebeck, Institute of Computer Engineering},
  pages      = {1--8},
  publisher  = {{VDE} Verlag / {IEEE} Xplore},
  year       = {2014},
  url        = {https://ieeexplore.ieee.org/document/6775080/},
  timestamp  = {Sun, 08 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/arcs/HofmannTHW14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, Euro-Par 2014 Workshops (LNCS 8806), pp. 577-588.
% doi/url hold the raw identifier with a plain underscore (no LaTeX escape), so url-aware styles
% that typeset these fields verbatim produce a working link.
@inproceedings{DBLP:conf/europar/AlvermannBFGHKKLPRSTW14,
  author     = {Andreas Alvermann and
                Achim Basermann and
                Holger Fehske and
                Martin Galgon and
                Georg Hager and
                Moritz Kreutzer and
                Lukas Kr{\"{a}}mer and
                Bruno Lang and
                Andreas Pieper and
                Melven R{\"{o}}hrig{-}Z{\"{o}}llner and
                Faisal Shahzad and
                Jonas Thies and
                Gerhard Wellein},
  editor     = {Lu{\'{\i}}s M. B. Lopes and
                Julius Zilinskas and
                Alexandru Costan and
                Roberto G. Cascella and
                Gabor Kecskemeti and
                Emmanuel Jeannot and
                Mario Cannataro and
                Laura Ricci and
                Siegfried Benkner and
                Salvador Petit and
                Vittorio Scarano and
                Jos{\'{e}} Gracia and
                Sascha Hunold and
                Stephen L. Scott and
                Stefan Lankes and
                Christian Lengauer and
                Jes{\'{u}}s Carretero and
                Jens Breitbart and
                Michael Alexander},
  title      = {{ESSEX:} Equipping Sparse Solvers for Exascale},
  booktitle  = {Euro-Par 2014: Parallel Processing Workshops - Euro-Par 2014 International Workshops, Porto, Portugal, August 25-26, 2014, Revised Selected Papers, Part {II}},
  series     = {Lecture Notes in Computer Science},
  volume     = {8806},
  pages      = {577--588},
  publisher  = {Springer},
  year       = {2014},
  url        = {https://doi.org/10.1007/978-3-319-14313-2_49},
  doi        = {10.1007/978-3-319-14313-2_49},
  timestamp  = {Sun, 12 Nov 2023 02:07:45 +0100},
  biburl     = {https://dblp.org/rec/conf/europar/AlvermannBFGHKKLPRSTW14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, ICPPW 2014, pp. 176-185.
@inproceedings{DBLP:conf/icppw/RoehlTHW14,
  author     = {Thomas Roehl and
                Jan Treibig and
                Georg Hager and
                Gerhard Wellein},
  title      = {Overhead Analysis of Performance Counter Measurements},
  booktitle  = {43rd International Conference on Parallel Processing Workshops, {ICPPW} 2014, Minneapolis, MN, USA, September 9-12, 2014},
  pages      = {176--185},
  publisher  = {{IEEE} Computer Society},
  year       = {2014},
  url        = {https://doi.org/10.1109/ICPPW.2014.34},
  doi        = {10.1109/ICPPW.2014.34},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icppw/RoehlTHW14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, WPMVP 2014, pp. 57-64.
@inproceedings{DBLP:conf/ppopp/HofmannTHW14,
  author     = {Johannes Hofmann and
                Jan Treibig and
                Georg Hager and
                Gerhard Wellein},
  editor     = {Gabriel Tanase and
                Peng Wu and
                Joel Falcou},
  title      = {Comparing the performance of different x86 {SIMD} instruction sets for a medical imaging application on modern multi- and manycore chips},
  booktitle  = {Proceedings of the 2014 Workshop on Programming models for SIMD/Vector processing, {WPMVP} 2014, Orlando, Florida, USA, February 16, 2014},
  pages      = {57--64},
  publisher  = {{ACM}},
  year       = {2014},
  url        = {https://doi.org/10.1145/2568058.2568068},
  doi        = {10.1145/2568058.2568068},
  timestamp  = {Sun, 12 Jun 2022 19:46:08 +0200},
  biburl     = {https://dblp.org/rec/conf/ppopp/HofmannTHW14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1401.3615 (CoRR, 2014).
% Title: "Intel Xeon Phi" is braced word by word so sentence-casing styles keep the product name capitalised.
@article{DBLP:journals/corr/HofmannTHW14,
  author     = {Johannes Hofmann and
                Jan Treibig and
                Georg Hager and
                Gerhard Wellein},
  title      = {Performance Engineering for a Medical Imaging Application on the {Intel} {Xeon} {Phi} Accelerator},
  journal    = {CoRR},
  volume     = {abs/1401.3615},
  year       = {2014},
  url        = {http://arxiv.org/abs/1401.3615},
  eprinttype = {arXiv},
  eprint     = {1401.3615},
  timestamp  = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/HofmannTHW14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1401.7494 (CoRR, 2014).
@article{DBLP:journals/corr/HofmannTHW14a,
  author     = {Johannes Hofmann and
                Jan Treibig and
                Georg Hager and
                Gerhard Wellein},
  title      = {Comparing the Performance of Different x86 {SIMD} Instruction Sets for a Medical Imaging Application on Modern Multi- and Manycore Chips},
  journal    = {CoRR},
  volume     = {abs/1401.7494},
  year       = {2014},
  url        = {http://arxiv.org/abs/1401.7494},
  eprinttype = {arXiv},
  eprint     = {1401.7494},
  timestamp  = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/HofmannTHW14a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1410.0412 (CoRR, 2014).
% Title: "Boltzmann" is braced so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/corr/WittmannZHW14,
  author     = {Markus Wittmann and
                Thomas Zeiser and
                Georg Hager and
                Gerhard Wellein},
  title      = {Modeling and analyzing performance for highly optimized propagation steps of the lattice {Boltzmann} method on sparse lattices},
  journal    = {CoRR},
  volume     = {abs/1410.0412},
  year       = {2014},
  url        = {http://arxiv.org/abs/1410.0412},
  eprinttype = {arXiv},
  eprint     = {1410.0412},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/WittmannZHW14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1410.3060 (CoRR, 2014).
@article{DBLP:journals/corr/MalasHLSWK14,
  author     = {Tareq M. Malas and
                Georg Hager and
                Hatem Ltaief and
                Holger Stengel and
                Gerhard Wellein and
                David E. Keyes},
  title      = {Multicore-optimized wavefront diamond blocking for optimizing stencil updates},
  journal    = {CoRR},
  volume     = {abs/1410.3060},
  year       = {2014},
  url        = {http://arxiv.org/abs/1410.3060},
  eprinttype = {arXiv},
  eprint     = {1410.3060},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MalasHLSWK14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1410.5010 (CoRR, 2014).
% Title: the capitalised model name "Execution-Cache-Memory" is braced so sentence-casing styles
% do not lowercase it.
@article{DBLP:journals/corr/StengelTHW14,
  author     = {Holger Stengel and
                Jan Treibig and
                Georg Hager and
                Gerhard Wellein},
  title      = {Quantifying performance bottlenecks of stencil computations using the {Execution-Cache-Memory} model},
  journal    = {CoRR},
  volume     = {abs/1410.5010},
  year       = {2014},
  url        = {http://arxiv.org/abs/1410.5010},
  eprinttype = {arXiv},
  eprint     = {1410.5010},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/StengelTHW14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1410.5242 (CoRR, 2014).
@article{DBLP:journals/corr/KreutzerHWPAF14,
  author     = {Moritz Kreutzer and
                Georg Hager and
                Gerhard Wellein and
                Andreas Pieper and
                Andreas Alvermann and
                Holger Fehske},
  title      = {Performance Engineering of the Kernel Polynomial Method on Large-Scale {CPU-GPU} Systems},
  journal    = {CoRR},
  volume     = {abs/1410.5242},
  year       = {2014},
  url        = {http://arxiv.org/abs/1410.5242},
  eprinttype = {arXiv},
  eprint     = {1410.5242},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/KreutzerHWPAF14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1410.5561 (CoRR, 2014).
@article{DBLP:journals/corr/MalasHLK14,
  author     = {Tareq M. Malas and
                Georg Hager and
                Hatem Ltaief and
                David E. Keyes},
  title      = {Towards energy efficiency and maximum computational intensity for stencil algorithms using wavefront diamond temporal blocking},
  journal    = {CoRR},
  volume     = {abs/1410.5561},
  year       = {2014},
  url        = {http://arxiv.org/abs/1410.5561},
  eprinttype = {arXiv},
  eprint     = {1410.5561},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MalasHLK14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Comput. Math. Appl. 65(6), pp. 924-935, 2013.
% Title: "Boltzmann" is braced so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/cma/WittmannZHW13,
  author     = {Markus Wittmann and
                Thomas Zeiser and
                Georg Hager and
                Gerhard Wellein},
  title      = {Comparison of different propagation steps for lattice {Boltzmann} methods},
  journal    = {Comput. Math. Appl.},
  volume     = {65},
  number     = {6},
  pages      = {924--935},
  year       = {2013},
  url        = {https://doi.org/10.1016/j.camwa.2012.05.002},
  doi        = {10.1016/J.CAMWA.2012.05.002},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/cma/WittmannZHW13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Int. J. High Perform. Comput. Appl. 27(2), pp. 162-177, 2013.
@article{DBLP:journals/ijhpca/TreibigHHHW13,
  author     = {Jan Treibig and
                Georg Hager and
                Hannes G. Hofmann and
                Joachim Hornegger and
                Gerhard Wellein},
  title      = {Pushing the limits for medical image reconstruction on recent standard multicore processors},
  journal    = {Int. J. High Perform. Comput. Appl.},
  volume     = {27},
  number     = {2},
  pages      = {162--177},
  year       = {2013},
  url        = {https://doi.org/10.1177/1094342012442424},
  doi        = {10.1177/1094342012442424},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijhpca/TreibigHHHW13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Parallel Process. Lett. 23(4), 2013.
% NOTE(review): entry has volume/number but no pages or article-number field; confirm against the publisher record.
@article{DBLP:journals/ppl/ShahzadWKZHW13,
  author     = {Faisal Shahzad and
                Markus Wittmann and
                Moritz Kreutzer and
                Thomas Zeiser and
                Georg Hager and
                Gerhard Wellein},
  title      = {A Survey of Checkpoint/Restart Techniques on Distributed Memory Systems},
  journal    = {Parallel Process. Lett.},
  volume     = {23},
  number     = {4},
  year       = {2013},
  url        = {https://doi.org/10.1142/S0129626413400112},
  doi        = {10.1142/S0129626413400112},
  timestamp  = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ppl/ShahzadWKZHW13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, HPCS 2013, pp. 445-452.
@inproceedings{DBLP:conf/ieeehpcs/ScharpffIHR13,
  author     = {Tobias Scharpff and
                Klaus Iglberger and
                Georg Hager and
                Ulrich R{\"{u}}de},
  title      = {Model-guided performance analysis of the sparse matrix-matrix multiplication},
  booktitle  = {International Conference on High Performance Computing {\&} Simulation, {HPCS} 2013, Helsinki, Finland, July 1-5, 2013},
  pages      = {445--452},
  publisher  = {{IEEE}},
  year       = {2013},
  url        = {https://doi.org/10.1109/HPCSim.2013.6641452},
  doi        = {10.1109/HPCSIM.2013.6641452},
  timestamp  = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ieeehpcs/ScharpffIHR13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, IPDPS 2013 Workshops and PhD Forum, pp. 1708-1716.
@inproceedings{DBLP:conf/ipps/ShahzadWZHW13,
  author     = {Faisal Shahzad and
                Markus Wittmann and
                Thomas Zeiser and
                Georg Hager and
                Gerhard Wellein},
  title      = {An Evaluation of Different {I/O} Techniques for Checkpoint/Restart},
  booktitle  = {2013 {IEEE} International Symposium on Parallel {\&} Distributed Processing, Workshops and Phd Forum, Cambridge, MA, USA, May 20-24, 2013},
  pages      = {1708--1716},
  publisher  = {{IEEE}},
  year       = {2013},
  url        = {https://doi.org/10.1109/IPDPSW.2013.145},
  doi        = {10.1109/IPDPSW.2013.145},
  timestamp  = {Tue, 15 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ipps/ShahzadWZHW13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1302.4280 (CoRR, 2013).
@article{DBLP:journals/corr/abs-1302-4280,
  author     = {Markus Wittmann and
                Georg Hager and
                Thomas Zeiser and
                Gerhard Wellein},
  title      = {Asynchronous {MPI} for the Masses},
  journal    = {CoRR},
  volume     = {abs/1302.4280},
  year       = {2013},
  url        = {http://arxiv.org/abs/1302.4280},
  eprinttype = {arXiv},
  eprint     = {1302.4280},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1302-4280.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1303.1651 (CoRR, 2013).
@article{DBLP:journals/corr/abs-1303-1651,
  author     = {Tobias Scharpff and
                Klaus Iglberger and
                Georg Hager and
                Ulrich R{\"{u}}de},
  title      = {Model-guided Performance Analysis of the Sparse Matrix-Matrix Multiplication},
  journal    = {CoRR},
  volume     = {abs/1303.1651},
  year       = {2013},
  url        = {http://arxiv.org/abs/1303.1651},
  eprinttype = {arXiv},
  eprint     = {1303.1651},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1303-1651.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1303.4538 (CoRR, 2013).
@article{DBLP:journals/corr/abs-1303-4538,
  author     = {Christoph Scheit and
                Georg Hager and
                Jan Treibig and
                Stefan Becker and
                Gerhard Wellein},
  title      = {Optimization of {FASTEST-3D} for Modern Multicore Systems},
  journal    = {CoRR},
  volume     = {abs/1303.4538},
  year       = {2013},
  url        = {http://arxiv.org/abs/1303.4538},
  eprinttype = {arXiv},
  eprint     = {1303.4538},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1303-4538.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1304.7664 (CoRR, 2013).
% Title: "Boltzmann" is braced so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/corr/abs-1304-7664,
  author     = {Markus Wittmann and
                Georg Hager and
                Thomas Zeiser and
                Gerhard Wellein},
  title      = {An analysis of energy-optimized lattice-{Boltzmann} {CFD} simulations from the chip to the highly parallel level},
  journal    = {CoRR},
  volume     = {abs/1304.7664},
  year       = {2013},
  url        = {http://arxiv.org/abs/1304.7664},
  eprinttype = {arXiv},
  eprint     = {1304.7664},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1304-7664.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1307.6209 (CoRR, 2013).
@article{DBLP:journals/corr/KreutzerHWFB13,
  author     = {Moritz Kreutzer and
                Georg Hager and
                Gerhard Wellein and
                Holger Fehske and
                Alan R. Bishop},
  title      = {A unified sparse matrix data format for modern processors with wide {SIMD} units},
  journal    = {CoRR},
  volume     = {abs/1307.6209},
  year       = {2013},
  url        = {http://arxiv.org/abs/1307.6209},
  eprinttype = {arXiv},
  eprint     = {1307.6209},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/KreutzerHWFB13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, SIAM J. Sci. Comput. 34(2), 2012.
% NOTE(review): entry has volume/number but no pages field; confirm against the publisher record.
@article{DBLP:journals/siamsc/IglbergerHTR12,
  author     = {Klaus Iglberger and
                Georg Hager and
                Jan Treibig and
                Ulrich R{\"{u}}de},
  title      = {Expression Templates Revisited: {A} Performance Analysis of Current Methodologies},
  journal    = {{SIAM} J. Sci. Comput.},
  volume     = {34},
  number     = {2},
  year       = {2012},
  url        = {https://doi.org/10.1137/110830125},
  doi        = {10.1137/110830125},
  timestamp  = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/siamsc/IglbergerHTR12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, Euro-Par 2012 Workshops (LNCS 7640), pp. 393-394.
% doi/url hold the raw identifier with a plain underscore (no LaTeX escape), so url-aware styles
% that typeset these fields verbatim produce a working link.
@inproceedings{DBLP:conf/europar/Hager12,
  author     = {Georg Hager},
  editor     = {Ioannis Caragiannis and
                Michael Alexander and
                Rosa M. Badia and
                Mario Cannataro and
                Alexandru Costan and
                Marco Danelutto and
                Fr{\'{e}}d{\'{e}}ric Desprez and
                Bettina Krammer and
                Julio Sahuquillo and
                Stephen L. Scott and
                Josef Weidendorfer},
  title      = {Performance Engineering: From Numbers to Insight},
  booktitle  = {Euro-Par 2012: Parallel Processing Workshops - BDMC, CGWS, HeteroPar, HiBB, OMHI, Paraphrase, PROPER, Resilience, UCHPC, VHPC, Rhodes Islands, Greece, August 27-31, 2012. Revised Selected Papers},
  series     = {Lecture Notes in Computer Science},
  volume     = {7640},
  pages      = {393--394},
  publisher  = {Springer},
  year       = {2012},
  url        = {https://doi.org/10.1007/978-3-642-36949-0_44},
  doi        = {10.1007/978-3-642-36949-0_44},
  timestamp  = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/europar/Hager12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, Euro-Par 2012 Workshops (LNCS 7640), pp. 451-460.
% doi/url hold the raw identifier with a plain underscore (no LaTeX escape), so url-aware styles
% that typeset these fields verbatim produce a working link.
@inproceedings{DBLP:conf/europar/TreibigHW12,
  author     = {Jan Treibig and
                Georg Hager and
                Gerhard Wellein},
  editor     = {Ioannis Caragiannis and
                Michael Alexander and
                Rosa M. Badia and
                Mario Cannataro and
                Alexandru Costan and
                Marco Danelutto and
                Fr{\'{e}}d{\'{e}}ric Desprez and
                Bettina Krammer and
                Julio Sahuquillo and
                Stephen L. Scott and
                Josef Weidendorfer},
  title      = {Performance Patterns and Hardware Metrics on Modern Multicore Processors: Best Practices for Performance Engineering},
  booktitle  = {Euro-Par 2012: Parallel Processing Workshops - BDMC, CGWS, HeteroPar, HiBB, OMHI, Paraphrase, PROPER, Resilience, UCHPC, VHPC, Rhodes Islands, Greece, August 27-31, 2012. Revised Selected Papers},
  series     = {Lecture Notes in Computer Science},
  volume     = {7640},
  pages      = {451--460},
  publisher  = {Springer},
  year       = {2012},
  url        = {https://doi.org/10.1007/978-3-642-36949-0_50},
  doi        = {10.1007/978-3-642-36949-0_50},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/europar/TreibigHW12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, HPCS 2012, pp. 367-373.
@inproceedings{DBLP:conf/ieeehpcs/IglbergerHTR12,
  author     = {Klaus Iglberger and
                Georg Hager and
                Jan Treibig and
                Ulrich R{\"{u}}de},
  editor     = {Waleed W. Smari and
                Vesna Zeljkovic},
  title      = {High performance smart expression template math libraries},
  booktitle  = {2012 International Conference on High Performance Computing {\&} Simulation, {HPCS} 2012, Madrid, Spain, July 2-6, 2012},
  pages      = {367--373},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/HPCSim.2012.6266939},
  doi        = {10.1109/HPCSIM.2012.6266939},
  timestamp  = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ieeehpcs/IglbergerHTR12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, IPDPS 2012 Workshops and PhD Forum, pp. 1696-1702.
@inproceedings{DBLP:conf/ipps/KreutzerHWFBB12,
  author     = {Moritz Kreutzer and
                Georg Hager and
                Gerhard Wellein and
                Holger Fehske and
                Achim Basermann and
                Alan R. Bishop},
  title      = {Sparse Matrix-vector Multiplication on {GPGPU} Clusters: {A} New Storage Format and a Scalable Implementation},
  booktitle  = {26th {IEEE} International Parallel and Distributed Processing Symposium Workshops {\&} PhD Forum, {IPDPS} 2012, Shanghai, China, May 21-25, 2012},
  pages      = {1696--1702},
  publisher  = {{IEEE} Computer Society},
  year       = {2012},
  url        = {https://doi.org/10.1109/IPDPSW.2012.211},
  doi        = {10.1109/IPDPSW.2012.211},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/ipps/KreutzerHWFBB12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1206.3738 (CoRR, 2012).
% Title: the acronym "HPM" is braced so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-1206-3738,
  author     = {Jan Treibig and
                Georg Hager and
                Gerhard Wellein},
  title      = {Best practices for {HPM}-assisted performance engineering on modern multicore processors},
  journal    = {CoRR},
  volume     = {abs/1206.3738},
  year       = {2012},
  url        = {http://arxiv.org/abs/1206.3738},
  eprinttype = {arXiv},
  eprint     = {1206.3738},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1206-3738.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1208.2908 (CoRR, 2012).
@article{DBLP:journals/corr/abs-1208-2908,
  author     = {Georg Hager and
                Jan Treibig and
                Johannes Habich and
                Gerhard Wellein},
  title      = {Exploring performance and power properties of modern multicore chips via simple machine models},
  journal    = {CoRR},
  volume     = {abs/1208.2908},
  year       = {2012},
  url        = {http://arxiv.org/abs/1208.2908},
  eprinttype = {arXiv},
  eprint     = {1208.2908},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1208-2908.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Book, CRC Press, 2011 (ISBN 978-1-439-81192-4).
@book{DBLP:books/daglib/0033670,
  author     = {Georg Hager and
                Gerhard Wellein},
  title      = {Introduction to High Performance Computing for Scientists and Engineers},
  series     = {Chapman and Hall / {CRC} computational science series},
  publisher  = {{CRC} Press},
  year       = {2011},
  url        = {http://www.crcpress.com/product/isbn/9781439811924},
  isbn       = {978-1-439-81192-4},
  timestamp  = {Mon, 01 Sep 2014 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/books/daglib/0033670.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Adv. Eng. Softw. 42(5), pp. 266-272, 2011.
% Title: "Boltzmann", "nVIDIA" and "GPUs" are braced so sentence-casing styles keep their
% capitalisation (the other mixed-case tokens were already protected).
@article{DBLP:journals/aes/HabichZHW11,
  author     = {Johannes Habich and
                Thomas Zeiser and
                Georg Hager and
                Gerhard Wellein},
  title      = {Performance analysis and optimization strategies for a {D3Q19} lattice {Boltzmann} kernel on {nVIDIA} {GPUs} using {CUDA}},
  journal    = {Adv. Eng. Softw.},
  volume     = {42},
  number     = {5},
  pages      = {266--272},
  year       = {2011},
  url        = {https://doi.org/10.1016/j.advengsoft.2010.10.007},
  doi        = {10.1016/J.ADVENGSOFT.2010.10.007},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/aes/HabichZHW11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, J. Comput. Sci. 2(2), pp. 130-137, 2011.
@article{DBLP:journals/jocs/TreibigWH11,
  author     = {Jan Treibig and
                Gerhard Wellein and
                Georg Hager},
  title      = {Efficient multicore-aware parallelization strategies for iterative stencil computations},
  journal    = {J. Comput. Sci.},
  volume     = {2},
  number     = {2},
  pages      = {130--137},
  year       = {2011},
  url        = {https://doi.org/10.1016/j.jocs.2011.01.010},
  doi        = {10.1016/J.JOCS.2011.01.010},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jocs/TreibigWH11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Parallel Comput. 37(9), pp. 536-549, 2011.
% Title: "Boltzmann" is braced so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/pc/FeichtingerHKHRW11,
  author     = {Christian Feichtinger and
                Johannes Habich and
                Harald K{\"{o}}stler and
                Georg Hager and
                Ulrich R{\"{u}}de and
                Gerhard Wellein},
  title      = {A flexible Patch-based lattice {Boltzmann} parallelization approach for heterogeneous {GPU-CPU} clusters},
  journal    = {Parallel Comput.},
  volume     = {37},
  number     = {9},
  pages      = {536--549},
  year       = {2011},
  url        = {https://doi.org/10.1016/j.parco.2011.03.005},
  doi        = {10.1016/J.PARCO.2011.03.005},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/pc/FeichtingerHKHRW11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Parallel Process. Lett. 21(3), pp. 339-358, 2011.
@article{DBLP:journals/ppl/SchubertFHW11,
  author     = {Gerald Schubert and
                Holger Fehske and
                Georg Hager and
                Gerhard Wellein},
  title      = {Hybrid-Parallel Sparse Matrix-Vector Multiplication with Explicit Communication Overlap on Current Multicore-Based Systems},
  journal    = {Parallel Process. Lett.},
  volume     = {21},
  number     = {3},
  pages      = {339--358},
  year       = {2011},
  url        = {https://doi.org/10.1142/S0129626411000254},
  doi        = {10.1142/S0129626411000254},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ppl/SchubertFHW11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, IPDPS 2011 Workshop Proceedings, pp. 1751-1758.
% Title: "MPI+OpenMP" is braced so sentence-casing styles do not lowercase the acronym/API names.
@inproceedings{DBLP:conf/ipps/SchubertHFW11,
  author     = {Gerald Schubert and
                Georg Hager and
                Holger Fehske and
                Gerhard Wellein},
  title      = {Parallel Sparse Matrix-Vector Multiplication as a Test Case for Hybrid {MPI+OpenMP} Programming},
  booktitle  = {25th {IEEE} International Symposium on Parallel and Distributed Processing, {IPDPS} 2011, Anchorage, Alaska, USA, 16-20 May 2011 - Workshop Proceedings},
  pages      = {1751--1758},
  publisher  = {{IEEE}},
  year       = {2011},
  url        = {https://doi.org/10.1109/IPDPS.2011.332},
  doi        = {10.1109/IPDPS.2011.332},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/ipps/SchubertHFW11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, Tools for High Performance Computing 2011, pp. 27-36.
% doi/url hold the raw identifier with a plain underscore (no LaTeX escape), so url-aware styles
% that typeset these fields verbatim produce a working link.
@inproceedings{DBLP:conf/ptw/TreibigHW11,
  author     = {Jan Treibig and
                Georg Hager and
                Gerhard Wellein},
  editor     = {Holger Brunst and
                Matthias S. M{\"{u}}ller and
                Wolfgang E. Nagel and
                Michael M. Resch},
  title      = {likwid-bench: An Extensible Microbenchmarking Platform for x86 Multicore Compute Nodes},
  booktitle  = {Tools for High Performance Computing 2011 - Proceedings of the 5th International Workshop on Parallel Tools for High Performance Computing, ZIH, Dresden, September 2011},
  pages      = {27--36},
  publisher  = {Springer},
  year       = {2011},
  url        = {https://doi.org/10.1007/978-3-642-31476-6_3},
  doi        = {10.1007/978-3-642-31476-6_3},
  timestamp  = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ptw/TreibigHW11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Poster abstract, SC 2011 Companion Volume, pp. 29-30.
@inproceedings{DBLP:conf/sc/TreibigHWM11,
  author     = {Jan Treibig and
                Georg Hager and
                Gerhard Wellein and
                Michael Meier},
  editor     = {Scott A. Lathrop and
                Jim Costa and
                William Kramer},
  title      = {Poster: {LIKWID:} lightweight performance tools},
  booktitle  = {Conference on High Performance Computing Networking, Storage and Analysis - Companion Volume, {SC} 2011, Seattle, WA, USA, November 12-18, 2011},
  pages      = {29--30},
  publisher  = {{ACM}},
  year       = {2011},
  url        = {https://doi.org/10.1145/2148600.2148616},
  doi        = {10.1145/2148600.2148616},
  timestamp  = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/TreibigHWM11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1101.0091 (CoRR, 2011).
% Title: "MPI+OpenMP" is braced so sentence-casing styles do not lowercase the acronym/API names.
@article{DBLP:journals/corr/abs-1101-0091,
  author     = {Gerald Schubert and
                Georg Hager and
                Holger Fehske and
                Gerhard Wellein},
  title      = {Parallel sparse matrix-vector multiplication as a test case for hybrid {MPI+OpenMP} programming},
  journal    = {CoRR},
  volume     = {abs/1101.0091},
  year       = {2011},
  url        = {http://arxiv.org/abs/1101.0091},
  eprinttype = {arXiv},
  eprint     = {1101.0091},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1101-0091.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1101.0093 (CoRR, 2011).
% Title: "ccNUMA" and "OpenMP" are braced so sentence-casing styles keep their mixed-case spelling.
@article{DBLP:journals/corr/abs-1101-0093,
  author     = {Markus Wittmann and
                Georg Hager},
  title      = {Optimizing {ccNUMA} locality for task-parallel execution under {OpenMP} and {TBB} on multicore-based systems},
  journal    = {CoRR},
  volume     = {abs/1101.0093},
  year       = {2011},
  url        = {http://arxiv.org/abs/1101.0093},
  eprinttype = {arXiv},
  eprint     = {1101.0093},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1101-0093.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1104.1729 (CoRR, 2011).
@article{DBLP:journals/corr/abs-1104-1729,
  author     = {Klaus Iglberger and
                Georg Hager and
                Jan Treibig and
                Ulrich R{\"{u}}de},
  title      = {Expression Templates Revisited: {A} Performance Analysis of the Current {ET} Methodology},
  journal    = {CoRR},
  volume     = {abs/1104.1729},
  year       = {2011},
  url        = {http://arxiv.org/abs/1104.1729},
  eprinttype = {arXiv},
  eprint     = {1104.1729},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1104-1729.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1104.4874 (CoRR, 2011).
@article{DBLP:journals/corr/abs-1104-4874,
  author     = {Jan Treibig and
                Georg Hager and
                Gerhard Wellein},
  title      = {{LIKWID:} Lightweight Performance Tools},
  journal    = {CoRR},
  volume     = {abs/1104.4874},
  year       = {2011},
  url        = {http://arxiv.org/abs/1104.4874},
  eprinttype = {arXiv},
  eprint     = {1104.4874},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1104-4874.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1104.5243 (CoRR, 2011).
@article{DBLP:journals/corr/abs-1104-5243,
  author     = {Jan Treibig and
                Georg Hager and
                Hannes G. Hofmann and
                Joachim Hornegger and
                Gerhard Wellein},
  title      = {Pushing the limits for medical image reconstruction on recent standard multicore processors},
  journal    = {CoRR},
  volume     = {abs/1104.5243},
  year       = {2011},
  url        = {http://arxiv.org/abs/1104.5243},
  eprinttype = {arXiv},
  eprint     = {1104.5243},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1104-5243.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1106.5908 (CoRR, 2011).
@article{DBLP:journals/corr/abs-1106-5908,
  author     = {Gerald Schubert and
                Holger Fehske and
                Georg Hager and
                Gerhard Wellein},
  title      = {Hybrid-parallel sparse matrix-vector multiplication with explicit communication overlap on current multicore-based systems},
  journal    = {CoRR},
  volume     = {abs/1106.5908},
  year       = {2011},
  url        = {http://arxiv.org/abs/1106.5908},
  eprinttype = {arXiv},
  eprint     = {1106.5908},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1106-5908.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1111.0922 (CoRR, 2011).
% Title: "Boltzmann" is braced so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/corr/abs-1111-0922,
  author     = {Markus Wittmann and
                Thomas Zeiser and
                Georg Hager and
                Gerhard Wellein},
  title      = {Comparison of different Propagation Steps for the Lattice {Boltzmann} Method},
  journal    = {CoRR},
  volume     = {abs/1111.0922},
  year       = {2011},
  url        = {http://arxiv.org/abs/1111.0922},
  eprinttype = {arXiv},
  eprint     = {1111.0922},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1111-0922.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv e-print 1111.1129 (CoRR, 2011).
% Title: "Boltzmann" is braced so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/corr/abs-1111-1129,
  author     = {Markus Wittmann and
                Thomas Zeiser and
                Georg Hager and
                Gerhard Wellein},
  title      = {Domain decomposition and locality optimization for large-scale lattice {Boltzmann} simulations},
  journal    = {CoRR},
  volume     = {abs/1111.1129},
  year       = {2011},
  url        = {http://arxiv.org/abs/1111.1129},
  eprinttype = {arXiv},
  eprint     = {1111.1129},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1111-1129.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 1112.0850 (2011).
% The proper noun and the acronym in the title are braced so sentence-casing styles keep their capitals.
@article{DBLP:journals/corr/abs-1112-0850,
  author     = {Habich, Johannes and Feichtinger, Christian and K{\"{o}}stler, Harald and Hager, Georg and Wellein, Gerhard},
  title      = {Performance engineering for the Lattice {Boltzmann} method on {GPGPUs}: Architectural requirements and performance results},
  journal    = {CoRR},
  volume     = {abs/1112.0850},
  year       = {2011},
  url        = {http://arxiv.org/abs/1112.0850},
  eprinttype = {arXiv},
  eprint     = {1112.0850},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1112-0850.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 1112.5588 (2011): sparse matrix-vector multiplication on GPGPU clusters.
@article{DBLP:journals/corr/abs-1112-5588,
  author     = {Kreutzer, Moritz and Hager, Georg and Wellein, Gerhard and Fehske, Holger and Basermann, Achim and Bishop, Alan R.},
  title      = {Sparse matrix-vector multiplication on {GPGPU} clusters: {A} new storage format and a scalable implementation},
  journal    = {CoRR},
  volume     = {abs/1112.5588},
  year       = {2011},
  url        = {http://arxiv.org/abs/1112.5588},
  eprinttype = {arXiv},
  eprint     = {1112.5588},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1112-5588.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Parallel Process. Lett. 20(4), 2010: temporal blocking of stencil codes.
@article{DBLP:journals/ppl/WittmannHTW10,
  author    = {Wittmann, Markus and Hager, Georg and Treibig, Jan and Wellein, Gerhard},
  title     = {Leveraging Shared Caches for Parallel Temporal Blocking of Stencil Codes on Multicore Processors and Clusters},
  journal   = {Parallel Process. Lett.},
  volume    = {20},
  number    = {4},
  pages     = {359--376},
  year      = {2010},
  url       = {https://doi.org/10.1142/S0129626410000296},
  doi       = {10.1142/S0129626410000296},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ppl/WittmannHTW10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (Competence in High Performance Computing 2010): LIKWID performance tools.
@inproceedings{DBLP:conf/chpc/TreibigHW10,
  author    = {Treibig, Jan and Hager, Georg and Wellein, Gerhard},
  editor    = {Bischof, Christian H. and Hegering, Heinz{-}Gerd and Nagel, Wolfgang E. and Wittum, Gabriel},
  title     = {{LIKWID:} Lightweight Performance Tools},
  booktitle = {Competence in High Performance Computing 2010 - Proceedings of an International Conference on Competence in High Performance Computing, Schloss Schwetzingen, Germany, June 2010},
  pages     = {165--175},
  publisher = {Springer},
  year      = {2010},
  url       = {https://doi.org/10.1007/978-3-642-24025-6\_14},
  doi       = {10.1007/978-3-642-24025-6\_14},
  timestamp = {Wed, 26 Jun 2019 16:38:15 +0200},
  biburl    = {https://dblp.org/rec/conf/chpc/TreibigHW10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (ICPP Workshops 2010): LIKWID tool suite for x86 multicore environments.
@inproceedings{DBLP:conf/icppw/TreibigHW10,
  author    = {Treibig, Jan and Hager, Georg and Wellein, Gerhard},
  editor    = {Lee, Wang{-}Chien and Yuan, Xin},
  title     = {{LIKWID:} {A} Lightweight Performance-Oriented Tool Suite for x86 Multicore Environments},
  booktitle = {39th International Conference on Parallel Processing, {ICPP} Workshops 2010, San Diego, California, USA, 13-16 September 2010},
  pages     = {207--216},
  publisher = {{IEEE} Computer Society},
  year      = {2010},
  url       = {https://doi.org/10.1109/ICPPW.2010.38},
  doi       = {10.1109/ICPPW.2010.38},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icppw/TreibigHW10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (IPDPS 2010 workshop proceedings): multicore-aware temporal blocking of stencil codes.
@inproceedings{DBLP:conf/ipps/WittmannHW10,
  author    = {Wittmann, Markus and Hager, Georg and Wellein, Gerhard},
  title     = {Multicore-aware parallel temporal blocking of stencil codes for shared and distributed memory},
  booktitle = {24th {IEEE} International Symposium on Parallel and Distributed Processing, {IPDPS} 2010, Atlanta, Georgia, USA, 19-23 April 2010 - Workshop Proceedings},
  pages     = {1--7},
  publisher = {{IEEE}},
  year      = {2010},
  url       = {https://doi.org/10.1109/IPDPSW.2010.5470813},
  doi       = {10.1109/IPDPSW.2010.5470813},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ipps/WittmannHW10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 1004.1741 (2010): parallelization strategies for iterative stencil computations.
@article{DBLP:journals/corr/abs-1004-1741,
  author     = {Treibig, Jan and Wellein, Gerhard and Hager, Georg},
  title      = {Efficient multicore-aware parallelization strategies for iterative stencil computations},
  journal    = {CoRR},
  volume     = {abs/1004.1741},
  year       = {2010},
  url        = {http://arxiv.org/abs/1004.1741},
  eprinttype = {arXiv},
  eprint     = {1004.1741},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1004-1741.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 1004.4431 (2010): LIKWID tool suite for x86 multicore environments.
@article{DBLP:journals/corr/abs-1004-4431,
  author     = {Treibig, Jan and Hager, Georg and Wellein, Gerhard},
  title      = {{LIKWID:} {A} lightweight performance-oriented tool suite for x86 multicore environments},
  journal    = {CoRR},
  volume     = {abs/1004.4431},
  year       = {2010},
  url        = {http://arxiv.org/abs/1004.4431},
  eprinttype = {arXiv},
  eprint     = {1004.4431},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1004-4431.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 1006.3148 (2010): shared caches for temporal blocking of stencil codes.
@article{DBLP:journals/corr/abs-1006-3148,
  author     = {Wittmann, Markus and Hager, Georg and Treibig, Jan and Wellein, Gerhard},
  title      = {Leveraging shared caches for parallel temporal blocking of stencil codes on multicore processors and clusters},
  journal    = {CoRR},
  volume     = {abs/1006.3148},
  year       = {2010},
  url        = {http://arxiv.org/abs/1006.3148},
  eprinttype = {arXiv},
  eprint     = {1006.3148},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1006-3148.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 1007.1388 (2010).
% The proper noun in the title is braced so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-1007-1388,
  author     = {Feichtinger, Christian and Habich, Johannes and K{\"{o}}stler, Harald and Hager, Georg and R{\"{u}}de, Ulrich and Wellein, Gerhard},
  title      = {A Flexible Patch-Based Lattice {Boltzmann} Parallelization Approach for Heterogeneous {GPU-CPU} Clusters},
  journal    = {CoRR},
  volume     = {abs/1007.1388},
  year       = {2010},
  url        = {http://arxiv.org/abs/1007.1388},
  eprinttype = {arXiv},
  eprint     = {1007.1388},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1007-1388.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Parallel Process. Lett. 19(4), 2009.
% Proper nouns in the title are braced so sentence-casing styles keep their capitals.
@article{DBLP:journals/ppl/ZeiserHW09,
  author    = {Zeiser, Thomas and Hager, Georg and Wellein, Gerhard},
  title     = {Benchmark Analysis and Application Results for Lattice {Boltzmann} Simulations on {NEC} {SX} Vector and {Intel} {Nehalem} Systems},
  journal   = {Parallel Process. Lett.},
  volume    = {19},
  number    = {4},
  pages     = {491--511},
  year      = {2009},
  url       = {https://doi.org/10.1142/S0129626409000389},
  doi       = {10.1142/S0129626409000389},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ppl/ZeiserHW09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (COMPSAC 2009, Volume 1): temporal blocking via wavefront parallelization.
@inproceedings{DBLP:conf/compsac/WelleinHZWF09,
  author    = {Wellein, Gerhard and Hager, Georg and Zeiser, Thomas and Wittmann, Markus and Fehske, Holger},
  editor    = {Ahamed, Sheikh Iqbal and Bertino, Elisa and Chang, Carl K. and Getov, Vladimir and Liu, Lin and Hua, Ming and Subramanyan, Rajesh},
  title     = {Efficient Temporal Blocking for Stencil Computations by Multicore-Aware Wavefront Parallelization},
  booktitle = {Proceedings of the 33rd Annual {IEEE} International Computer Software and Applications Conference, {COMPSAC} 2009, Seattle, Washington, USA, July 20-24, 2009. Volume 1},
  pages     = {579--586},
  publisher = {{IEEE} Computer Society},
  year      = {2009},
  url       = {https://doi.org/10.1109/COMPSAC.2009.82},
  doi       = {10.1109/COMPSAC.2009.82},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/compsac/WelleinHZWF09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (IPDPS 2009): performance results from the NEC SX-9.
@inproceedings{DBLP:conf/ipps/ZeiserHW09,
  author    = {Zeiser, Thomas and Hager, Georg and Wellein, Gerhard},
  title     = {The world's fastest {CPU} and {SMP} node: Some performance results from the {NEC} {SX-9}},
  booktitle = {23rd {IEEE} International Symposium on Parallel and Distributed Processing, {IPDPS} 2009, Rome, Italy, May 23-29, 2009},
  pages     = {1--8},
  publisher = {{IEEE}},
  year      = {2009},
  url       = {https://doi.org/10.1109/IPDPS.2009.5161089},
  doi       = {10.1109/IPDPS.2009.5161089},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ipps/ZeiserHW09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (PDP 2009): hybrid MPI/OpenMP programming on multi-core SMP clusters.
% Fixes: month spelling in booktitle; acronyms in the title braced against sentence-casing;
% editor "El Baz" written in comma form so the two-word surname is not split.
@inproceedings{DBLP:conf/pdp/RabenseifnerHJ09,
  author    = {Rabenseifner, Rolf and Hager, Georg and Jost, Gabriele},
  editor    = {El Baz, Didier and Spies, Fran{\c{c}}ois and Gross, Tom},
  title     = {Hybrid {MPI/OpenMP} Parallel Programming on Clusters of Multi-Core {SMP} Nodes},
  booktitle = {Proceedings of the 17th Euromicro International Conference on Parallel, Distributed and Network-Based Processing, {PDP} 2009, Weimar, Germany, 18-20 February 2009},
  pages     = {427--436},
  publisher = {{IEEE} Computer Society},
  year      = {2009},
  url       = {https://doi.org/10.1109/PDP.2009.43},
  doi       = {10.1109/PDP.2009.43},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/pdp/RabenseifnerHJ09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (PPAM 2009, LNCS 6067): performance model for bandwidth-limited loop kernels.
@inproceedings{DBLP:conf/ppam/TreibigH09,
  author    = {Treibig, Jan and Hager, Georg},
  editor    = {Wyrzykowski, Roman and Dongarra, Jack J. and Karczewski, Konrad and Wasniewski, Jerzy},
  title     = {Introducing a Performance Model for Bandwidth-Limited Loop Kernels},
  booktitle = {Parallel Processing and Applied Mathematics, 8th International Conference, {PPAM} 2009, Wroclaw, Poland, September 13-16, 2009. Revised Selected Papers, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {6067},
  pages     = {615--624},
  publisher = {Springer},
  year      = {2009},
  url       = {https://doi.org/10.1007/978-3-642-14390-8\_64},
  doi       = {10.1007/978-3-642-14390-8\_64},
  timestamp = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ppam/TreibigH09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 0902.1884 (2009): task parallelism with locality queues.
@article{DBLP:journals/corr/abs-0902-1884,
  author     = {Wittmann, Markus and Hager, Georg},
  title      = {A Proof of Concept for Optimizing Task Parallelism by Locality Queues},
  journal    = {CoRR},
  volume     = {abs/0902.1884},
  year       = {2009},
  url        = {http://arxiv.org/abs/0902.1884},
  eprinttype = {arXiv},
  eprint     = {0902.1884},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-0902-1884.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 0905.0792 (2009): performance model for bandwidth-limited loop kernels.
@article{DBLP:journals/corr/abs-0905-0792,
  author     = {Treibig, Jan and Hager, Georg},
  title      = {Introducing a Performance Model for Bandwidth-Limited Loop Kernels},
  journal    = {CoRR},
  volume     = {abs/0905.0792},
  year       = {2009},
  url        = {http://arxiv.org/abs/0905.0792},
  eprinttype = {arXiv},
  eprint     = {0905.0792},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-0905-0792.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 0910.4836 (2009): performance limits of sparse matrix-vector multiplication.
@article{DBLP:journals/corr/abs-0910-4836,
  author     = {Schubert, Gerald and Hager, Georg and Fehske, Holger},
  title      = {Performance limitations for sparse matrix-vector multiplications on current multicore environments},
  journal    = {CoRR},
  volume     = {abs/0910.4836},
  year       = {2009},
  url        = {http://arxiv.org/abs/0910.4836},
  eprinttype = {arXiv},
  eprint     = {0910.4836},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-0910-4836.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 0910.4865 (2009): performance prediction and cache topology on multi-core.
@article{DBLP:journals/corr/abs-0910-4865,
  author     = {Treibig, Jan and Hager, Georg and Wellein, Gerhard},
  title      = {Multi-core architectures: Complexities of performance prediction and the impact of cache topology},
  journal    = {CoRR},
  volume     = {abs/0910.4865},
  year       = {2009},
  url        = {http://arxiv.org/abs/0910.4865},
  eprinttype = {arXiv},
  eprint     = {0910.4865},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-0910-4865.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 0912.4506 (2009): multicore-aware temporal blocking of stencil codes.
@article{DBLP:journals/corr/abs-0912-4506,
  author     = {Wittmann, Markus and Hager, Georg and Wellein, Gerhard},
  title      = {Multicore-aware parallel temporal blocking of stencil codes for shared and distributed memory},
  journal    = {CoRR},
  volume     = {abs/0912.4506},
  year       = {2009},
  url        = {http://arxiv.org/abs/0912.4506},
  eprinttype = {arXiv},
  eprint     = {0912.4506},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-0912-4506.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Parallel Process. Lett. 18(4), 2008.
% Vendor and product names in the title are braced so sentence-casing styles keep their capitals.
@article{DBLP:journals/ppl/HagerZW08,
  author    = {Hager, Georg and Zeiser, Thomas and Wellein, Gerhard},
  title     = {Data Access Characteristics and Optimizations for {Sun} {UltraSPARC} {T2} and {T2+} Systems},
  journal   = {Parallel Process. Lett.},
  volume    = {18},
  number    = {4},
  pages     = {471--490},
  year      = {2008},
  url       = {https://doi.org/10.1142/S0129626408003521},
  doi       = {10.1142/S0129626408003521},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ppl/HagerZW08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (IPDPS 2008).
% The acronym in the title is braced so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/ipps/HagerZW08,
  author    = {Hager, Georg and Zeiser, Thomas and Wellein, Gerhard},
  title     = {Data access optimizations for highly threaded multi-core {CPUs} with multiple memory controllers},
  booktitle = {22nd {IEEE} International Symposium on Parallel and Distributed Processing, {IPDPS} 2008, Miami, Florida USA, April 14-18, 2008},
  pages     = {1--7},
  publisher = {{IEEE}},
  year      = {2008},
  url       = {https://doi.org/10.1109/IPDPS.2008.4536341},
  doi       = {10.1109/IPDPS.2008.4536341},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ipps/HagerZW08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Book chapter (High Performance Computing in Science and Engineering '08, Springer).
% The acronym and the proper noun in the title are braced so sentence-casing styles keep their capitals.
@incollection{DBLP:books/sp/08/ZeiserHW08,
  author    = {Zeiser, Thomas and Hager, Georg and Wellein, Gerhard},
  editor    = {Nagel, Wolfgang E. and Kr{\"{o}}ner, Dietmar B. and Resch, Michael M.},
  title     = {Vector Computers in a World of Commodity Clusters, Massively Parallel Systems and Many-Core Many-Threaded {CPUs}: Recent Experience Based on an Advanced Lattice {Boltzmann} Flow Solver},
  booktitle = {High Performance Computing in Science and Engineering '08 - Transactions of the High Performance Computing Center, Stuttgart {(HLRS)} 2008},
  pages     = {333--347},
  publisher = {Springer},
  year      = {2008},
  url       = {https://doi.org/10.1007/978-3-540-88303-6\_24},
  doi       = {10.1007/978-3-540-88303-6\_24},
  timestamp = {Tue, 02 Jun 2020 16:36:27 +0200},
  biburl    = {https://dblp.org/rec/books/sp/08/ZeiserHW08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 0712.2302 (2007).
% The acronym in the title is braced so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-0712-2302,
  author     = {Hager, Georg and Zeiser, Thomas and Wellein, Gerhard},
  title      = {Data access optimizations for highly threaded multi-core {CPUs} with multiple memory controllers},
  journal    = {CoRR},
  volume     = {abs/0712.2302},
  year       = {2007},
  url        = {http://arxiv.org/abs/0712.2302},
  eprinttype = {arXiv},
  eprint     = {0712.2302},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-0712-2302.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR/arXiv preprint 0712.3389 (2007): RZBENCH evaluation of HPC architectures.
@article{DBLP:journals/corr/abs-0712-3389,
  author     = {Hager, Georg and Stengel, Holger and Zeiser, Thomas and Wellein, Gerhard},
  title      = {{RZBENCH:} Performance evaluation of current {HPC} architectures using low-level and application benchmarks},
  journal    = {CoRR},
  volume     = {abs/0712.3389},
  year       = {2007},
  url        = {http://arxiv.org/abs/0712.3389},
  eprinttype = {arXiv},
  eprint     = {0712.3389},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-0712-3389.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (13th European PVM/MPI User's Group Meeting 2006, LNCS 4192).
% "OpenMP" is braced like the neighbouring "{MPI}" so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/pvm/RabenseifnerHJK06,
  author    = {Rabenseifner, Rolf and Hager, Georg and Jost, Gabriele and Keller, Rainer},
  editor    = {Mohr, Bernd and Tr{\"{a}}ff, Jesper Larsson and Worringen, Joachim and Dongarra, Jack J.},
  title     = {Hybrid {MPI} and {OpenMP} Parallel Programming},
  booktitle = {Recent Advances in Parallel Virtual Machine and Message Passing Interface, 13th European {PVM/MPI} User's Group Meeting, Bonn, Germany, September 17-20, 2006, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {4192},
  pages     = {11},
  publisher = {Springer},
  year      = {2006},
  url       = {https://doi.org/10.1007/11846802\_10},
  doi       = {10.1007/11846802\_10},
  timestamp = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/pvm/RabenseifnerHJK06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (High Performance Scientific Computing, Hanoi 2003): exact numerical treatment of finite quantum systems.
@inproceedings{DBLP:conf/hpsc/HagerJFW03,
  author    = {Hager, Georg and Jeckelmann, Eric and Fehske, Holger and Wellein, Gerhard},
  editor    = {Bock, Hans Georg and Phu, Hoang Xuan and Kostina, Ekaterina A. and Rannacher, Rolf},
  title     = {Exact Numerical Treatment of Finite Quantum Systems Using Leading-Edge Supercomputers},
  booktitle = {Modeling, Simulation and Optimization of Complex Processes, Proceedings of the International Conference on High Performance Scientific Computing, March 10-14, 2003, Hanoi, Vietnam},
  pages     = {165--177},
  publisher = {Springer},
  year      = {2003},
  url       = {https://doi.org/10.1007/3-540-27170-8\_13},
  doi       = {10.1007/3-540-27170-8\_13},
  timestamp = {Fri, 26 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/hpsc/HagerJFW03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (VECPAR 2002, LNCS 2565).
% The mixed-case unit "TeraFlop/s" in the title is braced so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/vecpar/WelleinHBF02,
  author    = {Wellein, Gerhard and Hager, Georg and Basermann, Achim and Fehske, Holger},
  editor    = {Palma, Jos{\'{e}} M. Laginha M. and Dongarra, Jack J. and Hern{\'{a}}ndez, Vicente and de Sousa, Ant{\'{o}}nio Augusto},
  title     = {Fast Sparse Matrix-Vector Multiplication for {TeraFlop/s} Computers},
  booktitle = {High Performance Computing for Computational Science - {VECPAR} 2002, 5th International Conference, Porto, Portugal, June 26-28, 2002, Selected Papers and Invited Talks},
  series    = {Lecture Notes in Computer Science},
  volume    = {2565},
  pages     = {287--301},
  publisher = {Springer},
  year      = {2002},
  url       = {https://doi.org/10.1007/3-540-36569-9\_18},
  doi       = {10.1007/3-540-36569-9\_18},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/vecpar/WelleinHBF02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.