BibTeX records: Ahmad Abdelfattah

download as .bib file

% IPDPS 2023 conference paper introducing PAQR (pivoting-avoiding QR factorization).
@inproceedings{DBLP:conf/ipps/SidLakhdarCBALGTJWDDA23,
  author       = {Wissam M. Sid{-}Lakhdar and
                  S{\'{e}}bastien Cayrols and
                  Daniel Bielich and
                  Ahmad Abdelfattah and
                  Piotr Luszczek and
                  Mark Gates and
                  Stanimire Tomov and
                  Hans Johansen and
                  David B. Williams{-}Young and
                  Timothy A. Davis and
                  Jack J. Dongarra and
                  Hartwig Anzt},
  title        = {{PAQR:} Pivoting Avoiding {QR} factorization},
  booktitle    = {{IEEE} International Parallel and Distributed Processing Symposium,
                  {IPDPS} 2023, St. Petersburg, FL, USA, May 15-19, 2023},
  pages        = {322--332},
  publisher    = {{IEEE}},
  year         = {2023},
  url          = {https://doi.org/10.1109/IPDPS54959.2023.00040},
  doi          = {10.1109/IPDPS54959.2023.00040},
  timestamp    = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ipps/SidLakhdarCBALGTJWDDA23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% SC-W 2023 workshop paper: GPU LU factorization and solve for batches of band matrices.
% The acronym GPU is brace-protected so sentence-casing styles cannot lowercase it.
@inproceedings{DBLP:conf/sc/AbdelfattahTLAD23,
  author       = {Ahmad Abdelfattah and
                  Stanimire Tomov and
                  Piotr Luszczek and
                  Hartwig Anzt and
                  Jack J. Dongarra},
  title        = {{GPU}-based {LU} Factorization and Solve on Batches of Matrices with
                  Band Structure},
  booktitle    = {Proceedings of the {SC} '23 Workshops of The International Conference
                  on High Performance Computing, Network, Storage, and Analysis, {SC-W}
                  2023, Denver, CO, USA, November 12-17, 2023},
  pages        = {1670--1679},
  publisher    = {{ACM}},
  year         = {2023},
  url          = {https://doi.org/10.1145/3624062.3624247},
  doi          = {10.1145/3624062.3624247},
  timestamp    = {Tue, 28 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/AbdelfattahTLAD23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2022 paper. DBLP:journals/corr/abs-2112-03444 in this file carries the same
% title and author list (arXiv version). GPU is brace-protected against recasing.
@inproceedings{DBLP:conf/cvpr/ChienFATTK22,
  author       = {Chiang{-}Heng Chien and
                  Hongyi Fan and
                  Ahmad Abdelfattah and
                  Elias P. Tsigaridas and
                  Stanimire Tomov and
                  Benjamin B. Kimia},
  title        = {{GPU}-Based Homotopy Continuation for Minimal Problems in Computer Vision},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2022, New Orleans, LA, USA, June 18-24, 2022},
  pages        = {15744--15755},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/CVPR52688.2022.01531},
  doi          = {10.1109/CVPR52688.2022.01531},
  timestamp    = {Wed, 05 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/ChienFATTK22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICCS 2022 paper (LNCS volume 13350) on batch QR factorization on GPUs.
% GPUs is brace-protected so sentence-casing styles cannot turn it into "gpus".
@inproceedings{DBLP:conf/iccS/AbdelfattahTD22,
  author       = {Ahmad Abdelfattah and
                  Stan Tomov and
                  Jack J. Dongarra},
  editor       = {Derek Groen and
                  Cl{\'{e}}lia de Mulatier and
                  Maciej Paszynski and
                  Valeria V. Krzhizhanovskaya and
                  Jack J. Dongarra and
                  Peter M. A. Sloot},
  title        = {Batch {QR} Factorization on {GPUs}: Design, Optimization, and Tuning},
  booktitle    = {Computational Science - {ICCS} 2022 - 22nd International Conference,
                  London, UK, June 21-23, 2022, Proceedings, Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {13350},
  pages        = {60--74},
  publisher    = {Springer},
  year         = {2022},
  url          = {https://doi.org/10.1007/978-3-031-08751-6\_5},
  doi          = {10.1007/978-3-031-08751-6\_5},
  timestamp    = {Mon, 27 Jun 2022 17:21:53 +0200},
  biburl       = {https://dblp.org/rec/conf/iccS/AbdelfattahTD22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% P3HPC workshop paper (SC 2022) on portable dense linear algebra.
% The compound surname "Al Farhan" is braced so BibTeX keeps it as one last name
% instead of parsing "Al" as part of the given names.
@inproceedings{DBLP:conf/p3hpc-ws/GatesYSACBAFD22,
  author       = {Mark Gates and
                  Asim YarKhan and
                  Dalal Sukkari and
                  Kadir Akbudak and
                  S{\'{e}}bastien Cayrols and
                  Daniel Bielich and
                  Ahmad Abdelfattah and
                  Mohammed A. {Al Farhan} and
                  Jack J. Dongarra},
  title        = {Portable and Efficient Dense Linear Algebra in the Beginning of the
                  Exascale Era},
  booktitle    = {{IEEE/ACM} International Workshop on Performance, Portability and
                  Productivity in HPC, P3HPC@SC 2022, Dallas, TX, USA, November 13-18,
                  2022},
  pages        = {36--46},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/P3HPC56579.2022.00009},
  doi          = {10.1109/P3HPC56579.2022.00009},
  timestamp    = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/p3hpc-ws/GatesYSACBAFD22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% SC22 paper on irregular matrix computations on GPUs for sparse direct solvers.
% GPUs is brace-protected so sentence-casing styles cannot lowercase it.
@inproceedings{DBLP:conf/sc/AbdelfattahGBTLD22,
  author       = {Ahmad Abdelfattah and
                  Pieter Ghysels and
                  Wajih Boukaram and
                  Stanimire Tomov and
                  Xiaoye Sherry Li and
                  Jack J. Dongarra},
  editor       = {Felix Wolf and
                  Sameer Shende and
                  Candace Culhane and
                  Sadaf R. Alam and
                  Heike Jagode},
  title        = {Addressing Irregular Patterns of Matrix Computations on {GPUs} and Their
                  Impact on Applications Powered by Sparse Direct Solvers},
  booktitle    = {{SC22:} International Conference for High Performance Computing, Networking,
                  Storage and Analysis, Dallas, TX, USA, November 13-18, 2022},
  pages        = {26:1--26:14},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/SC41404.2022.00031},
  doi          = {10.1109/SC41404.2022.00031},
  timestamp    = {Wed, 24 May 2023 16:17:06 +0200},
  biburl       = {https://dblp.org/rec/conf/sc/AbdelfattahGBTLD22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Zenodo artifact record (Version 2). Its author list matches
% DBLP:conf/p3hpc-ws/GatesYSACBAFD22 - presumably the companion paper; confirm.
% The exported placeholder note "Accessed on YYYY-MM-DD." was removed because it
% would print literally; add a real access date (note or urldate) when citing.
% Title spelling is kept exactly as exported. SLATE, Summit and Crusher are
% brace-protected against recasing, and the surname "Al Farhan" is kept whole.
@misc{DBLP:data/10/GatesYSACBAFD22,
  author       = {Mark Gates and
                  Asim YarKhan and
                  Dalal Sukkari and
                  Kadir Akbudak and
                  S{\'{e}}bastien Cayrols and
                  Daniel Bielich and
                  Ahmad Abdelfattah and
                  Mohammed A. {Al Farhan} and
                  Jack J. Dongarra},
  title        = {Reproducability Artifact for Running {SLATE}'s {GEMM} and {POTRF} Operations
                  on {Summit} and {Crusher} (Version 2)},
  publisher    = {Zenodo},
  year         = {2022},
  month        = aug,
  howpublished = {\url{https://doi.org/10.5281/zenodo.7003870}},
  url          = {https://doi.org/10.5281/zenodo.7003870},
  doi          = {10.5281/ZENODO.7003870},
  timestamp    = {Mon, 25 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/data/10/GatesYSACBAFD22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJHPCA 35(4), 2021 survey article on mixed-precision numerical linear algebra.
% NOTE(review): this record carries no pages field.
@article{DBLP:journals/ijhpca/AbdelfattahABCC21,
  author       = {Ahmad Abdelfattah and
                  Hartwig Anzt and
                  Erik G. Boman and
                  Erin C. Carson and
                  Terry Cojean and
                  Jack J. Dongarra and
                  Alyson Fox and
                  Mark Gates and
                  Nicholas J. Higham and
                  Xiaoye S. Li and
                  Jennifer A. Loe and
                  Piotr Luszczek and
                  Srikara Pranesh and
                  Siva Rajamanickam and
                  Tobias Ribizel and
                  Barry F. Smith and
                  Kasia Swirydowicz and
                  Stephen J. Thomas and
                  Stanimire Tomov and
                  Yaohung M. Tsai and
                  Ulrike Meier Yang},
  title        = {A survey of numerical linear algebra methods utilizing mixed-precision
                  arithmetic},
  journal      = {Int. J. High Perform. Comput. Appl.},
  volume       = {35},
  number       = {4},
  year         = {2021},
  url          = {https://doi.org/10.1177/10943420211003313},
  doi          = {10.1177/10943420211003313},
  timestamp    = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ijhpca/AbdelfattahABCC21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJHPCA 35(6), 2021 journal article. DBLP:journals/corr/abs-2109-04996 in this file
% has the same title (up to capitalization) and author list (arXiv version).
@article{DBLP:journals/ijhpca/KolevFMDBDWTSAB21,
  author       = {Tzanio V. Kolev and
                  Paul F. Fischer and
                  Misun Min and
                  Jack J. Dongarra and
                  Jed Brown and
                  Veselin Dobrev and
                  Tim Warburton and
                  Stanimire Tomov and
                  Mark S. Shephard and
                  Ahmad Abdelfattah and
                  Valeria Barra and
                  Natalie Beams and
                  Jean{-}Sylvain Camier and
                  Noel Chalmers and
                  Yohann Dudouit and
                  Ali Karakus and
                  Ian Karlin and
                  Stefan Kerkemeier and
                  Yu{-}Hsiang Lan and
                  David S. Medina and
                  Elia Merzari and
                  Aleksandr Obabko and
                  Will Pazner and
                  Thilina Rathnayake and
                  Cameron W. Smith and
                  Lukas Spies and
                  Kasia Swirydowicz and
                  Jeremy L. Thompson and
                  Ananias Tomboulides and
                  Vladimir Z. Tomov},
  title        = {Efficient exascale discretizations: High-order finite element methods},
  journal      = {Int. J. High Perform. Comput. Appl.},
  volume       = {35},
  number       = {6},
  pages        = {527--552},
  year         = {2021},
  url          = {https://doi.org/10.1177/10943420211020803},
  doi          = {10.1177/10943420211020803},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ijhpca/KolevFMDBDWTSAB21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% JOSS 6(63), 2021 article describing libCEED.
% The mixed-case name libCEED is brace-protected so title recasing cannot alter it.
@article{DBLP:journals/jossw/BrownABBCDDGKMP21,
  author       = {Jed Brown and
                  Ahmad Abdelfattah and
                  Valeria Barra and
                  Natalie N. Beams and
                  Jean{-}Sylvain Camier and
                  Veselin Dobrev and
                  Yohann Dudouit and
                  Leila Ghaffari and
                  Tzanio V. Kolev and
                  David S. Medina and
                  Will Pazner and
                  Thilina Rathnayake and
                  Jeremy L. Thompson and
                  Stan Tomov},
  title        = {{libCEED}: Fast algebra for high-order element-based discretizations},
  journal      = {J. Open Source Softw.},
  volume       = {6},
  number       = {63},
  pages        = {2945},
  year         = {2021},
  url          = {https://doi.org/10.21105/joss.02945},
  doi          = {10.21105/JOSS.02945},
  timestamp    = {Sun, 25 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jossw/BrownABBCDDGKMP21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Parallel Computing volume 108 (2021) article; pages holds the article number.
% DBLP:journals/corr/abs-2109-05072 in this file has the same title (up to
% capitalization) and author list (arXiv version).
@article{DBLP:journals/pc/AbdelfattahBBBB21,
  author       = {Ahmad Abdelfattah and
                  Valeria Barra and
                  Natalie Beams and
                  Ryan Bleile and
                  Jed Brown and
                  Jean{-}Sylvain Camier and
                  Robert Carson and
                  Noel Chalmers and
                  Veselin Dobrev and
                  Yohann Dudouit and
                  Paul F. Fischer and
                  Ali Karakus and
                  Stefan Kerkemeier and
                  Tzanio V. Kolev and
                  Yu{-}Hsiang Lan and
                  Elia Merzari and
                  Misun Min and
                  Malachi Phillips and
                  Thilina Rathnayake and
                  Robert N. Rieben and
                  Thomas Stitt and
                  Ananias Tomboulides and
                  Stanimire Tomov and
                  Vladimir Z. Tomov and
                  Arturo Vargas and
                  Tim Warburton and
                  Kenneth Weiss},
  title        = {{GPU} algorithms for Efficient Exascale Discretizations},
  journal      = {Parallel Comput.},
  volume       = {108},
  pages        = {102841},
  year         = {2021},
  url          = {https://doi.org/10.1016/j.parco.2021.102841},
  doi          = {10.1016/J.PARCO.2021.102841},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/pc/AbdelfattahBBBB21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ACM TOMS 47(3), 2021 article on batched BLAS and LAPACK routines.
@article{DBLP:journals/toms/AbdelfattahCDGH21,
  author       = {Ahmad Abdelfattah and
                  Timothy B. Costa and
                  Jack J. Dongarra and
                  Mark Gates and
                  Azzam Haidar and
                  Sven Hammarling and
                  Nicholas J. Higham and
                  Jakub Kurzak and
                  Piotr Luszczek and
                  Stanimire Tomov and
                  Mawussi Zounon},
  title        = {A Set of Batched Basic Linear Algebra Subprograms and {LAPACK} Routines},
  journal      = {{ACM} Trans. Math. Softw.},
  volume       = {47},
  number       = {3},
  pages        = {21:1--21:23},
  year         = {2021},
  url          = {https://doi.org/10.1145/3431921},
  doi          = {10.1145/3431921},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/toms/AbdelfattahCDGH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2109.04996 (listed under CoRR). DBLP:journals/ijhpca/KolevFMDBDWTSAB21
% in this file has the same title (up to capitalization) and author list.
% The eprinttype field is re-aligned with the other fields.
@article{DBLP:journals/corr/abs-2109-04996,
  author       = {Tzanio V. Kolev and
                  Paul F. Fischer and
                  Misun Min and
                  Jack J. Dongarra and
                  Jed Brown and
                  Veselin Dobrev and
                  Tim Warburton and
                  Stanimire Tomov and
                  Mark S. Shephard and
                  Ahmad Abdelfattah and
                  Valeria Barra and
                  Natalie Beams and
                  Jean{-}Sylvain Camier and
                  Noel Chalmers and
                  Yohann Dudouit and
                  Ali Karakus and
                  Ian Karlin and
                  Stefan Kerkemeier and
                  Yu{-}Hsiang Lan and
                  David S. Medina and
                  Elia Merzari and
                  Aleksandr Obabko and
                  Will Pazner and
                  Thilina Rathnayake and
                  Cameron W. Smith and
                  Lukas Spies and
                  Kasia Swirydowicz and
                  Jeremy L. Thompson and
                  Ananias Tomboulides and
                  Vladimir Z. Tomov},
  title        = {Efficient Exascale Discretizations: High-Order Finite Element Methods},
  journal      = {CoRR},
  volume       = {abs/2109.04996},
  year         = {2021},
  url          = {https://arxiv.org/abs/2109.04996},
  eprinttype   = {arXiv},
  eprint       = {2109.04996},
  timestamp    = {Sat, 03 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-04996.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2109.05072 (listed under CoRR). DBLP:journals/pc/AbdelfattahBBBB21
% in this file has the same title (up to capitalization) and author list.
% The eprinttype field is re-aligned with the other fields.
@article{DBLP:journals/corr/abs-2109-05072,
  author       = {Ahmad Abdelfattah and
                  Valeria Barra and
                  Natalie Beams and
                  Ryan Bleile and
                  Jed Brown and
                  Jean{-}Sylvain Camier and
                  Robert Carson and
                  Noel Chalmers and
                  Veselin Dobrev and
                  Yohann Dudouit and
                  Paul F. Fischer and
                  Ali Karakus and
                  Stefan Kerkemeier and
                  Tzanio V. Kolev and
                  Yu{-}Hsiang Lan and
                  Elia Merzari and
                  Misun Min and
                  Malachi Phillips and
                  Thilina Rathnayake and
                  Robert N. Rieben and
                  Thomas Stitt and
                  Ananias Tomboulides and
                  Stanimire Tomov and
                  Vladimir Z. Tomov and
                  Arturo Vargas and
                  Tim Warburton and
                  Kenneth Weiss},
  title        = {{GPU} Algorithms for Efficient Exascale Discretizations},
  journal      = {CoRR},
  volume       = {abs/2109.05072},
  year         = {2021},
  url          = {https://arxiv.org/abs/2109.05072},
  eprinttype   = {arXiv},
  eprint       = {2109.05072},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-05072.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2112.03444 (listed under CoRR). DBLP:conf/cvpr/ChienFATTK22 in this
% file has the same title and author list. GPU is brace-protected against recasing
% and the eprinttype field is re-aligned with the other fields.
@article{DBLP:journals/corr/abs-2112-03444,
  author       = {Chiang{-}Heng Chien and
                  Hongyi Fan and
                  Ahmad Abdelfattah and
                  Elias P. Tsigaridas and
                  Stanimire Tomov and
                  Benjamin B. Kimia},
  title        = {{GPU}-Based Homotopy Continuation for Minimal Problems in Computer Vision},
  journal      = {CoRR},
  volume       = {abs/2112.03444},
  year         = {2021},
  url          = {https://arxiv.org/abs/2112.03444},
  eprinttype   = {arXiv},
  eprint       = {2112.03444},
  timestamp    = {Mon, 13 Dec 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2112-03444.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJHPCA 34(6), 2020 article on MAGMA templates.
% The compound surname "Al Farhan" is braced so BibTeX keeps it as one last name.
% NOTE(review): this record carries no pages field.
@article{DBLP:journals/ijhpca/FarhanATGSHRD20,
  author       = {Mohammed A. {Al Farhan} and
                  Ahmad Abdelfattah and
                  Stanimire Tomov and
                  Mark Gates and
                  Dalal Sukkari and
                  Azzam Haidar and
                  Robert Rosenberg and
                  Jack J. Dongarra},
  title        = {{MAGMA} templates for scalable linear algebra on emerging architectures},
  journal      = {Int. J. High Perform. Comput. Appl.},
  volume       = {34},
  number       = {6},
  year         = {2020},
  url          = {https://doi.org/10.1177/1094342020938421},
  doi          = {10.1177/1094342020938421},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijhpca/FarhanATGSHRD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% JPDC volume 145 (2020) article on batched small-matrix multiplication in half precision.
@article{DBLP:journals/jpdc/AbdelfattahTD20,
  author       = {Ahmad Abdelfattah and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {Matrix multiplication on batches of small matrices in half and half-complex
                  precisions},
  journal      = {J. Parallel Distributed Comput.},
  volume       = {145},
  pages        = {188--201},
  year         = {2020},
  url          = {https://doi.org/10.1016/j.jpdc.2020.07.001},
  doi          = {10.1016/J.JPDC.2020.07.001},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jpdc/AbdelfattahTD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% HPEC 2020 paper on dense linear algebra algorithms for AMD GPUs.
% GPUs is brace-protected so sentence-casing styles cannot lowercase it.
@inproceedings{DBLP:conf/hpec/BrownATD20,
  author       = {Cade Brown and
                  Ahmad Abdelfattah and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {Design, Optimization, and Benchmarking of Dense Linear Algebra Algorithms
                  on {AMD} {GPUs}},
  booktitle    = {2020 {IEEE} High Performance Extreme Computing Conference, {HPEC}
                  2020, Waltham, MA, USA, September 22-24, 2020},
  pages        = {1--7},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/HPEC43674.2020.9286214},
  doi          = {10.1109/HPEC43674.2020.9286214},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/hpec/BrownATD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICCS 2020 paper (LNCS volume 12138) on FP16-enabled mixed-precision SPD solvers.
% FP16 and GPUs are brace-protected so sentence-casing styles cannot lowercase them.
@inproceedings{DBLP:conf/iccS/AbdelfattahTD20,
  author       = {Ahmad Abdelfattah and
                  Stan Tomov and
                  Jack J. Dongarra},
  editor       = {Valeria V. Krzhizhanovskaya and
                  G{\'{a}}bor Z{\'{a}}vodszky and
                  Michael Harold Lees and
                  Jack J. Dongarra and
                  Peter M. A. Sloot and
                  S{\'{e}}rgio Brissos and
                  Jo{\~{a}}o Teixeira},
  title        = {Investigating the Benefit of {FP16}-Enabled Mixed-Precision Solvers
                  for Symmetric Positive Definite Matrices Using {GPUs}},
  booktitle    = {Computational Science - {ICCS} 2020 - 20th International Conference,
                  Amsterdam, The Netherlands, June 3-5, 2020, Proceedings, Part {II}},
  series       = {Lecture Notes in Computer Science},
  volume       = {12138},
  pages        = {237--250},
  publisher    = {Springer},
  year         = {2020},
  url          = {https://doi.org/10.1007/978-3-030-50417-5\_18},
  doi          = {10.1007/978-3-030-50417-5\_18},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iccS/AbdelfattahTD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% PMBS workshop paper (SC 2020) evaluating the A100 GPU for sparse and batched computations.
% NVIDIA and Ampere are brace-protected so sentence-casing styles cannot lowercase them.
@inproceedings{DBLP:conf/pmbs-ws/AnztTACD20,
  author       = {Hartwig Anzt and
                  Yuhsiang M. Tsai and
                  Ahmad Abdelfattah and
                  Terry Cojean and
                  Jack J. Dongarra},
  title        = {Evaluating the Performance of {NVIDIA}'s {A100} {Ampere} {GPU} for Sparse
                  and Batched Computations},
  booktitle    = {2020 {IEEE/ACM} Performance Modeling, Benchmarking and Simulation
                  of High Performance Computer Systems, PMBS@SC 2020, Atlanta, GA, USA,
                  November 12, 2020},
  pages        = {26--38},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/PMBS51919.2020.00009},
  doi          = {10.1109/PMBS51919.2020.00009},
  timestamp    = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/pmbs-ws/AnztTACD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ScalA workshop paper (SC 2020) on high-order finite elements with batch GEMM on GPUs.
% GPUs is brace-protected so sentence-casing styles cannot lowercase it.
@inproceedings{DBLP:conf/scala-ws/BeamsATDKD20,
  author       = {Natalie Beams and
                  Ahmad Abdelfattah and
                  Stan Tomov and
                  Jack J. Dongarra and
                  Tzanio V. Kolev and
                  Yohann Dudouit},
  title        = {High-Order Finite Element Method using Standard and Device-Level Batch
                  {GEMM} on {GPUs}},
  booktitle    = {11th {IEEE/ACM} Workshop on Latest Advances in Scalable Algorithms
                  for Large-Scale Systems, ScalA@SC 2020, Atlanta, GA, USA, November
                  13, 2020},
  pages        = {53--60},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/ScalA51936.2020.00012},
  doi          = {10.1109/SCALA51936.2020.00012},
  timestamp    = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/scala-ws/BeamsATDKD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2007.06674 (listed under CoRR): survey of mixed-precision numerical methods.
% NOTE(review): DBLP:journals/ijhpca/AbdelfattahABCC21 in this file has a similar title
% but a different author list - confirm before treating them as the same work.
% The eprinttype field is re-aligned with the other fields.
@article{DBLP:journals/corr/abs-2007-06674,
  author       = {Ahmad Abdelfattah and
                  Hartwig Anzt and
                  Erik G. Boman and
                  Erin C. Carson and
                  Terry Cojean and
                  Jack J. Dongarra and
                  Mark Gates and
                  Thomas Gr{\"{u}}tzmacher and
                  Nicholas J. Higham and
                  Xiaoye Sherry Li and
                  Neil Lindquist and
                  Yang Liu and
                  Jennifer A. Loe and
                  Piotr Luszczek and
                  Pratik Nayak and
                  Srikara Pranesh and
                  Sivasankaran Rajamanickam and
                  Tobias Ribizel and
                  Barry Smith and
                  Kasia Swirydowicz and
                  Stephen J. Thomas and
                  Stanimire Tomov and
                  Yaohung M. Tsai and
                  Ichitaro Yamazaki and
                  Ulrike Meier Yang},
  title        = {A Survey of Numerical Methods Utilizing Mixed Precision Arithmetic},
  journal      = {CoRR},
  volume       = {abs/2007.06674},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.06674},
  eprinttype   = {arXiv},
  eprint       = {2007.06674},
  timestamp    = {Mon, 29 Mar 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-06674.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Parallel Computing volume 81 (2019) article on matrix-matrix multiplication of very small matrices.
@article{DBLP:journals/pc/MasliahAHTBFD19,
  author       = {Ian Masliah and
                  Ahmad Abdelfattah and
                  Azzam Haidar and
                  Stanimire Tomov and
                  Marc Baboulin and
                  Jo{\"{e}}l Falcou and
                  Jack J. Dongarra},
  title        = {Algorithms and optimization techniques for high-performance matrix-matrix
                  multiplications of very small matrices},
  journal      = {Parallel Comput.},
  volume       = {81},
  pages        = {1--21},
  year         = {2019},
  url          = {https://doi.org/10.1016/j.parco.2018.10.003},
  doi          = {10.1016/J.PARCO.2018.10.003},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/pc/MasliahAHTBFD19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% HPEC 2019 paper on batched LU factorization on GPUs.
% GPUs is brace-protected so sentence-casing styles cannot lowercase it.
@inproceedings{DBLP:conf/hpec/AbdelfattahTD19,
  author       = {Ahmad Abdelfattah and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {Progressive Optimization of Batched {LU} Factorization on {GPUs}},
  booktitle    = {2019 {IEEE} High Performance Extreme Computing Conference, {HPEC}
                  2019, Waltham, MA, USA, September 24-26, 2019},
  pages        = {1--6},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/HPEC.2019.8916270},
  doi          = {10.1109/HPEC.2019.8916270},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/hpec/AbdelfattahTD19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICPP 2019 paper on massively parallel automated software tuning.
@inproceedings{DBLP:conf/icpp/KurzakTGAD19,
  author       = {Jakub Kurzak and
                  Yaohung M. Tsai and
                  Mark Gates and
                  Ahmad Abdelfattah and
                  Jack J. Dongarra},
  title        = {Massively Parallel Automated Software Tuning},
  booktitle    = {Proceedings of the 48th International Conference on Parallel Processing,
                  {ICPP} 2019, Kyoto, Japan, August 05-08, 2019},
  pages        = {92:1--92:10},
  publisher    = {{ACM}},
  year         = {2019},
  url          = {https://doi.org/10.1145/3337821.3337908},
  doi          = {10.1145/3337821.3337908},
  timestamp    = {Tue, 29 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icpp/KurzakTGAD19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IPDPS 2019 paper on half-precision batched matrix multiplication on GPUs.
% GPUs is brace-protected so sentence-casing styles cannot lowercase it.
@inproceedings{DBLP:conf/ipps/AbdelfattahTD19,
  author       = {Ahmad Abdelfattah and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {Fast Batched Matrix Multiplication for Small Sizes Using Half-Precision
                  Arithmetic on {GPUs}},
  booktitle    = {2019 {IEEE} International Parallel and Distributed Processing Symposium,
                  {IPDPS} 2019, Rio de Janeiro, Brazil, May 20-24, 2019},
  pages        = {111--122},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/IPDPS.2019.00022},
  doi          = {10.1109/IPDPS.2019.00022},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ipps/AbdelfattahTD19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ScalA workshop paper (SC 2019) on half-precision computation for complex matrices.
% GPUs is brace-protected so sentence-casing styles cannot lowercase it.
@inproceedings{DBLP:conf/sc/AbdelfattahTD19,
  author       = {Ahmad Abdelfattah and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {Towards Half-Precision Computation for Complex Matrices: {A} Case
                  Study for Mixed Precision Solvers on {GPUs}},
  booktitle    = {10th {IEEE/ACM} Workshop on Latest Advances in Scalable Algorithms
                  for Large-Scale Systems, ScalA@SC 2019, Denver, CO, USA, November
                  18, 2019},
  pages        = {17--24},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/ScalA49573.2019.00008},
  doi          = {10.1109/SCALA49573.2019.00008},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/AbdelfattahTD19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% J. Comput. Sci. volume 26 (2018) article on batched one-sided factorizations of tiny matrices.
% GPUs is brace-protected so sentence-casing styles cannot lowercase it.
@article{DBLP:journals/jocs/AbdelfattahHTD18,
  author       = {Ahmad Abdelfattah and
                  Azzam Haidar and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {Batched one-sided factorizations of tiny matrices using {GPUs}: Challenges
                  and countermeasures},
  journal      = {J. Comput. Sci.},
  volume       = {26},
  pages        = {226--236},
  year         = {2018},
  url          = {https://doi.org/10.1016/j.jocs.2018.01.005},
  doi          = {10.1016/J.JOCS.2018.01.005},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jocs/AbdelfattahHTD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE TPDS 29(5), 2018 article on batched LU and Cholesky factorizations of very small matrices.
% The proper noun Cholesky is brace-protected so sentence-casing styles cannot lowercase it.
@article{DBLP:journals/tpds/HaidarAZTD18,
  author       = {Azzam Haidar and
                  Ahmad Abdelfattah and
                  Mawussi Zounon and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {A Guide for Achieving High Performance with Very Small Matrices on
                  {GPU:} {A} Case Study of Batched {LU} and {Cholesky} Factorizations},
  journal      = {{IEEE} Trans. Parallel Distributed Syst.},
  volume       = {29},
  number       = {5},
  pages        = {973--984},
  year         = {2018},
  url          = {https://doi.org/10.1109/TPDS.2017.2783929},
  doi          = {10.1109/TPDS.2017.2783929},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tpds/HaidarAZTD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE TPDS 29(12), 2018 article on high-performance, energy-efficient dense linear solvers.
% GPUs is brace-protected so sentence-casing styles cannot lowercase it.
@article{DBLP:journals/tpds/AbdelfattahHTD18,
  author       = {Ahmad Abdelfattah and
                  Azzam Haidar and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {Analysis and Design Techniques towards High-Performance and Energy-Efficient
                  Dense Linear Solvers on {GPUs}},
  journal      = {{IEEE} Trans. Parallel Distributed Syst.},
  volume       = {29},
  number       = {12},
  pages        = {2700--2712},
  year         = {2018},
  url          = {https://doi.org/10.1109/TPDS.2018.2842785},
  doi          = {10.1109/TPDS.2018.2842785},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tpds/AbdelfattahHTD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/hpec/AbdelfattahHTD18,
  author       = {Ahmad Abdelfattah and
                  Azzam Haidar and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {Optimizing {GPU} Kernels for Irregular Batch Workloads: {A} Case Study
                  for {Cholesky} Factorization},
  booktitle    = {2018 {IEEE} High Performance Extreme Computing Conference, {HPEC}
                  2018, Waltham, MA, USA, September 25-27, 2018},
  pages        = {1--7},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/HPEC.2018.8547576},
  doi          = {10.1109/HPEC.2018.8547576},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/hpec/AbdelfattahHTD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iccS/HaidarAZWPTD18,
  author       = {Haidar, Azzam and Abdelfattah, Ahmad and Zounon, Mawussi and
                  Wu, Panruo and Pranesh, Srikara and Tomov, Stanimire and
                  Dongarra, Jack J.},
  editor       = {Shi, Yong and Fu, Haohuan and Tian, Yingjie and
                  Krzhizhanovskaya, Valeria V. and Lees, Michael Harold and
                  Dongarra, Jack J. and Sloot, Peter M. A.},
  title        = {The Design of Fast and Energy-Efficient Linear Solvers:
                  On the Potential of Half-Precision Arithmetic and
                  Iterative Refinement Techniques},
  booktitle    = {Computational Science - {ICCS} 2018 -
                  18th International Conference, Wuxi, China,
                  June 11-13, 2018, Proceedings, Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {10860},
  pages        = {586--600},
  publisher    = {Springer},
  year         = {2018},
  url          = {https://doi.org/10.1007/978-3-319-93698-7\_45},
  doi          = {10.1007/978-3-319-93698-7\_45},
  timestamp    = {Mon, 08 May 2023 14:38:37 +0200},
  biburl       = {https://dblp.org/rec/conf/iccS/HaidarAZWPTD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/YamazakiAIOTYD18,
  author       = {Ichitaro Yamazaki and
                  Ahmad Abdelfattah and
                  Akihiro Ida and
                  Satoshi Ohshima and
                  Stanimire Tomov and
                  Rio Yokota and
                  Jack J. Dongarra},
  title        = {Performance of Hierarchical-matrix {BiCGStab} Solver on {GPU} Clusters},
  booktitle    = {2018 {IEEE} International Parallel and Distributed Processing Symposium,
                  {IPDPS} 2018, Vancouver, BC, Canada, May 21-25, 2018},
  pages        = {930--939},
  publisher    = {{IEEE} Computer Society},
  year         = {2018},
  url          = {https://doi.org/10.1109/IPDPS.2018.00102},
  doi          = {10.1109/IPDPS.2018.00102},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ipps/YamazakiAIOTYD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cse/DongarraTLKGYAH17,
  author       = {Dongarra, Jack J. and Tomov, Stanimire and Luszczek, Piotr and
                  Kurzak, Jakub and Gates, Mark and Yamazaki, Ichitaro and
                  Anzt, Hartwig and Haidar, Azzam and Abdelfattah, Ahmad},
  title        = {With Extreme Computing,
                  the Rules Have Changed},
  journal      = {Comput. Sci. Eng.},
  volume       = {19},
  number       = {3},
  pages        = {52--62},
  year         = {2017},
  url          = {https://doi.org/10.1109/MCSE.2017.48},
  doi          = {10.1109/MCSE.2017.48},
  timestamp    = {Tue, 16 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/cse/DongarraTLKGYAH17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jocs/AbdelfattahHTD17,
  author       = {Ahmad Abdelfattah and
                  Azzam Haidar and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {Fast {Cholesky} factorization on {GPUs} for batch and native modes in
                  {MAGMA}},
  journal      = {J. Comput. Sci.},
  volume       = {20},
  pages        = {85--93},
  year         = {2017},
  url          = {https://doi.org/10.1016/j.jocs.2016.12.009},
  doi          = {10.1016/J.JOCS.2016.12.009},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jocs/AbdelfattahHTD17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iccS/AbdelfattahHTD17,
  author       = {Ahmad Abdelfattah and
                  Azzam Haidar and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  editor       = {Petros Koumoutsakos and
                  Michael Lees and
                  Valeria V. Krzhizhanovskaya and
                  Jack J. Dongarra and
                  Peter M. A. Sloot},
  title        = {Factorization and Inversion of a Million Matrices using {GPUs}: Challenges
                  and Countermeasures},
  booktitle    = {International Conference on Computational Science, {ICCS} 2017, 12-14
                  June 2017, Zurich, Switzerland},
  series       = {Procedia Computer Science},
  volume       = {108},
  pages        = {606--615},
  publisher    = {Elsevier},
  year         = {2017},
  url          = {https://doi.org/10.1016/j.procs.2017.05.250},
  doi          = {10.1016/J.PROCS.2017.05.250},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iccS/AbdelfattahHTD17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ics/AbdelfattahHTD17,
  author       = {Ahmad Abdelfattah and
                  Azzam Haidar and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  editor       = {William D. Gropp and
                  Pete Beckman and
                  Zhiyuan Li and
                  Francisco J. Cazorla},
  title        = {Novel {HPC} techniques to batch execution of many variable size {BLAS}
                  computations on {GPUs}},
  booktitle    = {Proceedings of the International Conference on Supercomputing, {ICS}
                  2017, Chicago, IL, USA, June 14-16, 2017},
  pages        = {5:1--5:10},
  publisher    = {{ACM}},
  year         = {2017},
  url          = {https://doi.org/10.1145/3079079.3079103},
  doi          = {10.1145/3079079.3079103},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ics/AbdelfattahHTD17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ppopp/HaidarATD17,
  author       = {Azzam Haidar and
                  Ahmad Abdelfattah and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  title        = {High-performance {Cholesky} factorization for {GPU}-only execution},
  booktitle    = {Proceedings of the General Purpose GPUs, GPGPU@PPoPP, Austin, TX,
                  USA, February 4-8, 2017},
  pages        = {42--52},
  publisher    = {{ACM}},
  year         = {2017},
  url          = {https://doi.org/10.1145/3038228.3038237},
  doi          = {10.1145/3038228.3038237},
  timestamp    = {Sun, 12 Jun 2022 19:46:08 +0200},
  biburl       = {https://dblp.org/rec/conf/ppopp/HaidarATD17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/actanum/AbdelfattahADGH16,
  author       = {Abdelfattah, Ahmad and Anzt, Hartwig and Dongarra, Jack J. and
                  Gates, Mark and Haidar, Azzam and Kurzak, Jakub and
                  Luszczek, Piotr and Tomov, Stanimire and Yamazaki, Ichitaro and
                  YarKhan, Asim},
  title        = {Linear algebra software for large-scale accelerated
                  multicore computing},
  journal      = {Acta Numer.},
  volume       = {25},
  pages        = {1--160},
  year         = {2016},
  url          = {https://doi.org/10.1017/S0962492916000015},
  doi          = {10.1017/S0962492916000015},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/actanum/AbdelfattahADGH16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/concurrency/AbdelfattahLKD16,
  author       = {Ahmad Abdelfattah and
                  Hatem Ltaief and
                  David E. Keyes and
                  Jack J. Dongarra},
  title        = {Performance optimization of Sparse Matrix-Vector Multiplication for
                  multi-component {PDE}-based applications using {GPUs}},
  journal      = {Concurr. Comput. Pract. Exp.},
  volume       = {28},
  number       = {12},
  pages        = {3447--3465},
  year         = {2016},
  url          = {https://doi.org/10.1002/cpe.3874},
  doi          = {10.1002/CPE.3874},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/concurrency/AbdelfattahLKD16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/toms/AbdelfattahKL16,
  author       = {Abdelfattah, Ahmad and Keyes, David E. and Ltaief, Hatem},
  title        = {{KBLAS:} An Optimized Library for Dense
                  Matrix-Vector Multiplication on {GPU} Accelerators},
  journal      = {{ACM} Trans. Math. Softw.},
  volume       = {42},
  number       = {3},
  pages        = {18:1--18:31},
  year         = {2016},
  url          = {https://doi.org/10.1145/2818311},
  doi          = {10.1145/2818311},
  timestamp    = {Tue, 16 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/toms/AbdelfattahKL16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/MasliahAHTBFD16,
  author       = {Masliah, Ian and Abdelfattah, Ahmad and Haidar, Azzam and
                  Tomov, Stanimire and Baboulin, Marc and Falcou, Jo{\"{e}}l and
                  Dongarra, Jack J.},
  editor       = {Dutot, Pierre{-}Fran{\c{c}}ois and Trystram, Denis},
  title        = {High-Performance Matrix-Matrix Multiplications of
                  Very Small Matrices},
  booktitle    = {Euro-Par 2016: Parallel Processing -
                  22nd International Conference on Parallel and Distributed Computing,
                  Grenoble, France, August 24-26, 2016, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {9833},
  pages        = {659--671},
  publisher    = {Springer},
  year         = {2016},
  url          = {https://doi.org/10.1007/978-3-319-43659-3\_48},
  doi          = {10.1007/978-3-319-43659-3\_48},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/europar/MasliahAHTBFD16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iccS/AbdelfattahBDDE16,
  author       = {Ahmad Abdelfattah and
                  Marc Baboulin and
                  Veselin Dobrev and
                  Jack J. Dongarra and
                  Christopher W. Earl and
                  Jo{\"{e}}l Falcou and
                  Azzam Haidar and
                  Ian Karlin and
                  Tzanio V. Kolev and
                  Ian Masliah and
                  Stanimire Tomov},
  editor       = {Michelle Connolly},
  title        = {High-Performance Tensor Contractions for {GPUs}},
  booktitle    = {International Conference on Computational Science 2016, {ICCS} 2016,
                  6-8 June 2016, San Diego, California, {USA}},
  series       = {Procedia Computer Science},
  volume       = {80},
  pages        = {108--118},
  publisher    = {Elsevier},
  year         = {2016},
  url          = {https://doi.org/10.1016/j.procs.2016.05.302},
  doi          = {10.1016/J.PROCS.2016.05.302},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iccS/AbdelfattahBDDE16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iccS/AbdelfattahHTD16,
  author       = {Ahmad Abdelfattah and
                  Azzam Haidar and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  editor       = {Michelle Connolly},
  title        = {Performance Tuning and Optimization Techniques of Fixed and Variable
                  Size Batched {Cholesky} Factorization on {GPUs}},
  booktitle    = {International Conference on Computational Science 2016, {ICCS} 2016,
                  6-8 June 2016, San Diego, California, {USA}},
  series       = {Procedia Computer Science},
  volume       = {80},
  pages        = {119--130},
  publisher    = {Elsevier},
  year         = {2016},
  url          = {https://doi.org/10.1016/j.procs.2016.05.303},
  doi          = {10.1016/J.PROCS.2016.05.303},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iccS/AbdelfattahHTD16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ipps/AbdelfattahHTD16,
  author       = {Abdelfattah, Ahmad and Haidar, Azzam and Tomov, Stanimire and
                  Dongarra, Jack J.},
  title        = {On the Development of Variable Size Batched Computation for
                  Heterogeneous Parallel Architectures},
  booktitle    = {2016 {IEEE} International Parallel and Distributed Processing
                  Symposium Workshops, {IPDPS} Workshops 2016,
                  Chicago, IL, USA, May 23-27, 2016},
  pages        = {1249--1258},
  publisher    = {{IEEE} Computer Society},
  year         = {2016},
  url          = {https://doi.org/10.1109/IPDPSW.2016.190},
  doi          = {10.1109/IPDPSW.2016.190},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ipps/AbdelfattahHTD16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/supercomputer/AbdelfattahHTD16,
  author       = {Ahmad Abdelfattah and
                  Azzam Haidar and
                  Stanimire Tomov and
                  Jack J. Dongarra},
  editor       = {Julian M. Kunkel and
                  Pavan Balaji and
                  Jack J. Dongarra},
  title        = {Performance, Design, and Autotuning of Batched {GEMM} for {GPUs}},
  booktitle    = {High Performance Computing - 31st International Conference, {ISC}
                  High Performance 2016, Frankfurt, Germany, June 19-23, 2016, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {9697},
  pages        = {21--38},
  publisher    = {Springer},
  year         = {2016},
  url          = {https://doi.org/10.1007/978-3-319-41321-1\_2},
  doi          = {10.1007/978-3-319-41321-1\_2},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/supercomputer/AbdelfattahHTD16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{DBLP:phd/basesearch/Abdelfattah15,
  author       = {Ahmad Abdelfattah},
  title        = {Accelerating Scientific Applications using High Performance Dense
                  and Sparse Linear Algebra Kernels on {GPUs}},
  school       = {King Abdullah University of Science and Technology, Thuwal, Saudi
                  Arabia},
  year         = {2015},
  url          = {http://hdl.handle.net/10754/346955},
  timestamp    = {Sat, 05 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/phd/basesearch/Abdelfattah15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/superfri/DongarraAAGHKLT15,
  author       = {Dongarra, Jack J. and Abalenkovs, Maksims and Abdelfattah, Ahmad and
                  Gates, Mark and Haidar, Azzam and Kurzak, Jakub and
                  Luszczek, Piotr and Tomov, Stanimire and Yamazaki, Ichitaro and
                  YarKhan, Asim},
  title        = {Parallel Programming Models for Dense Linear Algebra on
                  Heterogeneous Systems},
  journal      = {Supercomput. Front. Innov.},
  volume       = {2},
  number       = {4},
  pages        = {67--86},
  year         = {2015},
  url          = {https://doi.org/10.14529/jsfi150405},
  doi          = {10.14529/JSFI150405},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/superfri/DongarraAAGHKLT15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/AbdelfattahLK15,
  author       = {Ahmad Abdelfattah and
                  Hatem Ltaief and
                  David E. Keyes},
  editor       = {Jesper Larsson Tr{\"{a}}ff and
                  Sascha Hunold and
                  Francesco Versaci},
  title        = {High Performance Multi-{GPU} {SpMV} for Multi-component {PDE}-Based
                  Applications},
  booktitle    = {Euro-Par 2015: Parallel Processing - 21st International Conference
                  on Parallel and Distributed Computing, Vienna, Austria, August 24-28,
                  2015, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {9233},
  pages        = {601--612},
  publisher    = {Springer},
  year         = {2015},
  url          = {https://doi.org/10.1007/978-3-662-48096-0\_46},
  doi          = {10.1007/978-3-662-48096-0\_46},
  timestamp    = {Sun, 12 Nov 2023 02:07:45 +0100},
  biburl       = {https://dblp.org/rec/conf/europar/AbdelfattahLK15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/AbdelfattahGGKLSV14,
  author       = {Ahmad Abdelfattah and
                  Eric Gendron and
                  Damien Gratadour and
                  David E. Keyes and
                  Hatem Ltaief and
                  Arnaud Sevin and
                  Fabrice Vidal},
  editor       = {Fernando M. A. Silva and
                  In{\^{e}}s de Castro Dutra and
                  V{\'{\i}}tor Santos Costa},
  title        = {High Performance Pseudo-analytical Simulation of Multi-Object Adaptive
                  Optics over Multi-{GPU} Systems},
  booktitle    = {Euro-Par 2014 Parallel Processing - 20th International Conference,
                  Porto, Portugal, August 25-29, 2014. Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {8632},
  pages        = {704--715},
  publisher    = {Springer},
  year         = {2014},
  url          = {https://doi.org/10.1007/978-3-319-09873-9\_59},
  doi          = {10.1007/978-3-319-09873-9\_59},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/europar/AbdelfattahGGKLSV14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sc/ChararaLGKSAGMV14,
  author       = {Ali Charara and
                  Hatem Ltaief and
                  Damien Gratadour and
                  David E. Keyes and
                  Arnaud Sevin and
                  Ahmad Abdelfattah and
                  Eric Gendron and
                  Carine Morel and
                  Fabrice Vidal},
  editor       = {Trish Damkroger and
                  Jack J. Dongarra},
  title        = {Pipelining Computational Stages of the Tomographic Reconstructor for
                  Multi-Object Adaptive Optics on a Multi-{GPU} System},
  booktitle    = {International Conference for High Performance Computing, Networking,
                  Storage and Analysis, {SC} 2014, New Orleans, LA, USA, November 16-21,
                  2014},
  pages        = {262--273},
  publisher    = {{IEEE} Computer Society},
  year         = {2014},
  url          = {https://doi.org/10.1109/SC.2014.27},
  doi          = {10.1109/SC.2014.27},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/ChararaLGKSAGMV14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/AbdelfattahKL14,
  author       = {Abdelfattah, Ahmad and Keyes, David E. and Ltaief, Hatem},
  title        = {{KBLAS:} An Optimized Library for Dense
                  Matrix-Vector Multiplication on {GPU} Accelerators},
  journal      = {CoRR},
  volume       = {abs/1410.1726},
  year         = {2014},
  url          = {http://arxiv.org/abs/1410.1726},
  eprinttype   = {arXiv},
  eprint       = {1410.1726},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/AbdelfattahKL14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/europar/AbdelfattahKL12,
  author       = {Abdelfattah, Ahmad and Keyes, David E. and Ltaief, Hatem},
  editor       = {Caragiannis, Ioannis and Alexander, Michael and Badia, Rosa M. and
                  Cannataro, Mario and Costan, Alexandru and Danelutto, Marco and
                  Desprez, Fr{\'{e}}d{\'{e}}ric and Krammer, Bettina and
                  Sahuquillo, Julio and Scott, Stephen L. and Weidendorfer, Josef},
  title        = {Systematic Approach in Optimizing Numerical
                  Memory-Bound Kernels on {GPU}},
  booktitle    = {Euro-Par 2012: Parallel Processing Workshops -
                  BDMC, CGWS, HeteroPar, HiBB, OMHI, Paraphrase, PROPER, Resilience,
                  UCHPC, VHPC, Rhodes Islands, Greece, August 27-31, 2012.
                  Revised Selected Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {7640},
  pages        = {207--216},
  publisher    = {Springer},
  year         = {2012},
  url          = {https://doi.org/10.1007/978-3-642-36949-0\_23},
  doi          = {10.1007/978-3-642-36949-0\_23},
  timestamp    = {Wed, 19 Feb 2020 14:52:57 +0100},
  biburl       = {https://dblp.org/rec/conf/europar/AbdelfattahKL12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/vecpar/AbdelfattahDKL12,
  author       = {Abdelfattah, Ahmad and Dongarra, Jack J. and Keyes, David E. and
                  Ltaief, Hatem},
  editor       = {Dayd{\'{e}}, Michel J. and Marques, Osni and Nakajima, Kengo},
  title        = {Optimizing Memory-Bound {SYMV} Kernel on
                  {GPU} Hardware Accelerators},
  booktitle    = {High Performance Computing for Computational Science -
                  {VECPAR} 2012, 10th International Conference, Kobe, Japan,
                  July 17-20, 2012, Revised Selected Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {7851},
  pages        = {72--79},
  publisher    = {Springer},
  year         = {2012},
  url          = {https://doi.org/10.1007/978-3-642-38718-0\_10},
  doi          = {10.1007/978-3-642-38718-0\_10},
  timestamp    = {Tue, 14 May 2019 10:00:36 +0200},
  biburl       = {https://dblp.org/rec/conf/vecpar/AbdelfattahDKL12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics