@comment{
Stop the war!
Остановите войну!
for scientists:
default search action
BibTeX records: Ahmad Abdelfattah
}
% IPDPS 2023 conference paper: PAQR, a pivoting-avoiding QR factorization.
@inproceedings{DBLP:conf/ipps/SidLakhdarCBALGTJWDDA23,
  author    = {Wissam M. Sid{-}Lakhdar and
               S{\'{e}}bastien Cayrols and
               Daniel Bielich and
               Ahmad Abdelfattah and
               Piotr Luszczek and
               Mark Gates and
               Stanimire Tomov and
               Hans Johansen and
               David B. Williams{-}Young and
               Timothy A. Davis and
               Jack J. Dongarra and
               Hartwig Anzt},
  title     = {{PAQR:} Pivoting Avoiding {QR} factorization},
  booktitle = {{IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2023, St. Petersburg, FL, USA, May 15-19, 2023},
  pages     = {322--332},
  publisher = {{IEEE}},
  year      = {2023},
  url       = {https://doi.org/10.1109/IPDPS54959.2023.00040},
  doi       = {10.1109/IPDPS54959.2023.00040},
  timestamp = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ipps/SidLakhdarCBALGTJWDDA23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% SC-W 2023 workshop paper on batched band LU factorization and solve.
% "GPU" is brace-protected so sentence-casing styles do not render it as "Gpu".
@inproceedings{DBLP:conf/sc/AbdelfattahTLAD23,
  author    = {Ahmad Abdelfattah and
               Stanimire Tomov and
               Piotr Luszczek and
               Hartwig Anzt and
               Jack J. Dongarra},
  title     = {{GPU}-based {LU} Factorization and Solve on Batches of Matrices with Band Structure},
  booktitle = {Proceedings of the {SC} '23 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis, {SC-W} 2023, Denver, CO, USA, November 12-17, 2023},
  pages     = {1670--1679},
  publisher = {{ACM}},
  year      = {2023},
  url       = {https://doi.org/10.1145/3624062.3624247},
  doi       = {10.1145/3624062.3624247},
  timestamp = {Tue, 28 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/AbdelfattahTLAD23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2022 conference paper on homotopy continuation for minimal problems.
% "GPU" is brace-protected so sentence-casing styles do not render it as "Gpu".
@inproceedings{DBLP:conf/cvpr/ChienFATTK22,
  author    = {Chiang{-}Heng Chien and
               Hongyi Fan and
               Ahmad Abdelfattah and
               Elias P. Tsigaridas and
               Stanimire Tomov and
               Benjamin B. Kimia},
  title     = {{GPU}-Based Homotopy Continuation for Minimal Problems in Computer Vision},
  booktitle = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2022, New Orleans, LA, USA, June 18-24, 2022},
  pages     = {15744--15755},
  publisher = {{IEEE}},
  year      = {2022},
  url       = {https://doi.org/10.1109/CVPR52688.2022.01531},
  doi       = {10.1109/CVPR52688.2022.01531},
  timestamp = {Wed, 05 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/cvpr/ChienFATTK22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICCS 2022 paper (LNCS 13350) on batch QR factorization.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/iccS/AbdelfattahTD22,
  author    = {Ahmad Abdelfattah and
               Stan Tomov and
               Jack J. Dongarra},
  editor    = {Derek Groen and
               Cl{\'{e}}lia de Mulatier and
               Maciej Paszynski and
               Valeria V. Krzhizhanovskaya and
               Jack J. Dongarra and
               Peter M. A. Sloot},
  title     = {Batch {QR} Factorization on {GPUs}: Design, Optimization, and Tuning},
  booktitle = {Computational Science - {ICCS} 2022 - 22nd International Conference, London, UK, June 21-23, 2022, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {13350},
  pages     = {60--74},
  publisher = {Springer},
  year      = {2022},
  url       = {https://doi.org/10.1007/978-3-031-08751-6\_5},
  doi       = {10.1007/978-3-031-08751-6\_5},
  timestamp = {Mon, 27 Jun 2022 17:21:53 +0200},
  biburl    = {https://dblp.org/rec/conf/iccS/AbdelfattahTD22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% P3HPC 2022 workshop paper on portable dense linear algebra.
% "Al Farhan" is written in "Last, First" form so BibTeX keeps the two-word
% surname together instead of parsing "Farhan" alone as the last name
% (assumes the family name is "Al Farhan" -- confirm against the paper).
@inproceedings{DBLP:conf/p3hpc-ws/GatesYSACBAFD22,
  author    = {Mark Gates and
               Asim YarKhan and
               Dalal Sukkari and
               Kadir Akbudak and
               S{\'{e}}bastien Cayrols and
               Daniel Bielich and
               Ahmad Abdelfattah and
               Al Farhan, Mohammed A. and
               Jack J. Dongarra},
  title     = {Portable and Efficient Dense Linear Algebra in the Beginning of the Exascale Era},
  booktitle = {{IEEE/ACM} International Workshop on Performance, Portability and Productivity in HPC, P3HPC@SC 2022, Dallas, TX, USA, November 13-18, 2022},
  pages     = {36--46},
  publisher = {{IEEE}},
  year      = {2022},
  url       = {https://doi.org/10.1109/P3HPC56579.2022.00009},
  doi       = {10.1109/P3HPC56579.2022.00009},
  timestamp = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/p3hpc-ws/GatesYSACBAFD22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% SC22 conference paper on irregular matrix computations for sparse direct solvers.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/sc/AbdelfattahGBTLD22,
  author    = {Ahmad Abdelfattah and
               Pieter Ghysels and
               Wajih Boukaram and
               Stanimire Tomov and
               Xiaoye Sherry Li and
               Jack J. Dongarra},
  editor    = {Felix Wolf and
               Sameer Shende and
               Candace Culhane and
               Sadaf R. Alam and
               Heike Jagode},
  title     = {Addressing Irregular Patterns of Matrix Computations on {GPUs} and Their Impact on Applications Powered by Sparse Direct Solvers},
  booktitle = {{SC22:} International Conference for High Performance Computing, Networking, Storage and Analysis, Dallas, TX, USA, November 13-18, 2022},
  pages     = {26:1--26:14},
  publisher = {{IEEE}},
  year      = {2022},
  url       = {https://doi.org/10.1109/SC41404.2022.00031},
  doi       = {10.1109/SC41404.2022.00031},
  timestamp = {Wed, 24 May 2023 16:17:06 +0200},
  biburl    = {https://dblp.org/rec/conf/sc/AbdelfattahGBTLD22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Zenodo artifact record (August 2022).
% - The unfilled placeholder note "Accessed on YYYY-MM-DD." was dropped; it
%   would otherwise print verbatim. Add a real access date if the style needs one.
% - "SLATE", "Summit" and "Crusher" are brace-protected against sentence-casing.
% - "Al Farhan" is in "Last, First" form so the two-word surname stays together
%   (assumes the family name is "Al Farhan" -- confirm).
% - The title spelling "Reproducability" is kept exactly as in the exported record.
@misc{DBLP:data/10/GatesYSACBAFD22,
  author       = {Mark Gates and
                  Asim YarKhan and
                  Dalal Sukkari and
                  Kadir Akbudak and
                  S{\'{e}}bastien Cayrols and
                  Daniel Bielich and
                  Ahmad Abdelfattah and
                  Al Farhan, Mohammed A. and
                  Jack J. Dongarra},
  title        = {Reproducability Artifact for Running {SLATE}'s {GEMM} and {POTRF} Operations on {Summit} and {Crusher} (Version 2)},
  publisher    = {Zenodo},
  year         = {2022},
  month        = aug,
  howpublished = {\url{https://doi.org/10.5281/zenodo.7003870}},
  url          = {https://doi.org/10.5281/zenodo.7003870},
  doi          = {10.5281/ZENODO.7003870},
  timestamp    = {Mon, 25 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/data/10/GatesYSACBAFD22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (Int. J. High Perform. Comput. Appl. 35(4), 2021):
% survey of mixed-precision numerical linear algebra methods.
% The record carries no pages field.
@article{DBLP:journals/ijhpca/AbdelfattahABCC21,
  author    = {Ahmad Abdelfattah and
               Hartwig Anzt and
               Erik G. Boman and
               Erin C. Carson and
               Terry Cojean and
               Jack J. Dongarra and
               Alyson Fox and
               Mark Gates and
               Nicholas J. Higham and
               Xiaoye S. Li and
               Jennifer A. Loe and
               Piotr Luszczek and
               Srikara Pranesh and
               Siva Rajamanickam and
               Tobias Ribizel and
               Barry F. Smith and
               Kasia Swirydowicz and
               Stephen J. Thomas and
               Stanimire Tomov and
               Yaohung M. Tsai and
               Ulrike Meier Yang},
  title     = {A survey of numerical linear algebra methods utilizing mixed-precision arithmetic},
  journal   = {Int. J. High Perform. Comput. Appl.},
  volume    = {35},
  number    = {4},
  year      = {2021},
  url       = {https://doi.org/10.1177/10943420211003313},
  doi       = {10.1177/10943420211003313},
  timestamp = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ijhpca/AbdelfattahABCC21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (Int. J. High Perform. Comput. Appl. 35(6), 2021):
% high-order finite element methods for exascale discretizations.
@article{DBLP:journals/ijhpca/KolevFMDBDWTSAB21,
  author    = {Tzanio V. Kolev and
               Paul F. Fischer and
               Misun Min and
               Jack J. Dongarra and
               Jed Brown and
               Veselin Dobrev and
               Tim Warburton and
               Stanimire Tomov and
               Mark S. Shephard and
               Ahmad Abdelfattah and
               Valeria Barra and
               Natalie Beams and
               Jean{-}Sylvain Camier and
               Noel Chalmers and
               Yohann Dudouit and
               Ali Karakus and
               Ian Karlin and
               Stefan Kerkemeier and
               Yu{-}Hsiang Lan and
               David S. Medina and
               Elia Merzari and
               Aleksandr Obabko and
               Will Pazner and
               Thilina Rathnayake and
               Cameron W. Smith and
               Lukas Spies and
               Kasia Swirydowicz and
               Jeremy L. Thompson and
               Ananias Tomboulides and
               Vladimir Z. Tomov},
  title     = {Efficient exascale discretizations: High-order finite element methods},
  journal   = {Int. J. High Perform. Comput. Appl.},
  volume    = {35},
  number    = {6},
  pages     = {527--552},
  year      = {2021},
  url       = {https://doi.org/10.1177/10943420211020803},
  doi       = {10.1177/10943420211020803},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ijhpca/KolevFMDBDWTSAB21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (J. Open Source Softw. 6(63), 2021) describing libCEED.
% "libCEED" is brace-protected so sentence-casing styles do not render it
% as "libceed".
@article{DBLP:journals/jossw/BrownABBCDDGKMP21,
  author    = {Jed Brown and
               Ahmad Abdelfattah and
               Valeria Barra and
               Natalie N. Beams and
               Jean{-}Sylvain Camier and
               Veselin Dobrev and
               Yohann Dudouit and
               Leila Ghaffari and
               Tzanio V. Kolev and
               David S. Medina and
               Will Pazner and
               Thilina Rathnayake and
               Jeremy L. Thompson and
               Stan Tomov},
  title     = {{libCEED}: Fast algebra for high-order element-based discretizations},
  journal   = {J. Open Source Softw.},
  volume    = {6},
  number    = {63},
  pages     = {2945},
  year      = {2021},
  url       = {https://doi.org/10.21105/joss.02945},
  doi       = {10.21105/JOSS.02945},
  timestamp = {Sun, 25 Dec 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/jossw/BrownABBCDDGKMP21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (Parallel Comput. 108, 2021):
% GPU algorithms for efficient exascale discretizations.
@article{DBLP:journals/pc/AbdelfattahBBBB21,
  author    = {Ahmad Abdelfattah and
               Valeria Barra and
               Natalie Beams and
               Ryan Bleile and
               Jed Brown and
               Jean{-}Sylvain Camier and
               Robert Carson and
               Noel Chalmers and
               Veselin Dobrev and
               Yohann Dudouit and
               Paul F. Fischer and
               Ali Karakus and
               Stefan Kerkemeier and
               Tzanio V. Kolev and
               Yu{-}Hsiang Lan and
               Elia Merzari and
               Misun Min and
               Malachi Phillips and
               Thilina Rathnayake and
               Robert N. Rieben and
               Thomas Stitt and
               Ananias Tomboulides and
               Stanimire Tomov and
               Vladimir Z. Tomov and
               Arturo Vargas and
               Tim Warburton and
               Kenneth Weiss},
  title     = {{GPU} algorithms for Efficient Exascale Discretizations},
  journal   = {Parallel Comput.},
  volume    = {108},
  pages     = {102841},
  year      = {2021},
  url       = {https://doi.org/10.1016/j.parco.2021.102841},
  doi       = {10.1016/J.PARCO.2021.102841},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/pc/AbdelfattahBBBB21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (ACM Trans. Math. Softw. 47(3), 2021):
% batched BLAS and LAPACK routines.
@article{DBLP:journals/toms/AbdelfattahCDGH21,
  author    = {Ahmad Abdelfattah and
               Timothy B. Costa and
               Jack J. Dongarra and
               Mark Gates and
               Azzam Haidar and
               Sven Hammarling and
               Nicholas J. Higham and
               Jakub Kurzak and
               Piotr Luszczek and
               Stanimire Tomov and
               Mawussi Zounon},
  title     = {A Set of Batched Basic Linear Algebra Subprograms and {LAPACK} Routines},
  journal   = {{ACM} Trans. Math. Softw.},
  volume    = {47},
  number    = {3},
  pages     = {21:1--21:23},
  year      = {2021},
  url       = {https://doi.org/10.1145/3431921},
  doi       = {10.1145/3431921},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/toms/AbdelfattahCDGH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2109.04996 (CoRR, 2021).
% Title (up to capitalization) and author list match
% DBLP:journals/ijhpca/KolevFMDBDWTSAB21; presumably the preprint of that
% article -- confirm before citing both.
@article{DBLP:journals/corr/abs-2109-04996,
  author     = {Tzanio V. Kolev and
                Paul F. Fischer and
                Misun Min and
                Jack J. Dongarra and
                Jed Brown and
                Veselin Dobrev and
                Tim Warburton and
                Stanimire Tomov and
                Mark S. Shephard and
                Ahmad Abdelfattah and
                Valeria Barra and
                Natalie Beams and
                Jean{-}Sylvain Camier and
                Noel Chalmers and
                Yohann Dudouit and
                Ali Karakus and
                Ian Karlin and
                Stefan Kerkemeier and
                Yu{-}Hsiang Lan and
                David S. Medina and
                Elia Merzari and
                Aleksandr Obabko and
                Will Pazner and
                Thilina Rathnayake and
                Cameron W. Smith and
                Lukas Spies and
                Kasia Swirydowicz and
                Jeremy L. Thompson and
                Ananias Tomboulides and
                Vladimir Z. Tomov},
  title      = {Efficient Exascale Discretizations: High-Order Finite Element Methods},
  journal    = {CoRR},
  volume     = {abs/2109.04996},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.04996},
  eprinttype = {arXiv},
  eprint     = {2109.04996},
  timestamp  = {Sat, 03 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-04996.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2109.05072 (CoRR, 2021).
% Title (up to capitalization) and author list match
% DBLP:journals/pc/AbdelfattahBBBB21; presumably the preprint of that
% article -- confirm before citing both.
@article{DBLP:journals/corr/abs-2109-05072,
  author     = {Ahmad Abdelfattah and
                Valeria Barra and
                Natalie Beams and
                Ryan Bleile and
                Jed Brown and
                Jean{-}Sylvain Camier and
                Robert Carson and
                Noel Chalmers and
                Veselin Dobrev and
                Yohann Dudouit and
                Paul F. Fischer and
                Ali Karakus and
                Stefan Kerkemeier and
                Tzanio V. Kolev and
                Yu{-}Hsiang Lan and
                Elia Merzari and
                Misun Min and
                Malachi Phillips and
                Thilina Rathnayake and
                Robert N. Rieben and
                Thomas Stitt and
                Ananias Tomboulides and
                Stanimire Tomov and
                Vladimir Z. Tomov and
                Arturo Vargas and
                Tim Warburton and
                Kenneth Weiss},
  title      = {{GPU} Algorithms for Efficient Exascale Discretizations},
  journal    = {CoRR},
  volume     = {abs/2109.05072},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.05072},
  eprinttype = {arXiv},
  eprint     = {2109.05072},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-05072.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2112.03444 (CoRR, 2021).
% Title and author list match DBLP:conf/cvpr/ChienFATTK22; presumably the
% preprint of that paper -- confirm before citing both.
% "GPU" is brace-protected so sentence-casing styles do not render it as "Gpu".
@article{DBLP:journals/corr/abs-2112-03444,
  author     = {Chiang{-}Heng Chien and
                Hongyi Fan and
                Ahmad Abdelfattah and
                Elias P. Tsigaridas and
                Stanimire Tomov and
                Benjamin B. Kimia},
  title      = {{GPU}-Based Homotopy Continuation for Minimal Problems in Computer Vision},
  journal    = {CoRR},
  volume     = {abs/2112.03444},
  year       = {2021},
  url        = {https://arxiv.org/abs/2112.03444},
  eprinttype = {arXiv},
  eprint     = {2112.03444},
  timestamp  = {Mon, 13 Dec 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2112-03444.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (Int. J. High Perform. Comput. Appl. 34(6), 2020) on MAGMA templates.
% The record carries no pages field.
% "Al Farhan" is written in "Last, First" form so BibTeX keeps the two-word
% surname together instead of parsing "Farhan" alone as the last name
% (assumes the family name is "Al Farhan" -- confirm against the paper).
@article{DBLP:journals/ijhpca/FarhanATGSHRD20,
  author    = {Al Farhan, Mohammed A. and
               Ahmad Abdelfattah and
               Stanimire Tomov and
               Mark Gates and
               Dalal Sukkari and
               Azzam Haidar and
               Robert Rosenberg and
               Jack J. Dongarra},
  title     = {{MAGMA} templates for scalable linear algebra on emerging architectures},
  journal   = {Int. J. High Perform. Comput. Appl.},
  volume    = {34},
  number    = {6},
  year      = {2020},
  url       = {https://doi.org/10.1177/1094342020938421},
  doi       = {10.1177/1094342020938421},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ijhpca/FarhanATGSHRD20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (J. Parallel Distributed Comput. 145, 2020):
% batched small-matrix multiplication in half and half-complex precisions.
@article{DBLP:journals/jpdc/AbdelfattahTD20,
  author    = {Ahmad Abdelfattah and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {Matrix multiplication on batches of small matrices in half and half-complex precisions},
  journal   = {J. Parallel Distributed Comput.},
  volume    = {145},
  pages     = {188--201},
  year      = {2020},
  url       = {https://doi.org/10.1016/j.jpdc.2020.07.001},
  doi       = {10.1016/J.JPDC.2020.07.001},
  timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jpdc/AbdelfattahTD20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% HPEC 2020 conference paper on dense linear algebra for AMD GPUs.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/hpec/BrownATD20,
  author    = {Cade Brown and
               Ahmad Abdelfattah and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {Design, Optimization, and Benchmarking of Dense Linear Algebra Algorithms on {AMD} {GPUs}},
  booktitle = {2020 {IEEE} High Performance Extreme Computing Conference, {HPEC} 2020, Waltham, MA, USA, September 22-24, 2020},
  pages     = {1--7},
  publisher = {{IEEE}},
  year      = {2020},
  url       = {https://doi.org/10.1109/HPEC43674.2020.9286214},
  doi       = {10.1109/HPEC43674.2020.9286214},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/hpec/BrownATD20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICCS 2020 paper (LNCS 12138) on FP16-enabled mixed-precision SPD solvers.
% "FP16" and "GPUs" are brace-protected so sentence-casing styles do not
% lowercase them.
@inproceedings{DBLP:conf/iccS/AbdelfattahTD20,
  author    = {Ahmad Abdelfattah and
               Stan Tomov and
               Jack J. Dongarra},
  editor    = {Valeria V. Krzhizhanovskaya and
               G{\'{a}}bor Z{\'{a}}vodszky and
               Michael Harold Lees and
               Jack J. Dongarra and
               Peter M. A. Sloot and
               S{\'{e}}rgio Brissos and
               Jo{\~{a}}o Teixeira},
  title     = {Investigating the Benefit of {FP16}-Enabled Mixed-Precision Solvers for Symmetric Positive Definite Matrices Using {GPUs}},
  booktitle = {Computational Science - {ICCS} 2020 - 20th International Conference, Amsterdam, The Netherlands, June 3-5, 2020, Proceedings, Part {II}},
  series    = {Lecture Notes in Computer Science},
  volume    = {12138},
  pages     = {237--250},
  publisher = {Springer},
  year      = {2020},
  url       = {https://doi.org/10.1007/978-3-030-50417-5\_18},
  doi       = {10.1007/978-3-030-50417-5\_18},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iccS/AbdelfattahTD20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% PMBS 2020 workshop paper evaluating the A100 GPU for sparse and batched work.
% "NVIDIA" and "Ampere" are brace-protected so sentence-casing styles do not
% lowercase them.
@inproceedings{DBLP:conf/pmbs-ws/AnztTACD20,
  author    = {Hartwig Anzt and
               Yuhsiang M. Tsai and
               Ahmad Abdelfattah and
               Terry Cojean and
               Jack J. Dongarra},
  title     = {Evaluating the Performance of {NVIDIA}'s {A100} {Ampere} {GPU} for Sparse and Batched Computations},
  booktitle = {2020 {IEEE/ACM} Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems, PMBS@SC 2020, Atlanta, GA, USA, November 12, 2020},
  pages     = {26--38},
  publisher = {{IEEE}},
  year      = {2020},
  url       = {https://doi.org/10.1109/PMBS51919.2020.00009},
  doi       = {10.1109/PMBS51919.2020.00009},
  timestamp = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/pmbs-ws/AnztTACD20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ScalA 2020 workshop paper on high-order finite elements with batch GEMM.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/scala-ws/BeamsATDKD20,
  author    = {Natalie Beams and
               Ahmad Abdelfattah and
               Stan Tomov and
               Jack J. Dongarra and
               Tzanio V. Kolev and
               Yohann Dudouit},
  title     = {High-Order Finite Element Method using Standard and Device-Level Batch {GEMM} on {GPUs}},
  booktitle = {11th {IEEE/ACM} Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems, ScalA@SC 2020, Atlanta, GA, USA, November 13, 2020},
  pages     = {53--60},
  publisher = {{IEEE}},
  year      = {2020},
  url       = {https://doi.org/10.1109/ScalA51936.2020.00012},
  doi       = {10.1109/SCALA51936.2020.00012},
  timestamp = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/scala-ws/BeamsATDKD20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2007.06674 (CoRR, 2020):
% survey of numerical methods using mixed-precision arithmetic.
@article{DBLP:journals/corr/abs-2007-06674,
  author     = {Ahmad Abdelfattah and
                Hartwig Anzt and
                Erik G. Boman and
                Erin C. Carson and
                Terry Cojean and
                Jack J. Dongarra and
                Mark Gates and
                Thomas Gr{\"{u}}tzmacher and
                Nicholas J. Higham and
                Xiaoye Sherry Li and
                Neil Lindquist and
                Yang Liu and
                Jennifer A. Loe and
                Piotr Luszczek and
                Pratik Nayak and
                Srikara Pranesh and
                Sivasankaran Rajamanickam and
                Tobias Ribizel and
                Barry Smith and
                Kasia Swirydowicz and
                Stephen J. Thomas and
                Stanimire Tomov and
                Yaohung M. Tsai and
                Ichitaro Yamazaki and
                Ulrike Meier Yang},
  title      = {A Survey of Numerical Methods Utilizing Mixed Precision Arithmetic},
  journal    = {CoRR},
  volume     = {abs/2007.06674},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.06674},
  eprinttype = {arXiv},
  eprint     = {2007.06674},
  timestamp  = {Mon, 29 Mar 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-06674.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (Parallel Comput. 81, 2019):
% high-performance matrix-matrix multiplication of very small matrices.
@article{DBLP:journals/pc/MasliahAHTBFD19,
  author    = {Ian Masliah and
               Ahmad Abdelfattah and
               Azzam Haidar and
               Stanimire Tomov and
               Marc Baboulin and
               Jo{\"{e}}l Falcou and
               Jack J. Dongarra},
  title     = {Algorithms and optimization techniques for high-performance matrix-matrix multiplications of very small matrices},
  journal   = {Parallel Comput.},
  volume    = {81},
  pages     = {1--21},
  year      = {2019},
  url       = {https://doi.org/10.1016/j.parco.2018.10.003},
  doi       = {10.1016/J.PARCO.2018.10.003},
  timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/pc/MasliahAHTBFD19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% HPEC 2019 conference paper on batched LU factorization.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/hpec/AbdelfattahTD19,
  author    = {Ahmad Abdelfattah and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {Progressive Optimization of Batched {LU} Factorization on {GPUs}},
  booktitle = {2019 {IEEE} High Performance Extreme Computing Conference, {HPEC} 2019, Waltham, MA, USA, September 24-26, 2019},
  pages     = {1--6},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/HPEC.2019.8916270},
  doi       = {10.1109/HPEC.2019.8916270},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/hpec/AbdelfattahTD19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICPP 2019 conference paper on massively parallel automated software tuning.
@inproceedings{DBLP:conf/icpp/KurzakTGAD19,
  author    = {Jakub Kurzak and
               Yaohung M. Tsai and
               Mark Gates and
               Ahmad Abdelfattah and
               Jack J. Dongarra},
  title     = {Massively Parallel Automated Software Tuning},
  booktitle = {Proceedings of the 48th International Conference on Parallel Processing, {ICPP} 2019, Kyoto, Japan, August 05-08, 2019},
  pages     = {92:1--92:10},
  publisher = {{ACM}},
  year      = {2019},
  url       = {https://doi.org/10.1145/3337821.3337908},
  doi       = {10.1145/3337821.3337908},
  timestamp = {Tue, 29 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icpp/KurzakTGAD19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IPDPS 2019 conference paper on half-precision batched matrix multiplication.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/ipps/AbdelfattahTD19,
  author    = {Ahmad Abdelfattah and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {Fast Batched Matrix Multiplication for Small Sizes Using Half-Precision Arithmetic on {GPUs}},
  booktitle = {2019 {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2019, Rio de Janeiro, Brazil, May 20-24, 2019},
  pages     = {111--122},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/IPDPS.2019.00022},
  doi       = {10.1109/IPDPS.2019.00022},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ipps/AbdelfattahTD19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ScalA 2019 workshop paper on half-precision computation for complex matrices.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/sc/AbdelfattahTD19,
  author    = {Ahmad Abdelfattah and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {Towards Half-Precision Computation for Complex Matrices: {A} Case Study for Mixed Precision Solvers on {GPUs}},
  booktitle = {10th {IEEE/ACM} Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems, ScalA@SC 2019, Denver, CO, USA, November 18, 2019},
  pages     = {17--24},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/ScalA49573.2019.00008},
  doi       = {10.1109/SCALA49573.2019.00008},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/AbdelfattahTD19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (J. Comput. Sci. 26, 2018) on batched one-sided factorizations.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/jocs/AbdelfattahHTD18,
  author    = {Ahmad Abdelfattah and
               Azzam Haidar and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {Batched one-sided factorizations of tiny matrices using {GPUs}: Challenges and countermeasures},
  journal   = {J. Comput. Sci.},
  volume    = {26},
  pages     = {226--236},
  year      = {2018},
  url       = {https://doi.org/10.1016/j.jocs.2018.01.005},
  doi       = {10.1016/J.JOCS.2018.01.005},
  timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jocs/AbdelfattahHTD18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (IEEE Trans. Parallel Distributed Syst. 29(5), 2018)
% on batched LU and Cholesky factorizations of very small matrices.
% "Cholesky" (a proper noun) is brace-protected against sentence-casing.
@article{DBLP:journals/tpds/HaidarAZTD18,
  author    = {Azzam Haidar and
               Ahmad Abdelfattah and
               Mawussi Zounon and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {A Guide for Achieving High Performance with Very Small Matrices on {GPU:} {A} Case Study of Batched {LU} and {Cholesky} Factorizations},
  journal   = {{IEEE} Trans. Parallel Distributed Syst.},
  volume    = {29},
  number    = {5},
  pages     = {973--984},
  year      = {2018},
  url       = {https://doi.org/10.1109/TPDS.2017.2783929},
  doi       = {10.1109/TPDS.2017.2783929},
  timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/tpds/HaidarAZTD18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (IEEE Trans. Parallel Distributed Syst. 29(12), 2018)
% on high-performance, energy-efficient dense linear solvers.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/tpds/AbdelfattahHTD18,
  author    = {Ahmad Abdelfattah and
               Azzam Haidar and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {Analysis and Design Techniques towards High-Performance and Energy-Efficient Dense Linear Solvers on {GPUs}},
  journal   = {{IEEE} Trans. Parallel Distributed Syst.},
  volume    = {29},
  number    = {12},
  pages     = {2700--2712},
  year      = {2018},
  url       = {https://doi.org/10.1109/TPDS.2018.2842785},
  doi       = {10.1109/TPDS.2018.2842785},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/tpds/AbdelfattahHTD18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% HPEC 2018 conference paper on GPU kernels for irregular batch workloads.
% "Cholesky" (a proper noun) is brace-protected against sentence-casing.
@inproceedings{DBLP:conf/hpec/AbdelfattahHTD18,
  author    = {Ahmad Abdelfattah and
               Azzam Haidar and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {Optimizing {GPU} Kernels for Irregular Batch Workloads: {A} Case Study for {Cholesky} Factorization},
  booktitle = {2018 {IEEE} High Performance Extreme Computing Conference, {HPEC} 2018, Waltham, MA, USA, September 25-27, 2018},
  pages     = {1--7},
  publisher = {{IEEE}},
  year      = {2018},
  url       = {https://doi.org/10.1109/HPEC.2018.8547576},
  doi       = {10.1109/HPEC.2018.8547576},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/hpec/AbdelfattahHTD18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICCS 2018 paper (LNCS 10860) on half-precision arithmetic and iterative
% refinement for linear solvers.
@inproceedings{DBLP:conf/iccS/HaidarAZWPTD18,
  author    = {Azzam Haidar and
               Ahmad Abdelfattah and
               Mawussi Zounon and
               Panruo Wu and
               Srikara Pranesh and
               Stanimire Tomov and
               Jack J. Dongarra},
  editor    = {Yong Shi and
               Haohuan Fu and
               Yingjie Tian and
               Valeria V. Krzhizhanovskaya and
               Michael Harold Lees and
               Jack J. Dongarra and
               Peter M. A. Sloot},
  title     = {The Design of Fast and Energy-Efficient Linear Solvers: On the Potential of Half-Precision Arithmetic and Iterative Refinement Techniques},
  booktitle = {Computational Science - {ICCS} 2018 - 18th International Conference, Wuxi, China, June 11-13, 2018, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {10860},
  pages     = {586--600},
  publisher = {Springer},
  year      = {2018},
  url       = {https://doi.org/10.1007/978-3-319-93698-7\_45},
  doi       = {10.1007/978-3-319-93698-7\_45},
  timestamp = {Mon, 08 May 2023 14:38:37 +0200},
  biburl    = {https://dblp.org/rec/conf/iccS/HaidarAZWPTD18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IPDPS 2018 conference paper on a hierarchical-matrix BiCGStab solver.
% "BiCGStab" is brace-protected so sentence-casing styles do not render it
% as "bicgstab".
@inproceedings{DBLP:conf/ipps/YamazakiAIOTYD18,
  author    = {Ichitaro Yamazaki and
               Ahmad Abdelfattah and
               Akihiro Ida and
               Satoshi Ohshima and
               Stanimire Tomov and
               Rio Yokota and
               Jack J. Dongarra},
  title     = {Performance of Hierarchical-matrix {BiCGStab} Solver on {GPU} Clusters},
  booktitle = {2018 {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2018, Vancouver, BC, Canada, May 21-25, 2018},
  pages     = {930--939},
  publisher = {{IEEE} Computer Society},
  year      = {2018},
  url       = {https://doi.org/10.1109/IPDPS.2018.00102},
  doi       = {10.1109/IPDPS.2018.00102},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ipps/YamazakiAIOTYD18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (Comput. Sci. Eng. 19(3), 2017) on extreme computing.
@article{DBLP:journals/cse/DongarraTLKGYAH17,
  author    = {Jack J. Dongarra and
               Stanimire Tomov and
               Piotr Luszczek and
               Jakub Kurzak and
               Mark Gates and
               Ichitaro Yamazaki and
               Hartwig Anzt and
               Azzam Haidar and
               Ahmad Abdelfattah},
  title     = {With Extreme Computing, the Rules Have Changed},
  journal   = {Comput. Sci. Eng.},
  volume    = {19},
  number    = {3},
  pages     = {52--62},
  year      = {2017},
  url       = {https://doi.org/10.1109/MCSE.2017.48},
  doi       = {10.1109/MCSE.2017.48},
  timestamp = {Tue, 16 Aug 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/cse/DongarraTLKGYAH17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (J. Comput. Sci. 20, 2017) on Cholesky factorization in MAGMA.
% "Cholesky" and "GPUs" are brace-protected so sentence-casing styles do not
% lowercase them.
@article{DBLP:journals/jocs/AbdelfattahHTD17,
  author    = {Ahmad Abdelfattah and
               Azzam Haidar and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {Fast {Cholesky} factorization on {GPUs} for batch and native modes in {MAGMA}},
  journal   = {J. Comput. Sci.},
  volume    = {20},
  pages     = {85--93},
  year      = {2017},
  url       = {https://doi.org/10.1016/j.jocs.2016.12.009},
  doi       = {10.1016/J.JOCS.2016.12.009},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/jocs/AbdelfattahHTD17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICCS 2017 paper (Procedia Computer Science 108) on batched factorization
% and inversion.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/iccS/AbdelfattahHTD17,
  author    = {Ahmad Abdelfattah and
               Azzam Haidar and
               Stanimire Tomov and
               Jack J. Dongarra},
  editor    = {Petros Koumoutsakos and
               Michael Lees and
               Valeria V. Krzhizhanovskaya and
               Jack J. Dongarra and
               Peter M. A. Sloot},
  title     = {Factorization and Inversion of a Million Matrices using {GPUs}: Challenges and Countermeasures},
  booktitle = {International Conference on Computational Science, {ICCS} 2017, 12-14 June 2017, Zurich, Switzerland},
  series    = {Procedia Computer Science},
  volume    = {108},
  pages     = {606--615},
  publisher = {Elsevier},
  year      = {2017},
  url       = {https://doi.org/10.1016/j.procs.2017.05.250},
  doi       = {10.1016/J.PROCS.2017.05.250},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iccS/AbdelfattahHTD17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICS 2017 conference paper on batched variable-size BLAS computations.
% "GPUs" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/ics/AbdelfattahHTD17,
  author    = {Ahmad Abdelfattah and
               Azzam Haidar and
               Stanimire Tomov and
               Jack J. Dongarra},
  editor    = {William D. Gropp and
               Pete Beckman and
               Zhiyuan Li and
               Francisco J. Cazorla},
  title     = {Novel {HPC} techniques to batch execution of many variable size {BLAS} computations on {GPUs}},
  booktitle = {Proceedings of the International Conference on Supercomputing, {ICS} 2017, Chicago, IL, USA, June 14-16, 2017},
  pages     = {5:1--5:10},
  publisher = {{ACM}},
  year      = {2017},
  url       = {https://doi.org/10.1145/3079079.3079103},
  doi       = {10.1145/3079079.3079103},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ics/AbdelfattahHTD17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% GPGPU 2017 workshop paper on Cholesky factorization for GPU-only execution.
% "Cholesky" and "GPU" are brace-protected so sentence-casing styles do not
% lowercase them.
@inproceedings{DBLP:conf/ppopp/HaidarATD17,
  author    = {Azzam Haidar and
               Ahmad Abdelfattah and
               Stanimire Tomov and
               Jack J. Dongarra},
  title     = {High-performance {Cholesky} factorization for {GPU}-only execution},
  booktitle = {Proceedings of the General Purpose GPUs, GPGPU@PPoPP, Austin, TX, USA, February 4-8, 2017},
  pages     = {42--52},
  publisher = {{ACM}},
  year      = {2017},
  url       = {https://doi.org/10.1145/3038228.3038237},
  doi       = {10.1145/3038228.3038237},
  timestamp = {Sun, 12 Jun 2022 19:46:08 +0200},
  biburl    = {https://dblp.org/rec/conf/ppopp/HaidarATD17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (Acta Numer. 25, 2016):
% linear algebra software for accelerated multicore computing.
@article{DBLP:journals/actanum/AbdelfattahADGH16,
  author    = {Ahmad Abdelfattah and
               Hartwig Anzt and
               Jack J. Dongarra and
               Mark Gates and
               Azzam Haidar and
               Jakub Kurzak and
               Piotr Luszczek and
               Stanimire Tomov and
               Ichitaro Yamazaki and
               Asim YarKhan},
  title     = {Linear algebra software for large-scale accelerated multicore computing},
  journal   = {Acta Numer.},
  volume    = {25},
  pages     = {1--160},
  year      = {2016},
  url       = {https://doi.org/10.1017/S0962492916000015},
  doi       = {10.1017/S0962492916000015},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/actanum/AbdelfattahADGH16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (Concurr. Comput. Pract. Exp. 28(12), 2016)
% on sparse matrix-vector multiplication for PDE-based applications.
% "PDE" and "GPUs" are brace-protected so sentence-casing styles do not
% lowercase them.
@article{DBLP:journals/concurrency/AbdelfattahLKD16,
  author    = {Ahmad Abdelfattah and
               Hatem Ltaief and
               David E. Keyes and
               Jack J. Dongarra},
  title     = {Performance optimization of Sparse Matrix-Vector Multiplication for multi-component {PDE}-based applications using {GPUs}},
  journal   = {Concurr. Comput. Pract. Exp.},
  volume    = {28},
  number    = {12},
  pages     = {3447--3465},
  year      = {2016},
  url       = {https://doi.org/10.1002/cpe.3874},
  doi       = {10.1002/CPE.3874},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/concurrency/AbdelfattahLKD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (ACM Trans. Math. Softw. 42(3), 2016) describing KBLAS.
@article{DBLP:journals/toms/AbdelfattahKL16,
  author    = {Ahmad Abdelfattah and
               David E. Keyes and
               Hatem Ltaief},
  title     = {{KBLAS:} An Optimized Library for Dense Matrix-Vector Multiplication on {GPU} Accelerators},
  journal   = {{ACM} Trans. Math. Softw.},
  volume    = {42},
  number    = {3},
  pages     = {18:1--18:31},
  year      = {2016},
  url       = {https://doi.org/10.1145/2818311},
  doi       = {10.1145/2818311},
  timestamp = {Tue, 16 Aug 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/toms/AbdelfattahKL16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Euro-Par 2016 paper (LNCS 9833) on matrix-matrix multiplication of very
% small matrices.
@inproceedings{DBLP:conf/europar/MasliahAHTBFD16,
  author    = {Ian Masliah and
               Ahmad Abdelfattah and
               Azzam Haidar and
               Stanimire Tomov and
               Marc Baboulin and
               Jo{\"{e}}l Falcou and
               Jack J. Dongarra},
  editor    = {Pierre{-}Fran{\c{c}}ois Dutot and
               Denis Trystram},
  title     = {High-Performance Matrix-Matrix Multiplications of Very Small Matrices},
  booktitle = {Euro-Par 2016: Parallel Processing - 22nd International Conference on Parallel and Distributed Computing, Grenoble, France, August 24-26, 2016, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {9833},
  pages     = {659--671},
  publisher = {Springer},
  year      = {2016},
  url       = {https://doi.org/10.1007/978-3-319-43659-3\_48},
  doi       = {10.1007/978-3-319-43659-3\_48},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/europar/MasliahAHTBFD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper at ICCS 2016 (Procedia Computer Science, volume 80).
% GPUs is braced in the title so that styles which sentence-case titles do not
% lowercase it.
@inproceedings{DBLP:conf/iccS/AbdelfattahBDDE16,
  author    = {Ahmad Abdelfattah and Marc Baboulin and Veselin Dobrev and Jack J. Dongarra and Christopher W. Earl and Joel Falcou and Azzam Haidar and Ian Karlin and Tzanio V. Kolev and Ian Masliah and Stanimire Tomov},
  editor    = {Michelle Connolly},
  title     = {High-Performance Tensor Contractions for {GPUs}},
  booktitle = {International Conference on Computational Science 2016, {ICCS} 2016, 6-8 June 2016, San Diego, California, {USA}},
  series    = {Procedia Computer Science},
  volume    = {80},
  pages     = {108--118},
  publisher = {Elsevier},
  year      = {2016},
  url       = {https://doi.org/10.1016/j.procs.2016.05.302},
  doi       = {10.1016/J.PROCS.2016.05.302},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iccS/AbdelfattahBDDE16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper at ICCS 2016 (Procedia Computer Science, volume 80).
% The proper noun Cholesky and the acronym GPUs are braced in the title so that
% styles which sentence-case titles do not lowercase them.
@inproceedings{DBLP:conf/iccS/AbdelfattahHTD16,
  author    = {Ahmad Abdelfattah and Azzam Haidar and Stanimire Tomov and Jack J. Dongarra},
  editor    = {Michelle Connolly},
  title     = {Performance Tuning and Optimization Techniques of Fixed and Variable Size Batched {Cholesky} Factorization on {GPUs}},
  booktitle = {International Conference on Computational Science 2016, {ICCS} 2016, 6-8 June 2016, San Diego, California, {USA}},
  series    = {Procedia Computer Science},
  volume    = {80},
  pages     = {119--130},
  publisher = {Elsevier},
  year      = {2016},
  url       = {https://doi.org/10.1016/j.procs.2016.05.303},
  doi       = {10.1016/J.PROCS.2016.05.303},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iccS/AbdelfattahHTD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper in the IPDPS Workshops 2016 proceedings (IEEE Computer Society).
@inproceedings{DBLP:conf/ipps/AbdelfattahHTD16,
  author    = {Ahmad Abdelfattah and Azzam Haidar and Stanimire Tomov and Jack J. Dongarra},
  title     = {On the Development of Variable Size Batched Computation for Heterogeneous Parallel Architectures},
  booktitle = {2016 {IEEE} International Parallel and Distributed Processing Symposium Workshops, {IPDPS} Workshops 2016, Chicago, IL, USA, May 23-27, 2016},
  pages     = {1249--1258},
  publisher = {{IEEE} Computer Society},
  year      = {2016},
  url       = {https://doi.org/10.1109/IPDPSW.2016.190},
  doi       = {10.1109/IPDPSW.2016.190},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ipps/AbdelfattahHTD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the ISC High Performance 2016 proceedings (Lecture Notes
% in Computer Science, volume 9697).
% GPUs is braced in the title, like GEMM already was, so that styles which
% sentence-case titles do not lowercase it.
@inproceedings{DBLP:conf/supercomputer/AbdelfattahHTD16,
  author    = {Ahmad Abdelfattah and Azzam Haidar and Stanimire Tomov and Jack J. Dongarra},
  editor    = {Julian M. Kunkel and Pavan Balaji and Jack J. Dongarra},
  title     = {Performance, Design, and Autotuning of Batched {GEMM} for {GPUs}},
  booktitle = {High Performance Computing - 31st International Conference, {ISC} High Performance 2016, Frankfurt, Germany, June 19-23, 2016, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {9697},
  pages     = {21--38},
  publisher = {Springer},
  year      = {2016},
  url       = {https://doi.org/10.1007/978-3-319-41321-1\_2},
  doi       = {10.1007/978-3-319-41321-1\_2},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/supercomputer/AbdelfattahHTD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% PhD thesis, King Abdullah University of Science and Technology (2015).
% GPUs is braced in the title so that styles which sentence-case titles do not
% lowercase it.
@phdthesis{DBLP:phd/basesearch/Abdelfattah15,
  author    = {Ahmad Abdelfattah},
  title     = {Accelerating Scientific Applications using High Performance Dense and Sparse Linear Algebra Kernels on {GPUs}},
  school    = {King Abdullah University of Science and Technology, Thuwal, Saudi Arabia},
  year      = {2015},
  url       = {http://hdl.handle.net/10754/346955},
  timestamp = {Sat, 05 Nov 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/phd/basesearch/Abdelfattah15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article in Supercomput. Front. Innov., volume 2, number 4 (2015).
@article{DBLP:journals/superfri/DongarraAAGHKLT15,
  author    = {Jack J. Dongarra and Maksims Abalenkovs and Ahmad Abdelfattah and Mark Gates and Azzam Haidar and Jakub Kurzak and Piotr Luszczek and Stanimire Tomov and Ichitaro Yamazaki and Asim YarKhan},
  title     = {Parallel Programming Models for Dense Linear Algebra on Heterogeneous Systems},
  journal   = {Supercomput. Front. Innov.},
  volume    = {2},
  number    = {4},
  pages     = {67--86},
  year      = {2015},
  url       = {https://doi.org/10.14529/jsfi150405},
  doi       = {10.14529/JSFI150405},
  timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/superfri/DongarraAAGHKLT15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the Euro-Par 2015 proceedings (Lecture Notes in Computer
% Science, volume 9233).
% GPU, SpMV and PDE are braced in the title so that styles which sentence-case
% titles do not lowercase them.
@inproceedings{DBLP:conf/europar/AbdelfattahLK15,
  author    = {Ahmad Abdelfattah and Hatem Ltaief and David E. Keyes},
  editor    = {Jesper Larsson Tr{\"{a}}ff and Sascha Hunold and Francesco Versaci},
  title     = {High Performance Multi-{GPU} {SpMV} for Multi-component {PDE}-Based Applications},
  booktitle = {Euro-Par 2015: Parallel Processing - 21st International Conference on Parallel and Distributed Computing, Vienna, Austria, August 24-28, 2015, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {9233},
  pages     = {601--612},
  publisher = {Springer},
  year      = {2015},
  url       = {https://doi.org/10.1007/978-3-662-48096-0\_46},
  doi       = {10.1007/978-3-662-48096-0\_46},
  timestamp = {Sun, 12 Nov 2023 02:07:45 +0100},
  biburl    = {https://dblp.org/rec/conf/europar/AbdelfattahLK15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the Euro-Par 2014 proceedings (Lecture Notes in Computer
% Science, volume 8632).
% GPU is braced in the title so that styles which sentence-case titles do not
% lowercase it.
@inproceedings{DBLP:conf/europar/AbdelfattahGGKLSV14,
  author    = {Ahmad Abdelfattah and Eric Gendron and Damien Gratadour and David E. Keyes and Hatem Ltaief and Arnaud Sevin and Fabrice Vidal},
  editor    = {Fernando M. A. Silva and In{\^{e}}s de Castro Dutra and V{\'{\i}}tor Santos Costa},
  title     = {High Performance Pseudo-analytical Simulation of Multi-Object Adaptive Optics over Multi-{GPU} Systems},
  booktitle = {Euro-Par 2014 Parallel Processing - 20th International Conference, Porto, Portugal, August 25-29, 2014. Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {8632},
  pages     = {704--715},
  publisher = {Springer},
  year      = {2014},
  url       = {https://doi.org/10.1007/978-3-319-09873-9\_59},
  doi       = {10.1007/978-3-319-09873-9\_59},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/europar/AbdelfattahGGKLSV14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the SC 2014 proceedings (IEEE Computer Society).
% GPU is braced in the title so that styles which sentence-case titles do not
% lowercase it.
@inproceedings{DBLP:conf/sc/ChararaLGKSAGMV14,
  author    = {Ali Charara and Hatem Ltaief and Damien Gratadour and David E. Keyes and Arnaud Sevin and Ahmad Abdelfattah and Eric Gendron and Carine Morel and Fabrice Vidal},
  editor    = {Trish Damkroger and Jack J. Dongarra},
  title     = {Pipelining Computational Stages of the Tomographic Reconstructor for Multi-Object Adaptive Optics on a Multi-{GPU} System},
  booktitle = {International Conference for High Performance Computing, Networking, Storage and Analysis, {SC} 2014, New Orleans, LA, USA, November 16-21, 2014},
  pages     = {262--273},
  publisher = {{IEEE} Computer Society},
  year      = {2014},
  url       = {https://doi.org/10.1109/SC.2014.27},
  doi       = {10.1109/SC.2014.27},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/sc/ChararaLGKSAGMV14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record, eprint 1410.1726 (2014).
% An entry with the same authors and title also exists in this file under the
% key DBLP:journals/toms/AbdelfattahKL16 (presumably the journal version; confirm).
@article{DBLP:journals/corr/AbdelfattahKL14,
  author     = {Ahmad Abdelfattah and David E. Keyes and Hatem Ltaief},
  title      = {{KBLAS:} An Optimized Library for Dense Matrix-Vector Multiplication on {GPU} Accelerators},
  journal    = {CoRR},
  volume     = {abs/1410.1726},
  year       = {2014},
  url        = {http://arxiv.org/abs/1410.1726},
  eprinttype = {arXiv},
  eprint     = {1410.1726},
  timestamp  = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/AbdelfattahKL14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper in the Euro-Par 2012 Parallel Processing Workshops proceedings
% (Lecture Notes in Computer Science, volume 7640).
@inproceedings{DBLP:conf/europar/AbdelfattahKL12,
  author    = {Ahmad Abdelfattah and David E. Keyes and Hatem Ltaief},
  editor    = {Ioannis Caragiannis and Michael Alexander and Rosa M. Badia and Mario Cannataro and Alexandru Costan and Marco Danelutto and Fr{\'{e}}d{\'{e}}ric Desprez and Bettina Krammer and Julio Sahuquillo and Stephen L. Scott and Josef Weidendorfer},
  title     = {Systematic Approach in Optimizing Numerical Memory-Bound Kernels on {GPU}},
  booktitle = {Euro-Par 2012: Parallel Processing Workshops - BDMC, CGWS, HeteroPar, HiBB, OMHI, Paraphrase, PROPER, Resilience, UCHPC, VHPC, Rhodes Islands, Greece, August 27-31, 2012. Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {7640},
  pages     = {207--216},
  publisher = {Springer},
  year      = {2012},
  url       = {https://doi.org/10.1007/978-3-642-36949-0\_23},
  doi       = {10.1007/978-3-642-36949-0\_23},
  timestamp = {Wed, 19 Feb 2020 14:52:57 +0100},
  biburl    = {https://dblp.org/rec/conf/europar/AbdelfattahKL12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the VECPAR 2012 revised selected papers (Lecture Notes in
% Computer Science, volume 7851).
@inproceedings{DBLP:conf/vecpar/AbdelfattahDKL12,
  author    = {Ahmad Abdelfattah and Jack J. Dongarra and David E. Keyes and Hatem Ltaief},
  editor    = {Michel J. Dayd{\'{e}} and Osni Marques and Kengo Nakajima},
  title     = {Optimizing Memory-Bound {SYMV} Kernel on {GPU} Hardware Accelerators},
  booktitle = {High Performance Computing for Computational Science - {VECPAR} 2012, 10th International Conference, Kobe, Japan, July 17-20, 2012, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {7851},
  pages     = {72--79},
  publisher = {Springer},
  year      = {2012},
  url       = {https://doi.org/10.1007/978-3-642-38718-0\_10},
  doi       = {10.1007/978-3-642-38718-0\_10},
  timestamp = {Tue, 14 May 2019 10:00:36 +0200},
  biburl    = {https://dblp.org/rec/conf/vecpar/AbdelfattahDKL12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies with your web browser. For more information, see our F.A.Q.