Stop the war!

Остановите войну!

for scientists:

default search action

combined dblp search
author search
venue search
publication search

ask others

BibTeX records: Sheng Li 0010

Name: dblp XML data dump
Creator: Schloss Dagstuhl - Leibniz Center for Informatics
Published: 1993
License: https://creativecommons.org/publicdomain/zero/1.0/
Keywords: dblp, XML, computer science, scholarly publications, metadata

> Home > Persons > Sheng Li 0010

download as .bib file

% arXiv preprint listed under CoRR (abs/2401.13249), 2024.
@article{DBLP:journals/corr/abs-2401-13249,
  author     = {Zhou, Wangjin and
                Yang, Zhengdong and
                Chu, Chenhui and
                Li, Sheng and
                Dabre, Raj and
                Zhao, Yi and
                Kawahara, Tatsuya},
  title      = {{MOS-FAD:} Improving Fake Audio Detection Via Automatic Mean Opinion Score Prediction},
  journal    = {CoRR},
  volume     = {abs/2401.13249},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2401.13249},
  doi        = {10.48550/ARXIV.2401.13249},
  eprinttype = {arXiv},
  eprint     = {2401.13249},
  timestamp  = {Tue, 26 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2401-13249.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Journal article, Int. J. Asian Lang. Process. 33(4), 2023.
@article{DBLP:journals/jclc/KakLCK23,
  author     = {Kak, Soky and
                Li, Sheng and
                Chu, Chenhui and
                Kawahara, Tatsuya},
  title      = {Finetuning Pretrained Model with Embedding of Domain and Language Information for {ASR} of Very Low-Resource Settings},
  journal    = {Int. J. Asian Lang. Process.},
  volume     = {33},
  number     = {4},
  pages      = {2350024:1--2350024:17},
  year       = {2023},
  url        = {https://doi.org/10.1142/S2717554523500248},
  doi        = {10.1142/S2717554523500248},
  timestamp  = {Mon, 01 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jclc/KakLCK23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Journal article, Speech Commun. 155, 2023; pages holds the article number.
@article{DBLP:journals/speech/LinDWLD23,
  author     = {Lin, Yuqin and
                Dang, Jianwu and
                Wang, Longbiao and
                Li, Sheng and
                Ding, Chenchen},
  title      = {Disordered speech recognition considering low resources and abnormal articulation},
  journal    = {Speech Commun.},
  volume     = {155},
  pages      = {103002},
  year       = {2023},
  url        = {https://doi.org/10.1016/j.specom.2023.103002},
  doi        = {10.1016/J.SPECOM.2023.103002},
  timestamp  = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/speech/LinDWLD23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, Findings of ACL 2023.
@inproceedings{DBLP:conf/acl/ShimizuC0K23,
  author     = {Shimizu, Shuichiro and
                Chu, Chenhui and
                Li, Sheng and
                Kurohashi, Sadao},
  editor     = {Rogers, Anna and
                Boyd{-}Graber, Jordan L. and
                Okazaki, Naoaki},
  title      = {Towards Speech Dialogue Translation Mediating Speakers of Different Languages},
  booktitle  = {Findings of the Association for Computational Linguistics: {ACL} 2023, Toronto, Canada, July 9-14, 2023},
  pages      = {1122--1134},
  publisher  = {Association for Computational Linguistics},
  year       = {2023},
  url        = {https://doi.org/10.18653/v1/2023.findings-acl.72},
  doi        = {10.18653/V1/2023.FINDINGS-ACL.72},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acl/ShimizuC0K23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, Findings of ACL 2023.
@inproceedings{DBLP:conf/acl/YangLLS23,
  author     = {Yang, Longfei and
                Li, Jiyi and
                Li, Sheng and
                Shinozaki, Takahiro},
  editor     = {Rogers, Anna and
                Boyd{-}Graber, Jordan L. and
                Okazaki, Naoaki},
  title      = {Multi-Domain Dialogue State Tracking with Disentangled Domain-Slot Attention},
  booktitle  = {Findings of the Association for Computational Linguistics: {ACL} 2023, Toronto, Canada, July 9-14, 2023},
  pages      = {4928--4938},
  publisher  = {Association for Computational Linguistics},
  year       = {2023},
  url        = {https://doi.org/10.18653/v1/2023.findings-acl.304},
  doi        = {10.18653/V1/2023.FINDINGS-ACL.304},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acl/YangLLS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ASRU 2023.
@inproceedings{DBLP:conf/asru/QiHZLWLX23,
  author     = {Qi, Zili and
                Hu, Xinhui and
                Zhou, Wangjin and
                Li, Sheng and
                Wu, Hao and
                Lu, Jian and
                Xu, Xinkang},
  title      = {{LE-SSL-MOS:} Self-Supervised Learning {MOS} Prediction with Listener Enhancement},
  booktitle  = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2023, Taipei, Taiwan, December 16-20, 2023},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ASRU57964.2023.10389788},
  doi        = {10.1109/ASRU57964.2023.10389788},
  timestamp  = {Fri, 22 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/asru/QiHZLWLX23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ASRU 2023.
% The method name and the proper noun in the title are braced so that
% sentence-casing bibliography styles do not lowercase them.
% NOTE(review): the title reads "Alzheimers" without an apostrophe, while the
% Interspeech 2022 entry in this file has "Alzheimer's" - confirm against the
% published paper before changing the spelling.
@inproceedings{DBLP:conf/asru/WeiYGLCOL23,
  author     = {Wei, Wenqing and
                Yang, Zhengdong and
                Gao, Yuan and
                Li, Jiyi and
                Chu, Chenhui and
                Okada, Shogo and
                Li, Sheng},
  title      = {{FedCPC:} An Effective Federated Contrastive Learning Method for Privacy Preserving Early-Stage {Alzheimers} Speech Detection},
  booktitle  = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2023, Taipei, Taiwan, December 16-20, 2023},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ASRU57964.2023.10389690},
  doi        = {10.1109/ASRU57964.2023.10389690},
  timestamp  = {Fri, 22 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/asru/WeiYGLCOL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICANN 2023 (LNCS 14260).
% doi and url hold the raw identifier: the underscore is no longer escaped,
% since verbatim-aware styles would otherwise emit a literal backslash and
% break the link. "Taiwan" is braced to survive sentence-casing styles.
@inproceedings{DBLP:conf/icann/LiL23,
  author     = {Li, Sheng and
                Li, Jiyi},
  editor     = {Iliadis, Lazaros and
                Papaleonidas, Antonios and
                Angelov, Plamen P. and
                Jayne, Chrisina},
  title      = {Correction while Recognition: Combining Pretrained Language Model for {Taiwan}-Accented Speech Recognition},
  booktitle  = {Artificial Neural Networks and Machine Learning - {ICANN} 2023 - 32nd International Conference on Artificial Neural Networks, Heraklion, Crete, Greece, September 26-29, 2023, Proceedings, Part {VII}},
  series     = {Lecture Notes in Computer Science},
  volume     = {14260},
  pages      = {389--400},
  publisher  = {Springer},
  year       = {2023},
  url        = {https://doi.org/10.1007/978-3-031-44195-0_32},
  doi        = {10.1007/978-3-031-44195-0_32},
  timestamp  = {Sat, 14 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icann/LiL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICASSP 2023.
% A comma is added before the ICASSP acronym in booktitle so the venue name
% matches the punctuation of the other ICASSP entries in this file.
@inproceedings{DBLP:conf/icassp/KakLCK23,
  author     = {Kak, Soky and
                Li, Sheng and
                Chu, Chenhui and
                Kawahara, Tatsuya},
  title      = {Domain and Language Adaptation Using Heterogeneous Datasets for Wav2vec2.0-Based Speech Recognition of Low-Resource Language},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10095644},
  doi        = {10.1109/ICASSP49357.2023.10095644},
  timestamp  = {Fri, 10 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/KakLCK23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Workshop paper, ICASSP 2023 Workshops.
@inproceedings{DBLP:conf/icassp/KorvingLZSMB23,
  author     = {Korving, Helen and
                Li, Sheng and
                Zhou, Di and
                Sterkenburg, Paula Sophia and
                Markopoulos, Panos and
                Barakova, Emilia I.},
  title      = {Development of a Pain Signaling System Using Machine Learning},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2023 - Workshops, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSPW59220.2023.10193643},
  doi        = {10.1109/ICASSPW59220.2023.10193643},
  timestamp  = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/KorvingLZSMB23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICASSP 2023.
% A comma is added before the ICASSP acronym in booktitle so the venue name
% matches the punctuation of the other ICASSP entries in this file.
@inproceedings{DBLP:conf/icassp/LiuGYYLDMHCCK23,
  author     = {Liu, Qianying and
                Gong, Zhuo and
                Yang, Zhengdong and
                Yang, Yuhang and
                Li, Sheng and
                Ding, Chenchen and
                Minematsu, Nobuaki and
                Huang, Hao and
                Cheng, Fei and
                Chu, Chenhui and
                Kurohashi, Sadao},
  title      = {Hierarchical Softmax for End-To-End Low-Resource Multilingual Speech Recognition},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10095133},
  doi        = {10.1109/ICASSP49357.2023.10095133},
  timestamp  = {Fri, 10 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/LiuGYYLDMHCCK23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICASSP 2023.
% A comma is added before the ICASSP acronym in booktitle so the venue name
% matches the punctuation of the other ICASSP entries in this file.
@inproceedings{DBLP:conf/icassp/TanCLY23,
  author     = {Tan, Chao and
                Cao, Yang and
                Li, Sheng and
                Yoshikawa, Masatoshi},
  title      = {General or Specific? Investigating Effective Privacy Protection in Federated Learning for Speech Emotion Recognition},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10096844},
  doi        = {10.1109/ICASSP49357.2023.10096844},
  timestamp  = {Fri, 10 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/TanCLY23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICASSP 2023.
% A comma is added before the ICASSP acronym in booktitle so the venue name
% matches the punctuation of the other ICASSP entries in this file.
@inproceedings{DBLP:conf/icassp/WangYHHL23,
  author     = {Wang, Kai and
                Yang, Yuhang and
                Huang, Hao and
                Hu, Ying and
                Li, Sheng},
  title      = {Speakeraugment: Data Augmentation for Generalizable Source Separation via Speaker Parameter Manipulation},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10094767},
  doi        = {10.1109/ICASSP49357.2023.10094767},
  timestamp  = {Fri, 10 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/WangYHHL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICASSP 2023.
% A comma is added before the ICASSP acronym in booktitle so the venue name
% matches the punctuation of the other ICASSP entries in this file.
@inproceedings{DBLP:conf/icassp/YangXHCL23,
  author     = {Yang, Yuhang and
                Xu, Haihua and
                Huang, Hao and
                Chng, Eng Siong and
                Li, Sheng},
  title      = {Speech-Text Based Multi-Modal Training with Bidirectional Attention for Improved Speech Recognition},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10096726},
  doi        = {10.1109/ICASSP49357.2023.10096726},
  timestamp  = {Fri, 10 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/YangXHCL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, IWSLT 2023.
% "Kyoto" is braced in the title so sentence-casing styles keep its capital;
% the venue acronym in booktitle is braced like the other acronyms in this file.
@inproceedings{DBLP:conf/iwslt/YangSZ0C23,
  author     = {Yang, Zhengdong and
                Shimizu, Shuichiro and
                Zhou, Wangjin and
                Li, Sheng and
                Chu, Chenhui},
  editor     = {Salesky, Elizabeth and
                Federico, Marcello and
                Carpuat, Marine},
  title      = {The {Kyoto} Speech-to-Speech Translation System for {IWSLT} 2023},
  booktitle  = {Proceedings of the 20th International Conference on Spoken Language Translation, {IWSLT@ACL} 2023, Toronto, Canada (in-person and online), 13-14 July, 2023},
  pages      = {357--362},
  publisher  = {Association for Computational Linguistics},
  year       = {2023},
  url        = {https://doi.org/10.18653/v1/2023.iwslt-1.33},
  doi        = {10.18653/V1/2023.IWSLT-1.33},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iwslt/YangSZ0C23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Workshop paper, ACM Multimedia Asia Workshops 2023.
% The camel-case system name in the title is braced so sentence-casing styles
% do not lowercase its inner capitals; the venue acronym is braced as well.
@inproceedings{DBLP:conf/mmasia/ZhouYLC23,
  author     = {Zhou, Wangjin and
                Yang, Zhengdong and
                Li, Sheng and
                Chu, Chenhui},
  editor     = {Cheng, Wen{-}Huang and
                Chu, Wei{-}Ta and
                Hu, Min{-}Chun and
                Liu, Jiaying and
                Kim, Munchurl and
                Zhang, Wei},
  title      = {{KyotoMOS:} An Automatic {MOS} Scoring System for Speech Synthesis},
  booktitle  = {{ACM} Multimedia Asia Workshops, {MMAsia} 2023, Tainan, Taiwan, December 6-8, 2023},
  pages      = {7:1--7:3},
  publisher  = {{ACM}},
  year       = {2023},
  url        = {https://doi.org/10.1145/3611380.3628562},
  doi        = {10.1145/3611380.3628562},
  timestamp  = {Sat, 13 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/mmasia/ZhouYLC23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ACM Multimedia Asia 2023.
% The venue acronym in booktitle is braced, consistent with the other
% protected acronyms in this file.
@inproceedings{DBLP:conf/mmasia/Chen0LHCH23,
  author     = {Chen, Xiaojiao and
                Li, Sheng and
                Li, Jiyi and
                Huang, Hao and
                Cao, Yang and
                He, Liang},
  editor     = {Cheng, Wen{-}Huang and
                Chu, Wei{-}Ta and
                Hu, Min{-}Chun and
                Liu, Jiaying and
                Kim, Munchurl and
                Zhang, Wei},
  title      = {Reprogramming Self-supervised Learning-based Speech Representations for Speaker Anonymization},
  booktitle  = {{ACM} Multimedia Asia 2023, {MMAsia} 2023, Tainan, Taiwan, December 6-8, 2023},
  pages      = {93:1--93:5},
  publisher  = {{ACM}},
  year       = {2023},
  url        = {https://doi.org/10.1145/3595916.3626366},
  doi        = {10.1145/3595916.3626366},
  timestamp  = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/mmasia/Chen0LHCH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ACM Multimedia Asia 2023.
% The camel-case method name in the title is braced so sentence-casing styles
% do not lowercase its inner capital; the venue acronym is braced as well.
@inproceedings{DBLP:conf/mmasia/ChenLLCHH23,
  author     = {Chen, Xiaojiao and
                Li, Sheng and
                Li, Jiyi and
                Cao, Yang and
                Huang, Hao and
                He, Liang},
  editor     = {Cheng, Wen{-}Huang and
                Chu, Wei{-}Ta and
                Hu, Min{-}Chun and
                Liu, Jiaying and
                Kim, Munchurl and
                Zhang, Wei},
  title      = {{GhostVec:} {A} New Threat to Speaker Privacy of End-to-End Speech Recognition System},
  booktitle  = {{ACM} Multimedia Asia 2023, {MMAsia} 2023, Tainan, Taiwan, December 6-8, 2023},
  pages      = {94:1--94:5},
  publisher  = {{ACM}},
  year       = {2023},
  url        = {https://doi.org/10.1145/3595916.3626367},
  doi        = {10.1145/3595916.3626367},
  timestamp  = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/mmasia/ChenLLCHH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% arXiv preprint listed under CoRR (abs/2305.09210), 2023.
% Same title and authors as DBLP:conf/acl/ShimizuC0K23 in this file; cite only
% one of the two to avoid a duplicated reference.
@article{DBLP:journals/corr/abs-2305-09210,
  author     = {Shimizu, Shuichiro and
                Chu, Chenhui and
                Li, Sheng and
                Kurohashi, Sadao},
  title      = {Towards Speech Dialogue Translation Mediating Speakers of Different Languages},
  journal    = {CoRR},
  volume     = {abs/2305.09210},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.09210},
  doi        = {10.48550/ARXIV.2305.09210},
  eprinttype = {arXiv},
  eprint     = {2305.09210},
  timestamp  = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-09210.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Journal article, EURASIP J. Audio Speech Music Process. 2022(1).
% "Tibetan" is braced so sentence-casing styles keep its capital.
@article{DBLP:journals/ejasmp/QinWLDP22,
  author     = {Qin, Siqing and
                Wang, Longbiao and
                Li, Sheng and
                Dang, Jianwu and
                Pan, Lixin},
  title      = {Improving low-resource {Tibetan} end-to-end {ASR} by multilingual and multilevel unit modeling},
  journal    = {{EURASIP} J. Audio Speech Music. Process.},
  volume     = {2022},
  number     = {1},
  pages      = {2},
  year       = {2022},
  url        = {https://doi.org/10.1186/s13636-021-00233-4},
  doi        = {10.1186/S13636-021-00233-4},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ejasmp/QinWLDP22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, EUSIPCO 2022. This record has a url but no doi field.
@inproceedings{DBLP:conf/eusipco/LiLADLU22,
  author     = {Li, Kai and
                Lu, Xugang and
                Akagi, Masato and
                Dang, Jianwu and
                Li, Sheng and
                Unoki, Masashi},
  title      = {Relationship Between Speakers' Physiological Structure and Acoustic Speech Signals: Data-Driven Study Based on Frequency-Wise Attentional Neural Network},
  booktitle  = {30th European Signal Processing Conference, {EUSIPCO} 2022, Belgrade, Serbia, August 29 - Sept. 2, 2022},
  pages      = {379--383},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://ieeexplore.ieee.org/document/9909649},
  timestamp  = {Thu, 30 Mar 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/eusipco/LiLADLU22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICASSP 2022.
@inproceedings{DBLP:conf/icassp/WangPHHL22,
  author     = {Wang, Kai and
                Peng, Yizhou and
                Huang, Hao and
                Hu, Ying and
                Li, Sheng},
  title      = {Mining Hard Samples Locally And Globally For Improved Speech Separation},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages      = {6037--6041},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/ICASSP43922.2022.9747797},
  doi        = {10.1109/ICASSP43922.2022.9747797},
  timestamp  = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/WangPHHL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICASSP 2022.
@inproceedings{DBLP:conf/icassp/LvWGLDPWDH22,
  author     = {Lv, Yongjie and
                Wang, Longbiao and
                Ge, Meng and
                Li, Sheng and
                Ding, Chenchen and
                Pan, Lixin and
                Wang, Yuguang and
                Dang, Jianwu and
                Honda, Kiyoshi},
  title      = {Compressing Transformer-Based {ASR} Model by Task-Driven Loss and Attention-Based Multi-Level Feature Distillation},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages      = {7992--7996},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/ICASSP43922.2022.9746113},
  doi        = {10.1109/ICASSP43922.2022.9746113},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/LvWGLDPWDH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICONIP 2022 (CCIS 1793).
% doi and url hold the raw identifier: the underscore is no longer escaped,
% since verbatim-aware styles would otherwise emit a literal backslash and
% break the link. The camel-case method name in the title is braced to
% survive sentence-casing styles.
@inproceedings{DBLP:conf/iconip/Chen0H22,
  author     = {Chen, Xiaojiao and
                Li, Sheng and
                Huang, Hao},
  editor     = {Tanveer, Mohammad and
                Agarwal, Sonali and
                Ozawa, Seiichi and
                Ekbal, Asif and
                Jatowt, Adam},
  title      = {{GhostVec:} Directly Extracting Speaker Embedding from End-to-End Speech Recognition Model Using Adversarial Examples},
  booktitle  = {Neural Information Processing - 29th International Conference, {ICONIP} 2022, Virtual Event, November 22-26, 2022, Proceedings, Part {VI}},
  series     = {Communications in Computer and Information Science},
  volume     = {1793},
  pages      = {482--492},
  publisher  = {Springer},
  year       = {2022},
  url        = {https://doi.org/10.1007/978-981-99-1645-0_40},
  doi        = {10.1007/978-981-99-1645-0_40},
  timestamp  = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iconip/Chen0H22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICONIP 2022 (CCIS 1793).
% doi and url hold the raw identifier: the underscore is no longer escaped,
% since verbatim-aware styles would otherwise emit a literal backslash and
% break the link. Language names in the title are braced to survive
% sentence-casing styles.
@inproceedings{DBLP:conf/iconip/LiLLG22,
  author     = {Li, Sheng and
                Li, Jiyi and
                Liu, Qianying and
                Gong, Zhuo},
  editor     = {Tanveer, Mohammad and
                Agarwal, Sonali and
                Ozawa, Seiichi and
                Ekbal, Asif and
                Jatowt, Adam},
  title      = {An End-to-End {Chinese} and {Japanese} Bilingual Speech Recognition Systems with Shared Character Decomposition},
  booktitle  = {Neural Information Processing - 29th International Conference, {ICONIP} 2022, Virtual Event, November 22-26, 2022, Proceedings, Part {VI}},
  series     = {Communications in Computer and Information Science},
  volume     = {1793},
  pages      = {493--503},
  publisher  = {Springer},
  year       = {2022},
  url        = {https://doi.org/10.1007/978-981-99-1645-0_41},
  doi        = {10.1007/978-981-99-1645-0_41},
  timestamp  = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iconip/LiLLG22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICONIP 2022 (CCIS 1793).
% doi and url hold the raw identifier: the underscore is no longer escaped,
% since verbatim-aware styles would otherwise emit a literal backslash and
% break the link.
@inproceedings{DBLP:conf/iconip/LiZLZYH22,
  author     = {Li, Guangxing and
                Zhou, Wangjin and
                Li, Sheng and
                Zhao, Yi and
                Yang, Jichen and
                Huang, Hao},
  editor     = {Tanveer, Mohammad and
                Agarwal, Sonali and
                Ozawa, Seiichi and
                Ekbal, Asif and
                Jatowt, Adam},
  title      = {Investigating Effective Domain Adaptation Method for Speaker Verification Task},
  booktitle  = {Neural Information Processing - 29th International Conference, {ICONIP} 2022, Virtual Event, November 22-26, 2022, Proceedings, Part {VI}},
  series     = {Communications in Computer and Information Science},
  volume     = {1793},
  pages      = {517--527},
  publisher  = {Springer},
  year       = {2022},
  url        = {https://doi.org/10.1007/978-981-99-1645-0_43},
  doi        = {10.1007/978-981-99-1645-0_43},
  timestamp  = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iconip/LiZLZYH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, Interspeech 2022.
@inproceedings{DBLP:conf/interspeech/ShiW0DK22,
  author     = {Shi, Hao and
                Wang, Longbiao and
                Li, Sheng and
                Dang, Jianwu and
                Kawahara, Tatsuya},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Monaural Speech Enhancement Based on Spectrogram Decomposition for Convolutional Neural Network-sensitive Feature Extraction},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {221--225},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-11268},
  doi        = {10.21437/INTERSPEECH.2022-11268},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/ShiW0DK22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, Interspeech 2022.
@inproceedings{DBLP:conf/interspeech/LiGWU0D22,
  author     = {Li, Nan and
                Ge, Meng and
                Wang, Longbiao and
                Unoki, Masashi and
                Li, Sheng and
                Dang, Jianwu},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Global Signal-to-noise Ratio Estimation Based on Multi-subband Processing Using Convolutional Neural Network},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {361--365},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-154},
  doi        = {10.21437/INTERSPEECH.2022-154},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/LiGWU0D22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, Interspeech 2022.
% The proper noun "Alzheimer's" is braced so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/interspeech/YangW0LS22,
  author     = {Yang, Longfei and
                Wei, Wenqing and
                Li, Sheng and
                Li, Jiyi and
                Shinozaki, Takahiro},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Augmented Adversarial Self-Supervised Learning for Early-Stage {Alzheimer's} Speech Detection},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {541--545},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-943},
  doi        = {10.21437/INTERSPEECH.2022-943},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/YangW0LS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, Interspeech 2022.
% The surname "McAdams" in the title is braced so sentence-casing styles do
% not lowercase its capitals.
@inproceedings{DBLP:conf/interspeech/Li0LALZZWDU22,
  author     = {Li, Kai and
                Li, Sheng and
                Lu, Xugang and
                Akagi, Masato and
                Liu, Meng and
                Zhang, Lin and
                Zeng, Chang and
                Wang, Longbiao and
                Dang, Jianwu and
                Unoki, Masashi},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Data Augmentation Using {McAdams}-Coefficient-Based Speaker Anonymization for Fake Audio Detection},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {664--668},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-10088},
  doi        = {10.21437/INTERSPEECH.2022-10088},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/Li0LALZZWDU22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, Interspeech 2022.
@inproceedings{DBLP:conf/interspeech/Kak0MCK22,
  author     = {Kak, Soky and
                Li, Sheng and
                Mimura, Masato and
                Chu, Chenhui and
                Kawahara, Tatsuya},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Leveraging Simultaneous Translation for Enhancing Transcription of Low-resource Language via Cross Attention Mechanism},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {1362--1366},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-343},
  doi        = {10.21437/INTERSPEECH.2022-343},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/Kak0MCK22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, Interspeech 2022.
% "Tibetan" is braced so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/interspeech/QinW0LD22,
  author     = {Qin, Siqing and
                Wang, Longbiao and
                Li, Sheng and
                Lin, Yuqin and
                Dang, Jianwu},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Finer-grained Modeling units-based Meta-Learning for Low-resource {Tibetan} Speech Recognition},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {2133--2137},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-10015},
  doi        = {10.21437/INTERSPEECH.2022-10015},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/QinW0LD22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/interspeech/YangZC0DRZ22,
  author       = {Yang, Zhengdong and
                  Zhou, Wangjin and
                  Chu, Chenhui and
                  Li, Sheng and
                  Dabre, Raj and
                  Rubino, Raphael and
                  Zhao, Yi},
  editor       = {Ko, Hanseok and
                  Hansen, John H. L.},
  title        = {Fusion of Self-supervised Learned Models for {MOS} Prediction},
  booktitle    = {Interspeech 2022, 23rd Annual Conference of the International Speech
                  Communication Association, Incheon, Korea, 18-22 September 2022},
  pages        = {5443--5447},
  publisher    = {{ISCA}},
  year         = {2022},
  url          = {https://doi.org/10.21437/Interspeech.2022-10262},
  doi          = {10.21437/INTERSPEECH.2022-10262},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/YangZC0DRZ22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/lrec/0010LLG22,
  author       = {Li, Sheng and
                  Li, Jiyi and
                  Liu, Qianying and
                  Gong, Zhuo},
  editor       = {Calzolari, Nicoletta and
                  B{\'{e}}chet, Fr{\'{e}}d{\'{e}}ric and
                  Blache, Philippe and
                  Choukri, Khalid and
                  Cieri, Christopher and
                  Declerck, Thierry and
                  Goggi, Sara and
                  Isahara, Hitoshi and
                  Maegaard, Bente and
                  Mariani, Joseph and
                  Mazo, H{\'{e}}l{\`{e}}ne and
                  Odijk, Jan and
                  Piperidis, Stelios},
  title        = {Adversarial Speech Generation and Natural Speech Recovery for Speech
                  Content Protection},
  booktitle    = {Proceedings of the Thirteenth Language Resources and Evaluation Conference,
                  {LREC} 2022, Marseille, France, 20-25 June 2022},
  pages        = {7291--7297},
  publisher    = {European Language Resources Association},
  year         = {2022},
  url          = {https://aclanthology.org/2022.lrec-1.792},
  timestamp    = {Tue, 06 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/lrec/0010LLG22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ococosda/KakGL22,
  author       = {Kak, Soky and
                  Gong, Zhuo and
                  Li, Sheng},
  title        = {{Nict-Tib1}: {A} Public Speech Corpus Of {Lhasa} Dialect For Benchmarking
                  {Tibetan} Language Speech Recognition Systems},
  booktitle    = {25th Conference of the Oriental {COCOSDA} International Committee
                  for the Co-ordination and Standardisation of Speech Databases and
                  Assessment Techniques, {O-COCOSDA} 2022, Hanoi, Vietnam, November
                  24-26, 2022},
  pages        = {1--5},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/O-COCOSDA202257103.2022.9997917},
  doi          = {10.1109/O-COCOSDA202257103.2022.9997917},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ococosda/KakGL22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/odyssey/GongSYS0KM22,
  author       = {Gong, Zhuo and
                  Saito, Daisuke and
                  Yang, Longfei and
                  Shinozaki, Takahiro and
                  Li, Sheng and
                  Kawai, Hisashi and
                  Minematsu, Nobuaki},
  editor       = {Zheng, Thomas Fang},
  title        = {Self-Adaptive Multilingual {ASR} Rescoring with Language Identification
                  and Unified Language Model},
  booktitle    = {Odyssey 2022: The Speaker and Language Recognition Workshop, 28 June
                  - 1 July 2022, Beijing, China},
  pages        = {415--420},
  publisher    = {{ISCA}},
  year         = {2022},
  url          = {https://doi.org/10.21437/Odyssey.2022-58},
  doi          = {10.21437/ODYSSEY.2022-58},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/odyssey/GongSYS0KM22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/sigdial/YangLLS22,
  author       = {Yang, Longfei and
                  Li, Jiyi and
                  Li, Sheng and
                  Shinozaki, Takahiro},
  editor       = {Lemon, Oliver and
                  Hakkani{-}T{\"{u}}r, Dilek and
                  Li, Junyi Jessy and
                  Ashrafzadeh, Arash and
                  Garc{\'{\i}}a, Daniel Hern{\'{a}}ndez and
                  Alikhani, Malihe and
                  Vandyke, David and
                  Dusek, Ondrej},
  title        = {Multi-Domain Dialogue State Tracking with Top-K Slot Self Attention},
  booktitle    = {Proceedings of the 23rd Annual Meeting of the Special Interest Group
                  on Discourse and Dialogue, {SIGDIAL} 2022, Edinburgh, UK, 07-09 September
                  2022},
  pages        = {231--236},
  publisher    = {Association for Computational Linguistics},
  year         = {2022},
  url          = {https://doi.org/10.18653/v1/2022.sigdial-1.24},
  doi          = {10.18653/V1/2022.SIGDIAL-1.24},
  timestamp    = {Fri, 12 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sigdial/YangLLS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2204-03855,
  author       = {Liu, Qianying and
                  Yang, Yuhang and
                  Gong, Zhuo and
                  Li, Sheng and
                  Ding, Chenchen and
                  Minematsu, Nobuaki and
                  Huang, Hao and
                  Cheng, Fei and
                  Kurohashi, Sadao},
  title        = {Hierarchical Softmax for End-to-End Low-resource Multilingual Speech
                  Recognition},
  journal      = {CoRR},
  volume       = {abs/2204.03855},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2204.03855},
  doi          = {10.48550/ARXIV.2204.03855},
  eprinttype   = {arXiv},
  eprint       = {2204.03855},
  timestamp    = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-03855.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2204-04855,
  author       = {Yang, Zhengdong and
                  Zhou, Wangjin and
                  Chu, Chenhui and
                  Li, Sheng and
                  Dabre, Raj and
                  Rubino, Raphael and
                  Zhao, Yi},
  title        = {Fusion of Self-supervised Learned Models for {MOS} Prediction},
  journal      = {CoRR},
  volume       = {abs/2204.04855},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2204.04855},
  doi          = {10.48550/ARXIV.2204.04855},
  eprinttype   = {arXiv},
  eprint       = {2204.04855},
  timestamp    = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-04855.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2211-00325,
  author       = {Yang, Yuhang and
                  Xu, Haihua and
                  Huang, Hao and
                  Chng, Eng Siong and
                  Li, Sheng},
  title        = {Speech-text based multi-modal training with bidirectional attention
                  for improved speech recognition},
  journal      = {CoRR},
  volume       = {abs/2211.00325},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.00325},
  doi          = {10.48550/ARXIV.2211.00325},
  eprinttype   = {arXiv},
  eprint       = {2211.00325},
  timestamp    = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-00325.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/jclc/KakMKCLDS21,
  author       = {Kak, Soky and
                  Mimura, Masato and
                  Kawahara, Tatsuya and
                  Chu, Chenhui and
                  Li, Sheng and
                  Ding, Chenchen and
                  Sam, Sethserey},
  title        = {{TriECCC}: Trilingual Corpus of the {Extraordinary Chambers in the Courts
                  of Cambodia} for Speech Recognition and Translation Studies},
  journal      = {Int. J. Asian Lang. Process.},
  volume       = {31},
  number       = {3{\&}4},
  pages        = {2250007:1--2250007:21},
  year         = {2021},
  url          = {https://doi.org/10.1142/S2717554522500072},
  doi          = {10.1142/S2717554522500072},
  timestamp    = {Tue, 06 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jclc/KakMKCLDS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/apsipa/KakLMCK21,
  author       = {Kak, Soky and
                  Li, Sheng and
                  Mimura, Masato and
                  Chu, Chenhui and
                  Kawahara, Tatsuya},
  title        = {On the Use of Speaker Information for Automatic Speech Recognition
                  in Speaker-imbalanced Corpora},
  booktitle    = {Asia-Pacific Signal and Information Processing Association Annual
                  Summit and Conference, {APSIPA} {ASC} 2021, Tokyo, Japan, December
                  14-17, 2021},
  pages        = {433--437},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://ieeexplore.ieee.org/document/9689564},
  timestamp    = {Wed, 09 Feb 2022 09:03:08 +0100},
  biburl       = {https://dblp.org/rec/conf/apsipa/KakLMCK21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/apsipa/ShiWLFDK21,
  author       = {Shi, Hao and
                  Wang, Longbiao and
                  Li, Sheng and
                  Fan, Cunhang and
                  Dang, Jianwu and
                  Kawahara, Tatsuya},
  title        = {Spectrograms Fusion-based End-to-end Robust Automatic Speech Recognition},
  booktitle    = {Asia-Pacific Signal and Information Processing Association Annual
                  Summit and Conference, {APSIPA} {ASC} 2021, Tokyo, Japan, December
                  14-17, 2021},
  pages        = {438--442},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://ieeexplore.ieee.org/document/9689650},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/apsipa/ShiWLFDK21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/apsipa/PengZZXHLC21,
  author       = {Peng, Yizhou and
                  Zhang, Jicheng and
                  Zhang, Haobo and
                  Xu, Haihua and
                  Huang, Hao and
                  Li, Sheng and
                  Chng, Eng Siong},
  title        = {Multilingual Approach to Joint Speech and Accent Recognition with
                  {DNN-HMM} Framework},
  booktitle    = {Asia-Pacific Signal and Information Processing Association Annual
                  Summit and Conference, {APSIPA} {ASC} 2021, Tokyo, Japan, December
                  14-17, 2021},
  pages        = {1043--1048},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://ieeexplore.ieee.org/document/9689498},
  timestamp    = {Wed, 09 Feb 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/apsipa/PengZZXHLC21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/ChenHLX21,
  author       = {Chen, Shunfei and
                  Hu, Xinhui and
                  Li, Sheng and
                  Xu, Xinkang},
  title        = {An Investigation of Using Hybrid Modeling Units for Improving End-to-End
                  Speech Recognition System},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages        = {6743--6747},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/ICASSP39728.2021.9414598},
  doi          = {10.1109/ICASSP39728.2021.9414598},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ChenHLX21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/LiWU0WGD21,
  author       = {Li, Nan and
                  Wang, Longbiao and
                  Unoki, Masashi and
                  Li, Sheng and
                  Wang, Rui and
                  Ge, Meng and
                  Dang, Jianwu},
  title        = {Robust Voice Activity Detection Using a Masked Auditory Encoder Based
                  Convolutional Neural Network},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages        = {6828--6832},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/ICASSP39728.2021.9415045},
  doi          = {10.1109/ICASSP39728.2021.9415045},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/LiWU0WGD21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/HuangWHL21,
  author       = {Huang, Hao and
                  Wang, Kai and
                  Hu, Ying and
                  Li, Sheng},
  title        = {Encoder-Decoder Based Pitch Tracking and Joint Model Training for
                  {Mandarin} Tone Classification},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages        = {6943--6947},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/ICASSP39728.2021.9413888},
  doi          = {10.1109/ICASSP39728.2021.9413888},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/HuangWHL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/iconip/QiangSGYLW0D21,
  author       = {Qiang, Luya and
                  Shi, Hao and
                  Ge, Meng and
                  Yin, Haoran and
                  Li, Nan and
                  Wang, Longbiao and
                  Li, Sheng and
                  Dang, Jianwu},
  editor       = {Mantoro, Teddy and
                  Lee, Minho and
                  Ayu, Media Anugerah and
                  Wong, Kok Wai and
                  Hidayanto, Achmad Nizar},
  title        = {Speech Dereverberation Based on Scale-Aware Mean Square Error Loss},
  booktitle    = {Neural Information Processing - 28th International Conference, {ICONIP}
                  2021, Sanur, Bali, Indonesia, December 8-12, 2021, Proceedings, Part
                  {V}},
  series       = {Communications in Computer and Information Science},
  volume       = {1516},
  pages        = {55--63},
  publisher    = {Springer},
  year         = {2021},
  url          = {https://doi.org/10.1007/978-3-030-92307-5_7},
  doi          = {10.1007/978-3-030-92307-5_7},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iconip/QiangSGYLW0D21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/iconip/LiuWLLDZD21,
  author       = {Liu, Dawei and
                  Wang, Longbiao and
                  Li, Sheng and
                  Li, Haoyu and
                  Ding, Chenchen and
                  Zhang, Ju and
                  Dang, Jianwu},
  editor       = {Mantoro, Teddy and
                  Lee, Minho and
                  Ayu, Media Anugerah and
                  Wong, Kok Wai and
                  Hidayanto, Achmad Nizar},
  title        = {Exploring Effective Speech Representation via {ASR} for High-Quality
                  End-to-End Multispeaker {TTS}},
  booktitle    = {Neural Information Processing - 28th International Conference, {ICONIP}
                  2021, Sanur, Bali, Indonesia, December 8-12, 2021, Proceedings, Part
                  {VI}},
  series       = {Communications in Computer and Information Science},
  volume       = {1517},
  pages        = {110--118},
  publisher    = {Springer},
  year         = {2021},
  url          = {https://doi.org/10.1007/978-3-030-92310-5_13},
  doi          = {10.1007/978-3-030-92310-5_13},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iconip/LiuWLLDZD21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/iconip/YinSWQLGZD21,
  author       = {Yin, Haoran and
                  Shi, Hao and
                  Wang, Longbiao and
                  Qiang, Luya and
                  Li, Sheng and
                  Ge, Meng and
                  Zhang, Gaoyan and
                  Dang, Jianwu},
  editor       = {Mantoro, Teddy and
                  Lee, Minho and
                  Ayu, Media Anugerah and
                  Wong, Kok Wai and
                  Hidayanto, Achmad Nizar},
  title        = {Simultaneous Progressive Filtering-Based Monaural Speech Enhancement},
  booktitle    = {Neural Information Processing - 28th International Conference, {ICONIP}
                  2021, Sanur, Bali, Indonesia, December 8-12, 2021, Proceedings, Part
                  {V}},
  series       = {Communications in Computer and Information Science},
  volume       = {1516},
  pages        = {213--221},
  publisher    = {Springer},
  year         = {2021},
  url          = {https://doi.org/10.1007/978-3-030-92307-5_25},
  doi          = {10.1007/978-3-030-92307-5_25},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iconip/YinSWQLGZD21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/WangHHH021,
  author       = {Wang, Kai and
                  Huang, Hao and
                  Hu, Ying and
                  Huang, Zhihua and
                  Li, Sheng},
  editor       = {Hermansky, Hynek and
                  Cernock{\'{y}}, Honza and
                  Burget, Luk{\'{a}}s and
                  Lamel, Lori and
                  Scharenborg, Odette and
                  Motl{\'{\i}}cek, Petr},
  title        = {End-to-End Speech Separation Using Orthogonal Representation in Complex
                  and Real Time-Frequency Domain},
  booktitle    = {Interspeech 2021, 22nd Annual Conference of the International Speech
                  Communication Association, Brno, Czechia, 30 August - 3 September
                  2021},
  pages        = {3046--3050},
  publisher    = {{ISCA}},
  year         = {2021},
  url          = {https://doi.org/10.21437/Interspeech.2021-504},
  doi          = {10.21437/INTERSPEECH.2021-504},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/WangHHH021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/WangYHLX21,
  author       = {Wang, Ding and
                  Ye, Shuaishuai and
                  Hu, Xinhui and
                  Li, Sheng and
                  Xu, Xinkang},
  editor       = {Hermansky, Hynek and
                  Cernock{\'{y}}, Honza and
                  Burget, Luk{\'{a}}s and
                  Lamel, Lori and
                  Scharenborg, Odette and
                  Motl{\'{\i}}cek, Petr},
  title        = {An End-to-End Dialect Identification System with Transfer Learning
                  from a Multilingual Automatic Speech Recognition Model},
  booktitle    = {Interspeech 2021, 22nd Annual Conference of the International Speech
                  Communication Association, Brno, Czechia, 30 August - 3 September
                  2021},
  pages        = {3266--3270},
  publisher    = {{ISCA}},
  year         = {2021},
  url          = {https://doi.org/10.21437/Interspeech.2021-374},
  doi          = {10.21437/INTERSPEECH.2021-374},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/WangYHLX21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ococosda/KakMK0DCS21,
  author       = {Kak, Soky and
                  Mimura, Masato and
                  Kawahara, Tatsuya and
                  Li, Sheng and
                  Ding, Chenchen and
                  Chu, Chenhui and
                  Sam, Sethserey},
  title        = {{Khmer} Speech Translation Corpus of the {Extraordinary Chambers in the
                  Courts of Cambodia} {(ECCC)}},
  booktitle    = {24th Conference of the Oriental {COCOSDA} International Committee
                  for the Co-ordination and Standardisation of Speech Databases and
                  Assessment Techniques, {O-COCOSDA} 2021, Singapore, November 18-20,
                  2021},
  pages        = {122--127},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/O-COCOSDA202152914.2021.9660421},
  doi          = {10.1109/O-COCOSDA202152914.2021.9660421},
  timestamp    = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ococosda/KakMK0DCS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/taslp/ShenL0K20,
  author       = {Shen, Peng and
                  Lu, Xugang and
                  Li, Sheng and
                  Kawai, Hisashi},
  title        = {Knowledge Distillation-Based Representation Learning for Short-Utterance
                  Spoken Language Identification},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {28},
  pages        = {2674--2683},
  year         = {2020},
  url          = {https://doi.org/10.1109/TASLP.2020.3023627},
  doi          = {10.1109/TASLP.2020.3023627},
  timestamp    = {Tue, 16 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/taslp/ShenL0K20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ccs/HanCLMY20,
  author       = {Han, Yaowei and
                  Cao, Yang and
                  Li, Sheng and
                  Ma, Qiang and
                  Yoshikawa, Masatoshi},
  editor       = {Ligatti, Jay and
                  Ou, Xinming and
                  Katz, Jonathan and
                  Vigna, Giovanni},
  title        = {Voice-Indistinguishability - Protecting Voiceprint with Differential
                  Privacy under an Untrusted Server},
  booktitle    = {{CCS} '20: 2020 {ACM} {SIGSAC} Conference on Computer and Communications
                  Security, Virtual Event, USA, November 9-13, 2020},
  pages        = {2125--2127},
  publisher    = {{ACM}},
  year         = {2020},
  url          = {https://doi.org/10.1145/3372297.3420025},
  doi          = {10.1145/3372297.3420025},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ccs/HanCLMY20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/LinWD0D20,
  author       = {Lin, Yuqin and
                  Wang, Longbiao and
                  Dang, Jianwu and
                  Li, Sheng and
                  Ding, Chenchen},
  title        = {End-to-End Articulatory Modeling for Dysarthric Articulatory Attribute
                  Detection},
  booktitle    = {2020 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages        = {7349--7353},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/ICASSP40776.2020.9054233},
  doi          = {10.1109/ICASSP40776.2020.9054233},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/LinWD0D20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/ShiWG0D20,
  author       = {Shi, Hao and
                  Wang, Longbiao and
                  Ge, Meng and
                  Li, Sheng and
                  Dang, Jianwu},
  title        = {Spectrograms Fusion with Minimum Difference Masks Estimation for Monaural
                  Speech Dereverberation},
  booktitle    = {2020 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages        = {7544--7548},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/ICASSP40776.2020.9054661},
  doi          = {10.1109/ICASSP40776.2020.9054661},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/ShiWG0D20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icmcs/Han000Y20,
  author       = {Han, Yaowei and
                  Li, Sheng and
                  Cao, Yang and
                  Ma, Qiang and
                  Yoshikawa, Masatoshi},
  title        = {Voice-Indistinguishability: Protecting Voiceprint In Privacy-Preserving
                  Speech Data Release},
  booktitle    = {{IEEE} International Conference on Multimedia and Expo, {ICME} 2020,
                  London, UK, July 6-10, 2020},
  pages        = {1--6},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/ICME46284.2020.9102875},
  doi          = {10.1109/ICME46284.2020.9102875},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icmcs/Han000Y20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/iconip/GuoWLZGWDH20,
  author       = {Guo, Shaotong and
                  Wang, Longbiao and
                  Li, Sheng and
                  Zhang, Ju and
                  Gong, Cheng and
                  Wang, Yuguang and
                  Dang, Jianwu and
                  Honda, Kiyoshi},
  editor       = {Yang, Haiqin and
                  Pasupa, Kitsuchart and
                  Leung, Andrew Chi{-}Sing and
                  Kwok, James T. and
                  Chan, Jonathan H. and
                  King, Irwin},
  title        = {Investigation of Effectively Synthesizing Code-Switched Speech Using
                  Highly Imbalanced Mix-Lingual Data},
  booktitle    = {Neural Information Processing - 27th International Conference, {ICONIP}
                  2020, Bangkok, Thailand, November 23-27, 2020, Proceedings, Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {12532},
  pages        = {36--47},
  publisher    = {Springer},
  year         = {2020},
  url          = {https://doi.org/10.1007/978-3-030-63830-6_4},
  doi          = {10.1007/978-3-030-63830-6_4},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iconip/GuoWLZGWDH20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/ShiW0DGLDS20,
  author       = {Shi, Hao and
                  Wang, Longbiao and
                  Li, Sheng and
                  Ding, Chenchen and
                  Ge, Meng and
                  Li, Nan and
                  Dang, Jianwu and
                  Seki, Hiroshi},
  editor       = {Meng, Helen and
                  Xu, Bo and
                  Zheng, Thomas Fang},
  title        = {Singing Voice Extraction with Attention-Based Spectrograms Fusion},
  booktitle    = {Interspeech 2020, 21st Annual Conference of the International Speech
                  Communication Association, Virtual Event, Shanghai, China, 25-29 October
                  2020},
  pages        = {2412--2416},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Interspeech.2020-1043},
  doi          = {10.21437/INTERSPEECH.2020-1043},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/ShiW0DGLDS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/LinW0DD20,
  author       = {Lin, Yuqin and
                  Wang, Longbiao and
                  Li, Sheng and
                  Dang, Jianwu and
                  Ding, Chenchen},
  editor       = {Meng, Helen and
                  Xu, Bo and
                  Zheng, Thomas Fang},
  title        = {Staged Knowledge Distillation for End-to-End Dysarthric Speech Recognition
                  and Speech Attribute Transcription},
  booktitle    = {Interspeech 2020, 21st Annual Conference of the International Speech
                  Communication Association, Virtual Event, Shanghai, China, 25-29 October
                  2020},
  pages        = {4791--4795},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Interspeech.2020-1755},
  doi          = {10.21437/INTERSPEECH.2020-1755},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/LinW0DD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ococosda/ThidaHO0D20,
  author       = {Thida, Aye and
                  Han, Nway Nway and
                  Oo, Sheinn Thawtar and
                  Li, Sheng and
                  Ding, Chenchen},
  title        = {{VOIS:} The First Speech Therapy App Specifically Designed for {Myanmar}
                  Hearing-Impaired Children},
  booktitle    = {23rd Conference of the Oriental {COCOSDA} International Committee
                  for the Co-ordination and Standardisation of Speech Databases and
                  Assessment Techniques, {O-COCOSDA} 2020, Yangon, Myanmar, November
                  5-7, 2020},
  pages        = {151--154},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/O-COCOSDA50338.2020.9295024},
  doi          = {10.1109/O-COCOSDA50338.2020.9295024},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ococosda/ThidaHO0D20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/odyssey/ShenLS0K20,
  author       = {Shen, Peng and
                  Lu, Xugang and
                  Sugiura, Komei and
                  Li, Sheng and
                  Kawai, Hisashi},
  editor       = {Lee, Kong{-}Aik and
                  Koshinaka, Takafumi and
                  Shinoda, Koichi},
  title        = {Compensation on x-vector for Short Utterance Spoken Language Identification},
  booktitle    = {Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November
                  2020, Tokyo, Japan},
  pages        = {47--52},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Odyssey.2020-7},
  doi          = {10.21437/ODYSSEY.2020-7},
  timestamp    = {Tue, 16 Nov 2021 11:36:03 +0100},
  biburl       = {https://dblp.org/rec/conf/odyssey/ShenLS0K20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/odyssey/0010LDSK20,
  author    = {Li, Sheng and
               Lu, Xugang and
               Dabre, Raj and
               Shen, Peng and
               Kawai, Hisashi},
  editor    = {Lee, Kong{-}Aik and
               Koshinaka, Takafumi and
               Shinoda, Koichi},
  title     = {Joint Training End-to-End Speech Recognition Systems with Speaker Attributes},
  booktitle = {Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November 2020, Tokyo, Japan},
  pages     = {385--390},
  publisher = {{ISCA}},
  year      = {2020},
  url       = {https://doi.org/10.21437/Odyssey.2020-54},
  doi       = {10.21437/ODYSSEY.2020-54},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/odyssey/0010LDSK20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@incollection{DBLP:series/sbcs/Lu0F20,
  author    = {Lu, Xugang and
               Li, Sheng and
               Fujimoto, Masakiyo},
  editor    = {Kidawara, Yutaka and
               Sumita, Eiichiro and
               Kawai, Hisashi},
  title     = {Automatic Speech Recognition},
  booktitle = {Speech-to-Speech Translation},
  series    = {Springer Briefs in Computer Science},
  pages     = {21--38},
  publisher = {Springer},
  year      = {2020},
  url       = {https://doi.org/10.1007/978-981-15-0595-9\_2},
  doi       = {10.1007/978-981-15-0595-9\_2},
  timestamp = {Fri, 27 Dec 2019 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/series/sbcs/Lu0F20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2004-07442,
  author     = {Han, Yaowei and
                Li, Sheng and
                Cao, Yang and
                Ma, Qiang and
                Yoshikawa, Masatoshi},
  title      = {Voice-Indistinguishability: Protecting Voiceprint in Privacy-Preserving Speech Data Release},
  journal    = {CoRR},
  volume     = {abs/2004.07442},
  year       = {2020},
  url        = {https://arxiv.org/abs/2004.07442},
  eprinttype = {arXiv},
  eprint     = {2004.07442},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2004-07442.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/apsipa/PanLWD19,
  author    = {Pan, Lixin and
               Li, Sheng and
               Wang, Longbiao and
               Dang, Jianwu},
  title     = {Effective Training End-to-End {ASR} systems for Low-resource {Lhasa} Dialect of {Tibetan} Language},
  booktitle = {2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, {APSIPA} {ASC} 2019, Lanzhou, China, November 18-21, 2019},
  pages     = {1152--1156},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/APSIPAASC47483.2019.9023100},
  doi       = {10.1109/APSIPAASC47483.2019.9023100},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/apsipa/PanLWD19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/apsipa/Soky0KS19,
  author    = {Kak, Soky and
               Li, Sheng and
               Kawahara, Tatsuya and
               Seng, Sopheap},
  title     = {Multi-lingual Transformer Training for {Khmer} Automatic Speech Recognition},
  booktitle = {2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, {APSIPA} {ASC} 2019, Lanzhou, China, November 18-21, 2019},
  pages     = {1893--1896},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/APSIPAASC47483.2019.9023137},
  doi       = {10.1109/APSIPAASC47483.2019.9023137},
  timestamp = {Sun, 31 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/apsipa/Soky0KS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/ShenL0K19,
  author    = {Shen, Peng and
               Lu, Xugang and
               Li, Sheng and
               Kawai, Hisashi},
  title     = {Interactive Learning of Teacher-student Model for Short Utterance Spoken Language Identification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages     = {5981--5985},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/ICASSP.2019.8683371},
  doi       = {10.1109/ICASSP.2019.8683371},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/ShenL0K19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/Takashima0K19,
  author    = {Takashima, Ryoichi and
               Li, Sheng and
               Kawai, Hisashi},
  title     = {Investigation of Sequence-level Knowledge Distillation Methods for {CTC} Acoustic Models},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages     = {6156--6160},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/ICASSP.2019.8682671},
  doi       = {10.1109/ICASSP.2019.8682671},
  timestamp = {Sat, 19 Oct 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/Takashima0K19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/0010DLSKK19,
  author    = {Li, Sheng and
               Ding, Chenchen and
               Lu, Xugang and
               Shen, Peng and
               Kawahara, Tatsuya and
               Kawai, Hisashi},
  editor    = {Kubin, Gernot and
               Kacic, Zdravko},
  title     = {End-to-End Articulatory Attribute Modeling for Low-Resource Multilingual Speech Recognition},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  pages     = {2145--2149},
  publisher = {{ISCA}},
  year      = {2019},
  url       = {https://doi.org/10.21437/Interspeech.2019-2092},
  doi       = {10.21437/INTERSPEECH.2019-2092},
  timestamp = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/0010DLSKK19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/LiLDSKK19,
  author    = {Li, Sheng and
               Lu, Xugang and
               Ding, Chenchen and
               Shen, Peng and
               Kawahara, Tatsuya and
               Kawai, Hisashi},
  editor    = {Kubin, Gernot and
               Kacic, Zdravko},
  title     = {Investigating Radical-Based End-to-End Speech Recognition Systems for {Chinese} Dialects and {Japanese}},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  pages     = {2200--2204},
  publisher = {{ISCA}},
  year      = {2019},
  url       = {https://doi.org/10.21437/Interspeech.2019-2104},
  doi       = {10.21437/INTERSPEECH.2019-2104},
  timestamp = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiLDSKK19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/LuS00K19,
  author    = {Lu, Xugang and
               Shen, Peng and
               Li, Sheng and
               Tsao, Yu and
               Kawai, Hisashi},
  editor    = {Kubin, Gernot and
               Kacic, Zdravko},
  title     = {Class-Wise Centroid Distance Metric Learning for Acoustic Event Detection},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  pages     = {3614--3618},
  publisher = {{ISCA}},
  year      = {2019},
  url       = {https://doi.org/10.21437/Interspeech.2019-2271},
  doi       = {10.21437/INTERSPEECH.2019-2271},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/LuS00K19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/LiRLSKK19,
  author    = {Li, Sheng and
               Dabre, Raj and
               Lu, Xugang and
               Shen, Peng and
               Kawahara, Tatsuya and
               Kawai, Hisashi},
  editor    = {Kubin, Gernot and
               Kacic, Zdravko},
  title     = {Improving Transformer-Based Speech Recognition Systems with Compressed Structure and Speech Attributes Augmentation},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  pages     = {4400--4404},
  publisher = {{ISCA}},
  year      = {2019},
  url       = {https://doi.org/10.21437/Interspeech.2019-2112},
  doi       = {10.21437/INTERSPEECH.2019-2112},
  timestamp = {Wed, 04 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiRLSKK19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-1912-12011,
  author     = {Lu, Xugang and
                Shen, Peng and
                Li, Sheng and
                Tsao, Yu and
                Kawai, Hisashi},
  title      = {Deep progressive multi-scale attention for acoustic event classification},
  journal    = {CoRR},
  volume     = {abs/1912.12011},
  year       = {2019},
  url        = {https://arxiv.org/abs/1912.12011},
  eprinttype = {arXiv},
  eprint     = {1912.12011},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-12011.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/TakashimaLK18,
  author    = {Takashima, Ryoichi and
               Li, Sheng and
               Kawai, Hisashi},
  title     = {An Investigation of a Knowledge Distillation Method for {CTC} Acoustic Models},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {5809--5813},
  publisher = {{IEEE}},
  year      = {2018},
  url       = {https://doi.org/10.1109/ICASSP.2018.8461995},
  doi       = {10.1109/ICASSP.2018.8461995},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/TakashimaLK18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/TakashimaLK18a,
  author    = {Takashima, Ryoichi and
               Li, Sheng and
               Kawai, Hisashi},
  title     = {{CTC} Loss Function with a Unit-Level Ambiguity Penalty},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {5909--5913},
  publisher = {{IEEE}},
  year      = {2018},
  url       = {https://doi.org/10.1109/ICASSP.2018.8461762},
  doi       = {10.1109/ICASSP.2018.8461762},
  timestamp = {Wed, 25 Sep 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/TakashimaLK18a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/LuSLTK18,
  author    = {Lu, Xugang and
               Shen, Peng and
               Li, Sheng and
               Tsao, Yu and
               Kawai, Hisashi},
  editor    = {Yegnanarayana, B.},
  title     = {Temporal Attentive Pooling for Acoustic Event Detection},
  booktitle = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  pages     = {1354--1357},
  publisher = {{ISCA}},
  year      = {2018},
  url       = {https://doi.org/10.21437/Interspeech.2018-1552},
  doi       = {10.21437/INTERSPEECH.2018-1552},
  timestamp = {Fri, 21 May 2021 08:16:43 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/LuSLTK18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/ShenLLK18,
  author    = {Shen, Peng and
               Lu, Xugang and
               Li, Sheng and
               Kawai, Hisashi},
  editor    = {Yegnanarayana, B.},
  title     = {Feature Representation of Short Utterances Based on Knowledge Distillation for Spoken Language Identification},
  booktitle = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  pages     = {1813--1817},
  publisher = {{ISCA}},
  year      = {2018},
  url       = {https://doi.org/10.21437/Interspeech.2018-1519},
  doi       = {10.21437/INTERSPEECH.2018-1519},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/ShenLLK18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/LiLTSKK18,
  author    = {Li, Sheng and
               Lu, Xugang and
               Takashima, Ryoichi and
               Shen, Peng and
               Kawahara, Tatsuya and
               Kawai, Hisashi},
  editor    = {Yegnanarayana, B.},
  title     = {Improving {CTC}-based Acoustic Model with Very Deep Residual Time-delay Neural Networks},
  booktitle = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  pages     = {3708--3712},
  publisher = {{ISCA}},
  year      = {2018},
  url       = {https://doi.org/10.21437/Interspeech.2018-1475},
  doi       = {10.21437/INTERSPEECH.2018-1475},
  timestamp = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiLTSKK18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/slt/0010LTSKK18,
  author    = {Li, Sheng and
               Lu, Xugang and
               Takashima, Ryoichi and
               Shen, Peng and
               Kawahara, Tatsuya and
               Kawai, Hisashi},
  title     = {Improving Very Deep Time-Delay Neural Network With Vertical-Attention For Effectively Training {CTC}-Based {ASR} Systems},
  booktitle = {2018 {IEEE} Spoken Language Technology Workshop, {SLT} 2018, Athens, Greece, December 18-21, 2018},
  pages     = {77--83},
  publisher = {{IEEE}},
  year      = {2018},
  url       = {https://doi.org/10.1109/SLT.2018.8639675},
  doi       = {10.1109/SLT.2018.8639675},
  timestamp = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/0010LTSKK18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/asru/LiLSTKK17,
  author    = {Li, Sheng and
               Lu, Xugang and
               Shen, Peng and
               Takashima, Ryoichi and
               Kawahara, Tatsuya and
               Kawai, Hisashi},
  title     = {Incremental training and constructing the very deep convolutional residual network acoustic models},
  booktitle = {2017 {IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2017, Okinawa, Japan, December 16-20, 2017},
  pages     = {222--227},
  publisher = {{IEEE}},
  year      = {2017},
  url       = {https://doi.org/10.1109/ASRU.2017.8268939},
  doi       = {10.1109/ASRU.2017.8268939},
  timestamp = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/asru/LiLSTKK17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/LiLSMK17,
  author    = {Li, Sheng and
               Lu, Xugang and
               Sakai, Shinsuke and
               Mimura, Masato and
               Kawahara, Tatsuya},
  title     = {Semi-supervised ensemble {DNN} acoustic model training},
  booktitle = {2017 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2017, New Orleans, LA, USA, March 5-9, 2017},
  pages     = {5270--5274},
  publisher = {{IEEE}},
  year      = {2017},
  url       = {https://doi.org/10.1109/ICASSP.2017.7953162},
  doi       = {10.1109/ICASSP.2017.7953162},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/LiLSMK17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/ShenLLK17,
  author    = {Shen, Peng and
               Lu, Xugang and
               Li, Sheng and
               Kawai, Hisashi},
  editor    = {Lacerda, Francisco},
  title     = {Conditional Generative Adversarial Nets Classifier for Spoken Language Identification},
  booktitle = {Interspeech 2017, 18th Annual Conference of the International Speech Communication Association, Stockholm, Sweden, August 20-24, 2017},
  pages     = {2814--2818},
  publisher = {{ISCA}},
  year      = {2017},
  url       = {https://doi.org/10.21437/Interspeech.2017-553},
  doi       = {10.21437/INTERSPEECH.2017-553},
  timestamp = {Sun, 06 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/ShenLLK17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@phdthesis{DBLP:phd/jp/Li16,
  author    = {Li, Sheng},
  title     = {Speech Recognition Enhanced by Lightly-supervised and Semi-supervised Acoustic Model Training},
  school    = {Kyoto University, Japan},
  year      = {2016},
  url       = {https://ci.nii.ac.jp/naid/500001007801},
  doi       = {10.14989/DOCTOR.K19849},
  timestamp = {Tue, 24 Oct 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/phd/jp/Li16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/taslp/LiAK16,
  author    = {Li, Sheng and
               Akita, Yuya and
               Kawahara, Tatsuya},
  title     = {Semi-Supervised Acoustic Model Training by Discriminative Data Selection From Multiple {ASR} Systems' Hypotheses},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {24},
  number    = {9},
  pages     = {1524--1534},
  year      = {2016},
  url       = {https://doi.org/10.1109/TASLP.2016.2562505},
  doi       = {10.1109/TASLP.2016.2562505},
  timestamp = {Fri, 13 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/taslp/LiAK16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/LiAK16,
  author    = {Li, Sheng and
               Akita, Yuya and
               Kawahara, Tatsuya},
  title     = {Data selection from multiple {ASR} systems' hypotheses for unsupervised acoustic model training},
  booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016},
  pages     = {5875--5879},
  publisher = {{IEEE}},
  year      = {2016},
  url       = {https://doi.org/10.1109/ICASSP.2016.7472804},
  doi       = {10.1109/ICASSP.2016.7472804},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/LiAK16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/iscslp/LiLMAK16,
  author    = {Li, Sheng and
               Lu, Xugang and
               Mori, Shinsuke and
               Akita, Yuya and
               Kawahara, Tatsuya},
  title     = {Confidence estimation for speech recognition systems using conditional random fields trained with partially annotated data},
  booktitle = {10th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2016, Tianjin, China, October 17-20, 2016},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2016},
  url       = {https://doi.org/10.1109/ISCSLP.2016.7918419},
  doi       = {10.1109/ISCSLP.2016.7918419},
  timestamp = {Wed, 16 Oct 2019 14:14:48 +0200},
  biburl    = {https://dblp.org/rec/conf/iscslp/LiLMAK16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ieicet/LiAK15,
  author    = {Li, Sheng and
               Akita, Yuya and
               Kawahara, Tatsuya},
  title     = {Automatic Lecture Transcription Based on Discriminative Data Selection for Lightly Supervised Acoustic Model Training},
  journal   = {{IEICE} Trans. Inf. Syst.},
  volume    = {98-D},
  number    = {8},
  pages     = {1545--1552},
  year      = {2015},
  url       = {https://doi.org/10.1587/transinf.2015EDP7047},
  doi       = {10.1587/TRANSINF.2015EDP7047},
  timestamp = {Sat, 11 Apr 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ieicet/LiAK15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/LiLAK15,
  author    = {Li, Sheng and
               Lu, Xugang and
               Akita, Yuya and
               Kawahara, Tatsuya},
  title     = {Ensemble speaker modeling using speaker adaptive training deep neural network for speaker adaptation},
  booktitle = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, September 6-10, 2015},
  pages     = {2892--2896},
  publisher = {{ISCA}},
  year      = {2015},
  url       = {https://doi.org/10.21437/Interspeech.2015-608},
  doi       = {10.21437/INTERSPEECH.2015-608},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiLAK15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/LiAK15,
  author    = {Li, Sheng and
               Akita, Yuya and
               Kawahara, Tatsuya},
  title     = {Discriminative data selection for lightly supervised training of acoustic model using closed caption texts},
  booktitle = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, September 6-10, 2015},
  pages     = {3526--3530},
  publisher = {{ISCA}},
  year      = {2015},
  url       = {https://doi.org/10.21437/Interspeech.2015-699},
  doi       = {10.21437/INTERSPEECH.2015-699},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiAK15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/iscslp/LiAK14,
  author    = {Li, Sheng and
               Akita, Yuya and
               Kawahara, Tatsuya},
  editor    = {Dong, Minghui and
               Tao, Jianhua and
               Li, Haizhou and
               Zheng, Thomas Fang and
               Lu, Yanfeng},
  title     = {Corpus and transcription system of {Chinese} Lecture Room},
  booktitle = {The 9th International Symposium on Chinese Spoken Language Processing, Singapore, September 12-14, 2014},
  pages     = {442--445},
  publisher = {{IEEE}},
  year      = {2014},
  url       = {https://doi.org/10.1109/ISCSLP.2014.6936595},
  doi       = {10.1109/ISCSLP.2014.6936595},
  timestamp = {Mon, 18 Mar 2024 17:29:23 +0100},
  biburl    = {https://dblp.org/rec/conf/iscslp/LiAK14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/speech/WangCLM12,
  author    = {Wang, Lan and
               Chen, Hui and
               Li, Sheng and
               Meng, Helen M.},
  title     = {Phoneme-level articulatory animation in pronunciation training},
  journal   = {Speech Commun.},
  volume    = {54},
  number    = {7},
  pages     = {845--856},
  year      = {2012},
  url       = {https://doi.org/10.1016/j.specom.2012.02.003},
  doi       = {10.1016/J.SPECOM.2012.02.003},
  timestamp = {Thu, 29 Dec 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/speech/WangCLM12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/LiW12,
  author    = {Li, Sheng and
               Wang, Lan},
  title     = {Cross Linguistic Comparison of {Mandarin} and {English} {EMA} Articulatory Data},
  booktitle = {{INTERSPEECH} 2012, 13th Annual Conference of the International Speech Communication Association, Portland, Oregon, USA, September 9-13, 2012},
  pages     = {903--906},
  publisher = {{ISCA}},
  year      = {2012},
  url       = {https://doi.org/10.21437/Interspeech.2012-272},
  doi       = {10.21437/INTERSPEECH.2012-272},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiW12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ialp/LiWQ11,
  author    = {Li, Sheng and
               Wang, Lan and
               Qi, En},
  title     = {The Phoneme-Level Articulator Dynamics for Pronunciation Animation},
  booktitle = {International Conference on Asian Language Processing, {IALP} 2011, Penang, Malaysia, 15-17 November, 2011},
  pages     = {283--286},
  publisher = {{IEEE} Computer Society},
  year      = {2011},
  url       = {https://doi.org/10.1109/IALP.2011.13},
  doi       = {10.1109/IALP.2011.13},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ialp/LiWQ11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

a service of

manage site settings

To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.