Skip to content

Citations

BuzzASR (this work)

% Self-citation for this work. The system name is braced so that styles which
% sentence-case titles do not lowercase it.
@misc{buzzasr2026,
  author = {Anonymous},
  title  = {{BuzzASR}: A Swarm of 100+ Monolingual Speech Recognition Models},
  year   = {2026},
  note   = {Anonymous ACL submission},
}

Whisper

% Whisper paper (ICML 2023). Page range and proceedings series are included so
% the entry is as complete as the other conference entries in this file.
@inproceedings{radford2023whisper,
  author    = {Radford, Alec and Kim, Jong Wook and Xu, Tao and Brockman, Greg
               and McLeavey, Christine and Sutskever, Ilya},
  title     = {Robust Speech Recognition via Large-Scale Weak Supervision},
  booktitle = {Proceedings of the 40th International Conference on Machine Learning},
  series    = {Proceedings of Machine Learning Research},
  volume    = {202},
  pages     = {28492--28518},
  publisher = {PMLR},
  year      = {2023},
}

FLEURS

% FLEURS benchmark paper. Acronyms are braced so that styles which
% sentence-case titles keep them in upper case.
@inproceedings{conneau2023fleurs,
  author    = {Conneau, Alexis and Ma, Min and Khanuja, Simran and Zhang, Yu
               and Axelrod, Vera and Dalmia, Siddharth and Riesa, Jason and Rivera, Clara
               and Bapna, Ankur},
  title     = {{FLEURS}: Few-shot Learning Evaluation of Universal Representations of Speech},
  booktitle = {2022 {IEEE} Spoken Language Technology Workshop ({SLT})},
  pages     = {798--805},
  year      = {2023},
}

Common Voice

% Common Voice corpus paper. The corpus name is braced as a unit so that styles
% which sentence-case titles do not turn it into "Common voice".
@inproceedings{ardila2020commonvoice,
  author    = {Ardila, Rosana and Branson, Megan and Davis, Kelly and Kohler, Michael
               and Meyer, Josh and Henretty, Michael and Morais, Reuben and Saunders, Lindsay
               and Tyers, Francis and Weber, Gregor},
  title     = {{Common Voice}: A Massively-Multilingual Speech Corpus},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  pages     = {4218--4222},
  year      = {2020},
}

Goldfish text corpora

% Goldfish monolingual language models paper.
@inproceedings{chang2026goldfish,
  author    = {Chang, Tyler A. and Arnett, Catherine and Tu, Zhuowen and Bergen, Benjamin K.},
  title     = {Goldfish: Monolingual Language Models for 350 Languages},
  booktitle = {LREC},
  year      = {2026},
}

HuggingFace tokenizers

% Software reference. The author is double-braced so the organisation name is
% treated as a single unit rather than parsed as a personal name.
@misc{huggingface2020tokenizers,
  author = {{HuggingFace}},
  title  = {Tokenizers: Fast State-of-the-Art Tokenizers Optimized for Research and Production},
  url    = {https://github.com/huggingface/tokenizers},
  year   = {2020},
}