% Journal article. The model name in the title is brace-protected so that
% sentence-casing bibliography styles keep its mixed capitalization.
@article{lu2026desta25,
  author  = {Lu, Ke-Han and Chen, Zhehuai and Fu, Szu-Wei and Yang, Chao-Han Huck and Huang, Sung-Feng and Yang, Chih-Kai and Yu, Chee-En and Chen, Chun-Wei and Chen, Wei-Chih and Huang, Chien-yu and Lin, Yi-Cheng and Lin, Yu-Xiang and Fu, Chi-An and Kuan, Chun-Yi and Ren, Wenze and Chen, Xuanjun and Huang, Wei-Ping and Hu, En-Pei and Lin, Tzu-Quan and Wu, Yuan-Kuei and Huang, Kuan-Po and Huang, Hsiao-Ying and Chou, Huang-Cheng and Chang, Kai-Wei and Chiang, Cheng-Han and Ginsburg, Boris and Wang, Yu-Chiang Frank and Lee, Hung-yi},
  title   = {{DeSTA2.5-Audio}: Toward General-Purpose Large Audio Language Model With Self-Generated Cross-Modal Alignment},
  journal = {IEEE Transactions on Audio, Speech and Language Processing},
  year    = {2026},
  volume  = {34},
  pages   = {2062--2076},
  doi     = {10.1109/TASLPRO.2026.3675792}
}
% Conference paper. The title holds plain text only (no HTML line-break markup);
% the benchmark name is brace-protected against sentence-casing styles.
@inproceedings{huang2025dynamic,
  author    = {Chien-yu Huang and Wei-Chih Chen and Shu-wen Yang and Andy T. Liu and Chen-An Li and Yu-Xiang Lin and Wei-Cheng Tseng and Anuj Diwan and Yi-Jen Shih and Jiatong Shi and William Chen and Chih-Kai Yang and Wenze Ren and Xuanjun Chen and Chi-Yuan Hsiao and Puyuan Peng and Shih-Heng Wang and Chun-Yi Kuan and Ke-Han Lu and Kai-Wei Chang and Fabian Ritter-Gutierrez and Kuan-Po Huang and Siddhant Arora and You-Kuan Lin and Ming To Chuang and Eunjung Yeo and Kalvin Chang and Chung-Ming Chien and Kwanghee Choi and Jun-You Wang and Cheng-Hsiu Hsieh and Yi-Cheng Lin and Chee-En Yu and I-Hsiang Chiu and Heitor R. Guimarães and Jionghao Han and Tzu-Quan Lin and Tzu-Yuan Lin and Homu Chang and Ting-Wu Chang and Chun Wei Chen and Shou-Jen Chen and Yu-Hua Chen and Hsi-Chun Cheng and Kunal Dhawan and Jia-Lin Fang and Shi-Xin Fang and Kuan-Yu Fang Chiang and Chi An Fu and Hsien-Fu Hsiao and Ching Yu Hsu and Shao-Syuan Huang and Lee Chen Wei and Hsi-Che Lin and Hsuan-Hao Lin and Hsuan-Ting Lin and Jian-Ren Lin and Ting-Chun Liu and Li-Chun Lu and Tsung-Min Pai and Ankita Pasad and Shih-Yun Shan Kuan and Suwon Shon and Yuxun Tang and Yun-Shao Tsai and Jui-Chiang Wei and Tzu-Chieh Wei and Chengxi Wu and Dien-Ruei Wu and Chao-Han Huck Yang and Chieh-Chi Yang and Jia Qi Yip and Shao-Xiang Yuan and Vahid Noroozi and Zhehuai Chen and Haibin Wu and Karen Livescu and David Harwath and Shinji Watanabe and Hung-yi Lee},
  title     = {{Dynamic-SUPERB} Phase-2: A Collaboratively Expanding Benchmark for Measuring the Capabilities of Spoken Language Models with 180 Tasks},
  booktitle = {International Conference on Learning Representations},
  year      = {2025},
  url       = {https://openreview.net/forum?id=s7lzZpAW7T}
}
% Findings of ACL 2024 paper; the editor field lists the volume editors.
@inproceedings{wu-etal-2024-codec,
  author    = {Wu, Haibin and Chung, Ho-Lam and Lin, Yi-Cheng and Wu, Yuan-Kuei and Chen, Xuanjun and Pai, Yu-Chi and Wang, Hsiu-Hsuan and Chang, Kai-Wei and Liu, Alexander and Lee, Hung-yi},
  title     = {Codec-{SUPERB}: An In-Depth Analysis of Sound Codec Models},
  editor    = {Ku, Lun-Wei and Martins, Andre and Srikumar, Vivek},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2024},
  publisher = {Association for Computational Linguistics},
  year      = {2024},
  pages     = {10330--10348},
  doi       = {10.18653/v1/2024.findings-acl.616},
  url       = {https://aclanthology.org/2024.findings-acl.616/},
}
% Workshop paper. The benchmark name and the venue acronym in the title are
% brace-protected so sentence-casing styles do not lowercase them.
@inproceedings{wu2024codecsuperbslt,
  author    = {Haibin Wu and Xuanjun Chen and Yi-Cheng Lin and Jiawei Du and Kai-Wei Chang and Ke-Han Lu and Alexander Liu and Ho-Lam Chung and Yuan-Kuei Wu and Dongchao Yang and Songxiang Liu and Yi-Chiao Wu and Xu Tan and James Glass and Shinji Watanabe and Hung-yi Lee},
  title     = {{Codec-SUPERB} @ {SLT} 2024: A lightweight benchmark for neural codec models},
  booktitle = {2024 IEEE Spoken Language Technology Workshop (SLT)},
  year      = {2024}
}
% arXiv preprint. The model name in the title is brace-protected to keep its
% casing under sentence-casing styles.
@misc{lin2025gpt4o,
  author        = {Yu-Xiang Lin and Chih-Kai Yang and Wei-Chih Chen and Chen-An Li and Chien-yu Huang and Xuanjun Chen and Hung-yi Lee},
  title         = {A Preliminary Exploration with {GPT-4o} Voice Mode},
  year          = {2025},
  eprint        = {2502.09940},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% arXiv preprint. The proper nouns in the title are brace-protected so
% sentence-casing styles keep their capitals.
@misc{yang2024building,
  author        = {Chih-Kai Yang and Yu-Kuan Fu and Chen-An Li and Yi-Cheng Lin and Yu-Xiang Lin and Wei-Chih Chen and Ho Lam Chung and Chun-Yi Kuan and Wei-Ping Huang and Ke-Han Lu and Tzu-Quan Lin and Hsiu-Hsuan Wang and En-Pei Hu and Chan-Jan Hsu and Liang-Hsuan Tseng and I-Hsiang Chiu and Ulin Sanga and Xuanjun Chen and Po-chun Hsu and Shu-wen Yang and Hung-yi Lee},
  title         = {Building a {Taiwanese} {Mandarin} Spoken Language Model: A First Attempt},
  year          = {2024},
  eprint        = {2411.07111},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% arXiv preprint (overview paper), identified by its eprint fields.
@misc{wu2024towards,
  author        = {Haibin Wu and Xuanjun Chen and Yi-Cheng Lin and Kai-wei Chang and Ho-Lam Chung and Alexander Liu and Hung-yi Lee},
  title         = {Towards audio language modeling - an overview},
  year          = {2024},
  archivePrefix = {arXiv},
  eprint        = {2402.13236},
  primaryClass  = {cs.CL}
}
% arXiv preprint. The system name in the title is brace-protected so
% sentence-casing styles keep its mixed capitalization.
@misc{cyli2026codarag,
  author        = {Cheng-Yen Li and Xuanjun Chen and Claire Lin and Wei-Yu Chen and Wenhua Nie and Hung-yi Lee and Jyh-Shing Roger Jang},
  title         = {{CodaRAG}: Connecting the Dots with Associativity Inspired by Complementary Learning},
  year          = {2026},
  eprint        = {2604.10426},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% Manuscript under submission (status recorded in the note field). The acronym
% in the title is brace-protected against sentence-casing styles.
@misc{chou2026efficient,
  author = {Wei-Chieh Chou and Xuanjun Chen and Jian-Ren Lin and Claire Lin and Hung-yi Lee and Jyh-Shing Roger Jang},
  title  = {Only Ask What You Don't Know: Grounded Delta Planning for Efficient Multi-step {RAG}},
  year   = {2026},
  note   = {Submitted to COLM 2026}
}
% ROCLING 2025 conference paper with its ACL Anthology link.
@inproceedings{lin2025preliminary,
  author    = {Lin, Claire and Feng, Bo-Han and Chen, Xuanjun and Yang, Te-Lun and Lee, Hung-yi and Jang, Jyh-Shing Roger},
  title     = {A Preliminary Study of {RAG} for {T}aiwanese Historical Archives},
  booktitle = {Proceedings of the 37th Conference on Computational Linguistics and Speech Processing (ROCLING 2025)},
  year      = {2025},
  url       = {https://aclanthology.org/2025.rocling-main.6}
}
% arXiv preprint. The identifier is stored in the eprint fields rather than in
% a journal field, in the same form as the other preprints in this file.
% NOTE(review): primaryClass is not recorded here - add it once confirmed.
@misc{chen2026mitigating,
  author        = {Chen, Xuanjun and Wu, Yun-Shing and Lu, Wei-Chung and Lin, Claire and Wu, Haibin and Lee, Hung-yi and Jang, Jyh-Shing Roger},
  title         = {Mitigating Proxy-to-Wild Domain Gap in Deepfake Speech},
  year          = {2026},
  eprint        = {2606.07494},
  archivePrefix = {arXiv}
}
% arXiv preprint. The identifier is stored in the eprint fields rather than in
% a journal field; the task name in the title is brace-protected.
% NOTE(review): primaryClass is not recorded here - add it once confirmed.
@misc{chen2026joint,
  author        = {Chen, Xuanjun and Hu, Chia-Yu and Huang, Sung-Feng and Wu, Haibin and Lee, Hung-yi and Jang, Jyh-Shing Roger},
  title         = {Joint Fullband-Subband Modeling for High-Resolution {SingFake} Detection},
  year          = {2026},
  eprint        = {2604.04841},
  archivePrefix = {arXiv}
}
% Journal article. Both occurrences of the dataset name in the title are
% brace-protected so sentence-casing styles keep the mixed capitalization.
@article{chen2026codecfake,
  author  = {Chen, Xuanjun and Du, Jiawei and Wu, Haibin and Zhang, Lin and Lin, I-Ming and Chiu, I-Hsiang and Ren, Wenze and Tseng, Yuan and Tsao, Yu and Jang, Jyh-Shing Roger and Lee, Hung-yi},
  title   = {{CodecFake+}: Codec-Based Resynthesized Data as a Proxy for Detecting {CodecFake} Speech},
  journal = {IEEE Transactions on Audio, Speech and Language Processing},
  year    = {2026},
  volume  = {34},
  pages   = {2929--2944},
  doi     = {10.1109/TASLPRO.2026.3692291}
}
% arXiv preprint (cs.CV), identified by its eprint fields.
@misc{chen2025localizing,
  author        = {Xuanjun Chen and Shih-Peng Cheng and Jiawei Du and Lin Zhang and Xiaoxiao Miao and Chung-Che Wang and Haibin Wu and Hung-yi Lee and Jyh-Shing Roger Jang},
  title         = {Localizing Audio-Visual Deepfakes via Hierarchical Boundary Modeling},
  year          = {2025},
  archivePrefix = {arXiv},
  eprint        = {2508.02000},
  primaryClass  = {cs.CV}
}
% arXiv preprint. The task name in the title is brace-protected so
% sentence-casing styles keep its mixed capitalization.
@misc{chen2025how,
  author        = {Xuanjun Chen and Chia-Yu Hu and I-Ming Lin and Yi-Cheng Lin and I-Hsiang Chiu and You Zhang and Sung-Feng Huang and Yi-Hsuan Yang and Haibin Wu and Hung-yi Lee and Jyh-Shing Roger Jang},
  title         = {How Does Instrumental Music Help {SingFake} Detection?},
  year          = {2025},
  eprint        = {2509.14675},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SD}
}
% ASRU 2025 workshop paper.
@inproceedings{chen2025towards,
  author    = {Xuanjun Chen and I-Ming Lin and Lin Zhang and Haibin Wu and Hung-yi Lee and Jyh-Shing Roger Jang},
  title     = {Towards Generalized Source Tracing for Codec-Based Deepfake Speech},
  booktitle = {2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
  year      = {2025}
}
% Interspeech 2025 paper with a link to the ISCA archive PDF.
@inproceedings{chen2025codecbased,
  author    = {Xuanjun Chen and I-Ming Lin and Lin Zhang and Jiawei Du and Haibin Wu and Hung-yi Lee and Jyh-Shing Roger Jang},
  title     = {Codec-Based Deepfake Source Tracing via Neural Audio Codec Taxonomy},
  booktitle = {Interspeech 2025},
  year      = {2025},
  url       = {https://www.isca-archive.org/interspeech_2025/chen25j_interspeech.pdf}
}
% Interspeech 2024 paper. The task name in the title is brace-protected so
% sentence-casing styles keep its mixed capitalization.
@inproceedings{chen2024singing,
  author    = {Xuanjun Chen and Haibin Wu and Jyh-Shing Roger Jang and Hung-yi Lee},
  title     = {Singing Voice Graph Modeling for {SingFake} Detection},
  booktitle = {Interspeech 2024},
  year      = {2024},
  url       = {https://www.isca-archive.org/interspeech_2024/chen24o_interspeech.pdf}
}
% Interspeech 2024 paper with a link to the ISCA archive PDF.
@inproceedings{chen2024neural,
  author    = {Xuanjun Chen and Jiawei Du and Haibin Wu and Jyh-Shing Roger Jang and Hung-yi Lee},
  title     = {Neural Codec-based Adversarial Sample Detection for Speaker Verification},
  booktitle = {Interspeech 2024},
  year      = {2024},
  url       = {https://www.isca-archive.org/interspeech_2024/chen24p_interspeech.pdf}
}
% SLT 2024 workshop paper. The dataset acronym in the title is brace-protected
% so sentence-casing styles do not lowercase it.
@inproceedings{du2024dfadd,
  author    = {Jiawei Du and I-Ming Lin and I-Hsiang Chiu and Xuanjun Chen and Haibin Wu and Wenze Ren and Yu Tsao and Hung-yi Lee and Jyh-Shing Roger Jang},
  title     = {{DFADD}: The Diffusion and Flow-Matching based Audio Deepfake Dataset},
  booktitle = {2024 IEEE Spoken Language Technology Workshop (SLT)},
  year      = {2024}
}
% ICASSP 2024 conference paper, identified by DOI.
@inproceedings{chen2024multimodal,
  author    = {Xuanjun Chen and Haibin Wu and Chung-Che Wang and Hung-yi Lee and Jyh-Shing Roger Jang},
  title     = {Multimodal Transformer Distillation for Audio-Visual Synchronization},
  booktitle = {ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2024},
  pages     = {1--5},
  doi       = {10.1109/ICASSP48485.2024.10446372}
}
% SLT 2022 workshop paper, identified by DOI.
@inproceedings{chen2022push,
  author    = {Xuanjun Chen and Haibin Wu and Helen Meng and Hung-yi Lee and Jyh-Shing Roger Jang},
  title     = {Push-Pull: Characterizing the Adversarial Robustness for Audio-Visual Active Speaker Detection},
  booktitle = {2022 IEEE Spoken Language Technology Workshop (SLT)},
  year      = {2022},
  pages     = {1--8},
  doi       = {10.1109/SLT54892.2023.10022646}
}
% SPSC 2022 symposium paper. Liao is listed first, in agreement with the
% citation key and with the first-author slug in the archive URL.
% NOTE(review): confirm the order against the published paper.
@inproceedings{liao2022adversarial,
  author    = {Yen-Lun Liao and Xuanjun Chen and Chung-Che Wang and Jyh-Shing Roger Jang},
  title     = {Adversarial Speaker Distillation for Countermeasure Model on Automatic Speaker Verification},
  booktitle = {Proc. 2022 ISCA Symposium on Security and Privacy in Speech Communication},
  year      = {2022},
  url       = {https://www.isca-speech.org/archive/spsc_2022/liao22_spsc.html}
}
% arXiv preprint (cs.SD), identified by its eprint fields.
@misc{lee2026ssmttm,
  author        = {Wei-Jaw Lee and Fang-Chih Hsieh and Xuanjun Chen and Fang-Duo Tsai and Yi-Hsuan Yang},
  title         = {Training-Efficient Text-to-Music Generation with State-Space Modeling},
  year          = {2026},
  archivePrefix = {arXiv},
  eprint        = {2601.14786},
  primaryClass  = {cs.SD}
}
% ICASSP 2025 conference paper. The all-caps model name in the title is
% brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{ren2025leveraging,
  author    = {Ren, Wenze and Wu, Haibin and Lin, Yi-Cheng and Chen, Xuanjun and Chao, Rong and Hung, Kuo-Hsuan and Li, You-Jin and Ting, Wen-Yuan and Wang, Hsin-Min and Tsao, Yu},
  title     = {Leveraging Joint Spectral and Spatial Learning with {MAMBA} for Multichannel Speech Enhancement},
  booktitle = {ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2025},
  doi       = {10.1109/ICASSP49660.2025.10890412}
}
% O-COCOSDA 2024 conference paper, identified by DOI.
@inproceedings{lin2024singer,
  author    = {Lin, Hsuan-Yu and Chen, Xuanjun and Jang, Jyh-Shing Roger},
  title     = {Singer Separation for Karaoke Content Generation},
  booktitle = {2024 27th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)},
  year      = {2024},
  doi       = {10.1109/O-COCOSDA64382.2024.10800383}
}