% Publication list with BibTeX records, grouped by year (newest first).
% BibTeX ignores text outside entries, so every record is preceded by the
% listing text that accompanied it: optional topic tag in brackets, title,
% author/venue line, and the link labels shown in the listing.
%
% Conventions used below:
%   - Citation keys are unchanged so existing citations keep resolving.
%   - Words braced inside a title keep their capitalisation under
%     sentence-casing styles.
%   - arXiv records keep their original journal string and additionally carry
%     eprint/archiveprefix so the identifier is machine-readable.
%   - A '*' after a surname is kept exactly as in the listing
%     (presumably an equal-contribution marker; confirm with the author).

% ===== 2024 =====

% [CV/ML] Vision + X: A Survey on Multimodal Learning in the Light of Data
%   Ye Zhu, Yu Wu, Nicu Sebe, and Yan Yan
%   IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2024 | Bib PDF
@article{zhu2022vision,
  author  = {Zhu, Ye and Wu, Yu and Sebe, Nicu and Yan, Yan},
  title   = {Vision + {X}: A Survey on Multimodal Learning in the Light of Data},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  year    = {2024},
}

% Discovery and Expansion of New Domains within Diffusion Models
%   Ye Zhu, Yu Wu, Duo Xu, Zhiwei Deng, Olga Russakovsky, and Yan Yan
%   arXiv preprint arXiv:2310.09213, 2024 | Bib PDF Code
@article{zhu2024discovery,
  author        = {Zhu, Ye and Wu, Yu and Xu, Duo and Deng, Zhiwei and Russakovsky, Olga and Yan, Yan},
  title         = {Discovery and Expansion of New Domains within Diffusion Models},
  journal       = {arXiv preprint arXiv:2310.09213},
  year          = {2024},
  eprint        = {2310.09213},
  archiveprefix = {arXiv},
}

% What is Dataset Distillation Learning?
%   William Yang, Ye Zhu, Zhiwei Deng, and Olga Russakovsky
%   In The Forty-first International Conference on Machine Learning (ICML), 2024 | Bib PDF
@inproceedings{yang2024what,
  author    = {Yang, William and Zhu, Ye and Deng, Zhiwei and Russakovsky, Olga},
  title     = {What is Dataset Distillation Learning?},
  booktitle = {The Forty-first International Conference on Machine Learning (ICML)},
  year      = {2024},
}

% [ML/Physics] Surveying image segmentation approaches in astronomy
%   Duo Xu and Ye Zhu
%   Astronomy and Computing, 2024 | Bib PDF
@article{xu2024surveying,
  author    = {Xu, Duo and Zhu, Ye},
  title     = {Surveying image segmentation approaches in astronomy},
  journal   = {Astronomy and Computing},
  pages     = {100838},
  year      = {2024},
  publisher = {Elsevier},
}

% [CV/ML] D^3: Scaling Up Deepfake Detection by Learning from Discrepancy
%   Yongqi Yang, Zhihao Qian, Ye Zhu, and Yu Wu
%   arXiv preprint arXiv:2404.04584, 2024 | Bib PDF
% The superscript is written in math mode: a bare ^ in text is a LaTeX error.
@article{yang2024d,
  author        = {Yang, Yongqi and Qian, Zhihao and Zhu, Ye and Wu, Yu},
  title         = {{D$^3$}: Scaling Up Deepfake Detection by Learning from Discrepancy},
  journal       = {arXiv preprint arXiv:2404.04584},
  year          = {2024},
  eprint        = {2404.04584},
  archiveprefix = {arXiv},
}

% Diffusion in Diffusion: Cyclic One-Way Diffusion for Text-Vision-Conditioned Generation
%   Ruoyu Wang*, Yongqi Yang*, Zhihao Qian, Ye Zhu, and Yu Wu
%   In The Twelfth International Conference on Learning Representations (ICLR), 2024 | Bib Page PDF Code
% The 2024 edition of ICLR is the Twelfth (the Eleventh is the 2023 edition).
@inproceedings{wang2024diffusion,
  author    = {Wang*, Ruoyu and Yang*, Yongqi and Qian, Zhihao and Zhu, Ye and Wu, Yu},
  title     = {Diffusion in Diffusion: Cyclic One-Way Diffusion for Text-Vision-Conditioned Generation},
  booktitle = {The Twelfth International Conference on Learning Representations (ICLR)},
  year      = {2024},
}

% [CV/ML] Mining and Unifying Heterogeneous Contrastive Relations for Weakly-Supervised Actor-Action Segmentation
%   Bin Duan, Hao Tang, Changchang Sun, Ye Zhu, and Yan Yan
%   In Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), 2024 | Bib PDF
@inproceedings{duan2024mining,
  author    = {Duan, Bin and Tang, Hao and Sun, Changchang and Zhu, Ye and Yan, Yan},
  title     = {Mining and Unifying Heterogeneous Contrastive Relations for Weakly-Supervised Actor-Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)},
  year      = {2024},
}

% ===== 2023 =====

% [Preprint] DETER: Detecting Edited Regions for Deterring Generative Manipulations
%   Sai Wang*, Ye Zhu*, Ruoyu Wang, Amaya Dharmasiri, Olga Russakovsky, and Yu Wu
%   arXiv preprint arXiv:2312.10539, 2023 | Bib Page PDF
@article{wang2023deter,
  author        = {Wang*, Sai and Zhu*, Ye and Wang, Ruoyu and Dharmasiri, Amaya and Russakovsky, Olga and Wu, Yu},
  title         = {{DETER}: Detecting Edited Regions for Deterring Generative Manipulations},
  journal       = {arXiv preprint arXiv:2312.10539},
  year          = {2023},
  eprint        = {2312.10539},
  archiveprefix = {arXiv},
}

% Boundary Guided Learning-Free Semantic Control with Diffusion Models
%   Ye Zhu, Yu Wu, Zhiwei Deng, Olga Russakovsky, and Yan Yan
%   In Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS), 2023 | Bib Page PDF Code
@inproceedings{zhu2023boundary,
  author    = {Zhu, Ye and Wu, Yu and Deng, Zhiwei and Russakovsky, Olga and Yan, Yan},
  title     = {Boundary Guided Learning-Free Semantic Control with Diffusion Models},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS)},
  year      = {2023},
}

% Discrete Contrastive Diffusion for Cross-Modal Music and Image Generation
%   Ye Zhu, Yu Wu, Kyle Olszewski, Jian Ren, Sergey Tulyakov, and Yan Yan
%   In The Eleventh International Conference on Learning Representations (ICLR), 2023 | Bib Page PDF Code
@inproceedings{zhu2023discrete,
  author    = {Zhu, Ye and Wu, Yu and Olszewski, Kyle and Ren, Jian and Tulyakov, Sergey and Yan, Yan},
  title     = {Discrete Contrastive Diffusion for Cross-Modal Music and Image Generation},
  booktitle = {The Eleventh International Conference on Learning Representations (ICLR)},
  year      = {2023},
}

% Denoising Diffusion Probabilistic Models to Predict the Density of Molecular Clouds
%   Duo Xu, Jonathan C Tan, Chia-Jung Hsu, and Ye Zhu
%   The Astrophysical Journal, 2023 | Bib PDF
@article{xu2023denoising,
  author    = {Xu, Duo and Tan, Jonathan C. and Hsu, Chia-Jung and Zhu, Ye},
  title     = {Denoising Diffusion Probabilistic Models to Predict the Density of Molecular Clouds},
  journal   = {The Astrophysical Journal},
  volume    = {950},
  number    = {2},
  pages     = {146},
  year      = {2023},
  publisher = {IOP Publishing},
}

% [ML/RL] Discrete Diffusion Reward Guidance Methods for Offline Reinforcement Learning
%   Matthew Coleman, Olga Russakovsky, Christine Allen-Blanchette, and Ye Zhu
%   In ICML 2023 Workshop: Sampling and Optimization in Discrete Space, 2023 | Bib PDF
@inproceedings{coleman2023discrete,
  author    = {Coleman, Matthew and Russakovsky, Olga and Allen-Blanchette, Christine and Zhu, Ye},
  title     = {Discrete Diffusion Reward Guidance Methods for Offline Reinforcement Learning},
  booktitle = {ICML 2023 Workshop: Sampling and Optimization in Discrete Space},
  year      = {2023},
}

% ===== 2022 =====

% Quantized GAN for Complex Music Generation from Dance Videos
%   Ye Zhu, Kyle Olszewski, Yu Wu, Panos Achlioptas, Menglei Chai, Yan Yan, and Sergey Tulyakov
%   In European Conference on Computer Vision (ECCV), 2022 | Bib Page PDF Code
@inproceedings{zhu2022quantized,
  author    = {Zhu, Ye and Olszewski, Kyle and Wu, Yu and Achlioptas, Panos and Chai, Menglei and Yan, Yan and Tulyakov, Sergey},
  title     = {Quantized {GAN} for Complex Music Generation from Dance Videos},
  booktitle = {European Conference on Computer Vision (ECCV)},
  pages     = {182--199},
  year      = {2022},
  publisher = {Springer},
}

% [CV/ML] Saying the Unseen: Video Descriptions via Dialog Agents
%   Ye Zhu, Yu Wu, Yi Yang, and Yan Yan
%   IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2022 | Bib PDF
@article{9468337,
  author  = {Zhu, Ye and Wu, Yu and Yang, Yi and Yan, Yan},
  title   = {Saying the Unseen: Video Descriptions via Dialog Agents},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  volume  = {44},
  number  = {10},
  pages   = {7190--7204},
  year    = {2022},
  doi     = {10.1109/TPAMI.2021.3093360},
}

% [CV/ML] Skeleton sequence and RGB frame based multi-modality feature fusion network for action recognition
%   Xiaoguang Zhu, Ye Zhu, Haoyu Wang, Honglin Wen, Yan Yan, and Peilin Liu
%   ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM), 2022 | Bib PDF
@article{zhu2022skeleton,
  author    = {Zhu, Xiaoguang and Zhu, Ye and Wang, Haoyu and Wen, Honglin and Yan, Yan and Liu, Peilin},
  title     = {Skeleton sequence and {RGB} frame based multi-modality feature fusion network for action recognition},
  journal   = {ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)},
  volume    = {18},
  number    = {3},
  pages     = {1--24},
  year      = {2022},
  publisher = {ACM},
  address   = {New York, NY},
}

% ===== 2021 =====

% [CV/ML] Learning audio-visual correlations from variational cross-modal generation
%   Ye Zhu, Yu Wu, Hugo Latapie, Yi Yang, and Yan Yan
%   In ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2021 | Bib PDF
@inproceedings{zhu2021learning,
  author    = {Zhu, Ye and Wu, Yu and Latapie, Hugo and Yang, Yi and Yan, Yan},
  title     = {Learning audio-visual correlations from variational cross-modal generation},
  booktitle = {ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {4300--4304},
  year      = {2021},
  publisher = {IEEE},
}

% ===== 2020 =====

% [CV/ML] Describing unseen videos via multi-modal cooperative dialog agents
%   Ye Zhu, Yu Wu, Yi Yang, and Yan Yan
%   In European Conference on Computer Vision (ECCV), 2020 | Bib PDF Code
@inproceedings{zhu2020describing,
  author    = {Zhu, Ye and Wu, Yu and Yang, Yi and Yan, Yan},
  title     = {Describing unseen videos via multi-modal cooperative dialog agents},
  booktitle = {European Conference on Computer Vision (ECCV)},
  pages     = {153--169},
  year      = {2020},
  publisher = {Springer},
}

% [CV/ML] Hierarchical HMM for eye movement classification
%   Ye Zhu, Yan Yan, and Oleg Komogortsev
%   In European Conference on Computer Vision Workshop (ECCV Workshop), 2020 | Bib PDF
@inproceedings{zhu2020hierarchical,
  author    = {Zhu, Ye and Yan, Yan and Komogortsev, Oleg},
  title     = {Hierarchical {HMM} for eye movement classification},
  booktitle = {European Conference on Computer Vision Workshop (ECCV Workshop)},
  pages     = {544--554},
  year      = {2020},
  publisher = {Springer},
}