Research | Ye Zhu

2025

The Silent Assistant: NoiseQuery as Implicit Guidance for Goal-Driven Image Generation

Ruoyu Wang, Huayang Huang, Ye Zhu, Olga Russakovsky, and Yu Wu

In International Conference on Computer Vision (ICCV), 2025

% Braces keep the camel-case name "NoiseQuery" intact under styles that
% sentence-case titles.
@inproceedings{ywang2025silent,
  title = {The Silent Assistant: {NoiseQuery} as Implicit Guidance for Goal-Driven Image Generation},
  author = {Wang, Ruoyu and Huang, Huayang and Zhu, Ye and Russakovsky, Olga and Wu, Yu},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year = {2025},
}

D^3: Scaling Up Deepfake Detection by Learning from Discrepancy

Yongqi Yang, Zhihao Qian, Ye Zhu, Olga Russakovsky, and Yu Wu

In IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2025

Bib PDF Code

% The superscript must be in math mode: a bare caret in text mode stops LaTeX
% with "Missing $ inserted". The outer braces also shield it from recasing.
@inproceedings{yang2025d,
  title = {{$D^3$}: Scaling Up Deepfake Detection by Learning from Discrepancy},
  author = {Yang, Yongqi and Qian, Zhihao and Zhu, Ye and Russakovsky, Olga and Wu, Yu},
  booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2025},
}

Exploring Magnetic Fields in Molecular Clouds through Denoising Diffusion Probabilistic Models

Duo Xu, Jenna Karcheski, Chi-Yan Law, Ye Zhu, Chia-Jung Hsu, and Jonathan C Tan

The Astrophysical Journal (APJ), 2025

Bib PDF Code

@article{xu2025exploring,
  author  = {Xu, Duo and Karcheski, Jenna and Law, Chi-Yan and Zhu, Ye and Hsu, Chia-Jung and Tan, Jonathan C},
  title   = {Exploring Magnetic Fields in Molecular Clouds through Denoising Diffusion Probabilistic Models},
  journal = {The Astrophysical Journal (APJ)},
  year    = {2025},
}

2024

CV/ML

SOWing Information: Cultivating Contextual Coherence with MLLMs in Image Generation

Yuhan Pei, Ruoyu Wang, Yongqi Yang, Ye Zhu, Olga Russakovsky, and Yu Wu

arXiv preprint arXiv:2411.19182, 2024

Bib Page PDF

% Braces preserve the mixed-case "SOWing" and the acronym "MLLMs" under
% sentence-casing styles.
@article{pei2024sowing,
  title = {{SOWing} Information: Cultivating Contextual Coherence with {MLLMs} in Image Generation},
  author = {Pei, Yuhan and Wang, Ruoyu and Yang, Yongqi and Zhu, Ye and Russakovsky, Olga and Wu, Yu},
  journal = {arXiv preprint arXiv:2411.19182},
  year = {2024},
}

Vision + X: A Survey on Multimodal Learning in the Light of Data

Ye Zhu, Yu Wu, Nicu Sebe, and Yan Yan

IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2024

Bib PDF

% The standalone "X" is braced so sentence-casing styles do not print "vision + x".
@article{zhu2022vision,
  title = {Vision + {X}: A Survey on Multimodal Learning in the Light of Data},
  author = {Zhu, Ye and Wu, Yu and Sebe, Nicu and Yan, Yan},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  year = {2024},
}

CV/ML

Discovery and Expansion of New Domains within Diffusion Models

Ye Zhu, Yu Wu, Duo Xu, Zhiwei Deng, Olga Russakovsky, and Yan Yan

arXiv preprint arXiv:2310.09213, 2024

Bib PDF Code

@article{zhu2024discovery,
  author  = {Zhu, Ye and Wu, Yu and Xu, Duo and Deng, Zhiwei and Russakovsky, Olga and Yan, Yan},
  title   = {Discovery and Expansion of New Domains within Diffusion Models},
  journal = {arXiv preprint arXiv:2310.09213},
  year    = {2024},
}

What is Dataset Distillation Learning?

William Yang, Ye Zhu, Zhiwei Deng, and Olga Russakovsky

In International Conference on Machine Learning (ICML), 2024

Bib PDF Code

@inproceedings{yang2024what,
  author    = {Yang, William and Zhu, Ye and Deng, Zhiwei and Russakovsky, Olga},
  title     = {What is Dataset Distillation Learning?},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2024},
}

Surveying image segmentation approaches in astronomy

Duo Xu and Ye Zhu

Astronomy and Computing, 2024

Bib PDF

@article{xu2024surveying,
  author    = {Xu, Duo and Zhu, Ye},
  title     = {Surveying image segmentation approaches in astronomy},
  journal   = {Astronomy and Computing},
  year      = {2024},
  pages     = {100838},
  publisher = {Elsevier},
}

Diffusion in Diffusion: Cyclic One-Way Diffusion for Text-Vision-Conditioned Generation

Ruoyu Wang*, Yongqi Yang*, Zhihao Qian, Ye Zhu, and Yu Wu

In The Twelfth International Conference on Learning Representations (ICLR), 2024

Bib Page PDF Code

% NOTE(review): the original marked Wang and Yang with "*" inside the surname
% (presumably equal contribution; confirm). The markers are removed from the
% author field because BibTeX treats them as part of the last name, which
% breaks sorting and name abbreviation.
% ICLR 2024 is the Twelfth edition; the Eleventh was ICLR 2023.
@inproceedings{wang2024diffusion,
  title = {Diffusion in Diffusion: Cyclic One-Way Diffusion for Text-Vision-Conditioned Generation},
  author = {Wang, Ruoyu and Yang, Yongqi and Qian, Zhihao and Zhu, Ye and Wu, Yu},
  booktitle = {The Twelfth International Conference on Learning Representations (ICLR)},
  year = {2024},
}

Mining and Unifying Heterogeneous Contrastive Relations for Weakly-Supervised Actor-Action Segmentation

Bin Duan, Hao Tang, Changchang Sun, Ye Zhu, and Yan Yan

In IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), 2024

Bib PDF

@inproceedings{duan2024mining,
  author    = {Duan, Bin and Tang, Hao and Sun, Changchang and Zhu, Ye and Yan, Yan},
  title     = {Mining and Unifying Heterogeneous Contrastive Relations for Weakly-Supervised Actor-Action Segmentation},
  booktitle = {IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)},
  year      = {2024},
}

2023

CV/ML

DETER: Detecting Edited Regions for Deterring Generative Manipulations

Sai Wang*, Ye Zhu*, Ruoyu Wang, Amaya Dharmasiri, Olga Russakovsky, and Yu Wu

arXiv preprint arXiv:2312.10539, 2023

Bib Page PDF

% NOTE(review): the original marked Wang (Sai) and Zhu with "*" inside the
% surname (presumably equal contribution; confirm). The markers are removed
% because BibTeX treats them as part of the last name.
% Braces keep the acronym "DETER" upper-case under sentence-casing styles.
@article{wang2023deter,
  title = {{DETER}: Detecting Edited Regions for Deterring Generative Manipulations},
  author = {Wang, Sai and Zhu, Ye and Wang, Ruoyu and Dharmasiri, Amaya and Russakovsky, Olga and Wu, Yu},
  journal = {arXiv preprint arXiv:2312.10539},
  year = {2023},
}

Boundary Guided Learning-Free Semantic Control with Diffusion Models

Ye Zhu, Yu Wu, Zhiwei Deng, Olga Russakovsky, and Yan Yan

In Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS), 2023

Bib Page PDF Code

@inproceedings{zhu2023boundary,
  author    = {Zhu, Ye and Wu, Yu and Deng, Zhiwei and Russakovsky, Olga and Yan, Yan},
  title     = {Boundary Guided Learning-Free Semantic Control with Diffusion Models},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS)},
  year      = {2023},
}

Discrete Contrastive Diffusion for Cross-Modal Music and Image Generation

Ye Zhu, Yu Wu, Kyle Olszewski, Jian Ren, Sergey Tulyakov, and Yan Yan

In The Eleventh International Conference on Learning Representations (ICLR), 2023

Bib Page PDF Code

@inproceedings{zhu2023discrete,
  author    = {Zhu, Ye and Wu, Yu and Olszewski, Kyle and Ren, Jian and Tulyakov, Sergey and Yan, Yan},
  title     = {Discrete Contrastive Diffusion for Cross-Modal Music and Image Generation},
  booktitle = {The Eleventh International Conference on Learning Representations (ICLR)},
  year      = {2023},
}

Denoising Diffusion Probabilistic Models to Predict the Density of Molecular Clouds

Duo Xu, Jonathan C Tan, Chia-Jung Hsu, and Ye Zhu

The Astrophysical Journal (APJ), 2023

Bib PDF

@article{xu2023denoising,
  author    = {Xu, Duo and Tan, Jonathan C and Hsu, Chia-Jung and Zhu, Ye},
  title     = {Denoising Diffusion Probabilistic Models to Predict the Density of Molecular Clouds},
  journal   = {The Astrophysical Journal (APJ)},
  year      = {2023},
  volume    = {950},
  number    = {2},
  pages     = {146},
  publisher = {IOP Publishing},
}

Discrete Diffusion Reward Guidance Methods for Offline Reinforcement Learning

Matthew Coleman, Olga Russakovsky, Christine Allen-Blanchette, and Ye Zhu

In ICML 2023 Workshop: Sampling and Optimization in Discrete Space, 2023

Bib PDF

@inproceedings{coleman2023discrete,
  author    = {Coleman, Matthew and Russakovsky, Olga and Allen-Blanchette, Christine and Zhu, Ye},
  title     = {Discrete Diffusion Reward Guidance Methods for Offline Reinforcement Learning},
  booktitle = {ICML 2023 Workshop: Sampling and Optimization in Discrete Space},
  year      = {2023},
}

2022

Quantized GAN for Complex Music Generation from Dance Videos

Ye Zhu, Kyle Olszewski, Yu Wu, Panos Achlioptas, Menglei Chai, Yan Yan, and Sergey Tulyakov

In European Conference on Computer Vision (ECCV), 2022

Bib Page PDF Code

% Braces keep the acronym "GAN" upper-case under sentence-casing styles.
% Springer publishes the proceedings, so it belongs in the publisher field
% rather than organization (which denotes the sponsoring body).
@inproceedings{zhu2022quantized,
  title = {Quantized {GAN} for Complex Music Generation from Dance Videos},
  author = {Zhu, Ye and Olszewski, Kyle and Wu, Yu and Achlioptas, Panos and Chai, Menglei and Yan, Yan and Tulyakov, Sergey},
  booktitle = {European Conference on Computer Vision (ECCV)},
  pages = {182--199},
  year = {2022},
  publisher = {Springer},
}

Saying the Unseen: Video Descriptions via Dialog Agents

Ye Zhu, Yu Wu, Yi Yang, and Yan Yan

IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2022

Bib PDF

% Page ranges use a double hyphen so they typeset as an en dash.
@article{9468337,
  author = {Zhu, Ye and Wu, Yu and Yang, Yi and Yan, Yan},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  title = {Saying the Unseen: Video Descriptions via Dialog Agents},
  year = {2022},
  volume = {44},
  number = {10},
  pages = {7190--7204},
  doi = {10.1109/TPAMI.2021.3093360},
}

Skeleton sequence and RGB frame based multi-modality feature fusion network for action recognition

Xiaoguang Zhu, Ye Zhu, Haoyu Wang, Honglin Wen, Yan Yan, and Peilin Liu

ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM), 2022

Bib PDF

% Braces keep the acronym "RGB" upper-case under sentence-casing styles.
% The publisher name and its city are split into publisher and address.
@article{zhu2022skeleton,
  title = {Skeleton sequence and {RGB} frame based multi-modality feature fusion network for action recognition},
  author = {Zhu, Xiaoguang and Zhu, Ye and Wang, Haoyu and Wen, Honglin and Yan, Yan and Liu, Peilin},
  journal = {ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)},
  volume = {18},
  number = {3},
  pages = {1--24},
  year = {2022},
  publisher = {ACM},
  address = {New York, NY},
}

2021

Learning audio-visual correlations from variational cross-modal generation

Ye Zhu, Yu Wu, Hugo Latapie, Yi Yang, and Yan Yan

In ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2021

Bib PDF

@inproceedings{zhu2021learning,
  author       = {Zhu, Ye and Wu, Yu and Latapie, Hugo and Yang, Yi and Yan, Yan},
  title        = {Learning audio-visual correlations from variational cross-modal generation},
  booktitle    = {ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year         = {2021},
  pages        = {4300--4304},
  organization = {IEEE},
}

2020

Describing unseen videos via multi-modal cooperative dialog agents

Ye Zhu, Yu Wu, Yi Yang, and Yan Yan

In European Conference on Computer Vision (ECCV), 2020

Bib PDF Code

% Springer publishes the proceedings, so it belongs in the publisher field
% rather than organization (which denotes the sponsoring body).
@inproceedings{zhu2020describing,
  title = {Describing unseen videos via multi-modal cooperative dialog agents},
  author = {Zhu, Ye and Wu, Yu and Yang, Yi and Yan, Yan},
  booktitle = {European Conference on Computer Vision (ECCV)},
  pages = {153--169},
  year = {2020},
  publisher = {Springer},
}

Hierarchical HMM for eye movement classification

Ye Zhu, Yan Yan, and Oleg Komogortsev

In European Conference on Computer Vision Workshop (ECCV Workshop), 2020

Bib PDF

% Braces keep the acronym "HMM" upper-case under sentence-casing styles.
% Springer publishes the proceedings, so it belongs in the publisher field
% rather than organization (which denotes the sponsoring body).
@inproceedings{zhu2020hierarchical,
  title = {Hierarchical {HMM} for eye movement classification},
  author = {Zhu, Ye and Yan, Yan and Komogortsev, Oleg},
  booktitle = {European Conference on Computer Vision Workshop (ECCV Workshop)},
  pages = {544--554},
  year = {2020},
  publisher = {Springer},
}