Research | Ye Zhu

2024

Diffusion in Diffusion: Cyclic One-Way Diffusion for Text-Vision-Conditioned Generation

Ruoyu Wang*, Yongqi Yang*, Zhihao Qian, Ye Zhu, and Yu Wu

In The Twelfth International Conference on Learning Representations (ICLR), 2024

% ICLR 2024 conference paper. The 2024 edition of ICLR is the Twelfth (the
% Eleventh is the 2023 edition), so the booktitle ordinal is corrected here.
% The citation key is kept unchanged so existing citations keep resolving.
% NOTE(review): the asterisks in the author field presumably mark equal
% contribution for the rendered listing; confirm before removing them.
@inproceedings{yang2024diffusion,
  title     = {Diffusion in Diffusion: Cyclic One-Way Diffusion for Text-Vision-Conditioned Generation},
  author    = {Wang*, Ruoyu and Yang*, Yongqi and Qian, Zhihao and Zhu, Ye and Wu, Yu},
  booktitle = {The Twelfth International Conference on Learning Representations (ICLR)},
  year      = {2024},
}

CV/ML

Mining and Unifying Heterogeneous Contrastive Relations for Weakly-Supervised Actor-Action Segmentation

Bin Duan, Hao Tang, Changchang Sun, Ye Zhu, and Yan Yan

In Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), 2024

Bib PDF

% WACV 2024 conference paper on weakly-supervised actor-action segmentation.
@inproceedings{duan2024mining,
  author    = {Duan, Bin and Tang, Hao and Sun, Changchang and Zhu, Ye and Yan, Yan},
  title     = {Mining and Unifying Heterogeneous Contrastive Relations for Weakly-Supervised Actor-Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)},
  year      = {2024}
}

2023

Preprint

DETER: Detecting Edited Regions for Deterring Generative Manipulations

Sai Wang*, Ye Zhu*, Ruoyu Wang, Amaya Dharmasiri, Olga Russakovsky, and Yu Wu

arXiv preprint arXiv:2312.10539, 2023

Bib Page PDF

% arXiv preprint (2023). DETER is braced so sentence-casing styles do not
% lowercase the acronym. The journal field is kept verbatim for styles that
% only print journal; eprint/archiveprefix expose the arXiv identifier in
% machine-readable form for styles and tools that understand them.
% NOTE(review): the asterisks in the author field presumably mark equal
% contribution for the rendered listing; confirm before removing them.
@article{wang2023deter,
  title         = {{DETER}: Detecting Edited Regions for Deterring Generative Manipulations},
  author        = {Wang*, Sai and Zhu*, Ye and Wang, Ruoyu and Dharmasiri, Amaya and Russakovsky, Olga and Wu, Yu},
  journal       = {arXiv preprint arXiv:2312.10539},
  eprint        = {2312.10539},
  archiveprefix = {arXiv},
  year          = {2023},
}

Preprint

Unseen Image Synthesis with Diffusion Models

Ye Zhu, Yu Wu, Zhiwei Deng, Olga Russakovsky, and Yan Yan

arXiv preprint arXiv:2310.09213, 2023

Bib PDF

% arXiv preprint (2023). The journal field is kept verbatim for styles that
% only print journal; eprint/archiveprefix expose the arXiv identifier in
% machine-readable form for styles and tools that understand them.
@article{zhu2023unseen,
  title         = {Unseen Image Synthesis with Diffusion Models},
  author        = {Zhu, Ye and Wu, Yu and Deng, Zhiwei and Russakovsky, Olga and Yan, Yan},
  journal       = {arXiv preprint arXiv:2310.09213},
  eprint        = {2310.09213},
  archiveprefix = {arXiv},
  year          = {2023},
}

Boundary Guided Learning-Free Semantic Control with Diffusion Models

Ye Zhu, Yu Wu, Zhiwei Deng, Olga Russakovsky, and Yan Yan

In Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS), 2023

Bib Page PDF Code

% NeurIPS 2023 conference paper on learning-free semantic control with
% diffusion models.
@inproceedings{zhu2023boundary,
  author    = {Zhu, Ye and Wu, Yu and Deng, Zhiwei and Russakovsky, Olga and Yan, Yan},
  title     = {Boundary Guided Learning-Free Semantic Control with Diffusion Models},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS)},
  year      = {2023}
}

Discrete Contrastive Diffusion for Cross-Modal Music and Image Generation

Ye Zhu, Yu Wu, Kyle Olszewski, Jian Ren, Sergey Tulyakov, and Yan Yan

In The Eleventh International Conference on Learning Representations (ICLR), 2023

Bib Page PDF Code

% ICLR 2023 conference paper on contrastive discrete diffusion for
% cross-modal music and image generation.
@inproceedings{zhu2023discrete,
  author    = {Zhu, Ye and Wu, Yu and Olszewski, Kyle and Ren, Jian and Tulyakov, Sergey and Yan, Yan},
  title     = {Discrete Contrastive Diffusion for Cross-Modal Music and Image Generation},
  booktitle = {The Eleventh International Conference on Learning Representations (ICLR)},
  year      = {2023}
}

Denoising Diffusion Probabilistic Models to Predict the Density of Molecular Clouds

Duo Xu, Jonathan C Tan, Chia-Jung Hsu, and Ye Zhu

The Astrophysical Journal, 2023

Bib PDF

% Journal article in The Astrophysical Journal, volume 950, issue 2 (2023).
% The pages field holds the single value 146 rather than a page range.
@article{xu2023denoising,
  author    = {Xu, Duo and Tan, Jonathan C and Hsu, Chia-Jung and Zhu, Ye},
  title     = {Denoising Diffusion Probabilistic Models to Predict the Density of Molecular Clouds},
  journal   = {The Astrophysical Journal},
  publisher = {IOP Publishing},
  volume    = {950},
  number    = {2},
  pages     = {146},
  year      = {2023}
}

ML/RL

Discrete Diffusion Reward Guidance Methods for Offline Reinforcement Learning

Matthew Coleman, Olga Russakovsky, Christine Allen-Blanchette, and Ye Zhu

In ICML 2023 Workshop: Sampling and Optimization in Discrete Space, 2023

Bib PDF

% ICML 2023 workshop paper on diffusion-based reward guidance for offline
% reinforcement learning.
@inproceedings{coleman2023discrete,
  author    = {Coleman, Matthew and Russakovsky, Olga and Allen-Blanchette, Christine and Zhu, Ye},
  title     = {Discrete Diffusion Reward Guidance Methods for Offline Reinforcement Learning},
  booktitle = {ICML 2023 Workshop: Sampling and Optimization in Discrete Space},
  year      = {2023}
}

2022

Preprint

Vision+ X: A Survey on Multimodal Learning in the Light of Data

Ye Zhu, Yu Wu, Nicu Sebe, and Yan Yan

arXiv preprint arXiv:2210.02884, 2022

Bib PDF

% arXiv preprint (2022). The standalone X is braced so sentence-casing styles
% do not lowercase it. The journal field is kept verbatim for styles that only
% print journal; eprint/archiveprefix expose the arXiv identifier in
% machine-readable form. The citation key is kept unchanged so existing
% citations keep resolving.
@article{zhu2022vision+,
  title         = {Vision+ {X}: A Survey on Multimodal Learning in the Light of Data},
  author        = {Zhu, Ye and Wu, Yu and Sebe, Nicu and Yan, Yan},
  journal       = {arXiv preprint arXiv:2210.02884},
  eprint        = {2210.02884},
  archiveprefix = {arXiv},
  year          = {2022},
}

Quantized GAN for Complex Music Generation from Dance Videos

Ye Zhu, Kyle Olszewski, Yu Wu, Panos Achlioptas, Menglei Chai, Yan Yan, and Sergey Tulyakov

In European Conference on Computer Vision (ECCV), 2022

Bib Page PDF Code

% ECCV 2022 conference paper. GAN is braced so sentence-casing styles do not
% render the acronym as lowercase.
@inproceedings{zhu2022quantized,
  title        = {Quantized {GAN} for Complex Music Generation from Dance Videos},
  author       = {Zhu, Ye and Olszewski, Kyle and Wu, Yu and Achlioptas, Panos and Chai, Menglei and Yan, Yan and Tulyakov, Sergey},
  booktitle    = {European Conference on Computer Vision (ECCV)},
  pages        = {182--199},
  year         = {2022},
  organization = {Springer},
}

CV/ML

Saying the Unseen: Video Descriptions via Dialog Agents

Ye Zhu, Yu Wu, Yi Yang, and Yan Yan

IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2022

Bib PDF

% IEEE TPAMI journal article, volume 44, issue 10 (2022). The page range uses
% the double hyphen BibTeX expects for an en dash. The numeric citation key is
% kept unchanged so existing citations keep resolving.
@article{9468337,
  title   = {Saying the Unseen: Video Descriptions via Dialog Agents},
  author  = {Zhu, Ye and Wu, Yu and Yang, Yi and Yan, Yan},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  volume  = {44},
  number  = {10},
  pages   = {7190--7204},
  year    = {2022},
  doi     = {10.1109/TPAMI.2021.3093360},
}

CV/ML

Skeleton sequence and RGB frame based multi-modality feature fusion network for action recognition

Xiaoguang Zhu, Ye Zhu, Haoyu Wang, Honglin Wen, Yan Yan, and Peilin Liu

ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM), 2022

Bib PDF

% ACM TOMM journal article, volume 18, issue 3 (2022). RGB is braced so
% sentence-casing styles do not lowercase the acronym. The publisher name and
% its city are split into publisher and address instead of sharing one field.
@article{zhu2022skeleton,
  title     = {Skeleton sequence and {RGB} frame based multi-modality feature fusion network for action recognition},
  author    = {Zhu, Xiaoguang and Zhu, Ye and Wang, Haoyu and Wen, Honglin and Yan, Yan and Liu, Peilin},
  journal   = {ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)},
  volume    = {18},
  number    = {3},
  pages     = {1--24},
  year      = {2022},
  publisher = {ACM},
  address   = {New York, NY},
}

2021

CV/ML

Learning audio-visual correlations from variational cross-modal generation

Ye Zhu, Yu Wu, Hugo Latapie, Yi Yang, and Yan Yan

In ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2021

Bib PDF

% ICASSP 2021 conference paper. The proceedings name is written with the
% " - " separator between the short and long form; the collapsed "2021-2021"
% read as a year range.
@inproceedings{zhu2021learning,
  title        = {Learning audio-visual correlations from variational cross-modal generation},
  author       = {Zhu, Ye and Wu, Yu and Latapie, Hugo and Yang, Yi and Yan, Yan},
  booktitle    = {ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {4300--4304},
  year         = {2021},
  organization = {IEEE},
}

2020

CV/ML

Describing unseen videos via multi-modal cooperative dialog agents

Ye Zhu, Yu Wu, Yi Yang, and Yan Yan

In European Conference on Computer Vision (ECCV), 2020

Bib PDF Code

% ECCV 2020 conference paper on describing unseen videos with cooperative
% dialog agents.
@inproceedings{zhu2020describing,
  author       = {Zhu, Ye and Wu, Yu and Yang, Yi and Yan, Yan},
  title        = {Describing unseen videos via multi-modal cooperative dialog agents},
  booktitle    = {European Conference on Computer Vision (ECCV)},
  organization = {Springer},
  pages        = {153--169},
  year         = {2020}
}

CV/ML

Hierarchical HMM for eye movement classification

Ye Zhu, Yan Yan, and Oleg Komogortsev

In European Conference on Computer Vision Workshop (ECCV Workshop), 2020

Bib PDF

% ECCV 2020 workshop paper. HMM is braced so sentence-casing styles do not
% render the acronym as lowercase.
@inproceedings{zhu2020hierarchical,
  title        = {Hierarchical {HMM} for eye movement classification},
  author       = {Zhu, Ye and Yan, Yan and Komogortsev, Oleg},
  booktitle    = {European Conference on Computer Vision Workshop (ECCV Workshop)},
  pages        = {544--554},
  year         = {2020},
  organization = {Springer},
}