Publications

Ho, C.-H., Peng, K.-C., Vasconcelos, N., "Long-Tailed Anomaly Detection with Learnable Class Names", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2024.
BibTeX TR2024-040 PDF Presentation
- @inproceedings{Ho2024jun,
- author = {Ho, Chih-Hui and Peng, Kuan-Chuan and Vasconcelos, Nuno},
- title = {Long-Tailed Anomaly Detection with Learnable Class Names},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2024,
- month = jun,
- url = {https://www.merl.com/publications/TR2024-040}
- }
Liu, X., Tai, Y.-W., Tang, C.-K., Miraldo, P., Lohit, S., Chatterjee, M., "Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2024.
BibTeX TR2024-042 PDF
- @inproceedings{Liu2024may,
- author = {Liu, Xinhang and Tai, Yu-Wing and Tang, Chi-Keung and Miraldo, Pedro and Lohit, Suhas and Chatterjee, Moitreya},
- title = {Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2024,
- month = may,
- url = {https://www.merl.com/publications/TR2024-042}
- }
Dey, R., Egger, B., Boddeti, V., Wang, Y., Marks, T.K., "CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), April 2024.
BibTeX TR2024-045 PDF
- @inproceedings{Dey2024apr,
- author = {Dey, Rahul and Egger, Bernhard and Boddeti, Vishnu and Wang, Ye and Marks, Tim K.},
- title = {CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
- year = 2024,
- month = apr,
- url = {https://www.merl.com/publications/TR2024-045}
- }
Pan, Z., Wichern, G., Germain, F.G., Subramanian, A., Le Roux, J., "Late Audio-Visual Fusion for In-The-Wild Speaker Diarization", Hands-free Speech Communication and Microphone Arrays (HSCMA), April 2024.
BibTeX TR2024-029 PDF
- @inproceedings{Pan2024apr,
- author = {Pan, Zexu and Wichern, Gordon and Germain, François G. and Subramanian, Aswin and Le Roux, Jonathan},
- title = {Late Audio-Visual Fusion for In-The-Wild Speaker Diarization},
- booktitle = {Hands-free Speech Communication and Microphone Arrays (HSCMA)},
- year = 2024,
- month = apr,
- url = {https://www.merl.com/publications/TR2024-029}
- }
Yang, Z., Liu, J., Chen, P., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), April 2024.
BibTeX TR2024-043 PDF
- @inproceedings{Yang2024apr,
- author = {Yang, Zeyuan and Liu, Jiageng and Chen, Peihao and Cherian, Anoop and Marks, Tim K. and Le Roux, Jonathan and Gan, Chuang},
- title = {RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2024,
- month = apr,
- url = {https://www.merl.com/publications/TR2024-043}
- }
Gaur, A., Pais, G., Miraldo, P., "Oriented-grid Encoder for 3D Implicit Representations", International Conference on 3D Vision (3DV), March 2024.
BibTeX TR2024-031 PDF
- @inproceedings{Gaur2024mar,
- author = {Gaur, Arihant and Pais, Goncalo and Miraldo, Pedro},
- title = {Oriented-grid Encoder for 3D Implicit Representations},
- booktitle = {International Conference on 3D Vision (3DV)},
- year = 2024,
- month = mar,
- url = {https://www.merl.com/publications/TR2024-031}
- }
Sholokhov, A., Rapp, J., Nabi, S., Brunton, S., Kutz, N., Mansour, H., "Single-pixel imaging of dynamic flows using Neural ODE regularization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2024.
BibTeX TR2024-024 PDF
- @inproceedings{Sholokhov2024mar,
- author = {Sholokhov, Aleksei and Rapp, Joshua and Nabi, Saleh and Brunton, Steven and Kutz, Nathan and Mansour, Hassan},
- title = {Single-pixel imaging of dynamic flows using Neural ODE regularization},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2024,
- month = mar,
- url = {https://www.merl.com/publications/TR2024-024}
- }
Hori, C., Wang, P., Rahman, M., Vaca-Rubio, C., Khurana, S., Cherian, A., Le Roux, J., "Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447600, March 2024, pp. 13296-13300.
BibTeX TR2024-012 PDF
- @inproceedings{Hori2024mar,
- author = {Hori, Chiori and Wang, Pu and Rahman, Mahbub and Vaca-Rubio, Cristian and Khurana, Sameer and Cherian, Anoop and Le Roux, Jonathan},
- title = {Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2024,
- pages = {13296--13300},
- month = mar,
- publisher = {IEEE},
- doi = {10.1109/ICASSP48485.2024.10447600},
- issn = {2379-190X},
- isbn = {979-8-3503-4485-1},
- url = {https://www.merl.com/publications/TR2024-012}
- }
Srinivas, N., Vinod, A.P., Di Cairano, S., Weiss, A., "Lunar Landing with Feasible Divert using Controllable Sets", AIAA SciTech, DOI: 10.2514/6.2024-0324, January 2024, pp. AIAA 2024-0324.
BibTeX TR2024-004 PDF
- @inproceedings{Srinivas2024jan,
- author = {Srinivas, Neeraj and Vinod, Abraham P. and Di Cairano, Stefano and Weiss, Avishai},
- title = {Lunar Landing with Feasible Divert using Controllable Sets},
- booktitle = {AIAA SCITECH 2024 Forum},
- year = 2024,
- pages = {AIAA 2024-0324},
- month = jan,
- doi = {10.2514/6.2024-0324},
- url = {https://www.merl.com/publications/TR2024-004}
- }
Carmichael, Z., Lohit, S., Cherian, A., Jones, M.J., Scheirer, W., "Pixel-Grounded Prototypical Part Networks", IEEE Winter Conference on Applications of Computer Vision (WACV), January 2024.
BibTeX TR2024-002 PDF Presentation
- @inproceedings{Carmichael2024jan,
- author = {Carmichael, Zachariah and Lohit, Suhas and Cherian, Anoop and Jones, Michael J. and Scheirer, Walter},
- title = {Pixel-Grounded Prototypical Part Networks},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2024,
- month = jan,
- url = {https://www.merl.com/publications/TR2024-002}
- }
Liu, X., Paul, S., Chatterjee, M., Cherian, A., "CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v38i4.28167, December 2023, pp. 3765-3773.
BibTeX TR2023-154 PDF
- @inproceedings{Liu2023dec2,
- author = {Liu, Xiulong and Paul, Sudipta and Chatterjee, Moitreya and Cherian, Anoop},
- title = {CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments},
- booktitle = {Proceedings of the 38th AAAI Conference on Artificial Intelligence},
- year = 2023,
- pages = {3765--3773},
- month = dec,
- doi = {10.1609/aaai.v38i4.28167},
- url = {https://www.merl.com/publications/TR2023-154}
- }
Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU57964.2023.10389618, December 2023.
BibTeX TR2023-152 PDF
- @inproceedings{Pan2023dec2,
- author = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G. and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
- title = {Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2023,
- month = dec,
- doi = {10.1109/ASRU57964.2023.10389618},
- isbn = {979-8-3503-0689-7},
- url = {https://www.merl.com/publications/TR2023-152}
- }
He, Y., Shin, S., Cherian, A., Markham, A., Trigoni, N., "Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2023, pp. 5496-5507.
BibTeX TR2023-144 PDF
- @inproceedings{He2023dec,
- author = {He, Yuhang and Shin, Sangyun and Cherian, Anoop and Markham, Andrew and Trigoni, Niki},
- title = {Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2023,
- pages = {5496--5507},
- month = dec,
- url = {https://www.merl.com/publications/TR2023-144}
- }
Delattre, F., Dirnfeld, D., Nguyen, P., Scarano, S., Jones, M.J., Miraldo, P., Learned-Miller, E., "Robust Frame-to-Frame Camera Rotation Estimation in Crowded Scenes", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/ICCV51070.2023.00894, October 2023, pp. 3715-3724.
BibTeX TR2023-123 PDF Video Software
- @inproceedings{Delattre2023oct,
- author = {Delattre, Fabien and Dirnfeld, David and Nguyen, Phat and Scarano, Stephen and Jones, Michael J. and Miraldo, Pedro and Learned-Miller, Erik},
- title = {Robust Frame-to-Frame Camera Rotation Estimation in Crowded Scenes},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year = 2023,
- pages = {3715--3724},
- month = oct,
- publisher = {IEEE/CVF},
- doi = {10.1109/ICCV51070.2023.00894},
- issn = {2380-7504},
- isbn = {979-8-3503-0718-4},
- url = {https://www.merl.com/publications/TR2023-123}
- }
Miraldo, P., Piedade, V., "BANSAC: A dynamic BAyesian Network for adaptive SAmple Consensus", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/ICCV51070.2023.00346, October 2023, pp. 3715-3724.
BibTeX TR2023-124 PDF Video Software
- @inproceedings{Miraldo2023oct,
- author = {Miraldo, Pedro and Piedade, Valter},
- title = {BANSAC: A dynamic BAyesian Network for adaptive SAmple Consensus},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year = 2023,
- pages = {3715--3724},
- month = oct,
- publisher = {IEEE/CVF},
- doi = {10.1109/ICCV51070.2023.00346},
- issn = {2380-7504},
- isbn = {979-8-3503-0718-4},
- url = {https://www.merl.com/publications/TR2023-124}
- }
Nair, N.G., Cherian, A., Lohit, S., Wang, Y., Koike-Akino, T., Patel, V.M., Marks, T.K., "Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis", IEEE International Conference on Computer Vision (ICCV), October 2023, pp. 20850-20860.
BibTeX TR2023-126 PDF Presentation
- @inproceedings{Nair2023sep,
- author = {Nair, Nithin Gopalakrishnan and Cherian, Anoop and Lohit, Suhas and Wang, Ye and Koike-Akino, Toshiaki and Patel, Vishal M. and Marks, Tim K.},
- title = {Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis},
- booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
- year = 2023,
- pages = {20850--20860},
- month = oct,
- publisher = {IEEE/CVF},
- url = {https://www.merl.com/publications/TR2023-126}
- }
Sharma, M., Chatterjee, M., Peng, K.-C., Lohit, S., Jones, M.J., "Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection", IEEE International Conference on Computer Vision Workshops (ICCVW), October 2023, pp. 924-932.
BibTeX TR2023-125 PDF Presentation
- @inproceedings{Sharma2023oct,
- author = {Sharma, Manish and Chatterjee, Moitreya and Peng, Kuan-Chuan and Lohit, Suhas and Jones, Michael J.},
- title = {Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection},
- booktitle = {IEEE International Conference on Computer Vision Workshops (ICCVW)},
- year = 2023,
- pages = {924--932},
- month = oct,
- url = {https://www.merl.com/publications/TR2023-125}
- }
Shenoy, V., Marks, T.K., Mansour, H., Lohit, S., "Unrolled IPPG: Video Heart Rate Estimation via Unrolling Proximal Gradient Descent", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP49359.2023.10222169, September 2023, pp. 2715-2719.
BibTeX TR2023-116 PDF Video
- @inproceedings{Shenoy2023sep,
- author = {Shenoy, Vineet and Marks, Tim K. and Mansour, Hassan and Lohit, Suhas},
- title = {Unrolled IPPG: Video Heart Rate Estimation via Unrolling Proximal Gradient Descent},
- booktitle = {IEEE International Conference on Image Processing (ICIP)},
- year = 2023,
- pages = {2715--2719},
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/ICIP49359.2023.10222169},
- isbn = {978-1-7281-9835-4},
- url = {https://www.merl.com/publications/TR2023-116}
- }
Yoshino, K., Chen, Y.-N., Crook, P., Kottur, S., Li, J., Hedayatnia, B., Moon, S., Fei, Z., Li, Z., Zhang, J., Feng, Y., Zhou, J., Kim, S., Liu, Y., Jin, D., Papangelis, A., Gopalakrishnan, K., Hakkani-Tur, D., Damavandi, B., Geramifard, A., Hori, C., Shah, A., Zhang, C., Li, H., Sedoc, J., D’Haro, L.F., Banchs, R., Rudnicky, A., "Overview of the Tenth Dialog System Technology Challenge: DSTC10", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2023.3293030, pp. 1-14, August 2023.
BibTeX TR2023-109 PDF
- @article{Yoshino2023aug,
- author = {Yoshino, Koichiro and Chen, Yun-Nung and Crook, Paul and Kottur, Satwik and Li, Jinchao and Hedayatnia, Behnam and Moon, Seungwhan and Fei, Zhengcong and Li, Zekang and Zhang, Jinchao and Feng, Yang and Zhou, Jie and Kim, Seokhwan and Liu, Yang and Jin, Di and Papangelis, Alexandros and Gopalakrishnan, Karthik and Hakkani-Tur, Dilek and Damavandi, Babak and Geramifard, Alborz and Hori, Chiori and Shah, Ankit and Zhang, Chen and Li, Haizhou and Sedoc, João and D’Haro, Luis F. and Banchs, Rafael and Rudnicky, Alexander},
- title = {Overview of the Tenth Dialog System Technology Challenge: DSTC10},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2023,
- pages = {1--14},
- month = aug,
- doi = {10.1109/TASLP.2023.3293030},
- issn = {2329-9290},
- url = {https://www.merl.com/publications/TR2023-109}
- }
Hori, C., Peng, P., Harwath, D., Liu, X., Ota, K., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Style-transfer based Speech and Audio-visual Scene understanding for Robot Action Sequence Acquisition from Videos", Interspeech, DOI: 10.21437/Interspeech.2023-1983, August 2023, pp. 4663-4667.
BibTeX TR2023-104 PDF
- @inproceedings{Hori2023aug,
- author = {Hori, Chiori and Peng, Puyuan and Harwath, David and Liu, Xinyu and Ota, Kei and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and Le Roux, Jonathan},
- title = {Style-transfer based Speech and Audio-visual Scene understanding for Robot Action Sequence Acquisition from Videos},
- booktitle = {Interspeech},
- year = 2023,
- pages = {4663--4667},
- month = aug,
- doi = {10.21437/Interspeech.2023-1983},
- url = {https://www.merl.com/publications/TR2023-104}
- }
Singh, A., Jones, M.J., Learned-Miller, E., "EVAL: Explainable Video Anomaly Localization", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR52729.2023.01795, June 2023.
BibTeX TR2023-071 PDF Video Presentation
- @inproceedings{Singh2023jun,
- author = {Singh, Ashish and Jones, Michael J. and Learned-Miller, Erik},
- title = {EVAL: Explainable Video Anomaly Localization},
- booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2023,
- month = jun,
- doi = {10.1109/CVPR52729.2023.01795},
- url = {https://www.merl.com/publications/TR2023-071}
- }
Jha, D.K., Jain, S., Romeres, D., Yerazunis, W.S., Nikovski, D., "Generalizable Human-Robot Collaborative Assembly Using Imitation Learning and Force Control", European Control Conference (ECC), DOI: 10.23919/ECC57647.2023.10178330, May 2023, pp. 1-8.
BibTeX TR2023-065 PDF
- @inproceedings{Jha2023may,
- author = {Jha, Devesh K. and Jain, Siddarth and Romeres, Diego and Yerazunis, William S. and Nikovski, Daniel},
- title = {Generalizable Human-Robot Collaborative Assembly Using Imitation Learning and Force Control},
- booktitle = {European Control Conference (ECC)},
- year = 2023,
- pages = {1--8},
- month = may,
- doi = {10.23919/ECC57647.2023.10178330},
- url = {https://www.merl.com/publications/TR2023-065}
- }
Skibik, T., Vinod, A.P., Weiss, A., Di Cairano, S., "MPC with Integrated Evasive Maneuvers for Failure-safe Automated Driving", American Control Conference (ACC), May 2023, pp. 1122-1128.
BibTeX TR2023-055 PDF
- @inproceedings{Skibik2023may,
- author = {Skibik, Terrence and Vinod, Abraham P. and Weiss, Avishai and Di Cairano, Stefano},
- title = {MPC with Integrated Evasive Maneuvers for Failure-safe Automated Driving},
- booktitle = {American Control Conference (ACC)},
- year = 2023,
- pages = {1122--1128},
- month = may,
- url = {https://www.merl.com/publications/TR2023-055}
- }
Jain, S., Corcodel, R., Jha, D.K., Romeres, D., "Vision Guided Food Assembly by Robot Teaching from Target Composition", ICRA 2023 Workshop on Task-Informed Grasping IV (TIG-IV): From Farm to Fork, May 2023.
BibTeX TR2023-067 PDF
- @inproceedings{Jain2023may,
- author = {Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego},
- title = {Vision Guided Food Assembly by Robot Teaching from Target Composition},
- booktitle = {ICRA 2023 Workshop on Task-Informed Grasping IV (TIG-IV): From Farm to Fork},
- year = 2023,
- month = may,
- url = {https://www.merl.com/publications/TR2023-067}
- }
Cherian, A., Jain, S., Marks, T.K., Sullivan, A., "Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA48891.2023.10160644, May 2023, pp. 9296-9302.
BibTeX TR2023-010 PDF Presentation
- @inproceedings{Cherian2023may,
- author = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
- title = {Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
- year = 2023,
- pages = {9296--9302},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICRA48891.2023.10160644},
- url = {https://www.merl.com/publications/TR2023-010}
- }