Empirical Inference
Conference Paper
2024
What Makes and Breaks Safety Fine-tuning? A Mechanistic Study
| Author(s): | Jain, S. and Lubana, E. S. and Oksuz, K. and Joy, T. and Torr, P. and Sanyal, A. and Dokania, P. K. |
| Book Title: | Advances in Neural Information Processing Systems 37 (NeurIPS 2024) |
| Volume: | 37 |
| Pages: | 93406--93478 |
| Year: | 2024 |
| Month: | December |
| Editors: | A. Globerson and L. Mackey and D. Belgrave and A. Fan and U. Paquet and J. Tomczak and C. Zhang |
| Publisher: | Curran Associates, Inc. |
| BibTeX Type: | Conference Paper (conference) |
| Event Name: | 38th Annual Conference on Neural Information Processing Systems |
| Event Place: | Vancouver, Canada |
| State: | Published |
| URL: | https://proceedings.neurips.cc/paper_files/paper/2024/file/a9bef53eb7b0e5950d4f2d9c74a16006-Paper-Conference.pdf |
BibTeX
@conference{Jainetal24,
  author        = {Jain, S. and Lubana, E. S. and Oksuz, K. and Joy, T. and Torr, P. and Sanyal, A. and Dokania, P. K.},
  title         = {What Makes and Breaks Safety Fine-tuning? {A} Mechanistic Study},
  booktitle     = {Advances in Neural Information Processing Systems 37 (NeurIPS 2024)},
  editor        = {Globerson, A. and Mackey, L. and Belgrave, D. and Fan, A. and Paquet, U. and Tomczak, J. and Zhang, C.},
  volume        = {37},
  pages         = {93406--93478},
  publisher     = {Curran Associates, Inc.},
  month         = dec,
  year          = {2024},
  url           = {https://proceedings.neurips.cc/paper_files/paper/2024/file/a9bef53eb7b0e5950d4f2d9c74a16006-Paper-Conference.pdf},
  month_numeric = {12},
}