We present a general model-independent approach to the analysis of data in cases when these data do not appear in the form of co-occurrence of two variables X, Y, but rather as a sample of values of an unknown (stochastic) function Z(X,Y). For example, in gene expression data, the expression level Z is a function of gene X and condition Y; or in movie ratings data the rating Z is a function of viewer X and movie Y. The approach represents a consistent extension of the Information Bottleneck method that has previously relied on the availability of co-occurrence statistics. By altering the relevance variable we eliminate the need in the sample of joint distribution of all input variables. This new formulation also enables simple MDL-like model complexity control and prediction of missing values of Z. The approach is analyzed and shown to be on a par with the best known clustering algorithms for a wide range of domains. For the prediction of missing values (collaborative filtering) it improves the currently best known results.
| Author(s): | Seldin, Y. and Slonim, N. and Tishby, N. |
| Links: | |
| Book Title: | Advances in Neural Information Processing Systems 19 |
| Journal: | In Advances in Neural Information Processing Systems 19, 2007 (NIPS 2006) |
| Pages: | 1241-1248 |
| Year: | 2007 |
| Month: | September |
| Day: | 0 |
| Editors: | Schölkopf, B., Platt, J. and Hofmann, T. |
| Publisher: | MIT Press |
| BibTeX Type: | Conference Paper (inproceedings) |
| Address: | Cambridge, MA, USA |
| Event Name: | Twentieth Annual Conference on Neural Information Processing Systems (NIPS 2006) |
| Event Place: | Vancouver, BC, Canada |
| Digital: | 0 |
| Electronic Archiving: | grant_archive |
| ISBN: | 0-262-19568-2 |
| Language: | en |
| Organization: | Max-Planck-Gesellschaft |
| School: | Biologische Kybernetik |
BibTeX
@comment{Conference paper in the NIPS 19 proceedings volume. The proceedings
title is carried by booktitle; editors are listed in the standard editor field
as "Last, F." names joined by "and"; the page range uses a double hyphen.}
@inproceedings{6576,
  author        = {Seldin, Y. and Slonim, N. and Tishby, N.},
  title         = {Information Bottleneck for Non Co-Occurrence Data},
  booktitle     = {Advances in Neural Information Processing Systems 19},
  editor        = {Sch{\"o}lkopf, B. and Platt, J. and Hofmann, T.},
  pages         = {1241--1248},
  publisher     = {MIT Press},
  address       = {Cambridge, MA, USA},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  month         = sep,
  year          = {2007},
  month_numeric = {9},
  abstract      = {We present a general model-independent approach to the analysis of data in cases when these data do not appear in the form of co-occurrence of two variables X, Y, but rather as a sample of values of an unknown (stochastic) function Z(X,Y). For example, in gene expression data, the expression level Z is a function of gene X and condition Y; or in movie ratings data the rating Z is a function of viewer X and movie Y . The approach represents a consistent extension of the Information Bottleneck method that has previously relied on the availability of co-occurrence statistics. By altering the relevance variable we eliminate the need in the sample of joint distribution of all input variables. This new formulation also enables simple MDL-like model complexity control and prediction of missing values of Z. The approach is analyzed and shown to be on a par with the best known clustering algorithms for a wide range of domains. For the prediction of missing values (collaborative filtering) it improves the currently best known results.}
}
