We introduce a family of unsupervised algorithms, numerical taxonomy clustering, to simultaneously cluster data, and to learn a taxonomy that encodes the relationship between the clusters. The algorithms work by maximizing the dependence between the taxonomy and the original data. The resulting taxonomy is a more informative visualization of complex data than simple clustering; in addition, taking into account the relations between different clusters is shown to substantially improve the quality of the clustering, when compared with state-of-the-art algorithms in the literature (both spectral clustering and a previous dependence maximization approach). We demonstrate our algorithm on image and text data.
| Author(s): | Blaschko, MB. and Gretton, A. |
| Links: | |
| Number (issue): | 181 |
| Year: | 2008 |
| Month: | November |
| Day: | 0 |
| BibTeX Type: | Technical Report (techreport) |
| Digital: | 0 |
| Electronic Archiving: | grant_archive |
| Institution: | Max-Planck Institute for Biological Cybernetics, Tübingen, Germany |
| Language: | en |
| Organization: | Max-Planck-Gesellschaft |
| School: | Biologische Kybernetik |
BibTeX
@comment{Technical report no. 181, November 2008. Author names use the comma form with spaced initials, and the u-umlaut in the institution is written as a LaTeX accent escape so the entry also works with 8-bit BibTeX. The abstract, organization, school and month_numeric fields are not read by the standard techreport styles and are kept unchanged as extra metadata.}
@techreport{5618,
  author        = {Blaschko, M. B. and Gretton, A.},
  title         = {Taxonomy Inference Using Kernel Dependence Measures},
  institution   = {Max-Planck Institute for Biological Cybernetics, T{\"u}bingen, Germany},
  number        = {181},
  month         = nov,
  year          = {2008},
  abstract      = {We introduce a family of unsupervised algorithms, numerical taxonomy clustering, to simultaneously
    cluster data, and to learn a taxonomy that encodes the relationship between the clusters. The algorithms
    work by maximizing the dependence between the taxonomy and the original data. The resulting taxonomy is a
    more informative visualization of complex data than simple clustering; in addition, taking into account the relations
    between different clusters is shown to substantially improve the quality of the clustering, when compared
    with state-of-the-art algorithms in the literature (both spectral clustering and a previous dependence maximization
    approach). We demonstrate our algorithm on image and text data.},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  month_numeric = {11},
}
