@techreport{4110,
  title = {Towards a Statistical Theory of Clustering. Presented at the PASCAL workshop on clustering, London},
  abstract = {The goal of this paper is to discuss statistical aspects of clustering
  in a framework where the data to be clustered has been sampled
  from some unknown probability distribution. Firstly, the clustering of
  the data set should reveal some structure of the underlying data rather
  than model artifacts due to the random sampling process. Secondly, the
  more sample points we have, the more reliable the clustering should be.
  We discuss which methods can and cannot be used to tackle those problems.
  In particular we argue that generalization bounds as they are used
  in statistical learning theory of classification are unsuitable in a general
  clustering framework. We suggest that the main replacements of generalization
  bounds should be convergence proofs and stability considerations.
  This paper should be considered as a road map paper which identifies important
  questions and potentially fruitful directions for future research
  about statistical clustering. We do not attempt to present a complete
  statistical theory of clustering.},
  organization = {Max-Planck-Gesellschaft},
  institution = {Presented at the PASCAL workshop on clustering, London},
  school = {Biologische Kybernetik},
  year = {2005},
  author = {von Luxburg, U. and Ben-David, S.}
}
