In this paper we investigate connections between statistical learning theory and data compression on the basis of support vector machine (SVM) model selection. Inspired by several generalization bounds we construct ``compression coefficients'' for SVMs, which measure the amount by which the training labels can be compressed by some classification hypothesis. The main idea is to relate the coding precision of this hypothesis to the width of the margin of the SVM. The compression coefficients connect well-known quantities such as the radius-margin ratio $R^2/\rho^2$, the eigenvalues of the kernel matrix and the number of support vectors. To test whether they are useful in practice we ran model selection experiments on several real-world datasets. As a result we found that compression coefficients can fairly accurately predict the parameters for which the test error is minimized.
| Author(s): | von Luxburg, U. and Bousquet, O. and Schölkopf, B. |
| Number (issue): | 101 |
| Year: | 2002 |
| Day: | 0 |
| BibTeX Type: | Technical Report (techreport) |
| Digital: | 0 |
| Electronic Archiving: | grant_archive |
| Institution: | Max Planck Institute for Biological Cybernetics |
| Note: | see more detailed JMLR version |
| Organization: | Max-Planck-Gesellschaft |
| School: | Biologische Kybernetik |
BibTeX
@comment{Technical report no. 101 (2002) of the Max Planck Institute for
Biological Cybernetics. The note field points readers to a more detailed
JMLR version. The radius-margin ratio in the abstract is written in LaTeX
math mode so that it typesets correctly if a style prints the abstract.}
@techreport{1868,
  author       = {von Luxburg, U. and Bousquet, O. and Sch{\"o}lkopf, B.},
  title        = {A compression approach to support vector model selection},
  institution  = {Max Planck Institute for Biological Cybernetics},
  organization = {Max-Planck-Gesellschaft},
  school       = {Biologische Kybernetik},
  number       = {101},
  year         = {2002},
  note         = {see more detailed JMLR version},
  abstract     = {In this paper we investigate connections between statistical learning
                  theory and data compression on the basis of support vector machine
                  (SVM) model selection. Inspired by several generalization bounds we
                  construct ``compression coefficients'' for SVMs, which measure the
                  amount by which the training labels can be compressed by some
                  classification hypothesis. The main idea is to relate the coding
                  precision of this hypothesis to the width of the margin of the
                  SVM. The compression coefficients connect well known quantities such
                  as the radius-margin ratio $R^2/\rho^2$, the eigenvalues of the kernel
                  matrix and the number of support vectors. To test whether they are
                  useful in practice we ran model selection experiments on several real
                  world datasets. As a result we found that compression coefficients can
                  fairly accurately predict the parameters for which the test error is
                  minimized.},
}
