% Conference paper (ICCV 2017) describing NetWarp, a warping module for video semantic segmentation CNNs.
% The abstract is stored as plain running text, without PDF line-break hyphenation, so it can be searched and reflowed.
@inproceedings{gadde2017semantic,
  author        = {Gadde, Raghudeep and Jampani, Varun and Gehler, Peter V.},
  title         = {Semantic Video {CNNs} through Representation Warping},
  booktitle     = {Proceedings {IEEE} International Conference on Computer Vision ({ICCV})},
  pages         = {4463--4472},
  publisher     = {IEEE},
  address       = {Piscataway, NJ, USA},
  month         = oct,
  year          = {2017},
  month_numeric = {10},
  abstract      = {In this work, we propose a technique to convert CNN
    models for semantic segmentation of static images into CNNs for
    video data. We describe a warping method that can be used to
    augment existing architectures with very little extra
    computational cost. This module is called NetWarp and we
    demonstrate its use for a range of network architectures. The
    main design principle is to use optical flow of adjacent frames
    for warping internal network representations across time. A key
    insight of this work is that fast optical flow methods can be
    combined with many different CNN architectures for improved
    performance and end-to-end training. Experiments validate that
    the proposed approach incurs only little extra computational
    cost, while improving performance, when video streams are
    available. We achieve new state-of-the-art results on the
    standard CamVid and Cityscapes benchmark datasets and show
    reliable improvements over different baseline networks. Our code
    and models are available at http://segmentation.is.tue.mpg.de}
}
