% Scholarpedia article record, citation key 6940.
% The ampersand in the abstract is escaped so the field can be typeset by LaTeX.
% organization, school, slug and month_numeric are not standard fields for an
% article entry; they are kept as exported metadata.
@article{6940,
  author        = {Peters, J.},
  title         = {Policy gradient methods},
  journal       = {Scholarpedia},
  volume        = {5},
  number        = {11},
  pages         = {3698},
  month         = nov,
  year          = {2010},
  abstract      = {Policy gradient methods are a type of reinforcement learning techniques that rely upon optimizing parametrized policies with respect to the expected return (long-term cumulative reward) by gradient descent. They do not suffer from many of the problems that have been marring traditional reinforcement learning approaches such as the lack of guarantees of a value function, the intractability problem resulting from uncertain state information and the complexity arising from continuous states \& actions.},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  slug          = {6940},
  month_numeric = {11}
}