Back
Policy gradient methods
Policy gradient methods are a type of reinforcement learning techniques that rely upon optimizing parametrized policies with respect to the expected return (long-term cumulative reward) by gradient descent. They do not suffer from many of the problems that have been marring traditional reinforcement learning approaches such as the lack of guarantees of a value function, the intractability problem resulting from uncertain state information and the complexity arising from continuous states & actions.
% Scholarpedia article record. The ampersand in the abstract is escaped (\&) so the
% entry stays compilable under styles that typeset the abstract field. The fields
% organization, school, slug and month_numeric are kept as exported metadata; they
% are not part of the standard article field set.
@article{6940,
  author        = {Peters, J.},
  title         = {Policy gradient methods},
  journal       = {Scholarpedia},
  volume        = {5},
  number        = {11},
  pages         = {3698},
  month         = nov,
  year          = {2010},
  abstract      = {Policy gradient methods are a type of reinforcement learning techniques that rely upon optimizing parametrized policies with respect to the expected return (long-term cumulative reward) by gradient descent. They do not suffer from many of the problems that have been marring traditional reinforcement learning approaches such as the lack of guarantees of a value function, the intractability problem resulting from uncertain state information and the complexity arising from continuous states \& actions.},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  slug          = {6940},
  month_numeric = {11},
}