Agent → action ($a_t$) →→ Environment
↑ ↓ ↓
↑←← reward ($r_t$) ←← $r_{t+1}$
↓
↑←← state ($s_t$) ←←←←← $s_{t+1}$