@@ -2,3 +2,4 @@
+from timeit import default_timer as timer
 import torch
 import numpy as np
 from all.environments import State
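Note on the new import: `timeit.default_timer` is the standard-library timer for measuring elapsed wall-clock intervals (on Python 3 it is an alias for `time.perf_counter`). A minimal usage sketch:

```python
from timeit import default_timer as timer

start = timer()
total = sum(range(1_000_000))  # stand-in workload
elapsed = timer() - start      # elapsed wall-clock time in seconds, as a float
```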
@@ -26,6 +27,8 @@ def __init__(
         self._returns = []
         self._frame = 1
         self._episode = 1
+        self._episode_start_times = [0.0] * self._n_envs  # placeholders; set in _reset()
+        self._episode_start_frames = [0] * self._n_envs  # placeholders; set in _reset()
 
         # test state
         self._test_episodes = 100
@@ -66,6 +69,8 @@ def _reset(self):
             device=self._envs[0].device
         )
         self._returns = rewards
+        self._episode_start_times = [timer()] * self._n_envs
+        self._episode_start_frames = [self._frame] * self._n_envs
 
     def _step(self):
         states = self._aggregate_states()
@@ -80,10 +85,12 @@ def _step_envs(self, actions):
         for i, env in enumerate(self._envs):
             if env.done:
                 self._returns[i] += env.reward
-                self._log_training_episode(self._returns[i].item(), 0)
+                self._log_training_episode(self._returns[i].item(), self._fps(i))
                 env.reset()
                 self._returns[i] = 0
                 self._episode += 1
+                self._episode_start_times[i] = timer()
+                self._episode_start_frames[i] = self._frame
             else:
                 action = actions[i]
                 if action is not None:
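The two lines added at the end of the `env.done` branch pair with the `_fps` helper introduced below: each environment keeps its own episode start time and start frame, reset whenever its episode finishes. A minimal standalone sketch of the same bookkeeping pattern; the `FpsTracker` name and its methods are hypothetical, not part of the library:

```python
from timeit import default_timer as timer

class FpsTracker:
    """Per-environment frames-per-second bookkeeping (hypothetical sketch)."""

    def __init__(self, n_envs):
        self._frame = 1
        # One start time and one start frame per environment.
        self._episode_start_times = [timer()] * n_envs
        self._episode_start_frames = [self._frame] * n_envs

    def step(self):
        # Called once for every step taken in any environment.
        self._frame += 1

    def episode_end(self, i):
        # FPS over the window of env i's episode, then reset its counters.
        elapsed = timer() - self._episode_start_times[i]
        fps = (self._frame - self._episode_start_frames[i]) / elapsed
        self._episode_start_times[i] = timer()
        self._episode_start_frames[i] = self._frame
        return fps
```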
@@ -134,5 +141,9 @@ def _aggregate_rewards(self):
             device=self._envs[0].device
         )
 
+    def _fps(self, i):
+        end_time = timer()
+        return (self._frame - self._episode_start_frames[i]) / (end_time - self._episode_start_times[i])
+
     def _make_writer(self, agent_name, env_name, write_loss):
         return ExperimentWriter(self, agent_name, env_name, loss=write_loss)
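A note on the design: assuming `self._frame` counts steps across all environments (the usual convention in a parallel experiment), `_fps(i)` divides the total frames processed during the window of env `i`'s episode by that episode's wall-clock duration, so what gets logged at each episode boundary is the experiment's aggregate throughput rather than a single environment's. Because `default_timer` measures wall-clock time, the figure includes agent learning overhead as well as environment stepping.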