from pathlib import Path
from typing import Dict, Optional

from app.ml.rl_battery.environment import BatteryEnvironment
from app.ml.rl_battery.agent import QLearningAgent
from app.utils.logger import get_logger

logger = get_logger(__name__)


class BatteryRLTrainer:
    """Trains a tabular Q-learning agent to operate a battery in the simulated environment."""

    def __init__(self, config=None):
        self.config = config or {}
        self.agent: Optional[QLearningAgent] = None
        self.env: Optional[BatteryEnvironment] = None

    def _create_agent(self) -> QLearningAgent:
        # Build the agent from config values, falling back to sensible defaults.
        return QLearningAgent(
            state_bins=self.config.get("charge_level_bins", 10),
            action_space=3,
            learning_rate=self.config.get("learning_rate", 0.1),
            discount_factor=self.config.get("discount_factor", 0.95),
            epsilon=self.config.get("epsilon", 1.0),
            epsilon_decay=self.config.get("epsilon_decay", 0.995),
            epsilon_min=self.config.get("epsilon_min", 0.05),
        )

    def load_data(self):
        pass

    def train(self, n_episodes: int = 1000, region: str = "FR") -> Dict:
        logger.info(f"Starting RL training for {n_episodes} episodes")

        self.env = BatteryEnvironment(
            capacity=100.0,
            charge_rate=50.0,
            discharge_rate=50.0,
            efficiency=0.9,
            min_reserve=0.1,
            max_charge=0.9,
        )
        self.agent = self._create_agent()
        self.agent.initialize_q_table(self.env.observation_space)

        episode_rewards = []
        for episode in range(n_episodes):
            state = self.env.reset()
            total_reward = 0
            steps = 0
            # Roll out one episode, updating the Q-table after every transition.
            while True:
                action = self.agent.get_action(state, training=True)
                next_state, reward, done, info = self.env.step(action)
                self.agent.update(state, action, reward, next_state, done)
                total_reward += reward
                state = next_state
                steps += 1
                if done:
                    break

            episode_rewards.append(total_reward)
            self.agent.decay_epsilon()

            if (episode + 1) % 100 == 0:
                avg_reward = sum(episode_rewards[-100:]) / 100
                logger.info(
                    f"Episode {episode + 1}/{n_episodes}, "
                    f"Avg Reward: {avg_reward:.2f}, Epsilon: {self.agent.epsilon:.3f}"
                )

        # Average over the last 100 episodes (or fewer if training was shorter).
        last_window = episode_rewards[-100:]
        final_avg_reward = sum(last_window) / len(last_window)
        results = {
            "n_episodes": n_episodes,
            "final_avg_reward": final_avg_reward,
            "episode_rewards": episode_rewards,
            "final_epsilon": self.agent.epsilon,
        }
        logger.info(f"Training complete. Final avg reward: {final_avg_reward:.2f}")
        return results

    def save(self, output_dir: str = "models/rl_battery") -> None:
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        filepath = output_path / "battery_policy.pkl"
        self.agent.save(filepath)
        logger.info(f"Saved trained policy to {filepath}")


__all__ = ["BatteryRLTrainer"]
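

# Usage sketch (illustrative, not part of the module): run a short training
# session and persist the learned policy. The config keys and the output
# directory mirror the defaults above; the exact values are assumptions.
if __name__ == "__main__":
    trainer = BatteryRLTrainer(config={"learning_rate": 0.1, "epsilon_decay": 0.995})
    results = trainer.train(n_episodes=500, region="FR")
    print(f"Final average reward: {results['final_avg_reward']:.2f}")
    trainer.save("models/rl_battery")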