Files
energy-trade/backend/app/ml/rl_battery/trainer.py
kbt-devops fe76bc7629 Add FastAPI backend for energy trading system
Implements FastAPI backend with ML model support for energy trading,
including price prediction models and RL-based battery trading policy.
Features dashboard, trading, backtest, and settings API routes with
WebSocket support for real-time updates.
2026-02-12 00:59:26 +07:00

96 lines
3.0 KiB
Python

from __future__ import annotations

from pathlib import Path
from typing import Dict

from app.ml.rl_battery.agent import QLearningAgent
from app.ml.rl_battery.environment import BatteryEnvironment
from app.utils.logger import get_logger

logger = get_logger(__name__)
class BatteryRLTrainer:
    """Train a tabular Q-learning agent to operate a battery on market prices.

    The trainer builds a ``BatteryEnvironment`` and a ``QLearningAgent`` from
    ``config`` and runs episodic Q-learning; the learned policy can then be
    persisted with :meth:`save`.
    """

    def __init__(self, config=None):
        """
        Args:
            config: Optional dict of hyperparameters. Recognized keys are the
                agent settings (``charge_level_bins``, ``learning_rate``,
                ``discount_factor``, ``epsilon``, ``epsilon_decay``,
                ``epsilon_min``) and the environment settings (``capacity``,
                ``charge_rate``, ``discharge_rate``, ``efficiency``,
                ``min_reserve``, ``max_charge``). Missing keys fall back to
                the defaults used in the ``_create_*`` helpers.
        """
        self.config = config or {}
        # Created lazily by train(); both stay None until training starts.
        self.agent: QLearningAgent | None = None
        self.env: BatteryEnvironment | None = None

    def _create_agent(self) -> QLearningAgent:
        """Build a Q-learning agent from config, with the original defaults."""
        return QLearningAgent(
            state_bins=self.config.get("charge_level_bins", 10),
            action_space=3,  # presumably hold / charge / discharge — confirm in agent
            learning_rate=self.config.get("learning_rate", 0.1),
            discount_factor=self.config.get("discount_factor", 0.95),
            epsilon=self.config.get("epsilon", 1.0),
            epsilon_decay=self.config.get("epsilon_decay", 0.995),
            epsilon_min=self.config.get("epsilon_min", 0.05),
        )

    def _create_environment(self) -> BatteryEnvironment:
        """Build the battery environment.

        Parameters are now configurable via ``config`` but default to the
        previously hard-coded values, so existing callers see no change.
        """
        return BatteryEnvironment(
            capacity=self.config.get("capacity", 100.0),
            charge_rate=self.config.get("charge_rate", 50.0),
            discharge_rate=self.config.get("discharge_rate", 50.0),
            efficiency=self.config.get("efficiency", 0.9),
            min_reserve=self.config.get("min_reserve", 0.1),
            max_charge=self.config.get("max_charge", 0.9),
        )

    def load_data(self):
        """Load market data for training.

        Placeholder — the environment currently supplies its own data;
        kept for interface compatibility with callers.
        """
        pass

    def train(self, n_episodes: int = 1000, region: str = "FR") -> Dict:
        """Run episodic Q-learning and return summary statistics.

        Args:
            n_episodes: Number of training episodes.
            region: Market region identifier (currently unused here;
                kept for interface compatibility).

        Returns:
            Dict with ``n_episodes``, ``final_avg_reward`` (mean over the
            last up-to-100 episodes), ``episode_rewards`` and
            ``final_epsilon``.
        """
        logger.info("Starting RL training for %s episodes", n_episodes)
        self.env = self._create_environment()
        self.agent = self._create_agent()
        self.agent.initialize_q_table(self.env.observation_space)

        episode_rewards = []
        for episode in range(n_episodes):
            state = self.env.reset()
            total_reward = 0
            # Roll out one full episode with epsilon-greedy exploration.
            while True:
                action = self.agent.get_action(state, training=True)
                next_state, reward, done, info = self.env.step(action)
                self.agent.update(state, action, reward, next_state, done)
                total_reward += reward
                state = next_state
                if done:
                    break
            episode_rewards.append(total_reward)
            self.agent.decay_epsilon()  # anneal exploration once per episode
            if (episode + 1) % 100 == 0:
                window = episode_rewards[-100:]
                logger.info(
                    "Episode %d/%d, Avg Reward: %.2f, Epsilon: %.3f",
                    episode + 1,
                    n_episodes,
                    sum(window) / len(window),
                    self.agent.epsilon,
                )

        # BUG FIX: the original divided by a hard-coded 100, which understates
        # the average whenever fewer than 100 episodes were run. Use the
        # actual window length (and 0.0 for an empty run).
        window = episode_rewards[-100:]
        final_avg_reward = sum(window) / len(window) if window else 0.0
        results = {
            "n_episodes": n_episodes,
            "final_avg_reward": final_avg_reward,
            "episode_rewards": episode_rewards,
            "final_epsilon": self.agent.epsilon,
        }
        logger.info("Training complete. Final avg reward: %.2f", final_avg_reward)
        return results

    def save(self, output_dir: str = "models/rl_battery") -> None:
        """Persist the trained policy to ``<output_dir>/battery_policy.pkl``.

        Args:
            output_dir: Directory to create (if needed) and write into.

        Raises:
            RuntimeError: If called before :meth:`train` has produced an agent.
        """
        if self.agent is None:
            raise RuntimeError("No trained agent to save; call train() first.")
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        filepath = output_path / "battery_policy.pkl"
        self.agent.save(filepath)
        logger.info("Saved trained policy to %s", filepath)
# Explicit public API for `from ... import *` consumers.
__all__ = ["BatteryRLTrainer"]