Add FastAPI backend for energy trading system
Implements FastAPI backend with ML model support for energy trading, including price prediction models and RL-based battery trading policy. Features dashboard, trading, backtest, and settings API routes with WebSocket support for real-time updates.
This commit is contained in:
95
backend/app/ml/rl_battery/trainer.py
Normal file
95
backend/app/ml/rl_battery/trainer.py
Normal file
@@ -0,0 +1,95 @@
|
||||
from typing import Dict
|
||||
from app.ml.rl_battery.environment import BatteryEnvironment
|
||||
from app.ml.rl_battery.agent import QLearningAgent
|
||||
from app.utils.logger import get_logger
|
||||
|
||||
# Module-level logger named after this module's import path.
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class BatteryRLTrainer:
    """Train a tabular Q-learning policy for battery charge/discharge decisions.

    Wires together a ``BatteryEnvironment`` (simulated battery) and a
    ``QLearningAgent``, runs episodic training, and can persist the learned
    policy to disk.
    """

    def __init__(self, config=None):
        """Initialize the trainer.

        Args:
            config: Optional dict of hyperparameters (``charge_level_bins``,
                ``learning_rate``, ``discount_factor``, ``epsilon``,
                ``epsilon_decay``, ``epsilon_min``). Missing keys fall back
                to the defaults in :meth:`_create_agent`.
        """
        self.config = config or {}
        # Both are created lazily by train(); they stay None until then.
        self.agent = None  # QLearningAgent once train() has run
        self.env = None  # BatteryEnvironment once train() has run

    def _create_agent(self) -> QLearningAgent:
        """Build a QLearningAgent from self.config, with default hyperparameters."""
        return QLearningAgent(
            state_bins=self.config.get("charge_level_bins", 10),
            action_space=3,
            learning_rate=self.config.get("learning_rate", 0.1),
            discount_factor=self.config.get("discount_factor", 0.95),
            epsilon=self.config.get("epsilon", 1.0),
            epsilon_decay=self.config.get("epsilon_decay", 0.995),
            epsilon_min=self.config.get("epsilon_min", 0.05),
        )

    def load_data(self):
        """Load market data for training. Not yet implemented (no-op stub)."""
        pass

    def train(self, n_episodes: int = 1000, region: str = "FR") -> Dict:
        """Run episodic Q-learning training against a fresh environment.

        Args:
            n_episodes: Number of training episodes to run.
            region: Market region identifier (currently unused by the
                environment construction below — kept for forward
                compatibility; TODO confirm intended use).

        Returns:
            Dict with ``n_episodes``, ``final_avg_reward`` (mean over the
            last up-to-100 episodes), ``episode_rewards``, and
            ``final_epsilon``.
        """
        logger.info("Starting RL training for %s episodes", n_episodes)

        self.env = BatteryEnvironment(
            capacity=100.0,
            charge_rate=50.0,
            discharge_rate=50.0,
            efficiency=0.9,
            min_reserve=0.1,
            max_charge=0.9,
        )

        self.agent = self._create_agent()
        self.agent.initialize_q_table(self.env.observation_space)

        episode_rewards = []

        for episode in range(n_episodes):
            state = self.env.reset()
            total_reward = 0
            steps = 0

            # Roll out one episode to termination.
            while True:
                action = self.agent.get_action(state, training=True)
                next_state, reward, done, info = self.env.step(action)

                self.agent.update(state, action, reward, next_state, done)

                total_reward += reward
                state = next_state
                steps += 1

                if done:
                    break

            episode_rewards.append(total_reward)
            self.agent.decay_epsilon()

            if (episode + 1) % 100 == 0:
                # The window is exactly 100 episodes here, so /100 is the mean.
                avg_reward = sum(episode_rewards[-100:]) / 100
                logger.info(
                    "Episode %s/%s, Avg Reward: %.2f, Epsilon: %.3f",
                    episode + 1,
                    n_episodes,
                    avg_reward,
                    self.agent.epsilon,
                )

        # Bug fix: divide by the actual window size rather than a hard-coded
        # 100, so short runs (n_episodes < 100) report a correct average.
        recent = episode_rewards[-100:]
        final_avg_reward = sum(recent) / len(recent) if recent else 0.0

        results = {
            "n_episodes": n_episodes,
            "final_avg_reward": final_avg_reward,
            "episode_rewards": episode_rewards,
            "final_epsilon": self.agent.epsilon,
        }

        logger.info("Training complete. Final avg reward: %.2f", final_avg_reward)

        return results

    def save(self, output_dir: str = "models/rl_battery") -> None:
        """Persist the trained policy to ``<output_dir>/battery_policy.pkl``.

        Args:
            output_dir: Directory to write the policy file into; created
                (with parents) if it does not exist.

        Raises:
            RuntimeError: If called before :meth:`train` has produced an agent.
        """
        from pathlib import Path

        if self.agent is None:
            # Fail with a clear message instead of an opaque AttributeError.
            raise RuntimeError("No trained agent to save; call train() first.")

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        filepath = output_path / "battery_policy.pkl"
        self.agent.save(filepath)
        logger.info("Saved trained policy to %s", filepath)
|
||||
|
||||
|
||||
# Explicit public API for `from app.ml.rl_battery.trainer import *`.
__all__ = ["BatteryRLTrainer"]
|
||||
Reference in New Issue
Block a user