Implements FastAPI backend with ML model support for energy trading, including price prediction models and RL-based battery trading policy. Features dashboard, trading, backtest, and settings API routes with WebSocket support for real-time updates.
96 lines
3.0 KiB
Python
from typing import Dict

# Project-local imports: RL environment/agent pair and the app's logger factory.
from app.ml.rl_battery.environment import BatteryEnvironment
from app.ml.rl_battery.agent import QLearningAgent
from app.utils.logger import get_logger

# Module-level logger named after this module (standard app convention).
logger = get_logger(__name__)
|
|
|
|
|
|
class BatteryRLTrainer:
    """Train a tabular Q-learning agent to operate a battery in an energy market.

    Builds a ``BatteryEnvironment``, runs epsilon-greedy Q-learning episodes,
    and can persist the learned policy to disk. All hyperparameters are read
    from ``config`` with defaults matching the previous hard-coded values.
    """

    def __init__(self, config=None):
        # `config or {}` guards against both None and a falsy empty dict,
        # and avoids a shared mutable default argument.
        self.config = config or {}
        # Created lazily by train(); annotations are quoted so defining or
        # instantiating this class never evaluates the project types.
        self.agent: "QLearningAgent | None" = None
        self.env: "BatteryEnvironment | None" = None

    def _create_agent(self) -> "QLearningAgent":
        """Build a Q-learning agent from config, falling back to defaults."""
        return QLearningAgent(
            state_bins=self.config.get("charge_level_bins", 10),
            action_space=3,  # presumably charge / hold / discharge — confirm in agent
            learning_rate=self.config.get("learning_rate", 0.1),
            discount_factor=self.config.get("discount_factor", 0.95),
            epsilon=self.config.get("epsilon", 1.0),
            epsilon_decay=self.config.get("epsilon_decay", 0.995),
            epsilon_min=self.config.get("epsilon_min", 0.05),
        )

    @staticmethod
    def _recent_average(rewards, window: int = 100) -> float:
        """Mean of the last ``window`` rewards (or fewer, if not yet available).

        Fixes a bug where the sum of the last <=100 rewards was always divided
        by a fixed 100, under-reporting the average for short runs.
        Returns 0.0 for an empty list (matches the old n_episodes=0 result).
        """
        recent = rewards[-window:]
        return sum(recent) / len(recent) if recent else 0.0

    def load_data(self):
        """Placeholder for market-data loading; currently a no-op."""
        pass

    def train(self, n_episodes: int = 1000, region: str = "FR") -> Dict:
        """Run ``n_episodes`` of Q-learning and return training metrics.

        Args:
            n_episodes: Number of training episodes to run.
            region: Market region identifier (not used by the environment
                setup here — kept for interface compatibility).

        Returns:
            Dict with episode count, per-episode rewards, final 100-episode
            average reward, and the agent's final epsilon.
        """
        logger.info(f"Starting RL training for {n_episodes} episodes")

        # Battery parameters are now configurable; the previous hard-coded
        # values are preserved as defaults (backward-compatible).
        self.env = BatteryEnvironment(
            capacity=self.config.get("capacity", 100.0),
            charge_rate=self.config.get("charge_rate", 50.0),
            discharge_rate=self.config.get("discharge_rate", 50.0),
            efficiency=self.config.get("efficiency", 0.9),
            min_reserve=self.config.get("min_reserve", 0.1),
            max_charge=self.config.get("max_charge", 0.9),
        )

        self.agent = self._create_agent()
        self.agent.initialize_q_table(self.env.observation_space)

        episode_rewards = []

        for episode in range(n_episodes):
            state = self.env.reset()
            total_reward = 0
            steps = 0

            # Roll one episode until the environment signals termination.
            while True:
                action = self.agent.get_action(state, training=True)
                next_state, reward, done, info = self.env.step(action)

                self.agent.update(state, action, reward, next_state, done)

                total_reward += reward
                state = next_state
                steps += 1

                if done:
                    break

            episode_rewards.append(total_reward)
            # Anneal exploration once per episode.
            self.agent.decay_epsilon()

            if (episode + 1) % 100 == 0:
                avg_reward = self._recent_average(episode_rewards)
                logger.info(
                    f"Episode {episode + 1}/{n_episodes}, "
                    f"Avg Reward: {avg_reward:.2f}, "
                    f"Epsilon: {self.agent.epsilon:.3f}"
                )

        final_avg_reward = self._recent_average(episode_rewards)

        results = {
            "n_episodes": n_episodes,
            "final_avg_reward": final_avg_reward,
            "episode_rewards": episode_rewards,
            "final_epsilon": self.agent.epsilon,
        }

        logger.info(f"Training complete. Final avg reward: {final_avg_reward:.2f}")

        return results

    def save(self, output_dir: str = "models/rl_battery") -> None:
        """Persist the trained policy to ``output_dir``/battery_policy.pkl.

        Raises:
            RuntimeError: If called before train() has produced an agent
                (previously this surfaced as an opaque AttributeError).
        """
        from pathlib import Path

        if self.agent is None:
            raise RuntimeError("No trained agent to save; call train() first.")

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        filepath = output_path / "battery_policy.pkl"
        self.agent.save(filepath)
        logger.info(f"Saved trained policy to {filepath}")
|
|
|
|
|
|
# Explicit public API of this module.
__all__ = ["BatteryRLTrainer"]
|