Add FastAPI backend for energy trading system

Implements FastAPI backend with ML model support for energy trading,
including price prediction models and RL-based battery trading policy.
Features dashboard, trading, backtest, and settings API routes with
WebSocket support for real-time updates.
This commit is contained in:
2026-02-12 00:59:26 +07:00
parent a22a13f6f4
commit fe76bc7629
72 changed files with 2931 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
from app.ml.features import (
build_price_features,
build_battery_features,
)
__all__ = ["build_price_features", "build_battery_features"]

View File

@@ -0,0 +1,3 @@
from app.ml.evaluation import ModelEvaluator, BacktestEvaluator
__all__ = ["ModelEvaluator", "BacktestEvaluator"]

View File

@@ -0,0 +1,3 @@
from app.ml.evaluation.metrics import BacktestEvaluator
__all__ = ["BacktestEvaluator"]

View File

@@ -0,0 +1,77 @@
from typing import Dict, List
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
class ModelEvaluator:
@staticmethod
def calculate_metrics(y_true, y_pred) -> Dict[str, float]:
mae = mean_absolute_error(y_true, y_pred)
rmse = mean_squared_error(y_true, y_pred, squared=False)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
r2 = r2_score(y_true, y_pred)
return {
"mae": float(mae),
"rmse": float(rmse),
"mape": float(mape) if not np.isnan(mape) else 0.0,
"r2": float(r2),
}
@staticmethod
def calculate_sharpe_ratio(returns: np.ndarray, risk_free_rate: float = 0.0) -> float:
if len(returns) == 0 or np.std(returns) == 0:
return 0.0
excess_returns = returns - risk_free_rate
return float(np.mean(excess_returns) / np.std(excess_returns))
@staticmethod
def calculate_max_drawdown(values: np.ndarray) -> float:
if len(values) == 0:
return 0.0
cumulative = np.cumsum(values)
running_max = np.maximum.accumulate(cumulative)
drawdown = (cumulative - running_max)
return float(drawdown.min())
class BacktestEvaluator:
def __init__(self):
self.trades: List[Dict] = []
def add_trade(self, trade: Dict):
self.trades.append(trade)
def evaluate(self) -> Dict[str, float]:
if not self.trades:
return {
"total_revenue": 0.0,
"total_trades": 0,
"win_rate": 0.0,
"sharpe_ratio": 0.0,
"max_drawdown": 0.0,
}
total_revenue = sum(t.get("revenue", 0) for t in self.trades)
winning_trades = sum(1 for t in self.trades if t.get("revenue", 0) > 0)
win_rate = winning_trades / len(self.trades) if self.trades else 0.0
returns = np.array([t.get("revenue", 0) for t in self.trades])
sharpe_ratio = ModelEvaluator.calculate_sharpe_ratio(returns)
max_drawdown = ModelEvaluator.calculate_max_drawdown(returns)
return {
"total_revenue": total_revenue,
"total_trades": len(self.trades),
"win_rate": win_rate,
"sharpe_ratio": sharpe_ratio,
"max_drawdown": max_drawdown,
}
def reset(self):
self.trades = []
__all__ = ["ModelEvaluator", "BacktestEvaluator"]

View File

@@ -0,0 +1,3 @@
from app.ml.evaluation.metrics import ModelEvaluator, BacktestEvaluator
__all__ = ["ModelEvaluator", "BacktestEvaluator"]

View File

@@ -0,0 +1,53 @@
from app.ml.features.lag_features import add_lag_features
from app.ml.features.rolling_features import add_rolling_features
from app.ml.features.time_features import add_time_features
from app.ml.features.regional_features import add_regional_features
from app.ml.features.battery_features import add_battery_features
from typing import List, Optional
import pandas as pd
def build_price_features(
    df: pd.DataFrame,
    price_col: str = "real_time_price",
    lags: Optional[List[int]] = None,
    windows: Optional[List[int]] = None,
    regions: Optional[List[str]] = None,
    include_time: bool = True,
    include_regional: bool = True,
) -> pd.DataFrame:
    """Assemble the full price-forecasting feature frame.

    Applies lag and rolling-window features on ``price_col`` when that column
    exists, optional calendar features (requires a ``timestamp`` column) and
    optional per-region price spreads. Returns a new DataFrame; ``df`` is not
    modified.
    """
    lag_steps = lags if lags is not None else [1, 5, 10, 15, 30, 60]
    window_sizes = windows if windows is not None else [5, 10, 15, 30, 60]
    features = df.copy()
    if price_col in features.columns:
        features = add_lag_features(features, price_col, lag_steps)
        features = add_rolling_features(features, price_col, window_sizes)
    if include_time and "timestamp" in features.columns:
        features = add_time_features(features)
    if include_regional and regions:
        features = add_regional_features(features, regions)
    return features
def build_battery_features(
    df: pd.DataFrame,
    price_df: pd.DataFrame,
    battery_col: str = "charge_level_mwh",
    capacity_col: str = "capacity_mwh",
    timestamp_col: str = "timestamp",
    battery_id_col: str = "battery_id",
) -> pd.DataFrame:
    """Build battery-state features for ``df`` against market prices.

    Thin wrapper around ``add_battery_features`` kept for API symmetry with
    ``build_price_features``. Returns a new DataFrame; ``df`` is not modified.
    """
    return add_battery_features(
        df.copy(), price_df, battery_col, capacity_col, timestamp_col, battery_id_col
    )
__all__ = ["build_price_features", "build_battery_features"]

View File

@@ -0,0 +1,35 @@
import pandas as pd
def add_battery_features(
df: pd.DataFrame,
price_df: pd.DataFrame,
battery_col: str = "charge_level_mwh",
capacity_col: str = "capacity_mwh",
timestamp_col: str = "timestamp",
battery_id_col: str = "battery_id",
) -> pd.DataFrame:
result = df.copy()
if battery_col in result.columns and capacity_col in result.columns:
result["charge_level_pct"] = result[battery_col] / result[capacity_col]
result["discharge_potential_mwh"] = result[battery_col] * result.get("efficiency", 0.9)
result["charge_capacity_mwh"] = result[capacity_col] - result[battery_col]
if price_df is not None and "real_time_price" in price_df.columns and timestamp_col in result.columns:
merged = result.merge(
price_df[[timestamp_col, "real_time_price"]],
on=timestamp_col,
how="left",
suffixes=("", "_market")
)
if "real_time_price_market" in merged.columns:
result["market_price"] = merged["real_time_price_market"]
result["charge_cost_potential"] = result["charge_capacity_mwh"] * result["market_price"]
result["discharge_revenue_potential"] = result["discharge_potential_mwh"] * result["market_price"]
return result
__all__ = ["add_battery_features"]

View File

@@ -0,0 +1,14 @@
from typing import List
import pandas as pd
def add_lag_features(df: pd.DataFrame, col: str, lags: List[int]) -> pd.DataFrame:
result = df.copy()
for lag in lags:
result[f"{col}_lag_{lag}"] = result[col].shift(lag)
return result
__all__ = ["add_lag_features"]

View File

@@ -0,0 +1,18 @@
from typing import List
import pandas as pd
def add_regional_features(df: pd.DataFrame, regions: List[str]) -> pd.DataFrame:
result = df.copy()
if "region" in result.columns and "real_time_price" in result.columns:
avg_price_by_region = result.groupby("region")["real_time_price"].mean()
for region in regions:
region_avg = avg_price_by_region.get(region, 0)
result[f"price_diff_{region}"] = result["real_time_price"] - region_avg
return result
__all__ = ["add_regional_features"]

View File

@@ -0,0 +1,17 @@
from typing import List
import pandas as pd
def add_rolling_features(df: pd.DataFrame, col: str, windows: List[int]) -> pd.DataFrame:
result = df.copy()
for window in windows:
result[f"{col}_rolling_mean_{window}"] = result[col].rolling(window=window).mean()
result[f"{col}_rolling_std_{window}"] = result[col].rolling(window=window).std()
result[f"{col}_rolling_min_{window}"] = result[col].rolling(window=window).min()
result[f"{col}_rolling_max_{window}"] = result[col].rolling(window=window).max()
return result
__all__ = ["add_rolling_features"]

View File

@@ -0,0 +1,35 @@
import pandas as pd
def add_time_features(df: pd.DataFrame, timestamp_col: str = "timestamp") -> pd.DataFrame:
result = df.copy()
if timestamp_col not in result.columns:
return result
result[timestamp_col] = pd.to_datetime(result[timestamp_col])
result["hour"] = result[timestamp_col].dt.hour
result["day_of_week"] = result[timestamp_col].dt.dayofweek
result["day_of_month"] = result[timestamp_col].dt.day
result["month"] = result[timestamp_col].dt.month
result["hour_sin"] = _sin_encode(result["hour"], 24)
result["hour_cos"] = _cos_encode(result["hour"], 24)
result["day_sin"] = _sin_encode(result["day_of_week"], 7)
result["day_cos"] = _cos_encode(result["day_of_week"], 7)
return result
def _sin_encode(x, period):
import numpy as np
return np.sin(2 * np.pi * x / period)
def _cos_encode(x, period):
import numpy as np
return np.cos(2 * np.pi * x / period)
__all__ = ["add_time_features"]

View File

@@ -0,0 +1,3 @@
from app.ml.model_management import ModelRegistry
__all__ = ["ModelRegistry"]

View File

@@ -0,0 +1,99 @@
from typing import Dict, List, Optional
from pathlib import Path
import json
from datetime import datetime
from app.utils.logger import get_logger
logger = get_logger(__name__)
class ModelRegistry:
    """JSON-file-backed registry of trained model versions.

    In memory the registry maps ``model_id`` to
    ``{"type": ..., "versions": [...], "latest": version}`` and is persisted
    to ``registry_path`` after every registration.
    """

    def __init__(self, registry_path: str = "models/registry.json"):
        self.registry_path = Path(registry_path)
        self._registry: Dict[str, Dict] = {}
        self._load()

    def _load(self):
        """Populate the in-memory registry from disk, if the file exists."""
        if self.registry_path.exists():
            with open(self.registry_path, encoding="utf-8") as f:
                self._registry = json.load(f)
            logger.info(f"Loaded registry from {self.registry_path}")

    def _save(self):
        """Write the registry to disk, creating parent directories as needed."""
        self.registry_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.registry_path, "w", encoding="utf-8") as f:
            # default=str makes non-JSON values (e.g. Paths) serializable.
            json.dump(self._registry, f, indent=2, default=str)
        logger.info(f"Saved registry to {self.registry_path}")

    def register_model(
        self,
        model_type: str,
        model_id: str,
        version: str,
        filepath: str,
        metadata: Optional[Dict] = None,
    ) -> None:
        """Append a version record for ``model_id``, mark it latest, persist."""
        # NOTE(review): datetime.utcnow() is deprecated in Python 3.12; kept
        # for timestamp-format stability — consider datetime.now(timezone.utc).
        timestamp = datetime.utcnow().isoformat()
        if model_id not in self._registry:
            self._registry[model_id] = {
                "type": model_type,
                "versions": [],
            }
        self._registry[model_id]["versions"].append({
            "version": version,
            "filepath": filepath,
            "timestamp": timestamp,
            "metadata": metadata or {},
        })
        self._registry[model_id]["latest"] = version
        self._save()
        logger.info(f"Registered model {model_id} version {version}")

    def get_latest_version(self, model_id: str) -> Optional[Dict]:
        """Return the record of the latest registered version, or None."""
        # Deduplicated: delegates to get_model, which defaults to "latest".
        return self.get_model(model_id)

    def list_models(self) -> List[Dict]:
        """Summaries of every registered model (id, type, version counts)."""
        models = []
        for model_id, model_info in self._registry.items():
            latest = self.get_latest_version(model_id)
            models.append({
                "model_id": model_id,
                "type": model_info.get("type"),
                "latest_version": model_info.get("latest"),
                "total_versions": len(model_info.get("versions", [])),
                "latest_info": latest,
            })
        return models

    def get_model(self, model_id: str, version: Optional[str] = None) -> Optional[Dict]:
        """Return the record for ``version`` of ``model_id``.

        ``version=None`` selects the latest registered version. Returns None
        for unknown models or versions.
        """
        entry = self._registry.get(model_id)
        if entry is None:
            return None
        if version is None:
            version = entry.get("latest")
        if not version:
            return None
        return next(
            (info for info in entry["versions"] if info["version"] == version),
            None,
        )
__all__ = ["ModelRegistry"]

View File

@@ -0,0 +1,3 @@
from app.ml.price_prediction import PricePredictor, PricePredictionTrainer
__all__ = ["PricePredictor", "PricePredictionTrainer"]

View File

@@ -0,0 +1,52 @@
import pickle
from typing import Optional
import xgboost as xgb
import numpy as np
class PricePredictionModel:
    """XGBoost regressor wrapper for one fixed prediction horizon (minutes)."""

    def __init__(self, horizon: int, model_id: Optional[str] = None):
        self.horizon = horizon
        self.model_id = model_id or f"price_prediction_{horizon}m"
        self.model: Optional[xgb.XGBRegressor] = None
        self.feature_names = []

    def fit(self, X, y):
        """Train a fresh XGBRegressor on (X, y), recording feature names."""
        self.model = xgb.XGBRegressor(
            n_estimators=200,
            max_depth=6,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
        )
        # Raw arrays carry no column names; synthesize stable placeholders.
        self.feature_names = (
            [f"feature_{i}" for i in range(X.shape[1])]
            if isinstance(X, np.ndarray)
            else list(X.columns)
        )
        self.model.fit(X, y)

    def predict(self, X):
        """Predict prices; raises ValueError if fit() has not been called."""
        if self.model is None:
            raise ValueError("Model not trained")
        return self.model.predict(X)

    def save(self, filepath: str):
        """Pickle the whole wrapper (model plus metadata) to ``filepath``."""
        with open(filepath, "wb") as f:
            pickle.dump(self, f)

    @classmethod
    def load(cls, filepath: str):
        """Unpickle a wrapper saved by ``save``. Only load trusted files."""
        with open(filepath, "rb") as f:
            return pickle.load(f)

    @property
    def feature_importances_(self):
        """Underlying model's importances; raises ValueError if untrained."""
        if self.model is None:
            raise ValueError("Model not trained")
        return self.model.feature_importances_
__all__ = ["PricePredictionModel"]

View File

@@ -0,0 +1,86 @@
from typing import Dict, Optional
import pandas as pd
import numpy as np
from app.ml.price_prediction.model import PricePredictionModel
from app.ml.price_prediction.trainer import PricePredictionTrainer
from app.utils.logger import get_logger
logger = get_logger(__name__)
class PricePredictor:
    """Serves trained per-horizon price models for online inference."""

    def __init__(self, models_dir: str = "models/price_prediction"):
        self.models_dir = models_dir
        self.models: Dict[int, PricePredictionModel] = {}
        self._load_models()

    def _load_models(self):
        """Populate ``self.models`` from pickled models on disk."""
        self.models = PricePredictionTrainer.load_models(self.models_dir)
        logger.info(f"Loaded {len(self.models)} prediction models")

    def predict(
        self, current_data: pd.DataFrame, horizon: int = 15, region: Optional[str] = None
    ) -> float:
        """Predict the price ``horizon`` minutes ahead from the latest row.

        Raises ValueError when no model exists for ``horizon`` or when no rows
        remain after the optional region filter.
        """
        if horizon not in self.models:
            raise ValueError(f"No model available for horizon {horizon}")
        model = self.models[horizon]
        # Imported lazily to avoid a circular import at module load time.
        from app.ml.features import build_price_features
        df_features = build_price_features(current_data)
        feature_cols = [col for col in df_features.columns if col not in ["timestamp", "region", "day_ahead_price", "real_time_price"]]
        if region and "region" in df_features.columns:
            df_features = df_features[df_features["region"] == region]
        # BUG FIX: an unknown region previously left an empty frame, and
        # ``prediction[0]`` below failed with an opaque IndexError.
        if df_features.empty:
            raise ValueError(f"No rows to predict from (region={region!r})")
        latest_row = df_features.iloc[-1:][feature_cols]
        prediction = model.predict(latest_row.values)
        return float(prediction[0])

    def predict_all_horizons(self, current_data: pd.DataFrame, region: Optional[str] = None) -> Dict[int, float]:
        """Predict for every loaded horizon.

        A horizon whose prediction fails maps to None (logged, not raised).
        """
        predictions = {}
        for horizon in sorted(self.models.keys()):
            try:
                predictions[horizon] = self.predict(current_data, horizon, region)
            except Exception as e:
                logger.error(f"Failed to predict for horizon {horizon}: {e}")
                predictions[horizon] = None
        return predictions

    def predict_with_confidence(
        self, current_data: pd.DataFrame, horizon: int = 15, region: Optional[str] = None
    ) -> Dict:
        """Predict with a fixed ±5% band.

        NOTE(review): the band is a heuristic, not a statistical interval.
        """
        prediction = self.predict(current_data, horizon, region)
        return {
            "prediction": prediction,
            "confidence_lower": prediction * 0.95,
            "confidence_upper": prediction * 1.05,
            "horizon": horizon,
        }

    def get_feature_importance(self, horizon: int) -> pd.DataFrame:
        """Features sorted by importance for the given horizon's model."""
        if horizon not in self.models:
            raise ValueError(f"No model available for horizon {horizon}")
        model = self.models[horizon]
        return pd.DataFrame({
            "feature": model.feature_names,
            "importance": model.feature_importances_,
        }).sort_values("importance", ascending=False)
__all__ = ["PricePredictor"]

View File

@@ -0,0 +1,142 @@
from typing import List, Dict, Tuple, Optional
from pathlib import Path
import pandas as pd
from app.ml.price_prediction.model import PricePredictionModel
from app.utils.logger import get_logger
logger = get_logger(__name__)
class PricePredictionTrainer:
    """Trains one XGBoost model per forecast horizon on processed price data."""

    def __init__(self, config=None):
        self.config = config
        self.data: Optional[pd.DataFrame] = None
        self.models: Dict[int, PricePredictionModel] = {}

    def load_data(self, data_path: Optional[str] = None) -> pd.DataFrame:
        """Concatenate per-region processed parquet files into ``self.data``.

        Missing region files are skipped silently; returns None (and leaves
        ``self.data`` as None) when no file was found.
        """
        if data_path is None:
            data_path = "~/energy-test-data/data/processed"
        path = Path(data_path).expanduser()
        dfs = []
        for region in ["FR", "BE", "DE", "NL", "UK"]:
            file_path = path / f"{region.lower()}_processed.parquet"
            if file_path.exists():
                df = pd.read_parquet(file_path)
                df["region"] = region
                dfs.append(df)
        if dfs:
            self.data = pd.concat(dfs, ignore_index=True)
            logger.info(f"Loaded data: {len(self.data)} rows")
        return self.data

    def prepare_data(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, List[str]]:
        """Build features and return (feature frame, feature column names)."""
        from app.ml.features import build_price_features
        df_features = build_price_features(df)
        df_features = df_features.dropna()
        feature_cols = [col for col in df_features.columns if col not in ["timestamp", "region", "day_ahead_price", "real_time_price"]]
        return df_features, feature_cols

    def train_for_horizon(
        self, df_features: pd.DataFrame, feature_cols: List[str], horizon: int
    ) -> Dict:
        """Train and validate one model targeting ``horizon`` rows ahead.

        NOTE(review): the target is ``real_time_price`` shifted by -horizon
        rows, which assumes one row per minute; with several regions
        concatenated the shift can cross region boundaries — confirm the
        upstream data layout.
        """
        logger.info(f"Training model for {horizon} minute horizon")
        df_features = df_features.sort_values("timestamp")
        n_total = len(df_features)
        # Chronological 70/15/15 split; test partition unused here.
        n_train = int(n_total * 0.70)
        n_val = int(n_total * 0.85)
        train_data = df_features.iloc[:n_train]
        val_data = df_features.iloc[n_train:n_val]
        X_train = train_data[feature_cols]
        y_train = train_data["real_time_price"].shift(-horizon).dropna()
        X_train = X_train.loc[y_train.index]
        X_val = val_data[feature_cols]
        y_val = val_data["real_time_price"].shift(-horizon).dropna()
        X_val = X_val.loc[y_val.index]
        model = PricePredictionModel(horizon=horizon)
        model.fit(X_train, y_train)
        val_preds = model.predict(X_val)
        from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
        mae = mean_absolute_error(y_val, val_preds)
        # ``squared=False`` is deprecated in scikit-learn (removed in 1.6);
        # take the square root of the MSE explicitly instead.
        rmse = mean_squared_error(y_val, val_preds) ** 0.5
        r2 = r2_score(y_val, val_preds)
        self.models[horizon] = model
        results = {
            "horizon": horizon,
            "mae": mae,
            "rmse": rmse,
            "r2": r2,
            "n_train": len(X_train),
            "n_val": len(X_val),
        }
        logger.info(f"Training complete for {horizon}m: MAE={mae:.2f}, RMSE={rmse:.2f}, R2={r2:.3f}")
        return results

    def train_all(self, horizons: Optional[List[int]] = None) -> Dict:
        """Train models for every horizon; per-horizon failures are recorded
        under an "error" key rather than raised."""
        if horizons is None:
            horizons = [1, 5, 15, 60]
        if self.data is None:
            self.load_data()
        # BUG FIX: previously fell through to prepare_data(None) with an
        # opaque failure when no parquet files were found.
        if self.data is None:
            raise ValueError("No training data available; run load_data() with a valid path")
        df_features, feature_cols = self.prepare_data(self.data)
        all_results = {}
        for horizon in horizons:
            try:
                all_results[horizon] = self.train_for_horizon(df_features, feature_cols, horizon)
            except Exception as e:
                logger.error(f"Failed to train for horizon {horizon}: {e}")
                all_results[horizon] = {"error": str(e)}
        return all_results

    def save_models(self, output_dir: str = "models/price_prediction") -> None:
        """Pickle each trained model as ``model_{horizon}min.pkl``."""
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        for horizon, model in self.models.items():
            filepath = output_path / f"model_{horizon}min.pkl"
            model.save(filepath)
            logger.info(f"Saved model for {horizon}m to {filepath}")

    @classmethod
    def load_models(cls, models_dir: str = "models/price_prediction", horizons: Optional[List[int]] = None) -> Dict[int, PricePredictionModel]:
        """Load whichever of the standard horizon models exist on disk."""
        models = {}
        path = Path(models_dir)
        if horizons is None:
            horizons = [1, 5, 15, 60]
        for horizon in horizons:
            filepath = path / f"model_{horizon}min.pkl"
            if filepath.exists():
                models[horizon] = PricePredictionModel.load(filepath)
                logger.info(f"Loaded model for {horizon}m")
        return models
__all__ = ["PricePredictionTrainer"]

View File

@@ -0,0 +1,3 @@
from app.ml.rl_battery import BatteryPolicy, BatteryRLTrainer
__all__ = ["BatteryPolicy", "BatteryRLTrainer"]

View File

@@ -0,0 +1,88 @@
from typing import Dict, Optional
import numpy as np
import pickle
from app.utils.logger import get_logger
logger = get_logger(__name__)
class QLearningAgent:
def __init__(
self,
state_bins: int = 10,
action_space: int = 3,
learning_rate: float = 0.1,
discount_factor: float = 0.95,
epsilon: float = 1.0,
epsilon_decay: float = 0.995,
epsilon_min: float = 0.05,
):
self.state_bins = state_bins
self.action_space = action_space
self.learning_rate = learning_rate
self.discount_factor = discount_factor
self.epsilon = epsilon
self.epsilon_decay = epsilon_decay
self.epsilon_min = epsilon_min
self.q_table: Optional[np.ndarray] = None
self.policy_id = "battery_policy"
def initialize_q_table(self, observation_space: int):
self.q_table = np.zeros((self.state_bins ** observation_space, self.action_space))
def _discretize_state(self, state: np.ndarray) -> int:
discretized = (state * self.state_bins).astype(int)
discretized = np.clip(discretized, 0, self.state_bins - 1)
index = 0
multiplier = 1
for val in discretized:
index += val * multiplier
multiplier *= self.state_bins
return index
def get_action(self, state: np.ndarray, training: bool = True) -> int:
state_idx = self._discretize_state(state)
if training and np.random.random() < self.epsilon:
return np.random.randint(self.action_space)
if self.q_table is None:
return 1
return np.argmax(self.q_table[state_idx])
def update(self, state: np.ndarray, action: int, reward: float, next_state: np.ndarray, done: bool):
if self.q_table is None:
return
state_idx = self._discretize_state(state)
next_state_idx = self._discretize_state(next_state)
current_q = self.q_table[state_idx, action]
if done:
target = reward
else:
next_q = np.max(self.q_table[next_state_idx])
target = reward + self.discount_factor * next_q
self.q_table[state_idx, action] += self.learning_rate * (target - current_q)
def decay_epsilon(self):
self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
def save(self, filepath: str):
with open(filepath, "wb") as f:
pickle.dump(self, f)
logger.info(f"Saved Q-learning policy to {filepath}")
@classmethod
def load(cls, filepath: str):
with open(filepath, "rb") as f:
return pickle.load(f)
__all__ = ["QLearningAgent"]

View File

@@ -0,0 +1,87 @@
from typing import Dict, Tuple
import numpy as np
class BatteryEnvironment:
    """Toy battery-arbitrage simulator for tabular RL.

    Observation: [charge fraction, price / 200 clipped to [0, 1],
    minute-of-day fraction]. Actions: 0 = charge, 1 = hold, 2 = discharge.
    Rewards are EUR / 1000 per step; an episode lasts 1440 one-minute steps
    and the price follows a clipped Gaussian random walk.
    """

    def __init__(
        self,
        capacity: float = 100.0,
        charge_rate: float = 50.0,
        discharge_rate: float = 50.0,
        efficiency: float = 0.9,
        min_reserve: float = 0.1,
        max_charge: float = 0.9,
    ):
        self.capacity = capacity
        self.charge_rate = charge_rate
        self.discharge_rate = discharge_rate
        self.efficiency = efficiency
        self.min_reserve = min_reserve
        self.max_charge = max_charge
        # Start half full at a 50 EUR/MWh reference price.
        self.charge_level = capacity * 0.5
        self.current_price = 50.0
        self.time_step = 0

    def reset(self) -> np.ndarray:
        """Restore the initial state and return the first observation."""
        self.charge_level = self.capacity * 0.5
        self.current_price = 50.0
        self.time_step = 0
        return self._get_state()

    def _get_state(self) -> np.ndarray:
        """Normalized observation vector, each component in [0, 1]."""
        return np.array([
            self.charge_level / self.capacity,
            np.clip(self.current_price / 200.0, 0, 1),
            (self.time_step % 1440) / 1440.0,
        ])

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, Dict]:
        """Apply ``action``, advance the price walk, return (obs, reward, done, info)."""
        price_before = self.current_price
        if action == 0:
            # Charge: limited by rate and the max-charge ceiling; pay market price.
            headroom = self.capacity * self.max_charge - self.charge_level
            bought = min(self.charge_rate, headroom)
            self.charge_level += bought * self.efficiency
            reward = -bought * price_before / 1000.0
        elif action == 2:
            # Discharge: limited by rate and the reserve floor; earn market price.
            available = self.charge_level - self.capacity * self.min_reserve
            sold = min(self.discharge_rate, available)
            revenue = sold * price_before
            self.charge_level -= sold / self.efficiency
            reward = revenue / 1000.0
        else:
            # Hold (or any unrecognized action): nothing moves.
            reward = 0.0
        self.charge_level = np.clip(self.charge_level, self.capacity * self.min_reserve, self.capacity * self.max_charge)
        self.current_price = np.clip(price_before + np.random.randn() * 5, 0, 300)
        self.time_step += 1
        info = {
            "charge_level": self.charge_level,
            "price": self.current_price,
            "action": action,
        }
        return self._get_state(), reward, self.time_step >= 1440, info

    @property
    def action_space(self):
        """Number of discrete actions (charge / hold / discharge)."""
        return 3

    @property
    def observation_space(self):
        """Observation dimensionality."""
        return 3
__all__ = ["BatteryEnvironment"]

View File

@@ -0,0 +1,65 @@
from typing import Dict
from app.ml.rl_battery.agent import QLearningAgent
from app.ml.rl_battery.environment import BatteryEnvironment
from app.utils.logger import get_logger
logger = get_logger(__name__)
class BatteryPolicy:
    """Inference wrapper around a pickled Q-learning battery policy.

    Falls back to a neutral "hold" recommendation when no trained policy file
    is found on disk.
    """

    def __init__(self, policy_path: str = "models/rl_battery"):
        self.policy_path = policy_path
        self.agent: QLearningAgent = None
        self.env: BatteryEnvironment = None
        self._load_policy()

    def _load_policy(self):
        """Load ``battery_policy.pkl`` if present; otherwise stay unloaded."""
        from pathlib import Path
        filepath = Path(self.policy_path) / "battery_policy.pkl"
        if filepath.exists():
            self.agent = QLearningAgent.load(filepath)
            self.env = BatteryEnvironment()
            logger.info(f"Loaded policy from {filepath}")

    def get_action(
        self,
        charge_level: float,
        current_price: float,
        price_forecast_1m: float = 0,
        price_forecast_5m: float = 0,
        price_forecast_15m: float = 0,
        hour: int = 0,
    ) -> Dict:
        """Recommend charge/hold/discharge for the given battery state.

        The ``price_forecast_*`` arguments are accepted for interface
        stability but are not part of the policy's state. Confidence is a
        heuristic spread of the Q-values, clamped at 1.0.
        """
        if self.agent is None:
            # No trained policy on disk — neutral recommendation.
            return {
                "action": "hold",
                "q_values": [0.0, 0.0, 0.0],
                "confidence": 0.0,
            }
        # Project the live battery state into the simulator's observation.
        self.env.charge_level = charge_level
        self.env.current_price = current_price
        self.env.time_step = hour * 60
        observation = self.env._get_state()
        chosen = self.agent.get_action(observation, training=False)
        label = ["charge", "hold", "discharge"][chosen]
        row = self.agent._discretize_state(observation)
        q_values = self.agent.q_table[row].tolist() if self.agent.q_table is not None else [0.0, 0.0, 0.0]
        best = max(q_values) if q_values else 0.0
        spread = (best - min(q_values)) / (best + 0.001) if q_values else 0.0
        return {
            "action": label,
            "q_values": q_values,
            "confidence": min(spread, 1.0),
        }
__all__ = ["BatteryPolicy"]

View File

@@ -0,0 +1,95 @@
from typing import Dict
from app.ml.rl_battery.environment import BatteryEnvironment
from app.ml.rl_battery.agent import QLearningAgent
from app.utils.logger import get_logger
logger = get_logger(__name__)
class BatteryRLTrainer:
    """Trains the tabular Q-learning battery policy on the simulator."""

    def __init__(self, config=None):
        self.config = config or {}
        self.agent: QLearningAgent = None
        self.env: BatteryEnvironment = None

    def _create_agent(self) -> QLearningAgent:
        """Build a QLearningAgent from config, with the standard defaults."""
        return QLearningAgent(
            state_bins=self.config.get("charge_level_bins", 10),
            action_space=3,
            learning_rate=self.config.get("learning_rate", 0.1),
            discount_factor=self.config.get("discount_factor", 0.95),
            epsilon=self.config.get("epsilon", 1.0),
            epsilon_decay=self.config.get("epsilon_decay", 0.995),
            epsilon_min=self.config.get("epsilon_min", 0.05),
        )

    def load_data(self):
        """Placeholder: the simulator generates its own prices; nothing to load."""
        pass

    def train(self, n_episodes: int = 1000, region: str = "FR") -> Dict:
        """Run ``n_episodes`` Q-learning episodes; return training stats.

        NOTE(review): ``region`` is currently unused — the environment's price
        process is synthetic; confirm before relying on per-region policies.
        """
        logger.info(f"Starting RL training for {n_episodes} episodes")
        self.env = BatteryEnvironment(
            capacity=100.0,
            charge_rate=50.0,
            discharge_rate=50.0,
            efficiency=0.9,
            min_reserve=0.1,
            max_charge=0.9,
        )
        self.agent = self._create_agent()
        self.agent.initialize_q_table(self.env.observation_space)
        episode_rewards = []
        for episode in range(n_episodes):
            state = self.env.reset()
            total_reward = 0
            while True:
                action = self.agent.get_action(state, training=True)
                next_state, reward, done, info = self.env.step(action)
                self.agent.update(state, action, reward, next_state, done)
                total_reward += reward
                state = next_state
                if done:
                    break
            episode_rewards.append(total_reward)
            self.agent.decay_epsilon()
            if (episode + 1) % 100 == 0:
                avg_reward = sum(episode_rewards[-100:]) / 100
                logger.info(f"Episode {episode + 1}/{n_episodes}, Avg Reward: {avg_reward:.2f}, Epsilon: {self.agent.epsilon:.3f}")
        # BUG FIX: the final average previously divided by a hard-coded 100,
        # under-reporting whenever fewer than 100 episodes were run.
        tail = episode_rewards[-100:]
        final_avg_reward = sum(tail) / len(tail) if tail else 0.0
        results = {
            "n_episodes": n_episodes,
            "final_avg_reward": final_avg_reward,
            "episode_rewards": episode_rewards,
            "final_epsilon": self.agent.epsilon,
        }
        logger.info(f"Training complete. Final avg reward: {final_avg_reward:.2f}")
        return results

    def save(self, output_dir: str = "models/rl_battery") -> None:
        """Persist the trained policy as ``battery_policy.pkl`` under ``output_dir``."""
        from pathlib import Path
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        filepath = output_path / "battery_policy.pkl"
        self.agent.save(filepath)
        logger.info(f"Saved trained policy to {filepath}")
__all__ = ["BatteryRLTrainer"]

View File

@@ -0,0 +1,3 @@
from app.ml.training import CLITrainer
__all__ = ["CLITrainer"]

View File

@@ -0,0 +1,49 @@
import argparse
from app.ml.price_prediction.trainer import PricePredictionTrainer
from app.ml.rl_battery.trainer import BatteryRLTrainer
from app.utils.logger import get_logger, setup_logger
setup_logger()
logger = get_logger(__name__)
def main():
    """CLI entry: ``price`` trains forecasters, ``rl`` trains the battery policy."""
    parser = argparse.ArgumentParser(description="Energy Trading ML Training CLI")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    price_parser = subparsers.add_parser("price", help="Train price prediction models")
    price_parser.add_argument(
        "--horizons", nargs="+", type=int, default=[1, 5, 15, 60],
        help="Prediction horizons in minutes",
    )
    price_parser.add_argument(
        "--output", type=str, default="models/price_prediction",
        help="Output directory",
    )

    rl_parser = subparsers.add_parser("rl", help="Train RL battery policy")
    rl_parser.add_argument("--episodes", type=int, default=1000, help="Number of training episodes")
    rl_parser.add_argument("--region", type=str, default="FR", help="Region to train for")
    rl_parser.add_argument("--output", type=str, default="models/rl_battery", help="Output directory")

    args = parser.parse_args()
    if args.command == "price":
        _run_price_training(args)
    elif args.command == "rl":
        _run_rl_training(args)
    else:
        parser.print_help()


def _run_price_training(args):
    """Run price-prediction training and report per-horizon metrics."""
    logger.info(f"Training price prediction models for horizons: {args.horizons}")
    trainer = PricePredictionTrainer()
    results = trainer.train_all(horizons=args.horizons)
    trainer.save_models(output_dir=args.output)
    logger.info("Training complete!")
    for horizon, result in results.items():
        if "error" not in result:
            logger.info(f" {horizon}m: MAE={result['mae']:.2f}, RMSE={result['rmse']:.2f}, R2={result['r2']:.3f}")


def _run_rl_training(args):
    """Run RL battery-policy training and report final stats."""
    logger.info(f"Training RL battery policy for {args.episodes} episodes")
    trainer = BatteryRLTrainer()
    results = trainer.train(n_episodes=args.episodes, region=args.region)
    trainer.save(output_dir=args.output)
    logger.info("Training complete!")
    logger.info(f" Final avg reward: {results['final_avg_reward']:.2f}")
    logger.info(f" Final epsilon: {results['final_epsilon']:.3f}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,3 @@
from app.ml.utils import time_based_split, MLConfig
__all__ = ["time_based_split", "MLConfig"]

View File

@@ -0,0 +1,16 @@
from dataclasses import dataclass
from typing import List, Dict, Any
@dataclass
class MLConfig:
    """Runtime knobs shared by ML training and inference code."""

    enable_gpu: bool = False  # prefer GPU-capable training when available
    n_jobs: int = 4           # parallel worker count
    verbose: bool = True      # emit progress logging

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "MLConfig":
        """Build a config from a dict, silently dropping unknown keys."""
        known = cls.__annotations__
        kwargs = {key: value for key, value in config_dict.items() if key in known}
        return cls(**kwargs)
__all__ = ["MLConfig"]

View File

@@ -0,0 +1,25 @@
from typing import Tuple
import pandas as pd
from datetime import datetime
def time_based_split(
df: pd.DataFrame,
timestamp_col: str = "timestamp",
train_pct: float = 0.70,
val_pct: float = 0.85,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
df_sorted = df.sort_values(timestamp_col)
n_total = len(df_sorted)
n_train = int(n_total * train_pct)
n_val = int(n_total * val_pct)
train = df_sorted.iloc[:n_train]
val = df_sorted.iloc[n_train:n_val]
test = df_sorted.iloc[n_val:]
return train, val, test
__all__ = ["time_based_split"]

View File

@@ -0,0 +1,4 @@
from app.ml.utils.data_split import time_based_split
from app.ml.utils.config import MLConfig
__all__ = ["time_based_split", "MLConfig"]