Add FastAPI backend for energy trading system

Implements FastAPI backend with ML model support for energy trading,
including price prediction models and RL-based battery trading policy.
Features dashboard, trading, backtest, and settings API routes with
WebSocket support for real-time updates.
This commit is contained in:
2026-02-12 00:59:26 +07:00
parent a22a13f6f4
commit fe76bc7629
72 changed files with 2931 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
from app.ml.features import (
build_price_features,
build_battery_features,
)
__all__ = ["build_price_features", "build_battery_features"]

View File

@@ -0,0 +1,3 @@
from app.ml.evaluation import ModelEvaluator, BacktestEvaluator
__all__ = ["ModelEvaluator", "BacktestEvaluator"]

View File

@@ -0,0 +1,3 @@
from app.ml.evaluation.metrics import BacktestEvaluator
__all__ = ["BacktestEvaluator"]

View File

@@ -0,0 +1,77 @@
from typing import Dict, List
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
class ModelEvaluator:
@staticmethod
def calculate_metrics(y_true, y_pred) -> Dict[str, float]:
mae = mean_absolute_error(y_true, y_pred)
rmse = mean_squared_error(y_true, y_pred, squared=False)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
r2 = r2_score(y_true, y_pred)
return {
"mae": float(mae),
"rmse": float(rmse),
"mape": float(mape) if not np.isnan(mape) else 0.0,
"r2": float(r2),
}
@staticmethod
def calculate_sharpe_ratio(returns: np.ndarray, risk_free_rate: float = 0.0) -> float:
if len(returns) == 0 or np.std(returns) == 0:
return 0.0
excess_returns = returns - risk_free_rate
return float(np.mean(excess_returns) / np.std(excess_returns))
@staticmethod
def calculate_max_drawdown(values: np.ndarray) -> float:
if len(values) == 0:
return 0.0
cumulative = np.cumsum(values)
running_max = np.maximum.accumulate(cumulative)
drawdown = (cumulative - running_max)
return float(drawdown.min())
class BacktestEvaluator:
def __init__(self):
self.trades: List[Dict] = []
def add_trade(self, trade: Dict):
self.trades.append(trade)
def evaluate(self) -> Dict[str, float]:
if not self.trades:
return {
"total_revenue": 0.0,
"total_trades": 0,
"win_rate": 0.0,
"sharpe_ratio": 0.0,
"max_drawdown": 0.0,
}
total_revenue = sum(t.get("revenue", 0) for t in self.trades)
winning_trades = sum(1 for t in self.trades if t.get("revenue", 0) > 0)
win_rate = winning_trades / len(self.trades) if self.trades else 0.0
returns = np.array([t.get("revenue", 0) for t in self.trades])
sharpe_ratio = ModelEvaluator.calculate_sharpe_ratio(returns)
max_drawdown = ModelEvaluator.calculate_max_drawdown(returns)
return {
"total_revenue": total_revenue,
"total_trades": len(self.trades),
"win_rate": win_rate,
"sharpe_ratio": sharpe_ratio,
"max_drawdown": max_drawdown,
}
def reset(self):
self.trades = []
__all__ = ["ModelEvaluator", "BacktestEvaluator"]

View File

@@ -0,0 +1,3 @@
from app.ml.evaluation.metrics import ModelEvaluator, BacktestEvaluator
__all__ = ["ModelEvaluator", "BacktestEvaluator"]

View File

@@ -0,0 +1,53 @@
from app.ml.features.lag_features import add_lag_features
from app.ml.features.rolling_features import add_rolling_features
from app.ml.features.time_features import add_time_features
from app.ml.features.regional_features import add_regional_features
from app.ml.features.battery_features import add_battery_features
from typing import List, Optional
import pandas as pd
def build_price_features(
    df: pd.DataFrame,
    price_col: str = "real_time_price",
    lags: Optional[List[int]] = None,
    windows: Optional[List[int]] = None,
    regions: Optional[List[str]] = None,
    include_time: bool = True,
    include_regional: bool = True,
) -> pd.DataFrame:
    """Assemble the full price-forecasting feature frame.

    Applies lag and rolling-window features on ``price_col`` when that column
    exists, optional calendar features (requires a ``timestamp`` column) and
    optional per-region price spreads. Returns a new DataFrame; ``df`` is not
    modified.
    """
    lag_steps = lags if lags is not None else [1, 5, 10, 15, 30, 60]
    window_sizes = windows if windows is not None else [5, 10, 15, 30, 60]
    features = df.copy()
    if price_col in features.columns:
        features = add_lag_features(features, price_col, lag_steps)
        features = add_rolling_features(features, price_col, window_sizes)
    if include_time and "timestamp" in features.columns:
        features = add_time_features(features)
    if include_regional and regions:
        features = add_regional_features(features, regions)
    return features
def build_battery_features(
    df: pd.DataFrame,
    price_df: pd.DataFrame,
    battery_col: str = "charge_level_mwh",
    capacity_col: str = "capacity_mwh",
    timestamp_col: str = "timestamp",
    battery_id_col: str = "battery_id",
) -> pd.DataFrame:
    """Build battery-state features for ``df`` against market prices.

    Thin wrapper around ``add_battery_features`` kept for API symmetry with
    ``build_price_features``. Returns a new DataFrame; ``df`` is not modified.
    """
    return add_battery_features(
        df.copy(), price_df, battery_col, capacity_col, timestamp_col, battery_id_col
    )
__all__ = ["build_price_features", "build_battery_features"]

View File

@@ -0,0 +1,35 @@
import pandas as pd
def add_battery_features(
df: pd.DataFrame,
price_df: pd.DataFrame,
battery_col: str = "charge_level_mwh",
capacity_col: str = "capacity_mwh",
timestamp_col: str = "timestamp",
battery_id_col: str = "battery_id",
) -> pd.DataFrame:
result = df.copy()
if battery_col in result.columns and capacity_col in result.columns:
result["charge_level_pct"] = result[battery_col] / result[capacity_col]
result["discharge_potential_mwh"] = result[battery_col] * result.get("efficiency", 0.9)
result["charge_capacity_mwh"] = result[capacity_col] - result[battery_col]
if price_df is not None and "real_time_price" in price_df.columns and timestamp_col in result.columns:
merged = result.merge(
price_df[[timestamp_col, "real_time_price"]],
on=timestamp_col,
how="left",
suffixes=("", "_market")
)
if "real_time_price_market" in merged.columns:
result["market_price"] = merged["real_time_price_market"]
result["charge_cost_potential"] = result["charge_capacity_mwh"] * result["market_price"]
result["discharge_revenue_potential"] = result["discharge_potential_mwh"] * result["market_price"]
return result
__all__ = ["add_battery_features"]

View File

@@ -0,0 +1,14 @@
from typing import List
import pandas as pd
def add_lag_features(df: pd.DataFrame, col: str, lags: List[int]) -> pd.DataFrame:
result = df.copy()
for lag in lags:
result[f"{col}_lag_{lag}"] = result[col].shift(lag)
return result
__all__ = ["add_lag_features"]

View File

@@ -0,0 +1,18 @@
from typing import List
import pandas as pd
def add_regional_features(df: pd.DataFrame, regions: List[str]) -> pd.DataFrame:
result = df.copy()
if "region" in result.columns and "real_time_price" in result.columns:
avg_price_by_region = result.groupby("region")["real_time_price"].mean()
for region in regions:
region_avg = avg_price_by_region.get(region, 0)
result[f"price_diff_{region}"] = result["real_time_price"] - region_avg
return result
__all__ = ["add_regional_features"]

View File

@@ -0,0 +1,17 @@
from typing import List
import pandas as pd
def add_rolling_features(df: pd.DataFrame, col: str, windows: List[int]) -> pd.DataFrame:
result = df.copy()
for window in windows:
result[f"{col}_rolling_mean_{window}"] = result[col].rolling(window=window).mean()
result[f"{col}_rolling_std_{window}"] = result[col].rolling(window=window).std()
result[f"{col}_rolling_min_{window}"] = result[col].rolling(window=window).min()
result[f"{col}_rolling_max_{window}"] = result[col].rolling(window=window).max()
return result
__all__ = ["add_rolling_features"]

View File

@@ -0,0 +1,35 @@
import pandas as pd
def add_time_features(df: pd.DataFrame, timestamp_col: str = "timestamp") -> pd.DataFrame:
result = df.copy()
if timestamp_col not in result.columns:
return result
result[timestamp_col] = pd.to_datetime(result[timestamp_col])
result["hour"] = result[timestamp_col].dt.hour
result["day_of_week"] = result[timestamp_col].dt.dayofweek
result["day_of_month"] = result[timestamp_col].dt.day
result["month"] = result[timestamp_col].dt.month
result["hour_sin"] = _sin_encode(result["hour"], 24)
result["hour_cos"] = _cos_encode(result["hour"], 24)
result["day_sin"] = _sin_encode(result["day_of_week"], 7)
result["day_cos"] = _cos_encode(result["day_of_week"], 7)
return result
def _sin_encode(x, period):
import numpy as np
return np.sin(2 * np.pi * x / period)
def _cos_encode(x, period):
import numpy as np
return np.cos(2 * np.pi * x / period)
__all__ = ["add_time_features"]

View File

@@ -0,0 +1,3 @@
from app.ml.model_management import ModelRegistry
__all__ = ["ModelRegistry"]

View File

@@ -0,0 +1,99 @@
from typing import Dict, List, Optional
from pathlib import Path
import json
from datetime import datetime
from app.utils.logger import get_logger
logger = get_logger(__name__)
class ModelRegistry:
    """JSON-file-backed registry of trained model versions.

    In memory the registry maps ``model_id`` to
    ``{"type": ..., "versions": [...], "latest": version}`` and is persisted
    to ``registry_path`` after every registration.
    """

    def __init__(self, registry_path: str = "models/registry.json"):
        self.registry_path = Path(registry_path)
        self._registry: Dict[str, Dict] = {}
        self._load()

    def _load(self):
        """Populate the in-memory registry from disk, if the file exists."""
        if self.registry_path.exists():
            with open(self.registry_path, encoding="utf-8") as f:
                self._registry = json.load(f)
            logger.info(f"Loaded registry from {self.registry_path}")

    def _save(self):
        """Write the registry to disk, creating parent directories as needed."""
        self.registry_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.registry_path, "w", encoding="utf-8") as f:
            # default=str makes non-JSON values (e.g. Paths) serializable.
            json.dump(self._registry, f, indent=2, default=str)
        logger.info(f"Saved registry to {self.registry_path}")

    def register_model(
        self,
        model_type: str,
        model_id: str,
        version: str,
        filepath: str,
        metadata: Optional[Dict] = None,
    ) -> None:
        """Append a version record for ``model_id``, mark it latest, persist."""
        # NOTE(review): datetime.utcnow() is deprecated in Python 3.12; kept
        # for timestamp-format stability — consider datetime.now(timezone.utc).
        timestamp = datetime.utcnow().isoformat()
        if model_id not in self._registry:
            self._registry[model_id] = {
                "type": model_type,
                "versions": [],
            }
        self._registry[model_id]["versions"].append({
            "version": version,
            "filepath": filepath,
            "timestamp": timestamp,
            "metadata": metadata or {},
        })
        self._registry[model_id]["latest"] = version
        self._save()
        logger.info(f"Registered model {model_id} version {version}")

    def get_latest_version(self, model_id: str) -> Optional[Dict]:
        """Return the record of the latest registered version, or None."""
        # Deduplicated: delegates to get_model, which defaults to "latest".
        return self.get_model(model_id)

    def list_models(self) -> List[Dict]:
        """Summaries of every registered model (id, type, version counts)."""
        models = []
        for model_id, model_info in self._registry.items():
            latest = self.get_latest_version(model_id)
            models.append({
                "model_id": model_id,
                "type": model_info.get("type"),
                "latest_version": model_info.get("latest"),
                "total_versions": len(model_info.get("versions", [])),
                "latest_info": latest,
            })
        return models

    def get_model(self, model_id: str, version: Optional[str] = None) -> Optional[Dict]:
        """Return the record for ``version`` of ``model_id``.

        ``version=None`` selects the latest registered version. Returns None
        for unknown models or versions.
        """
        entry = self._registry.get(model_id)
        if entry is None:
            return None
        if version is None:
            version = entry.get("latest")
        if not version:
            return None
        return next(
            (info for info in entry["versions"] if info["version"] == version),
            None,
        )
__all__ = ["ModelRegistry"]

View File

@@ -0,0 +1,3 @@
from app.ml.price_prediction import PricePredictor, PricePredictionTrainer
__all__ = ["PricePredictor", "PricePredictionTrainer"]

View File

@@ -0,0 +1,52 @@
import pickle
from typing import Optional
import xgboost as xgb
import numpy as np
class PricePredictionModel:
    """XGBoost regressor wrapper for one fixed prediction horizon (minutes)."""

    def __init__(self, horizon: int, model_id: Optional[str] = None):
        self.horizon = horizon
        self.model_id = model_id or f"price_prediction_{horizon}m"
        self.model: Optional[xgb.XGBRegressor] = None
        self.feature_names = []

    def fit(self, X, y):
        """Train a fresh XGBRegressor on (X, y), recording feature names."""
        self.model = xgb.XGBRegressor(
            n_estimators=200,
            max_depth=6,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
        )
        # Raw arrays carry no column names; synthesize stable placeholders.
        self.feature_names = (
            [f"feature_{i}" for i in range(X.shape[1])]
            if isinstance(X, np.ndarray)
            else list(X.columns)
        )
        self.model.fit(X, y)

    def predict(self, X):
        """Predict prices; raises ValueError if fit() has not been called."""
        if self.model is None:
            raise ValueError("Model not trained")
        return self.model.predict(X)

    def save(self, filepath: str):
        """Pickle the whole wrapper (model plus metadata) to ``filepath``."""
        with open(filepath, "wb") as f:
            pickle.dump(self, f)

    @classmethod
    def load(cls, filepath: str):
        """Unpickle a wrapper saved by ``save``. Only load trusted files."""
        with open(filepath, "rb") as f:
            return pickle.load(f)

    @property
    def feature_importances_(self):
        """Underlying model's importances; raises ValueError if untrained."""
        if self.model is None:
            raise ValueError("Model not trained")
        return self.model.feature_importances_
__all__ = ["PricePredictionModel"]

View File

@@ -0,0 +1,86 @@
from typing import Dict, Optional
import pandas as pd
import numpy as np
from app.ml.price_prediction.model import PricePredictionModel
from app.ml.price_prediction.trainer import PricePredictionTrainer
from app.utils.logger import get_logger
logger = get_logger(__name__)
class PricePredictor:
    """Serves trained per-horizon price models for online inference."""

    def __init__(self, models_dir: str = "models/price_prediction"):
        self.models_dir = models_dir
        self.models: Dict[int, PricePredictionModel] = {}
        self._load_models()

    def _load_models(self):
        """Populate ``self.models`` from pickled models on disk."""
        self.models = PricePredictionTrainer.load_models(self.models_dir)
        logger.info(f"Loaded {len(self.models)} prediction models")

    def predict(
        self, current_data: pd.DataFrame, horizon: int = 15, region: Optional[str] = None
    ) -> float:
        """Predict the price ``horizon`` minutes ahead from the latest row.

        Raises ValueError when no model exists for ``horizon`` or when no rows
        remain after the optional region filter.
        """
        if horizon not in self.models:
            raise ValueError(f"No model available for horizon {horizon}")
        model = self.models[horizon]
        # Imported lazily to avoid a circular import at module load time.
        from app.ml.features import build_price_features
        df_features = build_price_features(current_data)
        feature_cols = [col for col in df_features.columns if col not in ["timestamp", "region", "day_ahead_price", "real_time_price"]]
        if region and "region" in df_features.columns:
            df_features = df_features[df_features["region"] == region]
        # BUG FIX: an unknown region previously left an empty frame, and
        # ``prediction[0]`` below failed with an opaque IndexError.
        if df_features.empty:
            raise ValueError(f"No rows to predict from (region={region!r})")
        latest_row = df_features.iloc[-1:][feature_cols]
        prediction = model.predict(latest_row.values)
        return float(prediction[0])

    def predict_all_horizons(self, current_data: pd.DataFrame, region: Optional[str] = None) -> Dict[int, float]:
        """Predict for every loaded horizon.

        A horizon whose prediction fails maps to None (logged, not raised).
        """
        predictions = {}
        for horizon in sorted(self.models.keys()):
            try:
                predictions[horizon] = self.predict(current_data, horizon, region)
            except Exception as e:
                logger.error(f"Failed to predict for horizon {horizon}: {e}")
                predictions[horizon] = None
        return predictions

    def predict_with_confidence(
        self, current_data: pd.DataFrame, horizon: int = 15, region: Optional[str] = None
    ) -> Dict:
        """Predict with a fixed ±5% band.

        NOTE(review): the band is a heuristic, not a statistical interval.
        """
        prediction = self.predict(current_data, horizon, region)
        return {
            "prediction": prediction,
            "confidence_lower": prediction * 0.95,
            "confidence_upper": prediction * 1.05,
            "horizon": horizon,
        }

    def get_feature_importance(self, horizon: int) -> pd.DataFrame:
        """Features sorted by importance for the given horizon's model."""
        if horizon not in self.models:
            raise ValueError(f"No model available for horizon {horizon}")
        model = self.models[horizon]
        return pd.DataFrame({
            "feature": model.feature_names,
            "importance": model.feature_importances_,
        }).sort_values("importance", ascending=False)
__all__ = ["PricePredictor"]

View File

@@ -0,0 +1,142 @@
from typing import List, Dict, Tuple, Optional
from pathlib import Path
import pandas as pd
from app.ml.price_prediction.model import PricePredictionModel
from app.utils.logger import get_logger
logger = get_logger(__name__)
class PricePredictionTrainer:
    """Trains one XGBoost model per forecast horizon on processed price data."""

    def __init__(self, config=None):
        self.config = config
        self.data: Optional[pd.DataFrame] = None
        self.models: Dict[int, PricePredictionModel] = {}

    def load_data(self, data_path: Optional[str] = None) -> pd.DataFrame:
        """Concatenate per-region processed parquet files into ``self.data``.

        Missing region files are skipped silently; returns None (and leaves
        ``self.data`` as None) when no file was found.
        """
        if data_path is None:
            data_path = "~/energy-test-data/data/processed"
        path = Path(data_path).expanduser()
        dfs = []
        for region in ["FR", "BE", "DE", "NL", "UK"]:
            file_path = path / f"{region.lower()}_processed.parquet"
            if file_path.exists():
                df = pd.read_parquet(file_path)
                df["region"] = region
                dfs.append(df)
        if dfs:
            self.data = pd.concat(dfs, ignore_index=True)
            logger.info(f"Loaded data: {len(self.data)} rows")
        return self.data

    def prepare_data(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, List[str]]:
        """Build features and return (feature frame, feature column names)."""
        from app.ml.features import build_price_features
        df_features = build_price_features(df)
        df_features = df_features.dropna()
        feature_cols = [col for col in df_features.columns if col not in ["timestamp", "region", "day_ahead_price", "real_time_price"]]
        return df_features, feature_cols

    def train_for_horizon(
        self, df_features: pd.DataFrame, feature_cols: List[str], horizon: int
    ) -> Dict:
        """Train and validate one model targeting ``horizon`` rows ahead.

        NOTE(review): the target is ``real_time_price`` shifted by -horizon
        rows, which assumes one row per minute; with several regions
        concatenated the shift can cross region boundaries — confirm the
        upstream data layout.
        """
        logger.info(f"Training model for {horizon} minute horizon")
        df_features = df_features.sort_values("timestamp")
        n_total = len(df_features)
        # Chronological 70/15/15 split; test partition unused here.
        n_train = int(n_total * 0.70)
        n_val = int(n_total * 0.85)
        train_data = df_features.iloc[:n_train]
        val_data = df_features.iloc[n_train:n_val]
        X_train = train_data[feature_cols]
        y_train = train_data["real_time_price"].shift(-horizon).dropna()
        X_train = X_train.loc[y_train.index]
        X_val = val_data[feature_cols]
        y_val = val_data["real_time_price"].shift(-horizon).dropna()
        X_val = X_val.loc[y_val.index]
        model = PricePredictionModel(horizon=horizon)
        model.fit(X_train, y_train)
        val_preds = model.predict(X_val)
        from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
        mae = mean_absolute_error(y_val, val_preds)
        # ``squared=False`` is deprecated in scikit-learn (removed in 1.6);
        # take the square root of the MSE explicitly instead.
        rmse = mean_squared_error(y_val, val_preds) ** 0.5
        r2 = r2_score(y_val, val_preds)
        self.models[horizon] = model
        results = {
            "horizon": horizon,
            "mae": mae,
            "rmse": rmse,
            "r2": r2,
            "n_train": len(X_train),
            "n_val": len(X_val),
        }
        logger.info(f"Training complete for {horizon}m: MAE={mae:.2f}, RMSE={rmse:.2f}, R2={r2:.3f}")
        return results

    def train_all(self, horizons: Optional[List[int]] = None) -> Dict:
        """Train models for every horizon; per-horizon failures are recorded
        under an "error" key rather than raised."""
        if horizons is None:
            horizons = [1, 5, 15, 60]
        if self.data is None:
            self.load_data()
        # BUG FIX: previously fell through to prepare_data(None) with an
        # opaque failure when no parquet files were found.
        if self.data is None:
            raise ValueError("No training data available; run load_data() with a valid path")
        df_features, feature_cols = self.prepare_data(self.data)
        all_results = {}
        for horizon in horizons:
            try:
                all_results[horizon] = self.train_for_horizon(df_features, feature_cols, horizon)
            except Exception as e:
                logger.error(f"Failed to train for horizon {horizon}: {e}")
                all_results[horizon] = {"error": str(e)}
        return all_results

    def save_models(self, output_dir: str = "models/price_prediction") -> None:
        """Pickle each trained model as ``model_{horizon}min.pkl``."""
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        for horizon, model in self.models.items():
            filepath = output_path / f"model_{horizon}min.pkl"
            model.save(filepath)
            logger.info(f"Saved model for {horizon}m to {filepath}")

    @classmethod
    def load_models(cls, models_dir: str = "models/price_prediction", horizons: Optional[List[int]] = None) -> Dict[int, PricePredictionModel]:
        """Load whichever of the standard horizon models exist on disk."""
        models = {}
        path = Path(models_dir)
        if horizons is None:
            horizons = [1, 5, 15, 60]
        for horizon in horizons:
            filepath = path / f"model_{horizon}min.pkl"
            if filepath.exists():
                models[horizon] = PricePredictionModel.load(filepath)
                logger.info(f"Loaded model for {horizon}m")
        return models
__all__ = ["PricePredictionTrainer"]

View File

@@ -0,0 +1,3 @@
from app.ml.rl_battery import BatteryPolicy, BatteryRLTrainer
__all__ = ["BatteryPolicy", "BatteryRLTrainer"]

View File

@@ -0,0 +1,88 @@
from typing import Dict, Optional
import numpy as np
import pickle
from app.utils.logger import get_logger
logger = get_logger(__name__)
class QLearningAgent:
def __init__(
self,
state_bins: int = 10,
action_space: int = 3,
learning_rate: float = 0.1,
discount_factor: float = 0.95,
epsilon: float = 1.0,
epsilon_decay: float = 0.995,
epsilon_min: float = 0.05,
):
self.state_bins = state_bins
self.action_space = action_space
self.learning_rate = learning_rate
self.discount_factor = discount_factor
self.epsilon = epsilon
self.epsilon_decay = epsilon_decay
self.epsilon_min = epsilon_min
self.q_table: Optional[np.ndarray] = None
self.policy_id = "battery_policy"
def initialize_q_table(self, observation_space: int):
self.q_table = np.zeros((self.state_bins ** observation_space, self.action_space))
def _discretize_state(self, state: np.ndarray) -> int:
discretized = (state * self.state_bins).astype(int)
discretized = np.clip(discretized, 0, self.state_bins - 1)
index = 0
multiplier = 1
for val in discretized:
index += val * multiplier
multiplier *= self.state_bins
return index
def get_action(self, state: np.ndarray, training: bool = True) -> int:
state_idx = self._discretize_state(state)
if training and np.random.random() < self.epsilon:
return np.random.randint(self.action_space)
if self.q_table is None:
return 1
return np.argmax(self.q_table[state_idx])
def update(self, state: np.ndarray, action: int, reward: float, next_state: np.ndarray, done: bool):
if self.q_table is None:
return
state_idx = self._discretize_state(state)
next_state_idx = self._discretize_state(next_state)
current_q = self.q_table[state_idx, action]
if done:
target = reward
else:
next_q = np.max(self.q_table[next_state_idx])
target = reward + self.discount_factor * next_q
self.q_table[state_idx, action] += self.learning_rate * (target - current_q)
def decay_epsilon(self):
self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
def save(self, filepath: str):
with open(filepath, "wb") as f:
pickle.dump(self, f)
logger.info(f"Saved Q-learning policy to {filepath}")
@classmethod
def load(cls, filepath: str):
with open(filepath, "rb") as f:
return pickle.load(f)
__all__ = ["QLearningAgent"]

View File

@@ -0,0 +1,87 @@
from typing import Dict, Tuple
import numpy as np
class BatteryEnvironment:
    """Toy battery-arbitrage simulator for tabular RL.

    Observation: [charge fraction, price / 200 clipped to [0, 1],
    minute-of-day fraction]. Actions: 0 = charge, 1 = hold, 2 = discharge.
    Rewards are EUR / 1000 per step; an episode lasts 1440 one-minute steps
    and the price follows a clipped Gaussian random walk.
    """

    def __init__(
        self,
        capacity: float = 100.0,
        charge_rate: float = 50.0,
        discharge_rate: float = 50.0,
        efficiency: float = 0.9,
        min_reserve: float = 0.1,
        max_charge: float = 0.9,
    ):
        self.capacity = capacity
        self.charge_rate = charge_rate
        self.discharge_rate = discharge_rate
        self.efficiency = efficiency
        self.min_reserve = min_reserve
        self.max_charge = max_charge
        # Start half full at a 50 EUR/MWh reference price.
        self.charge_level = capacity * 0.5
        self.current_price = 50.0
        self.time_step = 0

    def reset(self) -> np.ndarray:
        """Restore the initial state and return the first observation."""
        self.charge_level = self.capacity * 0.5
        self.current_price = 50.0
        self.time_step = 0
        return self._get_state()

    def _get_state(self) -> np.ndarray:
        """Normalized observation vector, each component in [0, 1]."""
        return np.array([
            self.charge_level / self.capacity,
            np.clip(self.current_price / 200.0, 0, 1),
            (self.time_step % 1440) / 1440.0,
        ])

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, Dict]:
        """Apply ``action``, advance the price walk, return (obs, reward, done, info)."""
        price_before = self.current_price
        if action == 0:
            # Charge: limited by rate and the max-charge ceiling; pay market price.
            headroom = self.capacity * self.max_charge - self.charge_level
            bought = min(self.charge_rate, headroom)
            self.charge_level += bought * self.efficiency
            reward = -bought * price_before / 1000.0
        elif action == 2:
            # Discharge: limited by rate and the reserve floor; earn market price.
            available = self.charge_level - self.capacity * self.min_reserve
            sold = min(self.discharge_rate, available)
            revenue = sold * price_before
            self.charge_level -= sold / self.efficiency
            reward = revenue / 1000.0
        else:
            # Hold (or any unrecognized action): nothing moves.
            reward = 0.0
        self.charge_level = np.clip(self.charge_level, self.capacity * self.min_reserve, self.capacity * self.max_charge)
        self.current_price = np.clip(price_before + np.random.randn() * 5, 0, 300)
        self.time_step += 1
        info = {
            "charge_level": self.charge_level,
            "price": self.current_price,
            "action": action,
        }
        return self._get_state(), reward, self.time_step >= 1440, info

    @property
    def action_space(self):
        """Number of discrete actions (charge / hold / discharge)."""
        return 3

    @property
    def observation_space(self):
        """Observation dimensionality."""
        return 3
__all__ = ["BatteryEnvironment"]

View File

@@ -0,0 +1,65 @@
from typing import Dict
from app.ml.rl_battery.agent import QLearningAgent
from app.ml.rl_battery.environment import BatteryEnvironment
from app.utils.logger import get_logger
logger = get_logger(__name__)
class BatteryPolicy:
    """Inference wrapper around a pickled Q-learning battery policy.

    Falls back to a neutral "hold" recommendation when no trained policy file
    is found on disk.
    """

    def __init__(self, policy_path: str = "models/rl_battery"):
        self.policy_path = policy_path
        self.agent: QLearningAgent = None
        self.env: BatteryEnvironment = None
        self._load_policy()

    def _load_policy(self):
        """Load ``battery_policy.pkl`` if present; otherwise stay unloaded."""
        from pathlib import Path
        filepath = Path(self.policy_path) / "battery_policy.pkl"
        if filepath.exists():
            self.agent = QLearningAgent.load(filepath)
            self.env = BatteryEnvironment()
            logger.info(f"Loaded policy from {filepath}")

    def get_action(
        self,
        charge_level: float,
        current_price: float,
        price_forecast_1m: float = 0,
        price_forecast_5m: float = 0,
        price_forecast_15m: float = 0,
        hour: int = 0,
    ) -> Dict:
        """Recommend charge/hold/discharge for the given battery state.

        The ``price_forecast_*`` arguments are accepted for interface
        stability but are not part of the policy's state. Confidence is a
        heuristic spread of the Q-values, clamped at 1.0.
        """
        if self.agent is None:
            # No trained policy on disk — neutral recommendation.
            return {
                "action": "hold",
                "q_values": [0.0, 0.0, 0.0],
                "confidence": 0.0,
            }
        # Project the live battery state into the simulator's observation.
        self.env.charge_level = charge_level
        self.env.current_price = current_price
        self.env.time_step = hour * 60
        observation = self.env._get_state()
        chosen = self.agent.get_action(observation, training=False)
        label = ["charge", "hold", "discharge"][chosen]
        row = self.agent._discretize_state(observation)
        q_values = self.agent.q_table[row].tolist() if self.agent.q_table is not None else [0.0, 0.0, 0.0]
        best = max(q_values) if q_values else 0.0
        spread = (best - min(q_values)) / (best + 0.001) if q_values else 0.0
        return {
            "action": label,
            "q_values": q_values,
            "confidence": min(spread, 1.0),
        }
__all__ = ["BatteryPolicy"]

View File

@@ -0,0 +1,95 @@
from typing import Dict
from app.ml.rl_battery.environment import BatteryEnvironment
from app.ml.rl_battery.agent import QLearningAgent
from app.utils.logger import get_logger
logger = get_logger(__name__)
class BatteryRLTrainer:
    """Trains the tabular Q-learning battery policy on the simulator."""

    def __init__(self, config=None):
        self.config = config or {}
        self.agent: QLearningAgent = None
        self.env: BatteryEnvironment = None

    def _create_agent(self) -> QLearningAgent:
        """Build a QLearningAgent from config, with the standard defaults."""
        return QLearningAgent(
            state_bins=self.config.get("charge_level_bins", 10),
            action_space=3,
            learning_rate=self.config.get("learning_rate", 0.1),
            discount_factor=self.config.get("discount_factor", 0.95),
            epsilon=self.config.get("epsilon", 1.0),
            epsilon_decay=self.config.get("epsilon_decay", 0.995),
            epsilon_min=self.config.get("epsilon_min", 0.05),
        )

    def load_data(self):
        """Placeholder: the simulator generates its own prices; nothing to load."""
        pass

    def train(self, n_episodes: int = 1000, region: str = "FR") -> Dict:
        """Run ``n_episodes`` Q-learning episodes; return training stats.

        NOTE(review): ``region`` is currently unused — the environment's price
        process is synthetic; confirm before relying on per-region policies.
        """
        logger.info(f"Starting RL training for {n_episodes} episodes")
        self.env = BatteryEnvironment(
            capacity=100.0,
            charge_rate=50.0,
            discharge_rate=50.0,
            efficiency=0.9,
            min_reserve=0.1,
            max_charge=0.9,
        )
        self.agent = self._create_agent()
        self.agent.initialize_q_table(self.env.observation_space)
        episode_rewards = []
        for episode in range(n_episodes):
            state = self.env.reset()
            total_reward = 0
            while True:
                action = self.agent.get_action(state, training=True)
                next_state, reward, done, info = self.env.step(action)
                self.agent.update(state, action, reward, next_state, done)
                total_reward += reward
                state = next_state
                if done:
                    break
            episode_rewards.append(total_reward)
            self.agent.decay_epsilon()
            if (episode + 1) % 100 == 0:
                avg_reward = sum(episode_rewards[-100:]) / 100
                logger.info(f"Episode {episode + 1}/{n_episodes}, Avg Reward: {avg_reward:.2f}, Epsilon: {self.agent.epsilon:.3f}")
        # BUG FIX: the final average previously divided by a hard-coded 100,
        # under-reporting whenever fewer than 100 episodes were run.
        tail = episode_rewards[-100:]
        final_avg_reward = sum(tail) / len(tail) if tail else 0.0
        results = {
            "n_episodes": n_episodes,
            "final_avg_reward": final_avg_reward,
            "episode_rewards": episode_rewards,
            "final_epsilon": self.agent.epsilon,
        }
        logger.info(f"Training complete. Final avg reward: {final_avg_reward:.2f}")
        return results

    def save(self, output_dir: str = "models/rl_battery") -> None:
        """Persist the trained policy as ``battery_policy.pkl`` under ``output_dir``."""
        from pathlib import Path
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        filepath = output_path / "battery_policy.pkl"
        self.agent.save(filepath)
        logger.info(f"Saved trained policy to {filepath}")
__all__ = ["BatteryRLTrainer"]

View File

@@ -0,0 +1,3 @@
from app.ml.training import CLITrainer
__all__ = ["CLITrainer"]

View File

@@ -0,0 +1,49 @@
import argparse
from app.ml.price_prediction.trainer import PricePredictionTrainer
from app.ml.rl_battery.trainer import BatteryRLTrainer
from app.utils.logger import get_logger, setup_logger
setup_logger()
logger = get_logger(__name__)
def main():
    """CLI entry: ``price`` trains forecasters, ``rl`` trains the battery policy."""
    parser = argparse.ArgumentParser(description="Energy Trading ML Training CLI")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    price_parser = subparsers.add_parser("price", help="Train price prediction models")
    price_parser.add_argument(
        "--horizons", nargs="+", type=int, default=[1, 5, 15, 60],
        help="Prediction horizons in minutes",
    )
    price_parser.add_argument(
        "--output", type=str, default="models/price_prediction",
        help="Output directory",
    )

    rl_parser = subparsers.add_parser("rl", help="Train RL battery policy")
    rl_parser.add_argument("--episodes", type=int, default=1000, help="Number of training episodes")
    rl_parser.add_argument("--region", type=str, default="FR", help="Region to train for")
    rl_parser.add_argument("--output", type=str, default="models/rl_battery", help="Output directory")

    args = parser.parse_args()
    if args.command == "price":
        _run_price_training(args)
    elif args.command == "rl":
        _run_rl_training(args)
    else:
        parser.print_help()


def _run_price_training(args):
    """Run price-prediction training and report per-horizon metrics."""
    logger.info(f"Training price prediction models for horizons: {args.horizons}")
    trainer = PricePredictionTrainer()
    results = trainer.train_all(horizons=args.horizons)
    trainer.save_models(output_dir=args.output)
    logger.info("Training complete!")
    for horizon, result in results.items():
        if "error" not in result:
            logger.info(f" {horizon}m: MAE={result['mae']:.2f}, RMSE={result['rmse']:.2f}, R2={result['r2']:.3f}")


def _run_rl_training(args):
    """Run RL battery-policy training and report final stats."""
    logger.info(f"Training RL battery policy for {args.episodes} episodes")
    trainer = BatteryRLTrainer()
    results = trainer.train(n_episodes=args.episodes, region=args.region)
    trainer.save(output_dir=args.output)
    logger.info("Training complete!")
    logger.info(f" Final avg reward: {results['final_avg_reward']:.2f}")
    logger.info(f" Final epsilon: {results['final_epsilon']:.3f}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,3 @@
from app.ml.utils import time_based_split, MLConfig
__all__ = ["time_based_split", "MLConfig"]

View File

@@ -0,0 +1,16 @@
from dataclasses import dataclass
from typing import List, Dict, Any
@dataclass
class MLConfig:
    """Runtime knobs shared by ML training and inference code."""

    enable_gpu: bool = False  # prefer GPU-capable training when available
    n_jobs: int = 4           # parallel worker count
    verbose: bool = True      # emit progress logging

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "MLConfig":
        """Build a config from a dict, silently dropping unknown keys."""
        known = cls.__annotations__
        kwargs = {key: value for key, value in config_dict.items() if key in known}
        return cls(**kwargs)
__all__ = ["MLConfig"]

View File

@@ -0,0 +1,25 @@
from typing import Tuple
import pandas as pd
from datetime import datetime
def time_based_split(
df: pd.DataFrame,
timestamp_col: str = "timestamp",
train_pct: float = 0.70,
val_pct: float = 0.85,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
df_sorted = df.sort_values(timestamp_col)
n_total = len(df_sorted)
n_train = int(n_total * train_pct)
n_val = int(n_total * val_pct)
train = df_sorted.iloc[:n_train]
val = df_sorted.iloc[n_train:n_val]
test = df_sorted.iloc[n_val:]
return train, val, test
__all__ = ["time_based_split"]

View File

@@ -0,0 +1,4 @@
from app.ml.utils.data_split import time_based_split
from app.ml.utils.config import MLConfig
__all__ = ["time_based_split", "MLConfig"]