from typing import Dict, Tuple

import numpy as np


class BatteryEnvironment:
    """Simple battery-arbitrage simulation with a three-action interface.

    The battery holds ``charge_level`` units of energy, kept between
    ``capacity * min_reserve`` and ``capacity * max_charge``. At each step
    the agent charges (``0``), holds (``1``) or discharges (``2``) at the
    current price, after which the price takes a Gaussian random-walk step.

    Observations are ``[charge fraction, normalised price, normalised time]``.
    """

    # Steps per episode; also the period used to normalise the time feature.
    # NOTE(review): 1440 looks like minutes per day -- confirm with callers.
    EPISODE_LENGTH = 1440
    INITIAL_PRICE = 50.0
    # Fraction of ``capacity`` the battery holds at the start of an episode.
    INITIAL_CHARGE_FRACTION = 0.5
    # Prices at or above this value saturate the normalised price feature.
    PRICE_NORM_CAP = 200.0
    # Hard upper bound applied to the simulated price (lower bound is 0).
    MAX_PRICE = 300.0
    # Standard deviation of the per-step price change.
    PRICE_STEP_STD = 5
    # Divisor applied to ``energy * price`` to obtain the reward.
    REWARD_SCALE = 1000.0

    def __init__(
        self,
        capacity: float = 100.0,
        charge_rate: float = 50.0,
        discharge_rate: float = 50.0,
        efficiency: float = 0.9,
        min_reserve: float = 0.1,
        max_charge: float = 0.9,
    ):
        """Configure the battery and put it in its initial state.

        Args:
            capacity: Total energy capacity of the battery.
            charge_rate: Maximum energy bought in a single step.
            discharge_rate: Maximum energy sold in a single step.
            efficiency: Conversion factor applied on both charge (bought
                energy is multiplied by it) and discharge (sold energy is
                divided by it to get the amount drained).
            min_reserve: Lowest allowed charge, as a fraction of ``capacity``.
            max_charge: Highest allowed charge, as a fraction of ``capacity``.
        """
        self.capacity = capacity
        self.charge_rate = charge_rate
        self.discharge_rate = discharge_rate
        self.efficiency = efficiency
        self.min_reserve = min_reserve
        self.max_charge = max_charge
        self._initialise_episode()

    def _initialise_episode(self) -> None:
        """Set charge level, price and step counter to their start values."""
        self.charge_level = self.capacity * self.INITIAL_CHARGE_FRACTION
        self.current_price = self.INITIAL_PRICE
        self.time_step = 0

    def reset(self) -> np.ndarray:
        """Start a new episode and return the initial observation."""
        self._initialise_episode()
        return self._get_state()

    def _get_state(self) -> np.ndarray:
        """Return ``[charge fraction, price / 200 clipped to [0, 1], time]``."""
        charge_pct = self.charge_level / self.capacity
        price_norm = np.clip(self.current_price / self.PRICE_NORM_CAP, 0, 1)
        time_norm = (self.time_step % self.EPISODE_LENGTH) / float(
            self.EPISODE_LENGTH
        )
        return np.array([charge_pct, price_norm, time_norm])

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, Dict]:
        """Apply ``action``, advance the price and return the transition.

        Args:
            action: ``0`` to charge, ``1`` to hold, ``2`` to discharge. Any
                other value is treated like hold (zero reward, no transfer).

        Returns:
            ``(state, reward, done, info)`` where ``reward`` is the signed
            cash flow of the trade at the pre-step price divided by 1000,
            ``done`` becomes true once 1440 steps have elapsed, and ``info``
            carries the post-step ``charge_level`` and ``price`` plus the
            ``action`` that was taken.
        """
        price = self.current_price
        floor = self.capacity * self.min_reserve
        ceiling = self.capacity * self.max_charge

        if action == 0:
            # Never buy a negative amount: that would pay the agent whenever
            # the level sits above the ceiling (e.g. max_charge < 0.5).
            bought = max(0.0, min(self.charge_rate, ceiling - self.charge_level))
            self.charge_level += bought * self.efficiency
            reward = -bought * price / self.REWARD_SCALE
        elif action == 2:
            # Selling ``sold`` drains ``sold / efficiency`` from the battery,
            # so only ``stored * efficiency`` can be delivered without
            # dipping below the reserve. Capping at ``stored`` instead would
            # let the final clip silently refill the battery.
            stored = max(0.0, self.charge_level - floor)
            sold = max(0.0, min(self.discharge_rate, stored * self.efficiency))
            self.charge_level -= sold / self.efficiency
            reward = sold * price / self.REWARD_SCALE
        else:
            reward = 0.0

        # Safety net for floating-point drift and out-of-range configurations.
        self.charge_level = np.clip(self.charge_level, floor, ceiling)

        # Random-walk price update, bounded to [0, MAX_PRICE].
        self.current_price = price + np.random.randn() * self.PRICE_STEP_STD
        self.current_price = np.clip(self.current_price, 0, self.MAX_PRICE)
        self.time_step += 1

        state = self._get_state()
        info = {
            "charge_level": self.charge_level,
            "price": self.current_price,
            "action": action,
        }
        done = self.time_step >= self.EPISODE_LENGTH
        return state, reward, done, info

    @property
    def action_space(self):
        """Number of discrete actions (charge, hold, discharge)."""
        return 3

    @property
    def observation_space(self):
        """Number of entries in the observation vector."""
        return 3


__all__ = ["BatteryEnvironment"]