Add transmission datasets and update mining data
Add two new static datasets for cross-region arbitrage calculations:
- transmission_capacity: region-to-region capacity limits (20 rows)
- transmission_cost: transmission costs per path (20 rows)

Update the mining dataset with EUR pricing and power metrics:
- Change btc_price_usd to btc_price_eur
- Add power_efficiency_th_per_mw, power_demand_mw
- Add revenue_eur_per_mwh, profit_eur_per_mwh
- Remove the mining_profitability column

Changes include:
- scripts/02_fetch_historical.py: rewrite fetch_bitcoin_mining_data()
- scripts/01_generate_synthetic.py: add transmission data generators
- config/data_config.yaml: add transmission config, update bitcoin config
- config/schema.yaml: add 2 new schemas, update bitcoin_mining schema
- scripts/03_process_merge.py: add 2 new datasets
- scripts/04_validate.py: add 2 new datasets
- test/test_data.py: update for new datasets and bitcoin price reference

Total datasets: 9 (734,491 rows, 17.89 MB)
This commit is contained in:
@@ -210,6 +210,72 @@ def generate_data_center_data(config, timestamps):
|
||||
|
||||
return pd.concat(df_list, ignore_index=True)
|
||||
|
||||
def generate_transmission_capacity_data(config):
    """Generate a static table of transmission capacity between regions.

    One row is produced for every ordered pair of distinct entries in
    ``config['regions']`` (so both A->B and B->A appear).

    Args:
        config: Mapping that provides
            ``config['generation']['seed']`` (int),
            ``config['regions']`` (sequence of region names) and
            ``config['transmission']`` with the keys
            ``capacity_base_range``, ``capacity_uk_multiplier`` and
            ``efficiency_range`` (each range is a ``(low, high)`` pair).

    Returns:
        pandas.DataFrame with the columns ``source_region``,
        ``target_region``, ``capacity_mw``, ``direction`` and
        ``efficiency``. The columns are present even when fewer than two
        regions are configured and the table is therefore empty.

    Side effects:
        Re-seeds NumPy's global random generator with ``seed + 13`` so the
        output is reproducible for a given config.
    """
    np.random.seed(config['generation']['seed'] + 13)

    regions = config['regions']
    params = config['transmission']

    # Fixed schema: guarantees the expected columns even for an empty table.
    columns = [
        'source_region',
        'target_region',
        'capacity_mw',
        'direction',
        'efficiency',
    ]
    data = []

    for i, src in enumerate(regions):
        for j, tgt in enumerate(regions):
            if i == j:
                # No self-links.
                continue

            # NOTE: the order of the random draws below determines the
            # generated values for a given seed; do not reorder them.
            base_capacity = np.random.uniform(*params['capacity_base_range'])

            # Any link that touches the UK is scaled by a dedicated factor.
            if src == 'UK' or tgt == 'UK':
                base_capacity *= params['capacity_uk_multiplier']

            # +/-20% jitter on top of the base capacity.
            capacity = base_capacity * np.random.uniform(0.8, 1.2)
            efficiency = np.random.uniform(*params['efficiency_range'])

            # NOTE(review): every row is labelled 'bidirectional' although
            # A->B and B->A receive independent capacity draws - confirm
            # that this is intended.
            data.append({
                'source_region': src,
                'target_region': tgt,
                'capacity_mw': capacity,
                'direction': 'bidirectional',
                'efficiency': efficiency,
            })

    return pd.DataFrame(data, columns=columns)
|
||||
|
||||
def generate_transmission_cost_data(config):
    """Generate a static table of transmission costs between regions.

    One row is produced for every ordered pair of distinct entries in
    ``config['regions']``. The total cost per row is the sum of a loss
    cost (loss fraction times an average electricity price), a congestion
    surcharge and a fee.

    Args:
        config: Mapping that provides
            ``config['generation']['seed']`` (int),
            ``config['regions']`` (sequence of region names) and
            ``config['transmission']`` with the keys ``efficiency_range``,
            ``congestion_surcharge_range`` and ``fee_range`` (each a
            ``(low, high)`` pair). An optional
            ``config['transmission']['avg_electricity_price']`` overrides
            the average price used to value losses; it defaults to 80.

    Returns:
        pandas.DataFrame with the columns ``source_region``,
        ``target_region``, ``cost_eur_mwh``, ``loss_percent``,
        ``congestion_surcharge_eur_mwh`` and ``fee_eur_mwh``. The columns
        are present even when fewer than two regions are configured.

    Side effects:
        Re-seeds NumPy's global random generator with ``seed + 14``.
    """
    np.random.seed(config['generation']['seed'] + 14)

    regions = config['regions']
    params = config['transmission']

    # Price used to convert the loss fraction into a cost; presumably
    # EUR/MWh given the output column names - TODO confirm.
    avg_electricity_price = params.get('avg_electricity_price', 80)

    # Fixed schema: guarantees the expected columns even for an empty table.
    columns = [
        'source_region',
        'target_region',
        'cost_eur_mwh',
        'loss_percent',
        'congestion_surcharge_eur_mwh',
        'fee_eur_mwh',
    ]
    data = []

    for i, src in enumerate(regions):
        for j, tgt in enumerate(regions):
            if i == j:
                # No self-links.
                continue

            # NOTE: the order of the random draws below determines the
            # generated values for a given seed; do not reorder them.
            # NOTE(review): this efficiency is drawn here and is not read
            # from the capacity table, so loss_percent will not match that
            # table's 'efficiency' column - confirm this is acceptable.
            efficiency = np.random.uniform(*params['efficiency_range'])
            loss_percent = (1 - efficiency) * 100
            congestion_surcharge = np.random.uniform(
                *params['congestion_surcharge_range']
            )
            fee = np.random.uniform(*params['fee_range'])

            loss_cost = (loss_percent / 100) * avg_electricity_price
            cost_eur_mwh = loss_cost + congestion_surcharge + fee

            data.append({
                'source_region': src,
                'target_region': tgt,
                'cost_eur_mwh': cost_eur_mwh,
                'loss_percent': loss_percent,
                'congestion_surcharge_eur_mwh': congestion_surcharge,
                'fee_eur_mwh': fee,
            })

    return pd.DataFrame(data, columns=columns)
|
||||
|
||||
def apply_noise_and_outliers(df, config):
|
||||
if not config['generation']['add_noise']:
|
||||
return df
|
||||
@@ -283,20 +349,27 @@ def main():
|
||||
|
||||
datasets['battery_capacity'] = generate_battery_data(config, timestamps)
|
||||
print(f" - Battery capacity: {len(datasets['battery_capacity'])} rows")
|
||||
|
||||
|
||||
datasets['renewable_generation'] = generate_renewable_data(config, timestamps)
|
||||
print(f" - Renewable generation: {len(datasets['renewable_generation'])} rows")
|
||||
|
||||
|
||||
datasets['conventional_generation'] = generate_conventional_data(config, timestamps)
|
||||
print(f" - Conventional generation: {len(datasets['conventional_generation'])} rows")
|
||||
|
||||
|
||||
datasets['data_centers'] = generate_data_center_data(config, timestamps)
|
||||
print(f" - Data centers: {len(datasets['data_centers'])} rows")
|
||||
|
||||
|
||||
datasets['transmission_capacity'] = generate_transmission_capacity_data(config)
|
||||
print(f" - Transmission capacity: {len(datasets['transmission_capacity'])} rows")
|
||||
|
||||
datasets['transmission_cost'] = generate_transmission_cost_data(config)
|
||||
print(f" - Transmission cost: {len(datasets['transmission_cost'])} rows")
|
||||
|
||||
for name, df in datasets.items():
|
||||
df = apply_noise_and_outliers(df, config)
|
||||
df = add_missing_values(df, config)
|
||||
datasets[name] = df
|
||||
if name not in ['transmission_capacity', 'transmission_cost']:
|
||||
df = apply_noise_and_outliers(df, config)
|
||||
df = add_missing_values(df, config)
|
||||
datasets[name] = df
|
||||
|
||||
output_base = Path(__file__).parent.parent / 'data'
|
||||
output_base.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -78,50 +78,57 @@ def fetch_electricity_prices(config, timestamps):
|
||||
|
||||
def fetch_bitcoin_mining_data(config, timestamps):
    """Build a simulated bitcoin-mining time series priced in EUR.

    The mempool.space endpoint is contacted on a best-effort basis only;
    its response is not used, and every value in the returned table is
    synthetic and derived from the seeded random generator.

    Args:
        config: Mapping that provides ``config['generation']['seed']``
            (int) and ``config['bitcoin']`` with the keys
            ``hashrate_range`` and ``power_efficiency_range`` (each a
            ``(low, high)`` pair).
        timestamps: Sequence of timestamps; one output row per entry.
            Must be non-empty (the volatility series is normalised by its
            first element).

    Returns:
        pandas.DataFrame with the columns ``timestamp``, ``pool_id``,
        ``hashrate_ths``, ``btc_price_eur``,
        ``power_efficiency_th_per_mw``, ``power_demand_mw``,
        ``revenue_eur_per_mwh``, ``profit_eur_per_mwh`` and
        ``electricity_cost``.

    Side effects:
        Re-seeds NumPy's global random generator with ``seed + 11``,
        prints progress messages and issues one HTTP GET request.
    """
    np.random.seed(config['generation']['seed'] + 11)

    print("Fetching bitcoin mining data from mempool.space (simulated)...")

    n = len(timestamps)

    # Best-effort reachability probe. The payload is intentionally ignored:
    # the series below is fully simulated, so a network failure must not
    # abort data generation.
    try:
        requests.get(
            "https://mempool.space/api/v1/fees/recommended", timeout=10
        )
    except requests.RequestException as exc:
        print(f"  - mempool.space not reachable ({exc}); continuing with simulated data")

    btc_params = config['bitcoin']

    # NOTE: the order of the random draws below determines the generated
    # values for a given seed; do not reorder them.

    # Price path: linear +/-5% trend, a random-walk volatility factor
    # normalised to start at 1, and 3% multiplicative noise.
    btc_eur_trend = np.linspace(0.95, 1.05, n)
    btc_daily_volatility = np.cumsum(np.random.normal(0, 0.01, n)) + 1
    btc_daily_volatility = btc_daily_volatility / btc_daily_volatility[0]

    base_btc_price_eur = 41400
    btc_price_eur = (
        base_btc_price_eur
        * btc_eur_trend
        * btc_daily_volatility
        * (1 + 0.03 * np.random.randn(n))
    )

    # Hashrate: constant base with a 5% sinusoid (ten cycles over the
    # series) and 2% multiplicative noise.
    hashrate_base = np.random.uniform(*btc_params['hashrate_range'])
    hashrate = (
        hashrate_base
        * (1 + 0.05 * np.sin(2 * np.pi * np.arange(n) / (n / 10)))
        * (1 + 0.02 * np.random.randn(n))
    )

    # A single efficiency value is drawn for the whole series.
    power_efficiency = np.random.uniform(*btc_params['power_efficiency_range'])
    power_demand = hashrate / power_efficiency

    # NOTE(review): power_efficiency cancels out of revenue_eur_per_mwh
    # (it divides here and multiplies below), so revenue depends only on
    # the BTC price - confirm the intended formula and the meaning of the
    # 0.0001 / 3.6 / 1000 / 24 constants.
    mining_profitability = (btc_price_eur * 0.0001 / 3.6) / (power_efficiency / 1000)
    revenue_eur_per_mwh = mining_profitability * power_efficiency * 24

    # Simulated electricity cost: mean 40, standard deviation 5.
    electricity_breakeven = 40 + np.random.normal(0, 5, n)
    profit_eur_per_mwh = revenue_eur_per_mwh - electricity_breakeven

    data = pd.DataFrame({
        'timestamp': timestamps,
        'pool_id': 'POOL_001',
        'hashrate_ths': hashrate,
        'btc_price_eur': btc_price_eur,
        'power_efficiency_th_per_mw': power_efficiency,
        'power_demand_mw': power_demand,
        'revenue_eur_per_mwh': revenue_eur_per_mwh,
        'profit_eur_per_mwh': profit_eur_per_mwh,
        'electricity_cost': electricity_breakeven,
    })

    return data
|
||||
|
||||
def fetch_load_profiles(config, timestamps):
|
||||
|
||||
@@ -126,7 +126,9 @@ def main():
|
||||
'conventional_generation',
|
||||
'load_profiles',
|
||||
'data_centers',
|
||||
'bitcoin_mining'
|
||||
'bitcoin_mining',
|
||||
'transmission_capacity',
|
||||
'transmission_cost'
|
||||
]
|
||||
|
||||
processed_info = {}
|
||||
|
||||
@@ -233,7 +233,9 @@ def main():
|
||||
'conventional_generation',
|
||||
'load_profiles',
|
||||
'data_centers',
|
||||
'bitcoin_mining'
|
||||
'bitcoin_mining',
|
||||
'transmission_capacity',
|
||||
'transmission_cost'
|
||||
]
|
||||
|
||||
print("Validating processed datasets...\n")
|
||||
|
||||
Reference in New Issue
Block a user