Add transmission datasets and update mining data

Add two new static datasets for cross-region arbitrage calculations:
- transmission_capacity: region-to-region capacity limits (20 rows)
- transmission_cost: transmission costs per path (20 rows)

Update mining dataset with EUR pricing and power metrics:
- Replace the btc_price_usd column with btc_price_eur
- Add power_efficiency_th_per_mw, power_demand_mw
- Add revenue_eur_per_mwh, profit_eur_per_mwh
- Remove mining_profitability column

Changes include:
- scripts/02_fetch_historical.py: rewrite fetch_bitcoin_mining_data()
- scripts/01_generate_synthetic.py: add transmission data generators
- config/data_config.yaml: add transmission config, update bitcoin config
- config/schema.yaml: add 2 new schemas, update bitcoin_mining schema
- scripts/03_process_merge.py: add the transmission_capacity and transmission_cost datasets
- scripts/04_validate.py: add the transmission_capacity and transmission_cost datasets
- test/test_data.py: update for new datasets and bitcoin price reference

Total datasets: 9 (734,491 rows, 17.89 MB)
This commit is contained in:
2026-02-11 01:09:33 +07:00
parent d981f7c56c
commit faaadc1297
10 changed files with 361 additions and 70 deletions

View File

@@ -22,7 +22,9 @@ def main():
'conventional_generation',
'load_profiles',
'data_centers',
'bitcoin_mining'
'bitcoin_mining',
'transmission_capacity',
'transmission_cost'
]
print("\n1. LOADING DATASETS")
@@ -37,7 +39,7 @@ def main():
else:
print(f"{name:25} NOT FOUND")
print(f"\nTotal datasets loaded: {len(loaded)}/7")
print(f"\nTotal datasets loaded: {len(loaded)}/9")
print("\n2. SAMPLE DATA PREVIEWS")
print("-" * 60)
@@ -80,9 +82,25 @@ def main():
if 'bitcoin_mining' in loaded:
df = loaded['bitcoin_mining']
print(f"\nBitcoin Mining:")
print(f" BTC Price: ${df['btc_price_usd'].mean():.2f} avg, ${df['btc_price_usd'].max():.2f} max")
print(f" BTC Price: {df['btc_price_eur'].mean():.2f} avg, {df['btc_price_eur'].max():.2f} max")
print(f" Hashrate: {df['hashrate_ths'].mean():.2f} EH/s avg")
print(f" Profitability: ${df['mining_profitability'].mean():.4f} /TH/day avg")
print(f" Power Demand: {df['power_demand_mw'].mean():.1f} MW avg")
print(f" Revenue: €{df['revenue_eur_per_mwh'].mean():.2f} /MWh avg")
print(f" Profit: €{df['profit_eur_per_mwh'].mean():.2f} /MWh avg")
if 'transmission_capacity' in loaded:
df = loaded['transmission_capacity']
print(f"\nTransmission Capacity:")
print(f" Total interconnectors: {len(df)}")
print(f" Avg capacity: {df['capacity_mw'].mean():.0f} MW")
print(f" Avg efficiency: {df['efficiency'].mean():.2%}")
if 'transmission_cost' in loaded:
df = loaded['transmission_cost']
print(f"\nTransmission Cost:")
print(f" Total paths: {len(df)}")
print(f" Avg cost: €{df['cost_eur_mwh'].mean():.2f} /MWh")
print(f" Avg loss: {df['loss_percent'].mean():.2f}%")
if 'data_centers' in loaded:
df = loaded['data_centers']