Add two new static datasets for cross-region arbitrage calculations:
- transmission_capacity: region-to-region capacity limits (20 rows)
- transmission_cost: transmission costs per path (20 rows)

Update mining dataset with EUR pricing and power metrics:
- Change btc_price_usd to btc_price_eur
- Add power_efficiency_th_per_mw, power_demand_mw
- Add revenue_eur_per_mwh, profit_eur_per_mwh
- Remove mining_profitability column

Changes include:
- scripts/02_fetch_historical.py: rewrite fetch_bitcoin_mining_data()
- scripts/01_generate_synthetic.py: add transmission data generators
- config/data_config.yaml: add transmission config, update bitcoin config
- config/schema.yaml: add 2 new schemas, update bitcoin_mining schema
- scripts/03_process_merge.py: add 2 new datasets
- scripts/04_validate.py: add 2 new datasets
- test/test_data.py: update for new datasets and bitcoin price reference

Total datasets: 9 (734,491 rows, 17.89 MB)
317 lines
8.4 KiB
YAML
317 lines
8.4 KiB
YAML
# Schema definitions for energy test data datasets
# Column definitions for each dataset. Every column entry carries a name, a
# type, a description and, where applicable, a unit.
# NOTE(review): the nesting below was reconstructed from key order after the
# original indentation was lost in extraction — confirm against the
# repository copy of this file.
schemas:
  electricity_prices:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of price observation"
      - name: "region"
        type: "category"
        description: "Market region code"
      - name: "day_ahead_price"
        type: "float32"
        unit: "EUR/MWh"
        description: "Day-ahead market clearing price"
      - name: "real_time_price"
        type: "float32"
        unit: "EUR/MWh"
        description: "Real-time market price"
      - name: "capacity_price"
        type: "float32"
        unit: "EUR/MWh"
        description: "Capacity market price"
      - name: "regulation_price"
        type: "float32"
        unit: "EUR/MWh"
        description: "Frequency regulation price"
      - name: "volume_mw"
        type: "float32"
        unit: "MW"
        description: "Traded volume"

  battery_capacity:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of battery state"
      - name: "battery_id"
        type: "category"
        description: "Unique battery identifier"
      - name: "capacity_mwh"
        type: "float32"
        unit: "MWh"
        description: "Total storage capacity"
      - name: "charge_level_mwh"
        type: "float32"
        unit: "MWh"
        description: "Current energy stored"
      - name: "charge_rate_mw"
        type: "float32"
        unit: "MW"
        description: "Current charging rate (positive) or discharging (negative)"
      - name: "discharge_rate_mw"
        type: "float32"
        unit: "MW"
        description: "Maximum discharge rate"
      - name: "efficiency"
        type: "float32"
        description: "Round-trip efficiency (0-1)"

  renewable_generation:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of generation measurement"
      - name: "source"
        type: "category"
        description: "Renewable source type (solar, wind, hydro)"
      - name: "plant_id"
        type: "category"
        description: "Unique plant identifier"
      - name: "generation_mw"
        type: "float32"
        unit: "MW"
        description: "Actual generation output"
      - name: "forecast_mw"
        type: "float32"
        unit: "MW"
        description: "Forecasted generation"
      - name: "actual_mw"
        type: "float32"
        unit: "MW"
        description: "Actual measured generation (after correction)"
      - name: "capacity_factor"
        type: "float32"
        description: "Capacity utilization factor (0-1)"

  conventional_generation:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of generation measurement"
      - name: "plant_id"
        type: "category"
        description: "Unique plant identifier"
      - name: "fuel_type"
        type: "category"
        description: "Primary fuel type (gas, coal, nuclear)"
      - name: "generation_mw"
        type: "float32"
        unit: "MW"
        description: "Current generation output"
      - name: "marginal_cost"
        type: "float32"
        unit: "EUR/MWh"
        description: "Short-run marginal cost"
      - name: "heat_rate"
        type: "float32"
        unit: "MMBtu/MWh"
        description: "Thermal efficiency metric"

  load_profiles:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of load measurement"
      - name: "region"
        type: "category"
        description: "Region code"
      - name: "load_mw"
        type: "float32"
        unit: "MW"
        description: "Actual system load"
      - name: "forecast_mw"
        type: "float32"
        unit: "MW"
        description: "Load forecast"
      - name: "weather_temp"
        type: "float32"
        unit: "Celsius"
        description: "Average temperature"
      - name: "humidity"
        type: "float32"
        unit: "%"
        description: "Relative humidity"

  data_centers:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of demand measurement"
      - name: "data_center_id"
        type: "category"
        description: "Data center identifier"
      - name: "location"
        type: "category"
        description: "Geographic location"
      - name: "power_demand_mw"
        type: "float32"
        unit: "MW"
        description: "Current power demand"
      - name: "max_bid_price"
        type: "float32"
        unit: "EUR/MWh"
        description: "Maximum price willing to pay"
      - name: "client_type"
        type: "category"
        description: "Client type (bitcoin, enterprise, etc.)"

  bitcoin_mining:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of mining measurement"
      - name: "pool_id"
        type: "category"
        description: "Mining pool identifier"
      - name: "hashrate_ths"
        type: "float32"
        unit: "TH/s"
        description: "Mining pool hashrate"
      - name: "btc_price_eur"
        type: "float32"
        unit: "EUR"
        description: "Bitcoin price in EUR"
      - name: "power_efficiency_th_per_mw"
        type: "float32"
        unit: "TH/s per MW"
        description: "Mining efficiency"
      - name: "power_demand_mw"
        type: "float32"
        unit: "MW"
        description: "Power consumption for mining"
      - name: "revenue_eur_per_mwh"
        type: "float32"
        unit: "EUR/MWh"
        description: "Mining revenue per MWh of electricity"
      - name: "profit_eur_per_mwh"
        type: "float32"
        unit: "EUR/MWh"
        description: "Mining profit after electricity cost"
      - name: "electricity_cost"
        type: "float32"
        unit: "EUR/MWh"
        description: "Electricity cost for mining"

  transmission_capacity:
    columns:
      - name: "source_region"
        type: "category"
        description: "Source region code"
      - name: "target_region"
        type: "category"
        description: "Target region code"
      - name: "capacity_mw"
        type: "float32"
        unit: "MW"
        description: "Maximum transmission capacity"
      - name: "direction"
        type: "category"
        description: "Transmission direction"
      - name: "efficiency"
        type: "float32"
        description: "Transmission efficiency (0-1)"

  transmission_cost:
    columns:
      - name: "source_region"
        type: "category"
        description: "Source region code"
      - name: "target_region"
        type: "category"
        description: "Target region code"
      - name: "cost_eur_mwh"
        type: "float32"
        unit: "EUR/MWh"
        description: "Total transmission cost per MWh"
      - name: "loss_percent"
        type: "float32"
        unit: "%"
        description: "Transmission loss percentage"
      - name: "congestion_surcharge_eur_mwh"
        type: "float32"
        unit: "EUR/MWh"
        description: "Additional congestion charge"
      - name: "fee_eur_mwh"
        type: "float32"
        unit: "EUR/MWh"
        description: "Transmission fee"
# Per-dataset value constraints, keyed by dataset name. Each rule names a
# column and gives a numeric `min` and/or `max`, or a `check_max` column name.
# NOTE(review): the nesting (and the placement of `validation_rules` as a
# top-level key) was reconstructed after the original indentation was lost in
# extraction — confirm against the repository copy of this file.
validation_rules:
  electricity_prices:
    - column: "day_ahead_price"
      min: -500
      max: 3000
    - column: "real_time_price"
      min: -500
      max: 5000

  battery_capacity:
    - column: "charge_level_mwh"
      min: 0
      # presumably the named column supplies the per-row upper bound —
      # TODO confirm against the validator script
      check_max: "capacity_mwh"
    - column: "efficiency"
      min: 0.5
      max: 1.0

  renewable_generation:
    - column: "generation_mw"
      min: 0
    - column: "capacity_factor"
      min: 0
      max: 1.0

  conventional_generation:
    - column: "generation_mw"
      min: 0
    - column: "heat_rate"
      min: 5
      max: 15

  load_profiles:
    - column: "load_mw"
      min: 0
    - column: "weather_temp"
      min: -30
      max: 50

  data_centers:
    - column: "power_demand_mw"
      min: 0
    - column: "max_bid_price"
      min: 0

  bitcoin_mining:
    - column: "hashrate_ths"
      min: 0
      max: 1000000
    - column: "btc_price_eur"
      min: 1000
      max: 200000
    - column: "power_efficiency_th_per_mw"
      min: 50
      max: 150
    - column: "power_demand_mw"
      min: 10
      max: 1000
    - column: "revenue_eur_per_mwh"
      min: 0
      max: 500

  transmission_capacity:
    - column: "capacity_mw"
      min: 100
      max: 10000
    - column: "efficiency"
      min: 0.9
      max: 1.0

  transmission_cost:
    - column: "cost_eur_mwh"
      min: 0
      max: 50
    - column: "loss_percent"
      min: 0
      max: 15