Files
energy-test-data/config/data_config.yaml
kbt-devops faaadc1297 Add transmission datasets and update mining data
Add two new static datasets for cross-region arbitrage calculations:
- transmission_capacity: region-to-region capacity limits (20 rows)
- transmission_cost: transmission costs per path (20 rows)

Update mining dataset with EUR pricing and power metrics:
- Change btc_price_usd to btc_price_eur
- Add power_efficiency_th_per_mw, power_demand_mw
- Add revenue_eur_per_mwh, profit_eur_per_mwh
- Remove mining_profitability column

Changes include:
- scripts/02_fetch_historical.py: rewrite fetch_bitcoin_mining_data()
- scripts/01_generate_synthetic.py: add transmission data generators
- config/data_config.yaml: add transmission config, update bitcoin config
- config/schema.yaml: add 2 new schemas, update bitcoin_mining schema
- scripts/03_process_merge.py: add 2 new datasets
- scripts/04_validate.py: add 2 new datasets
- test/test_data.py: update for new datasets and bitcoin price reference

Total datasets: 9 (734,491 rows, 17.89 MB)
2026-02-11 01:09:33 +07:00

105 lines
2.4 KiB
YAML

# Energy Test Data Configuration
# For energy trading strategy demo
# Time window covered by the generated data.
# Roughly the last 10 days at the time of writing (adjustable). The dates
# are fixed literals, so edit them by hand to move the window.
time_range:
  start_date: "2026-01-31"
  end_date: "2026-02-10"
  granularity: "1min"  # 1-minute intervals
# Market regions included in the data set, as two-letter codes.
regions:
  # European energy markets
  - "FR"  # France
  - "BE"  # Belgium
  - "DE"  # Germany
  - "NL"  # Netherlands
  - "UK"  # United Kingdom
# One entry per dataset. Each entry declares a `type` ("synthetic" or
# "hybrid" in this file) plus dataset-specific settings. Entries must stay
# nested under their dataset key: flattened, `type` and friends would be
# duplicate keys.
data_sources:
  electricity_prices:
    type: "hybrid"
    historical_source: "epex_spot"
    synthetic_patterns: true
  battery_capacity:
    type: "synthetic"
    num_batteries: 10
  renewable_generation:
    type: "synthetic"
    plants_per_source: 5
    sources: ["solar", "wind", "hydro"]
  conventional_generation:
    type: "synthetic"
    num_plants: 10
    fuel_types: ["gas", "coal", "nuclear"]
  load_profiles:
    # NOTE(review): type is "synthetic" yet a historical_source is set;
    # confirm which of the two the loader actually honours.
    type: "synthetic"
    historical_source: "entso_e"
  data_centers:
    type: "synthetic"
    num_centers: 5
    special_client: "bitcoin"
  bitcoin_mining:
    type: "hybrid"
    historical_source: "mempool.space"
    synthetic_patterns: true
# Settings for the written output files.
output:
  format: "parquet"
  compression: "snappy"
  target_size_mb: 200
  precision: "float32"
# Generation settings: random seed plus noise, outlier and missing-value
# switches with their associated levels/rates.
# NOTE(review): indentation was lost in this copy of the file. Only the
# scalar settings below are nested here; confirm whether the stanzas that
# follow (battery, renewable, conventional, data_center, bitcoin,
# transmission) belong under `generation` or at top level.
generation:
  seed: 42
  add_noise: true
  noise_level: 0.05
  include_outliers: true
  outlier_rate: 0.01
  include_missing_values: true
  missing_rate: 0.005
# Battery parameters. Each *_range is a two-element [low, high] list.
battery:
  capacity_range: [10, 100]  # MWh
  charge_rate_range: [5, 50]  # MW
  discharge_rate_range: [5, 50]  # MW
  efficiency_range: [0.85, 0.95]
  degradation_rate: 0.001
# Renewable plant parameters, keyed by source. Every source carries the
# same two settings, so each pair must stay nested under its own key.
renewable:
  solar:
    capacity_range: [50, 500]  # MW
    forecast_error_sd: 0.15
  wind:
    capacity_range: [100, 800]  # MW
    forecast_error_sd: 0.20
  hydro:
    capacity_range: [50, 300]  # MW
    forecast_error_sd: 0.05
# Conventional plant parameters, keyed by fuel type. Every fuel carries
# the same two settings, so each pair must stay nested under its own key.
conventional:
  gas:
    capacity_range: [200, 1000]  # MW
    marginal_cost_range: [30, 80]  # EUR/MWh
  coal:
    capacity_range: [300, 1500]  # MW
    marginal_cost_range: [40, 70]  # EUR/MWh
  nuclear:
    capacity_range: [800, 1600]  # MW
    marginal_cost_range: [10, 30]  # EUR/MWh
# Data-center parameters. Each *_range is a two-element [low, high] list.
data_center:
  power_demand_range: [10, 100]  # MW
  price_sensitivity_range: [0.8, 1.2]
# Bitcoin mining parameters.
bitcoin:
  hashrate_range: [150, 250]  # EH/s
  power_efficiency_range: [80, 120]  # TH/s per MW
  # For converting to EUR base price.
  # NOTE(review): 0.92 reads as EUR per 1 USD, which is the opposite of the
  # usual EUR/USD quote; confirm the direction the consumer expects.
  eur_usd_rate: 0.92
# Transmission parameters. Each *_range is a two-element [low, high] list.
transmission:
  capacity_base_range: [1000, 4000]  # MW
  capacity_uk_multiplier: 0.6  # UK connections typically lower
  efficiency_range: [0.95, 0.99]
  congestion_surcharge_range: [0.5, 5.0]  # EUR/MWh
  fee_range: [0, 2.0]  # EUR/MWh