Initial commit: Energy test data generation pipeline
Add complete test data preparation system for energy trading strategy demo. Includes configuration, data generation scripts, and validation tools for 7 datasets covering electricity prices, battery capacity, renewable/conventional generation, load profiles, data centers, and mining data.

Excluded from git: the actual parquet data files (data/raw/, data/processed/), which can be regenerated using the provided scripts.

Datasets:
- electricity_prices: Day-ahead and real-time prices (5 regions)
- battery_capacity: Storage system charge/discharge cycles
- renewable_generation: Solar, wind, hydro with forecast errors
- conventional_generation: Gas, coal, nuclear plant outputs
- load_profiles: Regional demand with weather correlations
- data_centers: Power demand profiles including mining operations
- mining_data: Hashrate, price, profitability (mempool.space API)
This commit is contained in:
96
config/data_config.yaml
Normal file
96
config/data_config.yaml
Normal file
@@ -0,0 +1,96 @@
|
||||
# Energy Test Data Configuration
# For energy trading strategy demo

# Time window and sampling interval of the generated data.
time_range:
  # Fixed 10-day window. The dates are hard-coded literals, not derived from
  # the current date; edit them to move or resize the window.
  # Quoted so they load as strings rather than YAML date objects.
  start_date: "2026-01-31"
  end_date: "2026-02-10"
  granularity: "1min"  # 1-minute intervals

# Market regions covered by the datasets (European energy markets).
# Codes stay quoted so every entry loads as a string.
regions:
  - "FR"  # France
  - "BE"  # Belgium
  - "DE"  # Germany
  - "NL"  # Netherlands
  - "UK"  # United Kingdom

# Per-dataset source settings. In this file the "hybrid" entries pair a
# historical_source with synthetic_patterns; "synthetic" entries carry only
# sizing parameters.
data_sources:
  electricity_prices:
    type: "hybrid"
    historical_source: "epex_spot"
    synthetic_patterns: true
  battery_capacity:
    type: "synthetic"
    num_batteries: 10
  renewable_generation:
    type: "synthetic"
    plants_per_source: 5
    sources: ["solar", "wind", "hydro"]
  conventional_generation:
    type: "synthetic"
    num_plants: 10
    fuel_types: ["gas", "coal", "nuclear"]
  load_profiles:
    # NOTE(review): declared "synthetic" but names a historical_source, unlike
    # the other synthetic entries. Confirm whether "hybrid" was intended or
    # whether historical_source is ignored here.
    type: "synthetic"
    historical_source: "entso_e"
  data_centers:
    type: "synthetic"
    num_centers: 5
    special_client: "bitcoin"
  bitcoin_mining:
    type: "hybrid"
    historical_source: "mempool.space"
    synthetic_patterns: true

# Output file settings for the generated datasets.
output:
  format: "parquet"
  compression: "snappy"
  # NOTE(review): unclear from this file whether the target applies per
  # dataset or to the whole output; confirm against the generator.
  target_size_mb: 200
  precision: "float32"

# Global knobs for synthetic generation: seeding, noise, outliers and
# missing values. Each feature has an on/off flag plus a magnitude.
generation:
  seed: 42  # fixed seed, presumably for reproducible runs (confirm)
  add_noise: true
  # NOTE(review): presumably fractions (0.05 = 5%, 0.01 = 1%, 0.005 = 0.5%);
  # confirm how the generator interprets the three values below.
  noise_level: 0.05
  include_outliers: true
  outlier_rate: 0.01
  include_missing_values: true
  missing_rate: 0.005

# Parameter ranges for synthetic battery storage units; ranges are
# [low, high] pairs.
battery:
  capacity_range: [10, 100]  # MWh
  charge_rate_range: [5, 50]  # MW
  discharge_rate_range: [5, 50]  # MW
  efficiency_range: [0.85, 0.95]
  # NOTE(review): the time base of this rate (per cycle, per day, ...) is not
  # stated here; confirm against the generator.
  degradation_rate: 0.001

# Per-source parameters for renewable plants: a [low, high] capacity range
# and the standard deviation of the forecast error.
renewable:
  solar:
    capacity_range: [50, 500]  # MW
    forecast_error_sd: 0.15
  wind:
    capacity_range: [100, 800]  # MW
    forecast_error_sd: 0.20
  hydro:
    capacity_range: [50, 300]  # MW
    forecast_error_sd: 0.05

# Per-fuel parameters for conventional plants: [low, high] ranges for
# capacity and marginal cost.
conventional:
  gas:
    capacity_range: [200, 1000]  # MW
    marginal_cost_range: [30, 80]  # EUR/MWh
  coal:
    capacity_range: [300, 1500]  # MW
    marginal_cost_range: [40, 70]  # EUR/MWh
  nuclear:
    capacity_range: [800, 1600]  # MW
    marginal_cost_range: [10, 30]  # EUR/MWh

# Parameter ranges for synthetic data centers; ranges are [low, high] pairs.
data_center:
  power_demand_range: [10, 100]  # MW
  # NOTE(review): unitless; presumably a multiplier around 1.0. Confirm.
  price_sensitivity_range: [0.8, 1.2]

# Parameter ranges for bitcoin mining data; ranges are [low, high] pairs.
bitcoin:
  # NOTE(review): this range is in EH/s, while config/schema.yaml declares
  # the hashrate_ths column in TH/s. Confirm the generator converts units.
  hashrate_range: [150, 250]  # EH/s
  mining_efficiency_range: [25, 35]  # J/TH
233
config/schema.yaml
Normal file
233
config/schema.yaml
Normal file
@@ -0,0 +1,233 @@
|
||||
# Schema definitions for the energy test datasets

# Column definitions for each dataset. Every column has a name, a type and
# a description; measured quantities also carry a unit.
schemas:
  electricity_prices:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of price observation"
      - name: "region"
        type: "category"
        description: "Market region code"
      - name: "day_ahead_price"
        type: "float32"
        unit: "EUR/MWh"
        description: "Day-ahead market clearing price"
      - name: "real_time_price"
        type: "float32"
        unit: "EUR/MWh"
        description: "Real-time market price"
      - name: "capacity_price"
        type: "float32"
        unit: "EUR/MWh"
        description: "Capacity market price"
      - name: "regulation_price"
        type: "float32"
        unit: "EUR/MWh"
        description: "Frequency regulation price"
      - name: "volume_mw"
        type: "float32"
        unit: "MW"
        description: "Traded volume"

  battery_capacity:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of battery state"
      - name: "battery_id"
        type: "category"
        description: "Unique battery identifier"
      - name: "capacity_mwh"
        type: "float32"
        unit: "MWh"
        description: "Total storage capacity"
      - name: "charge_level_mwh"
        type: "float32"
        unit: "MWh"
        description: "Current energy stored"
      # Signed per its description: positive while charging, negative while
      # discharging. discharge_rate_mw below is a maximum, not a current value.
      - name: "charge_rate_mw"
        type: "float32"
        unit: "MW"
        description: "Current charging rate (positive) or discharging (negative)"
      - name: "discharge_rate_mw"
        type: "float32"
        unit: "MW"
        description: "Maximum discharge rate"
      - name: "efficiency"
        type: "float32"
        description: "Round-trip efficiency (0-1)"

  renewable_generation:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of generation measurement"
      - name: "source"
        type: "category"
        description: "Renewable source type (solar, wind, hydro)"
      - name: "plant_id"
        type: "category"
        description: "Unique plant identifier"
      # NOTE(review): generation_mw and actual_mw are both described as
      # "actual" generation; confirm how the two columns differ.
      - name: "generation_mw"
        type: "float32"
        unit: "MW"
        description: "Actual generation output"
      - name: "forecast_mw"
        type: "float32"
        unit: "MW"
        description: "Forecasted generation"
      - name: "actual_mw"
        type: "float32"
        unit: "MW"
        description: "Actual measured generation (after correction)"
      - name: "capacity_factor"
        type: "float32"
        description: "Capacity utilization factor (0-1)"

  conventional_generation:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of generation measurement"
      - name: "plant_id"
        type: "category"
        description: "Unique plant identifier"
      - name: "fuel_type"
        type: "category"
        description: "Primary fuel type (gas, coal, nuclear)"
      - name: "generation_mw"
        type: "float32"
        unit: "MW"
        description: "Current generation output"
      - name: "marginal_cost"
        type: "float32"
        unit: "EUR/MWh"
        description: "Short-run marginal cost"
      - name: "heat_rate"
        type: "float32"
        unit: "MMBtu/MWh"
        description: "Thermal efficiency metric"

  load_profiles:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of load measurement"
      - name: "region"
        type: "category"
        description: "Region code"
      - name: "load_mw"
        type: "float32"
        unit: "MW"
        description: "Actual system load"
      - name: "forecast_mw"
        type: "float32"
        unit: "MW"
        description: "Load forecast"
      - name: "weather_temp"
        type: "float32"
        unit: "Celsius"
        description: "Average temperature"
      - name: "humidity"
        type: "float32"
        unit: "%"
        description: "Relative humidity"

  data_centers:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of demand measurement"
      - name: "data_center_id"
        type: "category"
        description: "Data center identifier"
      - name: "location"
        type: "category"
        description: "Geographic location"
      - name: "power_demand_mw"
        type: "float32"
        unit: "MW"
        description: "Current power demand"
      - name: "max_bid_price"
        type: "float32"
        unit: "EUR/MWh"
        description: "Maximum price willing to pay"
      - name: "client_type"
        type: "category"
        description: "Client type (bitcoin, enterprise, etc.)"

  bitcoin_mining:
    columns:
      - name: "timestamp"
        type: "datetime64[ns]"
        description: "Timestamp of mining measurement"
      - name: "pool_id"
        type: "category"
        description: "Mining pool identifier"
      - name: "hashrate_ths"
        type: "float32"
        unit: "TH/s"
        description: "Mining pool hashrate"
      - name: "btc_price_usd"
        type: "float32"
        unit: "USD"
        description: "Bitcoin price"
      - name: "mining_profitability"
        type: "float32"
        unit: "USD/TH/day"
        description: "Mining profitability per terahash per day"
      - name: "electricity_cost"
        type: "float32"
        unit: "EUR/MWh"
        description: "Electricity cost breakeven point"

# Per-dataset range checks, keyed by dataset name. Each rule names a column
# and gives a min and/or max bound.
# NOTE(review): reconstructed as a top-level sibling of `schemas`; confirm
# this is where the validator looks for it.
validation_rules:
  electricity_prices:
    # The minimum is negative for both price columns.
    - column: "day_ahead_price"
      min: -500
      max: 3000
    - column: "real_time_price"
      min: -500
      max: 5000

  battery_capacity:
    - column: "charge_level_mwh"
      min: 0
      # Names another column instead of a constant; presumably the upper
      # bound is that column's value in the same row (confirm).
      check_max: "capacity_mwh"
    - column: "efficiency"
      min: 0.5
      max: 1.0

  renewable_generation:
    - column: "generation_mw"
      min: 0
    - column: "capacity_factor"
      min: 0
      max: 1.0

  conventional_generation:
    - column: "generation_mw"
      min: 0
    - column: "heat_rate"
      min: 5
      max: 15

  load_profiles:
    - column: "load_mw"
      min: 0
    - column: "weather_temp"
      min: -30
      max: 50

  data_centers:
    - column: "power_demand_mw"
      min: 0
    - column: "max_bid_price"
      min: 0

  bitcoin_mining:
    - column: "hashrate_ths"
      min: 0
    - column: "btc_price_usd"
      min: 1000
Reference in New Issue
Block a user