Files
energy-test-data/config/data_config.yaml
kbt-devops a643767359 Initial commit: Energy test data generation pipeline
Add complete test data preparation system for energy trading strategy
demo. Includes configuration, data generation scripts, and validation
tools for 7 datasets covering electricity prices, battery capacity,
renewable/conventional generation, load profiles, data centers, and
mining data.

Excluded from git: Actual parquet data files (data/raw/, data/processed/)
can be regenerated using the provided scripts.

Datasets:
- electricity_prices: Day-ahead and real-time prices (5 regions)
- battery_capacity: Storage system charge/discharge cycles
- renewable_generation: Solar, wind, hydro with forecast errors
- conventional_generation: Gas, coal, nuclear plant outputs
- load_profiles: Regional demand with weather correlations
- data_centers: Power demand profiles including mining operations
- mining_data: Hashrate, price, profitability (mempool.space API)
2026-02-10 23:28:23 +07:00

97 lines
2.1 KiB
YAML

# Energy Test Data Configuration
# For energy trading strategy demo
# Time window and sampling interval for the test data.
time_range:
  # Last 10 days from current date (adjustable).
  # Dates are quoted so they stay strings instead of being implicitly
  # converted to date objects by the YAML loader.
  # NOTE(review): whether end_date is inclusive is not visible here - confirm
  # against the generation scripts.
  start_date: "2026-01-31"
  end_date: "2026-02-10"
  granularity: "1min"  # 1-minute intervals
# Market regions covered by the configuration (European energy markets).
# Codes are quoted so they are always loaded as plain strings.
regions:
  - "FR"  # France
  - "BE"  # Belgium
  - "DE"  # Germany
  - "NL"  # Netherlands
  - "UK"  # United Kingdom
# Per-dataset source settings. Each child key names one dataset and carries
# its own `type` ("hybrid" or "synthetic" in this file) plus dataset-specific
# options. The nesting is required: without it the repeated `type`,
# `historical_source` and `synthetic_patterns` keys would collide in a single
# mapping.
data_sources:
  electricity_prices:
    type: "hybrid"
    historical_source: "epex_spot"
    synthetic_patterns: true
  battery_capacity:
    type: "synthetic"
    num_batteries: 10
  renewable_generation:
    type: "synthetic"
    plants_per_source: 5
    sources: ["solar", "wind", "hydro"]
  conventional_generation:
    type: "synthetic"
    num_plants: 10
    fuel_types: ["gas", "coal", "nuclear"]
  load_profiles:
    # NOTE(review): type is "synthetic" yet a historical_source is listed -
    # confirm whether this dataset should be "hybrid".
    type: "synthetic"
    historical_source: "entso_e"
  data_centers:
    type: "synthetic"
    num_centers: 5
    special_client: "bitcoin"
  bitcoin_mining:
    type: "hybrid"
    historical_source: "mempool.space"
    synthetic_patterns: true
# Output file settings for the generated datasets.
output:
  format: "parquet"
  compression: "snappy"
  # NOTE(review): unclear from this file whether the size target applies per
  # dataset or to the whole output - confirm with the generation scripts.
  target_size_mb: 200
  precision: "float32"
# Synthetic-data generation controls: random seed plus toggles and rates for
# injected noise, outliers and missing values.
generation:
  seed: 42
  add_noise: true
  # NOTE(review): presumably a relative noise magnitude - confirm how the
  # generator applies it.
  noise_level: 0.05
  include_outliers: true
  # NOTE(review): rates below look like fractions of samples (1% and 0.5%) -
  # confirm.
  outlier_rate: 0.01
  include_missing_values: true
  missing_rate: 0.005
# Battery parameter ranges. Each *_range is a two-element [low, high] list.
battery:
  capacity_range: [10, 100]  # MWh
  charge_rate_range: [5, 50]  # MW
  discharge_rate_range: [5, 50]  # MW
  efficiency_range: [0.85, 0.95]
  # NOTE(review): the time base of the degradation rate (per cycle, per day,
  # ...) is not stated in this file - confirm.
  degradation_rate: 0.001
# Renewable plant parameters, one sub-mapping per source. The per-source
# nesting is required: `capacity_range` and `forecast_error_sd` repeat for
# every source and would otherwise collide as duplicate keys.
# NOTE(review): forecast_error_sd is presumably a standard deviation relative
# to output or capacity - confirm.
renewable:
  solar:
    capacity_range: [50, 500]  # MW
    forecast_error_sd: 0.15
  wind:
    capacity_range: [100, 800]  # MW
    forecast_error_sd: 0.20
  hydro:
    capacity_range: [50, 300]  # MW
    forecast_error_sd: 0.05
# Conventional plant parameters, one sub-mapping per fuel type. The per-fuel
# nesting is required: `capacity_range` and `marginal_cost_range` repeat for
# every fuel and would otherwise collide as duplicate keys.
conventional:
  gas:
    capacity_range: [200, 1000]  # MW
    marginal_cost_range: [30, 80]  # EUR/MWh
  coal:
    capacity_range: [300, 1500]  # MW
    marginal_cost_range: [40, 70]  # EUR/MWh
  nuclear:
    capacity_range: [800, 1600]  # MW
    marginal_cost_range: [10, 30]  # EUR/MWh
# Data-center parameter ranges, each a two-element [low, high] list.
data_center:
  power_demand_range: [10, 100]  # MW
  # NOTE(review): unit and meaning of the sensitivity factor are not stated in
  # this file - confirm with the consumer.
  price_sensitivity_range: [0.8, 1.2]
# Bitcoin mining parameter ranges, each a two-element [low, high] list.
bitcoin:
  hashrate_range: [150, 250]  # EH/s
  mining_efficiency_range: [25, 35]  # J/TH