Initial commit: Energy test data generation pipeline

Add complete test data preparation system for energy trading strategy
demo. Includes configuration, data generation scripts, and validation
tools for 7 datasets covering electricity prices, battery capacity,
renewable/conventional generation, load profiles, data centers, and
mining data.

Excluded from git: the actual parquet data files (data/raw/, data/processed/),
which can be regenerated using the provided scripts.

Datasets:
- electricity_prices: Day-ahead and real-time prices (5 regions)
- battery_capacity: Storage system charge/discharge cycles
- renewable_generation: Solar, wind, hydro with forecast errors
- conventional_generation: Gas, coal, nuclear plant outputs
- load_profiles: Regional demand with weather correlations
- data_centers: Power demand profiles including mining operations
- bitcoin_mining: Hashrate, BTC price, profitability (mempool.space API)
This commit is contained in:
2026-02-10 23:28:23 +07:00
commit a643767359
12 changed files with 1869 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
{
"processed_at": "2026-02-10T16:10:49.295018+00:00",
"total_datasets": 7,
"total_size_mb": 16.977967262268066,
"datasets": {
"electricity_prices": {
"path": "/home/user/energy-test-data/data/processed/electricity_prices.parquet",
"size_mb": 2.2755775451660156,
"rows": 72005,
"columns": 7
},
"battery_capacity": {
"path": "/home/user/energy-test-data/data/processed/battery_capacity.parquet",
"size_mb": 4.204527854919434,
"rows": 144010,
"columns": 7
},
"renewable_generation": {
"path": "/home/user/energy-test-data/data/processed/renewable_generation.parquet",
"size_mb": 4.482715606689453,
"rows": 216015,
"columns": 7
},
"conventional_generation": {
"path": "/home/user/energy-test-data/data/processed/conventional_generation.parquet",
"size_mb": 2.749570846557617,
"rows": 144010,
"columns": 6
},
"load_profiles": {
"path": "/home/user/energy-test-data/data/processed/load_profiles.parquet",
"size_mb": 1.861943244934082,
"rows": 72005,
"columns": 6
},
"data_centers": {
"path": "/home/user/energy-test-data/data/processed/data_centers.parquet",
"size_mb": 1.0422554016113281,
"rows": 72005,
"columns": 6
},
"bitcoin_mining": {
"path": "/home/user/energy-test-data/data/processed/bitcoin_mining.parquet",
"size_mb": 0.3613767623901367,
"rows": 14401,
"columns": 6
}
}
}

View File

@@ -0,0 +1,89 @@
{
"generated_at": "2026-02-10T16:10:43.522420+00:00",
"datasets": {
"battery_capacity": {
"rows": 144010,
"columns": [
"timestamp",
"battery_id",
"capacity_mwh",
"charge_level_mwh",
"charge_rate_mw",
"discharge_rate_mw",
"efficiency"
],
"memory_usage_mb": 15.38205337524414,
"dtypes": {
"timestamp": "datetime64[ns]",
"battery_id": "object",
"capacity_mwh": "float64",
"charge_level_mwh": "float64",
"charge_rate_mw": "float64",
"discharge_rate_mw": "float64",
"efficiency": "float64"
}
},
"renewable_generation": {
"rows": 216015,
"columns": [
"timestamp",
"source",
"plant_id",
"generation_mw",
"forecast_mw",
"actual_mw",
"capacity_factor"
],
"memory_usage_mb": 34.472124099731445,
"dtypes": {
"timestamp": "datetime64[ns]",
"source": "object",
"plant_id": "object",
"generation_mw": "float64",
"forecast_mw": "float64",
"actual_mw": "float64",
"capacity_factor": "float64"
}
},
"conventional_generation": {
"rows": 144010,
"columns": [
"timestamp",
"plant_id",
"fuel_type",
"generation_mw",
"marginal_cost",
"heat_rate"
],
"memory_usage_mb": 26.149402618408203,
"dtypes": {
"timestamp": "datetime64[ns]",
"plant_id": "object",
"fuel_type": "object",
"generation_mw": "float64",
"marginal_cost": "float64",
"heat_rate": "float64"
}
},
"data_centers": {
"rows": 72005,
"columns": [
"timestamp",
"data_center_id",
"location",
"power_demand_mw",
"max_bid_price",
"client_type"
],
"memory_usage_mb": 14.585489273071289,
"dtypes": {
"timestamp": "datetime64[ns]",
"data_center_id": "object",
"location": "object",
"power_demand_mw": "float64",
"max_bid_price": "float64",
"client_type": "object"
}
}
}
}

View File

@@ -0,0 +1,239 @@
{
"generated_at": "2026-02-10T16:10:53.614368+00:00",
"summary": {
"total_datasets": 7,
"passed": 2,
"warnings": 5,
"failed": 0,
"total_size_mb": 17.72,
"total_rows": 734451
},
"datasets": [
{
"dataset": "electricity_prices",
"rows": 72005,
"columns": 7,
"memory_mb": 1.99,
"missing_values": {},
"duplicated_rows": 0,
"timestamp_continuity": {
"status": "checked",
"expected_frequency": "1min",
"gaps_detected": 0,
"total_rows": 72005
},
"data_ranges": [],
"data_types": [],
"status": "pass"
},
{
"dataset": "battery_capacity",
"rows": 144010,
"columns": 7,
"memory_mb": 3.98,
"missing_values": {
"capacity_mwh": {
"count": 720,
"percentage": 0.5
},
"charge_level_mwh": {
"count": 720,
"percentage": 0.5
},
"charge_rate_mw": {
"count": 720,
"percentage": 0.5
},
"discharge_rate_mw": {
"count": 720,
"percentage": 0.5
},
"efficiency": {
"count": 720,
"percentage": 0.5
}
},
"duplicated_rows": 0,
"timestamp_continuity": {
"status": "checked",
"expected_frequency": "1min",
"gaps_detected": 0,
"total_rows": 144010
},
"data_ranges": [
{
"column": "efficiency",
"rule": "min >= 0.5",
"violations": 36,
"severity": "error"
},
{
"column": "efficiency",
"rule": "max <= 1.0",
"violations": 4371,
"severity": "error"
}
],
"data_types": [],
"status": "warning"
},
{
"dataset": "renewable_generation",
"rows": 216015,
"columns": 7,
"memory_mb": 5.36,
"missing_values": {
"generation_mw": {
"count": 1080,
"percentage": 0.5
},
"forecast_mw": {
"count": 1080,
"percentage": 0.5
},
"actual_mw": {
"count": 1080,
"percentage": 0.5
},
"capacity_factor": {
"count": 1080,
"percentage": 0.5
}
},
"duplicated_rows": 0,
"timestamp_continuity": {
"status": "checked",
"expected_frequency": "1min",
"gaps_detected": 0,
"total_rows": 216015
},
"data_ranges": [
{
"column": "capacity_factor",
"rule": "max <= 1.0",
"violations": 6382,
"severity": "error"
}
],
"data_types": [],
"status": "warning"
},
{
"dataset": "conventional_generation",
"rows": 144010,
"columns": 6,
"memory_mb": 3.02,
"missing_values": {
"generation_mw": {
"count": 720,
"percentage": 0.5
},
"marginal_cost": {
"count": 720,
"percentage": 0.5
},
"heat_rate": {
"count": 720,
"percentage": 0.5
}
},
"duplicated_rows": 0,
"timestamp_continuity": {
"status": "checked",
"expected_frequency": "1min",
"gaps_detected": 0,
"total_rows": 144010
},
"data_ranges": [
{
"column": "heat_rate",
"rule": "min >= 5",
"violations": 29,
"severity": "error"
},
{
"column": "heat_rate",
"rule": "max <= 15",
"violations": 867,
"severity": "error"
}
],
"data_types": [],
"status": "warning"
},
{
"dataset": "load_profiles",
"rows": 72005,
"columns": 6,
"memory_mb": 1.72,
"missing_values": {},
"duplicated_rows": 0,
"timestamp_continuity": {
"status": "checked",
"expected_frequency": "1min",
"gaps_detected": 0,
"total_rows": 72005
},
"data_ranges": [],
"data_types": [],
"status": "pass"
},
{
"dataset": "data_centers",
"rows": 72005,
"columns": 6,
"memory_mb": 1.31,
"missing_values": {
"power_demand_mw": {
"count": 360,
"percentage": 0.5
},
"max_bid_price": {
"count": 360,
"percentage": 0.5
}
},
"duplicated_rows": 0,
"timestamp_continuity": {
"status": "checked",
"expected_frequency": "1min",
"gaps_detected": 0,
"total_rows": 72005
},
"data_ranges": [
{
"column": "power_demand_mw",
"rule": "min >= 0",
"violations": 137,
"severity": "error"
}
],
"data_types": [],
"status": "warning"
},
{
"dataset": "bitcoin_mining",
"rows": 14401,
"columns": 6,
"memory_mb": 0.34,
"missing_values": {},
"duplicated_rows": 0,
"timestamp_continuity": {
"status": "checked",
"expected_frequency": "1min",
"gaps_detected": 0,
"total_rows": 14401
},
"data_ranges": [
{
"column": "btc_price_usd",
"rule": "min >= 1000",
"violations": 456,
"severity": "error"
}
],
"data_types": [],
"status": "warning"
}
]
}