Add two new static datasets for cross-region arbitrage calculations:
- transmission_capacity: region-to-region capacity limits (20 rows)
- transmission_cost: transmission costs per path (20 rows)

Update mining dataset with EUR pricing and power metrics:
- Change btc_price_usd to btc_price_eur
- Add power_efficiency_th_per_mw, power_demand_mw
- Add revenue_eur_per_mwh, profit_eur_per_mwh
- Remove mining_profitability column

Changes include:
- scripts/02_fetch_historical.py: rewrite fetch_bitcoin_mining_data()
- scripts/01_generate_synthetic.py: add transmission data generators
- config/data_config.yaml: add transmission config, update bitcoin config
- config/schema.yaml: add 2 new schemas, update bitcoin_mining schema
- scripts/03_process_merge.py: add 2 new datasets
- scripts/04_validate.py: add 2 new datasets
- test/test_data.py: update for new datasets and bitcoin price reference

Total datasets: 9 (734,491 rows, 17.89 MB)
287 lines
6.5 KiB
JSON
287 lines
6.5 KiB
JSON
{
|
|
"generated_at": "2026-02-10T17:49:31.592598",
|
|
"summary": {
|
|
"total_datasets": 9,
|
|
"passed": 4,
|
|
"warnings": 5,
|
|
"failed": 0,
|
|
"total_size_mb": 17.89,
|
|
"total_rows": 734491
|
|
},
|
|
"datasets": [
|
|
{
|
|
"dataset": "electricity_prices",
|
|
"rows": 72005,
|
|
"columns": 7,
|
|
"memory_mb": 1.99,
|
|
"missing_values": {},
|
|
"duplicated_rows": 0,
|
|
"timestamp_continuity": {
|
|
"status": "checked",
|
|
"expected_frequency": "1min",
|
|
"gaps_detected": 0,
|
|
"total_rows": 72005
|
|
},
|
|
"data_ranges": [],
|
|
"data_types": [],
|
|
"status": "pass"
|
|
},
|
|
{
|
|
"dataset": "battery_capacity",
|
|
"rows": 144010,
|
|
"columns": 7,
|
|
"memory_mb": 3.98,
|
|
"missing_values": {
|
|
"capacity_mwh": {
|
|
"count": 720,
|
|
"percentage": 0.5
|
|
},
|
|
"charge_level_mwh": {
|
|
"count": 720,
|
|
"percentage": 0.5
|
|
},
|
|
"charge_rate_mw": {
|
|
"count": 720,
|
|
"percentage": 0.5
|
|
},
|
|
"discharge_rate_mw": {
|
|
"count": 720,
|
|
"percentage": 0.5
|
|
},
|
|
"efficiency": {
|
|
"count": 720,
|
|
"percentage": 0.5
|
|
}
|
|
},
|
|
"duplicated_rows": 0,
|
|
"timestamp_continuity": {
|
|
"status": "checked",
|
|
"expected_frequency": "1min",
|
|
"gaps_detected": 0,
|
|
"total_rows": 144010
|
|
},
|
|
"data_ranges": [
|
|
{
|
|
"column": "efficiency",
|
|
"rule": "min >= 0.5",
|
|
"violations": 56,
|
|
"severity": "error"
|
|
},
|
|
{
|
|
"column": "efficiency",
|
|
"rule": "max <= 1.0",
|
|
"violations": 4460,
|
|
"severity": "error"
|
|
}
|
|
],
|
|
"data_types": [],
|
|
"status": "warning"
|
|
},
|
|
{
|
|
"dataset": "renewable_generation",
|
|
"rows": 216015,
|
|
"columns": 7,
|
|
"memory_mb": 5.36,
|
|
"missing_values": {
|
|
"generation_mw": {
|
|
"count": 1080,
|
|
"percentage": 0.5
|
|
},
|
|
"forecast_mw": {
|
|
"count": 1080,
|
|
"percentage": 0.5
|
|
},
|
|
"actual_mw": {
|
|
"count": 1080,
|
|
"percentage": 0.5
|
|
},
|
|
"capacity_factor": {
|
|
"count": 1080,
|
|
"percentage": 0.5
|
|
}
|
|
},
|
|
"duplicated_rows": 0,
|
|
"timestamp_continuity": {
|
|
"status": "checked",
|
|
"expected_frequency": "1min",
|
|
"gaps_detected": 0,
|
|
"total_rows": 216015
|
|
},
|
|
"data_ranges": [
|
|
{
|
|
"column": "capacity_factor",
|
|
"rule": "max <= 1.0",
|
|
"violations": 6284,
|
|
"severity": "error"
|
|
}
|
|
],
|
|
"data_types": [],
|
|
"status": "warning"
|
|
},
|
|
{
|
|
"dataset": "conventional_generation",
|
|
"rows": 144010,
|
|
"columns": 6,
|
|
"memory_mb": 3.02,
|
|
"missing_values": {
|
|
"generation_mw": {
|
|
"count": 720,
|
|
"percentage": 0.5
|
|
},
|
|
"marginal_cost": {
|
|
"count": 720,
|
|
"percentage": 0.5
|
|
},
|
|
"heat_rate": {
|
|
"count": 720,
|
|
"percentage": 0.5
|
|
}
|
|
},
|
|
"duplicated_rows": 0,
|
|
"timestamp_continuity": {
|
|
"status": "checked",
|
|
"expected_frequency": "1min",
|
|
"gaps_detected": 0,
|
|
"total_rows": 144010
|
|
},
|
|
"data_ranges": [
|
|
{
|
|
"column": "heat_rate",
|
|
"rule": "min >= 5",
|
|
"violations": 27,
|
|
"severity": "error"
|
|
},
|
|
{
|
|
"column": "heat_rate",
|
|
"rule": "max <= 15",
|
|
"violations": 845,
|
|
"severity": "error"
|
|
}
|
|
],
|
|
"data_types": [],
|
|
"status": "warning"
|
|
},
|
|
{
|
|
"dataset": "load_profiles",
|
|
"rows": 72005,
|
|
"columns": 6,
|
|
"memory_mb": 1.72,
|
|
"missing_values": {},
|
|
"duplicated_rows": 0,
|
|
"timestamp_continuity": {
|
|
"status": "checked",
|
|
"expected_frequency": "1min",
|
|
"gaps_detected": 0,
|
|
"total_rows": 72005
|
|
},
|
|
"data_ranges": [],
|
|
"data_types": [],
|
|
"status": "pass"
|
|
},
|
|
{
|
|
"dataset": "data_centers",
|
|
"rows": 72005,
|
|
"columns": 6,
|
|
"memory_mb": 1.31,
|
|
"missing_values": {
|
|
"power_demand_mw": {
|
|
"count": 360,
|
|
"percentage": 0.5
|
|
},
|
|
"max_bid_price": {
|
|
"count": 360,
|
|
"percentage": 0.5
|
|
}
|
|
},
|
|
"duplicated_rows": 0,
|
|
"timestamp_continuity": {
|
|
"status": "checked",
|
|
"expected_frequency": "1min",
|
|
"gaps_detected": 0,
|
|
"total_rows": 72005
|
|
},
|
|
"data_ranges": [
|
|
{
|
|
"column": "power_demand_mw",
|
|
"rule": "min >= 0",
|
|
"violations": 135,
|
|
"severity": "error"
|
|
}
|
|
],
|
|
"data_types": [],
|
|
"status": "warning"
|
|
},
|
|
{
|
|
"dataset": "bitcoin_mining",
|
|
"rows": 14401,
|
|
"columns": 9,
|
|
"memory_mb": 0.51,
|
|
"missing_values": {},
|
|
"duplicated_rows": 0,
|
|
"timestamp_continuity": {
|
|
"status": "checked",
|
|
"expected_frequency": "1min",
|
|
"gaps_detected": 0,
|
|
"total_rows": 14401
|
|
},
|
|
"data_ranges": [
|
|
{
|
|
"column": "btc_price_eur",
|
|
"rule": "min >= 1000",
|
|
"violations": 466,
|
|
"severity": "error"
|
|
},
|
|
{
|
|
"column": "power_demand_mw",
|
|
"rule": "min >= 10",
|
|
"violations": 14401,
|
|
"severity": "error"
|
|
},
|
|
{
|
|
"column": "revenue_eur_per_mwh",
|
|
"rule": "min >= 0",
|
|
"violations": 359,
|
|
"severity": "error"
|
|
},
|
|
{
|
|
"column": "revenue_eur_per_mwh",
|
|
"rule": "max <= 500",
|
|
"violations": 13959,
|
|
"severity": "error"
|
|
}
|
|
],
|
|
"data_types": [],
|
|
"status": "warning"
|
|
},
|
|
{
|
|
"dataset": "transmission_capacity",
|
|
"rows": 20,
|
|
"columns": 5,
|
|
"memory_mb": 0.0,
|
|
"missing_values": {},
|
|
"duplicated_rows": 0,
|
|
"timestamp_continuity": {
|
|
"status": "skipped",
|
|
"reason": "no timestamp column"
|
|
},
|
|
"data_ranges": [],
|
|
"data_types": [],
|
|
"status": "pass"
|
|
},
|
|
{
|
|
"dataset": "transmission_cost",
|
|
"rows": 20,
|
|
"columns": 6,
|
|
"memory_mb": 0.0,
|
|
"missing_values": {},
|
|
"duplicated_rows": 0,
|
|
"timestamp_continuity": {
|
|
"status": "skipped",
|
|
"reason": "no timestamp column"
|
|
},
|
|
"data_ranges": [],
|
|
"data_types": [],
|
|
"status": "pass"
|
|
}
|
|
]
|
|
} |