Add transmission datasets and update mining data

Add two new static datasets for cross-region arbitrage calculations:
- transmission_capacity: region-to-region capacity limits (20 rows)
- transmission_cost: transmission costs per path (20 rows)

Update the bitcoin_mining dataset with EUR pricing and power metrics:
- Replace the btc_price_usd column with btc_price_eur
- Add power_efficiency_th_per_mw, power_demand_mw
- Add revenue_eur_per_mwh, profit_eur_per_mwh
- Remove mining_profitability column

Changes include:
- scripts/02_fetch_historical.py: rewrite fetch_bitcoin_mining_data()
- scripts/01_generate_synthetic.py: add transmission data generators
- config/data_config.yaml: add transmission config, update bitcoin config
- config/schema.yaml: add 2 new schemas, update bitcoin_mining schema
- scripts/03_process_merge.py: add the 2 new transmission datasets
- scripts/04_validate.py: add the 2 new transmission datasets
- test/test_data.py: update for new datasets and bitcoin price reference

Total datasets: 9 (734,491 rows, 17.89 MB)
This commit is contained in:
2026-02-11 01:09:33 +07:00
parent d981f7c56c
commit faaadc1297
10 changed files with 361 additions and 70 deletions

View File

@@ -1,12 +1,12 @@
{
"generated_at": "2026-02-10T16:10:53.614368",
"generated_at": "2026-02-10T17:49:31.592598",
"summary": {
"total_datasets": 7,
"passed": 2,
"total_datasets": 9,
"passed": 4,
"warnings": 5,
"failed": 0,
"total_size_mb": 17.72,
"total_rows": 734451
"total_size_mb": 17.89,
"total_rows": 734491
},
"datasets": [
{
@@ -64,13 +64,13 @@
{
"column": "efficiency",
"rule": "min >= 0.5",
"violations": 36,
"violations": 56,
"severity": "error"
},
{
"column": "efficiency",
"rule": "max <= 1.0",
"violations": 4371,
"violations": 4460,
"severity": "error"
}
],
@@ -111,7 +111,7 @@
{
"column": "capacity_factor",
"rule": "max <= 1.0",
"violations": 6382,
"violations": 6284,
"severity": "error"
}
],
@@ -148,13 +148,13 @@
{
"column": "heat_rate",
"rule": "min >= 5",
"violations": 29,
"violations": 27,
"severity": "error"
},
{
"column": "heat_rate",
"rule": "max <= 15",
"violations": 867,
"violations": 845,
"severity": "error"
}
],
@@ -204,7 +204,7 @@
{
"column": "power_demand_mw",
"rule": "min >= 0",
"violations": 137,
"violations": 135,
"severity": "error"
}
],
@@ -214,8 +214,8 @@
{
"dataset": "bitcoin_mining",
"rows": 14401,
"columns": 6,
"memory_mb": 0.34,
"columns": 9,
"memory_mb": 0.51,
"missing_values": {},
"duplicated_rows": 0,
"timestamp_continuity": {
@@ -226,14 +226,62 @@
},
"data_ranges": [
{
"column": "btc_price_usd",
"column": "btc_price_eur",
"rule": "min >= 1000",
"violations": 456,
"violations": 466,
"severity": "error"
},
{
"column": "power_demand_mw",
"rule": "min >= 10",
"violations": 14401,
"severity": "error"
},
{
"column": "revenue_eur_per_mwh",
"rule": "min >= 0",
"violations": 359,
"severity": "error"
},
{
"column": "revenue_eur_per_mwh",
"rule": "max <= 500",
"violations": 13959,
"severity": "error"
}
],
"data_types": [],
"status": "warning"
},
{
"dataset": "transmission_capacity",
"rows": 20,
"columns": 5,
"memory_mb": 0.0,
"missing_values": {},
"duplicated_rows": 0,
"timestamp_continuity": {
"status": "skipped",
"reason": "no timestamp column"
},
"data_ranges": [],
"data_types": [],
"status": "pass"
},
{
"dataset": "transmission_cost",
"rows": 20,
"columns": 6,
"memory_mb": 0.0,
"missing_values": {},
"duplicated_rows": 0,
"timestamp_continuity": {
"status": "skipped",
"reason": "no timestamp column"
},
"data_ranges": [],
"data_types": [],
"status": "pass"
}
]
}