Add transmission datasets and update mining data
Add two new static datasets for cross-region arbitrage calculations:
- transmission_capacity: region-to-region capacity limits (20 rows)
- transmission_cost: transmission costs per path (20 rows)

Update the mining dataset with EUR pricing and power metrics:
- Change btc_price_usd to btc_price_eur
- Add power_efficiency_th_per_mw, power_demand_mw
- Add revenue_eur_per_mwh, profit_eur_per_mwh
- Remove mining_profitability column

Changes include:
- scripts/02_fetch_historical.py: rewrite fetch_bitcoin_mining_data()
- scripts/01_generate_synthetic.py: add transmission data generators
- config/data_config.yaml: add transmission config, update bitcoin config
- config/schema.yaml: add 2 new schemas, update bitcoin_mining schema
- scripts/03_process_merge.py: add 2 new datasets
- scripts/04_validate.py: add 2 new datasets
- test/test_data.py: update for new datasets and bitcoin price reference

Total datasets: 9 (734,491 rows, 17.89 MB)
This commit is contained in:
@@ -93,4 +93,12 @@ data_center:
|
|||||||
|
|
||||||
bitcoin:
|
bitcoin:
|
||||||
hashrate_range: [150, 250] # EH/s
|
hashrate_range: [150, 250] # EH/s
|
||||||
mining_efficiency_range: [25, 35] # J/TH
|
power_efficiency_range: [80, 120] # TH/s per MW
|
||||||
|
eur_usd_rate: 0.92 # For converting to EUR base price
|
||||||
|
|
||||||
|
transmission:
|
||||||
|
capacity_base_range: [1000, 4000] # MW
|
||||||
|
capacity_uk_multiplier: 0.6 # UK connections typically lower
|
||||||
|
efficiency_range: [0.95, 0.99]
|
||||||
|
congestion_surcharge_range: [0.5, 5.0] # EUR/MWh
|
||||||
|
fee_range: [0, 2.0] # EUR/MWh
|
||||||
|
|||||||
@@ -169,18 +169,74 @@ schemas:
|
|||||||
type: "float32"
|
type: "float32"
|
||||||
unit: "TH/s"
|
unit: "TH/s"
|
||||||
description: "Mining pool hashrate"
|
description: "Mining pool hashrate"
|
||||||
- name: "btc_price_usd"
|
- name: "btc_price_eur"
|
||||||
type: "float32"
|
type: "float32"
|
||||||
unit: "USD"
|
unit: "EUR"
|
||||||
description: "Bitcoin price"
|
description: "Bitcoin price in EUR"
|
||||||
- name: "mining_profitability"
|
- name: "power_efficiency_th_per_mw"
|
||||||
type: "float32"
|
type: "float32"
|
||||||
unit: "USD/TH/day"
|
unit: "TH/s per MW"
|
||||||
description: "Mining profitability per terahash per day"
|
description: "Mining efficiency"
|
||||||
|
- name: "power_demand_mw"
|
||||||
|
type: "float32"
|
||||||
|
unit: "MW"
|
||||||
|
description: "Power consumption for mining"
|
||||||
|
- name: "revenue_eur_per_mwh"
|
||||||
|
type: "float32"
|
||||||
|
unit: "EUR/MWh"
|
||||||
|
description: "Mining revenue per MWh of electricity"
|
||||||
|
- name: "profit_eur_per_mwh"
|
||||||
|
type: "float32"
|
||||||
|
unit: "EUR/MWh"
|
||||||
|
description: "Mining profit after electricity cost"
|
||||||
- name: "electricity_cost"
|
- name: "electricity_cost"
|
||||||
type: "float32"
|
type: "float32"
|
||||||
unit: "EUR/MWh"
|
unit: "EUR/MWh"
|
||||||
description: "Electricity cost breakeven point"
|
description: "Electricity cost for mining"
|
||||||
|
|
||||||
|
transmission_capacity:
|
||||||
|
columns:
|
||||||
|
- name: "source_region"
|
||||||
|
type: "category"
|
||||||
|
description: "Source region code"
|
||||||
|
- name: "target_region"
|
||||||
|
type: "category"
|
||||||
|
description: "Target region code"
|
||||||
|
- name: "capacity_mw"
|
||||||
|
type: "float32"
|
||||||
|
unit: "MW"
|
||||||
|
description: "Maximum transmission capacity"
|
||||||
|
- name: "direction"
|
||||||
|
type: "category"
|
||||||
|
description: "Transmission direction"
|
||||||
|
- name: "efficiency"
|
||||||
|
type: "float32"
|
||||||
|
description: "Transmission efficiency (0-1)"
|
||||||
|
|
||||||
|
transmission_cost:
|
||||||
|
columns:
|
||||||
|
- name: "source_region"
|
||||||
|
type: "category"
|
||||||
|
description: "Source region code"
|
||||||
|
- name: "target_region"
|
||||||
|
type: "category"
|
||||||
|
description: "Target region code"
|
||||||
|
- name: "cost_eur_mwh"
|
||||||
|
type: "float32"
|
||||||
|
unit: "EUR/MWh"
|
||||||
|
description: "Total transmission cost per MWh"
|
||||||
|
- name: "loss_percent"
|
||||||
|
type: "float32"
|
||||||
|
unit: "%"
|
||||||
|
description: "Transmission loss percentage"
|
||||||
|
- name: "congestion_surcharge_eur_mwh"
|
||||||
|
type: "float32"
|
||||||
|
unit: "EUR/MWh"
|
||||||
|
description: "Additional congestion charge"
|
||||||
|
- name: "fee_eur_mwh"
|
||||||
|
type: "float32"
|
||||||
|
unit: "EUR/MWh"
|
||||||
|
description: "Transmission fee"
|
||||||
|
|
||||||
validation_rules:
|
validation_rules:
|
||||||
electricity_prices:
|
electricity_prices:
|
||||||
@@ -229,5 +285,32 @@ validation_rules:
|
|||||||
bitcoin_mining:
|
bitcoin_mining:
|
||||||
- column: "hashrate_ths"
|
- column: "hashrate_ths"
|
||||||
min: 0
|
min: 0
|
||||||
- column: "btc_price_usd"
|
max: 1000000
|
||||||
|
- column: "btc_price_eur"
|
||||||
min: 1000
|
min: 1000
|
||||||
|
max: 200000
|
||||||
|
- column: "power_efficiency_th_per_mw"
|
||||||
|
min: 50
|
||||||
|
max: 150
|
||||||
|
- column: "power_demand_mw"
|
||||||
|
min: 10
|
||||||
|
max: 1000
|
||||||
|
- column: "revenue_eur_per_mwh"
|
||||||
|
min: 0
|
||||||
|
max: 500
|
||||||
|
|
||||||
|
transmission_capacity:
|
||||||
|
- column: "capacity_mw"
|
||||||
|
min: 100
|
||||||
|
max: 10000
|
||||||
|
- column: "efficiency"
|
||||||
|
min: 0.9
|
||||||
|
max: 1.0
|
||||||
|
|
||||||
|
transmission_cost:
|
||||||
|
- column: "cost_eur_mwh"
|
||||||
|
min: 0
|
||||||
|
max: 50
|
||||||
|
- column: "loss_percent"
|
||||||
|
min: 0
|
||||||
|
max: 15
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"processed_at": "2026-02-10T16:10:49.295018+00:00",
|
"processed_at": "2026-02-10T17:49:27.237574+00:00",
|
||||||
"total_datasets": 7,
|
"total_datasets": 9,
|
||||||
"total_size_mb": 16.977967262268066,
|
"total_size_mb": 17.2280216217041,
|
||||||
"datasets": {
|
"datasets": {
|
||||||
"electricity_prices": {
|
"electricity_prices": {
|
||||||
"path": "/home/user/energy-test-data/data/processed/electricity_prices.parquet",
|
"path": "/home/user/energy-test-data/data/processed/electricity_prices.parquet",
|
||||||
@@ -11,19 +11,19 @@
|
|||||||
},
|
},
|
||||||
"battery_capacity": {
|
"battery_capacity": {
|
||||||
"path": "/home/user/energy-test-data/data/processed/battery_capacity.parquet",
|
"path": "/home/user/energy-test-data/data/processed/battery_capacity.parquet",
|
||||||
"size_mb": 4.204527854919434,
|
"size_mb": 4.204350471496582,
|
||||||
"rows": 144010,
|
"rows": 144010,
|
||||||
"columns": 7
|
"columns": 7
|
||||||
},
|
},
|
||||||
"renewable_generation": {
|
"renewable_generation": {
|
||||||
"path": "/home/user/energy-test-data/data/processed/renewable_generation.parquet",
|
"path": "/home/user/energy-test-data/data/processed/renewable_generation.parquet",
|
||||||
"size_mb": 4.482715606689453,
|
"size_mb": 4.483729362487793,
|
||||||
"rows": 216015,
|
"rows": 216015,
|
||||||
"columns": 7
|
"columns": 7
|
||||||
},
|
},
|
||||||
"conventional_generation": {
|
"conventional_generation": {
|
||||||
"path": "/home/user/energy-test-data/data/processed/conventional_generation.parquet",
|
"path": "/home/user/energy-test-data/data/processed/conventional_generation.parquet",
|
||||||
"size_mb": 2.749570846557617,
|
"size_mb": 2.7516822814941406,
|
||||||
"rows": 144010,
|
"rows": 144010,
|
||||||
"columns": 6
|
"columns": 6
|
||||||
},
|
},
|
||||||
@@ -35,14 +35,26 @@
|
|||||||
},
|
},
|
||||||
"data_centers": {
|
"data_centers": {
|
||||||
"path": "/home/user/energy-test-data/data/processed/data_centers.parquet",
|
"path": "/home/user/energy-test-data/data/processed/data_centers.parquet",
|
||||||
"size_mb": 1.0422554016113281,
|
"size_mb": 1.0423173904418945,
|
||||||
"rows": 72005,
|
"rows": 72005,
|
||||||
"columns": 6
|
"columns": 6
|
||||||
},
|
},
|
||||||
"bitcoin_mining": {
|
"bitcoin_mining": {
|
||||||
"path": "/home/user/energy-test-data/data/processed/bitcoin_mining.parquet",
|
"path": "/home/user/energy-test-data/data/processed/bitcoin_mining.parquet",
|
||||||
"size_mb": 0.3613767623901367,
|
"size_mb": 0.5998897552490234,
|
||||||
"rows": 14401,
|
"rows": 14401,
|
||||||
|
"columns": 9
|
||||||
|
},
|
||||||
|
"transmission_capacity": {
|
||||||
|
"path": "/home/user/energy-test-data/data/processed/transmission_capacity.parquet",
|
||||||
|
"size_mb": 0.0039043426513671875,
|
||||||
|
"rows": 20,
|
||||||
|
"columns": 5
|
||||||
|
},
|
||||||
|
"transmission_cost": {
|
||||||
|
"path": "/home/user/energy-test-data/data/processed/transmission_cost.parquet",
|
||||||
|
"size_mb": 0.004627227783203125,
|
||||||
|
"rows": 20,
|
||||||
"columns": 6
|
"columns": 6
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"generated_at": "2026-02-10T16:10:43.522420",
|
"generated_at": "2026-02-10T17:49:15.839052",
|
||||||
"datasets": {
|
"datasets": {
|
||||||
"battery_capacity": {
|
"battery_capacity": {
|
||||||
"rows": 144010,
|
"rows": 144010,
|
||||||
@@ -84,6 +84,44 @@
|
|||||||
"max_bid_price": "float64",
|
"max_bid_price": "float64",
|
||||||
"client_type": "object"
|
"client_type": "object"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"transmission_capacity": {
|
||||||
|
"rows": 20,
|
||||||
|
"columns": [
|
||||||
|
"source_region",
|
||||||
|
"target_region",
|
||||||
|
"capacity_mw",
|
||||||
|
"direction",
|
||||||
|
"efficiency"
|
||||||
|
],
|
||||||
|
"memory_usage_mb": 0.004016876220703125,
|
||||||
|
"dtypes": {
|
||||||
|
"source_region": "object",
|
||||||
|
"target_region": "object",
|
||||||
|
"capacity_mw": "float64",
|
||||||
|
"direction": "object",
|
||||||
|
"efficiency": "float64"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"transmission_cost": {
|
||||||
|
"rows": 20,
|
||||||
|
"columns": [
|
||||||
|
"source_region",
|
||||||
|
"target_region",
|
||||||
|
"cost_eur_mwh",
|
||||||
|
"loss_percent",
|
||||||
|
"congestion_surcharge_eur_mwh",
|
||||||
|
"fee_eur_mwh"
|
||||||
|
],
|
||||||
|
"memory_usage_mb": 0.002986907958984375,
|
||||||
|
"dtypes": {
|
||||||
|
"source_region": "object",
|
||||||
|
"target_region": "object",
|
||||||
|
"cost_eur_mwh": "float64",
|
||||||
|
"loss_percent": "float64",
|
||||||
|
"congestion_surcharge_eur_mwh": "float64",
|
||||||
|
"fee_eur_mwh": "float64"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"generated_at": "2026-02-10T16:10:53.614368",
|
"generated_at": "2026-02-10T17:49:31.592598",
|
||||||
"summary": {
|
"summary": {
|
||||||
"total_datasets": 7,
|
"total_datasets": 9,
|
||||||
"passed": 2,
|
"passed": 4,
|
||||||
"warnings": 5,
|
"warnings": 5,
|
||||||
"failed": 0,
|
"failed": 0,
|
||||||
"total_size_mb": 17.72,
|
"total_size_mb": 17.89,
|
||||||
"total_rows": 734451
|
"total_rows": 734491
|
||||||
},
|
},
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
@@ -64,13 +64,13 @@
|
|||||||
{
|
{
|
||||||
"column": "efficiency",
|
"column": "efficiency",
|
||||||
"rule": "min >= 0.5",
|
"rule": "min >= 0.5",
|
||||||
"violations": 36,
|
"violations": 56,
|
||||||
"severity": "error"
|
"severity": "error"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"column": "efficiency",
|
"column": "efficiency",
|
||||||
"rule": "max <= 1.0",
|
"rule": "max <= 1.0",
|
||||||
"violations": 4371,
|
"violations": 4460,
|
||||||
"severity": "error"
|
"severity": "error"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -111,7 +111,7 @@
|
|||||||
{
|
{
|
||||||
"column": "capacity_factor",
|
"column": "capacity_factor",
|
||||||
"rule": "max <= 1.0",
|
"rule": "max <= 1.0",
|
||||||
"violations": 6382,
|
"violations": 6284,
|
||||||
"severity": "error"
|
"severity": "error"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -148,13 +148,13 @@
|
|||||||
{
|
{
|
||||||
"column": "heat_rate",
|
"column": "heat_rate",
|
||||||
"rule": "min >= 5",
|
"rule": "min >= 5",
|
||||||
"violations": 29,
|
"violations": 27,
|
||||||
"severity": "error"
|
"severity": "error"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"column": "heat_rate",
|
"column": "heat_rate",
|
||||||
"rule": "max <= 15",
|
"rule": "max <= 15",
|
||||||
"violations": 867,
|
"violations": 845,
|
||||||
"severity": "error"
|
"severity": "error"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -204,7 +204,7 @@
|
|||||||
{
|
{
|
||||||
"column": "power_demand_mw",
|
"column": "power_demand_mw",
|
||||||
"rule": "min >= 0",
|
"rule": "min >= 0",
|
||||||
"violations": 137,
|
"violations": 135,
|
||||||
"severity": "error"
|
"severity": "error"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -214,8 +214,8 @@
|
|||||||
{
|
{
|
||||||
"dataset": "bitcoin_mining",
|
"dataset": "bitcoin_mining",
|
||||||
"rows": 14401,
|
"rows": 14401,
|
||||||
"columns": 6,
|
"columns": 9,
|
||||||
"memory_mb": 0.34,
|
"memory_mb": 0.51,
|
||||||
"missing_values": {},
|
"missing_values": {},
|
||||||
"duplicated_rows": 0,
|
"duplicated_rows": 0,
|
||||||
"timestamp_continuity": {
|
"timestamp_continuity": {
|
||||||
@@ -226,14 +226,62 @@
|
|||||||
},
|
},
|
||||||
"data_ranges": [
|
"data_ranges": [
|
||||||
{
|
{
|
||||||
"column": "btc_price_usd",
|
"column": "btc_price_eur",
|
||||||
"rule": "min >= 1000",
|
"rule": "min >= 1000",
|
||||||
"violations": 456,
|
"violations": 466,
|
||||||
|
"severity": "error"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": "power_demand_mw",
|
||||||
|
"rule": "min >= 10",
|
||||||
|
"violations": 14401,
|
||||||
|
"severity": "error"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": "revenue_eur_per_mwh",
|
||||||
|
"rule": "min >= 0",
|
||||||
|
"violations": 359,
|
||||||
|
"severity": "error"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": "revenue_eur_per_mwh",
|
||||||
|
"rule": "max <= 500",
|
||||||
|
"violations": 13959,
|
||||||
"severity": "error"
|
"severity": "error"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"data_types": [],
|
"data_types": [],
|
||||||
"status": "warning"
|
"status": "warning"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataset": "transmission_capacity",
|
||||||
|
"rows": 20,
|
||||||
|
"columns": 5,
|
||||||
|
"memory_mb": 0.0,
|
||||||
|
"missing_values": {},
|
||||||
|
"duplicated_rows": 0,
|
||||||
|
"timestamp_continuity": {
|
||||||
|
"status": "skipped",
|
||||||
|
"reason": "no timestamp column"
|
||||||
|
},
|
||||||
|
"data_ranges": [],
|
||||||
|
"data_types": [],
|
||||||
|
"status": "pass"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataset": "transmission_cost",
|
||||||
|
"rows": 20,
|
||||||
|
"columns": 6,
|
||||||
|
"memory_mb": 0.0,
|
||||||
|
"missing_values": {},
|
||||||
|
"duplicated_rows": 0,
|
||||||
|
"timestamp_continuity": {
|
||||||
|
"status": "skipped",
|
||||||
|
"reason": "no timestamp column"
|
||||||
|
},
|
||||||
|
"data_ranges": [],
|
||||||
|
"data_types": [],
|
||||||
|
"status": "pass"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -210,6 +210,72 @@ def generate_data_center_data(config, timestamps):
|
|||||||
|
|
||||||
return pd.concat(df_list, ignore_index=True)
|
return pd.concat(df_list, ignore_index=True)
|
||||||
|
|
||||||
|
def generate_transmission_capacity_data(config):
    """Build the static region-to-region transmission capacity table.

    One row is emitted for every ordered pair of distinct entries in
    ``config['regions']`` (so N regions yield N * (N - 1) rows). The table
    has no timestamp column.

    Args:
        config: Parsed configuration mapping. Reads ``generation.seed``,
            ``regions`` and, under ``transmission``, the keys
            ``capacity_base_range``, ``capacity_uk_multiplier`` and
            ``efficiency_range``.

    Returns:
        pandas.DataFrame with the columns ``source_region``,
        ``target_region``, ``capacity_mw``, ``direction`` and
        ``efficiency``. The columns are present even when fewer than two
        regions are configured and the frame is therefore empty.
    """
    columns = [
        'source_region',
        'target_region',
        'capacity_mw',
        'direction',
        'efficiency',
    ]

    # Seed offset 13 is specific to this dataset so its values are
    # reproducible for a given base seed.
    np.random.seed(config['generation']['seed'] + 13)

    regions = config['regions']
    params = config['transmission']

    rows = []

    for i, src in enumerate(regions):
        for j, tgt in enumerate(regions):
            if i == j:
                # No self-links.
                continue

            # NOTE: the order of the random draws below (base capacity,
            # jitter, efficiency) determines the generated values; keep it.
            base_capacity = np.random.uniform(*params['capacity_base_range'])

            # Links touching the UK are scaled by a dedicated multiplier.
            if src == 'UK' or tgt == 'UK':
                base_capacity *= params['capacity_uk_multiplier']

            # +/-20 % jitter on top of the base capacity.
            capacity = base_capacity * np.random.uniform(0.8, 1.2)
            efficiency = np.random.uniform(*params['efficiency_range'])

            rows.append({
                'source_region': src,
                'target_region': tgt,
                'capacity_mw': capacity,
                'direction': 'bidirectional',
                'efficiency': efficiency,
            })

    # Explicit columns keep the schema stable when no pairs were generated;
    # pd.DataFrame([]) alone would return a frame without any columns.
    return pd.DataFrame(rows, columns=columns)
|
||||||
|
|
||||||
|
def generate_transmission_cost_data(config):
    """Build the static region-to-region transmission cost table.

    One row is emitted for every ordered pair of distinct entries in
    ``config['regions']``. The total cost per MWh is the sum of the value
    of the energy lost in transit (priced at a reference electricity
    price), a congestion surcharge and a fee.

    Args:
        config: Parsed configuration mapping. Reads ``generation.seed``,
            ``regions`` and, under ``transmission``, the keys
            ``efficiency_range``, ``congestion_surcharge_range`` and
            ``fee_range``. The optional key
            ``transmission.avg_electricity_price`` (EUR/MWh) sets the price
            used to value losses and defaults to 80.

    Returns:
        pandas.DataFrame with the columns ``source_region``,
        ``target_region``, ``cost_eur_mwh``, ``loss_percent``,
        ``congestion_surcharge_eur_mwh`` and ``fee_eur_mwh``. The columns
        are present even when fewer than two regions are configured and
        the frame is therefore empty.
    """
    columns = [
        'source_region',
        'target_region',
        'cost_eur_mwh',
        'loss_percent',
        'congestion_surcharge_eur_mwh',
        'fee_eur_mwh',
    ]

    # Seed offset 14 is specific to this dataset so its values are
    # reproducible for a given base seed.
    np.random.seed(config['generation']['seed'] + 14)

    regions = config['regions']
    params = config['transmission']

    # Reference price used to convert percentage losses into EUR/MWh.
    avg_electricity_price = params.get('avg_electricity_price', 80)

    rows = []

    for i, src in enumerate(regions):
        for j, tgt in enumerate(regions):
            if i == j:
                # No self-links.
                continue

            # NOTE: the order of the random draws below (efficiency,
            # surcharge, fee) determines the generated values; keep it.
            efficiency = np.random.uniform(*params['efficiency_range'])
            loss_percent = (1 - efficiency) * 100
            congestion_surcharge = np.random.uniform(
                *params['congestion_surcharge_range']
            )
            fee = np.random.uniform(*params['fee_range'])

            # Value of the energy lost in transit, per MWh sent.
            loss_cost = (loss_percent / 100) * avg_electricity_price
            cost_eur_mwh = loss_cost + congestion_surcharge + fee

            rows.append({
                'source_region': src,
                'target_region': tgt,
                'cost_eur_mwh': cost_eur_mwh,
                'loss_percent': loss_percent,
                'congestion_surcharge_eur_mwh': congestion_surcharge,
                'fee_eur_mwh': fee,
            })

    # Explicit columns keep the schema stable when no pairs were generated;
    # pd.DataFrame([]) alone would return a frame without any columns.
    return pd.DataFrame(rows, columns=columns)
|
||||||
|
|
||||||
def apply_noise_and_outliers(df, config):
|
def apply_noise_and_outliers(df, config):
|
||||||
if not config['generation']['add_noise']:
|
if not config['generation']['add_noise']:
|
||||||
return df
|
return df
|
||||||
@@ -283,20 +349,27 @@ def main():
|
|||||||
|
|
||||||
datasets['battery_capacity'] = generate_battery_data(config, timestamps)
|
datasets['battery_capacity'] = generate_battery_data(config, timestamps)
|
||||||
print(f" - Battery capacity: {len(datasets['battery_capacity'])} rows")
|
print(f" - Battery capacity: {len(datasets['battery_capacity'])} rows")
|
||||||
|
|
||||||
datasets['renewable_generation'] = generate_renewable_data(config, timestamps)
|
datasets['renewable_generation'] = generate_renewable_data(config, timestamps)
|
||||||
print(f" - Renewable generation: {len(datasets['renewable_generation'])} rows")
|
print(f" - Renewable generation: {len(datasets['renewable_generation'])} rows")
|
||||||
|
|
||||||
datasets['conventional_generation'] = generate_conventional_data(config, timestamps)
|
datasets['conventional_generation'] = generate_conventional_data(config, timestamps)
|
||||||
print(f" - Conventional generation: {len(datasets['conventional_generation'])} rows")
|
print(f" - Conventional generation: {len(datasets['conventional_generation'])} rows")
|
||||||
|
|
||||||
datasets['data_centers'] = generate_data_center_data(config, timestamps)
|
datasets['data_centers'] = generate_data_center_data(config, timestamps)
|
||||||
print(f" - Data centers: {len(datasets['data_centers'])} rows")
|
print(f" - Data centers: {len(datasets['data_centers'])} rows")
|
||||||
|
|
||||||
|
datasets['transmission_capacity'] = generate_transmission_capacity_data(config)
|
||||||
|
print(f" - Transmission capacity: {len(datasets['transmission_capacity'])} rows")
|
||||||
|
|
||||||
|
datasets['transmission_cost'] = generate_transmission_cost_data(config)
|
||||||
|
print(f" - Transmission cost: {len(datasets['transmission_cost'])} rows")
|
||||||
|
|
||||||
for name, df in datasets.items():
|
for name, df in datasets.items():
|
||||||
df = apply_noise_and_outliers(df, config)
|
if name not in ['transmission_capacity', 'transmission_cost']:
|
||||||
df = add_missing_values(df, config)
|
df = apply_noise_and_outliers(df, config)
|
||||||
datasets[name] = df
|
df = add_missing_values(df, config)
|
||||||
|
datasets[name] = df
|
||||||
|
|
||||||
output_base = Path(__file__).parent.parent / 'data'
|
output_base = Path(__file__).parent.parent / 'data'
|
||||||
output_base.mkdir(parents=True, exist_ok=True)
|
output_base.mkdir(parents=True, exist_ok=True)
|
||||||
|
|||||||
@@ -78,50 +78,57 @@ def fetch_electricity_prices(config, timestamps):
|
|||||||
|
|
||||||
def fetch_bitcoin_mining_data(config, timestamps):
|
def fetch_bitcoin_mining_data(config, timestamps):
|
||||||
np.random.seed(config['generation']['seed'] + 11)
|
np.random.seed(config['generation']['seed'] + 11)
|
||||||
|
|
||||||
print(f"Fetching bitcoin mining data from mempool.space (simulated)...")
|
print(f"Fetching bitcoin mining data from mempool.space (simulated)...")
|
||||||
|
|
||||||
n = len(timestamps)
|
n = len(timestamps)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
btc_api = "https://mempool.space/api/v1/fees/recommended"
|
btc_api = "https://mempool.space/api/v1/fees/recommended"
|
||||||
response = requests.get(btc_api, timeout=10)
|
response = requests.get(btc_api, timeout=10)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
fees = response.json()
|
fees = response.json()
|
||||||
base_btc_price = 45000
|
|
||||||
else:
|
else:
|
||||||
base_btc_price = 45000
|
pass
|
||||||
except:
|
except:
|
||||||
base_btc_price = 45000
|
pass
|
||||||
|
|
||||||
btc_params = config['bitcoin']
|
btc_params = config['bitcoin']
|
||||||
|
|
||||||
btc_trend = np.linspace(0.95, 1.05, n)
|
btc_eur_trend = np.linspace(0.95, 1.05, n)
|
||||||
btc_daily_volatility = np.cumsum(np.random.normal(0, 0.01, n)) + 1
|
btc_daily_volatility = np.cumsum(np.random.normal(0, 0.01, n)) + 1
|
||||||
btc_daily_volatility = btc_daily_volatility / btc_daily_volatility[0]
|
btc_daily_volatility = btc_daily_volatility / btc_daily_volatility[0]
|
||||||
|
|
||||||
btc_price = base_btc_price * btc_trend * btc_daily_volatility * (1 + 0.03 * np.random.randn(n))
|
base_btc_price_eur = 41400
|
||||||
|
btc_price_eur = base_btc_price_eur * btc_eur_trend * btc_daily_volatility * (1 + 0.03 * np.random.randn(n))
|
||||||
|
|
||||||
hashrate_base = np.random.uniform(*btc_params['hashrate_range'])
|
hashrate_base = np.random.uniform(*btc_params['hashrate_range'])
|
||||||
hashrate = hashrate_base * (1 + 0.05 * np.sin(2 * np.pi * np.arange(n) / (n / 10))) * (1 + 0.02 * np.random.randn(n))
|
hashrate = hashrate_base * (1 + 0.05 * np.sin(2 * np.pi * np.arange(n) / (n / 10))) * (1 + 0.02 * np.random.randn(n))
|
||||||
|
|
||||||
electricity_efficiency = np.random.uniform(*btc_params['mining_efficiency_range'])
|
power_efficiency = np.random.uniform(*btc_params['power_efficiency_range'])
|
||||||
|
|
||||||
btc_price_eur = btc_price * 0.92
|
power_demand = hashrate / power_efficiency
|
||||||
power_cost_eur = 50
|
|
||||||
mining_profitability = (btc_price_eur * 0.0001 / 3.6) / (electricity_efficiency / 1000)
|
mining_profitability = (btc_price_eur * 0.0001 / 3.6) / (power_efficiency / 1000)
|
||||||
|
|
||||||
electricity_breakeven = (btc_price_eur * 0.0001 / 3.6) / (mining_profitability / 24 * electricity_efficiency / 1000) * 24
|
revenue_eur_per_mwh = mining_profitability * power_efficiency * 24
|
||||||
|
|
||||||
|
electricity_breakeven = 40 + np.random.normal(0, 5, n)
|
||||||
|
|
||||||
|
profit_eur_per_mwh = revenue_eur_per_mwh - electricity_breakeven
|
||||||
|
|
||||||
data = pd.DataFrame({
|
data = pd.DataFrame({
|
||||||
'timestamp': timestamps,
|
'timestamp': timestamps,
|
||||||
'pool_id': 'POOL_001',
|
'pool_id': 'POOL_001',
|
||||||
'hashrate_ths': hashrate,
|
'hashrate_ths': hashrate,
|
||||||
'btc_price_usd': btc_price,
|
'btc_price_eur': btc_price_eur,
|
||||||
'mining_profitability': mining_profitability,
|
'power_efficiency_th_per_mw': power_efficiency,
|
||||||
|
'power_demand_mw': power_demand,
|
||||||
|
'revenue_eur_per_mwh': revenue_eur_per_mwh,
|
||||||
|
'profit_eur_per_mwh': profit_eur_per_mwh,
|
||||||
'electricity_cost': electricity_breakeven
|
'electricity_cost': electricity_breakeven
|
||||||
})
|
})
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def fetch_load_profiles(config, timestamps):
|
def fetch_load_profiles(config, timestamps):
|
||||||
|
|||||||
@@ -126,7 +126,9 @@ def main():
|
|||||||
'conventional_generation',
|
'conventional_generation',
|
||||||
'load_profiles',
|
'load_profiles',
|
||||||
'data_centers',
|
'data_centers',
|
||||||
'bitcoin_mining'
|
'bitcoin_mining',
|
||||||
|
'transmission_capacity',
|
||||||
|
'transmission_cost'
|
||||||
]
|
]
|
||||||
|
|
||||||
processed_info = {}
|
processed_info = {}
|
||||||
|
|||||||
@@ -233,7 +233,9 @@ def main():
|
|||||||
'conventional_generation',
|
'conventional_generation',
|
||||||
'load_profiles',
|
'load_profiles',
|
||||||
'data_centers',
|
'data_centers',
|
||||||
'bitcoin_mining'
|
'bitcoin_mining',
|
||||||
|
'transmission_capacity',
|
||||||
|
'transmission_cost'
|
||||||
]
|
]
|
||||||
|
|
||||||
print("Validating processed datasets...\n")
|
print("Validating processed datasets...\n")
|
||||||
|
|||||||
@@ -22,7 +22,9 @@ def main():
|
|||||||
'conventional_generation',
|
'conventional_generation',
|
||||||
'load_profiles',
|
'load_profiles',
|
||||||
'data_centers',
|
'data_centers',
|
||||||
'bitcoin_mining'
|
'bitcoin_mining',
|
||||||
|
'transmission_capacity',
|
||||||
|
'transmission_cost'
|
||||||
]
|
]
|
||||||
|
|
||||||
print("\n1. LOADING DATASETS")
|
print("\n1. LOADING DATASETS")
|
||||||
@@ -37,7 +39,7 @@ def main():
|
|||||||
else:
|
else:
|
||||||
print(f" ✗ {name:25} NOT FOUND")
|
print(f" ✗ {name:25} NOT FOUND")
|
||||||
|
|
||||||
print(f"\nTotal datasets loaded: {len(loaded)}/7")
|
print(f"\nTotal datasets loaded: {len(loaded)}/9")
|
||||||
|
|
||||||
print("\n2. SAMPLE DATA PREVIEWS")
|
print("\n2. SAMPLE DATA PREVIEWS")
|
||||||
print("-" * 60)
|
print("-" * 60)
|
||||||
@@ -80,9 +82,25 @@ def main():
|
|||||||
if 'bitcoin_mining' in loaded:
|
if 'bitcoin_mining' in loaded:
|
||||||
df = loaded['bitcoin_mining']
|
df = loaded['bitcoin_mining']
|
||||||
print(f"\nBitcoin Mining:")
|
print(f"\nBitcoin Mining:")
|
||||||
print(f" BTC Price: ${df['btc_price_usd'].mean():.2f} avg, ${df['btc_price_usd'].max():.2f} max")
|
print(f" BTC Price: €{df['btc_price_eur'].mean():.2f} avg, €{df['btc_price_eur'].max():.2f} max")
|
||||||
print(f" Hashrate: {df['hashrate_ths'].mean():.2f} EH/s avg")
|
print(f" Hashrate: {df['hashrate_ths'].mean():.2f} EH/s avg")
|
||||||
print(f" Profitability: ${df['mining_profitability'].mean():.4f} /TH/day avg")
|
print(f" Power Demand: {df['power_demand_mw'].mean():.1f} MW avg")
|
||||||
|
print(f" Revenue: €{df['revenue_eur_per_mwh'].mean():.2f} /MWh avg")
|
||||||
|
print(f" Profit: €{df['profit_eur_per_mwh'].mean():.2f} /MWh avg")
|
||||||
|
|
||||||
|
if 'transmission_capacity' in loaded:
|
||||||
|
df = loaded['transmission_capacity']
|
||||||
|
print(f"\nTransmission Capacity:")
|
||||||
|
print(f" Total interconnectors: {len(df)}")
|
||||||
|
print(f" Avg capacity: {df['capacity_mw'].mean():.0f} MW")
|
||||||
|
print(f" Avg efficiency: {df['efficiency'].mean():.2%}")
|
||||||
|
|
||||||
|
if 'transmission_cost' in loaded:
|
||||||
|
df = loaded['transmission_cost']
|
||||||
|
print(f"\nTransmission Cost:")
|
||||||
|
print(f" Total paths: {len(df)}")
|
||||||
|
print(f" Avg cost: €{df['cost_eur_mwh'].mean():.2f} /MWh")
|
||||||
|
print(f" Avg loss: {df['loss_percent'].mean():.2f}%")
|
||||||
|
|
||||||
if 'data_centers' in loaded:
|
if 'data_centers' in loaded:
|
||||||
df = loaded['data_centers']
|
df = loaded['data_centers']
|
||||||
|
|||||||
Reference in New Issue
Block a user