Files
energy-test-data/test/test_data.py
kbt-devops faaadc1297 Add transmission datasets and update mining data
Add two new static datasets for cross-region arbitrage calculations:
- transmission_capacity: region-to-region capacity limits (20 rows)
- transmission_cost: transmission costs per path (20 rows)

Update mining dataset with EUR pricing and power metrics:
- Change btc_price_usd to btc_price_eur
- Add power_efficiency_th_per_mw, power_demand_mw
- Add revenue_eur_per_mwh, profit_eur_per_mwh
- Remove mining_profitability column

Changes include:
- scripts/02_fetch_historical.py: rewrite fetch_bitcoin_mining_data()
- scripts/01_generate_synthetic.py: add transmission data generators
- config/data_config.yaml: add transmission config, update bitcoin config
- config/schema.yaml: add 2 new schemas, update bitcoin_mining schema
- scripts/03_process_merge.py: add 2 new datasets
- scripts/04_validate.py: add 2 new datasets
- test/test_data.py: update for new datasets and bitcoin price reference

Total datasets: 9 (734,491 rows, 17.89 MB)
2026-02-11 01:09:33 +07:00

134 lines
4.9 KiB
Python

#!/usr/bin/env python3
"""
Quick test script to verify and explore the generated test data.
"""
import pandas as pd
from pathlib import Path
import json
def main(data_dir=None, metadata_dir=None):
    """Load the processed datasets, print a verification report, return them.

    The report has five sections: row counts per dataset, a three-row
    preview of each, the timestamp range of every dataset that has a
    ``timestamp`` column, a few per-dataset statistics, and a summary of
    ``final_metadata.json``.

    Args:
        data_dir: Directory holding the ``<name>.parquet`` files. Defaults
            to ``<project root>/data/processed``, where the project root is
            the parent of this file's directory.
        metadata_dir: Directory holding ``final_metadata.json``. Defaults
            to ``<project root>/data/metadata``.

    Returns:
        dict mapping dataset name to the loaded ``DataFrame``. Datasets
        whose parquet file is missing are absent from the dict.
    """
    project_root = Path(__file__).parent.parent
    if data_dir is None:
        data_dir = project_root / 'data' / 'processed'
    else:
        data_dir = Path(data_dir)
    if metadata_dir is None:
        metadata_dir = project_root / 'data' / 'metadata'
    else:
        metadata_dir = Path(metadata_dir)

    print("="*60)
    print("ENERGY TEST DATA - QUICK VERIFICATION")
    print("="*60)

    datasets = [
        'electricity_prices',
        'battery_capacity',
        'renewable_generation',
        'conventional_generation',
        'load_profiles',
        'data_centers',
        'bitcoin_mining',
        'transmission_capacity',
        'transmission_cost',
    ]

    print("\n1. LOADING DATASETS")
    print("-" * 60)
    loaded = {}
    for name in datasets:
        file_path = data_dir / f'{name}.parquet'
        if file_path.exists():
            loaded[name] = pd.read_parquet(file_path)
            print(f"{name:25} {len(loaded[name]):>10,} rows")
        else:
            print(f"{name:25} NOT FOUND")
    # Derive the denominator from the list so it cannot drift when datasets
    # are added or removed.
    print(f"\nTotal datasets loaded: {len(loaded)}/{len(datasets)}")

    print("\n2. SAMPLE DATA PREVIEWS")
    print("-" * 60)
    for name, df in loaded.items():
        print(f"\n{name}:")
        print(df.head(3).to_string(index=False))
        print(f" Columns: {list(df.columns)}")

    print("\n3. TIME RANGE VERIFICATION")
    print("-" * 60)
    for name, df in loaded.items():
        if 'timestamp' in df.columns:
            # Converted in place, so the returned frames carry datetime
            # timestamps as well.
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            min_time = df['timestamp'].min()
            max_time = df['timestamp'].max()
            duration = (max_time - min_time).total_seconds() / 3600
            print(f" {name:25} {min_time} to {max_time}")
            print(f" Duration: {duration:.2f} hours")

    print("\n4. KEY STATISTICS")
    print("-" * 60)
    if 'electricity_prices' in loaded:
        df = loaded['electricity_prices']
        print("\nElectricity Prices (by region):")
        for region in df['region'].unique():
            region_data = df[df['region'] == region]
            print(f" {region:3s} Day-Ahead: €{region_data['day_ahead_price'].mean():.2f} avg, €{region_data['day_ahead_price'].max():.2f} max")
            print(f" Real-Time: €{region_data['real_time_price'].mean():.2f} avg, €{region_data['real_time_price'].max():.2f} max")

    if 'battery_capacity' in loaded:
        df = loaded['battery_capacity']
        print("\nBattery Capacity:")
        # Only the first three batteries are shown to keep the report short.
        for battery_id in df['battery_id'].unique()[:3]:
            bat_data = df[df['battery_id'] == battery_id]
            print(f" {battery_id} Cap: {bat_data['capacity_mwh'].iloc[0]:.1f} MWh, Avg Charge: {bat_data['charge_level_mwh'].mean():.1f} MWh")

    if 'bitcoin_mining' in loaded:
        df = loaded['bitcoin_mining']
        print("\nBitcoin Mining:")
        print(f" BTC Price: €{df['btc_price_eur'].mean():.2f} avg, €{df['btc_price_eur'].max():.2f} max")
        # NOTE(review): the column name suggests TH/s while the label says
        # EH/s - confirm which unit the data actually uses.
        print(f" Hashrate: {df['hashrate_ths'].mean():.2f} EH/s avg")
        print(f" Power Demand: {df['power_demand_mw'].mean():.1f} MW avg")
        print(f" Revenue: €{df['revenue_eur_per_mwh'].mean():.2f} /MWh avg")
        print(f" Profit: €{df['profit_eur_per_mwh'].mean():.2f} /MWh avg")

    if 'transmission_capacity' in loaded:
        df = loaded['transmission_capacity']
        print("\nTransmission Capacity:")
        print(f" Total interconnectors: {len(df)}")
        print(f" Avg capacity: {df['capacity_mw'].mean():.0f} MW")
        print(f" Avg efficiency: {df['efficiency'].mean():.2%}")

    if 'transmission_cost' in loaded:
        df = loaded['transmission_cost']
        print("\nTransmission Cost:")
        print(f" Total paths: {len(df)}")
        print(f" Avg cost: €{df['cost_eur_mwh'].mean():.2f} /MWh")
        print(f" Avg loss: {df['loss_percent'].mean():.2f}%")

    if 'data_centers' in loaded:
        df = loaded['data_centers']
        print("\nData Centers:")
        for dc_id in df['data_center_id'].unique():
            dc_data = df[df['data_center_id'] == dc_id]
            client = dc_data['client_type'].iloc[0]
            print(f" {dc_id} Client: {client:8s}, Avg Demand: {dc_data['power_demand_mw'].mean():.1f} MW")

    print("\n5. METADATA SUMMARY")
    print("-" * 60)
    meta_file = metadata_dir / 'final_metadata.json'
    if meta_file.exists():
        # Explicit encoding: JSON is UTF-8 regardless of the platform default.
        with open(meta_file, encoding='utf-8') as f:
            meta = json.load(f)
        print(f" Total datasets: {meta['total_datasets']}")
        print(f" Total size: {meta['total_size_mb']:.2f} MB")
        total_rows = sum(d['rows'] for d in meta['datasets'].values())
        print(f" Total rows: {total_rows:,}")
        print(f" Generated at: {meta['processed_at']}")
    else:
        # Say so explicitly instead of leaving the section silently empty.
        print(f" {meta_file.name} NOT FOUND")

    # Only claim full success when every expected dataset was actually loaded.
    missing = [name for name in datasets if name not in loaded]
    print("\n" + "="*60)
    if missing:
        print(f"TEST COMPLETE - {len(missing)} DATASET(S) MISSING: {', '.join(missing)}")
    else:
        print("TEST COMPLETE - ALL DATA LOADED SUCCESSFULLY")
    print("="*60)
    return loaded
# Run the verification report only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()