Add test_data.py for data verification
Add utility script to quickly verify and explore generated test data. Provides data loading, sample previews, time range checks, and key statistics for all datasets.
This commit is contained in:
115
test/test_data.py
Normal file
115
test/test_data.py
Normal file
@@ -0,0 +1,115 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Quick test script to verify and explore the generated test data.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
||||
def main():
    """Verify and explore the generated test data end to end.

    Loads every expected parquet dataset from data/processed, prints sample
    previews, time-range checks, key statistics, and the metadata summary,
    then returns the loaded frames so an interactive caller can explore more.

    Returns:
        dict: dataset name -> pandas.DataFrame, for every dataset found on
        disk (missing files are reported but do not abort the run).
    """
    data_dir = Path(__file__).parent.parent / 'data' / 'processed'
    metadata_dir = Path(__file__).parent.parent / 'data' / 'metadata'

    print("="*60)
    print("ENERGY TEST DATA - QUICK VERIFICATION")
    print("="*60)

    # Expected datasets; each maps to <name>.parquet under data_dir.
    datasets = [
        'electricity_prices',
        'battery_capacity',
        'renewable_generation',
        'conventional_generation',
        'load_profiles',
        'data_centers',
        'bitcoin_mining'
    ]

    loaded = _load_datasets(data_dir, datasets)
    _preview_samples(loaded)
    _verify_time_ranges(loaded)
    _print_key_statistics(loaded)
    _print_metadata_summary(metadata_dir)

    print("\n" + "="*60)
    print("TEST COMPLETE - ALL DATA LOADED SUCCESSFULLY")
    print("="*60)

    return loaded


def _load_datasets(data_dir, names):
    """Section 1: load each parquet file under *data_dir*, reporting hits/misses."""
    print("\n1. LOADING DATASETS")
    print("-" * 60)

    loaded = {}
    for name in names:
        file_path = data_dir / f'{name}.parquet'
        if file_path.exists():
            loaded[name] = pd.read_parquet(file_path)
            print(f" ✓ {name:25} {len(loaded[name]):>10,} rows")
        else:
            print(f" ✗ {name:25} NOT FOUND")

    # Generalized: the denominator was a hard-coded 7; it now tracks the
    # expected-dataset list so the count stays right if the list changes.
    print(f"\nTotal datasets loaded: {len(loaded)}/{len(names)}")
    return loaded


def _preview_samples(loaded):
    """Section 2: print the first three rows and column list of each dataset."""
    print("\n2. SAMPLE DATA PREVIEWS")
    print("-" * 60)

    for name, df in loaded.items():
        print(f"\n{name}:")
        print(df.head(3).to_string(index=False))
        print(f" Columns: {list(df.columns)}")


def _verify_time_ranges(loaded):
    """Section 3: report min/max timestamp and span of time-indexed datasets.

    Note: mutates each frame in place by coercing its 'timestamp' column to
    datetime, so the frames returned by main() carry real datetime columns.
    """
    print("\n3. TIME RANGE VERIFICATION")
    print("-" * 60)

    for name, df in loaded.items():
        if 'timestamp' in df.columns:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            min_time = df['timestamp'].min()
            max_time = df['timestamp'].max()
            # Span in hours between first and last sample.
            duration = (max_time - min_time).total_seconds() / 3600
            print(f" {name:25} {min_time} to {max_time}")
            print(f" Duration: {duration:.2f} hours")


def _print_key_statistics(loaded):
    """Section 4: headline statistics per dataset (each skipped when absent)."""
    print("\n4. KEY STATISTICS")
    print("-" * 60)

    if 'electricity_prices' in loaded:
        df = loaded['electricity_prices']
        print("\nElectricity Prices (by region):")
        for region in df['region'].unique():
            region_data = df[df['region'] == region]
            print(f" {region:3s} Day-Ahead: €{region_data['day_ahead_price'].mean():.2f} avg, €{region_data['day_ahead_price'].max():.2f} max")
            print(f" Real-Time: €{region_data['real_time_price'].mean():.2f} avg, €{region_data['real_time_price'].max():.2f} max")

    if 'battery_capacity' in loaded:
        df = loaded['battery_capacity']
        print("\nBattery Capacity:")
        # Only the first three batteries, to keep the output short.
        for battery_id in df['battery_id'].unique()[:3]:
            bat_data = df[df['battery_id'] == battery_id]
            print(f" {battery_id} Cap: {bat_data['capacity_mwh'].iloc[0]:.1f} MWh, Avg Charge: {bat_data['charge_level_mwh'].mean():.1f} MWh")

    if 'bitcoin_mining' in loaded:
        df = loaded['bitcoin_mining']
        print("\nBitcoin Mining:")
        print(f" BTC Price: ${df['btc_price_usd'].mean():.2f} avg, ${df['btc_price_usd'].max():.2f} max")
        # NOTE(review): the column is named hashrate_ths (TH/s?) but the label
        # printed says EH/s — confirm the unit against the data generator.
        print(f" Hashrate: {df['hashrate_ths'].mean():.2f} EH/s avg")
        print(f" Profitability: ${df['mining_profitability'].mean():.4f} /TH/day avg")

    if 'data_centers' in loaded:
        df = loaded['data_centers']
        print("\nData Centers:")
        for dc_id in df['data_center_id'].unique():
            dc_data = df[df['data_center_id'] == dc_id]
            client = dc_data['client_type'].iloc[0]
            print(f" {dc_id} Client: {client:8s}, Avg Demand: {dc_data['power_demand_mw'].mean():.1f} MW")


def _print_metadata_summary(metadata_dir):
    """Section 5: echo totals from final_metadata.json if it was generated."""
    print("\n5. METADATA SUMMARY")
    print("-" * 60)

    meta_file = metadata_dir / 'final_metadata.json'
    if meta_file.exists():
        with open(meta_file) as f:
            meta = json.load(f)
        print(f" Total datasets: {meta['total_datasets']}")
        print(f" Total size: {meta['total_size_mb']:.2f} MB")
        total_rows = sum(d['rows'] for d in meta['datasets'].values())
        print(f" Total rows: {total_rows:,}")
        print(f" Generated at: {meta['processed_at']}")
|
||||
|
||||
# Allow running directly as a script: python test/test_data.py
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user