#!/usr/bin/env python3
"""
Quick test script to verify and explore the generated test data.
"""

import json
from pathlib import Path
from typing import Optional, Union

import pandas as pd

# Names of the parquet datasets the script looks for (one `<name>.parquet` each).
DATASETS = (
    'electricity_prices',
    'battery_capacity',
    'renewable_generation',
    'conventional_generation',
    'load_profiles',
    'data_centers',
    'bitcoin_mining',
)

# Default locations are resolved relative to this script: <script dir>/../data/...
_PROJECT_ROOT = Path(__file__).parent.parent
_DEFAULT_DATA_DIR = _PROJECT_ROOT / 'data' / 'processed'
_DEFAULT_METADATA_DIR = _PROJECT_ROOT / 'data' / 'metadata'

_RULE_WIDTH = 60


def _section(title):
    """Print a numbered section title followed by a dashed rule."""
    print(f"\n{title}")
    print("-" * _RULE_WIDTH)


def _load_datasets(data_dir):
    """Read every dataset in DATASETS that exists under *data_dir* and report row counts."""
    loaded = {}
    for name in DATASETS:
        file_path = data_dir / f'{name}.parquet'
        if file_path.exists():
            loaded[name] = pd.read_parquet(file_path)
            print(f" ✓ {name:25} {len(loaded[name]):>10,} rows")
        else:
            print(f" ✗ {name:25} NOT FOUND")

    # Derive the denominator from DATASETS so it cannot drift from the list.
    print(f"\nTotal datasets loaded: {len(loaded)}/{len(DATASETS)}")
    return loaded


def _print_previews(loaded):
    """Print the first three rows and the column names of each loaded dataset."""
    for name, df in loaded.items():
        print(f"\n{name}:")
        print(df.head(3).to_string(index=False))
        print(f" Columns: {list(df.columns)}")


def _print_time_ranges(loaded):
    """Print the min/max timestamp and covered duration of each dataset with a 'timestamp' column."""
    for name, df in loaded.items():
        if 'timestamp' not in df.columns:
            continue
        # The converted column is written back into the frame, so the frames
        # returned by main() carry datetime timestamps (kept for compatibility).
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        min_time = df['timestamp'].min()
        max_time = df['timestamp'].max()
        duration = (max_time - min_time).total_seconds() / 3600
        print(f" {name:25} {min_time} to {max_time}")
        print(f" Duration: {duration:.2f} hours")


def _print_key_statistics(loaded):
    """Print summary statistics for the price, battery, mining and data-center datasets, if loaded."""
    if 'electricity_prices' in loaded:
        df = loaded['electricity_prices']
        print("\nElectricity Prices (by region):")
        for region in df['region'].unique():
            region_data = df[df['region'] == region]
            day_ahead = region_data['day_ahead_price']
            real_time = region_data['real_time_price']
            print(f" {region:3s} Day-Ahead: €{day_ahead.mean():.2f} avg, €{day_ahead.max():.2f} max")
            print(f" Real-Time: €{real_time.mean():.2f} avg, €{real_time.max():.2f} max")

    if 'battery_capacity' in loaded:
        df = loaded['battery_capacity']
        print("\nBattery Capacity:")
        # Only the first three battery ids are reported to keep the output short.
        for battery_id in df['battery_id'].unique()[:3]:
            bat_data = df[df['battery_id'] == battery_id]
            capacity = bat_data['capacity_mwh'].iloc[0]
            avg_charge = bat_data['charge_level_mwh'].mean()
            print(f" {battery_id} Cap: {capacity:.1f} MWh, Avg Charge: {avg_charge:.1f} MWh")

    if 'bitcoin_mining' in loaded:
        df = loaded['bitcoin_mining']
        print("\nBitcoin Mining:")
        print(f" BTC Price: ${df['btc_price_usd'].mean():.2f} avg, ${df['btc_price_usd'].max():.2f} max")
        # NOTE(review): the column is named 'hashrate_ths' but the label says
        # EH/s — confirm the intended unit against the data generator.
        print(f" Hashrate: {df['hashrate_ths'].mean():.2f} EH/s avg")
        print(f" Profitability: ${df['mining_profitability'].mean():.4f} /TH/day avg")

    if 'data_centers' in loaded:
        df = loaded['data_centers']
        print("\nData Centers:")
        for dc_id in df['data_center_id'].unique():
            dc_data = df[df['data_center_id'] == dc_id]
            client = dc_data['client_type'].iloc[0]
            avg_demand = dc_data['power_demand_mw'].mean()
            print(f" {dc_id} Client: {client:8s}, Avg Demand: {avg_demand:.1f} MW")


def _print_metadata_summary(metadata_dir):
    """Print totals from `final_metadata.json` under *metadata_dir*; print nothing if it is absent."""
    meta_file = metadata_dir / 'final_metadata.json'
    if not meta_file.exists():
        return

    with open(meta_file, encoding='utf-8') as f:
        meta = json.load(f)

    total_rows = sum(d['rows'] for d in meta['datasets'].values())
    print(f" Total datasets: {meta['total_datasets']}")
    print(f" Total size: {meta['total_size_mb']:.2f} MB")
    print(f" Total rows: {total_rows:,}")
    print(f" Generated at: {meta['processed_at']}")


def main(
    data_dir: Optional[Union[str, Path]] = None,
    metadata_dir: Optional[Union[str, Path]] = None,
):
    """Load the generated test datasets and print a verification report.

    The report has five sections: dataset loading, sample previews,
    timestamp ranges, key statistics, and a metadata summary. Datasets whose
    parquet file is missing are reported and skipped; the closing banner
    states whether every dataset was found.

    Args:
        data_dir: Directory containing the `<name>.parquet` files. Defaults
            to `data/processed` one level above this script's directory.
        metadata_dir: Directory containing `final_metadata.json`. Defaults
            to `data/metadata` one level above this script's directory.

    Returns:
        A dict mapping dataset name to its loaded DataFrame, containing only
        the datasets that were found. Frames with a 'timestamp' column have
        that column converted with `pd.to_datetime`.

    Raises:
        KeyError: If a loaded dataset or the metadata file lacks a field the
            report reads.
    """
    data_dir = _DEFAULT_DATA_DIR if data_dir is None else Path(data_dir)
    metadata_dir = _DEFAULT_METADATA_DIR if metadata_dir is None else Path(metadata_dir)

    print("=" * _RULE_WIDTH)
    print("ENERGY TEST DATA - QUICK VERIFICATION")
    print("=" * _RULE_WIDTH)

    _section("1. LOADING DATASETS")
    loaded = _load_datasets(data_dir)

    _section("2. SAMPLE DATA PREVIEWS")
    _print_previews(loaded)

    _section("3. TIME RANGE VERIFICATION")
    _print_time_ranges(loaded)

    _section("4. KEY STATISTICS")
    _print_key_statistics(loaded)

    _section("5. METADATA SUMMARY")
    _print_metadata_summary(metadata_dir)

    # Only claim success when every expected dataset was actually loaded.
    missing = [name for name in DATASETS if name not in loaded]
    print("\n" + "=" * _RULE_WIDTH)
    if missing:
        print(f"TEST COMPLETE - {len(missing)} DATASET(S) MISSING: {', '.join(missing)}")
    else:
        print("TEST COMPLETE - ALL DATA LOADED SUCCESSFULLY")
    print("=" * _RULE_WIDTH)

    return loaded


if __name__ == '__main__':
    main()