---
# Data Regression Testing Framework - Example Configuration
# This file demonstrates all available configuration options

# ============================================================================
# DATABASE PAIRS
# Define baseline (production) and target (test) database connections
# ============================================================================

database_pairs:
  # Example 1: Data Warehouse Comparison
  # The server/database values below are empty placeholders.
  # NOTE(review): presumably these must be filled in before this pair can
  # be used - confirm how the framework treats empty strings.
  - name: "DWH_Comparison"
    enabled: true
    description: "Compare production and test data warehouse"
    baseline:
      server: ""
      database: ""
      timeout:
        connection: 30  # seconds
        query: 300  # seconds (5 minutes)
    target:
      server: ""
      database: ""
      timeout:
        connection: 30  # seconds
        query: 300  # seconds (5 minutes)

  # Example 2: Operational Database Comparison (disabled)
  # No timeout block is given for this pair.
  # NOTE(review): presumably framework defaults apply - confirm.
  - name: "OPS_Comparison"
    enabled: false
    description: "Compare operational databases (currently disabled)"
    baseline:
      server: ""
      database: ""
    target:
      server: ""
      database: ""

# ============================================================================
# COMPARISON SETTINGS
# Configure what types of comparisons to perform
# ============================================================================

comparison:
  # Comparison mode: "health_check" or "full"
  #   - health_check: Quick validation (row counts, schema)
  #   - full: Comprehensive validation (includes aggregates)
  mode: "health_check"

  # Row Count Comparison
  row_count:
    enabled: true
    tolerance_percent: 0.0  # 0% = exact match required
    # Examples:
    #   0.0 = exact match
    #   0.1 = allow 0.1% difference
    #   1.0 = allow 1% difference

  # Schema Comparison
  schema:
    enabled: true
    checks:
      column_names: true  # Verify column names match
      data_types: true  # Verify data types match
      nullable: true  # Verify nullable constraints match
      primary_keys: true  # Verify primary keys match

  # Aggregate Comparison (sums of numeric columns)
  aggregates:
    enabled: true
    tolerance_percent: 0.01  # 0.01% tolerance for rounding differences
    # Note: Only applies when mode is "full"

# ============================================================================
# TABLES TO COMPARE
# List all tables to include in comparison
# ============================================================================

tables:
  # Example 1: Fact table with aggregates
  - schema: "dbo"
    name: "FactTable1"
    enabled: true
    expected_in_target: true
    aggregate_columns:
      - "Amount1"
      - "Amount2"
      - "Amount3"
      - "Quantity"
    notes: "Example fact table with numeric aggregates"

  # Example 2: Dimension table without aggregates
  - schema: "dbo"
    name: "DimTable1"
    enabled: true
    expected_in_target: true
    aggregate_columns: []
    notes: "Example dimension table - no numeric aggregates"

  # Example 3: Table expected to be missing in target
  - schema: "dbo"
    name: "TempTable1"
    enabled: true
    expected_in_target: false
    aggregate_columns: []
    notes: "Example temporary table - should not exist in target"

  # Example 4: Disabled table (skipped during comparison)
  - schema: "dbo"
    name: "Table4"
    enabled: false
    expected_in_target: true
    aggregate_columns: []
    notes: "Example disabled table - excluded from comparison"

  # Example 5: Table in a schema other than "dbo" (here: "staging")
  - schema: "staging"
    name: "StagingTable1"
    enabled: true
    expected_in_target: true
    aggregate_columns:
      - "Amount"
    notes: "Example staging table"

  # Example 6: Large fact table
  - schema: "dbo"
    name: "FactTable2"
    enabled: true
    expected_in_target: true
    aggregate_columns:
      - "Amount"
      - "Fee"
      - "NetAmount"
    notes: "Example high-volume fact table"

  # Example 7: Reference data table
  - schema: "ref"
    name: "RefTable1"
    enabled: true
    expected_in_target: true
    aggregate_columns: []
    notes: "Example reference data table"

# ============================================================================
# REPORTING SETTINGS
# Configure report generation and output
# ============================================================================

reporting:
  # Output directory for reports
  # (use relative path or set via environment variable)
  output_dir: "./reports"

  # Output directory for investigation reports
  # (use relative path or set via environment variable)
  investigation_dir: "./investigation_reports"

  # Report formats to generate
  formats:
    html: true  # Rich HTML report with styling
    csv: true  # CSV report for Excel/analysis
    pdf: false  # PDF report (requires weasyprint)

  # Report naming
  filename_prefix: "regression_test"
  include_timestamp: true  # Append YYYYMMDD_HHMMSS to filename

  # Report content options
  include_passed: true  # Include passed checks in report
  include_warnings: true  # Include warnings in report
  summary_only: false  # Only show summary (no details)

# ============================================================================
# LOGGING SETTINGS
# Configure logging behavior
# ============================================================================

logging:
  # Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
  level: "INFO"

  # Log output directory
  # (use relative path or set via environment variable)
  output_dir: "./logs"

  # Log file naming
  filename_prefix: "drt"
  include_timestamp: true

  # Console output
  console:
    enabled: true
    level: "INFO"
    colored: true  # Use colored output (if terminal supports it)

  # File output
  file:
    enabled: true
    level: "DEBUG"
    max_size_mb: 10  # Rotate after 10MB
    backup_count: 5  # Keep 5 backup files

# ============================================================================
# EXECUTION SETTINGS
# Configure execution behavior
# ============================================================================

execution:
  # Parallel execution (future feature)
  parallel:
    enabled: false
    max_workers: 4

  # Retry settings for transient failures
  retry:
    enabled: true
    max_attempts: 3
    delay_seconds: 5

  # Performance settings
  performance:
    batch_size: 1000  # Rows per batch for large queries
    # NOTE(review): read-uncommitted access can observe in-flight changes,
    # so counts/sums may differ between runs on a busy database - confirm
    # this is acceptable for the databases being compared.
    use_nolock: true  # Use NOLOCK hints (read uncommitted)
    connection_pooling: true

# ============================================================================
# FILTERS
# Global filters applied to all tables
# ============================================================================

filters:
  # Schema filters (include/exclude patterns)
  schemas:
    include:
      - "dbo"
      - "staging"
      - "ref"
    exclude:
      - "sys"
      - "temp"

  # Table name filters (wildcard patterns)
  tables:
    include:
      - "*"  # Include all tables
    exclude:
      - "tmp_*"  # Exclude temporary tables
      - "backup_*"  # Exclude backup tables
      - "archive_*"  # Exclude archive tables

  # Column filters for aggregate comparisons
  columns:
    exclude_patterns:
      - "*_id"  # Exclude ID columns
      - "*_key"  # Exclude key columns
      - "created_*"  # Exclude audit columns
      - "modified_*"  # Exclude audit columns

# ============================================================================
# NOTIFICATIONS (future feature)
# Configure notifications for test results
# ============================================================================

notifications:
  enabled: false

  # Email notifications
  email:
    enabled: false
    smtp_server: "smtp.company.com"
    smtp_port: 587
    from_address: "drt@company.com"
    to_addresses:
      - "qa-team@company.com"
    on_failure_only: true

  # Slack notifications
  slack:
    enabled: false
    # Placeholder value. A real webhook URL is a credential; avoid
    # committing one to version control.
    webhook_url: "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
    channel: "#qa-alerts"
    on_failure_only: true

# ============================================================================
# METADATA
# Optional metadata about this configuration
# ============================================================================

metadata:
  version: "1.0"
  created_by: "QA Team"
  created_date: "2024-01-15"
  description: "Standard regression test configuration for DWH migration"
  project: "DWH Migration Phase 2"
  environment: "UAT"
  tags:
    - "migration"
    - "data-quality"
    - "regression"