# drt/config.example.yaml

# Data Regression Testing Framework - Example Configuration
# This file demonstrates all available configuration options

# ============================================================================
# DATABASE PAIRS
# Define baseline (production) and target (test) database connections
# ============================================================================
database_pairs:
  # Each list item describes one baseline/target pair to compare.
  # Replace every <YOUR_...> placeholder with real values before running.

  # Example 1: Data Warehouse Comparison
  - name: "DWH_Comparison"
    enabled: true
    description: "Compare production and test data warehouse"
    baseline:
      server: "<YOUR_SERVER_NAME>"
      database: "<YOUR_BASELINE_DB>"
      timeout:
        connection: 30  # seconds
        query: 300      # seconds (5 minutes)
    target:
      server: "<YOUR_SERVER_NAME>"
      database: "<YOUR_TARGET_DB>"
      timeout:
        connection: 30  # seconds
        query: 300      # seconds (5 minutes)

  # Example 2: Operational Database Comparison (disabled)
  # NOTE(review): unlike Example 1, this pair has no `timeout` block.
  # Presumably the framework applies default timeouts when it is omitted;
  # confirm against the config loader.
  - name: "OPS_Comparison"
    enabled: false
    description: "Compare operational databases (currently disabled)"
    baseline:
      server: "<YOUR_SERVER_NAME>"
      database: "<YOUR_BASELINE_DB_2>"
    target:
      server: "<YOUR_SERVER_NAME>"
      database: "<YOUR_TARGET_DB_2>"

# ============================================================================
# COMPARISON SETTINGS
# Configure what types of comparisons to perform
# ============================================================================
comparison:
  # Comparison mode: "health_check" or "full"
  # - health_check: Quick validation (row counts, schema)
  # - full: Comprehensive validation (includes aggregates)
  # As shipped, this example uses "health_check", so the `aggregates`
  # section below is configured but inactive (see the note in that section).
  mode: "health_check"

  # Row Count Comparison
  row_count:
    enabled: true
    tolerance_percent: 0.0  # 0% = exact match required
    # Examples:
    # 0.0 = exact match
    # 0.1 = allow 0.1% difference
    # 1.0 = allow 1% difference

  # Schema Comparison
  schema:
    enabled: true
    checks:
      column_names: true    # Verify column names match
      data_types: true      # Verify data types match
      nullable: true        # Verify nullable constraints match
      primary_keys: true    # Verify primary keys match

  # Aggregate Comparison (sums of numeric columns)
  # The columns to sum are listed per table under `aggregate_columns`
  # in the `tables` section.
  aggregates:
    enabled: true
    tolerance_percent: 0.01  # 0.01% tolerance for rounding differences
    # Note: Only applies when mode is "full"

# ============================================================================
# TABLES TO COMPARE
# List all tables to include in comparison
# ============================================================================
tables:
  # Fields used by each entry below:
  #   schema / name       - identify the table
  #   enabled             - false excludes the table from comparison (Example 4)
  #   expected_in_target  - false marks a table that should NOT exist in the
  #                         target database (Example 3)
  #   aggregate_columns   - numeric columns to sum; use [] when there are none
  #   notes               - free-text description

  # Example 1: Fact table with aggregates
  - schema: "dbo"
    name: "FactTable1"
    enabled: true
    expected_in_target: true
    aggregate_columns:
      - "Amount1"
      - "Amount2"
      - "Amount3"
      - "Quantity"
    notes: "Example fact table with numeric aggregates"

  # Example 2: Dimension table without aggregates
  - schema: "dbo"
    name: "DimTable1"
    enabled: true
    expected_in_target: true
    aggregate_columns: []
    notes: "Example dimension table - no numeric aggregates"

  # Example 3: Table expected to be missing in target
  - schema: "dbo"
    name: "TempTable1"
    enabled: true
    expected_in_target: false
    aggregate_columns: []
    notes: "Example temporary table - should not exist in target"

  # Example 4: Disabled table (skipped during comparison)
  - schema: "dbo"
    name: "Table4"
    enabled: false
    expected_in_target: true
    aggregate_columns: []
    notes: "Example disabled table - excluded from comparison"

  # Example 5: Table in a schema other than "dbo"
  - schema: "staging"
    name: "StagingTable1"
    enabled: true
    expected_in_target: true
    aggregate_columns:
      - "Amount"
    notes: "Example staging table"

  # Example 6: Large fact table
  - schema: "dbo"
    name: "FactTable2"
    enabled: true
    expected_in_target: true
    aggregate_columns:
      - "Amount"
      - "Fee"
      - "NetAmount"
    notes: "Example high-volume fact table"

  # Example 7: Reference data table
  - schema: "ref"
    name: "RefTable1"
    enabled: true
    expected_in_target: true
    aggregate_columns: []
    notes: "Example reference data table"

# ============================================================================
# REPORTING SETTINGS
# Configure report generation and output
# ============================================================================
reporting:
  # NOTE(review): the comments below mention overriding the directories via
  # an environment variable, but the variable names are not shown in this
  # file; confirm them in the framework documentation.

  # Output directory for reports (use relative path or set via environment variable)
  output_dir: "./reports"

  # Output directory for investigation reports (use relative path or set via environment variable)
  investigation_dir: "./investigation_reports"

  # Report formats to generate (each format is toggled independently)
  formats:
    html: true   # Rich HTML report with styling
    csv: true    # CSV report for Excel/analysis
    pdf: false   # PDF report (requires weasyprint)

  # Report naming
  filename_prefix: "regression_test"
  include_timestamp: true  # Append YYYYMMDD_HHMMSS to filename

  # Report content options
  include_passed: true     # Include passed checks in report
  include_warnings: true   # Include warnings in report
  summary_only: false      # Only show summary (no details)

# ============================================================================
# LOGGING SETTINGS
# Configure logging behavior
# ============================================================================
logging:
  # Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
  # NOTE(review): how this top-level level interacts with the per-handler
  # `console.level` and `file.level` below is not shown here; confirm.
  level: "INFO"

  # Log output directory (use relative path or set via environment variable)
  output_dir: "./logs"

  # Log file naming
  filename_prefix: "drt"
  include_timestamp: true

  # Console output
  console:
    enabled: true
    level: "INFO"
    colored: true  # Use colored output (if terminal supports it)

  # File output (set to DEBUG here, so more verbose than the console's INFO)
  file:
    enabled: true
    level: "DEBUG"
    max_size_mb: 10      # Rotate after 10MB
    backup_count: 5      # Keep 5 backup files

# ============================================================================
# EXECUTION SETTINGS
# Configure execution behavior
# ============================================================================
execution:
  # Parallel execution (future feature)
  # `max_workers` is listed for completeness; parallel execution is disabled
  # in this example.
  parallel:
    enabled: false
    max_workers: 4

  # Retry settings for transient failures
  # NOTE(review): whether `max_attempts` counts the initial try is not shown
  # in this file; confirm against the retry implementation.
  retry:
    enabled: true
    max_attempts: 3
    delay_seconds: 5   # wait between attempts, in seconds

  # Performance settings
  performance:
    batch_size: 1000        # Rows per batch for large queries
    # Caution: read-uncommitted queries can observe in-flight (dirty) rows,
    # so comparison results may vary while either database is being written
    # to. Consider disabling this when exact, repeatable results are required.
    use_nolock: true        # Use NOLOCK hints (read uncommitted)
    connection_pooling: true

# ============================================================================
# FILTERS
# Global filters applied to all tables
# ============================================================================
filters:
  # NOTE(review): how these global filters combine with the explicit
  # top-level `tables` list is not shown in this file; confirm precedence.
  #
  # Keep every wildcard pattern quoted: an unquoted leading `*` is parsed by
  # YAML as an alias reference rather than as a string.

  # Schema filters (include/exclude patterns)
  schemas:
    include:
      - "dbo"
      - "staging"
      - "ref"
    exclude:
      - "sys"
      - "temp"

  # Table name filters (wildcard patterns)
  # This `tables` key is nested under `filters` and is separate from the
  # top-level `tables` list.
  tables:
    include:
      - "*"  # Include all tables
    exclude:
      - "tmp_*"      # Exclude temporary tables
      - "backup_*"   # Exclude backup tables
      - "archive_*"  # Exclude archive tables

  # Column filters for aggregate comparisons
  columns:
    exclude_patterns:
      - "*_id"       # Exclude ID columns
      - "*_key"      # Exclude key columns
      - "created_*"  # Exclude audit columns
      - "modified_*" # Exclude audit columns

# ============================================================================
# NOTIFICATIONS (future feature)
# Configure notifications for test results
# ============================================================================
notifications:
  # NOTE(review): presumably this top-level flag is a master switch over the
  # per-channel `enabled` flags below; confirm against the implementation.
  enabled: false

  # Email notifications
  # The server and addresses below are placeholders; replace before enabling.
  email:
    enabled: false
    smtp_server: "smtp.company.com"
    smtp_port: 587
    from_address: "drt@company.com"
    to_addresses:
      - "qa-team@company.com"
    on_failure_only: true

  # Slack notifications
  # A real webhook URL is a secret: do not commit it to version control.
  slack:
    enabled: false
    webhook_url: "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
    channel: "#qa-alerts"
    on_failure_only: true

# ============================================================================
# METADATA
# Optional metadata about this configuration
# ============================================================================
metadata:
  # `version` and `created_date` are quoted on purpose so they load as
  # strings; unquoted, 1.0 would become a float and 2024-01-15 a date.
  version: "1.0"
  created_by: "QA Team"
  created_date: "2024-01-15"
  description: "Standard regression test configuration for DWH migration"
  project: "DWH Migration Phase 2"
  environment: "UAT"
  # Free-form labels describing this configuration
  tags:
    - "migration"
    - "data-quality"
    - "regression"