Initial commit
This commit is contained in:
97
.gitignore
vendored
Normal file
97
.gitignore
vendored
Normal file
@@ -0,0 +1,97 @@
|
||||
# Security: Sensitive Files and Credentials
|
||||
# Add these patterns to your .gitignore to prevent accidental commits of sensitive data
|
||||
|
||||
# Environment variables
|
||||
.env
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# Configuration files with credentials
|
||||
config.*.yaml
|
||||
!config.example.yaml
|
||||
!config.quickstart.yaml
|
||||
!config.test.yaml
|
||||
|
||||
# Logs (may contain sensitive information)
|
||||
logs/
|
||||
*.log
|
||||
|
||||
# Reports and analysis output
|
||||
reports/
|
||||
investigation_reports/
|
||||
analysis/
|
||||
|
||||
# IDE and editor files
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# Virtual environments
|
||||
venv/
|
||||
ENV/
|
||||
env/
|
||||
|
||||
# Testing
|
||||
.pytest_cache/
|
||||
.coverage
|
||||
htmlcov/
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Temporary files
|
||||
*.tmp
|
||||
*.bak
|
||||
*.backup
|
||||
*~
|
||||
|
||||
# Database files
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
|
||||
# Docker
|
||||
.dockerignore
|
||||
docker-compose.override.yml
|
||||
|
||||
# Credentials and secrets (CRITICAL)
|
||||
**/secrets/
|
||||
**/credentials/
|
||||
**/.aws/
|
||||
**/.azure/
|
||||
**/.gcp/
|
||||
**/private_key*
|
||||
**/secret_key*
|
||||
**/api_key*
|
||||
**/token*
|
||||
**/password*
|
||||
|
||||
# Configuration with real values
|
||||
config.prod.yaml
|
||||
config.production.yaml
|
||||
config.live.yaml
|
||||
21
LICENSE
Executable file
21
LICENSE
Executable file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 QA Engineering Team
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
741
README.md
Executable file
741
README.md
Executable file
@@ -0,0 +1,741 @@
|
||||
# Data Regression Testing Framework
|
||||
|
||||
A comprehensive framework for validating data integrity during code migration and system updates by comparing data outputs between Baseline (Production) and Target (Test) SQL Server databases.
|
||||
|
||||
## ✨ Features
|
||||
|
||||
- **Automated Discovery** - Scan databases and auto-generate configuration files
|
||||
- **Multiple Comparison Types** - Row counts, schema validation, aggregate sums
|
||||
- **Investigation Queries** - Execute diagnostic SQL queries from regression analysis
|
||||
- **Flexible Configuration** - YAML-based setup with extensive customization
|
||||
- **Rich Reporting** - HTML, CSV, and PDF reports with detailed results
|
||||
- **Windows Authentication** - Secure, credential-free database access
|
||||
- **Read-Only Operations** - All queries are SELECT-only for safety
|
||||
- **Comprehensive Logging** - Detailed execution logs with timestamps
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Python 3.9+
|
||||
- Microsoft ODBC Driver 17+ for SQL Server
|
||||
- Windows environment with domain authentication (or Linux with Kerberos)
|
||||
- Read access to SQL Server databases
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone <repository-url>
|
||||
cd data_regression_testing
|
||||
|
||||
# Create virtual environment
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
|
||||
# Install the framework
|
||||
pip install -e .
|
||||
|
||||
# Verify installation
|
||||
drt --version
|
||||
```
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# 1. Discover tables from baseline database
|
||||
drt discover --server <YOUR_SERVER> --database <YOUR_BASELINE_DB> --output config.yaml
|
||||
|
||||
# 2. Edit config.yaml to add target database connection
|
||||
|
||||
# 3. Validate configuration
|
||||
drt validate --config config.yaml
|
||||
|
||||
# 4. Run comparison
|
||||
drt compare --config config.yaml
|
||||
|
||||
# 5. (Optional) Investigate regression issues
|
||||
drt investigate --analysis-dir analysis/output_<TIMESTAMP>/ --config config.yaml
|
||||
```
|
||||
|
||||
## 📦 Platform-Specific Installation
|
||||
|
||||
### Windows
|
||||
|
||||
1. Install Python 3.9+ from https://www.python.org/downloads/
|
||||
2. ODBC Driver is usually pre-installed on Windows
|
||||
3. Install Framework:
|
||||
```cmd
|
||||
python -m venv venv
|
||||
venv\Scripts\activate
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
### Linux (Debian/Ubuntu)
|
||||
|
||||
```bash
|
||||
# Install ODBC Driver
|
||||
curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | sudo gpg --dearmor -o /usr/share/keyrings/microsoft-prod.gpg
|
||||
curl https://packages.microsoft.com/config/debian/12/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list
|
||||
sudo apt-get update
|
||||
sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 unixodbc-dev
|
||||
|
||||
# Install Kerberos for Windows Authentication
|
||||
sudo apt-get install -y krb5-user
|
||||
|
||||
# Configure /etc/krb5.conf with your domain settings
|
||||
# Then obtain ticket: kinit username@YOUR_DOMAIN.COM
|
||||
|
||||
# Install framework
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
## 📋 Commands
|
||||
|
||||
### Discovery
|
||||
|
||||
Automatically scan databases and generate configuration files.
|
||||
|
||||
```bash
|
||||
drt discover --server <YOUR_SERVER> --database <YOUR_DATABASE> [OPTIONS]
|
||||
```
|
||||
|
||||
**Options:**
|
||||
- `--server TEXT` - SQL Server hostname (required)
|
||||
- `--database TEXT` - Database name (required)
|
||||
- `--output, -o TEXT` - Output file (default: config_discovered.yaml)
|
||||
- `--schemas TEXT` - Specific schemas to include
|
||||
- `--verbose, -v` - Enable verbose output
|
||||
|
||||
### Validate
|
||||
|
||||
Validate configuration file syntax and database connectivity.
|
||||
|
||||
```bash
|
||||
drt validate --config <CONFIG_FILE> [OPTIONS]
|
||||
```
|
||||
|
||||
**Options:**
|
||||
- `--config, -c PATH` - Configuration file (required)
|
||||
- `--verbose, -v` - Enable verbose output
|
||||
|
||||
### Compare
|
||||
|
||||
Execute data comparison between baseline and target databases.
|
||||
|
||||
```bash
|
||||
drt compare --config <CONFIG_FILE> [OPTIONS]
|
||||
```
|
||||
|
||||
**Options:**
|
||||
- `--config, -c PATH` - Configuration file (required)
|
||||
- `--verbose, -v` - Enable verbose output
|
||||
- `--dry-run` - Show what would be compared without executing
|
||||
|
||||
### Investigate
|
||||
|
||||
Execute diagnostic queries from regression analysis.
|
||||
|
||||
```bash
|
||||
drt investigate --analysis-dir <ANALYSIS_DIR> --config <CONFIG_FILE> [OPTIONS]
|
||||
```
|
||||
|
||||
**Options:**
|
||||
- `--analysis-dir, -a PATH` - Analysis output directory containing `*_investigate.sql` files (required)
|
||||
- `--config, -c PATH` - Configuration file (required)
|
||||
- `--output-dir, -o PATH` - Output directory for reports (default: ./investigation_reports)
|
||||
- `--verbose, -v` - Enable verbose output
|
||||
- `--dry-run` - Show what would be executed without running
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
drt investigate -a analysis/output_20251209_184032/ -c config.yaml
|
||||
drt investigate -a analysis/output_20251209_184032/ -c config.yaml -o ./my_reports
|
||||
```
|
||||
|
||||
**What it does:**
|
||||
- Discovers all `*_investigate.sql` files in the analysis directory
|
||||
- Parses SQL files (handles markdown, multiple queries per file)
|
||||
- Executes queries on both baseline and target databases
|
||||
- Handles errors gracefully (continues on failures)
|
||||
- Generates HTML and CSV reports with side-by-side comparisons
|
||||
|
||||
## ⚙️ Configuration
|
||||
|
||||
### Database Connections
|
||||
|
||||
```yaml
|
||||
database_pairs:
|
||||
- name: "DWH_Comparison"
|
||||
enabled: true
|
||||
baseline:
|
||||
server: "<YOUR_SERVER>"
|
||||
database: "<YOUR_BASELINE_DB>"
|
||||
timeout:
|
||||
connection: 30
|
||||
query: 300
|
||||
target:
|
||||
server: "<YOUR_SERVER>"
|
||||
database: "<YOUR_TARGET_DB>"
|
||||
```
|
||||
|
||||
### Comparison Settings
|
||||
|
||||
```yaml
|
||||
comparison:
|
||||
mode: "health_check" # or "full"
|
||||
row_count:
|
||||
enabled: true
|
||||
tolerance_percent: 0.0
|
||||
schema:
|
||||
enabled: true
|
||||
checks:
|
||||
column_names: true
|
||||
data_types: true
|
||||
aggregates:
|
||||
enabled: true
|
||||
tolerance_percent: 0.01
|
||||
```
|
||||
|
||||
### Table Configuration
|
||||
|
||||
```yaml
|
||||
tables:
|
||||
- schema: "dbo"
|
||||
name: "FactTable1"
|
||||
enabled: true
|
||||
expected_in_target: true
|
||||
aggregate_columns:
|
||||
- "Amount"
|
||||
- "Quantity"
|
||||
```
|
||||
|
||||
### Output Directories
|
||||
|
||||
```yaml
|
||||
reporting:
|
||||
output_dir: "./reports"
|
||||
investigation_dir: "./investigation_reports"
|
||||
|
||||
logging:
|
||||
output_dir: "./logs"
|
||||
|
||||
discovery:
|
||||
analysis_directory: "./analysis"
|
||||
```
|
||||
|
||||
**Benefits:**
|
||||
- Centralized storage of all output files
|
||||
- Easy cleanup and management of generated files
|
||||
- Configuration flexibility via YAML
|
||||
- Backward compatibility with CLI overrides
|
||||
|
||||
## 📊 Reports
|
||||
|
||||
### Comparison Reports
|
||||
|
||||
The framework generates comprehensive reports in multiple formats:
|
||||
|
||||
- **HTML Report** - Visual summary with color-coded results and detailed breakdowns
|
||||
- **CSV Report** - Machine-readable format for Excel or databases
|
||||
- **PDF Report** - Professional formatted output (requires weasyprint)
|
||||
|
||||
Reports are saved to `./reports/` with timestamps.
|
||||
|
||||
### Investigation Reports
|
||||
|
||||
- **HTML Report** - Interactive report with collapsible query results, side-by-side baseline vs target comparison
|
||||
- **CSV Report** - Flattened structure with one row per query execution
|
||||
|
||||
Investigation reports are saved to `./investigation_reports/` with timestamps.
|
||||
|
||||
## 🔄 Exit Codes
|
||||
|
||||
| Code | Meaning |
|
||||
|------|---------|
|
||||
| 0 | Success - all comparisons passed |
|
||||
| 1 | Failures detected - one or more FAIL results |
|
||||
| 2 | Execution error - configuration or connection issues |
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
### Docker Test Environment
|
||||
|
||||
```bash
|
||||
# Start test SQL Server containers
|
||||
bash test_data/setup_test_environment.sh
|
||||
|
||||
# Test discovery
|
||||
drt discover --server localhost,1433 --database TestDB_Baseline --output test.yaml
|
||||
|
||||
# Test comparison
|
||||
drt compare --config config.test.yaml
|
||||
|
||||
# Cleanup
|
||||
docker-compose -f docker-compose.test.yml down -v
|
||||
```
|
||||
|
||||
### Manual Testing
|
||||
|
||||
```bash
|
||||
# Connect to test databases (use SA_PASSWORD environment variable)
|
||||
docker exec -it drt-sqlserver-baseline /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P "$SA_PASSWORD"
|
||||
|
||||
# Run queries to verify data
|
||||
SELECT COUNT(*) FROM dbo.FactTable1;
|
||||
```
|
||||
|
||||
## 🚢 Deployment
|
||||
|
||||
### Scheduled Execution
|
||||
|
||||
**Windows Task Scheduler:**
|
||||
```batch
|
||||
@echo off
|
||||
cd C:\path\to\framework
|
||||
call venv\Scripts\activate.bat
|
||||
drt compare --config config.yaml
|
||||
if %ERRORLEVEL% NEQ 0 (
|
||||
echo Test failed with exit code %ERRORLEVEL%
|
||||
exit /b %ERRORLEVEL%
|
||||
)
|
||||
```
|
||||
|
||||
**Linux Cron:**
|
||||
```bash
|
||||
# Run daily at 2 AM
|
||||
0 2 * * * /path/to/venv/bin/drt compare --config /path/to/config.yaml >> /path/to/logs/cron.log 2>&1
|
||||
```
|
||||
|
||||
### Monitoring
|
||||
|
||||
```bash
|
||||
# Watch logs
|
||||
tail -f logs/drt_*.log
|
||||
|
||||
# Search for failures
|
||||
grep -i "FAIL\|ERROR" logs/drt_*.log
|
||||
```
|
||||
|
||||
## 🏗️ Architecture
|
||||
|
||||
```
|
||||
src/drt/
|
||||
├── cli/ # Command-line interface
|
||||
│ └── commands/ # CLI commands (compare, discover, validate, investigate)
|
||||
├── config/ # Configuration management
|
||||
├── database/ # Database connectivity (READ ONLY)
|
||||
├── models/ # Data models
|
||||
├── reporting/ # Report generators
|
||||
├── services/ # Business logic
|
||||
│ ├── checkers/ # Comparison checkers
|
||||
│ ├── investigation.py # Investigation service
|
||||
│ └── sql_parser.py # SQL file parser
|
||||
└── utils/ # Utilities
|
||||
```
|
||||
|
||||
## 🔒 Security
|
||||
|
||||
- **Windows Authentication Only** - No stored credentials
|
||||
- **Read-Only Operations** - All queries are SELECT-only
|
||||
- **Minimal Permissions** - Only requires db_datareader role
|
||||
- **No Data Logging** - Sensitive data never logged
|
||||
|
||||
## 🔧 Troubleshooting
|
||||
|
||||
### Connection Failed
|
||||
|
||||
```bash
|
||||
# Test connectivity
|
||||
drt discover --server <YOUR_SERVER> --database master
|
||||
|
||||
# Verify ODBC driver
|
||||
odbcinst -q -d
|
||||
|
||||
# Check permissions
|
||||
# User needs db_datareader role on target databases
|
||||
```
|
||||
|
||||
### Query Timeout
|
||||
|
||||
Increase timeout in configuration:
|
||||
```yaml
|
||||
baseline:
|
||||
timeout:
|
||||
query: 600 # 10 minutes
|
||||
```
|
||||
|
||||
### Linux Kerberos Issues
|
||||
|
||||
```bash
|
||||
# Check ticket
|
||||
klist
|
||||
|
||||
# Renew if expired
|
||||
kinit username@YOUR_DOMAIN.COM
|
||||
|
||||
# Verify ticket is valid
|
||||
klist
|
||||
```
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
### Diagnostic Logging
|
||||
|
||||
Enable verbose mode to see detailed timing:
|
||||
```bash
|
||||
drt compare --config config.yaml --verbose
|
||||
```
|
||||
|
||||
This shows:
|
||||
- Per-check timing (existence, row count, schema, aggregates)
|
||||
- Query execution times
|
||||
- Parallelization opportunities
|
||||
|
||||
### Optimization Tips
|
||||
|
||||
- Disable aggregate checks for surrogate keys
|
||||
- Increase query timeouts for large tables
|
||||
- Use table filtering to focus on critical tables
|
||||
- Consider parallel execution for multiple database pairs
|
||||
|
||||
## 👨💻 Development
|
||||
|
||||
### Getting Started
|
||||
|
||||
1. Fork the repository on GitHub
|
||||
2. Clone your fork locally:
|
||||
```bash
|
||||
git clone https://github.com/your-username/data_regression_testing.git
|
||||
cd data_regression_testing
|
||||
```
|
||||
3. Create a virtual environment:
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
4. Install dependencies:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
pip install -e .
|
||||
```
|
||||
5. Install development dependencies:
|
||||
```bash
|
||||
pip install pytest pytest-cov black flake8 mypy
|
||||
```
|
||||
|
||||
### Development Workflow
|
||||
|
||||
#### 1. Create a Branch
|
||||
|
||||
```bash
|
||||
git checkout -b feature/your-feature-name
|
||||
# or
|
||||
git checkout -b bugfix/issue-description
|
||||
```
|
||||
|
||||
#### 2. Make Your Changes
|
||||
|
||||
- Write clean, readable code
|
||||
- Follow the existing code style
|
||||
- Add docstrings to all functions and classes
|
||||
- Update documentation as needed
|
||||
|
||||
#### 3. Run Tests
|
||||
|
||||
```bash
|
||||
# All tests
|
||||
pytest
|
||||
|
||||
# With coverage
|
||||
pytest --cov=src/drt --cov-report=html
|
||||
|
||||
# Specific test file
|
||||
pytest tests/test_models.py
|
||||
```
|
||||
|
||||
#### 4. Code Quality Checks
|
||||
|
||||
```bash
|
||||
# Format code with black
|
||||
black src/ tests/
|
||||
|
||||
# Check code style with flake8
|
||||
flake8 src/ tests/
|
||||
|
||||
# Type checking with mypy
|
||||
mypy src/
|
||||
```
|
||||
|
||||
#### 5. Commit Your Changes
|
||||
|
||||
Write clear, descriptive commit messages:
|
||||
|
||||
```bash
|
||||
git add .
|
||||
git commit -m "Add feature: description of your changes"
|
||||
```
|
||||
|
||||
**Commit message guidelines:**
|
||||
- Use present tense ("Add feature" not "Added feature")
|
||||
- Use imperative mood ("Move cursor to..." not "Moves cursor to...")
|
||||
- Limit first line to 72 characters
|
||||
- Reference issues and pull requests when relevant
|
||||
|
||||
#### 6. Push and Create Pull Request
|
||||
|
||||
```bash
|
||||
git push origin feature/your-feature-name
|
||||
```
|
||||
|
||||
Create a pull request on GitHub with:
|
||||
- Clear title and description
|
||||
- Reference to related issues
|
||||
- Screenshots (if applicable)
|
||||
- Test results
|
||||
|
||||
### Code Style Guidelines
|
||||
|
||||
#### Python Style
|
||||
|
||||
- Follow PEP 8 style guide
|
||||
- Use type hints for function parameters and return values
|
||||
- Maximum line length: 100 characters
|
||||
- Use meaningful variable and function names
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
def calculate_row_count_difference(
|
||||
baseline_count: int,
|
||||
target_count: int,
|
||||
tolerance_percent: float
|
||||
) -> tuple[bool, float]:
|
||||
"""
|
||||
Calculate if row count difference is within tolerance.
|
||||
|
||||
Args:
|
||||
baseline_count: Row count from baseline database
|
||||
target_count: Row count from target database
|
||||
tolerance_percent: Acceptable difference percentage
|
||||
|
||||
Returns:
|
||||
Tuple of (is_within_tolerance, actual_difference_percent)
|
||||
"""
|
||||
# Implementation here
|
||||
pass
|
||||
```
|
||||
|
||||
#### Documentation
|
||||
|
||||
- Add docstrings to all public functions, classes, and modules
|
||||
- Use Google-style docstrings
|
||||
- Include examples in docstrings when helpful
|
||||
- Update README.md for user-facing changes
|
||||
|
||||
#### Testing
|
||||
|
||||
- Write unit tests for all new functionality
|
||||
- Aim for >80% code coverage
|
||||
- Use descriptive test names
|
||||
- Follow AAA pattern (Arrange, Act, Assert)
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
def test_row_count_checker_exact_match():
|
||||
"""Test row count checker with exact match"""
|
||||
# Arrange
|
||||
checker = RowCountChecker(tolerance_percent=0.0)
|
||||
|
||||
# Act
|
||||
result = checker.check(baseline_count=1000, target_count=1000)
|
||||
|
||||
# Assert
|
||||
assert result.status == Status.PASS
|
||||
assert result.baseline_value == 1000
|
||||
assert result.target_value == 1000
|
||||
```
|
||||
|
||||
### Adding New Features
|
||||
|
||||
#### New Checker Type
|
||||
|
||||
To add a new comparison checker:
|
||||
|
||||
1. Create new checker in `src/drt/services/checkers/`
|
||||
2. Inherit from `BaseChecker`
|
||||
3. Implement `check()` method
|
||||
4. Add new `CheckType` enum value
|
||||
5. Register in `ComparisonService`
|
||||
6. Add tests in `tests/test_checkers.py`
|
||||
7. Update documentation
|
||||
|
||||
#### New Report Format
|
||||
|
||||
To add a new report format:
|
||||
|
||||
1. Create new reporter in `src/drt/reporting/`
|
||||
2. Implement `generate()` method
|
||||
3. Add format option to configuration
|
||||
4. Update `ReportGenerator` to use new format
|
||||
5. Add tests
|
||||
6. Update documentation
|
||||
|
||||
### Testing
|
||||
|
||||
#### Unit Tests
|
||||
|
||||
Run the test suite:
|
||||
|
||||
```bash
|
||||
# All tests
|
||||
pytest
|
||||
|
||||
# With coverage report
|
||||
pytest --cov=src/drt --cov-report=html
|
||||
|
||||
# Specific test file
|
||||
pytest tests/test_models.py -v
|
||||
|
||||
# Specific test function
|
||||
pytest tests/test_models.py::test_status_enum -v
|
||||
```
|
||||
|
||||
#### Integration Tests
|
||||
|
||||
Use the Docker test environment:
|
||||
|
||||
```bash
|
||||
# Start test databases
|
||||
bash test_data/setup_test_environment.sh
|
||||
|
||||
# Run integration tests
|
||||
drt discover --server localhost,1433 --database TestDB_Baseline --output test.yaml
|
||||
drt compare --config config.test.yaml
|
||||
|
||||
# Cleanup
|
||||
docker-compose -f docker-compose.test.yml down -v
|
||||
```
|
||||
|
||||
#### Manual Testing
|
||||
|
||||
```bash
|
||||
# Test against real databases (requires access)
|
||||
drt discover --server <YOUR_SERVER> --database <YOUR_DB> --output manual_test.yaml
|
||||
drt validate --config manual_test.yaml
|
||||
drt compare --config manual_test.yaml --dry-run
|
||||
```
|
||||
|
||||
### Reporting Issues
|
||||
|
||||
When reporting issues, please include:
|
||||
|
||||
- Clear description of the problem
|
||||
- Steps to reproduce
|
||||
- Expected vs actual behavior
|
||||
- Environment details (OS, Python version, ODBC driver version)
|
||||
- Relevant logs or error messages
|
||||
- Configuration file (sanitized - remove server names/credentials)
|
||||
|
||||
**Example:**
|
||||
```markdown
|
||||
**Description:** Row count comparison fails with timeout error
|
||||
|
||||
**Steps to Reproduce:**
|
||||
1. Configure comparison for large table (>1M rows)
|
||||
2. Run `drt compare --config config.yaml`
|
||||
3. Observe timeout error
|
||||
|
||||
**Expected:** Comparison completes successfully
|
||||
**Actual:** Query timeout after 300 seconds
|
||||
|
||||
**Environment:**
|
||||
- OS: Windows 10
|
||||
- Python: 3.9.7
|
||||
- ODBC Driver: 17 for SQL Server
|
||||
|
||||
**Logs:**
|
||||
```
|
||||
ERROR: Query timeout on table dbo.FactTable1
|
||||
```
|
||||
```
|
||||
|
||||
### Feature Requests
|
||||
|
||||
For feature requests, please:
|
||||
|
||||
- Check if feature already exists or is planned
|
||||
- Describe the use case clearly
|
||||
- Explain why it would be valuable
|
||||
- Provide examples if possible
|
||||
|
||||
### Code Review Process
|
||||
|
||||
All contributions go through code review:
|
||||
|
||||
1. Automated checks must pass (tests, linting)
|
||||
2. At least one maintainer approval required
|
||||
3. Address review feedback promptly
|
||||
4. Keep pull requests focused and reasonably sized
|
||||
|
||||
### Release Process
|
||||
|
||||
Releases follow semantic versioning (MAJOR.MINOR.PATCH):
|
||||
|
||||
- **MAJOR** - Breaking changes
|
||||
- **MINOR** - New features (backward compatible)
|
||||
- **PATCH** - Bug fixes (backward compatible)
|
||||
|
||||
### Development Tips
|
||||
|
||||
#### Debugging
|
||||
|
||||
```bash
|
||||
# Enable verbose logging
|
||||
drt compare --config config.yaml --verbose
|
||||
|
||||
# Use dry-run to test without execution
|
||||
drt compare --config config.yaml --dry-run
|
||||
|
||||
# Check configuration validity
|
||||
drt validate --config config.yaml
|
||||
```
|
||||
|
||||
#### Performance Profiling
|
||||
|
||||
```bash
|
||||
# Enable diagnostic logging
|
||||
drt compare --config config.yaml --verbose
|
||||
|
||||
# Look for timing information in logs
|
||||
grep "execution time" logs/drt_*.log
|
||||
```
|
||||
|
||||
#### Docker Development
|
||||
|
||||
```bash
|
||||
# Build and test in Docker
|
||||
docker build -t drt:dev .
|
||||
docker run -v $(pwd)/config.yaml:/app/config.yaml drt:dev compare --config /app/config.yaml
|
||||
```
|
||||
|
||||
## 📝 License
|
||||
|
||||
MIT License - see LICENSE file for details
|
||||
|
||||
## 📞 Support
|
||||
|
||||
For issues and questions:
|
||||
- GitHub Issues: <repository-url>/issues
|
||||
- Check logs in `./logs/`
|
||||
- Review configuration with `drt validate`
|
||||
- Test connectivity with `drt discover`
|
||||
|
||||
## 👥 Authors
|
||||
|
||||
QA Engineering Team
|
||||
|
||||
## 📌 Version
|
||||
|
||||
Current version: 1.0.0
|
||||
286
config.example.yaml
Executable file
286
config.example.yaml
Executable file
@@ -0,0 +1,286 @@
|
||||
# Data Regression Testing Framework - Example Configuration
|
||||
# This file demonstrates all available configuration options
|
||||
|
||||
# ============================================================================
|
||||
# DATABASE PAIRS
|
||||
# Define baseline (production) and target (test) database connections
|
||||
# ============================================================================
|
||||
database_pairs:
|
||||
# Example 1: Data Warehouse Comparison
|
||||
- name: "DWH_Comparison"
|
||||
enabled: true
|
||||
description: "Compare production and test data warehouse"
|
||||
baseline:
|
||||
server: "<YOUR_SERVER_NAME>"
|
||||
database: "<YOUR_BASELINE_DB>"
|
||||
timeout:
|
||||
connection: 30 # seconds
|
||||
query: 300 # seconds (5 minutes)
|
||||
target:
|
||||
server: "<YOUR_SERVER_NAME>"
|
||||
database: "<YOUR_TARGET_DB>"
|
||||
timeout:
|
||||
connection: 30
|
||||
query: 300
|
||||
|
||||
# Example 2: Operational Database Comparison (disabled)
|
||||
- name: "OPS_Comparison"
|
||||
enabled: false
|
||||
description: "Compare operational databases (currently disabled)"
|
||||
baseline:
|
||||
server: "<YOUR_SERVER_NAME>"
|
||||
database: "<YOUR_BASELINE_DB_2>"
|
||||
target:
|
||||
server: "<YOUR_SERVER_NAME>"
|
||||
database: "<YOUR_TARGET_DB_2>"
|
||||
|
||||
# ============================================================================
|
||||
# COMPARISON SETTINGS
|
||||
# Configure what types of comparisons to perform
|
||||
# ============================================================================
|
||||
comparison:
|
||||
# Comparison mode: "health_check" or "full"
|
||||
# - health_check: Quick validation (row counts, schema)
|
||||
# - full: Comprehensive validation (includes aggregates)
|
||||
mode: "health_check"
|
||||
|
||||
# Row Count Comparison
|
||||
row_count:
|
||||
enabled: true
|
||||
tolerance_percent: 0.0 # 0% = exact match required
|
||||
# Examples:
|
||||
# 0.0 = exact match
|
||||
# 0.1 = allow 0.1% difference
|
||||
# 1.0 = allow 1% difference
|
||||
|
||||
# Schema Comparison
|
||||
schema:
|
||||
enabled: true
|
||||
checks:
|
||||
column_names: true # Verify column names match
|
||||
data_types: true # Verify data types match
|
||||
nullable: true # Verify nullable constraints match
|
||||
primary_keys: true # Verify primary keys match
|
||||
|
||||
# Aggregate Comparison (sums of numeric columns)
|
||||
aggregates:
|
||||
enabled: true
|
||||
tolerance_percent: 0.01 # 0.01% tolerance for rounding differences
|
||||
# Note: Only applies when mode is "full"
|
||||
|
||||
# ============================================================================
|
||||
# TABLES TO COMPARE
|
||||
# List all tables to include in comparison
|
||||
# ============================================================================
|
||||
tables:
|
||||
# Example 1: Fact table with aggregates
|
||||
- schema: "dbo"
|
||||
name: "FactTable1"
|
||||
enabled: true
|
||||
expected_in_target: true
|
||||
aggregate_columns:
|
||||
- "Amount1"
|
||||
- "Amount2"
|
||||
- "Amount3"
|
||||
- "Quantity"
|
||||
notes: "Example fact table with numeric aggregates"
|
||||
|
||||
# Example 2: Dimension table without aggregates
|
||||
- schema: "dbo"
|
||||
name: "DimTable1"
|
||||
enabled: true
|
||||
expected_in_target: true
|
||||
aggregate_columns: []
|
||||
notes: "Example dimension table - no numeric aggregates"
|
||||
|
||||
# Example 3: Table expected to be missing in target
|
||||
- schema: "dbo"
|
||||
name: "TempTable1"
|
||||
enabled: true
|
||||
expected_in_target: false
|
||||
aggregate_columns: []
|
||||
notes: "Example temporary table - should not exist in target"
|
||||
|
||||
# Example 4: Disabled table (skipped during comparison)
|
||||
- schema: "dbo"
|
||||
name: "Table4"
|
||||
enabled: false
|
||||
expected_in_target: true
|
||||
aggregate_columns: []
|
||||
notes: "Example disabled table - excluded from comparison"
|
||||
|
||||
# Example 5: Table with multiple schemas
|
||||
- schema: "staging"
|
||||
name: "StagingTable1"
|
||||
enabled: true
|
||||
expected_in_target: true
|
||||
aggregate_columns:
|
||||
- "Amount"
|
||||
notes: "Example staging table"
|
||||
|
||||
# Example 6: Large fact table
|
||||
- schema: "dbo"
|
||||
name: "FactTable2"
|
||||
enabled: true
|
||||
expected_in_target: true
|
||||
aggregate_columns:
|
||||
- "Amount"
|
||||
- "Fee"
|
||||
- "NetAmount"
|
||||
notes: "Example high-volume fact table"
|
||||
|
||||
# Example 7: Reference data table
|
||||
- schema: "ref"
|
||||
name: "RefTable1"
|
||||
enabled: true
|
||||
expected_in_target: true
|
||||
aggregate_columns: []
|
||||
notes: "Example reference data table"
|
||||
|
||||
# ============================================================================
|
||||
# REPORTING SETTINGS
|
||||
# Configure report generation and output
|
||||
# ============================================================================
|
||||
reporting:
|
||||
# Output directory for reports (use relative path or set via environment variable)
|
||||
output_dir: "./reports"
|
||||
|
||||
# Output directory for investigation reports (use relative path or set via environment variable)
|
||||
investigation_dir: "./investigation_reports"
|
||||
|
||||
# Report formats to generate
|
||||
formats:
|
||||
html: true # Rich HTML report with styling
|
||||
csv: true # CSV report for Excel/analysis
|
||||
pdf: false # PDF report (requires weasyprint)
|
||||
|
||||
# Report naming
|
||||
filename_prefix: "regression_test"
|
||||
include_timestamp: true # Append YYYYMMDD_HHMMSS to filename
|
||||
|
||||
# Report content options
|
||||
include_passed: true # Include passed checks in report
|
||||
include_warnings: true # Include warnings in report
|
||||
summary_only: false # Only show summary (no details)
|
||||
|
||||
# ============================================================================
|
||||
# LOGGING SETTINGS
|
||||
# Configure logging behavior
|
||||
# ============================================================================
|
||||
logging:
|
||||
# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
|
||||
level: "INFO"
|
||||
|
||||
# Log output directory (use relative path or set via environment variable)
|
||||
output_dir: "./logs"
|
||||
|
||||
# Log file naming
|
||||
filename_prefix: "drt"
|
||||
include_timestamp: true
|
||||
|
||||
# Console output
|
||||
console:
|
||||
enabled: true
|
||||
level: "INFO"
|
||||
colored: true # Use colored output (if terminal supports it)
|
||||
|
||||
# File output
|
||||
file:
|
||||
enabled: true
|
||||
level: "DEBUG"
|
||||
max_size_mb: 10 # Rotate after 10MB
|
||||
backup_count: 5 # Keep 5 backup files
|
||||
|
||||
# ============================================================================
|
||||
# EXECUTION SETTINGS
|
||||
# Configure execution behavior
|
||||
# ============================================================================
|
||||
execution:
|
||||
# Parallel execution (future feature)
|
||||
parallel:
|
||||
enabled: false
|
||||
max_workers: 4
|
||||
|
||||
# Retry settings for transient failures
|
||||
retry:
|
||||
enabled: true
|
||||
max_attempts: 3
|
||||
delay_seconds: 5
|
||||
|
||||
# Performance settings
|
||||
performance:
|
||||
batch_size: 1000 # Rows per batch for large queries
|
||||
use_nolock: true # Use NOLOCK hints (read uncommitted)
|
||||
connection_pooling: true
|
||||
|
||||
# ============================================================================
|
||||
# FILTERS
|
||||
# Global filters applied to all tables
|
||||
# ============================================================================
|
||||
filters:
|
||||
# Schema filters (include/exclude patterns)
|
||||
schemas:
|
||||
include:
|
||||
- "dbo"
|
||||
- "staging"
|
||||
- "ref"
|
||||
exclude:
|
||||
- "sys"
|
||||
- "temp"
|
||||
|
||||
# Table name filters (wildcard patterns)
|
||||
tables:
|
||||
include:
|
||||
- "*" # Include all tables
|
||||
exclude:
|
||||
- "tmp_*" # Exclude temporary tables
|
||||
- "backup_*" # Exclude backup tables
|
||||
- "archive_*" # Exclude archive tables
|
||||
|
||||
# Column filters for aggregate comparisons
|
||||
columns:
|
||||
exclude_patterns:
|
||||
- "*_id" # Exclude ID columns
|
||||
- "*_key" # Exclude key columns
|
||||
- "created_*" # Exclude audit columns
|
||||
- "modified_*" # Exclude audit columns
|
||||
|
||||
# ============================================================================
|
||||
# NOTIFICATIONS (future feature)
|
||||
# Configure notifications for test results
|
||||
# ============================================================================
|
||||
notifications:
|
||||
enabled: false
|
||||
|
||||
# Email notifications
|
||||
email:
|
||||
enabled: false
|
||||
smtp_server: "smtp.company.com"
|
||||
smtp_port: 587
|
||||
from_address: "drt@company.com"
|
||||
to_addresses:
|
||||
- "qa-team@company.com"
|
||||
on_failure_only: true
|
||||
|
||||
# Slack notifications
|
||||
slack:
|
||||
enabled: false
|
||||
webhook_url: "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
|
||||
channel: "#qa-alerts"
|
||||
on_failure_only: true
|
||||
|
||||
# ============================================================================
|
||||
# METADATA
|
||||
# Optional metadata about this configuration
|
||||
# ============================================================================
|
||||
metadata:
|
||||
version: "1.0"
|
||||
created_by: "QA Team"
|
||||
created_date: "2024-01-15"
|
||||
description: "Standard regression test configuration for DWH migration"
|
||||
project: "DWH Migration Phase 2"
|
||||
environment: "UAT"
|
||||
tags:
|
||||
- "migration"
|
||||
- "data-quality"
|
||||
- "regression"
|
||||
46
config.quickstart.yaml
Executable file
46
config.quickstart.yaml
Executable file
@@ -0,0 +1,46 @@
|
||||
# Quick Start Configuration
|
||||
# Minimal configuration to get started quickly
|
||||
|
||||
database_pairs:
|
||||
- name: "Quick_Test"
|
||||
enabled: true
|
||||
baseline:
|
||||
server: "YOUR_SERVER_NAME"
|
||||
database: "YOUR_BASELINE_DB"
|
||||
target:
|
||||
server: "YOUR_SERVER_NAME"
|
||||
database: "YOUR_TARGET_DB"
|
||||
|
||||
comparison:
|
||||
mode: "health_check"
|
||||
row_count:
|
||||
enabled: true
|
||||
tolerance_percent: 0.0
|
||||
schema:
|
||||
enabled: true
|
||||
checks:
|
||||
column_names: true
|
||||
data_types: true
|
||||
aggregates:
|
||||
enabled: false
|
||||
|
||||
tables:
|
||||
# Add your tables here after running discovery
|
||||
# Example:
|
||||
# - schema: "dbo"
|
||||
# name: "YourTable"
|
||||
# enabled: true
|
||||
# expected_in_target: true
|
||||
# aggregate_columns: []
|
||||
|
||||
reporting:
|
||||
output_dir: "./reports"
|
||||
investigation_dir: "./investigation_reports"
|
||||
formats:
|
||||
html: true
|
||||
csv: true
|
||||
pdf: false
|
||||
|
||||
logging:
|
||||
level: "INFO"
|
||||
output_dir: "./logs"
|
||||
83
config.test.yaml
Executable file
83
config.test.yaml
Executable file
@@ -0,0 +1,83 @@
|
||||
# Test Configuration for Docker SQL Server Environment
|
||||
# Use this configuration with the Docker test environment
|
||||
|
||||
database_pairs:
|
||||
- name: "Docker_Test_Comparison"
|
||||
enabled: true
|
||||
description: "Compare Docker test databases"
|
||||
baseline:
|
||||
server: "localhost,1433"
|
||||
database: "TestDB_Baseline"
|
||||
# Use environment variables for credentials: DRT_DB_USERNAME, DRT_DB_PASSWORD
|
||||
# username: "${DRT_DB_USERNAME}"
|
||||
# password: "${DRT_DB_PASSWORD}"
|
||||
timeout:
|
||||
connection: 30
|
||||
query: 300
|
||||
target:
|
||||
server: "localhost,1434"
|
||||
database: "TestDB_Target"
|
||||
# Use environment variables for credentials: DRT_DB_USERNAME, DRT_DB_PASSWORD
|
||||
# username: "${DRT_DB_USERNAME}"
|
||||
# password: "${DRT_DB_PASSWORD}"
|
||||
timeout:
|
||||
connection: 30
|
||||
query: 300
|
||||
|
||||
comparison:
|
||||
mode: "health_check"
|
||||
row_count:
|
||||
enabled: true
|
||||
tolerance_percent: 0.0
|
||||
schema:
|
||||
enabled: true
|
||||
checks:
|
||||
column_names: true
|
||||
data_types: true
|
||||
aggregates:
|
||||
enabled: true
|
||||
tolerance_percent: 0.01
|
||||
|
||||
tables:
|
||||
- schema: "dbo"
|
||||
name: "DimTable1"
|
||||
enabled: true
|
||||
expected_in_target: true
|
||||
aggregate_columns: []
|
||||
notes: "Example dimension table"
|
||||
|
||||
- schema: "dbo"
|
||||
name: "DimTable2"
|
||||
enabled: true
|
||||
expected_in_target: true
|
||||
aggregate_columns: []
|
||||
notes: "Example dimension table with schema differences"
|
||||
|
||||
- schema: "dbo"
|
||||
name: "FactTable1"
|
||||
enabled: true
|
||||
expected_in_target: true
|
||||
aggregate_columns:
|
||||
- "Quantity"
|
||||
- "Amount"
|
||||
- "Tax"
|
||||
notes: "Example fact table with numeric aggregates"
|
||||
|
||||
- schema: "dbo"
|
||||
name: "TempTable1"
|
||||
enabled: true
|
||||
expected_in_target: false
|
||||
aggregate_columns: []
|
||||
notes: "Example temporary table - only exists in target"
|
||||
|
||||
reporting:
|
||||
output_directory: "/home/user/reports"
|
||||
investigation_directory: "/home/user/investigation_reports"
|
||||
formats: ["html", "csv"]
|
||||
filename_template: "test_regression_{timestamp}"
|
||||
|
||||
logging:
|
||||
level: "INFO"
|
||||
directory: "/home/user/logs"
|
||||
filename_template: "drt_test_{timestamp}.log"
|
||||
console: true
|
||||
0
config/.gitkeep
Executable file
0
config/.gitkeep
Executable file
52
docker-compose.test.yml
Executable file
52
docker-compose.test.yml
Executable file
@@ -0,0 +1,52 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
# SQL Server 2022 - Baseline (Production)
|
||||
sqlserver-baseline:
|
||||
image: mcr.microsoft.com/mssql/server:2022-latest
|
||||
container_name: drt-sqlserver-baseline
|
||||
environment:
|
||||
- ACCEPT_EULA=Y
|
||||
- SA_PASSWORD=${SA_PASSWORD:-YourStrong!Passw0rd}
|
||||
- MSSQL_PID=Developer
|
||||
ports:
|
||||
- "1433:1433"
|
||||
volumes:
|
||||
- ./test_data/init_baseline.sql:/docker-entrypoint-initdb.d/init.sql
|
||||
- sqlserver_baseline_data:/var/opt/mssql
|
||||
healthcheck:
|
||||
test: /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P ${SA_PASSWORD:-YourStrong!Passw0rd} -Q "SELECT 1"
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- drt-network
|
||||
|
||||
# SQL Server 2022 - Target (Test)
|
||||
sqlserver-target:
|
||||
image: mcr.microsoft.com/mssql/server:2022-latest
|
||||
container_name: drt-sqlserver-target
|
||||
environment:
|
||||
- ACCEPT_EULA=Y
|
||||
- SA_PASSWORD=${SA_PASSWORD:-YourStrong!Passw0rd}
|
||||
- MSSQL_PID=Developer
|
||||
ports:
|
||||
- "1434:1433"
|
||||
volumes:
|
||||
- ./test_data/init_target.sql:/docker-entrypoint-initdb.d/init.sql
|
||||
- sqlserver_target_data:/var/opt/mssql
|
||||
healthcheck:
|
||||
test: /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P ${SA_PASSWORD:-YourStrong!Passw0rd} -Q "SELECT 1"
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- drt-network
|
||||
|
||||
volumes:
|
||||
sqlserver_baseline_data:
|
||||
sqlserver_target_data:
|
||||
|
||||
networks:
|
||||
drt-network:
|
||||
driver: bridge
|
||||
121
install_docker_debian.sh
Executable file
121
install_docker_debian.sh
Executable file
@@ -0,0 +1,121 @@
|
||||
#!/bin/bash
|
||||
# Docker Installation Script for Debian 12
|
||||
|
||||
set -e
|
||||
|
||||
echo "=========================================="
|
||||
echo "Docker Installation for Debian 12"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Check if running as root
|
||||
if [ "$EUID" -ne 0 ]; then
|
||||
echo "Please run with sudo: sudo bash install_docker_debian.sh"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Detect OS
|
||||
if [ -f /etc/os-release ]; then
|
||||
. /etc/os-release
|
||||
OS=$ID
|
||||
VER=$VERSION_ID
|
||||
echo "Detected OS: $PRETTY_NAME"
|
||||
else
|
||||
echo "Cannot detect OS version"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Remove old versions
|
||||
echo ""
|
||||
echo "Step 1: Removing old Docker versions (if any)..."
|
||||
apt-get remove -y docker docker-engine docker.io containerd runc 2>/dev/null || true
|
||||
|
||||
# Install prerequisites
|
||||
echo ""
|
||||
echo "Step 2: Installing prerequisites..."
|
||||
apt-get update
|
||||
apt-get install -y \
|
||||
ca-certificates \
|
||||
curl \
|
||||
gnupg \
|
||||
lsb-release
|
||||
|
||||
# Add Docker's official GPG key
|
||||
echo ""
|
||||
echo "Step 3: Adding Docker GPG key..."
|
||||
install -m 0755 -d /etc/apt/keyrings
|
||||
curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||
chmod a+r /etc/apt/keyrings/docker.gpg
|
||||
|
||||
# Set up Docker repository
|
||||
echo ""
|
||||
echo "Step 4: Adding Docker repository..."
|
||||
echo \
|
||||
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian \
|
||||
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
|
||||
tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
|
||||
# Install Docker Engine
|
||||
echo ""
|
||||
echo "Step 5: Installing Docker Engine..."
|
||||
apt-get update
|
||||
apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
|
||||
|
||||
# Start Docker service
|
||||
echo ""
|
||||
echo "Step 6: Starting Docker service..."
|
||||
systemctl start docker
|
||||
systemctl enable docker
|
||||
|
||||
# Add current user to docker group (if not root)
|
||||
if [ -n "$SUDO_USER" ]; then
|
||||
echo ""
|
||||
echo "Step 7: Adding user $SUDO_USER to docker group..."
|
||||
usermod -aG docker $SUDO_USER
|
||||
echo "Note: You'll need to log out and back in for group changes to take effect"
|
||||
fi
|
||||
|
||||
# Verify installation
|
||||
echo ""
|
||||
echo "Step 8: Verifying Docker installation..."
|
||||
if docker --version; then
|
||||
echo "✓ Docker installed successfully"
|
||||
else
|
||||
echo "✗ Docker installation failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if docker compose version; then
|
||||
echo "✓ Docker Compose installed successfully"
|
||||
else
|
||||
echo "✗ Docker Compose installation failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Test Docker
|
||||
echo ""
|
||||
echo "Step 9: Testing Docker..."
|
||||
if docker run --rm hello-world > /dev/null 2>&1; then
|
||||
echo "✓ Docker is working correctly"
|
||||
else
|
||||
echo "⚠ Docker test failed - you may need to log out and back in"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Installation completed successfully!"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo "Docker version:"
|
||||
docker --version
|
||||
echo ""
|
||||
echo "Docker Compose version:"
|
||||
docker compose version
|
||||
echo ""
|
||||
echo "IMPORTANT: If you're not root, log out and back in for group changes to take effect"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo "1. Log out and back in (or run: newgrp docker)"
|
||||
echo "2. Test Docker: docker run hello-world"
|
||||
echo "3. Set up test environment: bash test_data/setup_test_environment.sh"
|
||||
echo ""
|
||||
112
install_odbc_debian.sh
Executable file
112
install_odbc_debian.sh
Executable file
@@ -0,0 +1,112 @@
|
||||
#!/bin/bash
|
||||
# ODBC Driver Installation Script for Debian 12
|
||||
# This script installs Microsoft ODBC Driver 18 for SQL Server
|
||||
|
||||
set -e
|
||||
|
||||
echo "=========================================="
|
||||
echo "ODBC Driver Installation for Debian 12"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Check if running as root
|
||||
if [ "$EUID" -ne 0 ]; then
|
||||
echo "Please run with sudo: sudo bash install_odbc_debian.sh"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Detect OS
|
||||
if [ -f /etc/os-release ]; then
|
||||
. /etc/os-release
|
||||
OS=$ID
|
||||
VER=$VERSION_ID
|
||||
echo "Detected OS: $PRETTY_NAME"
|
||||
else
|
||||
echo "Cannot detect OS version"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Clean up any corrupted repository files
|
||||
echo ""
|
||||
echo "Step 1: Cleaning up any previous installation attempts..."
|
||||
if [ -f /etc/apt/sources.list.d/mssql-release.list ]; then
|
||||
echo "Removing corrupted mssql-release.list..."
|
||||
rm -f /etc/apt/sources.list.d/mssql-release.list
|
||||
fi
|
||||
|
||||
# Install prerequisites
|
||||
echo ""
|
||||
echo "Step 2: Installing prerequisites..."
|
||||
apt-get update
|
||||
apt-get install -y curl gnupg2 apt-transport-https ca-certificates
|
||||
|
||||
# Add Microsoft GPG key
|
||||
echo ""
|
||||
echo "Step 3: Adding Microsoft GPG key..."
|
||||
curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /usr/share/keyrings/microsoft-prod.gpg
|
||||
|
||||
# Add Microsoft repository based on OS
|
||||
echo ""
|
||||
echo "Step 4: Adding Microsoft repository..."
|
||||
if [ "$OS" = "debian" ]; then
|
||||
if [ "$VER" = "12" ]; then
|
||||
curl https://packages.microsoft.com/config/debian/12/prod.list | tee /etc/apt/sources.list.d/mssql-release.list
|
||||
elif [ "$VER" = "11" ]; then
|
||||
curl https://packages.microsoft.com/config/debian/11/prod.list | tee /etc/apt/sources.list.d/mssql-release.list
|
||||
else
|
||||
echo "Unsupported Debian version: $VER"
|
||||
exit 1
|
||||
fi
|
||||
elif [ "$OS" = "ubuntu" ]; then
|
||||
curl https://packages.microsoft.com/config/ubuntu/$VER/prod.list | tee /etc/apt/sources.list.d/mssql-release.list
|
||||
else
|
||||
echo "Unsupported OS: $OS"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Update package list
|
||||
echo ""
|
||||
echo "Step 5: Updating package list..."
|
||||
apt-get update
|
||||
|
||||
# Install ODBC Driver
|
||||
echo ""
|
||||
echo "Step 6: Installing ODBC Driver 18 for SQL Server..."
|
||||
ACCEPT_EULA=Y apt-get install -y msodbcsql18
|
||||
|
||||
# Install unixODBC development headers
|
||||
echo ""
|
||||
echo "Step 7: Installing unixODBC development headers..."
|
||||
apt-get install -y unixodbc-dev
|
||||
|
||||
# Verify installation
|
||||
echo ""
|
||||
echo "Step 8: Verifying installation..."
|
||||
if odbcinst -q -d -n "ODBC Driver 18 for SQL Server" > /dev/null 2>&1; then
|
||||
echo "✓ ODBC Driver 18 for SQL Server installed successfully"
|
||||
odbcinst -q -d -n "ODBC Driver 18 for SQL Server"
|
||||
else
|
||||
echo "✗ ODBC Driver installation failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check for ODBC Driver 17 as fallback
|
||||
if odbcinst -q -d -n "ODBC Driver 17 for SQL Server" > /dev/null 2>&1; then
|
||||
echo "✓ ODBC Driver 17 for SQL Server also available"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Installation completed successfully!"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo "1. Install Python dependencies: pip install -r requirements.txt"
|
||||
echo "2. Install the framework: pip install -e ."
|
||||
echo "3. Test the installation: drt --version"
|
||||
echo ""
|
||||
echo "For Windows Authentication, you'll also need to:"
|
||||
echo "1. Install Kerberos: apt-get install -y krb5-user"
|
||||
echo "2. Configure /etc/krb5.conf with your domain settings"
|
||||
echo "3. Get a Kerberos ticket: kinit username@YOUR_DOMAIN.COM"
|
||||
echo ""
|
||||
73
pyproject.toml
Executable file
73
pyproject.toml
Executable file
@@ -0,0 +1,73 @@
|
||||
[project]
|
||||
name = "data-regression-tester"
|
||||
version = "1.0.0"
|
||||
description = "Data Regression Testing Framework for SQL Server"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.9"
|
||||
license = {text = "MIT"}
|
||||
authors = [
|
||||
{name = "QA Engineering Team"}
|
||||
]
|
||||
keywords = ["data", "regression", "testing", "sql-server", "comparison"]
|
||||
classifiers = [
|
||||
"Development Status :: 4 - Beta",
|
||||
"Environment :: Console",
|
||||
"Intended Audience :: Developers",
|
||||
"Operating System :: Microsoft :: Windows",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Topic :: Database",
|
||||
"Topic :: Software Development :: Testing",
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"pandas>=2.0",
|
||||
"sqlalchemy>=2.0",
|
||||
"pyodbc>=4.0",
|
||||
"pyyaml>=6.0",
|
||||
"pydantic>=2.0",
|
||||
"click>=8.0",
|
||||
"rich>=13.0",
|
||||
"jinja2>=3.0",
|
||||
"weasyprint>=60.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest>=7.0",
|
||||
"pytest-cov>=4.0",
|
||||
"black>=23.0",
|
||||
"ruff>=0.1.0",
|
||||
"mypy>=1.0",
|
||||
"pre-commit>=3.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
drt = "drt.cli.main:cli"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=61.0", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
|
||||
[tool.black]
|
||||
line-length = 100
|
||||
target-version = ["py39", "py310", "py311", "py312"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 100
|
||||
select = ["E", "F", "W", "I", "N", "UP", "B", "C4"]
|
||||
|
||||
[tool.mypy]
|
||||
python_version = "3.9"
|
||||
warn_return_any = true
|
||||
warn_unused_configs = true
|
||||
ignore_missing_imports = true
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
addopts = "-v --cov=drt --cov-report=term-missing"
|
||||
14
pytest.ini
Executable file
14
pytest.ini
Executable file
@@ -0,0 +1,14 @@
|
||||
[pytest]
|
||||
testpaths = tests
|
||||
python_files = test_*.py
|
||||
python_classes = Test*
|
||||
python_functions = test_*
|
||||
addopts =
|
||||
-v
|
||||
--strict-markers
|
||||
--tb=short
|
||||
--disable-warnings
|
||||
markers =
|
||||
unit: Unit tests
|
||||
integration: Integration tests
|
||||
slow: Slow running tests
|
||||
9
requirements.txt
Executable file
9
requirements.txt
Executable file
@@ -0,0 +1,9 @@
|
||||
pandas>=2.0
|
||||
sqlalchemy>=2.0
|
||||
pyodbc>=4.0
|
||||
pyyaml>=6.0
|
||||
pydantic>=2.0
|
||||
click>=8.0
|
||||
rich>=13.0
|
||||
jinja2>=3.0
|
||||
weasyprint>=60.0
|
||||
14
src/drt/__init__.py
Executable file
14
src/drt/__init__.py
Executable file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Data Regression Testing Framework
|
||||
|
||||
A comprehensive framework for validating data integrity during code migration
|
||||
and system updates by comparing data outputs between Baseline (Production)
|
||||
and Target (Test) SQL Server databases.
|
||||
"""
|
||||
|
||||
__version__ = "1.0.0"
|
||||
__author__ = "QA Engineering Team"
|
||||
|
||||
from drt.models.enums import Status, CheckType
|
||||
|
||||
__all__ = ["__version__", "__author__", "Status", "CheckType"]
|
||||
11
src/drt/__main__.py
Executable file
11
src/drt/__main__.py
Executable file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
Entry point for running the framework as a module.
|
||||
|
||||
Usage:
|
||||
python -m drt <command> [options]
|
||||
"""
|
||||
|
||||
from drt.cli.main import cli
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli()
|
||||
5
src/drt/cli/__init__.py
Executable file
5
src/drt/cli/__init__.py
Executable file
@@ -0,0 +1,5 @@
|
||||
"""Command-line interface for the framework."""
|
||||
|
||||
from drt.cli.main import cli
|
||||
|
||||
__all__ = ["cli"]
|
||||
5
src/drt/cli/commands/__init__.py
Executable file
5
src/drt/cli/commands/__init__.py
Executable file
@@ -0,0 +1,5 @@
|
||||
"""CLI commands."""
|
||||
|
||||
from drt.cli.commands import discover, compare, validate, investigate
|
||||
|
||||
__all__ = ["discover", "compare", "validate", "investigate"]
|
||||
137
src/drt/cli/commands/compare.py
Executable file
137
src/drt/cli/commands/compare.py
Executable file
@@ -0,0 +1,137 @@
|
||||
"""Compare command implementation."""
|
||||
|
||||
import click
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from drt.config.loader import load_config
|
||||
from drt.services.comparison import ComparisonService
|
||||
from drt.reporting.generator import ReportGenerator
|
||||
from drt.utils.logging import setup_logging, get_logger
|
||||
from drt.utils.timestamps import format_duration
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option('--config', '-c', required=True, type=click.Path(exists=True), help='Configuration file path')
|
||||
@click.option('--verbose', '-v', is_flag=True, help='Enable verbose output')
|
||||
@click.option('--dry-run', is_flag=True, help='Show what would be compared without executing')
|
||||
def compare(config, verbose, dry_run):
|
||||
"""
|
||||
Execute comparison between Baseline and Target databases.
|
||||
|
||||
Compares configured tables between baseline and target databases,
|
||||
checking for data regression issues.
|
||||
|
||||
Example:
|
||||
drt compare --config ./config.yaml
|
||||
"""
|
||||
# Load config first to get log directory
|
||||
from drt.config.loader import load_config
|
||||
cfg = load_config(config)
|
||||
|
||||
# Setup logging using config
|
||||
log_level = "DEBUG" if verbose else "INFO"
|
||||
log_dir = cfg.logging.directory
|
||||
setup_logging(log_level=log_level, log_dir=log_dir, log_to_file=not dry_run)
|
||||
|
||||
click.echo("=" * 60)
|
||||
click.echo("Data Regression Testing Framework")
|
||||
click.echo("=" * 60)
|
||||
click.echo()
|
||||
|
||||
try:
|
||||
# Load configuration
|
||||
click.echo(f"Loading configuration: {config}")
|
||||
cfg = load_config(config)
|
||||
click.echo(f"✓ Configuration loaded")
|
||||
click.echo(f" Database pairs: {len(cfg.database_pairs)}")
|
||||
click.echo(f" Tables configured: {len(cfg.tables)}")
|
||||
click.echo()
|
||||
|
||||
if dry_run:
|
||||
click.echo("=" * 60)
|
||||
click.echo("DRY RUN - Preview Only")
|
||||
click.echo("=" * 60)
|
||||
|
||||
for pair in cfg.database_pairs:
|
||||
if not pair.enabled:
|
||||
continue
|
||||
|
||||
click.echo(f"\nDatabase Pair: {pair.name}")
|
||||
click.echo(f" Baseline: {pair.baseline.server}.{pair.baseline.database}")
|
||||
click.echo(f" Target: {pair.target.server}.{pair.target.database}")
|
||||
|
||||
# Count enabled tables
|
||||
enabled_tables = [t for t in cfg.tables if t.enabled]
|
||||
click.echo(f" Tables to compare: {len(enabled_tables)}")
|
||||
|
||||
click.echo("\n" + "=" * 60)
|
||||
click.echo("Use without --dry-run to execute comparison")
|
||||
click.echo("=" * 60)
|
||||
sys.exit(0)
|
||||
|
||||
# Execute comparison for each database pair
|
||||
all_summaries = []
|
||||
|
||||
for pair in cfg.database_pairs:
|
||||
if not pair.enabled:
|
||||
click.echo(f"Skipping disabled pair: {pair.name}")
|
||||
continue
|
||||
|
||||
click.echo(f"Comparing: {pair.name}")
|
||||
click.echo(f" Baseline: {pair.baseline.server}.{pair.baseline.database}")
|
||||
click.echo(f" Target: {pair.target.server}.{pair.target.database}")
|
||||
click.echo()
|
||||
|
||||
# Run comparison
|
||||
comparison_service = ComparisonService(cfg)
|
||||
summary = comparison_service.run_comparison(pair)
|
||||
all_summaries.append(summary)
|
||||
|
||||
click.echo()
|
||||
|
||||
# Generate reports for all summaries
|
||||
if all_summaries:
|
||||
click.echo("=" * 60)
|
||||
click.echo("Generating Reports")
|
||||
click.echo("=" * 60)
|
||||
|
||||
report_gen = ReportGenerator(cfg)
|
||||
|
||||
for summary in all_summaries:
|
||||
report_files = report_gen.generate_reports(summary)
|
||||
|
||||
for filepath in report_files:
|
||||
click.echo(f" ✓ {filepath}")
|
||||
|
||||
click.echo()
|
||||
|
||||
# Display final summary
|
||||
click.echo("=" * 60)
|
||||
click.echo("EXECUTION COMPLETE")
|
||||
click.echo("=" * 60)
|
||||
|
||||
total_passed = sum(s.passed for s in all_summaries)
|
||||
total_failed = sum(s.failed for s in all_summaries)
|
||||
total_warnings = sum(s.warnings for s in all_summaries)
|
||||
total_errors = sum(s.errors for s in all_summaries)
|
||||
|
||||
click.echo(f" PASS: {total_passed:3d}")
|
||||
click.echo(f" FAIL: {total_failed:3d}")
|
||||
click.echo(f" WARNING: {total_warnings:3d}")
|
||||
click.echo(f" ERROR: {total_errors:3d}")
|
||||
click.echo("=" * 60)
|
||||
|
||||
# Exit with appropriate code
|
||||
if total_errors > 0 or total_failed > 0:
|
||||
click.echo("Status: FAILED ❌")
|
||||
sys.exit(1)
|
||||
else:
|
||||
click.echo("Status: PASSED ✓")
|
||||
sys.exit(0)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Comparison failed: {e}", exc_info=verbose)
|
||||
click.echo(f"✗ Error: {e}", err=True)
|
||||
sys.exit(2)
|
||||
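As a quick smoke test of the command wiring above, `compare` can be invoked in-process with Click's test runner. A minimal sketch, assuming a valid `config.yaml` exists in the working directory (the filename is illustrative) and the `drt` package is importable:

```python
from click.testing import CliRunner

from drt.cli.commands.compare import compare

runner = CliRunner()
# --dry-run prints the configured pairs and table counts without
# opening any database connections.
result = runner.invoke(compare, ["--config", "config.yaml", "--dry-run"])

print(result.output)
# Exit codes follow the convention used above:
# 0 = passed / dry run, 1 = failures found, 2 = unexpected error.
print("exit code:", result.exit_code)
```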
118
src/drt/cli/commands/discover.py
Executable file
@@ -0,0 +1,118 @@
|
||||
"""Discovery command implementation."""
|
||||
|
||||
import click
|
||||
import sys
|
||||
from drt.services.discovery import DiscoveryService
|
||||
from drt.config.models import ConnectionConfig, Config
|
||||
from drt.config.loader import save_config
|
||||
from drt.utils.logging import setup_logging, get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option('--server', required=True, help='SQL Server hostname or instance')
|
||||
@click.option('--database', required=True, help='Database name to discover')
|
||||
@click.option('--output', '-o', default='./config_discovered.yaml', help='Output configuration file')
|
||||
@click.option('--schemas', multiple=True, help='Specific schemas to include (can specify multiple)')
|
||||
@click.option('--verbose', '-v', is_flag=True, help='Enable verbose output')
|
||||
def discover(server, database, output, schemas, verbose):
|
||||
"""
|
||||
Discover tables and generate configuration file.
|
||||
|
||||
Scans the specified database and automatically generates a configuration
|
||||
file with all discovered tables, columns, and metadata.
|
||||
|
||||
Example:
|
||||
drt discover --server SQLSERVER01 --database ORBIS_DWH_PROD
|
||||
"""
|
||||
# Setup logging
|
||||
log_level = "DEBUG" if verbose else "INFO"
|
||||
setup_logging(log_level=log_level)
|
||||
|
||||
click.echo("=" * 60)
|
||||
click.echo("Data Regression Testing Framework - Discovery Mode")
|
||||
click.echo("=" * 60)
|
||||
click.echo()
|
||||
|
||||
try:
|
||||
# Create connection config
|
||||
conn_config = ConnectionConfig(
|
||||
server=server,
|
||||
database=database
|
||||
)
|
||||
|
||||
# Create base config with schema filters if provided
|
||||
config = Config()
|
||||
if schemas:
|
||||
config.discovery.include_schemas = list(schemas)
|
||||
|
||||
# Initialize discovery service
|
||||
click.echo(f"Connecting to {server}.{database}...")
|
||||
discovery_service = DiscoveryService(conn_config, config)
|
||||
|
||||
# Test connection
|
||||
if not discovery_service.conn_mgr.test_connection():
|
||||
click.echo("✗ Connection failed", err=True)
|
||||
sys.exit(2)
|
||||
|
||||
click.echo("✓ Connected (Windows Authentication)")
|
||||
click.echo()
|
||||
|
||||
# Discover tables
|
||||
click.echo("Scanning tables...")
|
||||
tables = discovery_service.discover_tables()
|
||||
|
||||
if not tables:
|
||||
click.echo("⚠ No tables found", err=True)
|
||||
sys.exit(0)
|
||||
|
||||
click.echo(f"✓ Found {len(tables)} tables")
|
||||
click.echo()
|
||||
|
||||
# Generate configuration
|
||||
click.echo("Generating configuration...")
|
||||
generated_config = discovery_service.generate_config(tables)
|
||||
|
||||
# Save configuration
|
||||
save_config(generated_config, output)
|
||||
click.echo(f"✓ Configuration saved to: {output}")
|
||||
click.echo()
|
||||
|
||||
# Display summary
|
||||
click.echo("=" * 60)
|
||||
click.echo("Discovery Summary")
|
||||
click.echo("=" * 60)
|
||||
click.echo(f" Tables discovered: {len(tables)}")
|
||||
|
||||
# Count columns
|
||||
total_cols = sum(len(t.columns) for t in tables)
|
||||
click.echo(f" Total columns: {total_cols}")
|
||||
|
||||
# Count numeric columns
|
||||
numeric_cols = sum(len(t.aggregate_columns) for t in tables)
|
||||
click.echo(f" Numeric columns: {numeric_cols}")
|
||||
|
||||
# Show largest tables
|
||||
if tables:
|
||||
largest = sorted(tables, key=lambda t: t.estimated_row_count, reverse=True)[:3]
|
||||
click.echo()
|
||||
click.echo(" Largest tables:")
|
||||
for table in largest:
|
||||
click.echo(f" • {table.full_name:40s} {table.estimated_row_count:>12,} rows")
|
||||
|
||||
click.echo()
|
||||
click.echo("=" * 60)
|
||||
click.echo("Next Steps:")
|
||||
click.echo(f" 1. Review {output}")
|
||||
click.echo(" 2. Configure target database connection")
|
||||
click.echo(" 3. Set 'expected_in_target: false' for tables being removed")
|
||||
click.echo(f" 4. Run: drt compare --config {output}")
|
||||
click.echo("=" * 60)
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Discovery failed: {e}", exc_info=verbose)
|
||||
click.echo(f"✗ Error: {e}", err=True)
|
||||
sys.exit(2)
|
||||
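The same discovery flow can also be driven from Python, which is convenient for one-off scripts. A minimal sketch, assuming placeholder server and database names and that the connection succeeds with the authentication logic shown elsewhere in this commit:

```python
from drt.config.models import Config, ConnectionConfig
from drt.config.loader import save_config
from drt.services.discovery import DiscoveryService

# Placeholder connection details -- adjust for your environment.
conn = ConnectionConfig(server="SQLSERVER01", database="ORBIS_DWH_PROD")

cfg = Config()
cfg.discovery.include_schemas = ["dbo"]  # optional schema filter

service = DiscoveryService(conn, cfg)
if service.conn_mgr.test_connection():
    tables = service.discover_tables()
    save_config(service.generate_config(tables), "./config_discovered.yaml")
    print(f"Discovered {len(tables)} tables")
```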
177
src/drt/cli/commands/investigate.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""Investigate command implementation."""
|
||||
|
||||
import click
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from drt.config.loader import load_config
|
||||
from drt.services.investigation import InvestigationService
|
||||
from drt.reporting.investigation_report import (
|
||||
InvestigationHTMLReportGenerator,
|
||||
InvestigationCSVReportGenerator
|
||||
)
|
||||
from drt.utils.logging import setup_logging, get_logger
|
||||
from drt.utils.timestamps import get_timestamp
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option('--analysis-dir', '-a', required=True, type=click.Path(exists=True),
|
||||
help='Analysis output directory containing *_investigate.sql files')
|
||||
@click.option('--config', '-c', required=True, type=click.Path(exists=True),
|
||||
help='Configuration file path')
|
||||
@click.option('--output-dir', '-o', default=None,
|
||||
help='Output directory for reports (overrides config setting)')
|
||||
@click.option('--verbose', '-v', is_flag=True, help='Enable verbose output')
|
||||
@click.option('--dry-run', is_flag=True, help='Show what would be executed without running')
|
||||
def investigate(analysis_dir, config, output_dir, verbose, dry_run):
|
||||
"""
|
||||
Execute investigation queries from regression analysis.
|
||||
|
||||
Processes all *_investigate.sql files in the analysis directory,
|
||||
executes queries on both baseline and target databases, and
|
||||
generates comprehensive reports.
|
||||
|
||||
Example:
|
||||
drt investigate -a /home/user/analysis/output_20251209_184032/ -c config.yaml
|
||||
"""
|
||||
# Load config first to get log directory (load_config is imported at module level)
cfg = load_config(config)
|
||||
|
||||
# Setup logging using config
|
||||
log_level = "DEBUG" if verbose else "INFO"
|
||||
log_dir = cfg.logging.directory
|
||||
setup_logging(log_level=log_level, log_dir=log_dir, log_to_file=not dry_run)
|
||||
|
||||
click.echo("=" * 60)
|
||||
click.echo("Data Regression Testing Framework - Investigation")
|
||||
click.echo("=" * 60)
|
||||
click.echo()
|
||||
|
||||
try:
|
||||
# Use output_dir from CLI if provided, otherwise use config
|
||||
if output_dir is None:
|
||||
output_dir = cfg.reporting.investigation_directory
|
||||
|
||||
click.echo(f"✓ Configuration loaded")
|
||||
click.echo(f" Database pairs: {len(cfg.database_pairs)}")
|
||||
click.echo()
|
||||
|
||||
# Convert paths
|
||||
analysis_path = Path(analysis_dir)
|
||||
output_path = Path(output_dir)
|
||||
|
||||
# Create output directory
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if dry_run:
|
||||
click.echo("=" * 60)
|
||||
click.echo("DRY RUN - Preview Only")
|
||||
click.echo("=" * 60)
|
||||
|
||||
# Discover SQL files
|
||||
from drt.services.sql_parser import discover_sql_files
|
||||
sql_files = discover_sql_files(analysis_path)
|
||||
|
||||
click.echo(f"\nAnalysis Directory: {analysis_path}")
|
||||
click.echo(f"Found {len(sql_files)} investigation SQL files")
|
||||
|
||||
if sql_files:
|
||||
click.echo("\nTables with investigation queries:")
|
||||
for schema, table, sql_path in sql_files[:10]: # Show first 10
|
||||
click.echo(f" • {schema}.{table}")
|
||||
|
||||
if len(sql_files) > 10:
|
||||
click.echo(f" ... and {len(sql_files) - 10} more")
|
||||
|
||||
for pair in cfg.database_pairs:
|
||||
if not pair.enabled:
|
||||
continue
|
||||
|
||||
click.echo(f"\nDatabase Pair: {pair.name}")
|
||||
click.echo(f" Baseline: {pair.baseline.server}.{pair.baseline.database}")
|
||||
click.echo(f" Target: {pair.target.server}.{pair.target.database}")
|
||||
|
||||
click.echo(f"\nReports would be saved to: {output_path}")
|
||||
click.echo("\n" + "=" * 60)
|
||||
click.echo("Use without --dry-run to execute investigation")
|
||||
click.echo("=" * 60)
|
||||
sys.exit(0)
|
||||
|
||||
# Execute investigation for each database pair
|
||||
all_summaries = []
|
||||
|
||||
for pair in cfg.database_pairs:
|
||||
if not pair.enabled:
|
||||
click.echo(f"Skipping disabled pair: {pair.name}")
|
||||
continue
|
||||
|
||||
click.echo(f"Investigating: {pair.name}")
|
||||
click.echo(f" Baseline: {pair.baseline.server}.{pair.baseline.database}")
|
||||
click.echo(f" Target: {pair.target.server}.{pair.target.database}")
|
||||
click.echo()
|
||||
|
||||
# Run investigation
|
||||
investigation_service = InvestigationService(cfg)
|
||||
summary = investigation_service.run_investigation(analysis_path, pair)
|
||||
all_summaries.append(summary)
|
||||
|
||||
click.echo()
|
||||
|
||||
# Generate reports for all summaries
|
||||
if all_summaries:
|
||||
click.echo("=" * 60)
|
||||
click.echo("Generating Reports")
|
||||
click.echo("=" * 60)
|
||||
|
||||
for summary in all_summaries:
|
||||
timestamp = get_timestamp()
|
||||
|
||||
# Generate HTML report
|
||||
html_gen = InvestigationHTMLReportGenerator(cfg)
|
||||
html_path = output_path / f"investigation_report_{timestamp}.html"
|
||||
html_gen.generate(summary, html_path)
|
||||
click.echo(f" ✓ HTML: {html_path}")
|
||||
|
||||
# Generate CSV report
|
||||
csv_gen = InvestigationCSVReportGenerator(cfg)
|
||||
csv_path = output_path / f"investigation_report_{timestamp}.csv"
|
||||
csv_gen.generate(summary, csv_path)
|
||||
click.echo(f" ✓ CSV: {csv_path}")
|
||||
|
||||
click.echo()
|
||||
|
||||
# Display final summary
|
||||
click.echo("=" * 60)
|
||||
click.echo("INVESTIGATION COMPLETE")
|
||||
click.echo("=" * 60)
|
||||
|
||||
total_processed = sum(s.tables_processed for s in all_summaries)
|
||||
total_successful = sum(s.tables_successful for s in all_summaries)
|
||||
total_partial = sum(s.tables_partial for s in all_summaries)
|
||||
total_failed = sum(s.tables_failed for s in all_summaries)
|
||||
total_queries = sum(s.total_queries_executed for s in all_summaries)
|
||||
|
||||
click.echo(f" Tables Processed: {total_processed:3d}")
|
||||
click.echo(f" Successful: {total_successful:3d}")
|
||||
click.echo(f" Partial: {total_partial:3d}")
|
||||
click.echo(f" Failed: {total_failed:3d}")
|
||||
click.echo(f" Total Queries: {total_queries:3d}")
|
||||
click.echo("=" * 60)
|
||||
|
||||
# Exit with appropriate code
|
||||
if total_failed > 0:
|
||||
click.echo("Status: COMPLETED WITH FAILURES ⚠️")
|
||||
sys.exit(1)
|
||||
elif total_partial > 0:
|
||||
click.echo("Status: COMPLETED WITH PARTIAL RESULTS ◐")
|
||||
sys.exit(0)
|
||||
else:
|
||||
click.echo("Status: SUCCESS ✓")
|
||||
sys.exit(0)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Investigation failed: {e}", exc_info=verbose)
|
||||
click.echo(f"✗ Error: {e}", err=True)
|
||||
sys.exit(2)
|
||||
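The command above expects one `*_investigate.sql` file per table inside the analysis directory. A sketch of preparing such a directory is below; the directory path, table name, and the exact file-naming scheme expected by `discover_sql_files` are assumptions here, not confirmed by this commit:

```python
from pathlib import Path

analysis_dir = Path("./analysis/output_20251209_184032")  # illustrative path
analysis_dir.mkdir(parents=True, exist_ok=True)

# Hypothetical naming: one file per table, schema and table encoded in the name.
sql = (
    "-- Query 1: spot-check recent rows\n"
    "SELECT TOP 10 * FROM [dbo].[FactSales] WITH (NOLOCK);\n"
)
(analysis_dir / "dbo_FactSales_investigate.sql").write_text(sql, encoding="utf-8")
```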
92
src/drt/cli/commands/validate.py
Executable file
@@ -0,0 +1,92 @@
|
||||
"""Validate command implementation."""
|
||||
|
||||
import click
|
||||
import sys
|
||||
from drt.config.loader import load_config
|
||||
from drt.config.validator import validate_config
|
||||
from drt.utils.logging import setup_logging, get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option('--config', '-c', required=True, type=click.Path(exists=True), help='Configuration file path')
|
||||
@click.option('--verbose', '-v', is_flag=True, help='Enable verbose output')
|
||||
def validate(config, verbose):
|
||||
"""
|
||||
Validate configuration file without running comparison.
|
||||
|
||||
Checks configuration for completeness and correctness, reporting
|
||||
any errors or warnings.
|
||||
|
||||
Example:
|
||||
drt validate --config ./config.yaml
|
||||
"""
|
||||
# Setup logging
|
||||
log_level = "DEBUG" if verbose else "INFO"
|
||||
setup_logging(log_level=log_level, log_to_console=True, log_to_file=False)
|
||||
|
||||
click.echo("=" * 60)
|
||||
click.echo("Configuration Validation")
|
||||
click.echo("=" * 60)
|
||||
click.echo()
|
||||
|
||||
try:
|
||||
# Load configuration
|
||||
click.echo(f"Loading: {config}")
|
||||
cfg = load_config(config)
|
||||
click.echo("✓ YAML syntax valid")
|
||||
click.echo("✓ Configuration structure valid")
|
||||
click.echo()
|
||||
|
||||
# Validate configuration
|
||||
click.echo("Validating configuration...")
|
||||
is_valid, errors = validate_config(cfg)
|
||||
|
||||
if errors:
|
||||
click.echo()
|
||||
click.echo("Validation Errors:")
|
||||
for error in errors:
|
||||
click.echo(f" ✗ {error}", err=True)
|
||||
click.echo()
|
||||
|
||||
# Display configuration summary
|
||||
click.echo("=" * 60)
|
||||
click.echo("Configuration Summary")
|
||||
click.echo("=" * 60)
|
||||
click.echo(f" Database pairs: {len(cfg.database_pairs)}")
|
||||
click.echo(f" Tables configured: {len(cfg.tables)}")
|
||||
click.echo(f" Enabled tables: {sum(1 for t in cfg.tables if t.enabled)}")
|
||||
click.echo(f" Disabled tables: {sum(1 for t in cfg.tables if not t.enabled)}")
|
||||
click.echo()
|
||||
|
||||
# Check for tables not expected in target
|
||||
not_expected = sum(1 for t in cfg.tables if not t.expected_in_target)
|
||||
if not_expected > 0:
|
||||
click.echo(f" ⚠ {not_expected} table(s) marked as expected_in_target: false")
|
||||
|
||||
# Display database pairs
|
||||
click.echo()
|
||||
click.echo("Database Pairs:")
|
||||
for pair in cfg.database_pairs:
|
||||
status = "✓" if pair.enabled else "○"
|
||||
click.echo(f" {status} {pair.name}")
|
||||
click.echo(f" Baseline: {pair.baseline.server}.{pair.baseline.database}")
|
||||
click.echo(f" Target: {pair.target.server}.{pair.target.database}")
|
||||
|
||||
click.echo()
|
||||
click.echo("=" * 60)
|
||||
|
||||
if is_valid:
|
||||
click.echo("Configuration is VALID ✓")
|
||||
click.echo("=" * 60)
|
||||
sys.exit(0)
|
||||
else:
|
||||
click.echo("Configuration is INVALID ✗")
|
||||
click.echo("=" * 60)
|
||||
sys.exit(1)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Validation failed: {e}", exc_info=verbose)
|
||||
click.echo(f"✗ Error: {e}", err=True)
|
||||
sys.exit(2)
|
||||
52
src/drt/cli/main.py
Executable file
@@ -0,0 +1,52 @@
|
||||
"""Main CLI entry point."""
|
||||
|
||||
import click
|
||||
import sys
|
||||
from drt import __version__
|
||||
from drt.cli.commands import discover, compare, validate, investigate
|
||||
from drt.utils.logging import setup_logging
|
||||
|
||||
|
||||
@click.group()
|
||||
@click.version_option(version=__version__, prog_name="drt")
|
||||
@click.option('--verbose', '-v', is_flag=True, help='Enable verbose output')
|
||||
@click.pass_context
|
||||
def cli(ctx, verbose):
|
||||
"""
|
||||
Data Regression Testing Framework
|
||||
|
||||
A comprehensive framework for validating data integrity during code migration
|
||||
and system updates by comparing data outputs between Baseline (Production)
|
||||
and Target (Test) SQL Server databases.
|
||||
"""
|
||||
ctx.ensure_object(dict)
|
||||
ctx.obj['verbose'] = verbose
|
||||
|
||||
# Setup logging
|
||||
log_level = "DEBUG" if verbose else "INFO"
|
||||
setup_logging(log_level=log_level, log_to_console=True, log_to_file=False)
|
||||
|
||||
|
||||
@cli.command()
|
||||
def version():
|
||||
"""Display version information."""
|
||||
import platform
|
||||
|
||||
click.echo("=" * 60)
|
||||
click.echo("Data Regression Testing Framework")
|
||||
click.echo("=" * 60)
|
||||
click.echo(f"Version: {__version__}")
|
||||
click.echo(f"Python: {platform.python_version()}")
|
||||
click.echo(f"Platform: {platform.platform()}")
|
||||
click.echo("=" * 60)
|
||||
|
||||
|
||||
# Register commands
|
||||
cli.add_command(discover.discover)
|
||||
cli.add_command(compare.compare)
|
||||
cli.add_command(validate.validate)
|
||||
cli.add_command(investigate.investigate)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
cli()
|
||||
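With the subcommands registered on the `cli` group, the whole interface can be exercised end to end. A minimal sketch using Click's test runner; it assumes the package is installed (e.g. `pip install -e .`), and the `drt` console script is presumably wired to `drt.cli.main:cli`, although that packaging detail is not shown in this commit:

```python
from click.testing import CliRunner

from drt.cli.main import cli

runner = CliRunner()

# Top-level help lists the registered subcommands:
# compare, discover, investigate, validate, plus the extra `version` command.
print(runner.invoke(cli, ["--help"]).output)

# `version` prints framework, Python and platform information.
print(runner.invoke(cli, ["version"]).output)
```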
7
src/drt/config/__init__.py
Executable file
@@ -0,0 +1,7 @@
"""Configuration management for the framework."""

from drt.config.loader import load_config
from drt.config.validator import validate_config
from drt.config.models import Config

__all__ = ["load_config", "validate_config", "Config"]
84
src/drt/config/loader.py
Executable file
@@ -0,0 +1,84 @@
|
||||
"""Configuration file loader."""
|
||||
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
from drt.config.models import Config
|
||||
from drt.utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def load_config(config_path: Union[str, Path]) -> Config:
|
||||
"""
|
||||
Load configuration from YAML file.
|
||||
|
||||
Args:
|
||||
config_path: Path to configuration file
|
||||
|
||||
Returns:
|
||||
Parsed configuration object
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If config file doesn't exist
|
||||
yaml.YAMLError: If YAML is invalid
|
||||
ValueError: If configuration is invalid
|
||||
"""
|
||||
config_path = Path(config_path)
|
||||
|
||||
if not config_path.exists():
|
||||
raise FileNotFoundError(f"Configuration file not found: {config_path}")
|
||||
|
||||
logger.info(f"Loading configuration from: {config_path}")
|
||||
|
||||
try:
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
config_data = yaml.safe_load(f)
|
||||
|
||||
if not config_data:
|
||||
raise ValueError("Configuration file is empty")
|
||||
|
||||
# Parse with Pydantic
|
||||
config = Config(**config_data)
|
||||
|
||||
logger.info(f"Configuration loaded successfully")
|
||||
logger.info(f" Database pairs: {len(config.database_pairs)}")
|
||||
logger.info(f" Tables configured: {len(config.tables)}")
|
||||
|
||||
return config
|
||||
|
||||
except yaml.YAMLError as e:
|
||||
logger.error(f"YAML parsing error: {e}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Configuration loading error: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def save_config(config: Config, output_path: Union[str, Path]) -> None:
|
||||
"""
|
||||
Save configuration to YAML file.
|
||||
|
||||
Args:
|
||||
config: Configuration object to save
|
||||
output_path: Path where to save the configuration
|
||||
"""
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
logger.info(f"Saving configuration to: {output_path}")
|
||||
|
||||
# Convert to dict and save as YAML
|
||||
config_dict = config.model_dump(exclude_none=True)
|
||||
|
||||
with open(output_path, "w", encoding="utf-8") as f:
|
||||
yaml.dump(
|
||||
config_dict,
|
||||
f,
|
||||
default_flow_style=False,
|
||||
sort_keys=False,
|
||||
allow_unicode=True,
|
||||
width=100,
|
||||
)
|
||||
|
||||
logger.info(f"Configuration saved successfully")
|
||||
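`load_config` and `save_config` are meant to round-trip, which is what the discovery command relies on. A minimal sketch, assuming a `config.yaml` with at least one database pair already exists:

```python
from drt.config.loader import load_config, save_config

cfg = load_config("./config.yaml")  # may raise FileNotFoundError / YAMLError / ValueError
print(len(cfg.database_pairs), "database pair(s) configured")

# Tweak a setting and write it back out; None values are dropped on save.
cfg.reporting.output_directory = "./reports/nightly"
save_config(cfg, "./config.nightly.yaml")
```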
199
src/drt/config/models.py
Executable file
@@ -0,0 +1,199 @@
|
||||
"""Pydantic models for configuration."""
|
||||
|
||||
from typing import List, Optional, Dict, Any
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
|
||||
class ConnectionConfig(BaseModel):
|
||||
"""Database connection configuration."""
|
||||
server: str
|
||||
database: str
|
||||
username: Optional[str] = None
|
||||
password: Optional[str] = None
|
||||
timeout: Dict[str, int] = Field(default_factory=lambda: {"connection": 30, "query": 300})
|
||||
|
||||
|
||||
class DatabasePairConfig(BaseModel):
|
||||
"""Configuration for a database pair to compare."""
|
||||
name: str
|
||||
enabled: bool = True
|
||||
baseline: ConnectionConfig
|
||||
target: ConnectionConfig
|
||||
|
||||
|
||||
class RowCountConfig(BaseModel):
|
||||
"""Row count comparison configuration."""
|
||||
enabled: bool = True
|
||||
tolerance_percent: float = 0.0
|
||||
|
||||
|
||||
class SchemaConfig(BaseModel):
|
||||
"""Schema comparison configuration."""
|
||||
enabled: bool = True
|
||||
checks: Dict[str, bool] = Field(default_factory=lambda: {
|
||||
"column_names": True,
|
||||
"data_types": True,
|
||||
"nullability": False,
|
||||
"column_order": False
|
||||
})
|
||||
severity: Dict[str, str] = Field(default_factory=lambda: {
|
||||
"missing_column_in_target": "FAIL",
|
||||
"extra_column_in_target": "WARNING",
|
||||
"data_type_mismatch": "WARNING"
|
||||
})
|
||||
|
||||
|
||||
class AggregatesConfig(BaseModel):
|
||||
"""Aggregate comparison configuration."""
|
||||
enabled: bool = True
|
||||
tolerance_percent: float = 0.01
|
||||
large_table_threshold: int = 10000000
|
||||
sample_size: int = 100000
|
||||
|
||||
|
||||
class TableExistenceConfig(BaseModel):
|
||||
"""Table existence check configuration."""
|
||||
missing_table_default: str = "FAIL"
|
||||
extra_table_action: str = "INFO"
|
||||
|
||||
|
||||
class ComparisonConfig(BaseModel):
|
||||
"""Comparison settings."""
|
||||
mode: str = "health_check"
|
||||
row_count: RowCountConfig = Field(default_factory=RowCountConfig)
|
||||
schema_config: SchemaConfig = Field(default_factory=SchemaConfig, alias="schema")
|
||||
aggregates: AggregatesConfig = Field(default_factory=AggregatesConfig)
|
||||
table_existence: TableExistenceConfig = Field(default_factory=TableExistenceConfig)
|
||||
|
||||
@property
|
||||
def schema(self) -> SchemaConfig:
|
||||
"""Return schema config for backward compatibility."""
|
||||
return self.schema_config
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
|
||||
|
||||
class ExecutionConfig(BaseModel):
|
||||
"""Execution settings."""
|
||||
continue_on_error: bool = True
|
||||
retry: Dict[str, int] = Field(default_factory=lambda: {"attempts": 3, "delay_seconds": 5})
|
||||
|
||||
|
||||
class TableFilterConfig(BaseModel):
|
||||
"""Table filtering configuration."""
|
||||
mode: str = "all"
|
||||
include_list: List[Dict[str, str]] = Field(default_factory=list)
|
||||
exclude_patterns: List[str] = Field(default_factory=lambda: [
|
||||
"*_TEMP", "*_TMP", "*_BAK", "*_BACKUP", "*_OLD", "tmp*", "temp*", "#*"
|
||||
])
|
||||
exclude_schemas: List[str] = Field(default_factory=lambda: [
|
||||
"sys", "INFORMATION_SCHEMA", "guest"
|
||||
])
|
||||
|
||||
|
||||
class TableConfig(BaseModel):
|
||||
"""Individual table configuration."""
|
||||
schema_name: str = Field(..., alias="schema")
|
||||
name: str
|
||||
enabled: bool = True
|
||||
expected_in_target: bool = True
|
||||
estimated_row_count: int = 0
|
||||
primary_key_columns: List[str] = Field(default_factory=list)
|
||||
aggregate_columns: List[str] = Field(default_factory=list)
|
||||
notes: str = ""
|
||||
|
||||
@property
|
||||
def schema(self) -> str:
|
||||
"""Return schema name for backward compatibility."""
|
||||
return self.schema_name
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
|
||||
|
||||
class ReportingConfig(BaseModel):
|
||||
"""Reporting configuration."""
|
||||
output_directory: str = "./reports"
|
||||
investigation_directory: str = "./investigation_reports"
|
||||
formats: List[str] = Field(default_factory=lambda: ["html", "csv"])
|
||||
filename_template: str = "regression_report_{timestamp}"
|
||||
html: Dict[str, Any] = Field(default_factory=lambda: {
|
||||
"embed_styles": True,
|
||||
"include_charts": True,
|
||||
"colors": {
|
||||
"pass": "#28a745",
|
||||
"fail": "#dc3545",
|
||||
"warning": "#ffc107",
|
||||
"error": "#6f42c1",
|
||||
"info": "#17a2b8",
|
||||
"skip": "#6c757d"
|
||||
}
|
||||
})
|
||||
csv: Dict[str, Any] = Field(default_factory=lambda: {
|
||||
"delimiter": ",",
|
||||
"include_header": True,
|
||||
"encoding": "utf-8-sig"
|
||||
})
|
||||
pdf: Dict[str, str] = Field(default_factory=lambda: {
|
||||
"page_size": "A4",
|
||||
"orientation": "landscape"
|
||||
})
|
||||
|
||||
|
||||
class LoggingConfig(BaseModel):
|
||||
"""Logging configuration."""
|
||||
level: str = "INFO"
|
||||
directory: str = "./logs"
|
||||
filename_template: str = "drt_{timestamp}.log"
|
||||
console: bool = True
|
||||
format: str = "%(asctime)s | %(levelname)-8s | %(name)-20s | %(message)s"
|
||||
date_format: str = "%Y%m%d_%H%M%S"
|
||||
|
||||
|
||||
class DiscoveryConfig(BaseModel):
|
||||
"""Discovery settings."""
|
||||
output_file: str = "./config_discovered.yaml"
|
||||
analysis_directory: str = "./analysis"
|
||||
include_schemas: List[str] = Field(default_factory=list)
|
||||
exclude_schemas: List[str] = Field(default_factory=lambda: [
|
||||
"sys", "INFORMATION_SCHEMA", "guest"
|
||||
])
|
||||
exclude_patterns: List[str] = Field(default_factory=lambda: [
|
||||
"*_TEMP", "*_TMP", "*_BAK", "#*"
|
||||
])
|
||||
include_row_counts: bool = True
|
||||
include_column_details: bool = True
|
||||
detect_numeric_columns: bool = True
|
||||
detect_primary_keys: bool = True
|
||||
default_expected_in_target: bool = True
|
||||
|
||||
|
||||
class MetadataConfig(BaseModel):
|
||||
"""Configuration metadata."""
|
||||
config_version: str = "1.0"
|
||||
generated_date: Optional[str] = None
|
||||
generated_by: Optional[str] = None
|
||||
framework_version: str = "1.0.0"
|
||||
|
||||
|
||||
class Config(BaseModel):
|
||||
"""Main configuration model."""
|
||||
metadata: MetadataConfig = Field(default_factory=MetadataConfig)
|
||||
connections: Dict[str, ConnectionConfig] = Field(default_factory=dict)
|
||||
database_pairs: List[DatabasePairConfig] = Field(default_factory=list)
|
||||
comparison: ComparisonConfig = Field(default_factory=ComparisonConfig)
|
||||
execution: ExecutionConfig = Field(default_factory=ExecutionConfig)
|
||||
table_filters: TableFilterConfig = Field(default_factory=TableFilterConfig)
|
||||
tables: List[TableConfig] = Field(default_factory=list)
|
||||
reporting: ReportingConfig = Field(default_factory=ReportingConfig)
|
||||
logging: LoggingConfig = Field(default_factory=LoggingConfig)
|
||||
discovery: DiscoveryConfig = Field(default_factory=DiscoveryConfig)
|
||||
|
||||
@field_validator('database_pairs')
|
||||
@classmethod
|
||||
def validate_database_pairs(cls, v):
|
||||
"""Ensure at least one database pair is configured."""
|
||||
if not v:
|
||||
raise ValueError("At least one database pair must be configured")
|
||||
return v
|
||||
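Because the models use Pydantic field aliases, YAML can use the natural key `schema` while the Python attribute is `schema_name`. A minimal in-memory sketch of the smallest configuration the `database_pairs` validator accepts; the server, database, and table names are placeholders:

```python
from drt.config.models import Config

cfg = Config(
    database_pairs=[{
        "name": "orbis_dwh",
        "baseline": {"server": "PRODSQL01", "database": "ORBIS_DWH_PROD"},
        "target": {"server": "TESTSQL01", "database": "ORBIS_DWH_TEST"},
    }],
    tables=[{
        "schema": "dbo",  # alias for schema_name
        "name": "FactSales",
        "aggregate_columns": ["Amount", "Quantity"],
    }],
)

print(cfg.tables[0].schema_name)                     # dbo
print(cfg.comparison.aggregates.tolerance_percent)   # 0.01 (default)
```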
79
src/drt/config/validator.py
Executable file
@@ -0,0 +1,79 @@
|
||||
"""Configuration validator."""
|
||||
|
||||
from typing import List, Tuple
|
||||
from drt.config.models import Config
|
||||
from drt.utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def validate_config(config: Config) -> Tuple[bool, List[str]]:
|
||||
"""
|
||||
Validate configuration for completeness and correctness.
|
||||
|
||||
Args:
|
||||
config: Configuration to validate
|
||||
|
||||
Returns:
|
||||
Tuple of (is_valid, list_of_errors)
|
||||
"""
|
||||
errors = []
|
||||
warnings = []
|
||||
|
||||
# Check database pairs
|
||||
if not config.database_pairs:
|
||||
errors.append("No database pairs configured")
|
||||
|
||||
for pair in config.database_pairs:
|
||||
if not pair.baseline.server or not pair.baseline.database:
|
||||
errors.append(f"Database pair '{pair.name}': Baseline connection incomplete")
|
||||
if not pair.target.server or not pair.target.database:
|
||||
errors.append(f"Database pair '{pair.name}': Target connection incomplete")
|
||||
|
||||
# Check comparison mode
|
||||
valid_modes = ["health_check", "detailed"]
|
||||
if config.comparison.mode not in valid_modes:
|
||||
errors.append(f"Invalid comparison mode: {config.comparison.mode}. Must be one of {valid_modes}")
|
||||
|
||||
# Check table configuration
|
||||
if config.table_filters.mode == "include_list" and not config.table_filters.include_list:
|
||||
warnings.append("Table filter mode is 'include_list' but include_list is empty")
|
||||
|
||||
# Check for tables marked as not expected in target
|
||||
not_expected_count = sum(1 for t in config.tables if not t.expected_in_target)
|
||||
if not_expected_count > 0:
|
||||
warnings.append(f"{not_expected_count} table(s) marked as expected_in_target: false")
|
||||
|
||||
# Check for disabled tables
|
||||
disabled_count = sum(1 for t in config.tables if not t.enabled)
|
||||
if disabled_count > 0:
|
||||
warnings.append(f"{disabled_count} table(s) disabled (enabled: false)")
|
||||
|
||||
# Check reporting formats
|
||||
valid_formats = ["html", "csv", "pdf"]
|
||||
for fmt in config.reporting.formats:
|
||||
if fmt not in valid_formats:
|
||||
errors.append(f"Invalid report format: {fmt}. Must be one of {valid_formats}")
|
||||
|
||||
# Check logging level
|
||||
valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR"]
|
||||
if config.logging.level.upper() not in valid_levels:
|
||||
errors.append(f"Invalid logging level: {config.logging.level}. Must be one of {valid_levels}")
|
||||
|
||||
# Log results
|
||||
if errors:
|
||||
logger.error(f"Configuration validation failed with {len(errors)} error(s)")
|
||||
for error in errors:
|
||||
logger.error(f" ❌ {error}")
|
||||
|
||||
if warnings:
|
||||
logger.warning(f"Configuration has {len(warnings)} warning(s)")
|
||||
for warning in warnings:
|
||||
logger.warning(f" ⚠️ {warning}")
|
||||
|
||||
if not errors and not warnings:
|
||||
logger.info("✓ Configuration is valid")
|
||||
elif not errors:
|
||||
logger.info("✓ Configuration is valid (with warnings)")
|
||||
|
||||
return len(errors) == 0, errors
|
||||
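Note that warnings are only logged; the return value carries errors alone, so callers that care about warnings need to watch the log output. A short usage sketch (the config path is illustrative):

```python
from drt.config.loader import load_config
from drt.config.validator import validate_config

cfg = load_config("./config.yaml")
is_valid, errors = validate_config(cfg)

if not is_valid:
    for err in errors:
        print(f"  ✗ {err}")
    raise SystemExit(1)
print("Configuration OK")
```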
7
src/drt/database/__init__.py
Executable file
@@ -0,0 +1,7 @@
"""Database access layer."""

from drt.database.connection import ConnectionManager
from drt.database.executor import QueryExecutor
from drt.database.queries import SQLQueries

__all__ = ["ConnectionManager", "QueryExecutor", "SQLQueries"]
176
src/drt/database/connection.py
Executable file
@@ -0,0 +1,176 @@
|
||||
"""Database connection management."""
|
||||
|
||||
import pyodbc
|
||||
import platform
|
||||
from typing import Optional
|
||||
from contextlib import contextmanager
|
||||
from drt.config.models import ConnectionConfig
|
||||
from drt.utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def get_odbc_driver() -> str:
|
||||
"""
|
||||
Detect available ODBC driver for SQL Server.
|
||||
|
||||
Returns:
|
||||
ODBC driver name
|
||||
"""
|
||||
# Get list of available drivers
|
||||
drivers = [driver for driver in pyodbc.drivers() if 'SQL Server' in driver]
|
||||
|
||||
# Prefer newer drivers
|
||||
preferred_order = [
|
||||
'ODBC Driver 18 for SQL Server',
|
||||
'ODBC Driver 17 for SQL Server',
|
||||
'ODBC Driver 13 for SQL Server',
|
||||
'SQL Server Native Client 11.0',
|
||||
'SQL Server'
|
||||
]
|
||||
|
||||
for preferred in preferred_order:
|
||||
if preferred in drivers:
|
||||
logger.debug(f"Using ODBC driver: {preferred}")
|
||||
return preferred
|
||||
|
||||
# Fallback to first available
|
||||
if drivers:
|
||||
logger.warning(f"Using fallback driver: {drivers[0]}")
|
||||
return drivers[0]
|
||||
|
||||
# Default fallback
|
||||
logger.warning("No SQL Server ODBC driver found, using default")
|
||||
return 'ODBC Driver 17 for SQL Server'
|
||||
|
||||
|
||||
class ConnectionManager:
|
||||
"""Manages database connections using Windows Authentication."""
|
||||
|
||||
def __init__(self, config: ConnectionConfig):
|
||||
"""
|
||||
Initialize connection manager.
|
||||
|
||||
Args:
|
||||
config: Connection configuration
|
||||
"""
|
||||
self.config = config
|
||||
self._connection: Optional[pyodbc.Connection] = None
|
||||
|
||||
def connect(self) -> pyodbc.Connection:
|
||||
"""
|
||||
Establish database connection using Windows or SQL Authentication.
|
||||
|
||||
Returns:
|
||||
Database connection
|
||||
|
||||
Raises:
|
||||
pyodbc.Error: If connection fails
|
||||
"""
|
||||
if self._connection and not self._connection.closed:
|
||||
return self._connection
|
||||
|
||||
try:
|
||||
# Detect available ODBC driver
|
||||
driver = get_odbc_driver()
|
||||
|
||||
# Build connection string
|
||||
conn_str_parts = [
|
||||
f"DRIVER={{{driver}}}",
|
||||
f"SERVER={self.config.server}",
|
||||
f"DATABASE={self.config.database}",
|
||||
f"Connection Timeout={self.config.timeout.get('connection', 30)}"
|
||||
]
|
||||
|
||||
# Check if username/password are provided for SQL Authentication
|
||||
if hasattr(self.config, 'username') and self.config.username:
|
||||
conn_str_parts.append(f"UID={self.config.username}")
|
||||
conn_str_parts.append(f"PWD={self.config.password}")
|
||||
auth_type = "SQL Authentication"
|
||||
else:
|
||||
# Use Windows Authentication
|
||||
conn_str_parts.append("Trusted_Connection=yes")
|
||||
auth_type = "Windows Authentication"
|
||||
|
||||
# Add TrustServerCertificate on Linux for self-signed certs
|
||||
if platform.system() != 'Windows':
|
||||
conn_str_parts.append("TrustServerCertificate=yes")
|
||||
|
||||
conn_str = ";".join(conn_str_parts) + ";"
|
||||
|
||||
logger.info(f"Connecting to {self.config.server}.{self.config.database}")
|
||||
logger.debug(f"Connection string: {conn_str.replace(self.config.server, 'SERVER').replace(self.config.password if hasattr(self.config, 'password') and self.config.password else '', '***')}")
|
||||
self._connection = pyodbc.connect(conn_str)
|
||||
|
||||
# Set query timeout
|
||||
query_timeout = self.config.timeout.get('query', 300)
|
||||
self._connection.timeout = query_timeout
|
||||
|
||||
logger.info(f"✓ Connected ({auth_type})")
|
||||
return self._connection
|
||||
|
||||
except pyodbc.Error as e:
|
||||
logger.error(f"Connection failed: {e}")
|
||||
raise
|
||||
|
||||
def disconnect(self) -> None:
|
||||
"""Close database connection."""
|
||||
if self._connection and not self._connection.closed:
|
||||
self._connection.close()
|
||||
logger.info("Connection closed")
|
||||
self._connection = None
|
||||
|
||||
@contextmanager
|
||||
def get_connection(self):
|
||||
"""
|
||||
Context manager for database connections.
|
||||
|
||||
Yields:
|
||||
Database connection
|
||||
|
||||
Example:
|
||||
with conn_mgr.get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("SELECT 1")
|
||||
"""
|
||||
conn = self.connect()
|
||||
try:
|
||||
yield conn
|
||||
finally:
|
||||
# Don't close connection here - reuse it
|
||||
pass
|
||||
|
||||
def test_connection(self) -> bool:
|
||||
"""
|
||||
Test database connectivity.
|
||||
|
||||
Returns:
|
||||
True if connection successful, False otherwise
|
||||
"""
|
||||
try:
|
||||
with self.get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("SELECT 1")
|
||||
cursor.fetchone()
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Connection test failed: {e}")
|
||||
return False
|
||||
|
||||
@property
|
||||
def is_connected(self) -> bool:
|
||||
"""Check if connection is active."""
|
||||
return self._connection is not None and not self._connection.closed
|
||||
|
||||
def __enter__(self):
|
||||
"""Context manager entry."""
|
||||
self.connect()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Context manager exit."""
|
||||
self.disconnect()
|
||||
|
||||
def __del__(self):
|
||||
"""Cleanup on deletion."""
|
||||
self.disconnect()
|
||||
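A minimal sketch of using `ConnectionManager` on its own, with placeholder connection details; omitting `username`/`password` selects Windows Authentication, per the `connect()` logic above:

```python
from drt.config.models import ConnectionConfig
from drt.database.connection import ConnectionManager

conn_cfg = ConnectionConfig(server="PRODSQL01", database="ORBIS_DWH_PROD")

# Usable as a context manager; the connection is closed on exit.
with ConnectionManager(conn_cfg) as mgr:
    if mgr.test_connection():
        with mgr.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT @@VERSION")
            print(cursor.fetchone()[0])
```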
267
src/drt/database/executor.py
Executable file
@@ -0,0 +1,267 @@
|
||||
"""Query executor for READ ONLY database operations."""
|
||||
|
||||
import pandas as pd
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from drt.database.connection import ConnectionManager
|
||||
from drt.database.queries import SQLQueries
|
||||
from drt.models.enums import Status
|
||||
from drt.utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class QueryExecutor:
|
||||
"""Executes READ ONLY queries against the database."""
|
||||
|
||||
def __init__(self, connection_manager: ConnectionManager):
|
||||
"""
|
||||
Initialize query executor.
|
||||
|
||||
Args:
|
||||
connection_manager: Connection manager instance
|
||||
"""
|
||||
self.conn_mgr = connection_manager
|
||||
|
||||
def execute_query(self, query: str, params: tuple = None) -> pd.DataFrame:
|
||||
"""
|
||||
Execute a SELECT query and return results as DataFrame.
|
||||
|
||||
Args:
|
||||
query: SQL query string (SELECT only)
|
||||
params: Query parameters
|
||||
|
||||
Returns:
|
||||
Query results as pandas DataFrame
|
||||
|
||||
Raises:
|
||||
ValueError: If query is not a SELECT statement
|
||||
Exception: If query execution fails
|
||||
"""
|
||||
# Safety check - only allow SELECT queries
|
||||
query_upper = query.strip().upper()
|
||||
if not query_upper.startswith('SELECT'):
|
||||
raise ValueError("Only SELECT queries are allowed (READ ONLY)")
|
||||
|
||||
try:
|
||||
with self.conn_mgr.get_connection() as conn:
|
||||
if params:
|
||||
df = pd.read_sql(query, conn, params=params)
|
||||
else:
|
||||
df = pd.read_sql(query, conn)
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Query execution failed: {e}")
|
||||
logger.debug(f"Query: {query}")
|
||||
raise
|
||||
|
||||
def execute_scalar(self, query: str, params: tuple = None) -> Any:
|
||||
"""
|
||||
Execute query and return single scalar value.
|
||||
|
||||
Args:
|
||||
query: SQL query string
|
||||
params: Query parameters
|
||||
|
||||
Returns:
|
||||
Single scalar value
|
||||
"""
|
||||
df = self.execute_query(query, params)
|
||||
if df.empty:
|
||||
return None
|
||||
return df.iloc[0, 0]
|
||||
|
||||
def get_row_count(self, schema: str, table: str) -> int:
|
||||
"""
|
||||
Get row count for a table.
|
||||
|
||||
Args:
|
||||
schema: Schema name
|
||||
table: Table name
|
||||
|
||||
Returns:
|
||||
Row count
|
||||
"""
|
||||
query = SQLQueries.build_row_count_query(schema, table)
|
||||
count = self.execute_scalar(query)
|
||||
return int(count) if count is not None else 0
|
||||
|
||||
def table_exists(self, schema: str, table: str) -> bool:
|
||||
"""
|
||||
Check if table exists.
|
||||
|
||||
Args:
|
||||
schema: Schema name
|
||||
table: Table name
|
||||
|
||||
Returns:
|
||||
True if table exists, False otherwise
|
||||
"""
|
||||
count = self.execute_scalar(SQLQueries.CHECK_TABLE_EXISTS, (schema, table))
|
||||
return int(count) > 0 if count is not None else False
|
||||
|
||||
def get_all_tables(self) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get list of all user tables in the database.
|
||||
|
||||
Returns:
|
||||
List of table information dictionaries
|
||||
"""
|
||||
df = self.execute_query(SQLQueries.GET_ALL_TABLES)
|
||||
return df.to_dict('records')
|
||||
|
||||
def get_columns(self, schema: str, table: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get column information for a table.
|
||||
|
||||
Args:
|
||||
schema: Schema name
|
||||
table: Table name
|
||||
|
||||
Returns:
|
||||
List of column information dictionaries
|
||||
"""
|
||||
df = self.execute_query(SQLQueries.GET_COLUMNS, (schema, table))
|
||||
return df.to_dict('records')
|
||||
|
||||
def get_primary_keys(self, schema: str, table: str) -> List[str]:
|
||||
"""
|
||||
Get primary key columns for a table.
|
||||
|
||||
Args:
|
||||
schema: Schema name
|
||||
table: Table name
|
||||
|
||||
Returns:
|
||||
List of primary key column names
|
||||
"""
|
||||
# Diagnostic: Check what columns are available in CONSTRAINT_COLUMN_USAGE
|
||||
try:
|
||||
logger.debug("Checking CONSTRAINT_COLUMN_USAGE schema...")
|
||||
constraint_cols_df = self.execute_query(SQLQueries.GET_CONSTRAINT_COLUMNS_SCHEMA)
|
||||
logger.debug(f"CONSTRAINT_COLUMN_USAGE columns: {constraint_cols_df['COLUMN_NAME'].tolist()}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not query CONSTRAINT_COLUMN_USAGE schema: {e}")
|
||||
|
||||
# Diagnostic: Check what columns are available in KEY_COLUMN_USAGE
|
||||
try:
|
||||
logger.debug("Checking KEY_COLUMN_USAGE schema...")
|
||||
key_cols_df = self.execute_query(SQLQueries.GET_KEY_COLUMNS_SCHEMA)
|
||||
logger.debug(f"KEY_COLUMN_USAGE columns: {key_cols_df['COLUMN_NAME'].tolist()}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not query KEY_COLUMN_USAGE schema: {e}")
|
||||
|
||||
df = self.execute_query(SQLQueries.GET_PRIMARY_KEYS, (schema, table))
|
||||
return df['COLUMN_NAME'].tolist() if not df.empty else []
|
||||
|
||||
def get_aggregate_sums(self, schema: str, table: str, columns: List[str]) -> Dict[str, float]:
|
||||
"""
|
||||
Get aggregate sums for numeric columns.
|
||||
|
||||
Args:
|
||||
schema: Schema name
|
||||
table: Table name
|
||||
columns: List of column names to aggregate
|
||||
|
||||
Returns:
|
||||
Dictionary mapping column names to their sums
|
||||
"""
|
||||
if not columns:
|
||||
return {}
|
||||
|
||||
query = SQLQueries.build_aggregate_query(schema, table, columns)
|
||||
if not query:
|
||||
return {}
|
||||
|
||||
df = self.execute_query(query)
|
||||
if df.empty:
|
||||
return {col: 0.0 for col in columns}
|
||||
|
||||
# Extract results
|
||||
results = {}
|
||||
for col in columns:
|
||||
sum_col = f"{col}_sum"
|
||||
if sum_col in df.columns:
|
||||
value = df.iloc[0][sum_col]
|
||||
results[col] = float(value) if pd.notna(value) else 0.0
|
||||
else:
|
||||
results[col] = 0.0
|
||||
|
||||
return results
|
||||
|
||||
def execute_investigation_query(
|
||||
self,
|
||||
query: str,
|
||||
timeout: Optional[int] = None
|
||||
) -> Tuple[Status, Optional[pd.DataFrame], Optional[str], int]:
|
||||
"""
|
||||
Execute investigation query with comprehensive error handling.
|
||||
|
||||
This method is specifically for investigation queries and does NOT
|
||||
enforce the SELECT-only restriction. It handles errors gracefully
|
||||
and returns detailed status information.
|
||||
|
||||
Args:
|
||||
query: SQL query to execute
|
||||
timeout: Query timeout in seconds (optional)
|
||||
|
||||
Returns:
|
||||
Tuple of (status, result_df, error_message, execution_time_ms)
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
# Execute query
|
||||
with self.conn_mgr.get_connection() as conn:
|
||||
if timeout:
|
||||
# Apply the timeout via pyodbc's connection-level setting
# ("SET QUERY_TIMEOUT" is not a valid T-SQL statement on SQL Server)
try:
conn.timeout = timeout
except Exception:
# Timeout setting not supported by the driver, continue anyway
pass
|
||||
|
||||
df = pd.read_sql(query, conn)
|
||||
|
||||
execution_time = int((time.time() - start_time) * 1000)
|
||||
|
||||
return (Status.PASS, df, None, execution_time)
|
||||
|
||||
except Exception as e:
|
||||
execution_time = int((time.time() - start_time) * 1000)
|
||||
error_msg = str(e)
|
||||
error_type = type(e).__name__
|
||||
|
||||
# Categorize error
|
||||
if any(phrase in error_msg.lower() for phrase in [
|
||||
'does not exist',
|
||||
'invalid object name',
|
||||
'could not find',
|
||||
'not found'
|
||||
]):
|
||||
status = Status.SKIP
|
||||
message = f"Object not found: {error_msg}"
|
||||
|
||||
elif 'timeout' in error_msg.lower():
|
||||
status = Status.FAIL
|
||||
message = f"Query timeout: {error_msg}"
|
||||
|
||||
elif any(phrase in error_msg.lower() for phrase in [
|
||||
'syntax error',
|
||||
'incorrect syntax'
|
||||
]):
|
||||
status = Status.FAIL
|
||||
message = f"Syntax error: {error_msg}"
|
||||
|
||||
elif 'permission' in error_msg.lower():
|
||||
status = Status.FAIL
|
||||
message = f"Permission denied: {error_msg}"
|
||||
|
||||
else:
|
||||
status = Status.FAIL
|
||||
message = f"{error_type}: {error_msg}"
|
||||
|
||||
logger.debug(f"Query execution failed: {message}")
|
||||
return (status, None, message, execution_time)
|
||||
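Putting the two layers together: the executor wraps a connection manager and exposes the read-only helpers used by the comparison service. A minimal sketch with hypothetical schema, table, and column names:

```python
from drt.config.models import ConnectionConfig
from drt.database.connection import ConnectionManager
from drt.database.executor import QueryExecutor

executor = QueryExecutor(
    ConnectionManager(ConnectionConfig(server="PRODSQL01", database="ORBIS_DWH_PROD"))
)

if executor.table_exists("dbo", "FactSales"):
    rows = executor.get_row_count("dbo", "FactSales")
    sums = executor.get_aggregate_sums("dbo", "FactSales", ["Amount", "Quantity"])
    print(f"dbo.FactSales: {rows:,} rows, SUM(Amount) = {sums['Amount']:.2f}")
```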
128
src/drt/database/queries.py
Executable file
@@ -0,0 +1,128 @@
|
||||
"""SQL query templates for database operations."""
|
||||
|
||||
|
||||
class SQLQueries:
|
||||
"""Collection of SQL query templates (READ ONLY)."""
|
||||
|
||||
# Table discovery queries
|
||||
GET_ALL_TABLES = """
|
||||
SELECT
|
||||
s.name AS schema_name,
|
||||
t.name AS table_name,
|
||||
SUM(p.rows) AS estimated_rows
|
||||
FROM sys.tables t WITH (NOLOCK)
|
||||
INNER JOIN sys.schemas s WITH (NOLOCK) ON t.schema_id = s.schema_id
|
||||
INNER JOIN sys.partitions p WITH (NOLOCK) ON t.object_id = p.object_id
|
||||
WHERE t.type = 'U'
|
||||
AND p.index_id IN (0, 1)
|
||||
GROUP BY s.name, t.name
|
||||
ORDER BY s.name, t.name
|
||||
"""
|
||||
|
||||
GET_COLUMNS = """
|
||||
SELECT
|
||||
COLUMN_NAME,
|
||||
DATA_TYPE,
|
||||
CHARACTER_MAXIMUM_LENGTH,
|
||||
NUMERIC_PRECISION,
|
||||
NUMERIC_SCALE,
|
||||
IS_NULLABLE,
|
||||
ORDINAL_POSITION
|
||||
FROM INFORMATION_SCHEMA.COLUMNS WITH (NOLOCK)
|
||||
WHERE TABLE_SCHEMA = ?
|
||||
AND TABLE_NAME = ?
|
||||
ORDER BY ORDINAL_POSITION
|
||||
"""
|
||||
|
||||
# Diagnostic query to check available columns in CONSTRAINT_COLUMN_USAGE
|
||||
GET_CONSTRAINT_COLUMNS_SCHEMA = """
|
||||
SELECT COLUMN_NAME
|
||||
FROM INFORMATION_SCHEMA.COLUMNS WITH (NOLOCK)
|
||||
WHERE TABLE_SCHEMA = 'INFORMATION_SCHEMA'
|
||||
AND TABLE_NAME = 'CONSTRAINT_COLUMN_USAGE'
|
||||
ORDER BY ORDINAL_POSITION
|
||||
"""
|
||||
|
||||
# Diagnostic query to check available columns in KEY_COLUMN_USAGE
|
||||
GET_KEY_COLUMNS_SCHEMA = """
|
||||
SELECT COLUMN_NAME
|
||||
FROM INFORMATION_SCHEMA.COLUMNS WITH (NOLOCK)
|
||||
WHERE TABLE_SCHEMA = 'INFORMATION_SCHEMA'
|
||||
AND TABLE_NAME = 'KEY_COLUMN_USAGE'
|
||||
ORDER BY ORDINAL_POSITION
|
||||
"""
|
||||
|
||||
GET_PRIMARY_KEYS = """
|
||||
SELECT
|
||||
c.COLUMN_NAME
|
||||
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc WITH (NOLOCK)
|
||||
INNER JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE c WITH (NOLOCK)
|
||||
ON tc.CONSTRAINT_NAME = c.CONSTRAINT_NAME
|
||||
WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
|
||||
AND tc.TABLE_SCHEMA = ?
|
||||
AND tc.TABLE_NAME = ?
|
||||
"""
|
||||
|
||||
# Comparison queries
|
||||
GET_ROW_COUNT = """
|
||||
SELECT COUNT(*) AS row_count
|
||||
FROM [{schema}].[{table}] WITH (NOLOCK)
|
||||
"""
|
||||
|
||||
CHECK_TABLE_EXISTS = """
|
||||
SELECT COUNT(*) AS table_exists
|
||||
FROM INFORMATION_SCHEMA.TABLES WITH (NOLOCK)
|
||||
WHERE TABLE_SCHEMA = ?
|
||||
AND TABLE_NAME = ?
|
||||
"""
|
||||
|
||||
GET_AGGREGATE_SUMS = """
|
||||
SELECT {column_expressions}
|
||||
FROM [{schema}].[{table}] WITH (NOLOCK)
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def build_row_count_query(schema: str, table: str) -> str:
|
||||
"""Build row count query for a specific table."""
|
||||
return SQLQueries.GET_ROW_COUNT.format(schema=schema, table=table)
|
||||
|
||||
@staticmethod
|
||||
def build_aggregate_query(schema: str, table: str, columns: list[str]) -> "str | None":
|
||||
"""
|
||||
Build aggregate query for numeric columns.
|
||||
|
||||
Args:
|
||||
schema: Schema name
|
||||
table: Table name
|
||||
columns: List of column names to aggregate
|
||||
|
||||
Returns:
|
||||
SQL query string, or None if no columns were given
|
||||
"""
|
||||
if not columns:
|
||||
return None
|
||||
|
||||
# Build column expressions
|
||||
column_expressions = []
|
||||
for col in columns:
|
||||
# Cast to FLOAT to handle different numeric types
|
||||
expr = f"SUM(CAST([{col}] AS FLOAT)) AS [{col}_sum]"
|
||||
column_expressions.append(expr)
|
||||
|
||||
column_expr_str = ",\n ".join(column_expressions)
|
||||
|
||||
return SQLQueries.GET_AGGREGATE_SUMS.format(
|
||||
schema=schema,
|
||||
table=table,
|
||||
column_expressions=column_expr_str
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def is_numeric_type(data_type: str) -> bool:
|
||||
"""Check if a data type is numeric."""
|
||||
numeric_types = {
|
||||
'int', 'bigint', 'smallint', 'tinyint',
|
||||
'decimal', 'numeric', 'float', 'real',
|
||||
'money', 'smallmoney'
|
||||
}
|
||||
return data_type.lower() in numeric_types
|
||||
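The query builders are plain string templating, so they can be inspected without a database connection. For example, `build_aggregate_query` emits one `SUM(CAST(... AS FLOAT))` expression per column:

```python
from drt.database.queries import SQLQueries

sql = SQLQueries.build_aggregate_query("dbo", "FactSales", ["Amount", "Quantity"])
print(sql)
# Roughly (whitespace trimmed):
#   SELECT SUM(CAST([Amount] AS FLOAT)) AS [Amount_sum],
#          SUM(CAST([Quantity] AS FLOAT)) AS [Quantity_sum]
#   FROM [dbo].[FactSales] WITH (NOLOCK)

print(SQLQueries.is_numeric_type("decimal"))   # True
print(SQLQueries.is_numeric_type("nvarchar"))  # False
```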
16
src/drt/models/__init__.py
Executable file
@@ -0,0 +1,16 @@
"""Data models for the regression testing framework."""

from drt.models.enums import Status, CheckType
from drt.models.table import TableInfo, ColumnInfo
from drt.models.results import ComparisonResult, CheckResult
from drt.models.summary import ExecutionSummary

__all__ = [
    "Status",
    "CheckType",
    "TableInfo",
    "ColumnInfo",
    "ComparisonResult",
    "CheckResult",
    "ExecutionSummary",
]
49
src/drt/models/enums.py
Executable file
@@ -0,0 +1,49 @@
|
||||
"""Enumerations for status and check types."""
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Status(str, Enum):
|
||||
"""Result status enumeration."""
|
||||
|
||||
PASS = "PASS"
|
||||
FAIL = "FAIL"
|
||||
WARNING = "WARNING"
|
||||
ERROR = "ERROR"
|
||||
INFO = "INFO"
|
||||
SKIP = "SKIP"
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.value
|
||||
|
||||
@property
|
||||
def severity(self) -> int:
|
||||
"""Return severity level for comparison (higher = more severe)."""
|
||||
severity_map = {
|
||||
Status.ERROR: 6,
|
||||
Status.FAIL: 5,
|
||||
Status.WARNING: 4,
|
||||
Status.INFO: 3,
|
||||
Status.PASS: 2,
|
||||
Status.SKIP: 1,
|
||||
}
|
||||
return severity_map[self]
|
||||
|
||||
@classmethod
|
||||
def most_severe(cls, statuses: list["Status"]) -> "Status":
|
||||
"""Return the most severe status from a list."""
|
||||
if not statuses:
|
||||
return cls.SKIP
|
||||
return max(statuses, key=lambda s: s.severity)
|
||||
|
||||
|
||||
class CheckType(str, Enum):
|
||||
"""Type of comparison check."""
|
||||
|
||||
EXISTENCE = "TABLE_EXISTENCE"
|
||||
ROW_COUNT = "ROW_COUNT"
|
||||
SCHEMA = "SCHEMA"
|
||||
AGGREGATE = "AGGREGATE"
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.value
|
||||
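The severity ordering is what lets a table's overall result be derived from its individual check results. A small sketch:

```python
from drt.models.enums import Status

checks = [Status.PASS, Status.WARNING, Status.PASS]
print(Status.most_severe(checks))   # WARNING

checks.append(Status.FAIL)
print(Status.most_severe(checks))   # FAIL
print(Status.most_severe([]))       # SKIP (empty input)
```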
70
src/drt/models/investigation.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Data models for investigation feature."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional
|
||||
import pandas as pd
|
||||
from drt.models.enums import Status
|
||||
|
||||
|
||||
@dataclass
|
||||
class QueryExecutionResult:
|
||||
"""Result of executing a single query."""
|
||||
query_number: int
|
||||
query_text: str
|
||||
status: Status
|
||||
execution_time_ms: int
|
||||
result_data: Optional[pd.DataFrame] = None
|
||||
error_message: Optional[str] = None
|
||||
row_count: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class TableInvestigationResult:
|
||||
"""Results for all queries in a table's investigation."""
|
||||
schema: str
|
||||
table: str
|
||||
sql_file_path: str
|
||||
baseline_results: List[QueryExecutionResult]
|
||||
target_results: List[QueryExecutionResult]
|
||||
overall_status: Status
|
||||
timestamp: str
|
||||
|
||||
@property
|
||||
def full_name(self) -> str:
|
||||
"""Get full table name."""
|
||||
return f"{self.schema}.{self.table}"
|
||||
|
||||
@property
|
||||
def total_queries(self) -> int:
|
||||
"""Get total number of queries."""
|
||||
return len(self.baseline_results)
|
||||
|
||||
@property
|
||||
def successful_queries(self) -> int:
|
||||
"""Get number of successful queries."""
|
||||
all_results = self.baseline_results + self.target_results
|
||||
return sum(1 for r in all_results if r.status == Status.PASS)
|
||||
|
||||
|
||||
@dataclass
|
||||
class InvestigationSummary:
|
||||
"""Overall investigation execution summary."""
|
||||
start_time: str
|
||||
end_time: str
|
||||
duration_seconds: int
|
||||
analysis_directory: str
|
||||
baseline_info: str
|
||||
target_info: str
|
||||
tables_processed: int
|
||||
tables_successful: int
|
||||
tables_partial: int
|
||||
tables_failed: int
|
||||
total_queries_executed: int
|
||||
results: List[TableInvestigationResult] = field(default_factory=list)
|
||||
|
||||
@property
|
||||
def success_rate(self) -> float:
|
||||
"""Calculate success rate percentage."""
|
||||
if self.tables_processed == 0:
|
||||
return 0.0
|
||||
return (self.tables_successful / self.tables_processed) * 100
|
||||
49
src/drt/models/results.py
Executable file
@@ -0,0 +1,49 @@
|
||||
"""Result models for comparison operations."""
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from drt.models.enums import Status, CheckType
|
||||
from drt.models.table import TableInfo
|
||||
|
||||
|
||||
class CheckResult(BaseModel):
|
||||
"""Result of a single check operation."""
|
||||
|
||||
check_type: CheckType
|
||||
status: Status
|
||||
baseline_value: Any = None
|
||||
target_value: Any = None
|
||||
difference: Any = None
|
||||
message: str = ""
|
||||
details: Dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
|
||||
class ComparisonResult(BaseModel):
|
||||
"""Result of comparing a single table."""
|
||||
|
||||
table: TableInfo
|
||||
overall_status: Status
|
||||
check_results: list[CheckResult] = Field(default_factory=list)
|
||||
execution_time_ms: int = 0
|
||||
error_message: str = ""
|
||||
timestamp: str = ""
|
||||
|
||||
def add_check(self, check_result: CheckResult) -> None:
|
||||
"""Add a check result and update overall status."""
|
||||
self.check_results.append(check_result)
|
||||
# Update overall status to most severe
|
||||
all_statuses = [cr.status for cr in self.check_results]
|
||||
self.overall_status = Status.most_severe(all_statuses)
|
||||
|
||||
def get_check(self, check_type: CheckType) -> Optional[CheckResult]:
|
||||
"""Get check result by type."""
|
||||
for check in self.check_results:
|
||||
if check.check_type == check_type:
|
||||
return check
|
||||
return None
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
65
src/drt/models/summary.py
Executable file
@@ -0,0 +1,65 @@
"""Execution summary model."""

from typing import List
from pydantic import BaseModel, Field
from drt.models.results import ComparisonResult
from drt.models.enums import Status


class ExecutionSummary(BaseModel):
    """Summary of an entire test execution."""

    start_time: str
    end_time: str
    duration_seconds: int
    total_tables: int = 0
    passed: int = 0
    failed: int = 0
    warnings: int = 0
    errors: int = 0
    skipped: int = 0
    info: int = 0
    results: List[ComparisonResult] = Field(default_factory=list)
    config_file: str = ""
    baseline_info: str = ""
    target_info: str = ""

    def add_result(self, result: ComparisonResult) -> None:
        """Add a comparison result and update counters."""
        self.results.append(result)
        self.total_tables += 1

        # Update status counters
        status = result.overall_status
        if status == Status.PASS:
            self.passed += 1
        elif status == Status.FAIL:
            self.failed += 1
        elif status == Status.WARNING:
            self.warnings += 1
        elif status == Status.ERROR:
            self.errors += 1
        elif status == Status.INFO:
            self.info += 1
        elif status == Status.SKIP:
            self.skipped += 1

    @property
    def has_failures(self) -> bool:
        """Check if there are any failures."""
        return self.failed > 0

    @property
    def has_errors(self) -> bool:
        """Check if there are any errors."""
        return self.errors > 0

    @property
    def success_rate(self) -> float:
        """Calculate success rate percentage."""
        if self.total_tables == 0:
            return 0.0
        return (self.passed / self.total_tables) * 100

    class Config:
        arbitrary_types_allowed = True
53
src/drt/models/table.py
Executable file
@@ -0,0 +1,53 @@
"""Table and column information models."""

from typing import List, Optional
from pydantic import BaseModel, Field


class ColumnInfo(BaseModel):
    """Information about a database column."""

    name: str
    data_type: str
    max_length: Optional[int] = None
    precision: Optional[int] = None
    scale: Optional[int] = None
    is_nullable: bool = True
    is_numeric: bool = False
    ordinal_position: int

    class Config:
        frozen = True


class TableInfo(BaseModel):
    """Information about a database table."""

    schema_name: str = Field(..., alias="schema")
    name: str
    estimated_row_count: int = 0
    columns: List[ColumnInfo] = Field(default_factory=list)
    primary_key_columns: List[str] = Field(default_factory=list)
    enabled: bool = True
    expected_in_target: bool = True
    aggregate_columns: List[str] = Field(default_factory=list)
    notes: str = ""

    @property
    def schema(self) -> str:
        """Return schema name for backward compatibility."""
        return self.schema_name

    @property
    def full_name(self) -> str:
        """Return fully qualified table name."""
        return f"{self.schema_name}.{self.name}"

    @property
    def numeric_columns(self) -> List[ColumnInfo]:
        """Return list of numeric columns."""
        return [col for col in self.columns if col.is_numeric]

    class Config:
        frozen = False
        populate_by_name = True  # Allow both 'schema' and 'schema_name'
7
src/drt/reporting/__init__.py
Executable file
@@ -0,0 +1,7 @@
"""Reporting module for generating test reports."""

from drt.reporting.generator import ReportGenerator
from drt.reporting.html import HTMLReportGenerator
from drt.reporting.csv import CSVReportGenerator

__all__ = ["ReportGenerator", "HTMLReportGenerator", "CSVReportGenerator"]
97
src/drt/reporting/csv.py
Executable file
@@ -0,0 +1,97 @@
|
||||
"""CSV report generator."""
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from drt.models.summary import ExecutionSummary
|
||||
from drt.models.enums import CheckType
|
||||
from drt.config.models import Config
|
||||
from drt.utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class CSVReportGenerator:
|
||||
"""Generates CSV format reports."""
|
||||
|
||||
def __init__(self, config: Config):
|
||||
"""
|
||||
Initialize CSV generator.
|
||||
|
||||
Args:
|
||||
config: Configuration object
|
||||
"""
|
||||
self.config = config
|
||||
|
||||
def generate(self, summary: ExecutionSummary, filepath: Path) -> None:
|
||||
"""
|
||||
Generate CSV report.
|
||||
|
||||
Args:
|
||||
summary: Execution summary
|
||||
filepath: Output file path
|
||||
"""
|
||||
csv_config = self.config.reporting.csv
|
||||
delimiter = csv_config.get("delimiter", ",")
|
||||
encoding = csv_config.get("encoding", "utf-8-sig")
|
||||
|
||||
with open(filepath, "w", newline="", encoding=encoding) as f:
|
||||
writer = csv.writer(f, delimiter=delimiter)
|
||||
|
||||
# Write header
|
||||
writer.writerow([
|
||||
"Timestamp",
|
||||
"Schema",
|
||||
"Table",
|
||||
"Overall_Status",
|
||||
"Existence_Status",
|
||||
"RowCount_Status",
|
||||
"Baseline_Rows",
|
||||
"Target_Rows",
|
||||
"Row_Difference",
|
||||
"Row_Diff_Pct",
|
||||
"Schema_Status",
|
||||
"Schema_Details",
|
||||
"Aggregate_Status",
|
||||
"Aggregate_Details",
|
||||
"Expected_In_Target",
|
||||
"Notes",
|
||||
"Execution_Time_Ms"
|
||||
])
|
||||
|
||||
# Write data rows
|
||||
for result in summary.results:
|
||||
# Get check results
|
||||
existence = result.get_check(CheckType.EXISTENCE)
|
||||
row_count = result.get_check(CheckType.ROW_COUNT)
|
||||
schema = result.get_check(CheckType.SCHEMA)
|
||||
aggregate = result.get_check(CheckType.AGGREGATE)
|
||||
|
||||
# Extract values
|
||||
baseline_rows = row_count.baseline_value if row_count else "N/A"
|
||||
target_rows = row_count.target_value if row_count else "N/A"
|
||||
row_diff = row_count.difference if row_count else "N/A"
|
||||
row_diff_pct = ""
|
||||
if row_count and row_count.baseline_value and row_count.baseline_value > 0:
|
||||
row_diff_pct = f"{(row_count.difference / row_count.baseline_value * 100):.2f}%"
|
||||
|
||||
writer.writerow([
|
||||
result.timestamp,
|
||||
result.table.schema,
|
||||
result.table.name,
|
||||
result.overall_status.value,
|
||||
existence.status.value if existence else "N/A",
|
||||
row_count.status.value if row_count else "N/A",
|
||||
baseline_rows,
|
||||
target_rows,
|
||||
row_diff,
|
||||
row_diff_pct,
|
||||
schema.status.value if schema else "N/A",
|
||||
schema.message if schema else "",
|
||||
aggregate.status.value if aggregate else "N/A",
|
||||
aggregate.message if aggregate else "",
|
||||
result.table.expected_in_target,
|
||||
result.table.notes,
|
||||
result.execution_time_ms
|
||||
])
|
||||
|
||||
logger.debug(f"CSV report written to {filepath}")
|
||||
84
src/drt/reporting/generator.py
Executable file
@@ -0,0 +1,84 @@
|
||||
"""Report generator orchestrator."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from drt.models.summary import ExecutionSummary
|
||||
from drt.config.models import Config
|
||||
from drt.reporting.html import HTMLReportGenerator
|
||||
from drt.reporting.csv import CSVReportGenerator
|
||||
from drt.utils.logging import get_logger
|
||||
from drt.utils.timestamps import get_timestamp
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ReportGenerator:
|
||||
"""Orchestrates report generation in multiple formats."""
|
||||
|
||||
def __init__(self, config: Config):
|
||||
"""
|
||||
Initialize report generator.
|
||||
|
||||
Args:
|
||||
config: Configuration object
|
||||
"""
|
||||
self.config = config
|
||||
# Use absolute path from config
|
||||
self.output_dir = Path(config.reporting.output_directory).expanduser().resolve()
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def generate_reports(self, summary: ExecutionSummary) -> List[str]:
|
||||
"""
|
||||
Generate reports in all configured formats.
|
||||
|
||||
Args:
|
||||
summary: Execution summary
|
||||
|
||||
Returns:
|
||||
List of generated report file paths
|
||||
"""
|
||||
logger.info("Generating reports...")
|
||||
|
||||
generated_files = []
|
||||
timestamp = summary.start_time
|
||||
|
||||
# Generate filename
|
||||
filename_base = self.config.reporting.filename_template.format(
|
||||
timestamp=timestamp,
|
||||
config_name="regression"
|
||||
)
|
||||
|
||||
for fmt in self.config.reporting.formats:
|
||||
try:
|
||||
if fmt == "html":
|
||||
filepath = self._generate_html(summary, filename_base)
|
||||
generated_files.append(filepath)
|
||||
elif fmt == "csv":
|
||||
filepath = self._generate_csv(summary, filename_base)
|
||||
generated_files.append(filepath)
|
||||
elif fmt == "pdf":
|
||||
logger.warning("PDF generation not yet implemented")
|
||||
else:
|
||||
logger.warning(f"Unknown report format: {fmt}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to generate {fmt} report: {e}")
|
||||
|
||||
logger.info(f"Generated {len(generated_files)} report(s)")
|
||||
return generated_files
|
||||
|
||||
def _generate_html(self, summary: ExecutionSummary, filename_base: str) -> str:
|
||||
"""Generate HTML report."""
|
||||
generator = HTMLReportGenerator(self.config)
|
||||
filepath = self.output_dir / f"{filename_base}.html"
|
||||
generator.generate(summary, filepath)
|
||||
logger.info(f"✓ HTML: {filepath}")
|
||||
return str(filepath)
|
||||
|
||||
def _generate_csv(self, summary: ExecutionSummary, filename_base: str) -> str:
|
||||
"""Generate CSV report."""
|
||||
generator = CSVReportGenerator(self.config)
|
||||
filepath = self.output_dir / f"{filename_base}.csv"
|
||||
generator.generate(summary, filepath)
|
||||
logger.info(f"✓ CSV: {filepath}")
|
||||
return str(filepath)
|
||||
239
src/drt/reporting/html.py
Executable file
@@ -0,0 +1,239 @@
|
||||
"""HTML report generator."""
|
||||
|
||||
from pathlib import Path
|
||||
from drt.models.summary import ExecutionSummary
|
||||
from drt.models.enums import Status, CheckType
|
||||
from drt.config.models import Config
|
||||
from drt.utils.logging import get_logger
|
||||
from drt.utils.timestamps import format_duration
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class HTMLReportGenerator:
|
||||
"""Generates HTML format reports."""
|
||||
|
||||
def __init__(self, config: Config):
|
||||
"""
|
||||
Initialize HTML generator.
|
||||
|
||||
Args:
|
||||
config: Configuration object
|
||||
"""
|
||||
self.config = config
|
||||
self.colors = config.reporting.html.get("colors", {})
|
||||
|
||||
def generate(self, summary: ExecutionSummary, filepath: Path) -> None:
|
||||
"""
|
||||
Generate HTML report.
|
||||
|
||||
Args:
|
||||
summary: Execution summary
|
||||
filepath: Output file path
|
||||
"""
|
||||
html_content = self._build_html(summary)
|
||||
|
||||
with open(filepath, "w", encoding="utf-8") as f:
|
||||
f.write(html_content)
|
||||
|
||||
logger.debug(f"HTML report written to {filepath}")
|
||||
|
||||
def _build_html(self, summary: ExecutionSummary) -> str:
|
||||
"""Build complete HTML document."""
|
||||
return f"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Data Regression Test Report - {summary.start_time}</title>
|
||||
{self._get_styles()}
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
{self._build_header(summary)}
|
||||
{self._build_summary(summary)}
|
||||
{self._build_failures(summary)}
|
||||
{self._build_warnings(summary)}
|
||||
{self._build_detailed_results(summary)}
|
||||
{self._build_footer(summary)}
|
||||
</div>
|
||||
</body>
|
||||
</html>"""
|
||||
|
||||
def _get_styles(self) -> str:
|
||||
"""Get embedded CSS styles."""
|
||||
return """<style>
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: #f5f5f5; padding: 20px; }
|
||||
.container { max-width: 1400px; margin: 0 auto; background: white; padding: 30px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
|
||||
h1 { color: #333; border-bottom: 3px solid #007bff; padding-bottom: 10px; margin-bottom: 20px; }
|
||||
h2 { color: #555; margin-top: 30px; margin-bottom: 15px; border-left: 4px solid #007bff; padding-left: 10px; }
|
||||
.header { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 8px; margin-bottom: 30px; }
|
||||
.header h1 { color: white; border: none; }
|
||||
.info-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 15px; margin: 20px 0; }
|
||||
.info-box { background: #f8f9fa; padding: 15px; border-radius: 5px; border-left: 4px solid #007bff; }
|
||||
.info-label { font-weight: bold; color: #666; font-size: 0.9em; }
|
||||
.info-value { color: #333; font-size: 1.1em; margin-top: 5px; }
|
||||
.summary-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 15px; margin: 20px 0; }
|
||||
.summary-box { padding: 20px; border-radius: 8px; text-align: center; color: white; }
|
||||
.summary-box.pass { background: #28a745; }
|
||||
.summary-box.fail { background: #dc3545; }
|
||||
.summary-box.warning { background: #ffc107; color: #333; }
|
||||
.summary-box.error { background: #6f42c1; }
|
||||
.summary-box.info { background: #17a2b8; }
|
||||
.summary-box.skip { background: #6c757d; }
|
||||
.summary-number { font-size: 2.5em; font-weight: bold; }
|
||||
.summary-label { font-size: 0.9em; margin-top: 5px; }
|
||||
.summary-percent { font-size: 0.8em; opacity: 0.9; }
|
||||
table { width: 100%; border-collapse: collapse; margin: 20px 0; }
|
||||
th { background: #007bff; color: white; padding: 12px; text-align: left; font-weight: 600; }
|
||||
td { padding: 10px 12px; border-bottom: 1px solid #dee2e6; }
|
||||
tr:hover { background: #f8f9fa; }
|
||||
.status-badge { display: inline-block; padding: 4px 12px; border-radius: 12px; font-size: 0.85em; font-weight: 600; }
|
||||
.status-PASS { background: #d4edda; color: #155724; }
|
||||
.status-FAIL { background: #f8d7da; color: #721c24; }
|
||||
.status-WARNING { background: #fff3cd; color: #856404; }
|
||||
.status-ERROR { background: #e7d6f5; color: #4a148c; }
|
||||
.status-INFO { background: #d1ecf1; color: #0c5460; }
|
||||
.status-SKIP { background: #e2e3e5; color: #383d41; }
|
||||
.failure-box { background: #fff5f5; border: 1px solid #feb2b2; border-radius: 5px; padding: 15px; margin: 10px 0; }
|
||||
.failure-title { font-weight: bold; color: #c53030; margin-bottom: 8px; }
|
||||
.failure-detail { color: #666; margin: 5px 0; font-size: 0.95em; }
|
||||
.footer { margin-top: 40px; padding-top: 20px; border-top: 1px solid #dee2e6; text-align: center; color: #666; font-size: 0.9em; }
|
||||
</style>"""
|
||||
|
||||
def _build_header(self, summary: ExecutionSummary) -> str:
|
||||
"""Build report header."""
|
||||
return f"""<div class="header">
|
||||
<h1>📊 Data Regression Test Report</h1>
|
||||
<p>Generated: {summary.start_time}</p>
|
||||
</div>
|
||||
|
||||
<div class="info-grid">
|
||||
<div class="info-box">
|
||||
<div class="info-label">Start Time</div>
|
||||
<div class="info-value">{summary.start_time}</div>
|
||||
</div>
|
||||
<div class="info-box">
|
||||
<div class="info-label">End Time</div>
|
||||
<div class="info-value">{summary.end_time}</div>
|
||||
</div>
|
||||
<div class="info-box">
|
||||
<div class="info-label">Duration</div>
|
||||
<div class="info-value">{format_duration(summary.duration_seconds)}</div>
|
||||
</div>
|
||||
<div class="info-box">
|
||||
<div class="info-label">Baseline</div>
|
||||
<div class="info-value">{summary.baseline_info}</div>
|
||||
</div>
|
||||
<div class="info-box">
|
||||
<div class="info-label">Target</div>
|
||||
<div class="info-value">{summary.target_info}</div>
|
||||
</div>
|
||||
<div class="info-box">
|
||||
<div class="info-label">Total Tables</div>
|
||||
<div class="info-value">{summary.total_tables}</div>
|
||||
</div>
|
||||
</div>"""
|
||||
|
||||
def _build_summary(self, summary: ExecutionSummary) -> str:
|
||||
"""Build summary section."""
|
||||
return f"""<h2>Summary</h2>
|
||||
<div class="summary-grid">
|
||||
<div class="summary-box pass">
|
||||
<div class="summary-number">{summary.passed}</div>
|
||||
<div class="summary-label">PASS</div>
|
||||
<div class="summary-percent">{(summary.passed/summary.total_tables*100) if summary.total_tables > 0 else 0:.1f}%</div>
|
||||
</div>
|
||||
<div class="summary-box fail">
|
||||
<div class="summary-number">{summary.failed}</div>
|
||||
<div class="summary-label">FAIL</div>
|
||||
<div class="summary-percent">{(summary.failed/summary.total_tables*100) if summary.total_tables > 0 else 0:.1f}%</div>
|
||||
</div>
|
||||
<div class="summary-box warning">
|
||||
<div class="summary-number">{summary.warnings}</div>
|
||||
<div class="summary-label">WARNING</div>
|
||||
<div class="summary-percent">{(summary.warnings/summary.total_tables*100) if summary.total_tables > 0 else 0:.1f}%</div>
|
||||
</div>
|
||||
<div class="summary-box error">
|
||||
<div class="summary-number">{summary.errors}</div>
|
||||
<div class="summary-label">ERROR</div>
|
||||
<div class="summary-percent">{(summary.errors/summary.total_tables*100) if summary.total_tables > 0 else 0:.1f}%</div>
|
||||
</div>
|
||||
<div class="summary-box info">
|
||||
<div class="summary-number">{summary.info}</div>
|
||||
<div class="summary-label">INFO</div>
|
||||
<div class="summary-percent">{(summary.info/summary.total_tables*100) if summary.total_tables > 0 else 0:.1f}%</div>
|
||||
</div>
|
||||
<div class="summary-box skip">
|
||||
<div class="summary-number">{summary.skipped}</div>
|
||||
<div class="summary-label">SKIP</div>
|
||||
<div class="summary-percent">{(summary.skipped/summary.total_tables*100) if summary.total_tables > 0 else 0:.1f}%</div>
|
||||
</div>
|
||||
</div>"""
|
||||
|
||||
def _build_failures(self, summary: ExecutionSummary) -> str:
|
||||
"""Build failures section."""
|
||||
failures = [r for r in summary.results if r.overall_status == Status.FAIL]
|
||||
|
||||
if not failures:
|
||||
return ""
|
||||
|
||||
html = '<h2>❌ Failures (Immediate Action Required)</h2>'
|
||||
|
||||
for result in failures:
|
||||
html += f"""<div class="failure-box">
|
||||
<div class="failure-title">{result.table.full_name}</div>"""
|
||||
|
||||
for check in result.check_results:
|
||||
if check.status == Status.FAIL:
|
||||
html += f'<div class="failure-detail">• {check.check_type.value}: {check.message}</div>'
|
||||
|
||||
html += '</div>'
|
||||
|
||||
return html
|
||||
|
||||
def _build_warnings(self, summary: ExecutionSummary) -> str:
|
||||
"""Build warnings section."""
|
||||
warnings = [r for r in summary.results if r.overall_status == Status.WARNING]
|
||||
|
||||
if not warnings:
|
||||
return ""
|
||||
|
||||
html = '<h2>⚠️ Warnings</h2><ul>'
|
||||
|
||||
for result in warnings:
|
||||
for check in result.check_results:
|
||||
if check.status == Status.WARNING:
|
||||
html += f'<li><strong>{result.table.full_name}</strong>: {check.message}</li>'
|
||||
|
||||
html += '</ul>'
|
||||
return html
|
||||
|
||||
def _build_detailed_results(self, summary: ExecutionSummary) -> str:
|
||||
"""Build detailed results table."""
|
||||
html = '<h2>Detailed Results</h2><table><thead><tr>'
|
||||
html += '<th>Table</th><th>Status</th><th>Row Count</th><th>Schema</th><th>Aggregates</th><th>Time (ms)</th>'
|
||||
html += '</tr></thead><tbody>'
|
||||
|
||||
for result in summary.results:
|
||||
row_count = result.get_check(CheckType.ROW_COUNT)
|
||||
schema = result.get_check(CheckType.SCHEMA)
|
||||
aggregate = result.get_check(CheckType.AGGREGATE)
|
||||
|
||||
html += f'<tr><td>{result.table.full_name}</td>'
|
||||
html += f'<td><span class="status-badge status-{result.overall_status.value}">{result.overall_status.value}</span></td>'
|
||||
html += f'<td><span class="status-badge status-{row_count.status.value if row_count else "SKIP"}">{row_count.status.value if row_count else "SKIP"}</span></td>'
|
||||
html += f'<td><span class="status-badge status-{schema.status.value if schema else "SKIP"}">{schema.status.value if schema else "SKIP"}</span></td>'
|
||||
html += f'<td><span class="status-badge status-{aggregate.status.value if aggregate else "SKIP"}">{aggregate.status.value if aggregate else "SKIP"}</span></td>'
|
||||
html += f'<td>{result.execution_time_ms}</td></tr>'
|
||||
|
||||
html += '</tbody></table>'
|
||||
return html
|
||||
|
||||
def _build_footer(self, summary: ExecutionSummary) -> str:
|
||||
"""Build report footer."""
|
||||
return f"""<div class="footer">
|
||||
<p>Generated by Data Regression Testing Framework v1.0.0</p>
|
||||
<p>Success Rate: {summary.success_rate:.1f}%</p>
|
||||
</div>"""
|
||||
357
src/drt/reporting/investigation_report.py
Normal file
@@ -0,0 +1,357 @@
|
||||
"""Investigation report generators for HTML and CSV formats."""
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from drt.models.investigation import InvestigationSummary, QueryExecutionResult
|
||||
from drt.models.enums import Status
|
||||
from drt.config.models import Config
|
||||
from drt.utils.logging import get_logger
|
||||
from drt.utils.timestamps import format_duration
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class InvestigationHTMLReportGenerator:
|
||||
"""Generates HTML format investigation reports."""
|
||||
|
||||
def __init__(self, config: Config):
|
||||
"""
|
||||
Initialize HTML generator.
|
||||
|
||||
Args:
|
||||
config: Configuration object
|
||||
"""
|
||||
self.config = config
|
||||
self.max_rows = 100 # Limit rows displayed in HTML
|
||||
|
||||
def generate(self, summary: InvestigationSummary, filepath: Path) -> None:
|
||||
"""
|
||||
Generate HTML investigation report.
|
||||
|
||||
Args:
|
||||
summary: Investigation summary
|
||||
filepath: Output file path
|
||||
"""
|
||||
html_content = self._build_html(summary)
|
||||
|
||||
with open(filepath, "w", encoding="utf-8") as f:
|
||||
f.write(html_content)
|
||||
|
||||
logger.debug(f"Investigation HTML report written to {filepath}")
|
||||
|
||||
def _build_html(self, summary: InvestigationSummary) -> str:
|
||||
"""Build complete HTML document."""
|
||||
return f"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Investigation Report - {summary.start_time}</title>
|
||||
{self._get_styles()}
|
||||
{self._get_scripts()}
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
{self._build_header(summary)}
|
||||
{self._build_summary(summary)}
|
||||
{self._build_table_results(summary)}
|
||||
{self._build_footer(summary)}
|
||||
</div>
|
||||
</body>
|
||||
</html>"""
|
||||
|
||||
def _get_styles(self) -> str:
|
||||
"""Get embedded CSS styles."""
|
||||
return """<style>
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: #f5f5f5; padding: 20px; }
|
||||
.container { max-width: 1600px; margin: 0 auto; background: white; padding: 30px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
|
||||
h1 { color: #333; border-bottom: 3px solid #007bff; padding-bottom: 10px; margin-bottom: 20px; }
|
||||
h2 { color: #555; margin-top: 30px; margin-bottom: 15px; border-left: 4px solid #007bff; padding-left: 10px; }
|
||||
h3 { color: #666; margin-top: 20px; margin-bottom: 10px; }
|
||||
.header { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 8px; margin-bottom: 30px; }
|
||||
.header h1 { color: white; border: none; }
|
||||
.info-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 15px; margin: 20px 0; }
|
||||
.info-box { background: #f8f9fa; padding: 15px; border-radius: 5px; border-left: 4px solid #007bff; }
|
||||
.info-label { font-weight: bold; color: #666; font-size: 0.9em; }
|
||||
.info-value { color: #333; font-size: 1.1em; margin-top: 5px; }
|
||||
.summary-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 15px; margin: 20px 0; }
|
||||
.summary-box { padding: 20px; border-radius: 8px; text-align: center; color: white; }
|
||||
.summary-box.success { background: #28a745; }
|
||||
.summary-box.partial { background: #ffc107; color: #333; }
|
||||
.summary-box.failed { background: #dc3545; }
|
||||
.summary-number { font-size: 2.5em; font-weight: bold; }
|
||||
.summary-label { font-size: 0.9em; margin-top: 5px; }
|
||||
.table-card { background: #fff; border: 1px solid #dee2e6; border-radius: 8px; margin: 20px 0; overflow: hidden; }
|
||||
.table-header { background: #f8f9fa; padding: 15px; border-bottom: 2px solid #dee2e6; cursor: pointer; }
|
||||
.table-header:hover { background: #e9ecef; }
|
||||
.table-name { font-size: 1.2em; font-weight: bold; color: #333; }
|
||||
.table-status { display: inline-block; padding: 4px 12px; border-radius: 12px; font-size: 0.85em; font-weight: 600; margin-left: 10px; }
|
||||
.status-SUCCESS { background: #d4edda; color: #155724; }
|
||||
.status-PASS { background: #d4edda; color: #155724; }
|
||||
.status-FAIL { background: #f8d7da; color: #721c24; }
|
||||
.status-WARNING { background: #fff3cd; color: #856404; }
|
||||
.status-SKIP { background: #e2e3e5; color: #383d41; }
|
||||
.table-content { padding: 20px; display: none; }
|
||||
.table-content.active { display: block; }
|
||||
.query-section { margin: 20px 0; padding: 15px; background: #f8f9fa; border-radius: 5px; }
|
||||
.query-header { font-weight: bold; margin-bottom: 10px; color: #555; }
|
||||
.comparison-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin: 15px 0; }
|
||||
.env-section { background: white; padding: 15px; border-radius: 5px; border: 1px solid #dee2e6; }
|
||||
.env-title { font-weight: bold; color: #007bff; margin-bottom: 10px; }
|
||||
.query-code { background: #2d2d2d; color: #f8f8f2; padding: 15px; border-radius: 5px; overflow-x: auto; font-family: 'Courier New', monospace; font-size: 0.9em; margin: 10px 0; }
|
||||
.result-table { width: 100%; border-collapse: collapse; margin: 10px 0; font-size: 0.9em; }
|
||||
.result-table th { background: #007bff; color: white; padding: 8px; text-align: left; }
|
||||
.result-table td { padding: 8px; border-bottom: 1px solid #dee2e6; }
|
||||
.result-table tr:hover { background: #f8f9fa; }
|
||||
.error-box { background: #fff5f5; border: 1px solid #feb2b2; border-radius: 5px; padding: 15px; margin: 10px 0; color: #c53030; }
|
||||
.result-meta { display: flex; gap: 20px; margin: 10px 0; font-size: 0.9em; color: #666; }
|
||||
.footer { margin-top: 40px; padding-top: 20px; border-top: 1px solid #dee2e6; text-align: center; color: #666; font-size: 0.9em; }
|
||||
.toggle-icon { float: right; transition: transform 0.3s; }
|
||||
.toggle-icon.active { transform: rotate(180deg); }
|
||||
</style>"""
|
||||
|
||||
def _get_scripts(self) -> str:
|
||||
"""Get embedded JavaScript."""
|
||||
return """<script>
|
||||
function toggleTable(id) {
|
||||
const content = document.getElementById('content-' + id);
|
||||
const icon = document.getElementById('icon-' + id);
|
||||
content.classList.toggle('active');
|
||||
icon.classList.toggle('active');
|
||||
}
|
||||
</script>"""
|
||||
|
||||
def _build_header(self, summary: InvestigationSummary) -> str:
|
||||
"""Build report header."""
|
||||
return f"""<div class="header">
|
||||
<h1>🔍 Investigation Report</h1>
|
||||
<p>Analysis Directory: {summary.analysis_directory}</p>
|
||||
</div>
|
||||
|
||||
<div class="info-grid">
|
||||
<div class="info-box">
|
||||
<div class="info-label">Start Time</div>
|
||||
<div class="info-value">{summary.start_time}</div>
|
||||
</div>
|
||||
<div class="info-box">
|
||||
<div class="info-label">End Time</div>
|
||||
<div class="info-value">{summary.end_time}</div>
|
||||
</div>
|
||||
<div class="info-box">
|
||||
<div class="info-label">Duration</div>
|
||||
<div class="info-value">{format_duration(summary.duration_seconds)}</div>
|
||||
</div>
|
||||
<div class="info-box">
|
||||
<div class="info-label">Baseline</div>
|
||||
<div class="info-value">{summary.baseline_info}</div>
|
||||
</div>
|
||||
<div class="info-box">
|
||||
<div class="info-label">Target</div>
|
||||
<div class="info-value">{summary.target_info}</div>
|
||||
</div>
|
||||
<div class="info-box">
|
||||
<div class="info-label">Total Queries</div>
|
||||
<div class="info-value">{summary.total_queries_executed}</div>
|
||||
</div>
|
||||
</div>"""
|
||||
|
||||
def _build_summary(self, summary: InvestigationSummary) -> str:
|
||||
"""Build summary section."""
|
||||
return f"""<h2>Summary</h2>
|
||||
<div class="summary-grid">
|
||||
<div class="summary-box success">
|
||||
<div class="summary-number">{summary.tables_successful}</div>
|
||||
<div class="summary-label">Successful</div>
|
||||
</div>
|
||||
<div class="summary-box partial">
|
||||
<div class="summary-number">{summary.tables_partial}</div>
|
||||
<div class="summary-label">Partial</div>
|
||||
</div>
|
||||
<div class="summary-box failed">
|
||||
<div class="summary-number">{summary.tables_failed}</div>
|
||||
<div class="summary-label">Failed</div>
|
||||
</div>
|
||||
</div>"""
|
||||
|
||||
def _build_table_results(self, summary: InvestigationSummary) -> str:
|
||||
"""Build table-by-table results."""
|
||||
html = '<h2>Investigation Results</h2>'
|
||||
|
||||
for idx, table_result in enumerate(summary.results):
|
||||
html += f"""<div class="table-card">
|
||||
<div class="table-header" onclick="toggleTable({idx})">
|
||||
<span class="table-name">{table_result.full_name}</span>
|
||||
<span class="table-status status-{table_result.overall_status.value}">{table_result.overall_status.value}</span>
|
||||
<span class="toggle-icon" id="icon-{idx}">▼</span>
|
||||
</div>
|
||||
<div class="table-content" id="content-{idx}">
|
||||
<p><strong>SQL File:</strong> {table_result.sql_file_path}</p>
|
||||
<p><strong>Total Queries:</strong> {table_result.total_queries}</p>
|
||||
<p><strong>Successful Queries:</strong> {table_result.successful_queries}</p>
|
||||
{self._build_queries(table_result)}
|
||||
</div>
|
||||
</div>"""
|
||||
|
||||
return html
|
||||
|
||||
def _build_queries(self, table_result) -> str:
|
||||
"""Build query results for a table."""
|
||||
html = ""
|
||||
|
||||
for i, (baseline_result, target_result) in enumerate(zip(
|
||||
table_result.baseline_results,
|
||||
table_result.target_results
|
||||
), 1):
|
||||
html += f"""<div class="query-section">
|
||||
<div class="query-header">Query {baseline_result.query_number}</div>
|
||||
<details>
|
||||
<summary>View SQL</summary>
|
||||
<div class="query-code">{self._escape_html(baseline_result.query_text)}</div>
|
||||
</details>
|
||||
<div class="comparison-grid">
|
||||
{self._build_query_result(baseline_result, "Baseline")}
|
||||
{self._build_query_result(target_result, "Target")}
|
||||
</div>
|
||||
</div>"""
|
||||
|
||||
return html
|
||||
|
||||
def _build_query_result(self, result: QueryExecutionResult, env: str) -> str:
|
||||
"""Build single query result."""
|
||||
html = f"""<div class="env-section">
|
||||
<div class="env-title">{env}</div>
|
||||
<span class="table-status status-{result.status.value}">{result.status.value}</span>
|
||||
<div class="result-meta">
|
||||
<span>⏱️ {result.execution_time_ms}ms</span>
|
||||
<span>📊 {result.row_count} rows</span>
|
||||
</div>"""
|
||||
|
||||
if result.error_message:
|
||||
html += f'<div class="error-box">❌ {self._escape_html(result.error_message)}</div>'
|
||||
elif result.result_data is not None and not result.result_data.empty:
|
||||
html += self._build_result_table(result)
|
||||
|
||||
html += '</div>'
|
||||
return html
|
||||
|
||||
def _build_result_table(self, result: QueryExecutionResult) -> str:
|
||||
"""Build HTML table from DataFrame."""
|
||||
df = result.result_data
|
||||
|
||||
if df is None or df.empty:
|
||||
return '<p>No data returned</p>'
|
||||
|
||||
# Limit rows
|
||||
display_df = df.head(self.max_rows)
|
||||
|
||||
html = '<table class="result-table"><thead><tr>'
|
||||
for col in display_df.columns:
|
||||
html += f'<th>{self._escape_html(str(col))}</th>'
|
||||
html += '</tr></thead><tbody>'
|
||||
|
||||
for _, row in display_df.iterrows():
|
||||
html += '<tr>'
|
||||
for val in row:
|
||||
html += f'<td>{self._escape_html(str(val))}</td>'
|
||||
html += '</tr>'
|
||||
|
||||
html += '</tbody></table>'
|
||||
|
||||
if len(df) > self.max_rows:
|
||||
html += f'<p><em>Showing first {self.max_rows} of {len(df)} rows</em></p>'
|
||||
|
||||
return html
|
||||
|
||||
    def _escape_html(self, text: str) -> str:
        """Escape HTML special characters."""
        return (text
                .replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;')
                .replace("'", '&#39;'))
|
||||
|
||||
def _build_footer(self, summary: InvestigationSummary) -> str:
|
||||
"""Build report footer."""
|
||||
return f"""<div class="footer">
|
||||
<p>Generated by Data Regression Testing Framework - Investigation Module</p>
|
||||
<p>Success Rate: {summary.success_rate:.1f}%</p>
|
||||
</div>"""
|
||||
|
||||
|
||||
class InvestigationCSVReportGenerator:
|
||||
"""Generates CSV format investigation reports."""
|
||||
|
||||
def __init__(self, config: Config):
|
||||
"""
|
||||
Initialize CSV generator.
|
||||
|
||||
Args:
|
||||
config: Configuration object
|
||||
"""
|
||||
self.config = config
|
||||
|
||||
def generate(self, summary: InvestigationSummary, filepath: Path) -> None:
|
||||
"""
|
||||
Generate CSV investigation report.
|
||||
|
||||
Args:
|
||||
summary: Investigation summary
|
||||
filepath: Output file path
|
||||
"""
|
||||
csv_config = self.config.reporting.csv
|
||||
delimiter = csv_config.get("delimiter", ",")
|
||||
encoding = csv_config.get("encoding", "utf-8-sig")
|
||||
|
||||
with open(filepath, "w", newline="", encoding=encoding) as f:
|
||||
writer = csv.writer(f, delimiter=delimiter)
|
||||
|
||||
# Write header
|
||||
writer.writerow([
|
||||
"Timestamp",
|
||||
"Schema",
|
||||
"Table",
|
||||
"Query_Number",
|
||||
"Environment",
|
||||
"Status",
|
||||
"Row_Count",
|
||||
"Execution_Time_Ms",
|
||||
"Error_Message",
|
||||
"SQL_File_Path"
|
||||
])
|
||||
|
||||
# Write data rows
|
||||
for table_result in summary.results:
|
||||
# Baseline results
|
||||
for query_result in table_result.baseline_results:
|
||||
writer.writerow([
|
||||
table_result.timestamp,
|
||||
table_result.schema,
|
||||
table_result.table,
|
||||
query_result.query_number,
|
||||
"baseline",
|
||||
query_result.status.value,
|
||||
query_result.row_count,
|
||||
query_result.execution_time_ms,
|
||||
query_result.error_message or "",
|
||||
table_result.sql_file_path
|
||||
])
|
||||
|
||||
# Target results
|
||||
for query_result in table_result.target_results:
|
||||
writer.writerow([
|
||||
table_result.timestamp,
|
||||
table_result.schema,
|
||||
table_result.table,
|
||||
query_result.query_number,
|
||||
"target",
|
||||
query_result.status.value,
|
||||
query_result.row_count,
|
||||
query_result.execution_time_ms,
|
||||
query_result.error_message or "",
|
||||
table_result.sql_file_path
|
||||
])
|
||||
|
||||
logger.debug(f"Investigation CSV report written to {filepath}")
|
||||
6
src/drt/services/__init__.py
Executable file
@@ -0,0 +1,6 @@
"""Business logic services."""

from drt.services.discovery import DiscoveryService
from drt.services.comparison import ComparisonService

__all__ = ["DiscoveryService", "ComparisonService"]
15
src/drt/services/checkers/__init__.py
Executable file
@@ -0,0 +1,15 @@
"""Comparison checkers."""

from drt.services.checkers.base import BaseChecker
from drt.services.checkers.existence import ExistenceChecker
from drt.services.checkers.row_count import RowCountChecker
from drt.services.checkers.schema import SchemaChecker
from drt.services.checkers.aggregate import AggregateChecker

__all__ = [
    "BaseChecker",
    "ExistenceChecker",
    "RowCountChecker",
    "SchemaChecker",
    "AggregateChecker",
]
111
src/drt/services/checkers/aggregate.py
Executable file
@@ -0,0 +1,111 @@
|
||||
"""Aggregate checker."""
|
||||
|
||||
import time
|
||||
from drt.services.checkers.base import BaseChecker
|
||||
from drt.models.results import CheckResult
|
||||
from drt.models.table import TableInfo
|
||||
from drt.models.enums import Status, CheckType
|
||||
from drt.utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class AggregateChecker(BaseChecker):
|
||||
"""Checks aggregate sums for numeric columns."""
|
||||
|
||||
def check(self, table: TableInfo) -> CheckResult:
|
||||
"""
|
||||
Check aggregate sums.
|
||||
|
||||
Args:
|
||||
table: Table information
|
||||
|
||||
Returns:
|
||||
Check result
|
||||
"""
|
||||
if not self.config.comparison.aggregates.enabled:
|
||||
return CheckResult(
|
||||
check_type=CheckType.AGGREGATE,
|
||||
status=Status.SKIP,
|
||||
message="Aggregate check disabled"
|
||||
)
|
||||
|
||||
if not table.aggregate_columns:
|
||||
return CheckResult(
|
||||
check_type=CheckType.AGGREGATE,
|
||||
status=Status.SKIP,
|
||||
message="No aggregate columns configured"
|
||||
)
|
||||
|
||||
try:
|
||||
# Time baseline query
|
||||
baseline_start = time.time()
|
||||
baseline_sums = self.baseline_executor.get_aggregate_sums(
|
||||
table.schema, table.name, table.aggregate_columns
|
||||
)
|
||||
baseline_time = (time.time() - baseline_start) * 1000
|
||||
logger.debug(f" └─ Baseline aggregate query: {baseline_time:.0f}ms")
|
||||
|
||||
# Time target query
|
||||
target_start = time.time()
|
||||
target_sums = self.target_executor.get_aggregate_sums(
|
||||
table.schema, table.name, table.aggregate_columns
|
||||
)
|
||||
target_time = (time.time() - target_start) * 1000
|
||||
logger.debug(f" └─ Target aggregate query: {target_time:.0f}ms")
|
||||
logger.debug(f" └─ Total aggregate time: {baseline_time + target_time:.0f}ms (could be parallelized)")
|
||||
|
||||
tolerance_pct = self.config.comparison.aggregates.tolerance_percent
|
||||
issues = []
|
||||
statuses = []
|
||||
|
||||
for col in table.aggregate_columns:
|
||||
baseline_val = baseline_sums.get(col, 0.0)
|
||||
target_val = target_sums.get(col, 0.0)
|
||||
|
||||
if baseline_val == target_val:
|
||||
continue
|
||||
|
||||
# Calculate percentage difference
|
||||
if baseline_val != 0:
|
||||
pct_diff = abs((target_val - baseline_val) / baseline_val * 100)
|
||||
else:
|
||||
pct_diff = 100.0 if target_val != 0 else 0.0
|
||||
|
||||
if pct_diff > tolerance_pct:
|
||||
statuses.append(Status.FAIL)
|
||||
issues.append(
|
||||
f"Column '{col}': SUM differs by {pct_diff:.2f}% "
|
||||
f"(Baseline: {baseline_val:,.2f}, Target: {target_val:,.2f})"
|
||||
)
|
||||
|
||||
# Determine overall status
|
||||
if not statuses:
|
||||
status = Status.PASS
|
||||
message = f"All {len(table.aggregate_columns)} aggregate(s) match"
|
||||
else:
|
||||
status = Status.most_severe(statuses)
|
||||
message = "; ".join(issues)
|
||||
|
||||
return CheckResult(
|
||||
check_type=CheckType.AGGREGATE,
|
||||
status=status,
|
||||
baseline_value=baseline_sums,
|
||||
target_value=target_sums,
|
||||
message=message,
|
||||
details={
|
||||
"baseline_sums": baseline_sums,
|
||||
"target_sums": target_sums,
|
||||
"tolerance_percent": tolerance_pct,
|
||||
"columns_checked": table.aggregate_columns,
|
||||
"issues": issues
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Aggregate check failed for {table.full_name}: {e}")
|
||||
return CheckResult(
|
||||
check_type=CheckType.AGGREGATE,
|
||||
status=Status.ERROR,
|
||||
message=f"Aggregate check error: {str(e)}"
|
||||
)
|
||||
42
src/drt/services/checkers/base.py
Executable file
@@ -0,0 +1,42 @@
|
||||
"""Base checker class."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from drt.models.results import CheckResult
|
||||
from drt.models.table import TableInfo
|
||||
from drt.database.executor import QueryExecutor
|
||||
from drt.config.models import Config
|
||||
|
||||
|
||||
class BaseChecker(ABC):
|
||||
"""Abstract base class for all checkers."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
baseline_executor: QueryExecutor,
|
||||
target_executor: QueryExecutor,
|
||||
config: Config
|
||||
):
|
||||
"""
|
||||
Initialize checker.
|
||||
|
||||
Args:
|
||||
baseline_executor: Query executor for baseline database
|
||||
target_executor: Query executor for target database
|
||||
config: Configuration object
|
||||
"""
|
||||
self.baseline_executor = baseline_executor
|
||||
self.target_executor = target_executor
|
||||
self.config = config
|
||||
|
||||
@abstractmethod
|
||||
def check(self, table: TableInfo) -> CheckResult:
|
||||
"""
|
||||
Perform the check.
|
||||
|
||||
Args:
|
||||
table: Table information
|
||||
|
||||
Returns:
|
||||
Check result
|
||||
"""
|
||||
pass
|
||||
78
src/drt/services/checkers/existence.py
Executable file
@@ -0,0 +1,78 @@
|
||||
"""Table existence checker."""
|
||||
|
||||
import time
|
||||
from drt.services.checkers.base import BaseChecker
|
||||
from drt.models.results import CheckResult
|
||||
from drt.models.table import TableInfo
|
||||
from drt.models.enums import Status, CheckType
|
||||
from drt.utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ExistenceChecker(BaseChecker):
|
||||
"""Checks if table exists in both baseline and target."""
|
||||
|
||||
def check(self, table: TableInfo) -> CheckResult:
|
||||
"""
|
||||
Check table existence.
|
||||
|
||||
Args:
|
||||
table: Table information
|
||||
|
||||
Returns:
|
||||
Check result
|
||||
"""
|
||||
try:
|
||||
# Time baseline query
|
||||
baseline_start = time.time()
|
||||
baseline_exists = self.baseline_executor.table_exists(table.schema, table.name)
|
||||
baseline_time = (time.time() - baseline_start) * 1000
|
||||
logger.debug(f" └─ Baseline existence query: {baseline_time:.0f}ms")
|
||||
|
||||
# Time target query
|
||||
target_start = time.time()
|
||||
target_exists = self.target_executor.table_exists(table.schema, table.name)
|
||||
target_time = (time.time() - target_start) * 1000
|
||||
logger.debug(f" └─ Target existence query: {target_time:.0f}ms")
|
||||
logger.debug(f" └─ Total existence time: {baseline_time + target_time:.0f}ms (could be parallelized)")
|
||||
|
||||
# Determine status
|
||||
if baseline_exists and target_exists:
|
||||
status = Status.PASS
|
||||
message = "Table exists in both databases"
|
||||
elif baseline_exists and not target_exists:
|
||||
# Table missing in target
|
||||
if table.expected_in_target:
|
||||
status = Status.FAIL
|
||||
message = "Table exists in Baseline but missing in Target (REGRESSION)"
|
||||
else:
|
||||
status = Status.INFO
|
||||
message = "Table removed from Target (expected per configuration)"
|
||||
elif not baseline_exists and target_exists:
|
||||
status = Status.INFO
|
||||
message = "Table exists only in Target (new table)"
|
||||
else:
|
||||
status = Status.ERROR
|
||||
message = "Table does not exist in either database"
|
||||
|
||||
return CheckResult(
|
||||
check_type=CheckType.EXISTENCE,
|
||||
status=status,
|
||||
baseline_value=baseline_exists,
|
||||
target_value=target_exists,
|
||||
message=message,
|
||||
details={
|
||||
"baseline_exists": baseline_exists,
|
||||
"target_exists": target_exists,
|
||||
"expected_in_target": table.expected_in_target
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Existence check failed for {table.full_name}: {e}")
|
||||
return CheckResult(
|
||||
check_type=CheckType.EXISTENCE,
|
||||
status=Status.ERROR,
|
||||
message=f"Existence check error: {str(e)}"
|
||||
)
|
||||
90
src/drt/services/checkers/row_count.py
Executable file
@@ -0,0 +1,90 @@
|
||||
"""Row count checker."""
|
||||
|
||||
import time
|
||||
from drt.services.checkers.base import BaseChecker
|
||||
from drt.models.results import CheckResult
|
||||
from drt.models.table import TableInfo
|
||||
from drt.models.enums import Status, CheckType
|
||||
from drt.utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class RowCountChecker(BaseChecker):
|
||||
"""Checks row count differences between baseline and target."""
|
||||
|
||||
def check(self, table: TableInfo) -> CheckResult:
|
||||
"""
|
||||
Check row counts.
|
||||
|
||||
Args:
|
||||
table: Table information
|
||||
|
||||
Returns:
|
||||
Check result
|
||||
"""
|
||||
if not self.config.comparison.row_count.enabled:
|
||||
return CheckResult(
|
||||
check_type=CheckType.ROW_COUNT,
|
||||
status=Status.SKIP,
|
||||
message="Row count check disabled"
|
||||
)
|
||||
|
||||
try:
|
||||
# Time baseline query
|
||||
baseline_start = time.time()
|
||||
baseline_count = self.baseline_executor.get_row_count(table.schema, table.name)
|
||||
baseline_time = (time.time() - baseline_start) * 1000
|
||||
logger.debug(f" └─ Baseline row count query: {baseline_time:.0f}ms")
|
||||
|
||||
# Time target query
|
||||
target_start = time.time()
|
||||
target_count = self.target_executor.get_row_count(table.schema, table.name)
|
||||
target_time = (time.time() - target_start) * 1000
|
||||
logger.debug(f" └─ Target row count query: {target_time:.0f}ms")
|
||||
logger.debug(f" └─ Total row count time: {baseline_time + target_time:.0f}ms (could be parallelized)")
|
||||
|
||||
difference = target_count - baseline_count
|
||||
tolerance_pct = self.config.comparison.row_count.tolerance_percent
|
||||
|
||||
# Determine status
|
||||
if baseline_count == target_count:
|
||||
status = Status.PASS
|
||||
message = f"Row counts match: {baseline_count:,}"
|
||||
elif target_count > baseline_count:
|
||||
pct_diff = (difference / baseline_count * 100) if baseline_count > 0 else 0
|
||||
status = Status.WARNING
|
||||
message = f"Target has {difference:,} more rows (+{pct_diff:.2f}%)"
|
||||
else: # target_count < baseline_count
|
||||
pct_diff = abs(difference / baseline_count * 100) if baseline_count > 0 else 0
|
||||
|
||||
if pct_diff <= tolerance_pct:
|
||||
status = Status.WARNING
|
||||
message = f"Target has {abs(difference):,} fewer rows (-{pct_diff:.2f}%) - within tolerance"
|
||||
else:
|
||||
status = Status.FAIL
|
||||
message = f"Target missing {abs(difference):,} rows (-{pct_diff:.2f}%) - REGRESSION"
|
||||
|
||||
return CheckResult(
|
||||
check_type=CheckType.ROW_COUNT,
|
||||
status=status,
|
||||
baseline_value=baseline_count,
|
||||
target_value=target_count,
|
||||
difference=difference,
|
||||
message=message,
|
||||
details={
|
||||
"baseline_count": baseline_count,
|
||||
"target_count": target_count,
|
||||
"difference": difference,
|
||||
"percent_difference": (difference / baseline_count * 100) if baseline_count > 0 else 0,
|
||||
"tolerance_percent": tolerance_pct
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Row count check failed for {table.full_name}: {e}")
|
||||
return CheckResult(
|
||||
check_type=CheckType.ROW_COUNT,
|
||||
status=Status.ERROR,
|
||||
message=f"Row count check error: {str(e)}"
|
||||
)
|
||||
132
src/drt/services/checkers/schema.py
Executable file
@@ -0,0 +1,132 @@
|
||||
"""Schema checker."""
|
||||
|
||||
import time
|
||||
from typing import Set
|
||||
from drt.services.checkers.base import BaseChecker
|
||||
from drt.models.results import CheckResult
|
||||
from drt.models.table import TableInfo
|
||||
from drt.models.enums import Status, CheckType
|
||||
from drt.utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class SchemaChecker(BaseChecker):
|
||||
"""Checks schema differences between baseline and target."""
|
||||
|
||||
def check(self, table: TableInfo) -> CheckResult:
|
||||
"""
|
||||
Check schema compatibility.
|
||||
|
||||
Args:
|
||||
table: Table information
|
||||
|
||||
Returns:
|
||||
Check result
|
||||
"""
|
||||
if not self.config.comparison.schema.enabled:
|
||||
return CheckResult(
|
||||
check_type=CheckType.SCHEMA,
|
||||
status=Status.SKIP,
|
||||
message="Schema check disabled"
|
||||
)
|
||||
|
||||
try:
|
||||
# Time baseline query
|
||||
baseline_start = time.time()
|
||||
baseline_cols = self.baseline_executor.get_columns(table.schema, table.name)
|
||||
baseline_time = (time.time() - baseline_start) * 1000
|
||||
logger.debug(f" └─ Baseline schema query: {baseline_time:.0f}ms")
|
||||
|
||||
# Time target query
|
||||
target_start = time.time()
|
||||
target_cols = self.target_executor.get_columns(table.schema, table.name)
|
||||
target_time = (time.time() - target_start) * 1000
|
||||
logger.debug(f" └─ Target schema query: {target_time:.0f}ms")
|
||||
logger.debug(f" └─ Total schema time: {baseline_time + target_time:.0f}ms (could be parallelized)")
|
||||
|
||||
baseline_col_names = {col['COLUMN_NAME'] for col in baseline_cols}
|
||||
target_col_names = {col['COLUMN_NAME'] for col in target_cols}
|
||||
|
||||
missing_in_target = baseline_col_names - target_col_names
|
||||
extra_in_target = target_col_names - baseline_col_names
|
||||
|
||||
issues = []
|
||||
statuses = []
|
||||
|
||||
# Check for missing columns
|
||||
if missing_in_target:
|
||||
severity = self.config.comparison.schema.severity.get(
|
||||
"missing_column_in_target", "FAIL"
|
||||
)
|
||||
statuses.append(Status[severity])
|
||||
issues.append(f"Missing columns in Target: {', '.join(sorted(missing_in_target))}")
|
||||
|
||||
# Check for extra columns
|
||||
if extra_in_target:
|
||||
severity = self.config.comparison.schema.severity.get(
|
||||
"extra_column_in_target", "WARNING"
|
||||
)
|
||||
statuses.append(Status[severity])
|
||||
issues.append(f"Extra columns in Target: {', '.join(sorted(extra_in_target))}")
|
||||
|
||||
# Check data types for matching columns
|
||||
if self.config.comparison.schema.checks.get("data_types", True):
|
||||
type_mismatches = self._check_data_types(baseline_cols, target_cols)
|
||||
if type_mismatches:
|
||||
severity = self.config.comparison.schema.severity.get(
|
||||
"data_type_mismatch", "WARNING"
|
||||
)
|
||||
statuses.append(Status[severity])
|
||||
issues.extend(type_mismatches)
|
||||
|
||||
# Determine overall status
|
||||
if not statuses:
|
||||
status = Status.PASS
|
||||
message = f"Schema matches: {len(baseline_col_names)} columns"
|
||||
else:
|
||||
status = Status.most_severe(statuses)
|
||||
message = "; ".join(issues)
|
||||
|
||||
return CheckResult(
|
||||
check_type=CheckType.SCHEMA,
|
||||
status=status,
|
||||
baseline_value=len(baseline_col_names),
|
||||
target_value=len(target_col_names),
|
||||
message=message,
|
||||
details={
|
||||
"baseline_columns": sorted(baseline_col_names),
|
||||
"target_columns": sorted(target_col_names),
|
||||
"missing_in_target": sorted(missing_in_target),
|
||||
"extra_in_target": sorted(extra_in_target),
|
||||
"issues": issues
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Schema check failed for {table.full_name}: {e}")
|
||||
return CheckResult(
|
||||
check_type=CheckType.SCHEMA,
|
||||
status=Status.ERROR,
|
||||
message=f"Schema check error: {str(e)}"
|
||||
)
|
||||
|
||||
def _check_data_types(self, baseline_cols: list, target_cols: list) -> list:
|
||||
"""Check for data type mismatches."""
|
||||
mismatches = []
|
||||
|
||||
# Create lookup dictionaries
|
||||
baseline_types = {col['COLUMN_NAME']: col['DATA_TYPE'] for col in baseline_cols}
|
||||
target_types = {col['COLUMN_NAME']: col['DATA_TYPE'] for col in target_cols}
|
||||
|
||||
# Check common columns
|
||||
common_cols = set(baseline_types.keys()) & set(target_types.keys())
|
||||
|
||||
for col in sorted(common_cols):
|
||||
if baseline_types[col] != target_types[col]:
|
||||
mismatches.append(
|
||||
f"Column '{col}': type mismatch "
|
||||
f"(Baseline: {baseline_types[col]}, Target: {target_types[col]})"
|
||||
)
|
||||
|
||||
return mismatches
|
||||
250
src/drt/services/comparison.py
Executable file
@@ -0,0 +1,250 @@
|
||||
"""Comparison service for executing database comparisons."""
|
||||
|
||||
import time
|
||||
from typing import List
|
||||
from drt.database.connection import ConnectionManager
|
||||
from drt.database.executor import QueryExecutor
|
||||
from drt.config.models import Config, DatabasePairConfig
|
||||
from drt.models.table import TableInfo
|
||||
from drt.models.results import ComparisonResult
|
||||
from drt.models.summary import ExecutionSummary
|
||||
from drt.models.enums import Status
|
||||
from drt.services.checkers import (
|
||||
ExistenceChecker,
|
||||
RowCountChecker,
|
||||
SchemaChecker,
|
||||
AggregateChecker
|
||||
)
|
||||
from drt.utils.logging import get_logger
|
||||
from drt.utils.timestamps import get_timestamp
|
||||
from drt.utils.patterns import matches_pattern
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ComparisonService:
|
||||
"""Service for comparing baseline and target databases."""
|
||||
|
||||
def __init__(self, config: Config):
|
||||
"""
|
||||
Initialize comparison service.
|
||||
|
||||
Args:
|
||||
config: Configuration object
|
||||
"""
|
||||
self.config = config
|
||||
|
||||
def run_comparison(self, db_pair: DatabasePairConfig) -> ExecutionSummary:
|
||||
"""
|
||||
Run comparison for a database pair.
|
||||
|
||||
Args:
|
||||
db_pair: Database pair configuration
|
||||
|
||||
Returns:
|
||||
Execution summary with results
|
||||
"""
|
||||
start_time = get_timestamp()
|
||||
start_ts = time.time()
|
||||
|
||||
logger.info("=" * 60)
|
||||
logger.info(f"Starting comparison: {db_pair.name}")
|
||||
logger.info("=" * 60)
|
||||
|
||||
# Initialize connections
|
||||
baseline_mgr = ConnectionManager(db_pair.baseline)
|
||||
target_mgr = ConnectionManager(db_pair.target)
|
||||
|
||||
try:
|
||||
# Connect to databases
|
||||
baseline_mgr.connect()
|
||||
target_mgr.connect()
|
||||
|
||||
# Create executors
|
||||
baseline_executor = QueryExecutor(baseline_mgr)
|
||||
target_executor = QueryExecutor(target_mgr)
|
||||
|
||||
# Initialize checkers
|
||||
existence_checker = ExistenceChecker(baseline_executor, target_executor, self.config)
|
||||
row_count_checker = RowCountChecker(baseline_executor, target_executor, self.config)
|
||||
schema_checker = SchemaChecker(baseline_executor, target_executor, self.config)
|
||||
aggregate_checker = AggregateChecker(baseline_executor, target_executor, self.config)
|
||||
|
||||
# Get tables to compare
|
||||
tables = self._get_tables_to_compare()
|
||||
logger.info(f"Tables to compare: {len(tables)}")
|
||||
|
||||
# Create summary
|
||||
summary = ExecutionSummary(
|
||||
start_time=start_time,
|
||||
end_time="",
|
||||
duration_seconds=0,
|
||||
config_file=self.config.metadata.generated_date or "",
|
||||
baseline_info=f"{db_pair.baseline.server}.{db_pair.baseline.database}",
|
||||
target_info=f"{db_pair.target.server}.{db_pair.target.database}"
|
||||
)
|
||||
|
||||
# Compare each table
|
||||
for idx, table in enumerate(tables, 1):
|
||||
if not table.enabled:
|
||||
logger.info(f"[{idx:3d}/{len(tables)}] {table.full_name:40s} SKIP (disabled)")
|
||||
result = self._create_skipped_result(table)
|
||||
summary.add_result(result)
|
||||
continue
|
||||
|
||||
logger.info(f"[{idx:3d}/{len(tables)}] {table.full_name:40s} ...", extra={'end': ''})
|
||||
|
||||
result = self._compare_table(
|
||||
table,
|
||||
existence_checker,
|
||||
row_count_checker,
|
||||
schema_checker,
|
||||
aggregate_checker
|
||||
)
|
||||
|
||||
summary.add_result(result)
|
||||
|
||||
# Log result
|
||||
status_symbol = self._get_status_symbol(result.overall_status)
|
||||
logger.info(f" {status_symbol} {result.overall_status.value}")
|
||||
|
||||
if not self.config.execution.continue_on_error and result.overall_status == Status.ERROR:
|
||||
logger.error("Stopping due to error (continue_on_error=False)")
|
||||
break
|
||||
|
||||
# Finalize summary
|
||||
end_time = get_timestamp()
|
||||
duration = int(time.time() - start_ts)
|
||||
summary.end_time = end_time
|
||||
summary.duration_seconds = duration
|
||||
|
||||
# Log summary
|
||||
self._log_summary(summary)
|
||||
|
||||
return summary
|
||||
|
||||
finally:
|
||||
baseline_mgr.disconnect()
|
||||
target_mgr.disconnect()
|
||||
|
||||
def _compare_table(
|
||||
self,
|
||||
table: TableInfo,
|
||||
existence_checker: ExistenceChecker,
|
||||
row_count_checker: RowCountChecker,
|
||||
schema_checker: SchemaChecker,
|
||||
aggregate_checker: AggregateChecker
|
||||
) -> ComparisonResult:
|
||||
"""Compare a single table."""
|
||||
start_ms = time.time() * 1000
|
||||
|
||||
result = ComparisonResult(
|
||||
table=table,
|
||||
overall_status=Status.PASS,
|
||||
timestamp=get_timestamp()
|
||||
)
|
||||
|
||||
try:
|
||||
# Check existence first
|
||||
check_start = time.time()
|
||||
existence_result = existence_checker.check(table)
|
||||
existence_time = (time.time() - check_start) * 1000
|
||||
logger.debug(f" └─ Existence check: {existence_time:.0f}ms")
|
||||
result.add_check(existence_result)
|
||||
|
||||
# Only proceed with other checks if table exists in both
|
||||
if existence_result.status == Status.PASS:
|
||||
# Row count check
|
||||
check_start = time.time()
|
||||
row_count_result = row_count_checker.check(table)
|
||||
row_count_time = (time.time() - check_start) * 1000
|
||||
logger.debug(f" └─ Row count check: {row_count_time:.0f}ms")
|
||||
result.add_check(row_count_result)
|
||||
|
||||
# Schema check
|
||||
check_start = time.time()
|
||||
schema_result = schema_checker.check(table)
|
||||
schema_time = (time.time() - check_start) * 1000
|
||||
logger.debug(f" └─ Schema check: {schema_time:.0f}ms")
|
||||
result.add_check(schema_result)
|
||||
|
||||
# Aggregate check
|
||||
check_start = time.time()
|
||||
aggregate_result = aggregate_checker.check(table)
|
||||
aggregate_time = (time.time() - check_start) * 1000
|
||||
logger.debug(f" └─ Aggregate check: {aggregate_time:.0f}ms")
|
||||
result.add_check(aggregate_result)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Comparison failed for {table.full_name}: {e}")
|
||||
result.overall_status = Status.ERROR
|
||||
result.error_message = str(e)
|
||||
|
||||
result.execution_time_ms = int(time.time() * 1000 - start_ms)
|
||||
logger.debug(f" └─ Total table time: {result.execution_time_ms}ms")
|
||||
return result
|
||||
|
||||
def _get_tables_to_compare(self) -> List[TableInfo]:
|
||||
"""Get list of tables to compare based on configuration."""
|
||||
tables = []
|
||||
|
||||
for table_config in self.config.tables:
|
||||
table = TableInfo(
|
||||
schema=table_config.schema,
|
||||
name=table_config.name,
|
||||
enabled=table_config.enabled,
|
||||
expected_in_target=table_config.expected_in_target,
|
||||
estimated_row_count=table_config.estimated_row_count,
|
||||
primary_key_columns=table_config.primary_key_columns,
|
||||
aggregate_columns=table_config.aggregate_columns,
|
||||
notes=table_config.notes
|
||||
)
|
||||
tables.append(table)
|
||||
|
||||
# Apply filters
|
||||
if self.config.table_filters.mode == "include_list":
|
||||
if self.config.table_filters.include_list:
|
||||
include_names = {f"{t['schema']}.{t['name']}" for t in self.config.table_filters.include_list}
|
||||
tables = [t for t in tables if t.full_name in include_names]
|
||||
|
||||
# Apply exclusions
|
||||
tables = [
|
||||
t for t in tables
|
||||
if not matches_pattern(t.name, self.config.table_filters.exclude_patterns)
|
||||
and t.schema not in self.config.table_filters.exclude_schemas
|
||||
]
|
||||
|
||||
return tables
|
||||
|
||||
def _create_skipped_result(self, table: TableInfo) -> ComparisonResult:
|
||||
"""Create a skipped result for disabled tables."""
|
||||
return ComparisonResult(
|
||||
table=table,
|
||||
overall_status=Status.SKIP,
|
||||
timestamp=get_timestamp()
|
||||
)
|
||||
|
||||
def _get_status_symbol(self, status: Status) -> str:
|
||||
"""Get symbol for status."""
|
||||
symbols = {
|
||||
Status.PASS: "✓",
|
||||
Status.FAIL: "✗",
|
||||
Status.WARNING: "⚠",
|
||||
Status.ERROR: "🔴",
|
||||
Status.INFO: "ℹ",
|
||||
Status.SKIP: "○"
|
||||
}
|
||||
return symbols.get(status, "?")
|
||||
|
||||
def _log_summary(self, summary: ExecutionSummary) -> None:
|
||||
"""Log execution summary."""
|
||||
logger.info("=" * 60)
|
||||
logger.info("COMPARISON SUMMARY")
|
||||
logger.info("=" * 60)
|
||||
logger.info(f" PASS: {summary.passed:3d} | FAIL: {summary.failed:3d}")
|
||||
logger.info(f" WARNING: {summary.warnings:3d} | ERROR: {summary.errors:3d}")
|
||||
logger.info(f" INFO: {summary.info:3d} | SKIP: {summary.skipped:3d}")
|
||||
logger.info("=" * 60)
|
||||
logger.info(f"Duration: {summary.duration_seconds} seconds")
|
||||
logger.info(f"Success Rate: {summary.success_rate:.1f}%")
|
||||
logger.info("=" * 60)
|
||||
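The table-filter logic above expects include_list entries keyed by schema and name, plus wildcard and schema exclusions. A minimal sketch of how those filters compose, using hypothetical filter values (the real values come from the table_filters section of the YAML configuration):

from drt.utils.patterns import matches_pattern

# Hypothetical filter values for illustration only; the real ones live in table_filters.
include_list = [{"schema": "dbo", "name": "FactSales"}, {"schema": "dbo", "name": "DimCustomer"}]
exclude_patterns = ["*_TEMP", "*_BACKUP"]
exclude_schemas = ["staging"]

all_tables = [("dbo", "FactSales"), ("dbo", "Orders_TEMP"), ("staging", "DimCustomer")]

include_names = {f"{t['schema']}.{t['name']}" for t in include_list}
selected = [
    (schema, name) for schema, name in all_tables
    if f"{schema}.{name}" in include_names           # include_list mode
    and not matches_pattern(name, exclude_patterns)  # wildcard exclusions
    and schema not in exclude_schemas                # schema exclusions
]
print(selected)  # [('dbo', 'FactSales')]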
192
src/drt/services/discovery.py
Executable file
@@ -0,0 +1,192 @@
|
||||
"""Discovery service for auto-generating configuration."""
|
||||
|
||||
from typing import List
|
||||
from drt.database.connection import ConnectionManager
|
||||
from drt.database.executor import QueryExecutor
|
||||
from drt.database.queries import SQLQueries
|
||||
from drt.models.table import TableInfo, ColumnInfo
|
||||
from drt.config.models import Config, TableConfig, MetadataConfig, ConnectionConfig
|
||||
from drt.utils.logging import get_logger
|
||||
from drt.utils.timestamps import get_timestamp
|
||||
from drt.utils.patterns import matches_pattern
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class DiscoveryService:
|
||||
"""Service for discovering database tables and generating configuration."""
|
||||
|
||||
def __init__(self, connection_config: ConnectionConfig, config: Config = None):
|
||||
"""
|
||||
Initialize discovery service.
|
||||
|
||||
Args:
|
||||
connection_config: Connection configuration for baseline database
|
||||
config: Optional existing configuration for discovery settings
|
||||
"""
|
||||
self.conn_config = connection_config
|
||||
self.config = config or Config()
|
||||
self.conn_mgr = ConnectionManager(connection_config)
|
||||
self.executor = QueryExecutor(self.conn_mgr)
|
||||
|
||||
def discover_tables(self) -> List[TableInfo]:
|
||||
"""
|
||||
Discover all tables in the database.
|
||||
|
||||
Returns:
|
||||
List of discovered tables
|
||||
"""
|
||||
logger.info("Starting table discovery...")
|
||||
|
||||
try:
|
||||
# Get all tables
|
||||
tables_data = self.executor.get_all_tables()
|
||||
logger.info(f"Found {len(tables_data)} tables")
|
||||
|
||||
discovered_tables = []
|
||||
|
||||
for table_data in tables_data:
|
||||
schema = table_data['schema_name']
|
||||
name = table_data['table_name']
|
||||
estimated_rows = table_data.get('estimated_rows', 0)
|
||||
|
||||
# Apply filters
|
||||
if self._should_exclude_table(schema, name):
|
||||
logger.debug(f"Excluding table: {schema}.{name}")
|
||||
continue
|
||||
|
||||
# Get column information
|
||||
columns = self._discover_columns(schema, name)
|
||||
|
||||
# Get primary keys
|
||||
pk_columns = self.executor.get_primary_keys(schema, name)
|
||||
|
||||
# Identify numeric columns for aggregation
|
||||
aggregate_cols = [
|
||||
col.name for col in columns
|
||||
if col.is_numeric and self.config.discovery.detect_numeric_columns
|
||||
]
|
||||
|
||||
table_info = TableInfo(
|
||||
schema=schema,
|
||||
name=name,
|
||||
estimated_row_count=estimated_rows,
|
||||
columns=columns,
|
||||
primary_key_columns=pk_columns,
|
||||
enabled=True,
|
||||
expected_in_target=self.config.discovery.default_expected_in_target,
|
||||
aggregate_columns=aggregate_cols,
|
||||
notes=""
|
||||
)
|
||||
|
||||
discovered_tables.append(table_info)
|
||||
logger.debug(f"Discovered: {table_info.full_name} ({estimated_rows:,} rows)")
|
||||
|
||||
logger.info(f"Discovery complete: {len(discovered_tables)} tables discovered")
|
||||
return discovered_tables
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Discovery failed: {e}")
|
||||
raise
|
||||
|
||||
def _discover_columns(self, schema: str, table: str) -> List[ColumnInfo]:
|
||||
"""Discover columns for a table."""
|
||||
import math
|
||||
columns_data = self.executor.get_columns(schema, table)
|
||||
columns = []
|
||||
|
||||
for idx, col_data in enumerate(columns_data, 1):
|
||||
is_numeric = SQLQueries.is_numeric_type(col_data['DATA_TYPE'])
|
||||
|
||||
# Convert nan to None for Pydantic validation
|
||||
# Pandas converts SQL NULL to nan, but Pydantic v2 rejects nan for Optional[int]
|
||||
max_length = col_data.get('CHARACTER_MAXIMUM_LENGTH')
|
||||
if isinstance(max_length, float) and math.isnan(max_length):
|
||||
max_length = None
|
||||
|
||||
precision = col_data.get('NUMERIC_PRECISION')
|
||||
if isinstance(precision, float) and math.isnan(precision):
|
||||
precision = None
|
||||
|
||||
scale = col_data.get('NUMERIC_SCALE')
|
||||
if isinstance(scale, float) and math.isnan(scale):
|
||||
scale = None
|
||||
|
||||
# DEBUG: Log converted values to verify fix
|
||||
logger.debug(f"Column {col_data['COLUMN_NAME']}: max_length={max_length} (converted from {col_data.get('CHARACTER_MAXIMUM_LENGTH')}), "
|
||||
f"precision={precision}, scale={scale}, data_type={col_data['DATA_TYPE']}")
|
||||
|
||||
column = ColumnInfo(
|
||||
name=col_data['COLUMN_NAME'],
|
||||
data_type=col_data['DATA_TYPE'],
|
||||
max_length=max_length,
|
||||
precision=precision,
|
||||
scale=scale,
|
||||
is_nullable=col_data['IS_NULLABLE'] == 'YES',
|
||||
is_numeric=is_numeric,
|
||||
ordinal_position=col_data.get('ORDINAL_POSITION', idx)
|
||||
)
|
||||
columns.append(column)
|
||||
|
||||
return columns
|
||||
|
||||
def _should_exclude_table(self, schema: str, table: str) -> bool:
|
||||
"""Check if table should be excluded based on filters."""
|
||||
# Check schema exclusions
|
||||
if schema in self.config.discovery.exclude_schemas:
|
||||
return True
|
||||
|
||||
# Check table name patterns
|
||||
if matches_pattern(table, self.config.discovery.exclude_patterns):
|
||||
return True
|
||||
|
||||
# Check schema inclusions (if specified)
|
||||
if self.config.discovery.include_schemas:
|
||||
if schema not in self.config.discovery.include_schemas:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def generate_config(self, tables: List[TableInfo]) -> Config:
|
||||
"""
|
||||
Generate configuration from discovered tables.
|
||||
|
||||
Args:
|
||||
tables: List of discovered tables
|
||||
|
||||
Returns:
|
||||
Generated configuration
|
||||
"""
|
||||
logger.info("Generating configuration...")
|
||||
|
||||
# Create table configs
|
||||
table_configs = [
|
||||
TableConfig(
|
||||
schema=table.schema,
|
||||
name=table.name,
|
||||
enabled=table.enabled,
|
||||
expected_in_target=table.expected_in_target,
|
||||
estimated_row_count=table.estimated_row_count,
|
||||
primary_key_columns=table.primary_key_columns,
|
||||
aggregate_columns=table.aggregate_columns,
|
||||
notes=table.notes
|
||||
)
|
||||
for table in tables
|
||||
]
|
||||
|
||||
# Update metadata
|
||||
metadata = MetadataConfig(
|
||||
config_version="1.0",
|
||||
generated_date=get_timestamp(),
|
||||
generated_by="discovery",
|
||||
framework_version="1.0.0"
|
||||
)
|
||||
|
||||
# Create new config with discovered tables
|
||||
config = Config(
|
||||
metadata=metadata,
|
||||
tables=table_configs
|
||||
)
|
||||
|
||||
logger.info(f"Configuration generated with {len(table_configs)} tables")
|
||||
return config
|
||||
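A minimal usage sketch of the discovery flow shown above, assuming ConnectionConfig accepts server and database keyword arguments (an assumption; only the DiscoveryService methods are taken from this file):

from drt.config.models import ConnectionConfig
from drt.services.discovery import DiscoveryService

# Assumed constructor arguments; ConnectionConfig is defined in drt.config.models.
baseline_conn = ConnectionConfig(server="localhost", database="TestDB_Baseline")

service = DiscoveryService(baseline_conn)
tables = service.discover_tables()        # queries table/column metadata via QueryExecutor
config = service.generate_config(tables)  # wraps results in TableConfig + MetadataConfig

print(f"Discovered {len(config.tables)} tables")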
297
src/drt/services/investigation.py
Normal file
@@ -0,0 +1,297 @@
|
||||
"""Investigation service for executing investigation queries."""
|
||||
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
from drt.database.connection import ConnectionManager
|
||||
from drt.database.executor import QueryExecutor
|
||||
from drt.config.models import Config, DatabasePairConfig
|
||||
from drt.models.investigation import (
|
||||
QueryExecutionResult,
|
||||
TableInvestigationResult,
|
||||
InvestigationSummary
|
||||
)
|
||||
from drt.models.enums import Status
|
||||
from drt.services.sql_parser import SQLParser, discover_sql_files
|
||||
from drt.utils.logging import get_logger
|
||||
from drt.utils.timestamps import get_timestamp
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class InvestigationService:
|
||||
"""Service for executing investigation queries."""
|
||||
|
||||
def __init__(self, config: Config):
|
||||
"""
|
||||
Initialize investigation service.
|
||||
|
||||
Args:
|
||||
config: Configuration object
|
||||
"""
|
||||
self.config = config
|
||||
self.parser = SQLParser()
|
||||
|
||||
def run_investigation(
|
||||
self,
|
||||
analysis_dir: Path,
|
||||
db_pair: DatabasePairConfig
|
||||
) -> InvestigationSummary:
|
||||
"""
|
||||
Run investigation for all SQL files in analysis directory.
|
||||
|
||||
Args:
|
||||
analysis_dir: Path to analysis output directory
|
||||
db_pair: Database pair configuration
|
||||
|
||||
Returns:
|
||||
Investigation summary with all results
|
||||
"""
|
||||
start_time = get_timestamp()
|
||||
start_ts = time.time()
|
||||
|
||||
logger.info("=" * 60)
|
||||
logger.info(f"Starting investigation: {analysis_dir.name}")
|
||||
logger.info("=" * 60)
|
||||
|
||||
# Initialize connections
|
||||
baseline_mgr = ConnectionManager(db_pair.baseline)
|
||||
target_mgr = ConnectionManager(db_pair.target)
|
||||
|
||||
try:
|
||||
# Connect to databases
|
||||
baseline_mgr.connect()
|
||||
target_mgr.connect()
|
||||
|
||||
# Create executors
|
||||
baseline_executor = QueryExecutor(baseline_mgr)
|
||||
target_executor = QueryExecutor(target_mgr)
|
||||
|
||||
# Discover SQL files
|
||||
sql_files = discover_sql_files(analysis_dir)
|
||||
logger.info(f"Found {len(sql_files)} investigation files")
|
||||
|
||||
# Create summary
|
||||
summary = InvestigationSummary(
|
||||
start_time=start_time,
|
||||
end_time="",
|
||||
duration_seconds=0,
|
||||
analysis_directory=str(analysis_dir),
|
||||
baseline_info=f"{db_pair.baseline.server}.{db_pair.baseline.database}",
|
||||
target_info=f"{db_pair.target.server}.{db_pair.target.database}",
|
||||
tables_processed=0,
|
||||
tables_successful=0,
|
||||
tables_partial=0,
|
||||
tables_failed=0,
|
||||
total_queries_executed=0,
|
||||
results=[]
|
||||
)
|
||||
|
||||
# Process each SQL file
|
||||
for idx, (schema, table, sql_path) in enumerate(sql_files, 1):
|
||||
logger.info(f"[{idx:3d}/{len(sql_files)}] {schema}.{table:40s} ...")
|
||||
|
||||
result = self._investigate_table(
|
||||
schema,
|
||||
table,
|
||||
sql_path,
|
||||
baseline_executor,
|
||||
target_executor
|
||||
)
|
||||
|
||||
summary.results.append(result)
|
||||
summary.tables_processed += 1
|
||||
|
||||
# Update counters
|
||||
if result.overall_status == Status.PASS:
|
||||
summary.tables_successful += 1
|
||||
elif result.overall_status == Status.SKIP:
|
||||
# Don't count skipped tables in partial/failed
|
||||
pass
|
||||
elif result.overall_status in [Status.WARNING, Status.INFO]:
|
||||
# Treat WARNING/INFO as partial success
|
||||
summary.tables_partial += 1
|
||||
elif self._is_partial_status(result):
|
||||
summary.tables_partial += 1
|
||||
else:
|
||||
summary.tables_failed += 1
|
||||
|
||||
# Count queries
|
||||
summary.total_queries_executed += len(result.baseline_results)
|
||||
summary.total_queries_executed += len(result.target_results)
|
||||
|
||||
logger.info(f" {self._get_status_symbol(result.overall_status)} "
|
||||
f"{result.overall_status.value}")
|
||||
|
||||
# Finalize summary
|
||||
end_time = get_timestamp()
|
||||
duration = int(time.time() - start_ts)
|
||||
summary.end_time = end_time
|
||||
summary.duration_seconds = duration
|
||||
|
||||
self._log_summary(summary)
|
||||
|
||||
return summary
|
||||
|
||||
finally:
|
||||
baseline_mgr.disconnect()
|
||||
target_mgr.disconnect()
|
||||
|
||||
def _investigate_table(
|
||||
self,
|
||||
schema: str,
|
||||
table: str,
|
||||
sql_path: Path,
|
||||
baseline_executor: QueryExecutor,
|
||||
target_executor: QueryExecutor
|
||||
) -> TableInvestigationResult:
|
||||
"""Execute investigation queries for a single table."""
|
||||
|
||||
# Parse SQL file
|
||||
queries = self.parser.parse_sql_file(sql_path)
|
||||
|
||||
if not queries:
|
||||
logger.warning(f"No valid queries found in {sql_path.name}")
|
||||
return TableInvestigationResult(
|
||||
schema=schema,
|
||||
table=table,
|
||||
sql_file_path=str(sql_path),
|
||||
baseline_results=[],
|
||||
target_results=[],
|
||||
overall_status=Status.SKIP,
|
||||
timestamp=get_timestamp()
|
||||
)
|
||||
|
||||
logger.debug(f" └─ Executing {len(queries)} queries")
|
||||
|
||||
# Execute on baseline
|
||||
baseline_results = self._execute_queries(
|
||||
queries,
|
||||
baseline_executor,
|
||||
"baseline"
|
||||
)
|
||||
|
||||
# Execute on target
|
||||
target_results = self._execute_queries(
|
||||
queries,
|
||||
target_executor,
|
||||
"target"
|
||||
)
|
||||
|
||||
# Determine overall status
|
||||
overall_status = self._determine_overall_status(
|
||||
baseline_results,
|
||||
target_results
|
||||
)
|
||||
|
||||
return TableInvestigationResult(
|
||||
schema=schema,
|
||||
table=table,
|
||||
sql_file_path=str(sql_path),
|
||||
baseline_results=baseline_results,
|
||||
target_results=target_results,
|
||||
overall_status=overall_status,
|
||||
timestamp=get_timestamp()
|
||||
)
|
||||
|
||||
def _execute_queries(
|
||||
self,
|
||||
queries: List[Tuple[int, str]],
|
||||
executor: QueryExecutor,
|
||||
environment: str
|
||||
) -> List[QueryExecutionResult]:
|
||||
"""Execute list of queries on one environment."""
|
||||
results = []
|
||||
|
||||
for query_num, query_text in queries:
|
||||
logger.debug(f" └─ Query {query_num} on {environment}")
|
||||
|
||||
status, result_df, error_msg, exec_time = \
|
||||
executor.execute_investigation_query(query_text)
|
||||
|
||||
result = QueryExecutionResult(
|
||||
query_number=query_num,
|
||||
query_text=query_text,
|
||||
status=status,
|
||||
execution_time_ms=exec_time,
|
||||
result_data=result_df,
|
||||
error_message=error_msg,
|
||||
row_count=len(result_df) if result_df is not None else 0
|
||||
)
|
||||
|
||||
results.append(result)
|
||||
|
||||
logger.debug(f" └─ {status.value} ({exec_time}ms, "
|
||||
f"{result.row_count} rows)")
|
||||
|
||||
return results
|
||||
|
||||
def _determine_overall_status(
|
||||
self,
|
||||
baseline_results: List[QueryExecutionResult],
|
||||
target_results: List[QueryExecutionResult]
|
||||
) -> Status:
|
||||
"""Determine overall status for table investigation."""
|
||||
|
||||
all_results = baseline_results + target_results
|
||||
|
||||
if not all_results:
|
||||
return Status.SKIP
|
||||
|
||||
success_count = sum(1 for r in all_results if r.status == Status.PASS)
|
||||
failed_count = sum(1 for r in all_results if r.status == Status.FAIL)
|
||||
skipped_count = sum(1 for r in all_results if r.status == Status.SKIP)
|
||||
|
||||
# All successful
|
||||
if success_count == len(all_results):
|
||||
return Status.PASS
|
||||
|
||||
# All failed
|
||||
if failed_count == len(all_results):
|
||||
return Status.FAIL
|
||||
|
||||
# All skipped
|
||||
if skipped_count == len(all_results):
|
||||
return Status.SKIP
|
||||
|
||||
# Mixed results - use WARNING to indicate partial success
|
||||
if success_count > 0:
|
||||
return Status.WARNING
|
||||
else:
|
||||
return Status.FAIL
|
||||
|
||||
def _is_partial_status(self, result: TableInvestigationResult) -> bool:
|
||||
"""Check if result represents partial success."""
|
||||
all_results = result.baseline_results + result.target_results
|
||||
if not all_results:
|
||||
return False
|
||||
|
||||
success_count = sum(1 for r in all_results if r.status == Status.PASS)
|
||||
return 0 < success_count < len(all_results)
|
||||
|
||||
def _get_status_symbol(self, status: Status) -> str:
|
||||
"""Get symbol for status."""
|
||||
symbols = {
|
||||
Status.PASS: "✓",
|
||||
Status.FAIL: "✗",
|
||||
Status.WARNING: "◐",
|
||||
Status.SKIP: "○",
|
||||
Status.ERROR: "🔴",
|
||||
Status.INFO: "ℹ"
|
||||
}
|
||||
return symbols.get(status, "?")
|
||||
|
||||
def _log_summary(self, summary: InvestigationSummary) -> None:
|
||||
"""Log investigation summary."""
|
||||
logger.info("=" * 60)
|
||||
logger.info("INVESTIGATION SUMMARY")
|
||||
logger.info("=" * 60)
|
||||
logger.info(f" Tables Processed: {summary.tables_processed}")
|
||||
logger.info(f" Successful: {summary.tables_successful}")
|
||||
logger.info(f" Partial: {summary.tables_partial}")
|
||||
logger.info(f" Failed: {summary.tables_failed}")
|
||||
logger.info(f" Total Queries: {summary.total_queries_executed}")
|
||||
logger.info("=" * 60)
|
||||
logger.info(f"Duration: {summary.duration_seconds} seconds")
|
||||
logger.info(f"Success Rate: {summary.success_rate:.1f}%")
|
||||
logger.info("=" * 60)
|
||||
173
src/drt/services/sql_parser.py
Normal file
@@ -0,0 +1,173 @@
|
||||
"""SQL file parser for investigation queries."""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
from drt.utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class SQLParser:
|
||||
"""Parser for investigation SQL files."""
|
||||
|
||||
@staticmethod
|
||||
def parse_sql_file(file_path: Path) -> List[Tuple[int, str]]:
|
||||
"""
|
||||
Parse SQL file into individual queries with their numbers.
|
||||
|
||||
Args:
|
||||
file_path: Path to SQL file
|
||||
|
||||
Returns:
|
||||
List of tuples (query_number, query_text)
|
||||
|
||||
Example:
|
||||
>>> queries = SQLParser.parse_sql_file(Path("investigate.sql"))
|
||||
>>> for num, query in queries:
|
||||
... print(f"Query {num}: {query[:50]}...")
|
||||
"""
|
||||
try:
|
||||
content = file_path.read_text(encoding='utf-8')
|
||||
|
||||
# Step 1: Remove markdown code blocks
|
||||
content = SQLParser._remove_markdown(content)
|
||||
|
||||
# Step 2: Split into queries
|
||||
queries = SQLParser._split_queries(content)
|
||||
|
||||
# Step 3: Clean and validate
|
||||
cleaned_queries = []
|
||||
for num, query in queries:
|
||||
cleaned = SQLParser._clean_query(query)
|
||||
if cleaned and SQLParser._is_valid_query(cleaned):
|
||||
cleaned_queries.append((num, cleaned))
|
||||
else:
|
||||
logger.debug(f"Skipped invalid query {num} in {file_path.name}")
|
||||
|
||||
logger.info(f"Parsed {len(cleaned_queries)} queries from {file_path.name}")
|
||||
return cleaned_queries
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse {file_path}: {e}")
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _remove_markdown(content: str) -> str:
|
||||
"""Remove markdown code blocks from content."""
|
||||
# Remove opening ```sql
|
||||
content = re.sub(r'```sql\s*\n?', '', content, flags=re.IGNORECASE)
|
||||
# Remove closing ```
|
||||
content = re.sub(r'```\s*\n?', '', content)
|
||||
return content
|
||||
|
||||
@staticmethod
|
||||
def _split_queries(content: str) -> List[Tuple[int, str]]:
|
||||
"""
|
||||
Split content into individual queries.
|
||||
|
||||
Looks for patterns like:
|
||||
-- Query 1: Description
|
||||
-- Query 2: Description
|
||||
"""
|
||||
queries = []
|
||||
current_query = []
|
||||
current_number = 0
|
||||
|
||||
for line in content.split('\n'):
|
||||
# Check if line is a query separator
|
||||
match = re.match(r'^\s*--\s*Query\s+(\d+):', line, re.IGNORECASE)
|
||||
|
||||
if match:
|
||||
# Save previous query if exists
|
||||
if current_query and current_number > 0:
|
||||
query_text = '\n'.join(current_query).strip()
|
||||
if query_text:
|
||||
queries.append((current_number, query_text))
|
||||
|
||||
# Start new query
|
||||
current_number = int(match.group(1))
|
||||
current_query = []
|
||||
else:
|
||||
# Add line to current query
|
||||
current_query.append(line)
|
||||
|
||||
# Don't forget the last query
|
||||
if current_query and current_number > 0:
|
||||
query_text = '\n'.join(current_query).strip()
|
||||
if query_text:
|
||||
queries.append((current_number, query_text))
|
||||
|
||||
return queries
|
||||
|
||||
@staticmethod
|
||||
def _clean_query(query: str) -> str:
|
||||
"""Clean query text."""
|
||||
# Remove leading/trailing whitespace
|
||||
query = query.strip()
|
||||
|
||||
# Remove comment-only lines at start
|
||||
lines = query.split('\n')
|
||||
while lines and lines[0].strip().startswith('--'):
|
||||
lines.pop(0)
|
||||
|
||||
# Remove empty lines at start and end
|
||||
while lines and not lines[0].strip():
|
||||
lines.pop(0)
|
||||
while lines and not lines[-1].strip():
|
||||
lines.pop()
|
||||
|
||||
return '\n'.join(lines)
|
||||
|
||||
@staticmethod
|
||||
def _is_valid_query(query: str) -> bool:
|
||||
"""Check if query is valid (not empty, not just comments)."""
|
||||
if not query:
|
||||
return False
|
||||
|
||||
# Remove all comments and whitespace
|
||||
cleaned = re.sub(r'--.*$', '', query, flags=re.MULTILINE)
|
||||
cleaned = cleaned.strip()
|
||||
|
||||
# Must have some SQL content
|
||||
return len(cleaned) > 0
|
||||
|
||||
|
||||
def discover_sql_files(analysis_dir: Path) -> List[Tuple[str, str, Path]]:
|
||||
"""
|
||||
Discover all *_investigate.sql files in analysis directory.
|
||||
|
||||
Args:
|
||||
analysis_dir: Root analysis directory
|
||||
|
||||
Returns:
|
||||
List of tuples (schema, table, file_path)
|
||||
|
||||
Example:
|
||||
>>> files = discover_sql_files(Path("analysis/output_20251209_184032"))
|
||||
>>> for schema, table, path in files:
|
||||
... print(f"{schema}.{table}: {path}")
|
||||
"""
|
||||
sql_files = []
|
||||
|
||||
# Pattern: dbo.TableName/dbo.TableName_investigate.sql
|
||||
pattern = "**/*_investigate.sql"
|
||||
|
||||
for sql_file in analysis_dir.glob(pattern):
|
||||
# Extract schema and table from filename
|
||||
# Example: dbo.A_COREC_NACES2008_investigate.sql
|
||||
filename = sql_file.stem # Remove .sql
|
||||
|
||||
if filename.endswith('_investigate'):
|
||||
# Remove _investigate suffix
|
||||
full_name = filename[:-12] # len('_investigate') = 12
|
||||
|
||||
# Split schema.table
|
||||
if '.' in full_name:
|
||||
schema, table = full_name.split('.', 1)
|
||||
sql_files.append((schema, table, sql_file))
|
||||
else:
|
||||
logger.warning(f"Could not parse schema.table from {filename}")
|
||||
|
||||
logger.info(f"Discovered {len(sql_files)} investigation SQL files")
|
||||
return sql_files
|
||||
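A hedged end-to-end sketch of how discover_sql_files and SQLParser fit together; the directory name is illustrative and the inline SQL is hypothetical, but the "-- Query N:" separator format is the one _split_queries looks for:

from pathlib import Path
from drt.services.sql_parser import SQLParser, discover_sql_files

# Hypothetical analysis directory produced by an earlier comparison run.
analysis_dir = Path("analysis/output_20251209_184032")

for schema, table, sql_path in discover_sql_files(analysis_dir):
    # Each file is expected to contain numbered sections such as:
    #   -- Query 1: Row counts per year
    #   SELECT YEAR(SaleDate) AS y, COUNT(*) FROM dbo.FactSales GROUP BY YEAR(SaleDate);
    queries = SQLParser.parse_sql_file(sql_path)
    for num, text in queries:
        print(f"{schema}.{table} query {num}: {text.splitlines()[0]}")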
7
src/drt/utils/__init__.py
Executable file
@@ -0,0 +1,7 @@
"""Utility functions and helpers."""

from drt.utils.timestamps import get_timestamp, format_duration
from drt.utils.patterns import matches_pattern
from drt.utils.logging import setup_logging

__all__ = ["get_timestamp", "format_duration", "matches_pattern", "setup_logging"]
75
src/drt/utils/logging.py
Executable file
@@ -0,0 +1,75 @@
|
||||
"""Logging configuration and setup."""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from drt.utils.timestamps import get_timestamp
|
||||
|
||||
|
||||
def setup_logging(
|
||||
log_level: str = "INFO",
|
||||
log_dir: str = "./logs",
|
||||
log_to_console: bool = True,
|
||||
log_to_file: bool = True,
|
||||
) -> logging.Logger:
|
||||
"""
|
||||
Configure logging for the framework.
|
||||
|
||||
Args:
|
||||
log_level: Logging level (DEBUG, INFO, WARNING, ERROR)
|
||||
log_dir: Directory for log files
|
||||
log_to_console: Whether to log to console
|
||||
log_to_file: Whether to log to file
|
||||
|
||||
Returns:
|
||||
Configured logger instance
|
||||
"""
|
||||
# Create logger
|
||||
logger = logging.getLogger("drt")
|
||||
logger.setLevel(getattr(logging, log_level.upper()))
|
||||
|
||||
# Remove existing handlers
|
||||
logger.handlers.clear()
|
||||
|
||||
# Create formatter
|
||||
log_format = "%(asctime)s | %(levelname)-8s | %(name)-20s | %(message)s"
|
||||
date_format = "%Y%m%d_%H%M%S"
|
||||
formatter = logging.Formatter(log_format, datefmt=date_format)
|
||||
|
||||
# Console handler
|
||||
if log_to_console:
|
||||
console_handler = logging.StreamHandler(sys.stdout)
|
||||
console_handler.setLevel(getattr(logging, log_level.upper()))
|
||||
console_handler.setFormatter(formatter)
|
||||
logger.addHandler(console_handler)
|
||||
|
||||
# File handler
|
||||
if log_to_file:
|
||||
log_path = Path(log_dir)
|
||||
log_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
timestamp = get_timestamp()
|
||||
log_file = log_path / f"drt_{timestamp}.log"
|
||||
|
||||
file_handler = logging.FileHandler(log_file, encoding="utf-8")
|
||||
file_handler.setLevel(logging.DEBUG) # Always log everything to file
|
||||
file_handler.setFormatter(formatter)
|
||||
logger.addHandler(file_handler)
|
||||
|
||||
logger.info(f"Logging to file: {log_file}")
|
||||
|
||||
return logger
|
||||
|
||||
|
||||
def get_logger(name: str) -> logging.Logger:
|
||||
"""
|
||||
Get a logger instance for a specific module.
|
||||
|
||||
Args:
|
||||
name: Logger name (typically __name__)
|
||||
|
||||
Returns:
|
||||
Logger instance
|
||||
"""
|
||||
return logging.getLogger(f"drt.{name}")
|
||||
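A short usage sketch of the logging helpers above; the level and directory shown are just the defaults:

from drt.utils.logging import setup_logging, get_logger

# Configure the root "drt" logger once at startup; the file handler always captures DEBUG.
setup_logging(log_level="INFO", log_dir="./logs", log_to_console=True, log_to_file=True)

# Module loggers are namespaced under "drt." and inherit the handlers configured above.
logger = get_logger(__name__)
logger.info("Logging configured")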
58
src/drt/utils/patterns.py
Executable file
@@ -0,0 +1,58 @@
"""Pattern matching utilities for wildcard support."""

import fnmatch
from typing import List


def matches_pattern(text: str, patterns: List[str]) -> bool:
    """
    Check if text matches any of the given wildcard patterns.

    Args:
        text: Text to match
        patterns: List of wildcard patterns (e.g., "*_TEMP", "tmp*")

    Returns:
        True if text matches any pattern, False otherwise

    Examples:
        >>> matches_pattern("Orders_TEMP", ["*_TEMP", "*_TMP"])
        True
        >>> matches_pattern("Orders", ["*_TEMP", "*_TMP"])
        False
    """
    if not patterns:
        return False

    for pattern in patterns:
        if fnmatch.fnmatch(text.upper(), pattern.upper()):
            return True

    return False


def filter_by_patterns(
    items: List[str], include_patterns: List[str] = None, exclude_patterns: List[str] = None
) -> List[str]:
    """
    Filter items by include and exclude patterns.

    Args:
        items: List of items to filter
        include_patterns: Patterns to include (if None, include all)
        exclude_patterns: Patterns to exclude

    Returns:
        Filtered list of items
    """
    result = items.copy()

    # Apply include patterns if specified
    if include_patterns:
        result = [item for item in result if matches_pattern(item, include_patterns)]

    # Apply exclude patterns
    if exclude_patterns:
        result = [item for item in result if not matches_pattern(item, exclude_patterns)]

    return result
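filter_by_patterns composes the include/exclude logic; a small illustrative example with made-up table names:

from drt.utils.patterns import filter_by_patterns

tables = ["FactSales", "DimCustomer", "Orders_TEMP", "Audit_Log"]
kept = filter_by_patterns(tables, include_patterns=["Fact*", "Dim*", "*_TEMP"], exclude_patterns=["*_TEMP"])
print(kept)  # ['FactSales', 'DimCustomer']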
59
src/drt/utils/timestamps.py
Executable file
@@ -0,0 +1,59 @@
"""Timestamp utilities using YYYYMMDD_HHMMSS format."""

from datetime import datetime


def get_timestamp() -> str:
    """
    Get current timestamp in YYYYMMDD_HHMMSS format.

    Returns:
        Formatted timestamp string
    """
    return datetime.now().strftime("%Y%m%d_%H%M%S")


def format_duration(seconds: int) -> str:
    """
    Format duration in seconds to human-readable string.

    Args:
        seconds: Duration in seconds

    Returns:
        Formatted duration string (e.g., "4 minutes 38 seconds")
    """
    if seconds < 60:
        return f"{seconds} second{'s' if seconds != 1 else ''}"

    minutes = seconds // 60
    remaining_seconds = seconds % 60

    if minutes < 60:
        if remaining_seconds == 0:
            return f"{minutes} minute{'s' if minutes != 1 else ''}"
        return f"{minutes} minute{'s' if minutes != 1 else ''} {remaining_seconds} second{'s' if remaining_seconds != 1 else ''}"

    hours = minutes // 60
    remaining_minutes = minutes % 60

    parts = [f"{hours} hour{'s' if hours != 1 else ''}"]
    if remaining_minutes > 0:
        parts.append(f"{remaining_minutes} minute{'s' if remaining_minutes != 1 else ''}")
    if remaining_seconds > 0:
        parts.append(f"{remaining_seconds} second{'s' if remaining_seconds != 1 else ''}")

    return " ".join(parts)


def parse_timestamp(timestamp_str: str) -> datetime:
    """
    Parse timestamp string in YYYYMMDD_HHMMSS format.

    Args:
        timestamp_str: Timestamp string to parse

    Returns:
        datetime object
    """
    return datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S")
117
test_data/init_baseline.sql
Executable file
@@ -0,0 +1,117 @@
|
||||
-- Baseline Database Initialization Script
|
||||
-- This creates a sample database structure for testing
|
||||
|
||||
USE master;
|
||||
GO
|
||||
|
||||
-- Create test database
|
||||
IF NOT EXISTS (SELECT name FROM sys.databases WHERE name = 'TestDB_Baseline')
|
||||
BEGIN
|
||||
CREATE DATABASE TestDB_Baseline;
|
||||
END
|
||||
GO
|
||||
|
||||
USE TestDB_Baseline;
|
||||
GO
|
||||
|
||||
-- Create sample tables
|
||||
|
||||
-- Dimension: Customers
|
||||
CREATE TABLE dbo.DimCustomer (
|
||||
CustomerID INT PRIMARY KEY IDENTITY(1,1),
|
||||
CustomerName NVARCHAR(100) NOT NULL,
|
||||
Email NVARCHAR(100),
|
||||
City NVARCHAR(50),
|
||||
Country NVARCHAR(50),
|
||||
CreatedDate DATETIME DEFAULT GETDATE()
|
||||
);
|
||||
|
||||
-- Dimension: Products
|
||||
CREATE TABLE dbo.DimProduct (
|
||||
ProductID INT PRIMARY KEY IDENTITY(1,1),
|
||||
ProductName NVARCHAR(100) NOT NULL,
|
||||
Category NVARCHAR(50),
|
||||
UnitPrice DECIMAL(10,2),
|
||||
IsActive BIT DEFAULT 1
|
||||
);
|
||||
|
||||
-- Fact: Sales
|
||||
CREATE TABLE dbo.FactSales (
|
||||
SaleID INT PRIMARY KEY IDENTITY(1,1),
|
||||
CustomerID INT,
|
||||
ProductID INT,
|
||||
SaleDate DATE,
|
||||
Quantity INT,
|
||||
UnitPrice DECIMAL(10,2),
|
||||
TotalAmount DECIMAL(10,2),
|
||||
TaxAmount DECIMAL(10,2),
|
||||
FOREIGN KEY (CustomerID) REFERENCES dbo.DimCustomer(CustomerID),
|
||||
FOREIGN KEY (ProductID) REFERENCES dbo.DimProduct(ProductID)
|
||||
);
|
||||
|
||||
-- Insert sample data (TEST DATA ONLY - NOT REAL CUSTOMERS)
|
||||
|
||||
-- Customers
|
||||
INSERT INTO dbo.DimCustomer (CustomerName, Email, City, Country) VALUES
|
||||
('TestCustomer1', 'test1@test.local', 'City1', 'Country1'),
|
||||
('TestCustomer2', 'test2@test.local', 'City2', 'Country2'),
|
||||
('TestCustomer3', 'test3@test.local', 'City3', 'Country3'),
|
||||
('TestCustomer4', 'test4@test.local', 'City4', 'Country4'),
|
||||
('TestCustomer5', 'test5@test.local', 'City5', 'Country5');
|
||||
|
||||
-- Products
|
||||
INSERT INTO dbo.DimProduct (ProductName, Category, UnitPrice, IsActive) VALUES
|
||||
('Laptop', 'Electronics', 999.99, 1),
|
||||
('Mouse', 'Electronics', 29.99, 1),
|
||||
('Keyboard', 'Electronics', 79.99, 1),
|
||||
('Monitor', 'Electronics', 299.99, 1),
|
||||
('Desk Chair', 'Furniture', 199.99, 1),
|
||||
('Desk', 'Furniture', 399.99, 1),
|
||||
('Notebook', 'Stationery', 4.99, 1),
|
||||
('Pen Set', 'Stationery', 12.99, 1);
|
||||
|
||||
-- Sales (100 records)
|
||||
DECLARE @i INT = 1;
|
||||
WHILE @i <= 100
|
||||
BEGIN
|
||||
INSERT INTO dbo.FactSales (CustomerID, ProductID, SaleDate, Quantity, UnitPrice, TotalAmount, TaxAmount)
|
||||
VALUES (
|
||||
(ABS(CHECKSUM(NEWID())) % 5) + 1, -- Random CustomerID 1-5
|
||||
(ABS(CHECKSUM(NEWID())) % 8) + 1, -- Random ProductID 1-8
|
||||
DATEADD(DAY, -ABS(CHECKSUM(NEWID())) % 365, GETDATE()), -- Random date in last year
|
||||
(ABS(CHECKSUM(NEWID())) % 10) + 1, -- Random Quantity 1-10
|
||||
(ABS(CHECKSUM(NEWID())) % 900) + 100.00, -- Random price 100-999
|
||||
0, -- Will be calculated
|
||||
0 -- Will be calculated
|
||||
);
|
||||
|
||||
-- Calculate amounts
|
||||
UPDATE dbo.FactSales
|
||||
SET TotalAmount = Quantity * UnitPrice,
|
||||
TaxAmount = Quantity * UnitPrice * 0.1
|
||||
WHERE SaleID = @i;
|
||||
|
||||
SET @i = @i + 1;
|
||||
END
|
||||
GO
|
||||
|
||||
-- Create some views for testing
|
||||
CREATE VIEW dbo.vw_SalesSummary AS
|
||||
SELECT
|
||||
c.CustomerName,
|
||||
p.ProductName,
|
||||
s.SaleDate,
|
||||
s.Quantity,
|
||||
s.TotalAmount
|
||||
FROM dbo.FactSales s
|
||||
JOIN dbo.DimCustomer c ON s.CustomerID = c.CustomerID
|
||||
JOIN dbo.DimProduct p ON s.ProductID = p.ProductID;
|
||||
GO
|
||||
|
||||
-- Create statistics
|
||||
CREATE STATISTICS stat_sales_date ON dbo.FactSales(SaleDate);
|
||||
CREATE STATISTICS stat_customer_country ON dbo.DimCustomer(Country);
|
||||
GO
|
||||
|
||||
PRINT 'Baseline database initialized successfully';
|
||||
GO
|
||||
131
test_data/init_target.sql
Executable file
@@ -0,0 +1,131 @@
|
||||
-- Target Database Initialization Script
|
||||
-- This creates a similar structure with some intentional differences for testing
|
||||
|
||||
USE master;
|
||||
GO
|
||||
|
||||
-- Create test database
|
||||
IF NOT EXISTS (SELECT name FROM sys.databases WHERE name = 'TestDB_Target')
|
||||
BEGIN
|
||||
CREATE DATABASE TestDB_Target;
|
||||
END
|
||||
GO
|
||||
|
||||
USE TestDB_Target;
|
||||
GO
|
||||
|
||||
-- Create sample tables (similar to baseline with some differences)
|
||||
|
||||
-- Dimension: Customers (same structure)
|
||||
CREATE TABLE dbo.DimCustomer (
|
||||
CustomerID INT PRIMARY KEY IDENTITY(1,1),
|
||||
CustomerName NVARCHAR(100) NOT NULL,
|
||||
Email NVARCHAR(100),
|
||||
City NVARCHAR(50),
|
||||
Country NVARCHAR(50),
|
||||
CreatedDate DATETIME DEFAULT GETDATE()
|
||||
);
|
||||
|
||||
-- Dimension: Products (slightly different - added column)
|
||||
CREATE TABLE dbo.DimProduct (
|
||||
ProductID INT PRIMARY KEY IDENTITY(1,1),
|
||||
ProductName NVARCHAR(100) NOT NULL,
|
||||
Category NVARCHAR(50),
|
||||
UnitPrice DECIMAL(10,2),
|
||||
IsActive BIT DEFAULT 1,
|
||||
LastModified DATETIME DEFAULT GETDATE() -- Extra column for testing
|
||||
);
|
||||
|
||||
-- Fact: Sales (same structure)
|
||||
CREATE TABLE dbo.FactSales (
|
||||
SaleID INT PRIMARY KEY IDENTITY(1,1),
|
||||
CustomerID INT,
|
||||
ProductID INT,
|
||||
SaleDate DATE,
|
||||
Quantity INT,
|
||||
UnitPrice DECIMAL(10,2),
|
||||
TotalAmount DECIMAL(10,2),
|
||||
TaxAmount DECIMAL(10,2),
|
||||
FOREIGN KEY (CustomerID) REFERENCES dbo.DimCustomer(CustomerID),
|
||||
FOREIGN KEY (ProductID) REFERENCES dbo.DimProduct(ProductID)
|
||||
);
|
||||
|
||||
-- Insert sample data (TEST DATA ONLY - NOT REAL CUSTOMERS)
|
||||
|
||||
-- Customers
|
||||
INSERT INTO dbo.DimCustomer (CustomerName, Email, City, Country) VALUES
|
||||
('TestCustomer1', 'test1@test.local', 'City1', 'Country1'),
|
||||
('TestCustomer2', 'test2@test.local', 'City2', 'Country2'),
|
||||
('TestCustomer3', 'test3@test.local', 'City3', 'Country3'),
|
||||
('TestCustomer4', 'test4@test.local', 'City4', 'Country4'),
|
||||
('TestCustomer5', 'test5@test.local', 'City5', 'Country5');
|
||||
|
||||
-- Products (with LastModified)
|
||||
INSERT INTO dbo.DimProduct (ProductName, Category, UnitPrice, IsActive, LastModified) VALUES
|
||||
('Laptop', 'Electronics', 999.99, 1, GETDATE()),
|
||||
('Mouse', 'Electronics', 29.99, 1, GETDATE()),
|
||||
('Keyboard', 'Electronics', 79.99, 1, GETDATE()),
|
||||
('Monitor', 'Electronics', 299.99, 1, GETDATE()),
|
||||
('Desk Chair', 'Furniture', 199.99, 1, GETDATE()),
|
||||
('Desk', 'Furniture', 399.99, 1, GETDATE()),
|
||||
('Notebook', 'Stationery', 4.99, 1, GETDATE()),
|
||||
('Pen Set', 'Stationery', 12.99, 1, GETDATE());
|
||||
|
||||
-- Sales (95 records - 5 fewer than baseline for testing)
|
||||
DECLARE @i INT = 1;
|
||||
WHILE @i <= 95
|
||||
BEGIN
|
||||
INSERT INTO dbo.FactSales (CustomerID, ProductID, SaleDate, Quantity, UnitPrice, TotalAmount, TaxAmount)
|
||||
VALUES (
|
||||
(ABS(CHECKSUM(NEWID())) % 5) + 1,
|
||||
(ABS(CHECKSUM(NEWID())) % 8) + 1,
|
||||
DATEADD(DAY, -ABS(CHECKSUM(NEWID())) % 365, GETDATE()),
|
||||
(ABS(CHECKSUM(NEWID())) % 10) + 1,
|
||||
(ABS(CHECKSUM(NEWID())) % 900) + 100.00,
|
||||
0,
|
||||
0
|
||||
);
|
||||
|
||||
-- Calculate amounts
|
||||
UPDATE dbo.FactSales
|
||||
SET TotalAmount = Quantity * UnitPrice,
|
||||
TaxAmount = Quantity * UnitPrice * 0.1
|
||||
WHERE SaleID = @i;
|
||||
|
||||
SET @i = @i + 1;
|
||||
END
|
||||
GO
|
||||
|
||||
-- Create the same view
|
||||
CREATE VIEW dbo.vw_SalesSummary AS
|
||||
SELECT
|
||||
c.CustomerName,
|
||||
p.ProductName,
|
||||
s.SaleDate,
|
||||
s.Quantity,
|
||||
s.TotalAmount
|
||||
FROM dbo.FactSales s
|
||||
JOIN dbo.DimCustomer c ON s.CustomerID = c.CustomerID
|
||||
JOIN dbo.DimProduct p ON s.ProductID = p.ProductID;
|
||||
GO
|
||||
|
||||
-- Create an extra table that doesn't exist in baseline
|
||||
CREATE TABLE dbo.TempProcessing (
|
||||
ProcessID INT PRIMARY KEY IDENTITY(1,1),
|
||||
ProcessName NVARCHAR(100),
|
||||
Status NVARCHAR(20),
|
||||
CreatedDate DATETIME DEFAULT GETDATE()
|
||||
);
|
||||
|
||||
INSERT INTO dbo.TempProcessing (ProcessName, Status) VALUES
|
||||
('DataLoad', 'Completed'),
|
||||
('Validation', 'In Progress');
|
||||
GO
|
||||
|
||||
-- Create statistics
|
||||
CREATE STATISTICS stat_sales_date ON dbo.FactSales(SaleDate);
|
||||
CREATE STATISTICS stat_customer_country ON dbo.DimCustomer(Country);
|
||||
GO
|
||||
|
||||
PRINT 'Target database initialized successfully';
|
||||
GO
|
||||
97
test_data/setup_test_environment.sh
Executable file
@@ -0,0 +1,97 @@
|
||||
#!/bin/bash
|
||||
# Setup script for test SQL Server environment
|
||||
|
||||
set -e
|
||||
|
||||
echo "=========================================="
|
||||
echo "SQL Server Test Environment Setup"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Check if Docker is installed
|
||||
if ! command -v docker &> /dev/null; then
|
||||
echo "Error: Docker is not installed"
|
||||
echo "Please install Docker first: https://docs.docker.com/get-docker/"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if Docker Compose is available (either standalone or plugin)
|
||||
if ! command -v docker-compose &> /dev/null && ! docker compose version &> /dev/null; then
|
||||
echo "Error: Docker Compose is not installed"
|
||||
echo "Please install Docker Compose first"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Determine which compose command to use
|
||||
if docker compose version &> /dev/null; then
|
||||
COMPOSE_CMD="docker compose"
|
||||
else
|
||||
COMPOSE_CMD="docker-compose"
|
||||
fi
|
||||
|
||||
echo "Step 1: Starting SQL Server containers..."
|
||||
$COMPOSE_CMD -f docker-compose.test.yml up -d
|
||||
|
||||
echo ""
|
||||
echo "Step 2: Waiting for SQL Server to be ready..."
|
||||
echo "This may take 30-60 seconds..."
|
||||
|
||||
# Set default password if not provided
|
||||
SA_PASSWORD=${SA_PASSWORD:-YourStrong!Passw0rd}
|
||||
|
||||
# Wait for baseline server
|
||||
echo -n "Waiting for baseline server"
|
||||
for i in {1..30}; do
|
||||
if docker exec drt-sqlserver-baseline /opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P "$SA_PASSWORD" -C -Q "SELECT 1" &> /dev/null; then
|
||||
echo " ✓"
|
||||
break
|
||||
fi
|
||||
echo -n "."
|
||||
sleep 2
|
||||
done
|
||||
|
||||
# Wait for target server
|
||||
echo -n "Waiting for target server"
|
||||
for i in {1..30}; do
|
||||
if docker exec drt-sqlserver-target /opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P "$SA_PASSWORD" -C -Q "SELECT 1" &> /dev/null; then
|
||||
echo " ✓"
|
||||
break
|
||||
fi
|
||||
echo -n "."
|
||||
sleep 2
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "Step 3: Initializing baseline database..."
|
||||
docker exec -i drt-sqlserver-baseline /opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P "$SA_PASSWORD" -C < test_data/init_baseline.sql
|
||||
|
||||
echo ""
|
||||
echo "Step 4: Initializing target database..."
|
||||
docker exec -i drt-sqlserver-target /opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P "$SA_PASSWORD" -C < test_data/init_target.sql
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Setup completed successfully!"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo "SQL Server instances are running:"
|
||||
echo " Baseline: localhost:1433"
|
||||
echo " Target: localhost:1434"
|
||||
echo ""
|
||||
echo "Credentials:"
|
||||
echo " Username: sa"
|
||||
echo " Password: (set via SA_PASSWORD environment variable)"
|
||||
echo ""
|
||||
echo "Test databases:"
|
||||
echo " Baseline: TestDB_Baseline"
|
||||
echo " Target: TestDB_Target"
|
||||
echo ""
|
||||
echo "To test the connection:"
|
||||
echo " drt discover --server localhost --database TestDB_Baseline --output config_test.yaml"
|
||||
echo ""
|
||||
echo "To stop the servers:"
|
||||
echo " $COMPOSE_CMD -f docker-compose.test.yml down"
|
||||
echo ""
|
||||
echo "To stop and remove all data:"
|
||||
echo " $COMPOSE_CMD -f docker-compose.test.yml down -v"
|
||||
echo ""
|
||||
3
tests/__init__.py
Executable file
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
Test suite for Data Regression Testing Framework
|
||||
"""
|
||||
207
tests/test_config.py
Executable file
@@ -0,0 +1,207 @@
|
||||
"""
|
||||
Unit tests for configuration management
|
||||
"""
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from drt.config.models import (
|
||||
DatabaseConnection,
|
||||
DatabasePair,
|
||||
ComparisonSettings,
|
||||
RowCountSettings,
|
||||
SchemaSettings,
|
||||
AggregateSettings,
|
||||
ReportingSettings,
|
||||
LoggingSettings,
|
||||
Config
|
||||
)
|
||||
|
||||
|
||||
class TestDatabaseConnection:
|
||||
"""Test DatabaseConnection model"""
|
||||
|
||||
def test_database_connection_minimal(self):
|
||||
"""Test creating a minimal database connection"""
|
||||
conn = DatabaseConnection(
|
||||
server="SQLSERVER01",
|
||||
database="TestDB"
|
||||
)
|
||||
assert conn.server == "SQLSERVER01"
|
||||
assert conn.database == "TestDB"
|
||||
assert conn.timeout.connection == 30
|
||||
assert conn.timeout.query == 300
|
||||
|
||||
def test_database_connection_with_timeout(self):
|
||||
"""Test database connection with custom timeout"""
|
||||
conn = DatabaseConnection(
|
||||
server="SQLSERVER01",
|
||||
database="TestDB",
|
||||
timeout={"connection": 60, "query": 600}
|
||||
)
|
||||
assert conn.timeout.connection == 60
|
||||
assert conn.timeout.query == 600
|
||||
|
||||
|
||||
class TestDatabasePair:
|
||||
"""Test DatabasePair model"""
|
||||
|
||||
def test_database_pair_creation(self):
|
||||
"""Test creating a database pair"""
|
||||
pair = DatabasePair(
|
||||
name="Test_Pair",
|
||||
enabled=True,
|
||||
baseline=DatabaseConnection(
|
||||
server="SQLSERVER01",
|
||||
database="PROD_DB"
|
||||
),
|
||||
target=DatabaseConnection(
|
||||
server="SQLSERVER01",
|
||||
database="TEST_DB"
|
||||
)
|
||||
)
|
||||
assert pair.name == "Test_Pair"
|
||||
assert pair.enabled is True
|
||||
assert pair.baseline.database == "PROD_DB"
|
||||
assert pair.target.database == "TEST_DB"
|
||||
|
||||
|
||||
class TestComparisonSettings:
|
||||
"""Test ComparisonSettings model"""
|
||||
|
||||
def test_comparison_settings_health_check(self):
|
||||
"""Test health check mode settings"""
|
||||
settings = ComparisonSettings(
|
||||
mode="health_check",
|
||||
row_count=RowCountSettings(enabled=True, tolerance_percent=0.0),
|
||||
schema=SchemaSettings(
|
||||
enabled=True,
|
||||
checks={
|
||||
"column_names": True,
|
||||
"data_types": True
|
||||
}
|
||||
),
|
||||
aggregates=AggregateSettings(enabled=False)
|
||||
)
|
||||
assert settings.mode == "health_check"
|
||||
assert settings.row_count.enabled is True
|
||||
assert settings.aggregates.enabled is False
|
||||
|
||||
def test_comparison_settings_full_mode(self):
|
||||
"""Test full mode settings"""
|
||||
settings = ComparisonSettings(
|
||||
mode="full",
|
||||
row_count=RowCountSettings(enabled=True, tolerance_percent=0.0),
|
||||
schema=SchemaSettings(enabled=True),
|
||||
aggregates=AggregateSettings(enabled=True, tolerance_percent=0.01)
|
||||
)
|
||||
assert settings.mode == "full"
|
||||
assert settings.aggregates.enabled is True
|
||||
assert settings.aggregates.tolerance_percent == 0.01
|
||||
|
||||
|
||||
class TestReportingSettings:
|
||||
"""Test ReportingSettings model"""
|
||||
|
||||
def test_reporting_settings_defaults(self):
|
||||
"""Test default reporting settings"""
|
||||
settings = ReportingSettings()
|
||||
assert settings.output_dir == "./reports"
|
||||
assert settings.formats.html is True
|
||||
assert settings.formats.csv is True
|
||||
assert settings.formats.pdf is False
|
||||
assert settings.include_timestamp is True
|
||||
|
||||
def test_reporting_settings_custom(self):
|
||||
"""Test custom reporting settings"""
|
||||
settings = ReportingSettings(
|
||||
output_dir="./custom_reports",
|
||||
filename_prefix="custom_test",
|
||||
formats={"html": True, "csv": False, "pdf": True}
|
||||
)
|
||||
assert settings.output_dir == "./custom_reports"
|
||||
assert settings.filename_prefix == "custom_test"
|
||||
assert settings.formats.pdf is True
|
||||
|
||||
|
||||
class TestLoggingSettings:
|
||||
"""Test LoggingSettings model"""
|
||||
|
||||
def test_logging_settings_defaults(self):
|
||||
"""Test default logging settings"""
|
||||
settings = LoggingSettings()
|
||||
assert settings.level == "INFO"
|
||||
assert settings.output_dir == "./logs"
|
||||
assert settings.console.enabled is True
|
||||
assert settings.file.enabled is True
|
||||
|
||||
def test_logging_settings_custom(self):
|
||||
"""Test custom logging settings"""
|
||||
settings = LoggingSettings(
|
||||
level="DEBUG",
|
||||
console={"enabled": True, "level": "WARNING"}
|
||||
)
|
||||
assert settings.level == "DEBUG"
|
||||
assert settings.console.level == "WARNING"
|
||||
|
||||
|
||||
class TestConfig:
|
||||
"""Test Config model"""
|
||||
|
||||
def test_config_minimal(self):
|
||||
"""Test creating a minimal config"""
|
||||
config = Config(
|
||||
database_pairs=[
|
||||
DatabasePair(
|
||||
name="Test",
|
||||
enabled=True,
|
||||
baseline=DatabaseConnection(
|
||||
server="SERVER01",
|
||||
database="PROD"
|
||||
),
|
||||
target=DatabaseConnection(
|
||||
server="SERVER01",
|
||||
database="TEST"
|
||||
)
|
||||
)
|
||||
],
|
||||
comparison=ComparisonSettings(
|
||||
mode="health_check",
|
||||
row_count=RowCountSettings(enabled=True),
|
||||
schema=SchemaSettings(enabled=True),
|
||||
aggregates=AggregateSettings(enabled=False)
|
||||
),
|
||||
tables=[]
|
||||
)
|
||||
assert len(config.database_pairs) == 1
|
||||
assert config.comparison.mode == "health_check"
|
||||
assert len(config.tables) == 0
|
||||
|
||||
def test_config_with_tables(self):
|
||||
"""Test config with table definitions"""
|
||||
from drt.models.table import TableInfo
|
||||
|
||||
config = Config(
|
||||
database_pairs=[
|
||||
DatabasePair(
|
||||
name="Test",
|
||||
enabled=True,
|
||||
baseline=DatabaseConnection(server="S1", database="D1"),
|
||||
target=DatabaseConnection(server="S1", database="D2")
|
||||
)
|
||||
],
|
||||
comparison=ComparisonSettings(
|
||||
mode="health_check",
|
||||
row_count=RowCountSettings(enabled=True),
|
||||
schema=SchemaSettings(enabled=True),
|
||||
aggregates=AggregateSettings(enabled=False)
|
||||
),
|
||||
tables=[
|
||||
TableInfo(
|
||||
schema="dbo",
|
||||
name="TestTable",
|
||||
enabled=True,
|
||||
expected_in_target=True
|
||||
)
|
||||
]
|
||||
)
|
||||
assert len(config.tables) == 1
|
||||
assert config.tables[0].name == "TestTable"
|
||||
186
tests/test_models.py
Executable file
@@ -0,0 +1,186 @@
|
||||
"""
|
||||
Unit tests for data models
|
||||
"""
|
||||
import pytest
|
||||
from drt.models.enums import Status, CheckType
|
||||
from drt.models.table import TableInfo, ColumnInfo
|
||||
from drt.models.results import CheckResult, ComparisonResult
|
||||
|
||||
|
||||
class TestStatus:
|
||||
"""Test Status enum"""
|
||||
|
||||
def test_status_values(self):
|
||||
"""Test status enum values"""
|
||||
assert Status.PASS.value == "PASS"
|
||||
assert Status.FAIL.value == "FAIL"
|
||||
assert Status.WARNING.value == "WARNING"
|
||||
assert Status.ERROR.value == "ERROR"
|
||||
assert Status.INFO.value == "INFO"
|
||||
assert Status.SKIP.value == "SKIP"
|
||||
|
||||
def test_status_severity(self):
|
||||
"""Test status severity comparison"""
|
||||
assert Status.FAIL.severity > Status.WARNING.severity
|
||||
assert Status.WARNING.severity > Status.PASS.severity
|
||||
assert Status.ERROR.severity > Status.FAIL.severity
|
||||
|
||||
|
||||
class TestCheckType:
|
||||
"""Test CheckType enum"""
|
||||
|
||||
def test_check_type_values(self):
|
||||
"""Test check type enum values"""
|
||||
assert CheckType.TABLE_EXISTENCE.value == "TABLE_EXISTENCE"
|
||||
assert CheckType.ROW_COUNT.value == "ROW_COUNT"
|
||||
assert CheckType.SCHEMA.value == "SCHEMA"
|
||||
assert CheckType.AGGREGATE.value == "AGGREGATE"
|
||||
|
||||
|
||||
class TestTableInfo:
|
||||
"""Test TableInfo model"""
|
||||
|
||||
def test_table_info_creation(self):
|
||||
"""Test creating a TableInfo instance"""
|
||||
table = TableInfo(
|
||||
schema="dbo",
|
||||
name="TestTable",
|
||||
enabled=True,
|
||||
expected_in_target=True
|
||||
)
|
||||
assert table.schema == "dbo"
|
||||
assert table.name == "TestTable"
|
||||
assert table.enabled is True
|
||||
assert table.expected_in_target is True
|
||||
assert table.aggregate_columns == []
|

    def test_table_info_with_aggregates(self):
        """Test TableInfo with aggregate columns"""
        table = TableInfo(
            schema="dbo",
            name="FactSales",
            enabled=True,
            expected_in_target=True,
            aggregate_columns=["Amount", "Quantity"]
        )
        assert len(table.aggregate_columns) == 2
        assert "Amount" in table.aggregate_columns


class TestColumnInfo:
    """Test ColumnInfo model"""

    def test_column_info_creation(self):
        """Test creating a ColumnInfo instance"""
        column = ColumnInfo(
            name="CustomerID",
            data_type="int",
            is_nullable=False,
            is_primary_key=True
        )
        assert column.name == "CustomerID"
        assert column.data_type == "int"
        assert column.is_nullable is False
        assert column.is_primary_key is True


class TestCheckResult:
    """Test CheckResult model"""

    def test_check_result_pass(self):
        """Test creating a passing check result"""
        result = CheckResult(
            check_type=CheckType.ROW_COUNT,
            status=Status.PASS,
            message="Row counts match",
            baseline_value=1000,
            target_value=1000
        )
        assert result.status == Status.PASS
        assert result.baseline_value == 1000
        assert result.target_value == 1000

    def test_check_result_fail(self):
        """Test creating a failing check result"""
        result = CheckResult(
            check_type=CheckType.ROW_COUNT,
            status=Status.FAIL,
            message="Row count mismatch",
            baseline_value=1000,
            target_value=950
        )
        assert result.status == Status.FAIL
        assert result.baseline_value != result.target_value


class TestComparisonResult:
    """Test ComparisonResult model"""

    def test_comparison_result_creation(self):
        """Test creating a ComparisonResult instance"""
        result = ComparisonResult(
            schema="dbo",
            table="TestTable"
        )
        assert result.schema == "dbo"
        assert result.table == "TestTable"
        assert len(result.checks) == 0

    def test_add_check_result(self):
        """Test adding check results"""
        comparison = ComparisonResult(
            schema="dbo",
            table="TestTable"
        )

        check = CheckResult(
            check_type=CheckType.ROW_COUNT,
            status=Status.PASS,
            message="Row counts match"
        )

        comparison.checks.append(check)
        assert len(comparison.checks) == 1
        assert comparison.checks[0].status == Status.PASS

    def test_overall_status_all_pass(self):
        """Test overall status when all checks pass"""
        comparison = ComparisonResult(
            schema="dbo",
            table="TestTable"
        )

        comparison.checks.append(CheckResult(
            check_type=CheckType.TABLE_EXISTENCE,
            status=Status.PASS,
            message="Table exists"
        ))

        comparison.checks.append(CheckResult(
            check_type=CheckType.ROW_COUNT,
            status=Status.PASS,
            message="Row counts match"
        ))

        assert comparison.overall_status == Status.PASS

    def test_overall_status_with_failure(self):
        """Test overall status when one check fails"""
        comparison = ComparisonResult(
            schema="dbo",
            table="TestTable"
        )

        comparison.checks.append(CheckResult(
            check_type=CheckType.TABLE_EXISTENCE,
            status=Status.PASS,
            message="Table exists"
        ))

        comparison.checks.append(CheckResult(
            check_type=CheckType.ROW_COUNT,
            status=Status.FAIL,
            message="Row count mismatch"
        ))

        assert comparison.overall_status == Status.FAIL
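
# ---------------------------------------------------------------------------
# Not part of the test file above: a minimal sketch of how the
# ComparisonResult.overall_status property exercised by these tests might be
# implemented, assuming a dataclass-style model with a `checks` list. The
# Status and CheckResult placeholders below are trimmed-down assumptions; the
# framework's real models live elsewhere in the repo and may differ.
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, List, Optional


class Status(str, Enum):
    # Placeholder for the framework's Status enum (member values assumed).
    PASS = "PASS"
    FAIL = "FAIL"


@dataclass
class CheckResult:
    # Trimmed-down placeholder mirroring the fields used in the tests above.
    check_type: str
    status: Status
    message: str
    baseline_value: Optional[Any] = None
    target_value: Optional[Any] = None


@dataclass
class ComparisonResult:
    schema: str
    table: str
    checks: List[CheckResult] = field(default_factory=list)

    @property
    def overall_status(self) -> Status:
        # A single failing check fails the whole table comparison;
        # otherwise the comparison as a whole passes.
        if any(check.status == Status.FAIL for check in self.checks):
            return Status.FAIL
        return Status.PASS
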
83
tests/test_utils.py
Executable file
83
tests/test_utils.py
Executable file
@@ -0,0 +1,83 @@
"""
Unit tests for utility functions
"""
import pytest
from datetime import datetime
from drt.utils.timestamps import format_timestamp, format_duration
from drt.utils.patterns import matches_pattern


class TestTimestamps:
    """Test timestamp utilities"""

    def test_format_timestamp(self):
        """Test timestamp formatting"""
        dt = datetime(2024, 1, 15, 14, 30, 45)
        formatted = format_timestamp(dt)
        assert formatted == "20240115_143045"

    def test_format_timestamp_current(self):
        """Test formatting current timestamp"""
        formatted = format_timestamp()
        # Should be in YYYYMMDD_HHMMSS format
        assert len(formatted) == 15
        assert formatted[8] == "_"

    def test_format_duration_seconds(self):
        """Test duration formatting for seconds"""
        duration = format_duration(45.5)
        assert duration == "45.50s"

    def test_format_duration_minutes(self):
        """Test duration formatting for minutes"""
        duration = format_duration(125.0)
        assert duration == "2m 5.00s"

    def test_format_duration_hours(self):
        """Test duration formatting for hours"""
        duration = format_duration(3725.0)
        assert duration == "1h 2m 5.00s"
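

# ---------------------------------------------------------------------------
# Not part of this test file: a minimal sketch of the drt.utils.timestamps
# helpers exercised above, with behaviour inferred from the assertions. The
# actual implementations shipped with the framework may differ.
from typing import Optional


def format_timestamp_sketch(dt: Optional[datetime] = None) -> str:
    # YYYYMMDD_HHMMSS; defaults to the current time when no datetime is given.
    return (dt or datetime.now()).strftime("%Y%m%d_%H%M%S")


def format_duration_sketch(seconds: float) -> str:
    # "45.50s" under a minute, "2m 5.00s" under an hour, "1h 2m 5.00s" beyond.
    hours, remainder = divmod(seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    if hours >= 1:
        return f"{int(hours)}h {int(minutes)}m {secs:.2f}s"
    if minutes >= 1:
        return f"{int(minutes)}m {secs:.2f}s"
    return f"{secs:.2f}s"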


class TestPatterns:
    """Test pattern matching utilities"""

    def test_exact_match(self):
        """Test exact pattern matching"""
        assert matches_pattern("TestTable", "TestTable") is True
        assert matches_pattern("TestTable", "OtherTable") is False

    def test_wildcard_star(self):
        """Test wildcard * pattern"""
        assert matches_pattern("TestTable", "Test*") is True
        assert matches_pattern("TestTable", "*Table") is True
        assert matches_pattern("TestTable", "*est*") is True
        assert matches_pattern("TestTable", "Other*") is False

    def test_wildcard_question(self):
        """Test wildcard ? pattern"""
        assert matches_pattern("Test1", "Test?") is True
        assert matches_pattern("TestA", "Test?") is True
        assert matches_pattern("Test12", "Test?") is False
        assert matches_pattern("Test", "Test?") is False

    def test_combined_wildcards(self):
        """Test combined wildcard patterns"""
        assert matches_pattern("Test_Table_01", "Test_*_??") is True
        assert matches_pattern("Test_Table_1", "Test_*_??") is False

    def test_case_sensitivity(self):
        """Test case-sensitive matching"""
        assert matches_pattern("TestTable", "testtable") is False
        assert matches_pattern("TestTable", "TestTable") is True

    def test_empty_pattern(self):
        """Test empty pattern"""
        assert matches_pattern("TestTable", "") is False
        assert matches_pattern("", "") is True

    def test_special_characters(self):
        """Test patterns with special characters"""
        assert matches_pattern("Test.Table", "Test.Table") is True
        assert matches_pattern("Test_Table", "Test_*") is True
        assert matches_pattern("Test-Table", "Test-*") is True
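

# ---------------------------------------------------------------------------
# Not part of this test file: a minimal sketch of drt.utils.patterns.matches_pattern
# consistent with the assertions above. fnmatch.fnmatchcase provides case-sensitive
# * / ? wildcard matching, which is what the case-sensitivity test expects, and an
# empty pattern then only matches an empty name. The framework's real helper may differ.
from fnmatch import fnmatchcase


def matches_pattern_sketch(name: str, pattern: str) -> bool:
    # Case-sensitive glob-style match ("*" = any run of characters, "?" = one character).
    return fnmatchcase(name, pattern)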