Implements Level 2 parallelization for row_count, schema, and
aggregate checkers, improving performance by 2-3x for tables with
multiple enabled checks.
Changes:
- Add max_workers config option (default: 4)
- Add ConnectionPool module with SQLAlchemy QueuePool
- Add URL encoding for connection strings
- Implement parallel checker execution with ThreadPoolExecutor
- Add fail-fast behavior on checker errors
- Update executor for SQLAlchemy 2.0 compatibility
- Fix engine disposal resource leak
- Cache pooled engines in ConnectionManager
- Add disconnect() cleanup for pooled engines
Performance:
- Sequential: 3 checkers × 100ms = 300ms
- Parallel: 3 checkers ≈ 100ms (2-3x speedup)
Configuration:
execution:
  max_workers: 4  # Controls parallel checker execution
  continue_on_error: true
89 lines
2.7 KiB
Python
89 lines
2.7 KiB
Python
"""Test parallelization features."""
|
|
|
|
import pytest
|
|
from drt.config.models import Config, ExecutionConfig
|
|
|
|
|
|
class TestParallelizationConfig:
    """Checks for the execution settings exposed on ``Config``."""

    def test_default_max_workers(self):
        """A default ``Config`` reports four workers."""
        execution = Config().execution
        assert execution.max_workers == 4

    def test_custom_max_workers(self):
        """An explicit ``max_workers`` given via ``ExecutionConfig`` is kept."""
        execution_settings = ExecutionConfig(max_workers=8)
        cfg = Config(execution=execution_settings)
        assert cfg.execution.max_workers == 8

    def test_max_workers_positive(self):
        """The default worker count is strictly greater than zero."""
        worker_count = Config().execution.max_workers
        assert worker_count > 0

    def test_continue_on_error_default(self):
        """A default ``Config`` has ``continue_on_error`` set to ``True``."""
        cfg = Config()
        assert cfg.execution.continue_on_error is True
|
|
|
|
|
|
def test_imports():
    """Smoke-test that every name the parallelization code needs can be imported.

    ``QueuePool`` is imported from ``sqlalchemy.pool``, its documented
    location, instead of the top-level ``sqlalchemy`` namespace, so the check
    does not depend on a top-level re-export being present in the installed
    SQLAlchemy release.
    """
    from urllib.parse import quote_plus

    from sqlalchemy import create_engine, text
    from sqlalchemy.pool import QueuePool

    from drt.config.models import Config, ExecutionConfig

    # An ImportError above is the real failure signal; the assertions below
    # only make sure none of the imported names is bound to ``None``.
    assert Config is not None
    assert ExecutionConfig is not None
    assert quote_plus is not None
    assert create_engine is not None
    assert text is not None
    assert QueuePool is not None
|
|
|
|
|
|
def test_url_encoding():
    """Round-trip an ODBC-style connection string through ``quote_plus``."""
    from urllib.parse import quote_plus, unquote_plus

    raw = "DRIVER={ODBC Driver};SERVER=localhost;PWD=test@pass#123"
    quoted = quote_plus(raw)

    # Each special character must appear as its percent-escape:
    # "@" -> %40, "#" -> %23, "=" -> %3D.
    for escape in ("%40", "%23", "%3D"):
        assert escape in quoted
    # Encoding must actually have changed the string.
    assert quoted != raw

    # Decoding has to give back exactly what went in.
    assert unquote_plus(quoted) == raw
|
|
|
|
|
|
def test_config_load_minimal():
    """Build a ``Config`` from a minimal mapping and check the execution overrides."""
    database_pair = {
        "name": "Test",
        "enabled": True,
        "baseline": {"server": "S1", "database": "D1"},
        "target": {"server": "S1", "database": "D2"},
    }
    execution_overrides = {"max_workers": 6, "continue_on_error": False}

    # Same keyword arguments as unpacking one combined dict into Config.
    loaded = Config(
        database_pairs=[database_pair],
        execution=execution_overrides,
        tables=[],
    )

    execution = loaded.execution
    assert execution.max_workers == 6
    assert execution.continue_on_error is False
|
|
|
|
|
|
# Allow running this file directly: hand it to pytest in verbose mode.
if __name__ == "__main__":
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)
|