Skip to content

Commit

Permalink
Add support for populating the db with test data
Browse files Browse the repository at this point in the history
---------
Co-authored-by: Christoph Pirkl <[email protected]>
  • Loading branch information
Nicoretti authored Apr 30, 2024
1 parent fcd74eb commit e4d6b6e
Show file tree
Hide file tree
Showing 8 changed files with 10,298 additions and 12 deletions.
10 changes: 8 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@

def _test_command(path: Path) -> Iterable[str]:
base_command = ["poetry", "run"]
pytest_command = ["pytest", "-v", f"{path}"]
pytest_command = [
"pytest",
"-v",
"--log-level=INFO",
"--log-cli-level=INFO",
f"{path}"
]
return base_command + pytest_command


Expand Down Expand Up @@ -80,5 +86,5 @@ def integration_tests(session: Session) -> None:
@nox.session(name="all-tests", python=False)
def all_tests(session: Session) -> None:
    """Runs all tests (Unit and Integration)"""
    # A single assignment is enough; the command was previously built twice
    # with identical arguments.
    command = _test_command(_ROOT / "test")
    session.run(*command)
19 changes: 17 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ nox = "^2023.4.22"
pytest = ">=7.0.0,<9"
docutils = "0.20.1"
exasol-integration-test-docker-environment = "^3.0.0"
faker = "^24.14.1"

[build-system]
requires = ["poetry-core"]
Expand Down
98 changes: 98 additions & 0 deletions scripts/generate_users.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import sys
import csv
import decimal
import random
import datetime
import argparse

from faker import Faker
from faker.providers import BaseProvider


class UserDataProvider(BaseProvider):
    """Faker provider that produces the column values of a test user row.

    All random values are drawn through the provider itself
    (``self.random_int`` / ``self.random_element``) instead of the global
    ``random`` module, so the values follow the random source of the Faker
    instance the provider is attached to and become reproducible when that
    instance is seeded.
    """

    def date(self):
        """Return a ``datetime.date`` lying 1 to 365 days after 2018-01-01."""
        start_date = datetime.date(2018, 1, 1)
        return start_date + datetime.timedelta(days=self.random_int(1, 365))

    def timestamp(self):
        """Return a ``datetime.datetime`` on a random date (see ``date``).

        The time of day is random with millisecond resolution.
        """
        date = self.date()
        time = datetime.time(
            self.random_int(0, 23),
            self.random_int(0, 59),
            self.random_int(0, 59),
            # datetime.time expects microseconds; whole milliseconds only.
            self.random_int(0, 999) * 1000,
        )
        return datetime.datetime.combine(date, time)

    def boolean(self):
        """Return ``True`` or ``False``."""
        return self.random_element([True, False])

    def status(self):
        """Return one of the four account status strings."""
        return self.random_element(
            ['ACTIVE', 'PENDING', 'SUSPENDED', 'DISABLED']
        )

    def decimal(self):
        """Return a ``decimal.Decimal`` between 0 and 1 in steps of 0.01."""
        return decimal.Decimal(self.random_int(0, 100)) / 100

    def score(self):
        """Return a float between 0 and 100, or ``None`` for "no score"."""
        # One out of the eleven possible draws yields a missing score.
        value = self.random_int(0, 10)
        return None if value == 10 else self.random_int(0, 10000) / 100


def generate_users(count):
    """Lazily produce ``count`` random user rows.

    Each row is a tuple of
    ``(id, name, date, timestamp, boolean, decimal, score, status)`` where
    the id counts up from 0 and the remaining values come from a Faker
    instance extended with ``UserDataProvider``.
    """
    generator = Faker()
    generator.add_provider(UserDataProvider)

    for user_id in range(count):
        # Values are drawn in column order, one call per column.
        name = generator.name()
        registered = generator.date()
        last_visit = generator.timestamp()
        flag = generator.boolean()
        rating = generator.decimal()
        score = generator.score()
        status = generator.status()
        row = (
            user_id, name, registered, last_visit, flag, rating, score, status
        )
        yield row


def _create_parser():
parser = argparse.ArgumentParser(
description="Generate a CSV file containing users",
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
'filename',
type=argparse.FileType('w', encoding='utf8'),
help='file the resulting CSV should be written to.'
)
parser.add_argument(
'-n', '--count',
type=int, default=10000,
help='Number of users to create.'
)

return parser


# Exit codes returned by main().
FAILURE = -1
SUCCESS = 0


def main(argv=None):
    """Generate the users and write them as CSV to the requested file.

    Args:
        argv: Command line arguments; ``None`` lets argparse use
            ``sys.argv``.

    Returns:
        ``SUCCESS`` if all rows were written, ``FAILURE`` if generating or
        writing raised an exception.
    """
    parser = _create_parser()
    args = parser.parse_args(argv)
    output = args.filename
    try:
        # The file was opened by argparse in text mode without newline='',
        # so rows are terminated with a plain '\n'. The csv default '\r\n'
        # would leave a stray '\r' at the end of the last column for any
        # consumer that splits rows on LF only.
        writer = csv.writer(output, delimiter=',', lineterminator='\n')
        for user in generate_users(count=args.count):
            writer.writerow(user)
    except Exception as ex:
        # Report on stderr: stdout may be the CSV stream itself ('-').
        print(
            f"Error while generating users, details: {ex}",
            file=sys.stderr
        )
        return FAILURE
    finally:
        # argparse opened the file, nobody else will close it. Leave the
        # process-wide stdout open when '-' was given as filename.
        if output is not sys.stdout:
            output.close()
    return SUCCESS


if __name__ == '__main__':
    sys.exit(main())
10,000 changes: 10,000 additions & 0 deletions test/data/csv/users.csv

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions test/data/import.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Test data setup: recreate the PYEXASOL_TEST schema from scratch and fill
-- its USERS table from the users.csv file.
DROP SCHEMA IF EXISTS PYEXASOL_TEST CASCADE;
CREATE SCHEMA PYEXASOL_TEST;

-- Column order corresponds to the row tuples emitted by
-- scripts/generate_users.py (id, name, date, timestamp, boolean, decimal,
-- score, status).
CREATE OR REPLACE TABLE PYEXASOL_TEST.USERS
(
user_id DECIMAL(18,0),
user_name VARCHAR(255),
register_dt DATE,
last_visit_ts TIMESTAMP,
is_female BOOLEAN,
user_rating DECIMAL(10,5),
user_score DOUBLE,
status VARCHAR(50)
);

-- 'users.csv' is a relative path; the integration test setup runs this
-- script with the working directory set to the directory the CSV was
-- uploaded to.
-- NOTE(review): no ROW SEPARATOR is specified. The smoke test expects status
-- values with a trailing '\r', which suggests the CSV has CRLF line endings
-- that are not fully consumed here -- confirm whether ROW SEPARATOR = 'CRLF'
-- is wanted.
IMPORT INTO PYEXASOL_TEST.USERS FROM LOCAL CSV FILE 'users.csv' COLUMN SEPARATOR = ',';


144 changes: 139 additions & 5 deletions test/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,31 @@
import os
import uuid
import pytest
import pyexasol
import subprocess
from pathlib import Path

import logging

# Session scoped so that session scoped fixtures can depend on it; the
# fixture decorator must be applied exactly once.
@pytest.fixture(scope='session')
def dsn():
    """Return the database address, taken from ``EXAHOST``.

    Falls back to ``localhost:8563`` if the variable is not set.
    """
    return os.environ.get('EXAHOST', 'localhost:8563')


# Session scoped so that session scoped fixtures can depend on it; the
# fixture decorator must be applied exactly once.
@pytest.fixture(scope='session')
def user():
    """Return the database user name, taken from ``EXAUID``.

    Falls back to ``SYS`` if the variable is not set.
    """
    return os.environ.get('EXAUID', 'SYS')


# Session scoped so that session scoped fixtures can depend on it; the
# fixture decorator must be applied exactly once.
@pytest.fixture(scope='session')
def password():
    """Return the database password, taken from ``EXAPWD``.

    Falls back to ``exasol`` if the variable is not set.
    """
    return os.environ.get('EXAPWD', 'exasol')


# Session scoped like the other connection settings; the fixture decorator
# must be applied exactly once.
@pytest.fixture(scope='session')
def schema():
    """Return the schema the tests connect to, taken from ``EXASCHEMA``.

    Falls back to ``PYEXASOL_TEST``, the schema created by
    test/data/import.sql, if the variable is not set.
    """
    # A single return: the former 'TEST' default returned first and made the
    # 'PYEXASOL_TEST' default unreachable.
    return os.environ.get('EXASCHEMA', 'PYEXASOL_TEST')


@pytest.fixture
Expand All @@ -29,6 +34,135 @@ def connection(dsn, user, password, schema):
dsn=dsn,
user=user,
password=password,
schema=schema
)
yield con
con.close()


@pytest.fixture(scope='session', autouse=True)
def prepare_database(dsn, user, password):
    """Load the test data into the docker based Exasol DB once per session.

    The files below ``test/data`` (``import.sql`` plus every ``*.csv`` found
    recursively) are copied into a uniquely named directory inside the
    container ``db_container_test``; ``import.sql`` is then executed there
    with the ``exaplus`` client found in the container.

    Args:
        dsn: Address of the database, passed to exaplus via ``-c``.
        user: Database user, passed to exaplus via ``-u``.
        password: Password of ``user``, passed to exaplus via ``-p``.

    Raises:
        subprocess.CalledProcessError: if any docker command fails.
    """

    class DockerDataLoader:
        """Data loader for docker based Exasol DB"""

        def __init__(
                self,
                dsn,
                username,
                password,
                container_name,
                data_directory
        ):
            self._logger = logging.getLogger("DockerDataLoader")
            self._dsn = dsn
            self._user = username
            self._password = password
            self._container = container_name
            self._data_directory = data_directory
            # Absolute path, so that mkdir, docker cp and the exaplus working
            # directory all address the same directory no matter what the
            # default working directory of the container is.
            self._tmp_dir = f"/data-{uuid.uuid4()}"

        @property
        def data_directory(self):
            """Local directory containing the test data."""
            return self._data_directory

        @property
        def ddl_file(self):
            """Path of the SQL script which creates and fills the schema."""
            return self.data_directory / "import.sql"

        @property
        def csv_files(self):
            """All CSV files found recursively below the data directory."""
            return self.data_directory.rglob("*.csv")

        def load(self):
            """Upload the data files and import them into the database."""
            self._create_dir()
            self._upload_files()
            self._import_data()

        def _redact(self, command):
            """Return a copy of ``command`` with the password masked."""
            return [
                "***" if argument == self._password else argument
                for argument in command
            ]

        def _execute_command(self, command):
            """Run ``command`` and return its stdout; raise on failure."""
            # The exaplus call carries the password as an argument and INFO
            # logs may be shown live, therefore only a masked copy is logged.
            self._logger.info(
                "Executing docker command: %s", self._redact(command)
            )
            result = subprocess.run(
                command,
                check=True,
                text=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            self._logger.debug("Stderr: %s", result.stderr)
            return result.stdout

        def _exaplus(self) -> str:
            """Return the path of the exaplus executable in the container."""
            find_exaplus = [
                "docker",
                "exec",
                self._container,
                "find",
                "/usr",
                "-name",
                "exaplus",
                "-type",
                "f",  # only files
                "-executable",  # only executable files
                "-print",  # -print -quit will stop after the result is found
                "-quit",
            ]
            exaplus = self._execute_command(find_exaplus).strip()
            self._logger.info("Found exaplus at %s", exaplus)
            return exaplus

        def _create_dir(self):
            """Create data directory within the docker container."""
            mkdir = [
                "docker",
                "exec",
                self._container,
                "mkdir",
                self._tmp_dir
            ]
            stdout = self._execute_command(mkdir)
            self._logger.info("Stdout: %s", stdout)

        def _upload_files(self):
            """Copy the SQL script and all CSV files into the container."""
            files = [self.ddl_file]
            files.extend(self.csv_files)
            for file in files:
                # Files are stored flat (by name) in the data directory of
                # the container, the SQL script refers to them by name only.
                copy_file = [
                    "docker",
                    "cp",
                    f"{file.resolve()}",
                    f"{self._container}:{self._tmp_dir}/{file.name}",
                ]
                stdout = self._execute_command(copy_file)
                self._logger.debug("Stdout: %s", stdout)

        def _import_data(self):
            """Load test data into a backend."""
            execute_ddl_file = [
                "docker",
                "exec",
                "-w",  # run in the data directory, the script uses relative paths
                self._tmp_dir,
                self._container,
                self._exaplus(),
                "-c",
                f"{self._dsn}",
                "-u",
                self._user,
                "-p",
                self._password,
                "-f",
                self.ddl_file.name,
                "--jdbcparam",
                "validateservercertificate=0",
            ]
            stdout = self._execute_command(execute_ddl_file)
            self._logger.info("Stdout: %s", stdout)

    data_directory = Path(__file__).parent / '..' / 'data'
    loader = DockerDataLoader(
        dsn=dsn,
        username=user,
        password=password,
        container_name='db_container_test',
        data_directory=data_directory
    )
    loader.load()
20 changes: 17 additions & 3 deletions test/integration/integration_smoke_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
def test_static_select(connection):
    """A constant SELECT returns exactly one row holding the constant."""
    result = connection.execute("SELECT 1;")
    # Compare the complete result set, not only its first row, so that
    # unexpected additional rows are detected as well.
    expected = [(1,)]
    actual = result.fetchall()
    assert expected == actual


def test_sorted_select_and_limited_select(connection):
    """The first five users, ordered by id, match the imported CSV data."""
    # Plain string: there is nothing to interpolate.
    statement = "SELECT * FROM USERS ORDER BY USER_ID LIMIT 5;"
    result = connection.execute(statement)
    # NOTE(review): the trailing '\r' of the status values presumably stems
    # from CRLF line endings of the imported CSV file -- the expectation
    # mirrors the data as it is currently imported.
    expected = [
        (0, 'Amy Marquez', '2018-10-04', '2018-03-06 21:44:36.142000', True, '0.76', 30.11, 'PENDING\r'),
        (1, 'John Lawson', '2018-05-17', '2018-05-28 02:58:29.079000', True, '0.04', 71.72, 'DISABLED\r'),
        (2, 'Jessica Clark', '2018-05-23', '2018-05-22 04:19:51.098000', False, '0.72', 29.13, 'PENDING\r'),
        (3, 'Jennifer Taylor', '2018-05-01', '2018-03-03 08:12:52.685000', True, '0.43', 8.46, 'SUSPENDED\r'),
        (4, 'Tristan Romero', '2018-10-04', '2018-03-31 20:21:50.199000', True, '0.23', 62.980000000000004, 'PENDING\r')
    ]
    actual = result.fetchall()
    assert expected == actual

0 comments on commit e4d6b6e

Please sign in to comment.