diff --git a/CHANGELOG.md b/CHANGELOG.md index e44bb560..4af2433c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,56 @@ This is the current release cycle, so stay tuned for future releases! +### v1.4.11 + +- **Add support for older versions of MySQL.** + The `WITH` keyword for CTE blocks was not introduced until MySQL 8.0. This patch uses the older syntax for older versions of MySQL and MariaDB. MySQL 5.7 was added to the test suite. + +- **Allow for any iterable in `items_str()`** + If an iterable other than a list is passed to `items_str()`, it will convert to a list before building the string: + + ```python + from meerschaum.utils.misc import items_str + print(items_str({'apples': 1, 'bananas': 2}, quotes=False) + # apples and bananas + ``` + +- **Fixed an edge case with `datetime` set to `None`.** + This patch will ignore the datetime index even if it was set explicitly to `None`. + +- **Added `Pipe.children`.** + To complement `Pipe.parents`, setting the parameters key `children` to a list of pipes' keys will be treated the same as `Pipe.parents`: + + ```python + import meerschaum as mrsm + pipe = mrsm.Pipe( + 'a', 'b', + parameters = { + 'children': [ + { + 'connector': 'a', + 'metric': 'b', + 'location': 'c', + }, + ] + } + ) + print(pipe.children) + # [Pipe('a', 'b', 'c')] + ``` + +- **Added support for `type:label` syntax in `mrsm.get_connector()`.** + The factory function `mrsm.get_connector()` expects the type and label as two arguments, but this patch allows for passing a single string with both arguments: + + ```python + import meerschaum as mrsm + print(mrsm.get_connector('sql:local')) + # sql:local + ``` + +- **Fixed more edge case bugs.** + For example, converting to `Int64` sometimes breaks with older versions of `pandas`. This patch adds a workaround. + ### v1.4.10 - **Fixed an issue with syncing background jobs.** diff --git a/docs/mkdocs/news/changelog.md b/docs/mkdocs/news/changelog.md index e44bb560..4af2433c 100644 --- a/docs/mkdocs/news/changelog.md +++ b/docs/mkdocs/news/changelog.md @@ -4,6 +4,56 @@ This is the current release cycle, so stay tuned for future releases! +### v1.4.11 + +- **Add support for older versions of MySQL.** + The `WITH` keyword for CTE blocks was not introduced until MySQL 8.0. This patch uses the older syntax for older versions of MySQL and MariaDB. MySQL 5.7 was added to the test suite. + +- **Allow for any iterable in `items_str()`** + If an iterable other than a list is passed to `items_str()`, it will convert to a list before building the string: + + ```python + from meerschaum.utils.misc import items_str + print(items_str({'apples': 1, 'bananas': 2}, quotes=False) + # apples and bananas + ``` + +- **Fixed an edge case with `datetime` set to `None`.** + This patch will ignore the datetime index even if it was set explicitly to `None`. + +- **Added `Pipe.children`.** + To complement `Pipe.parents`, setting the parameters key `children` to a list of pipes' keys will be treated the same as `Pipe.parents`: + + ```python + import meerschaum as mrsm + pipe = mrsm.Pipe( + 'a', 'b', + parameters = { + 'children': [ + { + 'connector': 'a', + 'metric': 'b', + 'location': 'c', + }, + ] + } + ) + print(pipe.children) + # [Pipe('a', 'b', 'c')] + ``` + +- **Added support for `type:label` syntax in `mrsm.get_connector()`.** + The factory function `mrsm.get_connector()` expects the type and label as two arguments, but this patch allows for passing a single string with both arguments: + + ```python + import meerschaum as mrsm + print(mrsm.get_connector('sql:local')) + # sql:local + ``` + +- **Fixed more edge case bugs.** + For example, converting to `Int64` sometimes breaks with older versions of `pandas`. This patch adds a workaround. + ### v1.4.10 - **Fixed an issue with syncing background jobs.** diff --git a/meerschaum/config/_version.py b/meerschaum/config/_version.py index a165a19c..b4827f3e 100644 --- a/meerschaum/config/_version.py +++ b/meerschaum/config/_version.py @@ -2,4 +2,4 @@ Specify the Meerschaum release version. """ -__version__ = "1.4.10" +__version__ = "1.4.11" diff --git a/meerschaum/connectors/__init__.py b/meerschaum/connectors/__init__.py index f70f178f..ba431104 100644 --- a/meerschaum/connectors/__init__.py +++ b/meerschaum/connectors/__init__.py @@ -126,6 +126,8 @@ def get_connector( from meerschaum.config import get_config from meerschaum.config.static import STATIC_CONFIG global _loaded_plugin_connectors + if isinstance(type, str) and not label and ':' in type: + type, label = type.split(':', maxsplit=1) with _locks['_loaded_plugin_connectors']: if not _loaded_plugin_connectors: load_plugin_connectors() diff --git a/meerschaum/connectors/sql/_fetch.py b/meerschaum/connectors/sql/_fetch.py index fb6b1cc6..9f8bd197 100644 --- a/meerschaum/connectors/sql/_fetch.py +++ b/meerschaum/connectors/sql/_fetch.py @@ -106,7 +106,7 @@ def get_pipe_metadef( definition = get_pipe_query(pipe) btm = get_pipe_backtrack_minutes(pipe) - if not pipe.columns or 'datetime' not in pipe.columns: + if not pipe.columns.get('datetime', None): _dt = pipe.guess_datetime() dt_name = sql_item_name(_dt, self.flavor) if _dt else None is_guess = True @@ -252,7 +252,11 @@ def get_pipe_backtrack_minutes(pipe) -> Union[int, float]: def _simple_fetch_query(pipe, debug: bool=False, **kw) -> str: """Build a fetch query from a pipe's definition.""" definition = get_pipe_query(pipe) - return f"WITH definition AS ({definition}) SELECT * FROM definition" + return ( + f"WITH definition AS ({definition}) SELECT * FROM definition" + if pipe.connector.flavor not in ('mysql', 'mariadb') + else f"SELECT * FROM ({definition}) AS definition" + ) def _join_fetch_query( pipe, @@ -299,10 +303,17 @@ def _join_fetch_query( _sync_times_q = _sync_times_q[:(-1 * len('UNION ALL\n'))] + ")" definition = get_pipe_query(pipe) - query = f""" + query = ( + f""" WITH definition AS ({definition}){_sync_times_q} SELECT definition.* - FROM definition + FROM definition""" + if pipe.connector.flavor not in ('mysql', 'mariadb') + else ( + f""" + SELECT * FROM ({definition}) AS definition""" + ) + ) + f""" LEFT OUTER JOIN {sync_times_remote_name} AS st ON st.{id_remote_name} = definition.{id_remote_name} WHERE definition.{dt_remote_name} > st.{dt_remote_name} diff --git a/meerschaum/connectors/sql/_pipes.py b/meerschaum/connectors/sql/_pipes.py index c3bbc110..d2706c27 100644 --- a/meerschaum/connectors/sql/_pipes.py +++ b/meerschaum/connectors/sql/_pipes.py @@ -2015,11 +2015,18 @@ def get_pipe_rowcount( f"SELECT {', '.join(_cols_names)} FROM {_pipe_name}" if not remote else get_pipe_query(pipe) ) - query = f""" - WITH src AS ({src}) - SELECT COUNT(*) - FROM src - """ + query = ( + f""" + WITH src AS ({src}) + SELECT COUNT(*) + FROM src + """ + ) if self.flavor not in ('mysql', 'mariadb') else ( + f""" + SELECT COUNT(*) + FROM ({src}) AS src + """ + ) if begin is not None or end is not None: query += "WHERE" if begin is not None: diff --git a/meerschaum/core/Pipe/__init__.py b/meerschaum/core/Pipe/__init__.py index a9eb29e6..350efc07 100644 --- a/meerschaum/core/Pipe/__init__.py +++ b/meerschaum/core/Pipe/__init__.py @@ -97,6 +97,7 @@ class Pipe: id, get_val_column, parents, + children, target, _target_legacy, guess_datetime, diff --git a/meerschaum/core/Pipe/_attributes.py b/meerschaum/core/Pipe/_attributes.py index 0c1ab957..3f631ed4 100644 --- a/meerschaum/core/Pipe/_attributes.py +++ b/meerschaum/core/Pipe/_attributes.py @@ -306,10 +306,7 @@ def get_val_column(self, debug: bool = False) -> Union[str, None]: @property def parents(self) -> List[meerschaum.Pipe]: """ - Return a list of `meerschaum.Pipe` objects. - These pipes will be synced before this pipe. - - NOTE: Not yet in use! + Return a list of `meerschaum.Pipe` objects to be designated as parents. """ if 'parents' not in self.parameters: return [] @@ -333,6 +330,33 @@ def parents(self) -> List[meerschaum.Pipe]: return _parents +@property +def children(self) -> List[meerschaum.Pipe]: + """ + Return a list of `meerschaum.Pipe` objects to be designated as children. + """ + if 'children' not in self.parameters: + return [] + from meerschaum.utils.warnings import warn + _children_keys = self.parameters['children'] + if not isinstance(_children_keys, list): + warn( + f"Please ensure the children for {self} are defined as a list of keys.", + stacklevel = 4 + ) + return [] + from meerschaum import Pipe + _children = [] + for keys in _children_keys: + try: + p = Pipe(**keys) + except Exception as e: + warn(f"Unable to build parent with keys '{keys}' for {self}:\n{e}") + continue + _children.append(p) + return _children + + @property def target(self) -> str: """ diff --git a/meerschaum/core/Pipe/_dtypes.py b/meerschaum/core/Pipe/_dtypes.py index a42f7ef6..b06c0161 100644 --- a/meerschaum/core/Pipe/_dtypes.py +++ b/meerschaum/core/Pipe/_dtypes.py @@ -126,6 +126,12 @@ def enforce_dtypes(self, df: 'pd.DataFrame', debug: bool=False) -> 'pd.DataFrame except Exception as e: if debug: dprint(f"Encountered an error when casting column {d} to type {t}:\n{e}") + if 'int' in str(t.lower()): + try: + new_df[d] = new_df[d].astype('float64').astype(t) + except Exception as e: + if debug: + dprint(f"Was unable to convert to float then {t}.") return new_df diff --git a/meerschaum/utils/misc.py b/meerschaum/utils/misc.py index 18d46d27..673f2015 100644 --- a/meerschaum/utils/misc.py +++ b/meerschaum/utils/misc.py @@ -1539,10 +1539,10 @@ def items_str( c = comma_str if commas else '' if len(items) == 1: - return q + str(items[0]) + q + return q + str(list(items)[0]) + q if len(items) == 2: - return q + str(items[0]) + q + s + a + s + q + str(items[1]) + q + return q + str(list(items)[0]) + q + s + a + s + q + str(list(items)[1]) + q sep = q + c + s + q output = q + sep.join(str(i) for i in items[:-1]) + q diff --git a/meerschaum/utils/sql.py b/meerschaum/utils/sql.py index b23e72dc..aa2ca237 100644 --- a/meerschaum/utils/sql.py +++ b/meerschaum/utils/sql.py @@ -31,17 +31,15 @@ {and_subquery_f} """, 'mysql': """ - UPDATE {target_table_name} AS f - INNER JOIN (SELECT DISTINCT * FROM {patch_table_name}) AS p - ON {and_subquery_f} + UPDATE {target_table_name} AS f, + (SELECT DISTINCT * FROM {patch_table_name}) AS p {sets_subquery_f} WHERE {and_subquery_f} """, 'mariadb': """ - UPDATE {target_table_name} AS f - INNER JOIN (SELECT DISTINCT * FROM {patch_table_name}) AS p - ON {and_subquery_f} + UPDATE {target_table_name} AS f, + (SELECT DISTINCT * FROM {patch_table_name}) AS p {sets_subquery_f} WHERE {and_subquery_f} @@ -128,7 +126,10 @@ 'FLOAT': 'float64', 'DOUBLE_PRECISION': 'float64', 'DOUBLE': 'float64', + 'DECIMAL': 'float64', 'BIGINT': 'Int64', + 'INT': 'Int64', + 'INTEGER': 'Int64', 'NUMBER': 'float64', 'TIMESTAMP': 'datetime64[ns]', 'TIMESTAMP WITH TIMEZONE': 'datetime64[ns, UTC]', @@ -146,6 +147,7 @@ 'TIME': 'datetime64[ns]', 'DATE': 'datetime64[ns]', 'DOUBLE': 'float64', + 'DECIMAL': 'float64', 'INT': 'Int64', 'BOOL': 'bool', }, @@ -154,12 +156,12 @@ ### MySQL doesn't allow for casting as BIGINT, so this is a workaround. DB_FLAVORS_CAST_DTYPES = { 'mariadb': { - 'BIGINT': 'DOUBLE', + 'BIGINT': 'DECIMAL', 'TINYINT': 'INT', 'TEXT': 'CHAR(10000) CHARACTER SET utf8', }, 'mysql': { - 'BIGINT': 'DOUBLE', + 'BIGINT': 'DECIMAL', 'TINYINT': 'INT', 'TEXT': 'CHAR(10000) CHARACTER SET utf8', }, @@ -169,6 +171,8 @@ 'mssql': { 'NVARCHAR COLLATE "SQL Latin1 General CP1 CI AS"': 'NVARCHAR(MAX)', 'NVARCHAR COLLATE "SQL_Latin1_General_CP1_CI_AS"': 'NVARCHAR(MAX)', + 'VARCHAR COLLATE "SQL Latin1 General CP1 CI AS"': 'NVARCHAR(MAX)', + 'VARCHAR COLLATE "SQL_Latin1_General_CP1_CI_AS"': 'NVARCHAR(MAX)', }, } ### Map pandas dtypes to flavor-specific dtypes. @@ -202,8 +206,8 @@ 'float64': { 'timescaledb': 'DOUBLE PRECISION', 'postgresql': 'DOUBLE PRECISION', - 'mariadb': 'DOUBLE', - 'mysql': 'DOUBLE', + 'mariadb': 'DECIMAL', + 'mysql': 'DECIMAL', 'mssql': 'FLOAT', 'oracle': 'FLOAT', 'sqlite': 'FLOAT', @@ -490,10 +494,19 @@ def get_distinct_col_count( _col_name = sql_item_name(col, connector.flavor) - _meta_query = f""" - WITH src AS ( {query} ), - dist AS ( SELECT DISTINCT {_col_name} FROM src ) - SELECT COUNT(*) FROM dist""" + _meta_query = ( + f""" + WITH src AS ( {query} ), + dist AS ( SELECT DISTINCT {_col_name} FROM src ) + SELECT COUNT(*) FROM dist""" + ) if self.flavor not in ('mysql', 'mariadb') else ( + f""" + SELECT COUNT(*) + FROM ( + SELECT DISTINCT {_col_name} + FROM ({query}) AS src + ) AS dist""" + ) result = connector.value(_meta_query, debug=debug) try: diff --git a/scripts/setup.sh b/scripts/setup.sh index 6558b0aa..67724dc0 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -6,18 +6,19 @@ cd "$PARENT" ### Install python packages. +[ -z "$PYTHON_BIN" ] && export PYTHON_BIN=python reqs_file="/tmp/mrsm_dev_setup_reqs.txt" -python -m meerschaum install package wheel --venv None --debug -python -m meerschaum show packages dev-tools --nopretty > "$reqs_file" -python -m meerschaum show packages docs --nopretty >> "$reqs_file" -python -m pip install --upgrade -r "$reqs_file" || exit 1 +$PYTHON_BIN -m meerschaum install package wheel --venv None --debug +$PYTHON_BIN -m meerschaum show packages dev-tools --nopretty > "$reqs_file" +$PYTHON_BIN -m meerschaum show packages docs --nopretty >> "$reqs_file" +$PYTHON_BIN -m pip install --upgrade -r "$reqs_file" || exit 1 ### Install pdoc3 and docker-compose outside of the declared docs dependencies. -python -m pip install pdoc3 || exit 1 -python -m pip install docker-compose || exit 1 +$PYTHON_BIN -m pip install pdoc3 || exit 1 +$PYTHON_BIN -m pip install docker-compose || exit 1 rm -f "$reqs_file" ### Install Meerschaum plugins. -python -m meerschaum install plugin thanks +$PYTHON_BIN -m meerschaum install plugin thanks ### Enable docker buildx. diff --git a/scripts/test.sh b/scripts/test.sh index 34d6c902..2407374e 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -9,6 +9,8 @@ mkdir -p "$test_root" test_port="8989" export MRSM_ROOT_DIR=$test_root +[ -z "$PYTHON_BIN" ] && export PYTHON_BIN=python + ### Start the test databases. if [ "$1" == "db" ]; then cd tests/ @@ -17,31 +19,31 @@ if [ "$1" == "db" ]; then fi ### Install the `stress` plugin. -python -m meerschaum install plugin stress +$PYTHON_BIN -m meerschaum install plugin stress ### Start the test API. -api_exists=$(MRSM_ROOT_DIR="$test_root" python -m meerschaum show jobs test_api --nopretty) +api_exists=$(MRSM_ROOT_DIR="$test_root" $PYTHON_BIN -m meerschaum show jobs test_api --nopretty) if [ "$api_exists" != "test_api" ]; then - python -m meerschaum start api \ + $PYTHON_BIN -m meerschaum start api \ -w 1 -p $test_port --name test_api -y -d -i sql:memory else - python -m meerschaum start jobs test_api -y + $PYTHON_BIN -m meerschaum start jobs test_api -y fi -python -m meerschaum start jobs test_api -y -MRSM_API_TEST=http://user:pass@localhost:$test_port python -m meerschaum start connectors api:test -python -m meerschaum start jobs test_api -y +$PYTHON_BIN -m meerschaum start jobs test_api -y +MRSM_API_TEST=http://user:pass@localhost:$test_port $PYTHON_BIN -m meerschaum start connectors api:test +$PYTHON_BIN -m meerschaum start jobs test_api -y ### This is necessary to trigger installations in a clean environment. -python -c " +$PYTHON_BIN -c " from tests.connectors import conns [conn.URI for conn in conns.values()] " -MRSM_CONNS=$(python -c " +MRSM_CONNS=$($PYTHON_BIN -c " from tests.connectors import conns print(' '.join([str(c) for c in conns.values()]))") -MRSM_URIS=$(python -c " +MRSM_URIS=$($PYTHON_BIN -c " from tests.connectors import conns print(' '.join([ 'MRSM_' + c.type.upper() + '_' + c.label.upper() + '=' + c.URI @@ -49,8 +51,8 @@ print(' '.join([ ]))") export $MRSM_URIS -python -m meerschaum show connectors -python -m meerschaum start connectors $MRSM_CONNS +$PYTHON_BIN -m meerschaum show connectors +$PYTHON_BIN -m meerschaum start connectors $MRSM_CONNS export ff="" if [ "$2" == "--ff" ]; then @@ -58,7 +60,7 @@ if [ "$2" == "--ff" ]; then fi ### Execute the pytest tests. -python -m pytest \ +$PYTHON_BIN -m pytest \ --durations=0 \ --ignore=portable/ --ignore=test_root/ --ignore=tests/data/ --ignore=docs/ $ff; rc="$?" @@ -67,7 +69,7 @@ if [ "$2" == "rm" ]; then cd tests/ docker-compose down -v cd ../ - python -m meerschaum delete job test_api -f -y + $PYTHON_BIN -m meerschaum delete job test_api -f -y fi exit "$rc" diff --git a/tests/connectors.py b/tests/connectors.py index 2fbf2b6d..50a34203 100644 --- a/tests/connectors.py +++ b/tests/connectors.py @@ -18,6 +18,10 @@ flavor='mariadb', username='test', password='test1234', database='testdb', port=3309, host='localhost', ), + 'mysql': get_connector('sql', 'test_mysql', + flavor='mysql', username='root', password='my-secret-pw', database='mysql', + port=3310, host='localhost', + ), 'mssql': get_connector('sql', 'test_mssql', flavor='mssql', username='sa', password='supersecureSECRETPASSWORD123!', database='master', port=1439, host='localhost', diff --git a/tests/docker-compose.yaml b/tests/docker-compose.yaml index 55c6c5f2..19febcd6 100644 --- a/tests/docker-compose.yaml +++ b/tests/docker-compose.yaml @@ -3,10 +3,13 @@ volumes: timescaledb_volume: mssql_volume: mariadb_volume: + mysql_volume: # cockroachdb_volume: oracle_volume: citus_healthcheck_volume: + services: + timescaledb: environment: - TIMESCALEDB_TELEMETRY=off @@ -19,6 +22,7 @@ services: volumes: - timescaledb_volume:/var/lib/postgresql/data:z image: timescale/timescaledb:latest-pg14-oss + mssql: environment: - ACCEPT_EULA=Y @@ -28,6 +32,7 @@ services: image: mcr.microsoft.com/mssql/server:2017-latest volumes: - mssql_volume:/var/opt/mssql:z + mariadb: environment: - MARIADB_ROOT_PASSWORD=my-secret-pw @@ -39,6 +44,19 @@ services: - 3309:3306 volumes: - mariadb_volume:/var/lib/mysql:z + + mysql: + environment: + - MYSQL_ROOT_PASSWORD=my-secret-pw + - MYSQL_USER=test + - MYSQL_PASSWORD=test1234 + - MYSQL_DATABASE=testdb + image: cytopia/mysql-5.7 + ports: + - 3310:3306 + volumes: + - mysql_volume:/var/lib/mysql:z + # cockroachdb: # image: cockroachdb/cockroach:v21.1.2 # ports: @@ -52,6 +70,7 @@ services: - 1529:1521 volumes: - oracle_volume:/u01/app/oracle + citus_master: image: "citusdata/citus:11.0.2" ports: @@ -65,6 +84,7 @@ services: PGPASSWORD: test1234 POSTGRES_HOST_AUTH_METHOD: "${POSTGRES_HOST_AUTH_METHOD:-trust}" CITUS_HOST: citus_master + citus_worker: image: "citusdata/citus:11.0.2" labels: ["com.citusdata.role=Worker"] @@ -73,6 +93,7 @@ services: environment: *CITUS_AUTH volumes: - citus_healthcheck_volume:/healthcheck + citus_manager: image: "citusdata/membership-manager:0.3.0" volumes: