diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py
index 531cfd06..ef1bd39c 100644
--- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py
+++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py
@@ -266,15 +266,6 @@ def test_clickhouse_reader_snapshot_with_partitioning_mode_hash(spark, processin
     # So just check that any partition has at least 0 rows
     assert table_df.groupBy(spark_partition_id()).count().count() == 3
 
-    # 100 rows per 3 partitions -> each partition should contain about ~33 rows,
-    # with some variance caused by randomness & hash distribution
-    min_count_per_partition = 10
-    max_count_per_partition = 55
-
-    count_per_partition = table_df.groupBy(spark_partition_id()).count().collect()
-    for partition in count_per_partition:
-        assert min_count_per_partition <= partition["count"] <= max_count_per_partition
-
 
 @pytest.mark.parametrize(
     "column",
@@ -362,15 +353,6 @@ def test_clickhouse_reader_snapshot_with_partitioning_mode_mod_date(spark, proce
     # So just check that any partition has at least 0 rows
     assert table_df.groupBy(spark_partition_id()).count().count() == 3
 
-    # 100 rows per 3 partitions -> each partition should contain about ~33 rows,
-    # with some variance caused by randomness & hash distribution
-    min_count_per_partition = 10
-    max_count_per_partition = 55
-
-    count_per_partition = table_df.groupBy(spark_partition_id()).count().collect()
-    for partition in count_per_partition:
-        assert min_count_per_partition <= partition["count"] <= max_count_per_partition
-
 
 @pytest.mark.parametrize(
     "column",
diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py
index a6129d79..7cc4aefd 100644
--- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py
+++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py
@@ -272,16 +272,6 @@ def test_mssql_reader_snapshot_with_partitioning_mode_hash(spark, processing, lo
     # So just check that any partition has at least 0 rows
     assert table_df.groupBy(spark_partition_id()).count().count() == 3
 
-    # 100 rows per 3 partitions -> each partition should contain about ~33 rows,
-    # with some variance caused by randomness & hash distribution
-    min_count_per_partition = 10
-    max_count_per_partition = 55
-
-    count_per_partition = table_df.groupBy(spark_partition_id()).count().collect()
-
-    for partition in count_per_partition:
-        assert min_count_per_partition <= partition["count"] <= max_count_per_partition
-
 
 def test_mssql_reader_snapshot_with_partitioning_mode_mod(spark, processing, load_table_data):
     from pyspark.sql.functions import spark_partition_id
diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py
index 64e1f4d2..f4568cb8 100644
--- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py
+++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py
@@ -263,15 +263,6 @@ def test_mysql_reader_snapshot_with_partitioning_mode_hash(spark, processing, lo
     # So just check that any partition has at least 0 rows
     assert table_df.groupBy(spark_partition_id()).count().count() == 3
 
-    # 100 rows per 3 partitions -> each partition should contain about ~33 rows,
-    # with some variance caused by randomness & hash distribution (+- 50% range is wide enough)
-    min_count_per_partition = 10
-    max_count_per_partition = 55
-
-    count_per_partition = table_df.groupBy(spark_partition_id()).count().collect()
-    for partition in count_per_partition:
-        assert min_count_per_partition <= partition["count"] <= max_count_per_partition
-
 
 @pytest.mark.parametrize(
     "column",
diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py
index f5a8491a..90d7df1c 100644
--- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py
+++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py
@@ -221,16 +221,6 @@ def test_oracle_reader_snapshot_with_partitioning_mode_hash(spark, processing, l
     # So just check that any partition has at least 0 rows
     assert table_df.groupBy(spark_partition_id()).count().count() == 3
 
-    # 100 rows per 3 partitions -> each partition should contain about ~33 rows,
-    # with some variance caused by randomness & hash distribution
-    min_count_per_partition = 10
-    max_count_per_partition = 55
-
-    count_per_partition = table_df.groupBy(spark_partition_id()).count().collect()
-
-    for partition in count_per_partition:
-        assert min_count_per_partition <= partition["count"] <= max_count_per_partition
-
 # Apparently, Oracle supports modulus for text columns type
 @pytest.mark.parametrize(
     "column",
diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py
index 48d719d5..990a21f3 100644
--- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py
+++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py
@@ -508,16 +508,6 @@ def test_postgres_reader_snapshot_with_partitioning_mode_hash(spark, processing,
     # So just check that any partition has at least 0 rows
     assert table_df.groupBy(spark_partition_id()).count().count() == 3
 
-    # 100 rows per 3 partitions -> each partition should contain about ~33 rows,
-    # with some variance caused by randomness & hash distribution
-    min_count_per_partition = 10
-    max_count_per_partition = 55
-
-    count_per_partition = table_df.groupBy(spark_partition_id()).count().collect()
-
-    for partition in count_per_partition:
-        assert min_count_per_partition <= partition["count"] <= max_count_per_partition
-
 
 def test_postgres_reader_snapshot_with_partitioning_mode_mod(spark, processing, load_table_data):
     from pyspark.sql.functions import spark_partition_id
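
Note (illustrative, not part of the patch): the hunks above drop the per-partition row-count range checks (10..55 rows out of 100 across 3 partitions) and keep only the weaker assertion that every partition received data. Below is a minimal standalone PySpark sketch of that remaining check; spark.range() and local[3] are stand-ins for the JDBC-read table_df and Spark session used in the real tests.

# Illustrative sketch only -- not the tests' actual setup.
from pyspark.sql import SparkSession
from pyspark.sql.functions import spark_partition_id

spark = SparkSession.builder.master("local[3]").appName("partition-count-check").getOrCreate()

# 100 rows spread over 3 partitions, matching the 3-partition layout the tests expect.
table_df = spark.range(100).repartition(3)

# groupBy(spark_partition_id()) yields one row per non-empty partition,
# so counting those rows verifies that all 3 partitions received data.
per_partition = table_df.groupBy(spark_partition_id()).count()
assert per_partition.count() == 3

# The individual counts can still be inspected, but their exact values vary
# with randomness and the hash/mod distribution (as the removed comments note),
# so no fixed per-partition range is asserted anymore.
print(per_partition.collect())

spark.stop()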