[SPARK-34944][SQL][TESTS] Replace bigint with int for web_returns and store_returns in TPCDS tests to employ correct data type #32037

Closed · wants to merge 24 commits
116 changes: 58 additions & 58 deletions .github/workflows/build_and_test.yml
@@ -462,61 +462,61 @@ jobs:
run: |
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile

tpcds-1g:
name: Run TPC-DS queries with SF=1
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v2
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('tpcds-sf-1/.spark-tpcds-sf-1.md5') }}
restore-keys: |
tpcds-
- name: Checkout TPC-DS (SF=1) generated data repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
Member: We need to always fetch the data?

Member Author: It failed the first time, so I turned it off. Shall I turn it back on now?

Member Author:
13:37:51.854 ERROR org.apache.spark.executor.Executor: Exception in task 0.0 in stage 315.0 (TID 204)
java.lang.UnsupportedOperationException: org.apache.parquet.column.values.dictionary.PlainValuesDictionary$PlainLongDictionary
	at org.apache.parquet.column.Dictionary.decodeToInt(Dictionary.java:45)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetDictionary.decodeToInt(ParquetDictionary.java:38)
	at org.apache.spark.sql.execution.vectorized.OnHeapColumnVector.getInt(OnHeapColumnVector.java:298)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:344)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:498)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1437)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:501)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)

Do we need to disable the cache and merge this PR, then enable the cache again? I'm still seeing the above error when the cache is on.

Contributor: What's the mechanism to refresh the cache?

Member Author: I guess it's based on the cache key here https://github.com/apache/spark/pull/32037/files#diff-48c0ee97c53013d18d6bbae44648f7fab9af2e0bf5b0dc1ca761e18ec5c478f2R484, which is derived from the cached data itself (a mistake we made before, it seems?), so it never changes unless we disable the cache and regenerate the data entirely. Let me try modifying the key to rely on the latest revision of maropu/tpcds-sf-1 instead.

Contributor:

> rely on the latest revision of maropu/tpcds-sf-1

SGTM

Member Author: It may still hit the stale cache through the fuzzy matching of restore-keys, which seems useless for this case. If it fails again, I will remove that too and try again.

Member: Yeah, it looks better to check whether it can work correctly without restore-keys.
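For illustration, a minimal sketch of those two steps (an assumption about the eventual shape, not what this PR ends up committing; the key string is illustrative): the cache key embeds the pinned data-repo revision instead of hashing a file inside the cached data, and restore-keys is dropped so a stale cache can never be restored by prefix match.

- name: Cache TPC-DS generated data
  id: cache-tpcds-sf-1
  uses: actions/cache@v2
  with:
    path: ./tpcds-sf-1
    # Key on the pinned data-repo revision; bump the ref below to refresh the cache.
    # No restore-keys, so an old prefix-matched cache is never reused.
    key: tpcds-6b660a53091bd6d23cbe58b0f09aae08e71cc667
- name: Checkout TPC-DS (SF=1) generated data repository
  if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
  uses: actions/checkout@v2
  with:
    repository: maropu/spark-tpcds-sf-1
    ref: 6b660a53091bd6d23cbe58b0f09aae08e71cc667
    path: ./tpcds-sf-1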

uses: actions/checkout@v2
with:
repository: maropu/spark-tpcds-sf-1
ref: 6b660a53091bd6d23cbe58b0f09aae08e71cc667
path: ./tpcds-sf-1
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: tpcds-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
tpcds-coursier-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
- name: Run TPC-DS queries
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-tpcds--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"
# tpcds-1g:
# name: Run TPC-DS queries with SF=1
# runs-on: ubuntu-20.04
# steps:
# - name: Checkout Spark repository
# uses: actions/checkout@v2
# - name: Cache TPC-DS generated data
# id: cache-tpcds-sf-1
# uses: actions/cache@v2
# with:
# path: ./tpcds-sf-1
# key: tpcds-${{ hashFiles('tpcds-sf-1/.spark-tpcds-sf-1.md5') }}
# restore-keys: |
# tpcds-
# - name: Checkout TPC-DS (SF=1) generated data repository
# if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
# uses: actions/checkout@v2
# with:
# repository: maropu/spark-tpcds-sf-1
# ref: 6b660a53091bd6d23cbe58b0f09aae08e71cc667
# path: ./tpcds-sf-1
# - name: Cache Scala, SBT and Maven
# uses: actions/cache@v2
# with:
# path: |
# build/apache-maven-*
# build/scala-*
# build/*.jar
# ~/.sbt
# key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
# restore-keys: |
# build-
# - name: Cache Coursier local repository
# uses: actions/cache@v2
# with:
# path: ~/.cache/coursier
# key: tpcds-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
# restore-keys: |
# tpcds-coursier-
# - name: Install Java 8
# uses: actions/setup-java@v1
# with:
# java-version: 8
# - name: Run TPC-DS queries
# run: |
# SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
# - name: Upload test results to report
# if: always()
# uses: actions/upload-artifact@v2
# with:
# name: test-results-tpcds--8-hadoop3.2-hive2.3
# path: "**/target/test-reports/*.xml"
# - name: Upload unit tests log files
# if: failure()
# uses: actions/upload-artifact@v2
# with:
# name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3
# path: "**/target/unit-tests.log"
@@ -54,7 +54,7 @@ Batched: true
Location: InMemoryFileIndex []
PartitionFilters: [isnotnull(sr_returned_date_sk#4), dynamicpruningexpression(sr_returned_date_sk#4 IN dynamicpruning#5)]
PushedFilters: [IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)]
ReadSchema: struct<sr_customer_sk:bigint,sr_store_sk:bigint,sr_return_amt:decimal(7,2)>
ReadSchema: struct<sr_customer_sk:int,sr_store_sk:int,sr_return_amt:decimal(7,2)>

(2) ColumnarToRow [codegen id : 2]
Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4]
@@ -87,7 +87,7 @@ Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))

(9) BroadcastHashJoin [codegen id : 2]
Left keys [1]: [sr_returned_date_sk#4]
Right keys [1]: [cast(d_date_sk#6 as bigint)]
Right keys [1]: [d_date_sk#6]
Join condition: None

(10) Project [codegen id : 2]
@@ -122,7 +122,7 @@ Batched: true
Location: InMemoryFileIndex []
PartitionFilters: [isnotnull(sr_returned_date_sk#19), dynamicpruningexpression(sr_returned_date_sk#19 IN dynamicpruning#5)]
PushedFilters: [IsNotNull(sr_store_sk)]
ReadSchema: struct<sr_customer_sk:bigint,sr_store_sk:bigint,sr_return_amt:decimal(7,2)>
ReadSchema: struct<sr_customer_sk:int,sr_store_sk:int,sr_return_amt:decimal(7,2)>

(16) ColumnarToRow [codegen id : 4]
Input [4]: [sr_customer_sk#16, sr_store_sk#17, sr_return_amt#18, sr_returned_date_sk#19]
@@ -136,7 +136,7 @@ Output [1]: [d_date_sk#20]

(19) BroadcastHashJoin [codegen id : 4]
Left keys [1]: [sr_returned_date_sk#19]
Right keys [1]: [cast(d_date_sk#20 as bigint)]
Right keys [1]: [d_date_sk#20]
Join condition: None

(20) Project [codegen id : 4]
@@ -185,7 +185,7 @@ Condition : isnotnull((avg(ctr_total_return) * 1.2)#31)

(28) BroadcastExchange
Input [2]: [(avg(ctr_total_return) * 1.2)#31, ctr_store_sk#14#32]
Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [id=#33]
Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#33]

(29) BroadcastHashJoin [codegen id : 8]
Left keys [1]: [ctr_store_sk#14]
@@ -220,7 +220,7 @@ Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))

(36) BroadcastHashJoin [codegen id : 8]
Left keys [1]: [ctr_store_sk#14]
Right keys [1]: [cast(s_store_sk#34 as bigint)]
Right keys [1]: [s_store_sk#34]
Join condition: None

(37) Project [codegen id : 8]
@@ -251,15 +251,15 @@ Condition : isnotnull(c_customer_sk#38)

(43) Exchange
Input [2]: [c_customer_sk#38, c_customer_id#39]
Arguments: hashpartitioning(cast(c_customer_sk#38 as bigint), 5), ENSURE_REQUIREMENTS, [id=#40]
Arguments: hashpartitioning(c_customer_sk#38, 5), ENSURE_REQUIREMENTS, [id=#40]

(44) Sort [codegen id : 11]
Input [2]: [c_customer_sk#38, c_customer_id#39]
Arguments: [cast(c_customer_sk#38 as bigint) ASC NULLS FIRST], false, 0
Arguments: [c_customer_sk#38 ASC NULLS FIRST], false, 0

(45) SortMergeJoin [codegen id : 12]
Left keys [1]: [ctr_customer_sk#13]
Right keys [1]: [cast(c_customer_sk#38 as bigint)]
Right keys [1]: [c_customer_sk#38]
Join condition: None

(46) Project [codegen id : 12]
@@ -11,7 +11,7 @@ TakeOrderedAndProject [c_customer_id]
Project [ctr_customer_sk]
BroadcastHashJoin [ctr_store_sk,s_store_sk]
Project [ctr_customer_sk,ctr_store_sk]
BroadcastHashJoin [ctr_store_sk,ctr_store_skL,ctr_total_return,(avg(ctr_total_return) * 1.2)]
BroadcastHashJoin [ctr_store_sk,ctr_store_sk,ctr_total_return,(avg(ctr_total_return) * 1.2)]
Filter [ctr_total_return]
HashAggregate [sr_customer_sk,sr_store_sk,sum] [sum(UnscaledValue(sr_return_amt)),ctr_customer_sk,ctr_store_sk,ctr_total_return,sum]
InputAdapter
@@ -38,7 +38,7 @@ TakeOrderedAndProject [c_customer_id]
BroadcastExchange #4
WholeStageCodegen (6)
Filter [(avg(ctr_total_return) * 1.2)]
HashAggregate [ctr_store_sk,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),ctr_store_skL,sum,count]
HashAggregate [ctr_store_sk,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),ctr_store_sk,sum,count]
InputAdapter
Exchange [ctr_store_sk] #5
WholeStageCodegen (5)
@@ -51,7 +51,7 @@ Batched: true
Location: InMemoryFileIndex []
PartitionFilters: [isnotnull(sr_returned_date_sk#4), dynamicpruningexpression(sr_returned_date_sk#4 IN dynamicpruning#5)]
PushedFilters: [IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)]
ReadSchema: struct<sr_customer_sk:bigint,sr_store_sk:bigint,sr_return_amt:decimal(7,2)>
ReadSchema: struct<sr_customer_sk:int,sr_store_sk:int,sr_return_amt:decimal(7,2)>

(2) ColumnarToRow [codegen id : 2]
Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4]
@@ -84,7 +84,7 @@ Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))

(9) BroadcastHashJoin [codegen id : 2]
Left keys [1]: [sr_returned_date_sk#4]
Right keys [1]: [cast(d_date_sk#6 as bigint)]
Right keys [1]: [d_date_sk#6]
Join condition: None

(10) Project [codegen id : 2]
@@ -119,7 +119,7 @@ Batched: true
Location: InMemoryFileIndex []
PartitionFilters: [isnotnull(sr_returned_date_sk#19), dynamicpruningexpression(sr_returned_date_sk#19 IN dynamicpruning#5)]
PushedFilters: [IsNotNull(sr_store_sk)]
ReadSchema: struct<sr_customer_sk:bigint,sr_store_sk:bigint,sr_return_amt:decimal(7,2)>
ReadSchema: struct<sr_customer_sk:int,sr_store_sk:int,sr_return_amt:decimal(7,2)>

(16) ColumnarToRow [codegen id : 4]
Input [4]: [sr_customer_sk#16, sr_store_sk#17, sr_return_amt#18, sr_returned_date_sk#19]
@@ -133,7 +133,7 @@ Output [1]: [d_date_sk#20]

(19) BroadcastHashJoin [codegen id : 4]
Left keys [1]: [sr_returned_date_sk#19]
Right keys [1]: [cast(d_date_sk#20 as bigint)]
Right keys [1]: [d_date_sk#20]
Join condition: None

(20) Project [codegen id : 4]
@@ -182,7 +182,7 @@ Condition : isnotnull((avg(ctr_total_return) * 1.2)#31)

(28) BroadcastExchange
Input [2]: [(avg(ctr_total_return) * 1.2)#31, ctr_store_sk#14#32]
Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [id=#33]
Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#33]

(29) BroadcastHashJoin [codegen id : 9]
Left keys [1]: [ctr_store_sk#14]
@@ -217,7 +217,7 @@ Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))

(36) BroadcastHashJoin [codegen id : 9]
Left keys [1]: [ctr_store_sk#14]
Right keys [1]: [cast(s_store_sk#34 as bigint)]
Right keys [1]: [s_store_sk#34]
Join condition: None

(37) Project [codegen id : 9]
@@ -244,7 +244,7 @@ Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)

(42) BroadcastHashJoin [codegen id : 9]
Left keys [1]: [ctr_customer_sk#13]
Right keys [1]: [cast(c_customer_sk#37 as bigint)]
Right keys [1]: [c_customer_sk#37]
Join condition: None

(43) Project [codegen id : 9]
@@ -5,7 +5,7 @@ TakeOrderedAndProject [c_customer_id]
Project [ctr_customer_sk]
BroadcastHashJoin [ctr_store_sk,s_store_sk]
Project [ctr_customer_sk,ctr_store_sk]
BroadcastHashJoin [ctr_store_sk,ctr_store_skL,ctr_total_return,(avg(ctr_total_return) * 1.2)]
BroadcastHashJoin [ctr_store_sk,ctr_store_sk,ctr_total_return,(avg(ctr_total_return) * 1.2)]
Filter [ctr_total_return]
HashAggregate [sr_customer_sk,sr_store_sk,sum] [sum(UnscaledValue(sr_return_amt)),ctr_customer_sk,ctr_store_sk,ctr_total_return,sum]
InputAdapter
Expand All @@ -32,7 +32,7 @@ TakeOrderedAndProject [c_customer_id]
BroadcastExchange #3
WholeStageCodegen (6)
Filter [(avg(ctr_total_return) * 1.2)]
HashAggregate [ctr_store_sk,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),ctr_store_skL,sum,count]
HashAggregate [ctr_store_sk,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),ctr_store_sk,sum,count]
InputAdapter
Exchange [ctr_store_sk] #4
WholeStageCodegen (5)