-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add the SpatialOptimizations injector function (#17)
- Loading branch information
Showing 6 changed files with 264 additions and 11 deletions.
There are no files selected for viewing
24 changes: 24 additions & 0 deletions
24
...dex/src/main/scala/com/azavea/hiveless/spark/sql/SpatialFilterPushdownOptimizations.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
/* | ||
* Copyright 2022 Azavea | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.azavea.hiveless.spark.sql | ||
|
||
import com.azavea.hiveless.spark.sql.rules.SpatialFilterPushdownRules | ||
import org.apache.spark.sql.SparkSessionExtensions | ||
|
||
/** Spark session extensions injector: registers the spatial filter pushdown
  * optimizer rule. Enable it via the `spark.sql.extensions` configuration key.
  */
class SpatialFilterPushdownOptimizations extends (SparkSessionExtensions => Unit) {
  def apply(e: SparkSessionExtensions): Unit = {
    // inject the pushdown rule into the optimizer; the builder ignores the session
    e.injectOptimizerRule { _ =>
      SpatialFilterPushdownRules
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
32 changes: 32 additions & 0 deletions
32
spatial-index/src/test/scala/com/azavea/hiveless/InjectOptimizerTestEnvironment.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/* | ||
* Copyright 2022 Azavea | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.azavea.hiveless | ||
|
||
import com.azavea.hiveless.spark.sql.SpatialFilterPushdownOptimizations | ||
import org.apache.spark.SparkConf | ||
import org.apache.spark.sql.SQLContext | ||
import org.scalatest.{BeforeAndAfterAll, Suite} | ||
|
||
/** Test environment that relies on the Spark extensions injector rather than
  * manual optimizer-rule registration.
  */
trait InjectOptimizerTestEnvironment extends SpatialIndexHiveTestEnvironment { self: Suite with BeforeAndAfterAll =>

  /** No-op: rules are registered through `spark.sql.extensions` instead. */
  override def registerOptimizations(sqlContext: SQLContext): Unit = {}

  /** Points `spark.sql.extensions` at the injector so the optimizer rule is
    * installed when the session is created.
    */
  override def addSparkConfigProperties(config: SparkConf): Unit = {
    val injectorClassName = classOf[SpatialFilterPushdownOptimizations].getName
    config.set("spark.sql.extensions", injectorClassName)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
183 changes: 183 additions & 0 deletions
183
spatial-index/src/test/scala/com/azavea/hiveless/spatial/index/STIndexInjectorSpec.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
/* | ||
* Copyright 2022 Azavea | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.azavea.hiveless.spatial.index | ||
|
||
import com.azavea.hiveless.{InjectOptimizerTestEnvironment, SpatialIndexTestTables} | ||
import org.apache.spark.sql.catalyst.plans.logical.Filter | ||
import org.scalatest.funspec.AnyFunSpec | ||
|
||
/** Verifies that the optimizer rule installed via the extensions injector
  * rewrites `ST_Intersects` predicates over Extent columns into bbox range
  * filters, and leaves non-optimizable argument combinations untouched.
  */
class STIndexInjectorSpec extends AnyFunSpec with InjectOptimizerTestEnvironment with SpatialIndexTestTables {

  // The manually expanded bbox range predicate that the pushdown rule is
  // expected to derive from ST_Intersects over the bbox Extent column.
  private val expandedBboxQuery: String =
    """
      |SELECT * FROM polygons_parquet
      |WHERE bbox.xmin >= -75.5859375
      |AND bbox.ymin >= 40.3251777
      |AND bbox.xmax <= -72.4101562
      |AND bbox.ymax <= 43.1971673
      |""".stripMargin

  // Collects every Filter condition from the optimized logical plan of a query.
  private def optimizedFilters(df: org.apache.spark.sql.DataFrame) =
    df.queryExecution.optimizedPlan.collect { case Filter(condition, _) => condition }

  describe("ST Index functions spec") {
    it("ST_Intersects plan should be optimized") {
      val df = ssc.sql(
        """
          |SELECT * FROM polygons_parquet WHERE ST_Intersects(bbox, ST_GeomFromGeoJSON('{"type":"Polygon","coordinates":[[[-75.5859375,40.32517767999294],[-75.5859375,43.197167282501276],[-72.41015625,43.197167282501276],[-72.41015625,40.32517767999294],[-75.5859375,40.32517767999294]]]}'))
          |""".stripMargin
      )

      val dfe = ssc.sql(
        """
          |SELECT * FROM polygons_parquet
          |WHERE bbox.xmin >= -75.5859375
          |AND bbox.ymin >= 40.3251777
          |AND bbox.xmax <= -72.4101562
          |AND bbox.ymax <= 43.1971673
          |AND ST_Intersects(bbox, ST_GeomFromGeoJSON('{"type":"Polygon","coordinates":[[[-75.5859375,40.32517767999294],[-75.5859375,43.197167282501276],[-72.41015625,43.197167282501276],[-72.41015625,40.32517767999294],[-75.5859375,40.32517767999294]]]}'))
          |""".stripMargin
      )

      df.count() shouldBe dfe.count()

      // the injected rule should rewrite the plan into the manually optimized form
      optimizedFilters(df) shouldBe optimizedFilters(dfe)
    }

    it("ST_Intersects by Extent plan should be optimized") {
      val df = ssc.sql(
        """
          |SELECT * FROM polygons_parquet WHERE ST_Intersects(bbox, ST_MakeExtent(-75.5859375, 40.3251777, -72.4101562, 43.1971673))
          |""".stripMargin
      )

      val dfe = ssc.sql(expandedBboxQuery)

      df.count() shouldBe dfe.count()

      // compare optimized plans filters
      optimizedFilters(df) shouldBe optimizedFilters(dfe)
    }

    // Registers a test asserting the given query is NOT rewritten into the
    // expanded bbox form (row counts still agree on this fixture, but the
    // optimized plan filters must differ).
    def unoptimizedCase(description: String, query: String): Unit =
      it(description) {
        val df  = ssc.sql(query)
        val dfe = ssc.sql(expandedBboxQuery)

        df.count() shouldBe dfe.count()

        optimizedFilters(df) shouldNot be(optimizedFilters(dfe))
      }

    unoptimizedCase(
      "ST_Intersects optimization failure (Extent, Extent)",
      """
        |SELECT * FROM polygons_parquet WHERE ST_Intersects(bbox, bbox)
        |""".stripMargin
    )

    unoptimizedCase(
      "ST_Intersects optimization failure (Extent, Geometry)",
      """
        |SELECT * FROM polygons_parquet WHERE ST_Intersects(bbox, geom)
        |""".stripMargin
    )

    unoptimizedCase(
      "ST_Intersects optimization failure (Geometry, Geometry)",
      """
        |SELECT * FROM polygons_parquet WHERE ST_Intersects(geom, geom)
        |""".stripMargin
    )

    unoptimizedCase(
      "ST_Intersects optimization failure (Geometry, Extent)",
      """
        |SELECT * FROM polygons_parquet WHERE ST_Intersects(geom, bbox)
        |""".stripMargin
    )
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters