Skip to content

Demo: Data Skipping

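This demo shows Data Skipping in action: it creates a small Delta table and inspects the per-file statistics (numRecords, minValues, maxValues, nullCount) that data skipping relies on.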

./bin/spark-shell \
  --packages io.delta:delta-core_2.12:1.2.1 \
  --conf spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension \
  --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog
// Name of the demo table.
val tableName = "d01"
// Write the five rows produced by spark.range(5) (a single `id` column) as a Delta table
// and register it under tableName.
spark.range(5).write.format("delta").saveAsTable(tableName)
import org.apache.spark.sql.delta._
import org.apache.spark.sql.catalyst.TableIdentifier
// Look up the DeltaLog of the table through its table identifier.
val d01 = DeltaLog.forTable(spark, TableIdentifier(tableName))
// Empty list of partition filters passed to filesWithStatsForScan below.
val partitionFilters = Nil
// Print the schema of the per-file dataset (file metadata plus the `stats` struct)
// returned by the current snapshot.
d01.snapshot.filesWithStatsForScan(partitionFilters).printSchema
root
 |-- path: string (nullable = true)
 |-- partitionValues: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- size: long (nullable = false)
 |-- modificationTime: long (nullable = false)
 |-- dataChange: boolean (nullable = false)
 |-- stats: struct (nullable = true)
 |    |-- numRecords: long (nullable = true)
 |    |-- minValues: struct (nullable = true)
 |    |    |-- id: long (nullable = true)
 |    |-- maxValues: struct (nullable = true)
 |    |    |-- id: long (nullable = true)
 |    |-- nullCount: struct (nullable = true)
 |    |    |-- id: long (nullable = true)
 |-- tags: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
// Keep each file's path and size, and expand the nested `stats` struct into
// top-level columns (numRecords, minValues, maxValues, nullCount).
val tableStats = d01.snapshot.filesWithStatsForScan(partitionFilters).select('path, 'size, $"stats.*")
scala> tableStats.show
+--------------------+----+----------+---------+---------+---------+
|                path|size|numRecords|minValues|maxValues|nullCount|
+--------------------+----+----------+---------+---------+---------+
|part-00003-e2489b...| 478|         1|      {0}|      {0}|      {0}|
|part-00012-81afe9...| 478|         1|      {3}|      {3}|      {0}|
|part-00015-10c3b3...| 478|         1|      {4}|      {4}|      {0}|
|part-00006-fc62fe...| 478|         1|      {1}|      {1}|      {0}|
|part-00009-04ceb6...| 478|         1|      {2}|      {2}|      {0}|
|part-00000-ef076d...| 296|         0|   {null}|   {null}|   {null}|
+--------------------+----+----------+---------+---------+---------+
Back to top