GitHub user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/14500#discussion_r73621345
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala ---
    @@ -425,6 +431,96 @@ case class AlterTableDropPartitionCommand(
     
     }
     
    +/**
    + * Recover partitions in ALTER TABLE: discover all the partitions in the directory of a table and
    + * update the catalog.
    + *
    + * The syntax of this command is:
    + * {{{
    + *   ALTER TABLE table RECOVER PARTITIONS;
    + * }}}
    + */
    +case class AlterTableRecoverPartitionsCommand(
    +    tableName: TableIdentifier) extends RunnableCommand {
    +  override def run(spark: SparkSession): Seq[Row] = {
    +    val catalog = spark.sessionState.catalog
    +    if (!catalog.tableExists(tableName)) {
    +      throw new AnalysisException(
    +        s"Table $tableName in ALTER TABLE RECOVER PARTITIONS does not 
exist.")
    +    }
    +    val table = catalog.getTableMetadata(tableName)
    +    if (catalog.isTemporaryTable(tableName)) {
    +      throw new AnalysisException(
    +        s"Operation not allowed: ALTER TABLE RECOVER PARTITIONS on 
temporary tables: $tableName")
    +    }
    +    if (DDLUtils.isDatasourceTable(table)) {
    +      throw new AnalysisException(
    +        s"Operation not allowed: ALTER TABLE RECOVER PARTITIONS on 
datasource tables: $tableName")
    +    }
    +    if (table.tableType != CatalogTableType.EXTERNAL) {
    +      throw new AnalysisException(
    +        s"Operation not allowed: ALTER TABLE RECOVER PARTITIONS only works 
on external " +
    +          s"tables: $tableName")
    +    }
    +    if (!DDLUtils.isTablePartitioned(table)) {
    +      throw new AnalysisException(
    +        s"Operation not allowed: ALTER TABLE RECOVER PARTITIONS only works 
on partitioned " +
    +          s"tables: $tableName")
    +    }
    +    if (table.storage.locationUri.isEmpty) {
    +      throw new AnalysisException(
    +        s"Operation not allowed: ALTER TABLE RECOVER PARTITIONS only works 
on tables with " +
    +          s"location provided: $tableName")
    +    }
    +
    +    recoverPartitions(spark, table)
    +    Seq.empty[Row]
    +  }
    +
    +  def recoverPartitions(spark: SparkSession, table: CatalogTable): Unit = {
    +    val root = new Path(table.storage.locationUri.get)
    +    val fs = root.getFileSystem(spark.sparkContext.hadoopConfiguration)
    +    val partitionSpecsAndLocs = scanPartitions(spark, fs, root, Map(), table.partitionSchema.size)
    +    val parts = partitionSpecsAndLocs.map { case (spec, location) =>
    +      // inherit table storage format (possibly except for location)
    +      CatalogTablePartition(spec, table.storage.copy(locationUri = Some(location.toUri.toString)))
    +    }
    +    spark.sessionState.catalog.createPartitions(tableName,
    +      parts.toArray[CatalogTablePartition], ignoreIfExists = true)
    +  }
    +
    +  @transient private lazy val evalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8))
    +
    +  private def scanPartitions(
    +      spark: SparkSession,
    +      fs: FileSystem,
    +      path: Path,
    +      spec: TablePartitionSpec,
    +      numPartitionsLeft: Int): GenSeq[(TablePartitionSpec, Path)] = {
    +    if (numPartitionsLeft == 0) {
    +      return Seq(spec -> path)
    +    }
    +
    +    val statuses = fs.listStatus(path)
    +    val threshold = spark.conf.get("spark.rdd.parallelListingThreshold", "10").toInt
    +    val statusPar: GenSeq[FileStatus] =
    +      if (numPartitionsLeft > 1 && statuses.length > threshold || numPartitionsLeft > 2) {
    --- End diff --
    
    This condition looks confusing.
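
    Mostly the precedence: since `&&` binds tighter than `||`, the parallel branch
    is taken whenever `numPartitionsLeft > 2`, no matter what `threshold` is. If
    that is intended, naming the predicate would make it explicit. A sketch of an
    equivalent form (`numChildren` here is a hypothetical stand-in for
    `statuses.length`):

        // Same boolean as the original condition, with the precedence made
        // explicit: list in parallel when more than one partition level remains
        // and the current directory is wide, or when more than two levels remain.
        def shouldScanInParallel(
            numPartitionsLeft: Int,
            numChildren: Int,
            threshold: Int): Boolean = {
          (numPartitionsLeft > 1 && numChildren > threshold) || numPartitionsLeft > 2
        }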

