melin commented on issue #5537:
URL: https://github.com/apache/hudi/issues/5537#issuecomment-1122313372
A hacky workaround:
```java
/**
 * AspectJ aspect that hooks the constructors of Spark's
 * {@code org.apache.spark.sql.connector.catalog.CatalogManager}.
 *
 * <p>Each time a {@code CatalogManager} is constructed, the first constructor
 * argument is captured and published through the static
 * {@code SuperiorHoodieCatalog.defaultSessionCatalog} field, after which the
 * constructor body runs as usual.
 */
@Aspect
public class CatalogManagerAspectj {

    private static final Logger LOG = LoggerFactory.getLogger(CatalogManagerAspectj.class);

    /**
     * Around-advice bound to every {@code CatalogManager} constructor execution.
     *
     * @param pjp join point describing the intercepted constructor call
     * @throws Throwable whatever the intercepted constructor throws, or a
     *         {@link ClassCastException} if the first argument is not a {@code CatalogPlugin}
     */
    @Around("execution(org.apache.spark.sql.connector.catalog.CatalogManager.new(..))")
    public void aroundCatalogManagerInit(ProceedingJoinPoint pjp) throws Throwable {
        // The first constructor argument is presumably the default session catalog
        // (confirm against the CatalogManager signature of the Spark version in use).
        final Object[] constructorArgs = pjp.getArgs();
        final CatalogPlugin sessionCatalog = (CatalogPlugin) constructorArgs[0];

        // `defaultSessionCatalog_$eq` is the setter scalac generates for the Scala `var`.
        SuperiorHoodieCatalog.defaultSessionCatalog_$eq(sessionCatalog);

        // Continue with the original constructor body.
        pjp.proceed();
    }
}
```
```scala
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn,
ColumnChange, UpdateColumnComment, UpdateColumnType}
import org.apache.spark.sql.connector.catalog._
import org.apache.spark.sql.connector.expressions.Transform
import
org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper
import
org.apache.spark.sql.hudi.catalog.SuperiorHoodieCatalog.defaultSessionCatalog
import org.apache.spark.sql.hudi.command._
import org.apache.spark.sql.types.{StructField, StructType}
import java.util
/**
 * A [[HoodieCatalog]] registered under the name `hudi` that handles Hudi tables itself and
 * forwards everything else (non-Hudi tables and all namespace operations) to the catalog
 * stored in `SuperiorHoodieCatalog.defaultSessionCatalog`.
 *
 * That field must be assigned before any method of this class is used; otherwise the
 * delegating methods fail with an [[IllegalStateException]].
 */
class SuperiorHoodieCatalog extends HoodieCatalog {

  override def name: String = "hudi"

  override def defaultNamespace: Array[String] = sessionCatalogOrFail.defaultNamespace

  /** Stages a Hudi table, or eagerly creates a non-Hudi table in the delegate catalog. */
  override def stageCreate(
      ident: Identifier,
      schema: StructType,
      partitions: Array[Transform],
      properties: util.Map[String, String]): StagedTable = {
    if (sparkAdapter.isHoodieTable(properties)) {
      HoodieStagedTable(ident, this, schema, partitions, properties, TableCreationMode.STAGE_CREATE)
    } else {
      BasicStagedTable(
        ident,
        asTableCatalog.createTable(ident, schema, partitions, properties),
        this)
    }
  }

  /** Stages a Hudi replacement, or drops and re-creates a non-Hudi table in the delegate. */
  override def stageReplace(
      ident: Identifier,
      schema: StructType,
      partitions: Array[Transform],
      properties: util.Map[String, String]): StagedTable = {
    if (sparkAdapter.isHoodieTable(properties)) {
      HoodieStagedTable(ident, this, schema, partitions, properties, TableCreationMode.STAGE_REPLACE)
    } else {
      asTableCatalog.dropTable(ident)
      BasicStagedTable(
        ident,
        asTableCatalog.createTable(ident, schema, partitions, properties),
        this)
    }
  }

  /**
   * Like [[stageReplace]], but a missing table is tolerated on the non-Hudi path: a
   * [[NoSuchTableException]] raised by the delegate's `dropTable` is ignored.
   */
  override def stageCreateOrReplace(
      ident: Identifier,
      schema: StructType,
      partitions: Array[Transform],
      properties: util.Map[String, String]): StagedTable = {
    if (sparkAdapter.isHoodieTable(properties)) {
      HoodieStagedTable(
        ident, this, schema, partitions, properties, TableCreationMode.CREATE_OR_REPLACE)
    } else {
      try asTableCatalog.dropTable(ident) catch {
        case _: NoSuchTableException => // ignore the exception
      }
      BasicStagedTable(
        ident,
        asTableCatalog.createTable(ident, schema, partitions, properties),
        this)
    }
  }

  /**
   * Loads the table from the delegate catalog. A V1 table that `sparkAdapter` recognises as
   * a Hudi table is wrapped in a [[HoodieInternalV2Table]]; any other table is returned
   * unchanged. Exceptions from the delegate propagate to the caller.
   */
  override def loadTable(ident: Identifier): Table =
    asTableCatalog.loadTable(ident) match {
      case v1: V1Table if sparkAdapter.isHoodieTable(v1.catalogTable) =>
        HoodieInternalV2Table(
          spark,
          v1.catalogTable.location.toString,
          catalogTable = Some(v1.catalogTable),
          tableIdentifier = Some(ident.toString))
      case other => other
    }

  // NOTE(review): unlike stageCreate, this does not check sparkAdapter.isHoodieTable(properties)
  // and always takes the Hudi creation path -- confirm that this is intended.
  override def createTable(
      ident: Identifier,
      schema: StructType,
      partitions: Array[Transform],
      properties: util.Map[String, String]): Table = {
    createHoodieTable(ident, schema, partitions, properties, Map.empty,
      Option.empty, TableCreationMode.CREATE)
  }

  override def tableExists(ident: Identifier): Boolean =
    asTableCatalog.tableExists(ident)

  /**
   * Drops a Hudi table through [[DropHoodieTableCommand]] (without purge) and any other
   * table through the delegate catalog.
   *
   * NOTE(review): the table is loaded first, so a failure of `loadTable` propagates
   * from here -- confirm that callers expect this.
   */
  override def dropTable(ident: Identifier): Boolean =
    loadTable(ident) match {
      case _: HoodieInternalV2Table =>
        DropHoodieTableCommand(ident.asTableIdentifier, ifExists = true,
          isView = false, purge = false).run(spark)
        true
      case _ => asTableCatalog.dropTable(ident)
    }

  /** Same as [[dropTable]], but Hudi tables are dropped with `purge = true`. */
  override def purgeTable(ident: Identifier): Boolean =
    loadTable(ident) match {
      case _: HoodieInternalV2Table =>
        DropHoodieTableCommand(ident.asTableIdentifier, ifExists = true,
          isView = false, purge = true).run(spark)
        true
      case _ => asTableCatalog.purgeTable(ident)
    }

  /** Renames a Hudi table via [[AlterHoodieTableRenameCommand]], others via the delegate. */
  @throws[NoSuchTableException]
  @throws[TableAlreadyExistsException]
  override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit =
    loadTable(oldIdent) match {
      case _: HoodieInternalV2Table =>
        new AlterHoodieTableRenameCommand(oldIdent.asTableIdentifier,
          newIdent.asTableIdentifier, false).run(spark)
      case _ => asTableCatalog.renameTable(oldIdent, newIdent)
    }

  /**
   * Applies `changes` to the table. Non-Hudi tables are altered by the delegate catalog.
   * For Hudi tables the supported changes are `AddColumn`, `UpdateColumnType` and
   * `UpdateColumnComment`; the table is reloaded afterwards and returned.
   *
   * @throws UnsupportedOperationException if a Hudi table receives a change whose class is
   *                                       neither `AddColumn` nor a `ColumnChange`
   */
  override def alterTable(ident: Identifier, changes: TableChange*): Table =
    loadTable(ident) match {
      case hoodieTable: HoodieInternalV2Table =>
        val tableIdent = TableIdentifier(ident.name(), ident.namespace().lastOption)
        applyHoodieTableChanges(tableIdent, hoodieTable, changes)
        loadTable(ident)
      case _ => asTableCatalog.alterTable(ident, changes: _*)
    }

  /** Runs the Hudi ALTER TABLE command matching each group of changes of the same class. */
  private def applyHoodieTableChanges(
      tableIdent: TableIdentifier,
      table: HoodieInternalV2Table,
      changes: Seq[TableChange]): Unit = {
    changes.groupBy(c => c.getClass).foreach {
      case (changeClass, newColumns) if changeClass == classOf[AddColumn] =>
        // All added columns are submitted in a single command; only the first name part
        // of each column is used.
        AlterHoodieTableAddColumnsCommand(
          tableIdent,
          newColumns.asInstanceOf[Seq[AddColumn]].map { col =>
            StructField(
              col.fieldNames()(0),
              col.dataType(),
              col.isNullable)
          }).run(spark)

      case (changeClass, columnChanges) if classOf[ColumnChange].isAssignableFrom(changeClass) =>
        columnChanges.foreach {
          case typeChange: UpdateColumnType =>
            val colName = UnresolvedAttribute(typeChange.fieldNames()).name
            val structField = StructField(colName, typeChange.newDataType())
            AlterHoodieTableChangeColumnCommand(tableIdent, colName, structField).run(spark)

          case commentChange: UpdateColumnComment =>
            val newComment = commentChange.newComment()
            val colName = UnresolvedAttribute(commentChange.fieldNames()).name
            val fieldOpt =
              table.schema().findNestedField(commentChange.fieldNames(), includeCollections = true,
                spark.sessionState.conf.resolver).map(_._2)
            val field = fieldOpt.getOrElse {
              throw new AnalysisException(
                s"Couldn't find column $colName in:\n${table.schema().treeString}")
            }
            AlterHoodieTableChangeColumnCommand(tableIdent, colName,
              field.withComment(newComment)).run(spark)
        }

      case (changeClass, _) =>
        // `changeClass` is already a Class; calling getClass on it would always report
        // java.lang.Class rather than the unsupported change type.
        throw new UnsupportedOperationException(
          s"not supported table change: ${changeClass.getName}")
    }
  }

  @throws[NoSuchNamespaceException]
  override def listTables(namespace: Array[String]): Array[Identifier] =
    asTableCatalog.listTables(namespace)

  override def invalidateTable(ident: Identifier): Unit = {
    asTableCatalog.invalidateTable(ident)
  }

  @throws[NoSuchNamespaceException]
  override def listNamespaces: Array[Array[String]] =
    asNamespaceCatalog.listNamespaces

  @throws[NoSuchNamespaceException]
  override def listNamespaces(namespace: Array[String]): Array[Array[String]] =
    asNamespaceCatalog.listNamespaces(namespace)

  override def namespaceExists(namespace: Array[String]): Boolean =
    asNamespaceCatalog.namespaceExists(namespace)

  @throws[NoSuchNamespaceException]
  override def loadNamespaceMetadata(namespace: Array[String]): util.Map[String, String] =
    asNamespaceCatalog.loadNamespaceMetadata(namespace)

  @throws[NamespaceAlreadyExistsException]
  override def createNamespace(
      namespace: Array[String],
      metadata: util.Map[String, String]): Unit = {
    asNamespaceCatalog.createNamespace(namespace, metadata)
  }

  @throws[NoSuchNamespaceException]
  override def alterNamespace(namespace: Array[String], changes: NamespaceChange*): Unit = {
    asNamespaceCatalog.alterNamespace(namespace, changes: _*)
  }

  @throws[NoSuchNamespaceException]
  override def dropNamespace(namespace: Array[String]): Boolean =
    asNamespaceCatalog.dropNamespace(namespace)

  /** The delegate catalog, or a descriptive failure when it has not been assigned yet. */
  private def sessionCatalogOrFail: CatalogPlugin =
    Option(defaultSessionCatalog).getOrElse {
      throw new IllegalStateException(
        "SuperiorHoodieCatalog.defaultSessionCatalog has not been set; " +
          "it must be assigned before this catalog is used")
    }

  /** The delegate catalog viewed as a [[TableCatalog]]. */
  private def asTableCatalog: TableCatalog = sessionCatalogOrFail match {
    case tableCatalog: TableCatalog => tableCatalog
    case other =>
      throw new IllegalStateException(
        s"Default session catalog ${other.getClass.getName} is not a TableCatalog")
  }

  /** The delegate catalog viewed as a [[SupportsNamespaces]]. */
  private def asNamespaceCatalog: SupportsNamespaces = sessionCatalogOrFail match {
    case namespaceCatalog: SupportsNamespaces => namespaceCatalog
    case other =>
      throw new IllegalStateException(
        s"Default session catalog ${other.getClass.getName} does not support namespaces")
  }
}
/**
 * Companion holding the catalog that `SuperiorHoodieCatalog` instances delegate to.
 */
object SuperiorHoodieCatalog {

  /**
   * Delegate catalog; starts out as `null` until something assigns it.
   * Deliberately a plain `var`: external code assigns it through the generated
   * `defaultSessionCatalog_$eq` setter, so the member name and mutability must stay.
   */
  var defaultSessionCatalog: CatalogPlugin = null
}
```
```xml
<?xml version="1.0" encoding="UTF-8" ?>
<!-- AspectJ load-time weaving configuration. -->
<aspectj>
    <!-- Aspects the weaver should apply. -->
    <aspects>
        <aspect name="com.github.melin.superior.jobserver.extensions.aspectj.CatalogManagerAspectj"/>
    </aspects>

    <!-- Verbose weaving output; weaving is limited to Spark's connector catalog package tree. -->
    <weaver options="-verbose -showWeaveInfo">
        <include within="org.apache.spark.sql.connector.catalog..*"/>
    </weaver>

    <!-- NOTE(review): this is a second <weaver> element; consider merging its option into
         the element above, after confirming how the AspectJ version in use treats duplicates. -->
    <weaver options="-XaddSerialVersionUID"/>
</aspectj>
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]