HyukjinKwon commented on a change in pull request #20658: [SPARK-23488][python]
Add missing catalog methods to python API
URL: https://github.com/apache/spark/pull/20658#discussion_r245191105
##########
File path: python/pyspark/sql/catalog.py
##########
@@ -138,6 +139,78 @@ def listColumns(self, tableName, dbName=None):
isBucket=jcolumn.isBucket()))
return columns
+ @ignore_unicode_prefix
+ @since(3.0)
+ def databaseExists(self, dbName):
+ """Check if the database with the specified name exists."""
+ return self._jcatalog.databaseExists(dbName)
+
+ @ignore_unicode_prefix
+ @since(3.0)
+ def functionExists(self, functionName, dbName=None):
+ """Check if the function with the specified name exists.
+
+ If no database is specified, the current database is used.
+ """
+ if dbName is None:
+ dbName = self.currentDatabase()
+ return self._jcatalog.functionExists(dbName, functionName)
+
+ @ignore_unicode_prefix
+ @since(3.0)
+ def tableExists(self, tableName, dbName=None):
+ """Check if the table or view with the specified name exists.
+
+ If no database is specified, the current database is used.
+ """
+ if dbName is None:
+ dbName = self.currentDatabase()
+ return self._jcatalog.tableExists(dbName, tableName)
+
+ @ignore_unicode_prefix
+ @since(3.0)
+ def getDatabase(self, dbName):
+ """Get the database with the specified name."""
+ database = self._jcatalog.getDatabase(dbName)
+ return Database(
+ name=database.name(),
+ description=database.description(),
+ locationUri=database.locationUri())
+
+ @ignore_unicode_prefix
+ @since(3.0)
+ def getTable(self, tableName, dbName=None):
+ """Get the table or view with the specified name.
+
+ If no database is specified, the current database is used.
+ """
+ if dbName is None:
+ dbName = self.currentDatabase()
+ table = self._jcatalog.getTable(dbName, tableName)
+ return Table(
+ name=table.name(),
+ database=table.database(),
+ description=table.description(),
+ tableType=table.tableType(),
+ isTemporary=table.isTemporary())
+
+ @ignore_unicode_prefix
+ @since(3.0)
+ def getFunction(self, functionName, dbName=None):
+ """Get the function with the specified name.
+
+ If no database is specified, the current database is used.
+ """
+ if dbName is None:
+ dbName = self.currentDatabase()
Review comment:
Can you check this logic? On the Scala side, it looks up the table and sets the
database if possible, whereas the PySpark version only sets it to
currentDatabase. We can do, for instance:
```python
args = []
if dbName is not None:
    args.append(dbName)
if functionName is not None:
    args.append(functionName)
self._jcatalog.getFunction(*args)
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]