This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8a1b4793e8020891cf3937713f42bc2d98dd775e Author: Arnab Karmakar <[email protected]> AuthorDate: Wed Dec 31 02:12:24 2025 -0800 IMPALA-9935: Support individual partitions in catalog_object webUI page This patch adds support for querying individual partition metadata through the /catalog_object webUI endpoint on both CatalogServer and Coordinator. Previously, the endpoint only supported DATABASE, TABLE, VIEW, FUNCTION, DATA_SOURCE, HDFS_CACHE_POOL, PRINCIPAL, PRIVILEGE, and AUTHZ_CACHE_INVALIDATION catalog object types. This change adds support for HDFS_PARTITION objects. Endpoint Availability: - Catalog V1 (legacy catalog mode): Works on both CatalogServer (port 25020) and Coordinator (port 25000) - Catalog V2 (local catalog mode): Only works on CatalogServer (port 25020) URL Examples: - Basic: /catalog_object?object_type=HDFS_PARTITION&object_name=mydb.mytable%3Ayear%3D2010%2Fmonth%3D3 - Multi-key: /catalog_object?object_type=HDFS_PARTITION&object_name=db.tbl%3Ayear%3D2024%2Fmonth%3D12%2Fday%3D25 - Special chars: /catalog_object?object_type=HDFS_PARTITION&object_name=db.tbl%3Ads%3D2024%252F12%252F25 Testing: - Existing tests pass - New tests added for partition object retrieval in both HTML and JSON formats - Manual testing with various partition configurations - Error handling tested for edge cases Change-Id: I5645a20283e664af12d04a9665c8870c7666a74c Reviewed-on: http://gerrit.cloudera.org:8080/23816 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/catalog/catalog-util.cc | 35 +++- .../java/org/apache/impala/catalog/Catalog.java | 67 ++++++- .../org/apache/impala/catalog/FeCatalogUtils.java | 75 ++++++++ .../java/org/apache/impala/catalog/HdfsTable.java | 12 ++ .../org/apache/impala/catalog/CatalogTest.java | 198 +++++++++++++++++++++ tests/webserver/test_web_pages.py | 113 ++++++++++++ 6 files changed, 497 insertions(+), 3 deletions(-) diff --git a/be/src/catalog/catalog-util.cc b/be/src/catalog/catalog-util.cc index 26c0b5dc4..780278a9c 100644 --- a/be/src/catalog/catalog-util.cc +++ b/be/src/catalog/catalog-util.cc @@ -145,8 +145,9 @@ TCatalogObjectType::type TCatalogObjectTypeFromName(const string& name) { return TCatalogObjectType::PRINCIPAL; } else if (upper == "PRIVILEGE") { return TCatalogObjectType::PRIVILEGE; + } else if (upper == "HDFS_PARTITION") { + return TCatalogObjectType::HDFS_PARTITION; } - // TODO(IMPALA-9935): support HDFS_PARTITION return TCatalogObjectType::UNKNOWN; } @@ -175,7 +176,37 @@ Status TCatalogObjectFromObjectName(const TCatalogObjectType::type& object_type, catalog_object->table.__set_tbl_name(object_name.substr(pos + 1)); break; } - // TODO(IMPALA-9935): support HDFS_PARTITION + case TCatalogObjectType::HDFS_PARTITION: { + catalog_object->__set_type(object_type); + catalog_object->__set_hdfs_partition(THdfsPartition()); + // Parse format: dbName.tableName:partitionName + // The partitionName format is "key1=value1/key2=value2/..." + // If partition values contain special characters (like '/'), they must be + // double-encoded in the HTTP request URL because: + // 1. Hive stores such partitions on HDFS with '/' pre-encoded as '%2F' + // (e.g., directory name: ds=2024%2F12%2F25) + // 2. HTTP URL encoding must encode the '%' character as '%25' + // (e.g., HTTP URL: ds=2024%252F12%252F25) + // Example: For partition value "12/25/2025", the URL should contain: + // "ds=12%252F25%252F2025" (double-encoded) + // After URL decoding, the backend receives: "ds=12%2F25%2F2025" + // After FileUtils.unescapePathName(), the value is: "12/25/2025" + int dot_pos = object_name.find('.'); + int colon_pos = object_name.find(':'); + if (dot_pos == string::npos || dot_pos >= object_name.size() - 1 + || colon_pos == string::npos || colon_pos >= object_name.size() - 1 + || colon_pos <= dot_pos || dot_pos == 0) { + stringstream error_msg; + error_msg << "Invalid partition name: " << object_name; + return Status(error_msg.str()); + } + catalog_object->hdfs_partition.__set_db_name(object_name.substr(0, dot_pos)); + catalog_object->hdfs_partition.__set_tbl_name( + object_name.substr(dot_pos + 1, colon_pos - dot_pos - 1)); + catalog_object->hdfs_partition.__set_partition_name( + object_name.substr(colon_pos + 1)); + break; + } case TCatalogObjectType::FUNCTION: { // The key looks like: <db>.fn(<args>). We need to parse out the // db, fn and signature. diff --git a/fe/src/main/java/org/apache/impala/catalog/Catalog.java b/fe/src/main/java/org/apache/impala/catalog/Catalog.java index d1da330b8..e5f1761ef 100644 --- a/fe/src/main/java/org/apache/impala/catalog/Catalog.java +++ b/fe/src/main/java/org/apache/impala/catalog/Catalog.java @@ -38,8 +38,10 @@ import org.apache.hadoop.hive.metastore.api.LockLevel; import org.apache.hadoop.hive.metastore.api.LockType; import org.apache.impala.analysis.FunctionName; import org.apache.impala.authorization.AuthorizationPolicy; +import org.apache.impala.catalog.CatalogObject.ThriftObjectType; import org.apache.impala.catalog.MetaStoreClientPool.MetaStoreClient; import org.apache.impala.catalog.monitor.CatalogMonitor; +import org.apache.impala.common.ImpalaException; import org.apache.impala.common.TransactionException; import org.apache.impala.common.TransactionKeepalive; import org.apache.impala.common.TransactionKeepalive.HeartbeatContext; @@ -640,7 +642,70 @@ public abstract class Catalog implements AutoCloseable { } break; } - // TODO(IMPALA-9935): support HDFS_PARTITION + case HDFS_PARTITION: { + THdfsPartition partDesc = objectDesc.getHdfs_partition(); + String dbName = partDesc.getDb_name(); + String tblName = partDesc.getTbl_name(); + String partitionName = partDesc.getPartition_name(); + + Table table = getTable(dbName, tblName); + if (table == null) { + throw new CatalogException("Table not found: " + dbName + "." + tblName); + } + + // Check if table is an IncompleteTable (not yet loaded) + if (table instanceof IncompleteTable) { + IncompleteTable incompleteTable = (IncompleteTable) table; + if (!incompleteTable.isLoaded()) { + throw new CatalogException("Table " + dbName + "." + tblName + + " is not loaded. Please load the table first (e.g., DESCRIBE " + + dbName + "." + tblName + ")"); + } else { + ImpalaException cause = incompleteTable.getCause(); + throw new CatalogException("Table " + dbName + "." + tblName + + " failed to load: " + (cause != null ? cause.getMessage() : + "Unknown error")); + } + } + + if (!(table instanceof HdfsTable)) { + throw new CatalogException("Table " + dbName + "." + tblName + + " is not an HDFS table (table type: " + + table.getClass().getSimpleName() + ")"); + } + + HdfsTable hdfsTable = (HdfsTable) table; + // Check if the table has partition columns + if (hdfsTable.getNumClusteringCols() == 0) { + throw new CatalogException("Table " + dbName + "." + tblName + + " is not a partitioned HDFS table"); + } + + table.takeReadLock(); + try { + // Parse and validate partition name (e.g., "year=2010/month=3") + FeCatalogUtils.parsePartitionName(partitionName, hdfsTable); + + // Use efficient hash map lookup (O(1)) instead of iterating all partitions + HdfsPartition partition = hdfsTable.getPartitionByName(partitionName); + if (partition == null) { + throw new CatalogException("Partition not found: " + partitionName); + } + + result.setType(partition.getCatalogObjectType()); + // Partitions use the table's catalog version + result.setCatalog_version(table.getCatalogVersion()); + result.setLast_modified_time_ms(table.getLastLoadedTimeMs()); + THdfsPartition tPartition = FeCatalogUtils.fsPartitionToThrift( + partition, ThriftObjectType.FULL); + tPartition.setDb_name(dbName); + tPartition.setTbl_name(tblName); + result.setHdfs_partition(tPartition); + } finally { + table.releaseReadLock(); + } + break; + } case FUNCTION: { TFunction tfn = objectDesc.getFn(); Function desc = Function.fromThrift(tfn); diff --git a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java index 4bac9a157..b7a18ab84 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java @@ -52,6 +52,7 @@ import org.apache.impala.service.BackendConfig; import org.apache.impala.thrift.TCatalogObject; import org.apache.impala.thrift.TGetCatalogMetricsResult; import org.apache.impala.thrift.THdfsPartition; +import org.apache.impala.thrift.TPartitionKeyValue; import org.apache.impala.thrift.TTable; import org.apache.impala.thrift.TTableStats; import org.apache.impala.thrift.TTableType; @@ -294,6 +295,80 @@ public abstract class FeCatalogUtils { return FileUtils.makePartName(partitionKeys, partitionValues); } + /** + * Parse a partition name string (e.g., "year=2010/month=3") into a list of + * TPartitionKeyValue objects. + * + * Note on URL encoding: If partition values contain special characters like "/", + * they must be double-encoded in the HTTP request URL because: + * 1. Hive stores such partitions on HDFS with '/' pre-encoded as '%2F' + * (e.g., HDFS directory: ds=2024%2F12%2F25) + * 2. HTTP URL encoding must encode the '%' as '%25' + * (e.g., HTTP request: ds=2024%252F12%252F25) + * After URL decoding, this method receives "ds=2024%2F12%2F25" (single-encoded), + * which is then decoded by FileUtils.unescapePathName() to get "2024/12/25". + * + * This method validates that: + * - The number of partition keys matches the table's clustering columns + * - Each partition key name matches the expected column name at that position + * - Each partition key-value pair is in the format "key=value" + * + * @param partitionName The partition name string after one level of URL decoding + * (e.g., "year=2010/month=3" or "ds=2024%2F12%2F25") + * @param table The HdfsTable containing the partition + * @return A list of TPartitionKeyValue objects representing the parsed partition + * @throws CatalogException if the partition name is invalid or doesn't match the + * table's partition schema + */ + public static List<TPartitionKeyValue> parsePartitionName( + String partitionName, HdfsTable table) throws CatalogException { + List<TPartitionKeyValue> partitionSpec = new ArrayList<>(); + if (partitionName == null || partitionName.isEmpty()) { + throw new CatalogException("Invalid partition name: " + partitionName); + } + + // Split the partition name by "/" to get individual key=value pairs. + // Note: If partition values contain "/", they should be URL-encoded as "%2F" + // before passing to this method, so the split will not break them apart. + String[] parts = partitionName.split("/"); + int numClusteringCols = table.getNumClusteringCols(); + + if (parts.length != numClusteringCols) { + throw new CatalogException( + String.format("Invalid partition name '%s': expected %d partition keys, got %d", + partitionName, numClusteringCols, parts.length)); + } + + List<Column> clusteringCols = table.getClusteringColumns(); + for (int i = 0; i < parts.length; i++) { + String part = parts[i]; + int eqPos = part.indexOf('='); + if (eqPos <= 0 || eqPos >= part.length() - 1) { + throw new CatalogException( + "Invalid partition key-value format: " + part); + } + + String key = part.substring(0, eqPos); + String encodedValue = part.substring(eqPos + 1); + + // URL-decode the value to handle special characters like "/" + // that are encoded as "%2F". + String value = FileUtils.unescapePathName(encodedValue); + + // Verify that the key matches the expected partition column name + String expectedKey = clusteringCols.get(i).getName(); + if (!key.equals(expectedKey)) { + throw new CatalogException( + String.format("Invalid partition key '%s': expected '%s'", + key, expectedKey)); + } + + partitionSpec.add(new TPartitionKeyValue(key, value)); + } + + return partitionSpec; + } + /** * Return the set of all file formats used in the collection of partitions. */ diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index ba9e66ec2..3dc313372 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -1809,6 +1809,18 @@ public class HdfsTable extends Table implements FeFsTable { return parts; } + /** + * Returns the partition with the given partition name, or null if it doesn't exist. + * Uses the nameToPartitionMap_ for O(1) lookup. + */ + public HdfsPartition getPartitionByName(String partitionName) { + String partName = DEFAULT_PARTITION_NAME; + if (partitionName != null && partitionName.length() > 0) { + partName = partitionName; + } + return nameToPartitionMap_.get(partName); + } + /** * Tracks the in-flight INSERT event id in the partition. * @return false if the partition doesn't exist. Otherwise returns true. diff --git a/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java b/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java index 661bbb8b5..11679971c 100644 --- a/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java +++ b/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java @@ -60,8 +60,12 @@ import org.apache.impala.compat.MetastoreShim; import org.apache.impala.service.BackendConfig; import org.apache.impala.testutil.CatalogServiceTestCatalog; import org.apache.impala.testutil.TestUtils; +import org.apache.impala.thrift.CatalogObjectsConstants; +import org.apache.impala.thrift.TCatalogObject; +import org.apache.impala.thrift.TCatalogObjectType; import org.apache.impala.thrift.TFunctionBinaryType; import org.apache.impala.thrift.TGetPartitionStatsRequest; +import org.apache.impala.thrift.THdfsPartition; import org.apache.impala.thrift.TPartitionKeyValue; import org.apache.impala.thrift.TPartitionStats; import org.apache.impala.thrift.TPrincipalType; @@ -1063,4 +1067,198 @@ public class CatalogTest { assertTrue(allRoleNames.contains(principal.getName())); } } + + /** + * Helper method to create a partition descriptor. + */ + private TCatalogObject createPartitionDescriptor(String dbName, String tableName, + String partitionName) { + TCatalogObject partitionDesc = new TCatalogObject(); + partitionDesc.setType(TCatalogObjectType.HDFS_PARTITION); + THdfsPartition partInfo = new THdfsPartition(); + partInfo.setDb_name(dbName); + partInfo.setTbl_name(tableName); + partInfo.setPartition_name(partitionName); + partitionDesc.setHdfs_partition(partInfo); + return partitionDesc; + } + + /** + * Helper method to verify that a partition result matches expected values. + */ + private void verifyPartitionResult(TCatalogObject result, String dbName, + String tableName, String partitionName) { + assertNotNull(result); + assertEquals(TCatalogObjectType.HDFS_PARTITION, result.getType()); + assertTrue(result.isSetHdfs_partition()); + assertEquals(dbName, result.getHdfs_partition().getDb_name()); + assertEquals(tableName, result.getHdfs_partition().getTbl_name()); + assertEquals(partitionName, result.getHdfs_partition().getPartition_name()); + } + + /** + * Helper method to verify that partition retrieval fails with expected error. + * Does not load the table - assumes table is already loaded or test wants to + * verify behavior without loading. + */ + private void verifyPartitionError(String dbName, String tableName, + String partitionName, String expectedErrorMsg) throws CatalogException { + TCatalogObject partitionDesc = createPartitionDescriptor(dbName, tableName, + partitionName); + + try { + catalog_.getTCatalogObject(partitionDesc); + fail("Expected CatalogException for " + dbName + "." + tableName + + " partition: " + partitionName); + } catch (CatalogException e) { + assertTrue("Expected error message to contain '" + expectedErrorMsg + "', but got: " + + e.getMessage(), e.getMessage().contains(expectedErrorMsg)); + } + } + + /** + * Helper method to test partition retrieval that should fail with an error. + * This method loads the table first, then verifies the partition error. + */ + private void loadAndVerifyPartitionError(String dbName, String tableName, + String partitionName, String expectedErrorMsg) throws CatalogException { + catalog_.getOrLoadTable(dbName, tableName, "test", null); + verifyPartitionError(dbName, tableName, partitionName, expectedErrorMsg); + } + + /** + * Test valid partition retrieval with correct partition keys. + */ + @Test + public void testGetValidPartitions() throws CatalogException { + // Load a partitioned table + HdfsTable table = (HdfsTable) catalog_.getOrLoadTable("functional", "alltypes", + "test", null); + assertNotNull(table); + assertTrue(table.getNumClusteringCols() > 0); + + // Test 1: Valid partition with two keys (year=2009, month=1) + TCatalogObject partitionDesc = createPartitionDescriptor("functional", "alltypes", + "year=2009/month=1"); + TCatalogObject result = catalog_.getTCatalogObject(partitionDesc); + verifyPartitionResult(result, "functional", "alltypes", "year=2009/month=1"); + + // Test 2: Another valid partition (year=2010, month=12) + partitionDesc = createPartitionDescriptor("functional", "alltypes", + "year=2010/month=12"); + result = catalog_.getTCatalogObject(partitionDesc); + verifyPartitionResult(result, "functional", "alltypes", "year=2010/month=12"); + } + + @Test + public void testGetNonExistingPartition() throws CatalogException { + loadAndVerifyPartitionError("functional", "alltypes", "year=9999/month=99", + "Partition not found"); + } + + @Test + public void testGetNonExistingTablePartition() throws CatalogException { + verifyPartitionError("functional", "nonexistenttable", "year=2009/month=1", + "Table not found"); + } + + @Test + public void testInvalidPartitionNames() throws CatalogException { + // Test cases: [partition_name, expected_error_substring] + String[][] testCases = { + {"year2009/month=1", "Invalid partition key-value format"}, + {"year=2009", "expected 2 partition keys, got 1"}, + {"year=2010/month=12/day=1", "expected 2 partition keys, got 3"}, + {"year=2009/day=1", "Invalid partition key 'day'"}, + {"month=1/year=2009", "Invalid partition key 'month'"}, + {"", "Invalid partition name"}, + {null, "Invalid partition name"} + }; + + for (String[] testCase : testCases) { + loadAndVerifyPartitionError("functional", "alltypes", testCase[0], testCase[1]); + } + } + + @Test + public void testGetNonPartitionedTablePartition() throws CatalogException { + loadAndVerifyPartitionError("functional", "alltypesnopart", "year=2009", + "is not a partitioned HDFS table"); + } + + @Test + public void testGetNonHdfsTablePartition() throws CatalogException { + loadAndVerifyPartitionError("functional_hbase", "alltypes", "id=1", + "is not an HDFS table (table type: HBaseTable)"); + } + + /** + * Test partition catalog object retrieval with special characters. + * This test uses a DATE-partitioned table where partition values contain hyphens + * (e.g., "date_part=1970-01-01"). This validates that the parsePartitionName + * function correctly handles special characters like hyphens in partition values. + */ + @Test + public void testGetPartitionWithSpecialCharacters() throws CatalogException { + // Test with date_tbl which has DATE partition column + HdfsTable table = (HdfsTable) catalog_.getOrLoadTable("functional", + "date_tbl", "test", null); + assertNotNull(table); + assertEquals(1, table.getNumClusteringCols()); + + // Get a partition that exists (date_part=1970-01-01) + Collection<? extends FeFsPartition> partitions = table.loadAllPartitions(); + assertFalse(partitions.isEmpty()); + + // Pick a real partition (not the prototype partition) + FeFsPartition partition = null; + for (FeFsPartition p : partitions) { + long partitionId = p.getId(); + if (partitionId != CatalogObjectsConstants.PROTOTYPE_PARTITION_ID) { + partition = p; + break; + } + } + + if (partition != null) { + String partitionName = partition.getPartitionName(); + assertNotNull(partitionName); + assertTrue(partitionName.startsWith("date_part=")); + + TCatalogObject partitionDesc = createPartitionDescriptor("functional", "date_tbl", + partitionName); + TCatalogObject result = catalog_.getTCatalogObject(partitionDesc); + verifyPartitionResult(result, "functional", "date_tbl", partitionName); + } + } + + @Test + public void testGetPartitionFromInvalidatedTable() throws CatalogException { + // Invalidate a table to make it incomplete + Reference<Boolean> tblWasRemoved = new Reference<>(); + Reference<Boolean> dbWasAdded = new Reference<>(); + catalog_.invalidateTable(new TTableName("functional", "alltypes"), + tblWasRemoved, dbWasAdded, NoOpEventSequence.INSTANCE); + + // Try to get partition from incomplete table (without loading) + verifyPartitionError("functional", "alltypes", "year=2009/month=1", + "Table functional.alltypes is not loaded. Please load the table first"); + } + + /** + * Test IncompleteTable (failed to load) returns proper error message. + * Tests the scenario where a table exists but failed to load due to errors + * (e.g., unsupported SerDe). + */ + @Test + public void testGetPartitionFromFailedTable() throws CatalogException { + // Load bad_serde table which is an IncompleteTable due to unsupported SerDe + Table table = catalog_.getOrLoadTable("functional", "bad_serde", "test", null); + assertTrue("Expected IncompleteTable", table instanceof IncompleteTable); + + // Try to get partition from this failed table (without reloading) + verifyPartitionError("functional", "bad_serde", "year=2009/month=1", + "Table functional.bad_serde failed to load: " + + "Failed to load metadata for table: functional.bad_serde"); + } } diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py index 1b0658d7b..48558c3fa 100644 --- a/tests/webserver/test_web_pages.py +++ b/tests/webserver/test_web_pages.py @@ -456,6 +456,39 @@ class TestWebPage(ImpalaTestSuite): self.__test_catalog_tables_loading_time(unique_database, "foo_part") self.get_and_check_status(self.EVENT_PROCESSOR_URL, "events-consuming-delay", ports_to_test=self.CATALOG_TEST_PORT) + # Multi-key partitioned table + multi_part_query = "create table {0}.foo_multi_part (id int, val int) " \ + "partitioned by (year int, month int, day int)".format(unique_database) + self.execute_query(multi_part_query) + multi_part_insert_query = "insert into {0}.foo_multi_part partition " \ + "(year=2024, month=12, day=25) values (1, 200)".format(unique_database) + self.execute_query(multi_part_insert_query) + # Table with string partition that contains special characters + slash_part_query = "create table {0}.foo_slash_part (id int, val int) " \ + "partitioned by (ds string)".format(unique_database) + self.execute_query(slash_part_query) + slash_part_insert_query = "insert into {0}.foo_slash_part partition " \ + "(ds='2024/12/25') values (1, 200)".format(unique_database) + self.execute_query(slash_part_insert_query) + + # Test partition catalog objects (IMPALA-9935) + self.__test_catalog_partition_object(unique_database, "foo_part", "year=2010", + cluster_properties) + self.__test_json_partition_object(unique_database, "foo_part", "year=2010", + cluster_properties) + # Test multi-key partition + self.__test_catalog_partition_object(unique_database, "foo_multi_part", + "year=2024/month=12/day=25", cluster_properties) + self.__test_json_partition_object(unique_database, "foo_multi_part", + "year=2024/month=12/day=25", cluster_properties) + # Test partition value with slash + # Note: Pass the pre-encoded partition name that matches Hive's HDFS directory format. + # Hive stores "ds=2024/12/25" as directory "ds=2024%2F12%2F25" in HDFS. + # The test methods will URL-encode this again for HTTP transmission (double-encoding). + self.__test_catalog_partition_object(unique_database, "foo_slash_part", + "ds=2024%2F12%2F25", cluster_properties) + self.__test_json_partition_object(unique_database, "foo_slash_part", + "ds=2024%2F12%2F25", cluster_properties) def __test_catalog_object(self, db_name, tbl_name, cluster_properties): """Tests the /catalog_object endpoint for the given db/table. Runs @@ -533,6 +566,86 @@ class TestWebPage(ImpalaTestSuite): assert "partitions" in hdfs_tbl_obj assert "prototype_partition" in hdfs_tbl_obj + def __verify_catalog_partition_html_response(self, response_text): + """Verify HTML catalog partition response contains expected Thrift structures.""" + assert "CatalogException" not in response_text, \ + "Response should not contain error: " + response_text[:200] + assert "TCatalogObject" in response_text, "Response should contain TCatalogObject" + assert "THdfsPartition" in response_text, "Response should contain THdfsPartition" + assert "THdfsStorageDescriptor" in response_text, \ + "Response should contain THdfsStorageDescriptor" + + def __test_catalog_partition_object(self, db_name, tbl_name, partition_name, + cluster_properties): + """Tests the /catalog_object endpoint for the given db/table/partition.""" + import urllib + # URL encode the entire object name (db.table:partition). + # This is necessary when partition values contain slashes (e.g., "ds=2024/12/25"). + object_name = "{0}.{1}:{2}".format(db_name, tbl_name, partition_name) + encoded_object_name = urllib.parse.quote(object_name, safe='') + obj_url = self.CATALOG_OBJECT_URL + \ + "?object_type=HDFS_PARTITION&object_name={0}".format(encoded_object_name) + + # Make sure the table is loaded + self.client.execute("describe %s.%s" % (db_name, tbl_name)) + + if cluster_properties.is_catalog_v2_cluster(): + # In Catalog V2 (local catalog), endpoint only works on catalogd + responses = self.get_and_check_status(obj_url, partition_name, + ports_to_test=self.CATALOG_TEST_PORT) + self.__verify_catalog_partition_html_response(responses[0].text) + # Catalog object endpoint is disabled in local catalog mode on impalad + impalad_expected_str = "No URI handler for '/catalog_object'" + self.check_endpoint_is_disabled(obj_url, impalad_expected_str, + ports_to_test=self.IMPALAD_TEST_PORT) + else: + # In Catalog V1, endpoint works on both catalogd and impalad + responses = self.get_and_check_status(obj_url, partition_name, + ports_to_test=self.CATALOG_TEST_PORT) + self.__verify_catalog_partition_html_response(responses[0].text) + + responses = self.get_and_check_status(obj_url, partition_name, + ports_to_test=self.IMPALAD_TEST_PORT) + self.__verify_catalog_partition_html_response(responses[0].text) + + def __test_json_partition_object(self, db_name, tbl_name, partition_name, + cluster_properties): + """Tests the /catalog_object?json endpoint for the given db/table/partition.""" + import urllib + # URL encode the entire object name (db.table:partition). + # This is necessary when partition values contain slashes (e.g., "ds=2024/12/25"). + object_name = "{0}.{1}:{2}".format(db_name, tbl_name, partition_name) + encoded_object_name = urllib.parse.quote(object_name, safe='') + obj_url = self.CATALOG_OBJECT_URL + \ + "?json&object_type=HDFS_PARTITION&object_name={0}".format(encoded_object_name) + + # Make sure the table is loaded + self.client.execute("describe %s.%s" % (db_name, tbl_name)) + + # Test catalogd endpoint (works in both V1 and V2) + responses = self.get_and_check_status(obj_url, ports_to_test=self.CATALOG_TEST_PORT) + response_json = json.loads(responses[0].text) + assert "json_string" in response_json, "Response should contain json_string" + obj = json.loads(response_json["json_string"]) + assert obj["type"] == 11, "type should be HDFS_PARTITION (11)" + assert "catalog_version" in obj, "TCatalogObject should have catalog_version" + part_obj = obj["hdfs_partition"] + assert part_obj["db_name"] == db_name + assert part_obj["tbl_name"] == tbl_name + assert part_obj["partition_name"] == partition_name + + # In Catalog V1, also test impalad endpoint + if not cluster_properties.is_catalog_v2_cluster(): + responses = self.get_and_check_status(obj_url, ports_to_test=self.IMPALAD_TEST_PORT) + response_json = json.loads(responses[0].text) + assert "json_string" in response_json, "Response should contain json_string" + obj = json.loads(response_json["json_string"]) + assert obj["type"] == 11, "type should be HDFS_PARTITION (11)" + part_obj = obj["hdfs_partition"] + assert part_obj["db_name"] == db_name + assert part_obj["tbl_name"] == tbl_name + assert part_obj["partition_name"] == partition_name + def check_endpoint_is_disabled(self, url, string_to_search="", ports_to_test=None): """Helper method that verifies the given url does not exist.""" if ports_to_test is None:
