Author: pauly
Date: Mon Jun 20 23:47:04 2011
New Revision: 1137826
URL: http://svn.apache.org/viewvc?rev=1137826&view=rev
Log:
HIVE-2213. Optimize partial specification metastore functions (Sohan Jain via
pauly)
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
URL:
http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
(original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java Mon
Jun 20 23:47:04 2011
@@ -94,15 +94,27 @@ public final class FileUtils {
public static String makePartName(List<String> partCols, List<String> vals) {
+ return makePartName(partCols, vals, null);
+ }
+ /**
+ * Makes a valid partition name.
+ * @param partCols The partition keys' names
+ * @param vals The partition values
+ * @param defaultStr
+ * The default name given to a partition value if the respective
value is empty or null.
+ * @return An escaped, valid partition name.
+ */
+ public static String makePartName(List<String> partCols, List<String> vals,
+ String defaultStr) {
StringBuilder name = new StringBuilder();
for (int i = 0; i < partCols.size(); i++) {
if (i > 0) {
name.append(Path.SEPARATOR);
}
- name.append(escapePathName((partCols.get(i)).toLowerCase()));
+ name.append(escapePathName((partCols.get(i)).toLowerCase(), defaultStr));
name.append('=');
- name.append(escapePathName(vals.get(i)));
+ name.append(escapePathName(vals.get(i), defaultStr));
}
return name.toString();
}
@@ -121,7 +133,7 @@ public final class FileUtils {
for (char c = 0; c < ' '; c++) {
charToEscape.set(c);
}
-
+
/**
* ASCII 01-1F are HTTP control characters that need to be escaped.
* \u000A and \u000D are \n and \r, respectively.
@@ -143,11 +155,28 @@ public final class FileUtils {
}
public static String escapePathName(String path) {
+ return escapePathName(path, null);
+ }
- // __HIVE_DEFAULT_NULL__ is the system default value for null and empty
string. We should
+ /**
+ * Escapes a path name.
+ * @param path The path to escape.
+ * @param defaultPath
+ * The default name for the path, if the given path is empty or
null.
+ * @return An escaped path name.
+ */
+ public static String escapePathName(String path, String defaultPath) {
+
+ // __HIVE_DEFAULT_PARTITION__ is the system default value for null and empty
string.
// TODO: we should allow user to specify default partition or HDFS file
location.
if (path == null || path.length() == 0) {
- return "__HIVE_DEFAULT_PARTITION__";
+ if (defaultPath == null) {
+ //previously, when path is empty or null and no default path is
specified,
+ // __HIVE_DEFAULT_PARTITION__ was the return value for escapePathName
+ return "__HIVE_DEFAULT_PARTITION__";
+ } else {
+ return defaultPath;
+ }
}
StringBuilder sb = new StringBuilder();
Modified:
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL:
http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
---
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
(original)
+++
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
Mon Jun 20 23:47:04 2011
@@ -2133,7 +2133,7 @@ public class HiveMetaStore extends Thrif
final short max_parts) throws MetaException, TException {
startPartitionFunction("get_partitions_ps", db_name, tbl_name,
part_vals);
try {
- return this.get_partitions_ps_with_auth(db_name, tbl_name, part_vals,
+ return get_partitions_ps_with_auth(db_name, tbl_name, part_vals,
max_parts, null, null);
}
finally {
@@ -2148,32 +2148,26 @@ public class HiveMetaStore extends Thrif
final List<String> groupNames) throws MetaException, TException {
startPartitionFunction("get_partitions_ps_with_auth", db_name, tbl_name,
part_vals);
- List<Partition> parts = null;
- List<Partition> matchingParts = new ArrayList<Partition>();
-
+ List<Partition> ret;
try {
- // This gets all the partitions and then filters based on the specified
- // criteria. An alternative approach would be to get all the partition
- // names, do the filtering on the names, and get the partition for each
- // of the names. that match.
-
- try {
- parts = get_partitions(db_name, tbl_name, (short) -1);
- } catch (NoSuchObjectException e) {
- throw new MetaException(e.getMessage());
- }
-
- for (Partition p : parts) {
- if (MetaStoreUtils.pvalMatches(part_vals, p.getValues())) {
- matchingParts.add(p);
+ ret = executeWithRetry(new Command<List<Partition>>() {
+ @Override
+ public List<Partition> run(RawStore ms) throws Exception {
+ return ms.listPartitionsPsWithAuth(db_name, tbl_name, part_vals,
max_parts,
+ userName, groupNames);
}
- }
-
- return matchingParts;
- }
- finally {
+ });
+ } catch (MetaException e) {
+ throw e;
+ } catch (InvalidObjectException e) {
+ throw new MetaException(e.getMessage());
+ } catch (Exception e) {
+ assert(e instanceof RuntimeException);
+ throw (RuntimeException)e;
+ } finally {
endFunction("get_partitions_ps_with_auth");
}
+ return ret;
}
@Override
@@ -2181,34 +2175,23 @@ public class HiveMetaStore extends Thrif
final String tbl_name, final List<String> part_vals, final short
max_parts)
throws MetaException, TException {
startPartitionFunction("get_partitions_names_ps", db_name, tbl_name,
part_vals);
+ List<String> ret;
try {
- Table t;
- try {
- t = get_table(db_name, tbl_name);
- } catch (NoSuchObjectException e) {
- throw new MetaException(e.getMessage());
- }
-
- List<String> partNames = get_partition_names(db_name, tbl_name,
max_parts);
- List<String> filteredPartNames = new ArrayList<String>();
-
- for(String name : partNames) {
- LinkedHashMap<String, String> spec =
Warehouse.makeSpecFromName(name);
- List<String> vals = new ArrayList<String>();
- // Since we are iterating through a LinkedHashMap, iteration should
- // return the partition values in the correct order for comparison.
- for (String val : spec.values()) {
- vals.add(val);
- }
- if (MetaStoreUtils.pvalMatches(part_vals, vals)) {
- filteredPartNames.add(name);
+ ret = executeWithRetry(new Command<List<String>>() {
+ @Override
+ public List<String> run(RawStore ms) throws Exception {
+ return ms.listPartitionNamesPs(db_name, tbl_name, part_vals,
max_parts);
}
- }
-
- return filteredPartNames;
+ });
+ } catch (MetaException e) {
+ throw e;
+ } catch (Exception e) {
+ assert(e instanceof RuntimeException);
+ throw (RuntimeException)e;
} finally {
endFunction("get_partitions_names_ps");
}
+ return ret;
}
@Override
Modified:
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL:
http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
---
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
(original)
+++
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
Mon Jun 20 23:47:04 2011
@@ -1282,6 +1282,120 @@ public class ObjectStore implements RawS
return pns;
}
+ /**
+ * Retrieves a Collection of partition-related results from the database
that match
+ * the partial specification given for a specific table.
+ * @param dbName the name of the database
+ * @param tableName the name of the table
+ * @param part_vals the partial specification values
+ * @param max_parts the maximum number of partitions to return
+ * @param resultsCol the metadata column of the data to return, e.g.
partitionName, etc.
+ * if resultsCol is empty or null, a collection of MPartition objects
is returned
+ * @return A Collection of partition-related items from the db that match
the partial spec
+ * for a table. The type of each item in the collection
corresponds to the column
+ * you want results for. E.g., if resultsCol is partitionName, the
Collection
+ * has types of String, and if resultsCol is null, the types are
MPartition.
+ */
+ private Collection getPartitionPsQueryResults(String dbName, String
tableName,
+ List<String> part_vals, short max_parts, String resultsCol)
+ throws MetaException {
+ dbName = dbName.toLowerCase().trim();
+ tableName = tableName.toLowerCase().trim();
+ Table table = getTable(dbName, tableName);
+
+ List<FieldSchema> partCols = table.getPartitionKeys();
+ int numPartKeys = partCols.size();
+ if (part_vals.size() > numPartKeys) {
+ throw new MetaException("Incorrect number of partition values");
+ }
+
+ partCols = partCols.subList(0, part_vals.size());
+ //Construct a pattern of the form: partKey=partVal/partKey2=partVal2/...
+ // where partVal is either the escaped partition value given as input,
+ // or a regex of the form ".*"
+ //This works because the "=" and "/" separating key names and partition
key/values
+ // are not escaped.
+ String partNameMatcher = Warehouse.makePartName(partCols, part_vals, ".*");
+ //add ".*" to the regex to match anything that comes after the partial spec.
+ if (part_vals.size() < numPartKeys) {
+ partNameMatcher += ".*";
+ }
+
+ Query q = pm.newQuery(MPartition.class);
+ StringBuilder queryFilter = new StringBuilder("table.database.name ==
dbName");
+ queryFilter.append(" && table.tableName == tableName");
+ queryFilter.append(" && partitionName.matches(partialRegex)");
+ q.setFilter(queryFilter.toString());
+ q.declareParameters("java.lang.String dbName, " +
+ "java.lang.String tableName, java.lang.String partialRegex");
+
+ if( max_parts >= 0 ) {
+ //User specified a row limit, set it on the Query
+ q.setRange(0, max_parts);
+ }
+ if (resultsCol != null && !resultsCol.isEmpty()) {
+ q.setResult(resultsCol);
+ }
+
+ return (Collection) q.execute(dbName, tableName, partNameMatcher);
+ }
+
+ @Override
+ public List<Partition> listPartitionsPsWithAuth(String db_name, String
tbl_name,
+ List<String> part_vals, short max_parts, String userName, List<String>
groupNames)
+ throws MetaException, InvalidObjectException {
+ List<Partition> partitions = new ArrayList<Partition>();
+ boolean success = false;
+ try {
+ openTransaction();
+ LOG.debug("executing listPartitionNamesPsWithAuth");
+ Collection parts = getPartitionPsQueryResults(db_name, tbl_name,
+ part_vals, max_parts, null);
+ MTable mtbl = getMTable(db_name, tbl_name);
+ for (Object o : parts) {
+ Partition part = convertToPart((MPartition) o);
+ //set auth privileges
+ if (null != userName && null != groupNames &&
+
"TRUE".equalsIgnoreCase(mtbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE")))
{
+ String partName =
Warehouse.makePartName(this.convertToFieldSchemas(mtbl
+ .getPartitionKeys()), part.getValues());
+ PrincipalPrivilegeSet partAuth = getPartitionPrivilegeSet(db_name,
+ tbl_name, partName, userName, groupNames);
+ part.setPrivileges(partAuth);
+ }
+ partitions.add(part);
+ }
+ success = commitTransaction();
+ } finally {
+ if (!success) {
+ rollbackTransaction();
+ }
+ }
+ return partitions;
+ }
+
+ @Override
+ public List<String> listPartitionNamesPs(String dbName, String tableName,
+ List<String> part_vals, short max_parts) throws MetaException {
+ List<String> partitionNames = new ArrayList<String>();
+ boolean success = false;
+ try {
+ openTransaction();
+ LOG.debug("Executing listPartitionNamesPs");
+ Collection names = getPartitionPsQueryResults(dbName, tableName,
+ part_vals, max_parts, "partitionName");
+ for (Object o : names) {
+ partitionNames.add((String) o);
+ }
+ success = commitTransaction();
+ } finally {
+ if (!success) {
+ rollbackTransaction();
+ }
+ }
+ return partitionNames;
+ }
+
// TODO:pc implement max
private List<MPartition> listMPartitions(String dbName, String tableName,
int max) {
@@ -1484,7 +1598,6 @@ public class ObjectStore implements RawS
Map<String, String> params = new HashMap<String, String>();
String queryFilterString =
makeQueryFilterString(mtable, filter, params);
-
Query query = pm.newQuery(
"select partitionName from
org.apache.hadoop.hive.metastore.model.MPartition "
+ "where " + queryFilterString);
Modified:
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL:
http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
---
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
(original)
+++
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
Mon Jun 20 23:47:04 2011
@@ -241,6 +241,49 @@ public interface RawStore extends Config
public abstract List<Partition> getPartitionsWithAuth(String dbName,
String tblName, short maxParts, String userName, List<String> groupNames)
- throws MetaException, NoSuchObjectException, InvalidObjectException;;
+ throws MetaException, NoSuchObjectException, InvalidObjectException;
-}
+ /**
+ * Lists partition names that match a given partial specification
+ * @param db_name
+ * The name of the database which has the partitions
+ * @param tbl_name
+ * The name of the table which has the partitions
+ * @param part_vals
+ * A partial list of values for partitions in order of the table's
partition keys.
+ * Entries can be empty if you only want to specify latter
partitions.
+ * @param max_parts
+ * The maximum number of partitions to return
+ * @return A list of partition names that match the partial spec.
+ * @throws MetaException
+ * @throws NoSuchObjectException
+ */
+ public abstract List<String> listPartitionNamesPs(String db_name, String
tbl_name,
+ List<String> part_vals, short max_parts)
+ throws MetaException;
+
+ /**
+ * Lists partitions that match a given partial specification and sets their
auth privileges.
+ * If userName and groupNames are null, then no auth privileges are set.
+ * @param db_name
+ * The name of the database which has the partitions
+ * @param tbl_name
+ * The name of the table which has the partitions
+ * @param part_vals
+ * A partial list of values for partitions in order of the table's
partition keys
+ * Entries can be empty if you need to specify latter partitions.
+ * @param max_parts
+ * The maximum number of partitions to return
+ * @param userName
+ * The user name for the partition for authentication privileges
+ * @param groupNames
+ * The groupNames for the partition for authentication privileges
+ * @return A list of partitions that match the partial spec.
+ * @throws MetaException
+ * @throws NoSuchObjectException
+ * @throws InvalidObjectException
+ */
+ public abstract List<Partition> listPartitionsPsWithAuth(String db_name,
String tbl_name,
+ List<String> part_vals, short max_parts, String userName, List<String>
groupNames)
+ throws MetaException, InvalidObjectException;
+}
\ No newline at end of file
Modified:
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
URL:
http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
---
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
(original)
+++
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
Mon Jun 20 23:47:04 2011
@@ -390,6 +390,20 @@ public class Warehouse {
public static String makePartName(List<FieldSchema> partCols,
List<String> vals) throws MetaException {
+ return makePartName(partCols, vals, null);
+ }
+
+ /**
+ * Makes a valid partition name.
+ * @param partCols The partition columns
+ * @param vals The partition values
+ * @param defaultStr
+ * The default name given to a partition value if the respective value is
empty or null.
+ * @return An escaped, valid partition name.
+ * @throws MetaException
+ */
+ public static String makePartName(List<FieldSchema> partCols,
+ List<String> vals, String defaultStr) throws MetaException {
if ((partCols.size() != vals.size()) || (partCols.size() == 0)) {
throw new MetaException("Invalid partition key & values");
}
@@ -397,7 +411,7 @@ public class Warehouse {
for (FieldSchema col: partCols) {
colNames.add(col.getName());
}
- return FileUtils.makePartName(colNames, vals);
+ return FileUtils.makePartName(colNames, vals, defaultStr);
}
public static List<String> getPartValuesFromPartName(String partName)
Modified:
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
URL:
http://svn.apache.org/viewvc/hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
---
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
(original)
+++
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
Mon Jun 20 23:47:04 2011
@@ -251,6 +251,7 @@ public abstract class TestHiveMetaStore
String partName = "ds=2008-07-01 14%3A13%3A12/hr=14";
String part2Name = "ds=2008-07-01 14%3A13%3A12/hr=15";
String part3Name ="ds=2008-07-02 14%3A13%3A12/hr=15";
+ String part4Name ="ds=2008-07-03 14%3A13%3A12/hr=151";
part_get = client.getPartition(dbName, tblName, partName);
assertTrue("Partitions are not the same", part.equals(part_get));
@@ -275,6 +276,14 @@ public abstract class TestHiveMetaStore
assertTrue("Should have returned 2 partition names", partialNames.size()
== 2);
assertTrue("Not all part names returned",
partialNames.containsAll(partNames));
+ partNames.add(part3Name);
+ partNames.add(part4Name);
+ partialVals.clear();
+ partialVals.add("");
+ partialNames = client.listPartitionNames(dbName, tblName, partialVals,
(short) -1);
+ assertTrue("Should have returned 4 partition names", partialNames.size()
== 4);
+ assertTrue("Not all part names returned",
partialNames.containsAll(partNames));
+
// Test partition listing with a partial spec - hr is specified but ds
is not
parts.clear();
parts.add(part2);