Author: pauly
Date: Mon Jun 20 23:47:04 2011
New Revision: 1137826

URL: http://svn.apache.org/viewvc?rev=1137826&view=rev
Log:
HIVE-2213. Optimize partial specification metastore functions (Sohan Jain via 
pauly)


Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
    
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
    
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
    
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
    
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java

Modified: 
hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java 
(original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java Mon 
Jun 20 23:47:04 2011
@@ -94,15 +94,27 @@ public final class FileUtils {
 
 
   public static String makePartName(List<String> partCols, List<String> vals) {
+    return makePartName(partCols, vals, null);
+  }
 
+  /**
+   * Makes a valid partition name.
+   * @param partCols The partition keys' names
+   * @param vals The partition values
+   * @param defaultStr
+   *         The default name given to a partition value if the respective 
value is empty or null.
+   * @return An escaped, valid partition name.
+   */
+  public static String makePartName(List<String> partCols, List<String> vals,
+      String defaultStr) {
     StringBuilder name = new StringBuilder();
     for (int i = 0; i < partCols.size(); i++) {
       if (i > 0) {
         name.append(Path.SEPARATOR);
       }
-      name.append(escapePathName((partCols.get(i)).toLowerCase()));
+      name.append(escapePathName((partCols.get(i)).toLowerCase(), defaultStr));
       name.append('=');
-      name.append(escapePathName(vals.get(i)));
+      name.append(escapePathName(vals.get(i), defaultStr));
     }
     return name.toString();
   }
@@ -121,7 +133,7 @@ public final class FileUtils {
     for (char c = 0; c < ' '; c++) {
       charToEscape.set(c);
     }
-    
+
     /**
      * ASCII 01-1F are HTTP control characters that need to be escaped.
      * \u000A and \u000D are \n and \r, respectively.
@@ -143,11 +155,28 @@ public final class FileUtils {
   }
 
   public static String escapePathName(String path) {
+    return escapePathName(path, null);
+  }
 
-    // __HIVE_DEFAULT_NULL__ is the system default value for null and empty 
string. We should
+  /**
+   * Escapes a path name.
+   * @param path The path to escape.
+   * @param defaultPath
+   *          The default name for the path, if the given path is empty or 
null.
+   * @return An escaped path name.
+   */
+  public static String escapePathName(String path, String defaultPath) {
+
+    // __HIVE_DEFAULT_NULL__ is the system default value for null and empty 
string.
     // TODO: we should allow user to specify default partition or HDFS file 
location.
     if (path == null || path.length() == 0) {
-      return "__HIVE_DEFAULT_PARTITION__";
+      if (defaultPath == null) {
+        //previously, when path is empty or null and no default path is 
specified,
+        // __HIVE_DEFAULT_PARTITION__ was the return value for escapePathName
+        return "__HIVE_DEFAULT_PARTITION__";
+      } else {
+        return defaultPath;
+      }
     }
 
     StringBuilder sb = new StringBuilder();

Modified: 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
--- 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
 (original)
+++ 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
 Mon Jun 20 23:47:04 2011
@@ -2133,7 +2133,7 @@ public class HiveMetaStore extends Thrif
         final short max_parts) throws MetaException, TException {
       startPartitionFunction("get_partitions_ps", db_name, tbl_name, 
part_vals);
       try {
-        return this.get_partitions_ps_with_auth(db_name, tbl_name, part_vals,
+        return get_partitions_ps_with_auth(db_name, tbl_name, part_vals,
             max_parts, null, null);
       }
       finally {
@@ -2148,32 +2148,26 @@ public class HiveMetaStore extends Thrif
         final List<String> groupNames) throws MetaException, TException {
       startPartitionFunction("get_partitions_ps_with_auth", db_name, tbl_name,
           part_vals);
-      List<Partition> parts = null;
-      List<Partition> matchingParts = new ArrayList<Partition>();
-
+      List<Partition> ret;
       try {
-        // This gets all the partitions and then filters based on the specified
-        // criteria. An alternative approach would be to get all the partition
-        // names, do the filtering on the names, and get the partition for each
-        // of the names. that match.
-
-        try {
-           parts = get_partitions(db_name, tbl_name, (short) -1);
-        } catch (NoSuchObjectException e) {
-          throw new MetaException(e.getMessage());
-        }
-
-        for (Partition p : parts) {
-          if (MetaStoreUtils.pvalMatches(part_vals, p.getValues())) {
-            matchingParts.add(p);
+        ret = executeWithRetry(new Command<List<Partition>>() {
+          @Override
+          public List<Partition> run(RawStore ms) throws Exception {
+            return ms.listPartitionsPsWithAuth(db_name, tbl_name, part_vals, 
max_parts,
+                userName, groupNames);
           }
-        }
-
-        return matchingParts;
-      }
-      finally {
+        });
+      } catch (MetaException e) {
+        throw e;
+      } catch (InvalidObjectException e) {
+         throw new MetaException(e.getMessage());
+      } catch (Exception e) {
+        assert(e instanceof RuntimeException);
+        throw (RuntimeException)e;
+      } finally {
         endFunction("get_partitions_ps_with_auth");
       }
+      return ret;
     }
 
     @Override
@@ -2181,34 +2175,23 @@ public class HiveMetaStore extends Thrif
         final String tbl_name, final List<String> part_vals, final short 
max_parts)
         throws MetaException, TException {
       startPartitionFunction("get_partitions_names_ps", db_name, tbl_name, 
part_vals);
+      List<String> ret;
       try {
-        Table t;
-        try {
-          t = get_table(db_name, tbl_name);
-        } catch (NoSuchObjectException e) {
-          throw new MetaException(e.getMessage());
-        }
-
-       List<String> partNames = get_partition_names(db_name, tbl_name, 
max_parts);
-       List<String> filteredPartNames = new ArrayList<String>();
-
-        for(String name : partNames) {
-          LinkedHashMap<String, String> spec = 
Warehouse.makeSpecFromName(name);
-          List<String> vals = new ArrayList<String>();
-          // Since we are iterating through a LinkedHashMap, iteration should
-          // return the partition values in the correct order for comparison.
-          for (String val : spec.values()) {
-            vals.add(val);
-          }
-          if (MetaStoreUtils.pvalMatches(part_vals, vals)) {
-            filteredPartNames.add(name);
+        ret = executeWithRetry(new Command<List<String>>() {
+          @Override
+          public List<String> run(RawStore ms) throws Exception {
+            return ms.listPartitionNamesPs(db_name, tbl_name, part_vals, 
max_parts);
           }
-        }
-
-        return filteredPartNames;
+        });
+      } catch (MetaException e) {
+        throw e;
+      } catch (Exception e) {
+        assert(e instanceof RuntimeException);
+        throw (RuntimeException)e;
       } finally {
         endFunction("get_partitions_names_ps");
       }
+      return ret;
     }
 
     @Override

Modified: 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
--- 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java 
(original)
+++ 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java 
Mon Jun 20 23:47:04 2011
@@ -1282,6 +1282,120 @@ public class ObjectStore implements RawS
     return pns;
   }
 
+  /**
+   * Retrieves a Collection of partition-related results from the database 
that match
+   *  the partial specification given for a specific table.
+   * @param dbName the name of the database
+   * @param tableName the name of the table
+   * @param part_vals the partial specification values
+   * @param max_parts the maximum number of partitions to return
+   * @param resultsCol the metadata column of the data to return, e.g. 
partitionName, etc.
+   *        if resultsCol is empty or null, a collection of MPartition objects 
is returned
+   * @return A Collection of partition-related items from the db that match 
the partial spec
+   *          for a table.  The type of each item in the collection 
corresponds to the column
+   *          you want results for.  E.g., if resultsCol is partitionName, the 
Collection
+   *          has types of String, and if resultsCol is null, the types are 
MPartition.
+   */
+  private Collection getPartitionPsQueryResults(String dbName, String 
tableName,
+      List<String> part_vals, short max_parts, String resultsCol)
+      throws MetaException {
+    dbName = dbName.toLowerCase().trim();
+    tableName = tableName.toLowerCase().trim();
+    Table table = getTable(dbName, tableName);
+
+    List<FieldSchema> partCols = table.getPartitionKeys();
+    int numPartKeys = partCols.size();
+    if (part_vals.size() > numPartKeys) {
+      throw new MetaException("Incorrect number of partition values");
+    }
+
+    partCols = partCols.subList(0, part_vals.size());
+    //Construct a pattern of the form: partKey=partVal/partKey2=partVal2/...
+    // where partVal is either the escaped partition value given as input,
+    // or a regex of the form ".*"
+    //This works because the "=" and "/" separating key names and partition 
key/values
+    // are not escaped.
+    String partNameMatcher = Warehouse.makePartName(partCols, part_vals, ".*");
+    // Append ".*" to the regex so it also matches whatever follows the partial spec.
+    if (part_vals.size() < numPartKeys) {
+      partNameMatcher += ".*";
+    }
+
+    Query q = pm.newQuery(MPartition.class);
+    StringBuilder queryFilter = new StringBuilder("table.database.name == 
dbName");
+    queryFilter.append(" && table.tableName == tableName");
+    queryFilter.append(" && partitionName.matches(partialRegex)");
+    q.setFilter(queryFilter.toString());
+    q.declareParameters("java.lang.String dbName, " +
+        "java.lang.String tableName, java.lang.String partialRegex");
+
+    if( max_parts >= 0 ) {
+      //User specified a row limit, set it on the Query
+      q.setRange(0, max_parts);
+    }
+    if (resultsCol != null && !resultsCol.isEmpty()) {
+      q.setResult(resultsCol);
+    }
+
+    return (Collection) q.execute(dbName, tableName, partNameMatcher);
+  }
+
+  @Override
+  public List<Partition> listPartitionsPsWithAuth(String db_name, String 
tbl_name,
+      List<String> part_vals, short max_parts, String userName, List<String> 
groupNames)
+      throws MetaException, InvalidObjectException {
+    List<Partition> partitions = new ArrayList<Partition>();
+    boolean success = false;
+    try {
+      openTransaction();
+      LOG.debug("executing listPartitionNamesPsWithAuth");
+      Collection parts = getPartitionPsQueryResults(db_name, tbl_name,
+          part_vals, max_parts, null);
+      MTable mtbl = getMTable(db_name, tbl_name);
+      for (Object o : parts) {
+        Partition part = convertToPart((MPartition) o);
+        //set auth privileges
+        if (null != userName && null != groupNames &&
+            
"TRUE".equalsIgnoreCase(mtbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))) 
{
+          String partName = 
Warehouse.makePartName(this.convertToFieldSchemas(mtbl
+              .getPartitionKeys()), part.getValues());
+          PrincipalPrivilegeSet partAuth = getPartitionPrivilegeSet(db_name,
+              tbl_name, partName, userName, groupNames);
+          part.setPrivileges(partAuth);
+        }
+        partitions.add(part);
+      }
+      success = commitTransaction();
+    } finally {
+      if (!success) {
+        rollbackTransaction();
+      }
+    }
+    return partitions;
+  }
+
+  @Override
+  public List<String> listPartitionNamesPs(String dbName, String tableName,
+      List<String> part_vals, short max_parts) throws MetaException {
+    List<String> partitionNames = new ArrayList<String>();
+    boolean success = false;
+    try {
+      openTransaction();
+      LOG.debug("Executing listPartitionNamesPs");
+      Collection names = getPartitionPsQueryResults(dbName, tableName,
+          part_vals, max_parts, "partitionName");
+      for (Object o : names) {
+        partitionNames.add((String) o);
+      }
+      success = commitTransaction();
+    } finally {
+      if (!success) {
+        rollbackTransaction();
+      }
+    }
+    return partitionNames;
+  }
+
   // TODO:pc implement max
   private List<MPartition> listMPartitions(String dbName, String tableName,
       int max) {
@@ -1484,7 +1598,6 @@ public class ObjectStore implements RawS
       Map<String, String> params = new HashMap<String, String>();
       String queryFilterString =
         makeQueryFilterString(mtable, filter, params);
-
       Query query = pm.newQuery(
           "select partitionName from 
org.apache.hadoop.hive.metastore.model.MPartition "
           + "where " + queryFilterString);

Modified: 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
--- 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java 
(original)
+++ 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java 
Mon Jun 20 23:47:04 2011
@@ -241,6 +241,49 @@ public interface RawStore extends Config
 
   public abstract List<Partition> getPartitionsWithAuth(String dbName,
       String tblName, short maxParts, String userName, List<String> groupNames)
-      throws MetaException, NoSuchObjectException, InvalidObjectException;;
+      throws MetaException, NoSuchObjectException, InvalidObjectException;
 
-}
+  /**
+   * Lists partition names that match a given partial specification
+   * @param db_name
+   *          The name of the database which has the partitions
+   * @param tbl_name
+   *          The name of the table which has the partitions
+   * @param part_vals
+   *          A partial list of values for partitions in order of the table's 
partition keys.
+   *          Entries can be empty if you only want to specify values for later 
partition keys.
+   * @param max_parts
+   *          The maximum number of partitions to return
+   * @return A list of partition names that match the partial spec.
+   * @throws MetaException
+   * @throws NoSuchObjectException
+   */
+  public abstract List<String> listPartitionNamesPs(String db_name, String 
tbl_name,
+      List<String> part_vals, short max_parts)
+      throws MetaException;
+
+  /**
+   * Lists partitions that match a given partial specification and sets their 
auth privileges.
+   *   If userName or groupNames is null, then no auth privileges are set.
+   * @param db_name
+   *          The name of the database which has the partitions
+   * @param tbl_name
+   *          The name of the table which has the partitions
+   * @param part_vals
+   *          A partial list of values for partitions in order of the table's 
partition keys
+   *          Entries can be empty if you only need to specify values for later partition keys.
+   * @param max_parts
+   *          The maximum number of partitions to return
+   * @param userName
+   *          The user name for the partition for authentication privileges
+   * @param groupNames
+   *          The groupNames for the partition for authentication privileges
+   * @return A list of partitions that match the partial spec.
+   * @throws MetaException
+   * @throws NoSuchObjectException
+   * @throws InvalidObjectException
+   */
+  public abstract List<Partition> listPartitionsPsWithAuth(String db_name, 
String tbl_name,
+      List<String> part_vals, short max_parts, String userName, List<String> 
groupNames)
+      throws MetaException, InvalidObjectException;
+}
\ No newline at end of file

Modified: 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
--- 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java 
(original)
+++ 
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java 
Mon Jun 20 23:47:04 2011
@@ -390,6 +390,20 @@ public class Warehouse {
 
   public static String makePartName(List<FieldSchema> partCols,
       List<String> vals) throws MetaException {
+    return makePartName(partCols, vals, null);
+  }
+
+  /**
+   * Makes a valid partition name.
+   * @param partCols The partition columns
+   * @param vals The partition values
+   * @param defaultStr
+   *    The default name given to a partition value if the respective value is 
empty or null.
+   * @return An escaped, valid partition name.
+   * @throws MetaException
+   */
+  public static String makePartName(List<FieldSchema> partCols,
+      List<String> vals, String defaultStr) throws MetaException {
     if ((partCols.size() != vals.size()) || (partCols.size() == 0)) {
       throw new MetaException("Invalid partition key & values");
     }
@@ -397,7 +411,7 @@ public class Warehouse {
     for (FieldSchema col: partCols) {
       colNames.add(col.getName());
     }
-    return FileUtils.makePartName(colNames, vals);
+    return FileUtils.makePartName(colNames, vals, defaultStr);
   }
 
   public static List<String> getPartValuesFromPartName(String partName)

Modified: 
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java?rev=1137826&r1=1137825&r2=1137826&view=diff
==============================================================================
--- 
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
 (original)
+++ 
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
 Mon Jun 20 23:47:04 2011
@@ -251,6 +251,7 @@ public abstract class TestHiveMetaStore 
       String partName = "ds=2008-07-01 14%3A13%3A12/hr=14";
       String part2Name = "ds=2008-07-01 14%3A13%3A12/hr=15";
       String part3Name ="ds=2008-07-02 14%3A13%3A12/hr=15";
+      String part4Name ="ds=2008-07-03 14%3A13%3A12/hr=151";
 
       part_get = client.getPartition(dbName, tblName, partName);
       assertTrue("Partitions are not the same", part.equals(part_get));
@@ -275,6 +276,14 @@ public abstract class TestHiveMetaStore 
       assertTrue("Should have returned 2 partition names", partialNames.size() 
== 2);
       assertTrue("Not all part names returned", 
partialNames.containsAll(partNames));
 
+      partNames.add(part3Name);
+      partNames.add(part4Name);
+      partialVals.clear();
+      partialVals.add("");
+      partialNames = client.listPartitionNames(dbName, tblName, partialVals, 
(short) -1);
+      assertTrue("Should have returned 4 partition names", partialNames.size() 
== 4);
+      assertTrue("Not all part names returned", 
partialNames.containsAll(partNames));
+
       // Test partition listing with a partial spec - hr is specified but ds 
is not
       parts.clear();
       parts.add(part2);


Reply via email to