Author: thomasm
Date: Thu Mar 13 12:14:45 2014
New Revision: 1577129

URL: http://svn.apache.org/r1577129
Log:
OAK-333 1000 byte path limit in MongoMK

Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeState.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/UnsavedModifications.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeState.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeState.java?rev=1577129&r1=1577128&r2=1577129&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeState.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeState.java Thu Mar 13 12:14:45 2014
@@ -304,6 +304,9 @@ class DocumentNodeState extends Abstract
         String id = Utils.getIdFromPath(path);
         UpdateOp op = new UpdateOp(id, isNew);
         op.set(Document.ID, id);
+        if (Utils.isLongPath(path)) {
+            op.set(NodeDocument.PATH, path);
+        }
         NodeDocument.setModified(op, rev);
         NodeDocument.setDeleted(op, rev, false);
         for (String p : properties.keySet()) {

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java?rev=1577129&r1=1577128&r2=1577129&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java Thu Mar 13 12:14:45 2014
@@ -158,6 +158,11 @@ public final class NodeDocument extends 
      * node had a child node
      */
     private static final String CHILDREN_FLAG = "_children";
+    
+    /**
+     * The node path, in case the id can not be converted to a path.
+     */
+    public static final String PATH = "_path";
 
     /**
      * Properties to ignore when a document is split.
@@ -707,6 +712,9 @@ public final class NodeDocument extends 
             UpdateOp main = new UpdateOp(id, false);
             setPrevious(main, high, low);
             UpdateOp old = new UpdateOp(Utils.getPreviousIdFor(id, high), true);
+            if (get(PATH) != null) {
+                old.set(PATH, get(PATH));
+            }
             old.set(ID, old.getId());
             for (String property : splitValues.keySet()) {
                 NavigableMap<Revision, String> splitMap = splitValues.get(property);
@@ -1147,6 +1155,10 @@ public final class NodeDocument extends 
     
     @Override
     public String getPath() {
+        String p = (String) get(PATH);
+        if (p != null) {
+            return p;
+        }
         return Utils.getPathFromId(getId());
     }
 

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/UnsavedModifications.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/UnsavedModifications.java?rev=1577129&r1=1577128&r2=1577129&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/UnsavedModifications.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/UnsavedModifications.java Thu Mar 13 12:14:45 2014
@@ -166,7 +166,7 @@ class UnsavedModifications {
 
         UpdateOp updateOp = null;
         Revision lastRev = null;
-        List<String> ids = new ArrayList<String>();
+        ArrayList<String> pathList = new ArrayList<String>();
         for (int i = 0; i < paths.size();) {
             String p = paths.get(i);
             Revision r = pending.get(p);
@@ -174,18 +174,18 @@ class UnsavedModifications {
                 i++;
                 continue;
             }
-            int size = ids.size();
+            int size = pathList.size();
             if (updateOp == null) {
                 // create UpdateOp
                 Commit commit = new Commit(store, null, r);
                 updateOp = commit.getUpdateOperationForNode(p);
                 NodeDocument.setLastRev(updateOp, r);
                 lastRev = r;
-                ids.add(Utils.getIdFromPath(p));
+                pathList.add(p);
                 i++;
             } else if (r.equals(lastRev)) {
                 // use multi update when possible
-                ids.add(Utils.getIdFromPath(p));
+                pathList.add(p);
                 i++;
             }
             // call update if any of the following is true:
@@ -194,13 +194,17 @@ class UnsavedModifications {
             // - revision is not equal to last revision (size of ids didn't change)
             // - the update limit is reached
             if (i + 2 > paths.size()
-                    || size == ids.size()
-                    || ids.size() >= BACKGROUND_MULTI_UPDATE_LIMIT) {
+                    || size == pathList.size()
+                    || pathList.size() >= BACKGROUND_MULTI_UPDATE_LIMIT) {
+                List<String> ids = new ArrayList<String>();
+                for (String path : pathList) {
+                    ids.add(Utils.getIdFromPath(path));
+                }
                 store.getDocumentStore().update(NODES, ids, updateOp);
-                for (String id : ids) {
-                    map.remove(Utils.getPathFromId(id), lastRev);
+                for (String path : pathList) {
+                    map.remove(path, lastRev);
                 }
-                ids.clear();
+                pathList.clear();
                 updateOp = null;
                 lastRev = null;
             }

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java?rev=1577129&r1=1577128&r2=1577129&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java Thu Mar 13 12:14:45 2014
@@ -16,6 +16,9 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.util;
 
+import java.nio.charset.Charset;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.Comparator;
 import java.util.HashSet;
 import java.util.Map;
@@ -29,6 +32,7 @@ import javax.annotation.Nullable;
 
 import com.mongodb.BasicDBObject;
 
+import org.apache.commons.codec.binary.Hex;
 import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.plugins.document.Revision;
 import org.bson.types.ObjectId;
@@ -45,6 +49,22 @@ public class Utils {
      */
     private static final int REVISION_LENGTH =
             new Revision(System.currentTimeMillis(), 0, 0).toString().length();
+    
+    /**
+     * The length of path (in characters), whose UTF-8 representation can not
+     * possibly be too large to be used for the primary key for the document
+     * store.
+     */
+    private static final int PATH_SHORT = Integer.getInteger("oak.pathShort", 330);
+    
+    /**
+     * The maximum length of the parent path, in bytes. If the parent path is
+     * longer, then the id of a document is no longer the path, but the hash of
+     * the parent, and then the node name.
+     */
+    private static final int PATH_LONG = Integer.getInteger("oak.pathLong", 700);
+
+    private static final Charset UTF_8 = Charset.forName("UTF-8");
 
     /**
      * Make sure the name string does not contain unnecessary baggage (shared
@@ -210,12 +230,42 @@ public class Utils {
     }
 
     public static String getIdFromPath(String path) {
+        if (isLongPath(path)) {
+            MessageDigest digest;
+            try {
+                digest = MessageDigest.getInstance("SHA-256");
+            } catch (NoSuchAlgorithmException e) {
+                throw new RuntimeException(e);
+            }
+            int depth = Utils.pathDepth(path);
+            String parent = PathUtils.getParentPath(path);
+            byte[] hash = digest.digest(parent.getBytes(UTF_8));
+            String name = PathUtils.getName(path);
+            return depth + ":h" + Hex.encodeHexString(hash) + "/" + name;
+        }
         int depth = Utils.pathDepth(path);
         return depth + ":" + path;
     }
     
+    public static boolean isLongPath(String path) {
+        // the most common case: a short path
+        // avoid calculating the parent path
+        if (path.length() < PATH_SHORT) {
+            return false;
+        }
+        // check if the parent path is long
+        byte[] parent = PathUtils.getParentPath(path).getBytes(UTF_8);
+        if (parent.length < PATH_LONG) {
+            return false;
+        }
+        return true;
+    }
+    
     public static String getPathFromId(String id) {
         int index = id.indexOf(':');
+        if (id.charAt(index + 1) == 'h') {
+            throw new IllegalArgumentException("Id is hashed: " + id);
+        }
         return id.substring(index + 1);
     }
 

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java?rev=1577129&r1=1577128&r2=1577129&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java Thu Mar 13 12:14:45 2014
@@ -30,9 +30,12 @@ public class UtilsTest {
     @Test
     public void getPreviousIdFor() {
         Revision r = new Revision(System.currentTimeMillis(), 0, 0);
-        assertEquals("1:p/" + r.toString(), Utils.getPreviousIdFor("0:/", r));
-        assertEquals("2:p/test/" + r.toString(), Utils.getPreviousIdFor("1:/test", r));
-        assertEquals("14:p/a/b/c/d/e/f/g/h/i/j/k/l/m/" + r.toString(), Utils.getPreviousIdFor("13:/a/b/c/d/e/f/g/h/i/j/k/l/m", r));
+        String p = Utils.getIdFromPath("/");
+        assertEquals("1:p/" + r.toString(), Utils.getPreviousIdFor(p, r));
+        p = Utils.getIdFromPath("/test");
+        assertEquals("2:p/test/" + r.toString(), Utils.getPreviousIdFor(p, r));
+        p = Utils.getIdFromPath("/a/b/c/d/e/f/g/h/i/j/k/l/m");
+        assertEquals("14:p/a/b/c/d/e/f/g/h/i/j/k/l/m/" + r.toString(), Utils.getPreviousIdFor(p, r));
     }
 
     @Ignore("Performance test")


Reply via email to