Author: thomasm
Date: Thu Mar 13 12:14:45 2014
New Revision: 1577129
URL: http://svn.apache.org/r1577129
Log:
OAK-333 1000 byte path limit in MongoMK
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeState.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/UnsavedModifications.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeState.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeState.java?rev=1577129&r1=1577128&r2=1577129&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeState.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeState.java
Thu Mar 13 12:14:45 2014
@@ -304,6 +304,9 @@ class DocumentNodeState extends Abstract
String id = Utils.getIdFromPath(path);
UpdateOp op = new UpdateOp(id, isNew);
op.set(Document.ID, id);
+ if (Utils.isLongPath(path)) {
+ op.set(NodeDocument.PATH, path);
+ }
NodeDocument.setModified(op, rev);
NodeDocument.setDeleted(op, rev, false);
for (String p : properties.keySet()) {
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java?rev=1577129&r1=1577128&r2=1577129&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
Thu Mar 13 12:14:45 2014
@@ -158,6 +158,11 @@ public final class NodeDocument extends
* node had a child node
*/
private static final String CHILDREN_FLAG = "_children";
+
+ /**
+ * The node path, in case the id can not be converted to a path.
+ */
+ public static final String PATH = "_path";
/**
* Properties to ignore when a document is split.
@@ -707,6 +712,9 @@ public final class NodeDocument extends
UpdateOp main = new UpdateOp(id, false);
setPrevious(main, high, low);
UpdateOp old = new UpdateOp(Utils.getPreviousIdFor(id, high),
true);
+ if (get(PATH) != null) {
+ old.set(PATH, get(PATH));
+ }
old.set(ID, old.getId());
for (String property : splitValues.keySet()) {
NavigableMap<Revision, String> splitMap =
splitValues.get(property);
@@ -1147,6 +1155,10 @@ public final class NodeDocument extends
@Override
public String getPath() {
+ String p = (String) get(PATH);
+ if (p != null) {
+ return p;
+ }
return Utils.getPathFromId(getId());
}
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/UnsavedModifications.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/UnsavedModifications.java?rev=1577129&r1=1577128&r2=1577129&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/UnsavedModifications.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/UnsavedModifications.java
Thu Mar 13 12:14:45 2014
@@ -166,7 +166,7 @@ class UnsavedModifications {
UpdateOp updateOp = null;
Revision lastRev = null;
- List<String> ids = new ArrayList<String>();
+ ArrayList<String> pathList = new ArrayList<String>();
for (int i = 0; i < paths.size();) {
String p = paths.get(i);
Revision r = pending.get(p);
@@ -174,18 +174,18 @@ class UnsavedModifications {
i++;
continue;
}
- int size = ids.size();
+ int size = pathList.size();
if (updateOp == null) {
// create UpdateOp
Commit commit = new Commit(store, null, r);
updateOp = commit.getUpdateOperationForNode(p);
NodeDocument.setLastRev(updateOp, r);
lastRev = r;
- ids.add(Utils.getIdFromPath(p));
+ pathList.add(p);
i++;
} else if (r.equals(lastRev)) {
// use multi update when possible
- ids.add(Utils.getIdFromPath(p));
+ pathList.add(p);
i++;
}
// call update if any of the following is true:
@@ -194,13 +194,17 @@ class UnsavedModifications {
// - revision is not equal to last revision (size of ids didn't change)
// - the update limit is reached
if (i + 2 > paths.size()
- || size == ids.size()
- || ids.size() >= BACKGROUND_MULTI_UPDATE_LIMIT) {
+ || size == pathList.size()
+ || pathList.size() >= BACKGROUND_MULTI_UPDATE_LIMIT) {
+ List<String> ids = new ArrayList<String>();
+ for (String path : pathList) {
+ ids.add(Utils.getIdFromPath(path));
+ }
store.getDocumentStore().update(NODES, ids, updateOp);
- for (String id : ids) {
- map.remove(Utils.getPathFromId(id), lastRev);
+ for (String path : pathList) {
+ map.remove(path, lastRev);
}
- ids.clear();
+ pathList.clear();
updateOp = null;
lastRev = null;
}
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java?rev=1577129&r1=1577128&r2=1577129&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
Thu Mar 13 12:14:45 2014
@@ -16,6 +16,9 @@
*/
package org.apache.jackrabbit.oak.plugins.document.util;
+import java.nio.charset.Charset;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Map;
@@ -29,6 +32,7 @@ import javax.annotation.Nullable;
import com.mongodb.BasicDBObject;
+import org.apache.commons.codec.binary.Hex;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.document.Revision;
import org.bson.types.ObjectId;
@@ -45,6 +49,22 @@ public class Utils {
*/
private static final int REVISION_LENGTH =
new Revision(System.currentTimeMillis(), 0, 0).toString().length();
+
+ /**
+ * The length of path (in characters), whose UTF-8 representation can not
+ * possibly be too large to be used for the primary key for the document
+ * store.
+ */
+ private static final int PATH_SHORT = Integer.getInteger("oak.pathShort",
330);
+
+ /**
+ * The maximum length of the parent path, in bytes. If the parent path is
+ * longer, then the id of a document is no longer the path, but the hash of
+ * the parent, and then the node name.
+ */
+ private static final int PATH_LONG = Integer.getInteger("oak.pathLong",
700);
+
+ private static final Charset UTF_8 = Charset.forName("UTF-8");
/**
* Make sure the name string does not contain unnecessary baggage (shared
@@ -210,12 +230,42 @@ public class Utils {
}
public static String getIdFromPath(String path) {
+ if (isLongPath(path)) {
+ MessageDigest digest;
+ try {
+ digest = MessageDigest.getInstance("SHA-256");
+ } catch (NoSuchAlgorithmException e) {
+ throw new RuntimeException(e);
+ }
+ int depth = Utils.pathDepth(path);
+ String parent = PathUtils.getParentPath(path);
+ byte[] hash = digest.digest(parent.getBytes(UTF_8));
+ String name = PathUtils.getName(path);
+ return depth + ":h" + Hex.encodeHexString(hash) + "/" + name;
+ }
int depth = Utils.pathDepth(path);
return depth + ":" + path;
}
+ public static boolean isLongPath(String path) {
+ // the most common case: a short path
+ // avoid calculating the parent path
+ if (path.length() < PATH_SHORT) {
+ return false;
+ }
+ // check if the parent path is long
+ byte[] parent = PathUtils.getParentPath(path).getBytes(UTF_8);
+ if (parent.length < PATH_LONG) {
+ return false;
+ }
+ return true;
+ }
+
public static String getPathFromId(String id) {
int index = id.indexOf(':');
+ if (id.charAt(index + 1) == 'h') {
+ throw new IllegalArgumentException("Id is hashed: " + id);
+ }
return id.substring(index + 1);
}
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java?rev=1577129&r1=1577128&r2=1577129&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
Thu Mar 13 12:14:45 2014
@@ -30,9 +30,12 @@ public class UtilsTest {
@Test
public void getPreviousIdFor() {
Revision r = new Revision(System.currentTimeMillis(), 0, 0);
- assertEquals("1:p/" + r.toString(), Utils.getPreviousIdFor("0:/", r));
- assertEquals("2:p/test/" + r.toString(),
Utils.getPreviousIdFor("1:/test", r));
- assertEquals("14:p/a/b/c/d/e/f/g/h/i/j/k/l/m/" + r.toString(),
Utils.getPreviousIdFor("13:/a/b/c/d/e/f/g/h/i/j/k/l/m", r));
+ String p = Utils.getIdFromPath("/");
+ assertEquals("1:p/" + r.toString(), Utils.getPreviousIdFor(p, r));
+ p = Utils.getIdFromPath("/test");
+ assertEquals("2:p/test/" + r.toString(), Utils.getPreviousIdFor(p, r));
+ p = Utils.getIdFromPath("/a/b/c/d/e/f/g/h/i/j/k/l/m");
+ assertEquals("14:p/a/b/c/d/e/f/g/h/i/j/k/l/m/" + r.toString(),
Utils.getPreviousIdFor(p, r));
}
@Ignore("Performance test")