nfsantos commented on code in PR #2145:
URL: https://github.com/apache/jackrabbit-oak/pull/2145#discussion_r1991154032


##########
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtils.java:
##########
@@ -31,6 +31,118 @@ public class ElasticIndexUtils {
 
     private static final Logger LOG = 
LoggerFactory.getLogger(ElasticIndexUtils.class);
 
+    /**
+     * Convert a JCR property name to a Elasticsearch field name.
+     * Notice that "|" is not allowed in JCR names.
+     *
+     * @param propertyName the property name
+     * @return the field name
+     */
+    public static String fieldName(String propertyName) {
+        if(propertyName.startsWith(":")) {
+            // there are some hardcoded field names
+            return propertyName;
+        }
+        String fieldName = propertyName;
+        boolean blank = fieldName.isBlank();
+        boolean escape = false;
+        if (blank) {
+            // empty field name or field names that only consist of spaces
+            escape = true;
+        } else {
+            // 99.99% property names are OK,
+            // so we loop over the characters first
+            for (int i = 0; i < fieldName.length() && !escape ; i++) {
+                switch (fieldName.charAt(i)) {
+                case '|':
+                case '.':
+                case '^':
+                case '_':
+                    escape = true;
+                }
+            }
+        }
+        if (escape) {
+            StringBuilder buff = new StringBuilder(fieldName.length());
+            if (fieldName.startsWith("_") || blank) {
+                // internal field start with a _
+                // we also support empty or just spaces
+                buff.append('|');
+            }
+            for (int i = 0; i < fieldName.length(); i++) {
+                char c = fieldName.charAt(i);
+                // For performance, the logic for the currently supported
+                // characters is hardcoded.
+                // In case more characters need to be escaped,
+                // buff.append('|').append(Integer.toHexString(c)).append('|');
+                switch (c) {
+                case '|':
+                    buff.append("||");
+                    break;
+                case '.':
+                    buff.append("|2e|");
+                    break;
+                case '^':
+                    buff.append("|5e|");
+                    break;
+                default:
+                    buff.append(c);
+                }
+            }
+            fieldName = buff.toString();
+        }
+        return fieldName;
+    }
+
+    /**
+     * Convert an elasticsearch field name to a JCR property name.
+     * Please note this method is not optimized for performance.
+     *
+     * @param fieldName the field name
+     * @return the property name
+     */
+    public static String propertyNameFromFieldName(String fieldName) {
+        if (fieldName.indexOf('|') < 0) {
+            return fieldName;
+        }
+        if (fieldName.startsWith("|")) {
+            if (fieldName.equals("|")) {
+                return "";
+            } if (fieldName.startsWith("|_") || 
fieldName.substring(1).isBlank()) {

Review Comment:
   The second condition is creating a new String and it probably can be 
replaced with a check of the string length. 



##########
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtils.java:
##########
@@ -31,6 +31,118 @@ public class ElasticIndexUtils {
 
     private static final Logger LOG = 
LoggerFactory.getLogger(ElasticIndexUtils.class);
 
+    /**
+     * Convert a JCR property name to a Elasticsearch field name.
+     * Notice that "|" is not allowed in JCR names.
+     *
+     * @param propertyName the property name
+     * @return the field name
+     */
+    public static String fieldName(String propertyName) {
+        if(propertyName.startsWith(":")) {
+            // there are some hardcoded field names
+            return propertyName;
+        }
+        String fieldName = propertyName;
+        boolean blank = fieldName.isBlank();
+        boolean escape = false;
+        if (blank) {
+            // empty field name or field names that only consist of spaces
+            escape = true;
+        } else {
+            // 99.99% property names are OK,
+            // so we loop over the characters first
+            for (int i = 0; i < fieldName.length() && !escape ; i++) {
+                switch (fieldName.charAt(i)) {
+                case '|':
+                case '.':
+                case '^':
+                case '_':
+                    escape = true;
+                }
+            }
+        }
+        if (escape) {
+            StringBuilder buff = new StringBuilder(fieldName.length());
+            if (fieldName.startsWith("_") || blank) {
+                // internal field start with a _
+                // we also support empty or just spaces
+                buff.append('|');
+            }
+            for (int i = 0; i < fieldName.length(); i++) {
+                char c = fieldName.charAt(i);
+                // For performance, the logic for the currently supported
+                // characters is hardcoded.
+                // In case more characters need to be escaped,
+                // buff.append('|').append(Integer.toHexString(c)).append('|');
+                switch (c) {
+                case '|':
+                    buff.append("||");
+                    break;
+                case '.':
+                    buff.append("|2e|");
+                    break;
+                case '^':
+                    buff.append("|5e|");
+                    break;
+                default:
+                    buff.append(c);
+                }
+            }
+            fieldName = buff.toString();
+        }
+        return fieldName;
+    }
+
+    /**
+     * Convert an elasticsearch field name to a JCR property name.
+     * Please note this method is not optimized for performance.
+     *
+     * @param fieldName the field name
+     * @return the property name
+     */
+    public static String propertyNameFromFieldName(String fieldName) {
+        if (fieldName.indexOf('|') < 0) {
+            return fieldName;
+        }
+        if (fieldName.startsWith("|")) {
+            if (fieldName.equals("|")) {
+                return "";
+            } if (fieldName.startsWith("|_") || 
fieldName.substring(1).isBlank()) {
+                fieldName = fieldName.substring(1);
+            }
+        }
+        StringBuilder buff = new StringBuilder(fieldName.length());
+        for (int i = 0; i < fieldName.length(); i++) {
+            char c = fieldName.charAt(i);
+            switch (c) {
+            case '|':
+                String next = fieldName.substring(i + 1);
+                if (next.startsWith("|")) {
+                    buff.append('|');
+                    i++;
+                } else {
+                    int end = next.indexOf('|');
+                    if (end < 0) {
+                        buff.append(next);
+                        break;
+                    }
+                    String code = next.substring(0, end);
+                    try {
+                        buff.append((char) Integer.parseInt(code, 16));
+                    } catch (NumberFormatException e) {
+                        buff.append(code);

Review Comment:
   Is it possible to avoid raising this exception? Especially in the case of 
converting to an hex char, this can be done more efficiently. 



##########
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtils.java:
##########
@@ -31,6 +31,118 @@ public class ElasticIndexUtils {
 
     private static final Logger LOG = 
LoggerFactory.getLogger(ElasticIndexUtils.class);
 
+    /**
+     * Convert a JCR property name to a Elasticsearch field name.
+     * Notice that "|" is not allowed in JCR names.
+     *
+     * @param propertyName the property name
+     * @return the field name
+     */
+    public static String fieldName(String propertyName) {
+        if(propertyName.startsWith(":")) {
+            // there are some hardcoded field names
+            return propertyName;
+        }
+        String fieldName = propertyName;
+        boolean blank = fieldName.isBlank();
+        boolean escape = false;
+        if (blank) {
+            // empty field name or field names that only consist of spaces
+            escape = true;
+        } else {
+            // 99.99% property names are OK,
+            // so we loop over the characters first
+            for (int i = 0; i < fieldName.length() && !escape ; i++) {
+                switch (fieldName.charAt(i)) {
+                case '|':
+                case '.':
+                case '^':
+                case '_':
+                    escape = true;
+                }
+            }
+        }
+        if (escape) {
+            StringBuilder buff = new StringBuilder(fieldName.length());
+            if (fieldName.startsWith("_") || blank) {
+                // internal field start with a _
+                // we also support empty or just spaces
+                buff.append('|');
+            }
+            for (int i = 0; i < fieldName.length(); i++) {
+                char c = fieldName.charAt(i);
+                // For performance, the logic for the currently supported
+                // characters is hardcoded.
+                // In case more characters need to be escaped,
+                // buff.append('|').append(Integer.toHexString(c)).append('|');
+                switch (c) {
+                case '|':
+                    buff.append("||");
+                    break;
+                case '.':
+                    buff.append("|2e|");
+                    break;
+                case '^':
+                    buff.append("|5e|");
+                    break;
+                default:
+                    buff.append(c);
+                }
+            }
+            fieldName = buff.toString();
+        }
+        return fieldName;
+    }
+
+    /**
+     * Convert an elasticsearch field name to a JCR property name.
+     * Please note this method is not optimized for performance.
+     *
+     * @param fieldName the field name
+     * @return the property name
+     */
+    public static String propertyNameFromFieldName(String fieldName) {
+        if (fieldName.indexOf('|') < 0) {
+            return fieldName;
+        }
+        if (fieldName.startsWith("|")) {
+            if (fieldName.equals("|")) {
+                return "";
+            } if (fieldName.startsWith("|_") || 
fieldName.substring(1).isBlank()) {
+                fieldName = fieldName.substring(1);
+            }
+        }
+        StringBuilder buff = new StringBuilder(fieldName.length());
+        for (int i = 0; i < fieldName.length(); i++) {
+            char c = fieldName.charAt(i);
+            switch (c) {
+            case '|':
+                String next = fieldName.substring(i + 1);

Review Comment:
   Once again creating a new String instance. Would be better if this could be 
avoided, as this method may be on the critical path in some cases. 



##########
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtils.java:
##########
@@ -31,6 +31,118 @@ public class ElasticIndexUtils {
 
     private static final Logger LOG = 
LoggerFactory.getLogger(ElasticIndexUtils.class);
 
+    /**
+     * Convert a JCR property name to a Elasticsearch field name.
+     * Notice that "|" is not allowed in JCR names.
+     *
+     * @param propertyName the property name
+     * @return the field name
+     */
+    public static String fieldName(String propertyName) {
+        if(propertyName.startsWith(":")) {
+            // there are some hardcoded field names
+            return propertyName;
+        }
+        String fieldName = propertyName;
+        boolean blank = fieldName.isBlank();
+        boolean escape = false;
+        if (blank) {
+            // empty field name or field names that only consist of spaces
+            escape = true;
+        } else {
+            // 99.99% property names are OK,
+            // so we loop over the characters first
+            for (int i = 0; i < fieldName.length() && !escape ; i++) {
+                switch (fieldName.charAt(i)) {
+                case '|':
+                case '.':
+                case '^':
+                case '_':
+                    escape = true;
+                }
+            }
+        }
+        if (escape) {
+            StringBuilder buff = new StringBuilder(fieldName.length());

Review Comment:
   The final string will be always bigger than the original string, so 
presizing the StringBuilder with the size of the original string will always 
lead to at least one internal resizing. Maybe it would be better to allocate 2x 
the size of the original field, to avoid resizing in most of the cases.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to