[ 
https://issues.apache.org/jira/browse/OAK-6807?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16201942#comment-16201942
 ] 

Thomas Mueller commented on OAK-6807:
-------------------------------------

First patch (not tested yet). Ideas:

* Uses a new logger, with level "debug" / "trace" (so not enabled by default)
* At level "debug", new queries are logged, plus if they are executed often 
(after 100 times, 200 times,...)
* At most 5000 queries are kept in memory. If there are more, all entries are 
logged and evicted
* At level "trace", the raw query is logged, and each time it is executed
* At level "debug", queries are simplified, meaning string literals are 
replaced with 'x' and paths truncated to 2 elements - that way, only the 
"shape" of the query is kept

[~chetanm] as you have asked for this, maybe you want to quickly review. This 
is work-in-progress, I will be able to continue in about one week.

{noformat}
### Eclipse Workspace Patch 1.0
#P oak-core
Index: src/main/java/org/apache/jackrabbit/oak/query/stats/QueryStatsData.java
===================================================================
--- src/main/java/org/apache/jackrabbit/oak/query/stats/QueryStatsData.java     
(revision 1811333)
+++ src/main/java/org/apache/jackrabbit/oak/query/stats/QueryStatsData.java     
(working copy)
@@ -140,6 +140,7 @@
         long time;
         
         public void execute(long nanos) {
+            QueryRecorder.record(query, internal);
             executeCount++;
             lastExecutedMillis = System.currentTimeMillis();
             time += nanos;
Index: src/main/java/org/apache/jackrabbit/oak/query/stats/QueryRecorder.java
===================================================================
--- src/main/java/org/apache/jackrabbit/oak/query/stats/QueryRecorder.java      
(nonexistent)
+++ src/main/java/org/apache/jackrabbit/oak/query/stats/QueryRecorder.java      
(working copy)
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.query.stats;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.TreeSet;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class QueryRecorder {
+
+    private static final Logger LOG = 
LoggerFactory.getLogger(QueryRecorder.class);
+    private static final HashMap<String, Integer> RECORD_QUERIES_MAP = new 
HashMap<String, Integer>();
+    private final static int QUERIES_MAX =  
Integer.getInteger("oak.query.record", 5000);
+    
+    public static void main(String... args) throws IOException {
+        LineNumberReader reader = new LineNumberReader(
+                new BufferedReader(new FileReader(args[0])));
+        TreeSet<String> sorted = new TreeSet<String>();
+        int lineCount = 0;
+        while(true) {
+            String line = reader.readLine();
+            if(line == null) {
+                break;
+            }
+            sorted.add(simplify(line));
+            lineCount++;
+        }
+        reader.close();
+        for(String s : sorted) {
+            System.out.println(s);
+        }
+        System.out.println("sorted: " + sorted.size() + " original: " + 
lineCount);
+    }
+    
+    public static void record(String query, boolean internal) {
+        if (internal) {
+            return;
+        }
+        if (!LOG.isDebugEnabled()) {
+            return;
+        }
+        if (LOG.isTraceEnabled()) {
+            LOG.trace("query {}", query);
+            return;
+        }
+        query = simplify(query);
+        record(query);
+    }
+    
+    private static String simplify(String query) {
+        query = query.replaceAll("'[^']*'", "'x'");
+        query = query.replaceAll("ISDESCENDANTNODE(\\[[^]]\\])", 
"ISDESCENDANTNODE('x')");
+        int pathIndex = query.indexOf("/jcr:root/");
+        if (pathIndex >= 0) {
+            int end = getFirstOccurance(query, pathIndex,
+                    " ", "/element(", "/text(", "/*", "/(", "/jcr:deref(");
+            String path = query.substring(pathIndex + "/jcr:root/".length());
+            int first = path.indexOf('/');
+            if (first > 0) {
+                first = path.indexOf('/', first + 1);
+                if (first > 0) {
+                    path = path.substring(0, first);
+                }
+            }
+            String newQuery = query.substring(0, pathIndex) + "/jcr:root/" + 
path + 
+                    "..." + query.substring(end, query.length());
+            System.out.println(query);
+            System.out.println(newQuery);
+            System.out.println();
+            query = newQuery;
+        }
+        return query;
+    }
+    
+    static int getFirstOccurance(String text, int start, String... strings) {
+        int first = text.length();
+        for(String s : strings) {
+            int index = text.indexOf(s, start);
+            if (index > 0 && index < first) {
+                first = index;
+            }
+        }
+        return first;
+    }
+    
+    private static synchronized void record(String query) {
+        if (RECORD_QUERIES_MAP.size() > QUERIES_MAX) {
+            for(Entry<String, Integer> e : RECORD_QUERIES_MAP.entrySet()) {
+                log(e.getKey(), e.getValue());
+            }
+            RECORD_QUERIES_MAP.clear();
+        }
+        Integer count = RECORD_QUERIES_MAP.get(query);
+        count = count == null ? 1 : count + 1;
+        RECORD_QUERIES_MAP.put(query, count);
+        if (count == 0 || count % 100 == 0) {
+            log(query, count);
+        }
+    }
+
+    private static void log(String query, int count) {
+        LOG.debug("query {} count {}", query, count);
+    }
+
+}
{noformat}

> Query Recorder
> --------------
>
>                 Key: OAK-6807
>                 URL: https://issues.apache.org/jira/browse/OAK-6807
>             Project: Jackrabbit Oak
>          Issue Type: Improvement
>          Components: query
>            Reporter: Thomas Mueller
>            Assignee: Thomas Mueller
>             Fix For: 1.8
>
>
> In order to manage indexes (e.g. find out which indexes are no longer needed, 
> which properties don't need to be indexed any longer), we should have an easy 
> way to log all executed queries / query plans. 
> Each entry only needs to be logged once (logging multiple times is OK, but 
> ensure it's not logged too often). Different log levels can be used (e.g. log 
> level "TRACE" logs more data, "DEBUG" less). For "DEBUG" level, overhead of 
> logging should be minimal, so this can be kept enabled for a long time.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to