[
https://issues.apache.org/jira/browse/OAK-6807?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16201942#comment-16201942
]
Thomas Mueller commented on OAK-6807:
-------------------------------------
First patch (not tested yet). Ideas:
* Uses a new logger, with level "debug" / "trace" (so not enabled by default)
* At level "debug", each new query is logged once, and again whenever it has
been executed often (after 100 times, 200 times, ...)
* At most 5000 queries are kept in memory. If there are too many, all entries
are logged and evicted
* At level "trace", the raw query is logged, and each time it is executed
* At level "debug", queries are simplified, meaning string literals are
replaced with 'x' and paths truncated to 2 elements - that way, only the
"shape" of the query is kept
[~chetanm] as you have asked for this, maybe you want to quickly review. This
is work-in-progress, I will be able to continue in about one week.
{noformat}
### Eclipse Workspace Patch 1.0
#P oak-core
Index: src/main/java/org/apache/jackrabbit/oak/query/stats/QueryStatsData.java
===================================================================
--- src/main/java/org/apache/jackrabbit/oak/query/stats/QueryStatsData.java
(revision 1811333)
+++ src/main/java/org/apache/jackrabbit/oak/query/stats/QueryStatsData.java
(working copy)
@@ -140,6 +140,7 @@
long time;
public void execute(long nanos) {
+ QueryRecorder.record(query, internal);
executeCount++;
lastExecutedMillis = System.currentTimeMillis();
time += nanos;
Index: src/main/java/org/apache/jackrabbit/oak/query/stats/QueryRecorder.java
===================================================================
--- src/main/java/org/apache/jackrabbit/oak/query/stats/QueryRecorder.java
(nonexistent)
+++ src/main/java/org/apache/jackrabbit/oak/query/stats/QueryRecorder.java
(working copy)
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.query.stats;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.TreeSet;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Records which queries are executed, so that index usage can be analyzed.
+ * <p>
+ * Recording is driven by the log level of this class' logger:
+ * <ul>
+ * <li>disabled below "debug";</li>
+ * <li>at "debug", queries are simplified to their "shape" and counted; a
+ * shape is logged the first time it is seen and then on every 100th
+ * execution;</li>
+ * <li>at "trace", every raw query is logged each time it is executed.</li>
+ * </ul>
+ * At most {@code QUERIES_MAX} distinct shapes are kept in memory; when the
+ * limit is reached, all counters are logged and discarded.
+ */
+public class QueryRecorder {
+
+    private static final Logger LOG =
+            LoggerFactory.getLogger(QueryRecorder.class);
+
+    /** Execution counters keyed by simplified query; guarded by the class lock. */
+    private static final HashMap<String, Integer> RECORD_QUERIES_MAP =
+            new HashMap<String, Integer>();
+
+    /** Limit of distinct queries kept in memory (system property "oak.query.record"). */
+    private static final int QUERIES_MAX =
+            Integer.getInteger("oak.query.record", 5000);
+
+    /**
+     * Command line tool: reads a file containing one query per line and
+     * prints the sorted set of distinct simplified queries, followed by the
+     * number of distinct and original lines.
+     *
+     * @param args the first element is the name of the file to read
+     * @throws IOException if the file cannot be read
+     */
+    public static void main(String... args) throws IOException {
+        if (args.length == 0) {
+            System.err.println("Usage: QueryRecorder <file with one query per line>");
+            return;
+        }
+        TreeSet<String> sorted = new TreeSet<String>();
+        int lineCount = 0;
+        LineNumberReader reader = new LineNumberReader(
+                new BufferedReader(new FileReader(args[0])));
+        try {
+            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+                sorted.add(simplify(line));
+                lineCount++;
+            }
+        } finally {
+            // close the file even if reading or simplifying fails
+            reader.close();
+        }
+        for (String s : sorted) {
+            System.out.println(s);
+        }
+        System.out.println("sorted: " + sorted.size() + " original: " + lineCount);
+    }
+
+    /**
+     * Records the execution of a query, depending on the log level.
+     *
+     * @param query the query statement
+     * @param internal whether the query is internal; internal queries are
+     *            not recorded
+     */
+    public static void record(String query, boolean internal) {
+        if (internal) {
+            return;
+        }
+        if (!LOG.isDebugEnabled()) {
+            return;
+        }
+        if (LOG.isTraceEnabled()) {
+            // trace: log the raw statement on every execution, no counting
+            LOG.trace("query {}", query);
+            return;
+        }
+        query = simplify(query);
+        record(query);
+    }
+
+    /**
+     * Reduces a query to its "shape": string literals are replaced with
+     * 'x', the path argument of ISDESCENDANTNODE([...]) is replaced with
+     * 'x', and the path that follows "/jcr:root" is truncated to at most
+     * two elements, followed by "...".
+     *
+     * @param query the raw query
+     * @return the simplified query
+     */
+    private static String simplify(String query) {
+        query = query.replaceAll("'[^']*'", "'x'");
+        // the parentheses are literal characters of the query and must be escaped
+        query = query.replaceAll("ISDESCENDANTNODE\\(\\[[^\\]]*\\]\\)",
+                "ISDESCENDANTNODE('x')");
+        int pathIndex = query.indexOf("/jcr:root/");
+        if (pathIndex < 0) {
+            return query;
+        }
+        // index of the '/' that directly follows "jcr:root"
+        int pathStart = pathIndex + "/jcr:root".length();
+        int end = getFirstOccurance(query, pathStart,
+                " ", "/element(", "/text(", "/*", "/(", "/jcr:deref(");
+        // only the part up to the first terminator is the path; the rest
+        // of the query is appended unchanged below
+        String path = query.substring(pathStart, end);
+        int cut = path.indexOf('/', 1);
+        if (cut > 0) {
+            cut = path.indexOf('/', cut + 1);
+            if (cut > 0) {
+                // keep the first two path elements only
+                path = path.substring(0, cut);
+            }
+        }
+        return query.substring(0, pathStart) + path + "..." + query.substring(end);
+    }
+
+    /**
+     * Finds the smallest index, at or after the start position, at which
+     * any of the given strings occurs.
+     *
+     * @param text the text to search in
+     * @param start the index to start searching from
+     * @param strings the strings to search for
+     * @return the smallest index found, or the length of the text if none
+     *         of the strings occurs
+     */
+    static int getFirstOccurance(String text, int start, String... strings) {
+        int first = text.length();
+        for (String s : strings) {
+            int index = text.indexOf(s, start);
+            // indexOf returns -1 if not found; index 0 is a valid match
+            if (index >= 0 && index < first) {
+                first = index;
+            }
+        }
+        return first;
+    }
+
+    /**
+     * Increments the counter of the given (simplified) query. The query is
+     * logged when it is seen for the first time and on every 100th
+     * execution. If a new query would exceed the limit, all counters are
+     * logged and removed first.
+     *
+     * @param query the simplified query
+     */
+    private static synchronized void record(String query) {
+        Integer count = RECORD_QUERIES_MAP.get(query);
+        if (count == null && RECORD_QUERIES_MAP.size() >= QUERIES_MAX) {
+            // the map is full: flush all counters to the log and start over
+            for (Entry<String, Integer> e : RECORD_QUERIES_MAP.entrySet()) {
+                log(e.getKey(), e.getValue());
+            }
+            RECORD_QUERIES_MAP.clear();
+        }
+        count = count == null ? 1 : count + 1;
+        RECORD_QUERIES_MAP.put(query, count);
+        // count is never 0 here; 1 means the query was not known before
+        if (count == 1 || count % 100 == 0) {
+            log(query, count);
+        }
+    }
+
+    /**
+     * Logs a query and its execution count at level "debug".
+     *
+     * @param query the simplified query
+     * @param count the number of executions recorded so far
+     */
+    private static void log(String query, int count) {
+        LOG.debug("query {} count {}", query, count);
+    }
+
+}
{noformat}
> Query Recorder
> --------------
>
> Key: OAK-6807
> URL: https://issues.apache.org/jira/browse/OAK-6807
> Project: Jackrabbit Oak
> Issue Type: Improvement
> Components: query
> Reporter: Thomas Mueller
> Assignee: Thomas Mueller
> Fix For: 1.8
>
>
> In order to manage indexes (e.g. find out which indexes are no longer needed,
> which properties don't need to be indexed any longer), we need an easy way to
> log all executed queries / query plans.
> Each entry only needs to be logged once (logging multiple times is OK, but
> ensure it's not logged too often). Different log levels can be used (e.g. log
> level "TRACE" logs more data, "DEBUG" less). For "DEBUG" level, overhead of
> logging should be minimal, so this can be kept enabled for a long time.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)