Gehel has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/368777 )

Change subject: add user-agent and IP address to logs
......................................................................

add user-agent and IP address to logs

A servlet Filter adds user-agent and IP address to the logging MDC
(Mapped Diagnostic Contexts). The logback configuration is updated to expose
those new fields on each log message.

Change-Id: I7aaa519b7e1137fd572b8a0a5a164896c9be819c
---
M blazegraph/pom.xml
A blazegraph/pom.xml.bak
R 
blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilter.java
A 
blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilter.java
R 
blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilterTest.java
A 
blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilterTest.java
M war/src/main/resources/logback.xml
M war/src/main/webapp/WEB-INF/web.xml
8 files changed, 411 insertions(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikidata/query/rdf 
refs/changes/77/368777/1

diff --git a/blazegraph/pom.xml b/blazegraph/pom.xml
index 5318026..6cddedd 100644
--- a/blazegraph/pom.xml
+++ b/blazegraph/pom.xml
@@ -116,6 +116,12 @@
       <scope>provided</scope>
     </dependency>
     <dependency>
+      <!-- testing the LoggingFilter requires a "real" logging framework -->
+      <groupId>ch.qos.logback</groupId>
+      <artifactId>logback-classic</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>org.hamcrest</groupId>
       <artifactId>hamcrest-core</artifactId>
       <scope>test</scope>
@@ -133,11 +139,6 @@
     <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>jcl-over-slf4j</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-simple</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
diff --git a/blazegraph/pom.xml.bak b/blazegraph/pom.xml.bak
new file mode 100644
index 0000000..932fdcb
--- /dev/null
+++ b/blazegraph/pom.xml.bak
@@ -0,0 +1,186 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"; 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"; 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd";>
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.wikidata.query.rdf</groupId>
+    <artifactId>parent</artifactId>
+    <version>0.2.5-SNAPSHOT</version>
+  </parent>
+  <artifactId>blazegraph</artifactId>
+  <packaging>jar</packaging>
+
+  <name>Blazegraph extension to improve performance for Wikibase</name>
+  <licenses>
+    <!-- To be compatible with Blazegraph -->
+    <license>
+      <name>GNU General Public License Version 2 (GPLv2)</name>
+      <url>http://www.gnu.org/licenses/gpl-2.0.html</url>
+    </license>
+  </licenses>
+
+  <dependencies>
+    <dependency>
+      <groupId>com.blazegraph</groupId>
+      <artifactId>bigdata-cache</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.blazegraph</groupId>
+      <artifactId>bigdata-client</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.blazegraph</groupId>
+      <artifactId>bigdata-common-util</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.blazegraph</groupId>
+      <artifactId>bigdata-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.blazegraph</groupId>
+      <artifactId>bigdata-util</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.blazegraph</groupId>
+      <artifactId>ctc-striterators</artifactId>
+    </dependency>
+    <!-- Blazegraph needs http client to run services. -->
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.code.gson</groupId>
+      <artifactId>gson</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.jena</groupId>
+      <artifactId>jena-core</artifactId>
+    </dependency>
+    <dependency>
+      <!-- Blazegraph needs http client to run services. -->
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-client</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-http</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.isomorphism</groupId>
+      <artifactId>token-bucket</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.linkeddatafragments</groupId>
+      <artifactId>ldfserver</artifactId>
+      <classifier>classes</classifier>
+    </dependency>
+    <dependency>
+      <groupId>org.openrdf.sesame</groupId>
+      <artifactId>sesame-model</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.openrdf.sesame</groupId>
+      <artifactId>sesame-query</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.openrdf.sesame</groupId>
+      <artifactId>sesame-queryalgebra-evaluation</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.openrdf.sesame</groupId>
+      <artifactId>sesame-queryresultio-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.wikidata.query.rdf</groupId>
+      <artifactId>common</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>xml-apis</groupId>
+      <artifactId>xml-apis</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>javax.servlet-api</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.hamcrest</groupId>
+      <artifactId>hamcrest-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.hamcrest</groupId>
+      <artifactId>hamcrest-library</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>jcl-over-slf4j</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <!-- testing the LoggingFilter requires a "real" logging framework -->
+      <groupId>ch.qos.logback</groupId>
+      <artifactId>logback-classic</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.springframework</groupId>
+      <artifactId>spring-test</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.wikidata.query.rdf</groupId>
+      <artifactId>testTools</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <finalName>wikidata-query-blazegraph-${project.version}</finalName>
+    <testResources>
+      <testResource>
+        <directory>${project.basedir}/src/test/resources</directory>
+      </testResource>
+    </testResources>
+    <plugins>
+      <plugin>
+        <groupId>com.carrotsearch.randomizedtesting</groupId>
+        <artifactId>junit4-maven-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-source-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git 
a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilter.java
 
b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilter.java
similarity index 97%
rename from 
blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilter.java
rename to 
blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilter.java
index ce2e67e..9d3211c 100644
--- 
a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilter.java
+++ 
b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilter.java
@@ -1,4 +1,4 @@
-package org.wikidata.query.rdf.blazegraph.throttling;
+package org.wikidata.query.rdf.blazegraph.filters;
 
 import javax.servlet.Filter;
 import javax.servlet.FilterChain;
diff --git 
a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilter.java
 
b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilter.java
new file mode 100644
index 0000000..c557b3a
--- /dev/null
+++ 
b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilter.java
@@ -0,0 +1,94 @@
+package org.wikidata.query.rdf.blazegraph.filters;
+
+import com.google.common.base.Function;
+import org.slf4j.MDC;
+
+import javax.servlet.Filter;
+import javax.servlet.FilterChain;
+import javax.servlet.FilterConfig;
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+import javax.servlet.http.HttpServletRequest;
+import java.io.Closeable;
+import java.io.IOException;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static com.google.common.base.MoreObjects.firstNonNull;
+import static com.google.common.hash.Hashing.sha256;
+import static java.lang.Boolean.parseBoolean;
+
+/**
+ * Add user-agent and IP address to the logging MDC.
+ *
+ * The Mapped Diagnostic Context (MDC) is a way to expose context to the
+ * logging framework, which can then be printed with each log message.
+ *
+ * See <a href="https://logback.qos.ch/manual/mdc.html";>the logback
+ * documentation</a> for details on MDC.
+ */
+public class LoggingFilter implements Filter {
+
+    /** Rewrite function.
+     *
+     * This function is applied to private information to obfuscate it. It
+     * should be either a NOOP (expose this private info as-is), or a hash
+     * function (SHA256).
+     */
+    private Function<String, String> rewrite;
+
+    /**
+     * Configure the filter.
+     *
+     * The filter config parameter <code>hash-private-data</code> is used to
+     * enable hashing of private data with SHA256. This reduces the amount of
+     * private information available in the logs, while still allowing some
+     * tracking of issues if / when they arise.
+     *
+     * @param filterConfig
+     * @throws ServletException
+     */
+    @Override
+    public void init(FilterConfig filterConfig) throws ServletException {
+        String hashPrivateData = 
firstNonNull(filterConfig.getInitParameter("hash-private-data"), "true");
+        if (parseBoolean(hashPrivateData)) {
+            this.rewrite = (String s) -> sha256().hashString(s, 
UTF_8).toString();
+        } else {
+            this.rewrite = (String s) -> s;
+        }
+    }
+
+    /**
+     * Add user-agent and IP address to the logging MDC.
+     *
+     * @param request
+     * @param response
+     * @param chain
+     * @throws IOException
+     * @throws ServletException
+     */
+    @Override
+    public void doFilter(ServletRequest request, ServletResponse response, 
FilterChain chain) throws IOException, ServletException {
+        String remoteAddr = request.getRemoteAddr();
+        String userAgent = null;
+        if (request instanceof HttpServletRequest) {
+            HttpServletRequest httpRequest = (HttpServletRequest) request;
+            userAgent = httpRequest.getHeader("User-Agent");
+        }
+
+        try (
+                Closeable mdcIPAddress = MDC.putCloseable("IPAddress", 
rewrite.apply(remoteAddr));
+                Closeable mdcUserAgent = MDC.putCloseable("UserAgent", 
rewrite.apply(userAgent))
+        ) {
+            chain.doFilter(request, response);
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void destroy() {
+        // do nothing
+    }
+}
diff --git 
a/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilterTest.java
 
b/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilterTest.java
similarity index 97%
rename from 
blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilterTest.java
rename to 
blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilterTest.java
index 7001ac8..56c837c 100644
--- 
a/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilterTest.java
+++ 
b/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilterTest.java
@@ -1,4 +1,4 @@
-package org.wikidata.query.rdf.blazegraph.throttling;
+package org.wikidata.query.rdf.blazegraph.filters;
 
 import org.junit.Test;
 import org.junit.runner.RunWith;
diff --git 
a/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilterTest.java
 
b/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilterTest.java
new file mode 100644
index 0000000..4c89f79
--- /dev/null
+++ 
b/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilterTest.java
@@ -0,0 +1,113 @@
+package org.wikidata.query.rdf.blazegraph.filters;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.MDC;
+import org.springframework.mock.web.MockFilterConfig;
+import org.springframework.mock.web.MockHttpServletRequest;
+
+import javax.servlet.FilterChain;
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+import java.io.IOException;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertNull;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+
+@RunWith(MockitoJUnitRunner.class)
+public class LoggingFilterTest {
+
+    private LoggingFilter filter = new LoggingFilter();
+    @Mock private ServletResponse response;
+    @Mock private FilterChain chain;
+
+    @Test
+    public void ipAndUserAgentAreAddedToMDC() throws IOException, 
ServletException {
+        MockHttpServletRequest request = new MockHttpServletRequest();
+        request.setRemoteAddr("1.2.3.4");
+        request.addHeader("User-Agent", "UA1");
+
+        doAnswer(invocation -> {
+            assertThat(MDC.get("IPAddress"), equalTo("1.2.3.4"));
+            assertThat(MDC.get("UserAgent"), equalTo("UA1"));
+            return null;
+        }).when(chain).doFilter(request, response);
+
+
+        filter.init(getMockFilterConfig(false));
+        filter.doFilter(request, response, chain);
+    }
+
+    @Test
+    public void nullIpAndUserAgentDontCauseIssues() throws IOException, 
ServletException {
+        MockHttpServletRequest request = new MockHttpServletRequest();
+        request.setRemoteAddr(null);
+        // dont set User-Agent header
+
+        doAnswer(invocation -> {
+            assertNull(MDC.get("IPAddress"));
+            assertNull(MDC.get("UserAgent"));
+            return null;
+        }).when(chain).doFilter(request, response);
+
+
+        filter.init(getMockFilterConfig(false));
+        filter.doFilter(request, response, chain);
+    }
+
+    @Test
+    public void ipAndUserAgentAreHashedWhenRequested() throws IOException, 
ServletException {
+        MockHttpServletRequest request = new MockHttpServletRequest();
+        request.setRemoteAddr("1.2.3.4");
+        request.addHeader("User-Agent", "UA1");
+
+        doAnswer(invocation -> {
+            assertThat(MDC.get("IPAddress"), 
equalTo("6694f83c9f476da31f5df6bcc520034e7e57d421d247b9d34f49edbfc84a764c"));
+            assertThat(MDC.get("UserAgent"), 
equalTo("e8d481b47ccc0c1ac1aee2a700cebfc64be03fb61d9c5e0135fea429c9e3de57"));
+            return null;
+        }).when(chain).doFilter(request, response);
+
+
+        filter.init(getMockFilterConfig(true));
+        filter.doFilter(request, response, chain);
+    }
+
+    @Test
+    public void ipAndUserAgentAreCleanedUp() throws IOException, 
ServletException {
+        MockHttpServletRequest request = new MockHttpServletRequest();
+        request.setRemoteAddr("1.2.3.4");
+        request.addHeader("User-Agent", "UA1");
+
+        filter.init(getMockFilterConfig(false));
+        filter.doFilter(request, response, chain);
+
+        assertNull(MDC.get("IPAddress"));
+        assertNull(MDC.get("UserAgent"));
+    }
+
+    @Test
+    public void nonHttpRequestsDontCauseIssues() throws IOException, 
ServletException {
+        ServletRequest request = mock(ServletRequest.class);
+        doAnswer(invocation -> {
+            assertNull(MDC.get("UserAgent"));
+            return null;
+        }).when(chain).doFilter(request, response);
+
+        filter.init(getMockFilterConfig(false));
+        filter.doFilter(request, response, chain);
+    }
+
+
+    private MockFilterConfig getMockFilterConfig(boolean hashPrivateData) {
+        MockFilterConfig filterConfig = new MockFilterConfig();
+        filterConfig.addInitParameter("hash-private-data", 
Boolean.toString(hashPrivateData));
+        return filterConfig;
+    }
+
+}
diff --git a/war/src/main/resources/logback.xml 
b/war/src/main/resources/logback.xml
index 17370c8..16feb45 100644
--- a/war/src/main/resources/logback.xml
+++ b/war/src/main/resources/logback.xml
@@ -2,7 +2,7 @@
 <configuration>
     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
         <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
-            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - 
%msg%n</pattern>
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} 
IP:%X{IPAddress} UA:%X{UserAgent} - %msg%n</pattern>
             <outputPatternAsHeader>true</outputPatternAsHeader>
         </encoder>
     </appender>
diff --git a/war/src/main/webapp/WEB-INF/web.xml 
b/war/src/main/webapp/WEB-INF/web.xml
index b6f4484..d6a33e0 100644
--- a/war/src/main/webapp/WEB-INF/web.xml
+++ b/war/src/main/webapp/WEB-INF/web.xml
@@ -86,13 +86,21 @@
   </listener>
   <filter>
       <filter-name>real-ip-filter</filter-name>
-      
<filter-class>org.wikidata.query.rdf.blazegraph.throttling.ClientIPFilter</filter-class>
+      
<filter-class>org.wikidata.query.rdf.blazegraph.filters.ClientIPFilter</filter-class>
   </filter>
   <filter-mapping>
     <filter-name>real-ip-filter</filter-name>
     <url-pattern>/*</url-pattern>
   </filter-mapping>
   <filter>
+    <filter-name>logging-filter</filter-name>
+    
<filter-class>org.wikidata.query.rdf.blazegraph.filters.LoggingFilter</filter-class>
+  </filter>
+  <filter-mapping>
+    <filter-name>logging-filter</filter-name>
+    <url-pattern>/*</url-pattern>
+  </filter-mapping>
+  <filter>
       <filter-name>throttling-filter</filter-name>
       
<filter-class>org.wikidata.query.rdf.blazegraph.throttling.ThrottlingFilter</filter-class>
   </filter>

-- 
To view, visit https://gerrit.wikimedia.org/r/368777
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7aaa519b7e1137fd572b8a0a5a164896c9be819c
Gerrit-PatchSet: 1
Gerrit-Project: wikidata/query/rdf
Gerrit-Branch: master
Gerrit-Owner: Gehel <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to