Gehel has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/368777 )
Change subject: add user-agent and IP address to logs
......................................................................
add user-agent and IP address to logs
A servlet Filter adds user-agent and IP address to the logging MDC
(Mapped Diagnostic Contexts). The logback configuration is updated to expose
those new fields on each log message.
Change-Id: I7aaa519b7e1137fd572b8a0a5a164896c9be819c
---
M blazegraph/pom.xml
A blazegraph/pom.xml.bak
R
blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilter.java
A
blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilter.java
R
blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilterTest.java
A
blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilterTest.java
M war/src/main/resources/logback.xml
M war/src/main/webapp/WEB-INF/web.xml
8 files changed, 411 insertions(+), 9 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/wikidata/query/rdf
refs/changes/77/368777/1
diff --git a/blazegraph/pom.xml b/blazegraph/pom.xml
index 5318026..6cddedd 100644
--- a/blazegraph/pom.xml
+++ b/blazegraph/pom.xml
@@ -116,6 +116,12 @@
<scope>provided</scope>
</dependency>
<dependency>
+ <!-- testing the LoggingFilter requires a "real" logging framework -->
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
<scope>test</scope>
@@ -133,11 +139,6 @@
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-simple</artifactId>
<scope>test</scope>
</dependency>
<dependency>
diff --git a/blazegraph/pom.xml.bak b/blazegraph/pom.xml.bak
new file mode 100644
index 0000000..932fdcb
--- /dev/null
+++ b/blazegraph/pom.xml.bak
@@ -0,0 +1,186 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.wikidata.query.rdf</groupId>
+ <artifactId>parent</artifactId>
+ <version>0.2.5-SNAPSHOT</version>
+ </parent>
+ <artifactId>blazegraph</artifactId>
+ <packaging>jar</packaging>
+
+ <name>Blazegraph extension to improve performance for Wikibase</name>
+ <licenses>
+ <!-- To be compatible with Blazegraph -->
+ <license>
+ <name>GNU General Public License Version 2 (GPLv2)</name>
+ <url>http://www.gnu.org/licenses/gpl-2.0.html</url>
+ </license>
+ </licenses>
+
+ <dependencies>
+ <dependency>
+ <groupId>com.blazegraph</groupId>
+ <artifactId>bigdata-cache</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.blazegraph</groupId>
+ <artifactId>bigdata-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.blazegraph</groupId>
+ <artifactId>bigdata-common-util</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.blazegraph</groupId>
+ <artifactId>bigdata-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.blazegraph</groupId>
+ <artifactId>bigdata-util</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.blazegraph</groupId>
+ <artifactId>ctc-striterators</artifactId>
+ </dependency>
+ <!-- Blazegraph needs http client to run services. -->
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.jena</groupId>
+ <artifactId>jena-core</artifactId>
+ </dependency>
+ <dependency>
+ <!-- Blazegraph needs http client to run services. -->
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-http</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.isomorphism</groupId>
+ <artifactId>token-bucket</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.linkeddatafragments</groupId>
+ <artifactId>ldfserver</artifactId>
+ <classifier>classes</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-model</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-query</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-queryalgebra-evaluation</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-queryresultio-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.wikidata.query.rdf</groupId>
+ <artifactId>common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>xml-apis</groupId>
+ <artifactId>xml-apis</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>javax.servlet</groupId>
+ <artifactId>javax.servlet-api</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-library</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jcl-over-slf4j</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <!-- testing the LoggingFilter requires a "real" logging framework -->
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.springframework</groupId>
+ <artifactId>spring-test</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.wikidata.query.rdf</groupId>
+ <artifactId>testTools</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <finalName>wikidata-query-blazegraph-${project.version}</finalName>
+ <testResources>
+ <testResource>
+ <directory>${project.basedir}/src/test/resources</directory>
+ </testResource>
+ </testResources>
+ <plugins>
+ <plugin>
+ <groupId>com.carrotsearch.randomizedtesting</groupId>
+ <artifactId>junit4-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git
a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilter.java
b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilter.java
similarity index 97%
rename from
blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilter.java
rename to
blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilter.java
index ce2e67e..9d3211c 100644
---
a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilter.java
+++
b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilter.java
@@ -1,4 +1,4 @@
-package org.wikidata.query.rdf.blazegraph.throttling;
+package org.wikidata.query.rdf.blazegraph.filters;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
diff --git
a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilter.java
b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilter.java
new file mode 100644
index 0000000..c557b3a
--- /dev/null
+++
b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilter.java
@@ -0,0 +1,94 @@
+package org.wikidata.query.rdf.blazegraph.filters;
+
+import com.google.common.base.Function;
+import org.slf4j.MDC;
+
+import javax.servlet.Filter;
+import javax.servlet.FilterChain;
+import javax.servlet.FilterConfig;
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+import javax.servlet.http.HttpServletRequest;
+import java.io.Closeable;
+import java.io.IOException;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static com.google.common.base.MoreObjects.firstNonNull;
+import static com.google.common.hash.Hashing.sha256;
+import static java.lang.Boolean.parseBoolean;
+
+/**
+ * Add user-agent and IP address to the logging MDC.
+ *
+ * The Mapped Diagnostic Context (MDC) is a way to expose context to the
+ * logging framework, which can then be printed with each log message.
+ *
+ * See <a href="https://logback.qos.ch/manual/mdc.html">the logback
+ * documentation</a> for details on MDC.
+ */
+public class LoggingFilter implements Filter {
+
+ /** Rewrite function.
+ *
+ * This function is applied to private information to obfuscate it. It
+ * should be either a NOOP (expose this private info as-is), or a hash
+ * function (SHA256).
+ */
+ private Function<String, String> rewrite;
+
+ /**
+ * Configure the filter.
+ *
+ * The filter config parameter <code>hash-private-data</code> is used to
+ * enable hashing of private data with SHA256. This reduces the amount of
+ * private information available in the logs, while still allowing some
+ * tracking of issues if / when they arise.
+ *
+ * @param filterConfig
+ * @throws ServletException
+ */
+ @Override
+ public void init(FilterConfig filterConfig) throws ServletException {
+ String hashPrivateData =
firstNonNull(filterConfig.getInitParameter("hash-private-data"), "true");
+ if (parseBoolean(hashPrivateData)) {
+ this.rewrite = (String s) -> sha256().hashString(s,
UTF_8).toString();
+ } else {
+ this.rewrite = (String s) -> s;
+ }
+ }
+
+ /**
+ * Add user-agent and IP address to the logging MDC.
+ *
+ * @param request
+ * @param response
+ * @param chain
+ * @throws IOException
+ * @throws ServletException
+ */
+ @Override
+ public void doFilter(ServletRequest request, ServletResponse response,
FilterChain chain) throws IOException, ServletException {
+ String remoteAddr = request.getRemoteAddr();
+ String userAgent = null;
+ if (request instanceof HttpServletRequest) {
+ HttpServletRequest httpRequest = (HttpServletRequest) request;
+ userAgent = httpRequest.getHeader("User-Agent");
+ }
+
+ try (
+ Closeable mdcIPAddress = MDC.putCloseable("IPAddress",
rewrite.apply(remoteAddr));
+ Closeable mdcUserAgent = MDC.putCloseable("UserAgent",
rewrite.apply(userAgent))
+ ) {
+ chain.doFilter(request, response);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void destroy() {
+ // do nothing
+ }
+}
diff --git
a/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilterTest.java
b/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilterTest.java
similarity index 97%
rename from
blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilterTest.java
rename to
blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilterTest.java
index 7001ac8..56c837c 100644
---
a/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/throttling/ClientIPFilterTest.java
+++
b/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/ClientIPFilterTest.java
@@ -1,4 +1,4 @@
-package org.wikidata.query.rdf.blazegraph.throttling;
+package org.wikidata.query.rdf.blazegraph.filters;
import org.junit.Test;
import org.junit.runner.RunWith;
diff --git
a/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilterTest.java
b/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilterTest.java
new file mode 100644
index 0000000..4c89f79
--- /dev/null
+++
b/blazegraph/src/test/java/org/wikidata/query/rdf/blazegraph/filters/LoggingFilterTest.java
@@ -0,0 +1,113 @@
+package org.wikidata.query.rdf.blazegraph.filters;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.MDC;
+import org.springframework.mock.web.MockFilterConfig;
+import org.springframework.mock.web.MockHttpServletRequest;
+
+import javax.servlet.FilterChain;
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+import java.io.IOException;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertNull;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+
+@RunWith(MockitoJUnitRunner.class)
+public class LoggingFilterTest {
+
+ private LoggingFilter filter = new LoggingFilter();
+ @Mock private ServletResponse response;
+ @Mock private FilterChain chain;
+
+ @Test
+ public void ipAndUserAgentAreAddedToMDC() throws IOException,
ServletException {
+ MockHttpServletRequest request = new MockHttpServletRequest();
+ request.setRemoteAddr("1.2.3.4");
+ request.addHeader("User-Agent", "UA1");
+
+ doAnswer(invocation -> {
+ assertThat(MDC.get("IPAddress"), equalTo("1.2.3.4"));
+ assertThat(MDC.get("UserAgent"), equalTo("UA1"));
+ return null;
+ }).when(chain).doFilter(request, response);
+
+
+ filter.init(getMockFilterConfig(false));
+ filter.doFilter(request, response, chain);
+ }
+
+ @Test
+ public void nullIpAndUserAgentDontCauseIssues() throws IOException,
ServletException {
+ MockHttpServletRequest request = new MockHttpServletRequest();
+ request.setRemoteAddr(null);
+ // dont set User-Agent header
+
+ doAnswer(invocation -> {
+ assertNull(MDC.get("IPAddress"));
+ assertNull(MDC.get("UserAgent"));
+ return null;
+ }).when(chain).doFilter(request, response);
+
+
+ filter.init(getMockFilterConfig(false));
+ filter.doFilter(request, response, chain);
+ }
+
+ @Test
+ public void ipAndUserAgentAreHashedWhenRequested() throws IOException,
ServletException {
+ MockHttpServletRequest request = new MockHttpServletRequest();
+ request.setRemoteAddr("1.2.3.4");
+ request.addHeader("User-Agent", "UA1");
+
+ doAnswer(invocation -> {
+ assertThat(MDC.get("IPAddress"),
equalTo("6694f83c9f476da31f5df6bcc520034e7e57d421d247b9d34f49edbfc84a764c"));
+ assertThat(MDC.get("UserAgent"),
equalTo("e8d481b47ccc0c1ac1aee2a700cebfc64be03fb61d9c5e0135fea429c9e3de57"));
+ return null;
+ }).when(chain).doFilter(request, response);
+
+
+ filter.init(getMockFilterConfig(true));
+ filter.doFilter(request, response, chain);
+ }
+
+ @Test
+ public void ipAndUserAgentAreCleanedUp() throws IOException,
ServletException {
+ MockHttpServletRequest request = new MockHttpServletRequest();
+ request.setRemoteAddr("1.2.3.4");
+ request.addHeader("User-Agent", "UA1");
+
+ filter.init(getMockFilterConfig(false));
+ filter.doFilter(request, response, chain);
+
+ assertNull(MDC.get("IPAddress"));
+ assertNull(MDC.get("UserAgent"));
+ }
+
+ @Test
+ public void nonHttpRequestsDontCauseIssues() throws IOException,
ServletException {
+ ServletRequest request = mock(ServletRequest.class);
+ doAnswer(invocation -> {
+ assertNull(MDC.get("UserAgent"));
+ return null;
+ }).when(chain).doFilter(request, response);
+
+ filter.init(getMockFilterConfig(false));
+ filter.doFilter(request, response, chain);
+ }
+
+
+ private MockFilterConfig getMockFilterConfig(boolean hashPrivateData) {
+ MockFilterConfig filterConfig = new MockFilterConfig();
+ filterConfig.addInitParameter("hash-private-data",
Boolean.toString(hashPrivateData));
+ return filterConfig;
+ }
+
+}
diff --git a/war/src/main/resources/logback.xml
b/war/src/main/resources/logback.xml
index 17370c8..16feb45 100644
--- a/war/src/main/resources/logback.xml
+++ b/war/src/main/resources/logback.xml
@@ -2,7 +2,7 @@
<configuration>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
- <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} -
%msg%n</pattern>
+ <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36}
IP:%X{IPAddress} UA:%X{UserAgent} - %msg%n</pattern>
<outputPatternAsHeader>true</outputPatternAsHeader>
</encoder>
</appender>
diff --git a/war/src/main/webapp/WEB-INF/web.xml
b/war/src/main/webapp/WEB-INF/web.xml
index b6f4484..d6a33e0 100644
--- a/war/src/main/webapp/WEB-INF/web.xml
+++ b/war/src/main/webapp/WEB-INF/web.xml
@@ -86,13 +86,21 @@
</listener>
<filter>
<filter-name>real-ip-filter</filter-name>
-
<filter-class>org.wikidata.query.rdf.blazegraph.throttling.ClientIPFilter</filter-class>
+
<filter-class>org.wikidata.query.rdf.blazegraph.filters.ClientIPFilter</filter-class>
</filter>
<filter-mapping>
<filter-name>real-ip-filter</filter-name>
<url-pattern>/*</url-pattern>
</filter-mapping>
<filter>
+ <filter-name>logging-filter</filter-name>
+
<filter-class>org.wikidata.query.rdf.blazegraph.filters.LoggingFilter</filter-class>
+ </filter>
+ <filter-mapping>
+ <filter-name>logging-filter</filter-name>
+ <url-pattern>/*</url-pattern>
+ </filter-mapping>
+ <filter>
<filter-name>throttling-filter</filter-name>
<filter-class>org.wikidata.query.rdf.blazegraph.throttling.ThrottlingFilter</filter-class>
</filter>
--
To view, visit https://gerrit.wikimedia.org/r/368777
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I7aaa519b7e1137fd572b8a0a5a164896c9be819c
Gerrit-PatchSet: 1
Gerrit-Project: wikidata/query/rdf
Gerrit-Branch: master
Gerrit-Owner: Gehel <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits