lewismc closed pull request #32: SDAP-120 Error trying to ingest logs
URL: https://github.com/apache/incubator-sdap-mudrod/pull/32
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (GitHub does not display the original diff once such a pull request is merged):

diff --git 
a/core/src/test/java/org/apache/sdap/mudrod/weblog/structure/TestApacheAccessLog.java
 
b/core/src/test/java/org/apache/sdap/mudrod/weblog/structure/TestApacheAccessLog.java
new file mode 100644
index 0000000..e1453a5
--- /dev/null
+++ 
b/core/src/test/java/org/apache/sdap/mudrod/weblog/structure/TestApacheAccessLog.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.sdap.mudrod.weblog.structure;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.text.ParseException;
+import java.util.Properties;
+
+import static org.junit.Assert.assertNotEquals;
+
+
+public class TestApacheAccessLog {
+
+    private static Properties testProperties = new Properties();
+
+    @BeforeClass
+    public static void loadProperties() throws IOException {
+
+        URL configURL = 
ClassLoader.getSystemClassLoader().getResource("config.properties");
+
+        assert configURL != null : "Could not load config.properties";
+        try (InputStream instream = new FileInputStream(configURL.getFile())) {
+            testProperties.load(instream);
+        }
+    }
+
+    @Test
+    public void testLogMatch() throws IOException, ParseException {
+
+
+        String testLogLine = "198.118.243.84 - - [31/Dec/2017:23:59:20 +0000] 
\"GET 
/events?page=12&amp%25252525252525252525252525252525252525253Bsort=asc&order=field_location&sort=desc
 HTTP/1.1\" 200 86173";
+
+        String result = ApacheAccessLog.parseFromLogLine(testLogLine, 
testProperties);
+
+        assertNotEquals("Log line does not match", "{}", result);
+    }
+}
diff --git a/core/src/test/resources/config.properties 
b/core/src/test/resources/config.properties
new file mode 100644
index 0000000..4c8991e
--- /dev/null
+++ b/core/src/test/resources/config.properties
@@ -0,0 +1,74 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you 
+# may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Database configuration
+mudrod.cluster.name=MudrodES
+mudrod.es.transport.tcp.port = 9300
+mudrod.es.unicast.hosts = 127.0.0.1
+mudrod.es.http.port = 9200
+mudrod.es.index = mudrod
+    
+# Spark related
+# Log processing type. Possible values include 'sequential' or 'parallel'
+mudrod.processing.type = parallel
+mudrod.spark.app.name = MudrodSparkApp
+mudrod.spark.master = local[4]
+mudrod.spark.optimize = repartition
+    
+# Web log processing configuration
+# index name has to be all lowercase
+mudrod.log.index = log
+mudrod.ftp.prefix = FTP.
+mudrod.http.prefix = WWW.
+mudrod.base.url = http://podaac.jpl.nasa.gov
+mudrod.black.request.list = .js, .css, .jpg, .png, .ico, image_captcha, 
autocomplete, .gif, /alldata/, /api/, get / http/1.1, .jpeg, /ws/
+mudrod.black.agent.list = crawler, googlebot, bingbot, slurp, yacybot, 
rogerbot, yandexbot, -, apache-httpclient, java, curl
+mudrod.search.freq = 100
+mudrod.view.freq = 200
+mudrod.download.freq = 100
+mudrod.request.rate = 30
+mudrod.session.port = 8080
+mudrod.session.url = /mudrod-service/session.html
+mudrod.request.time.gap = 600   
+mudrod.view.url.marker = /dataset/
+mudrod.search.url.marker = /datasetlist?
+# In order to better parse a URL (getting searching keyword, etc.), please 
consider customizing 
+# org.apache.sdap.mudrod.weblog.structure.RequestUrl - GetSearchInfo, 
getFilterInfo
+       
+# User search history
+mudrod.query.min = 0
+mudrod.user.history.weight = 2
+       
+# clickstream
+mudrod.download.weight = 3
+mudrod.clickstream.svd.d = 50
+mudrod.clickstream.weight = 2
+                               
+# metadata
+mudrod.metadata.download = 0
+mudrod.metadata.download.url = 
https://podaac.jpl.nasa.gov/api/dataset?startIndex=$startIndex&entries=10&sortField=Dataset-AllTimePopularity&sortOrder=asc&id=&value=&search=
+mudrod.metadata.svd.d = 50
+mudrod.metadata.url = null
+mudrod.metadata.weight = 1
+mudrod.metadata.type = RawMetadata
+               
+# ranking, ${svmSgdModel.value} is resolved at build time. See the property in 
core/pom.xml for the value
+mudrod.ranking.machine.learning = 1
+mudrod.ranking.model = ${svmSgdModel.value}.zip
+               
+# recommendation
+mudrod.metadata.id = Dataset-ShortName
+mudrod.metadata.semantic.fields = 
DatasetParameter-Term,DatasetParameter-Variable,Dataset-ExtractTerm
+
+# ontology service implementation. Possible values include: EsipPortal - 
EsipPortalOntology; EsipCOR - EsipCOROntology; Local - 
org.apache.sdap.mudrod.ontology.process.Local
+mudrod.ontology.implementation = Local
+mudrod.ontology.weight = 2


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to