Author: gates
Date: Thu Oct 30 09:56:48 2008
New Revision: 709206

URL: http://svn.apache.org/viewvc?rev=709206&view=rev
Log:
PIG-509: Added CombinedLogLoader, loads logs that were created using Apache's 
combined log format.

Added:
    
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CombinedLogLoader.java
    
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCombinedLogLoader.java
Modified:
    hadoop/pig/trunk/CHANGES.txt

Modified: hadoop/pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=709206&r1=709205&r2=709206&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu Oct 30 09:56:48 2008
@@ -379,3 +379,6 @@
        gates).
 
     move to hadoop
+
+       PIG-509: Added CombinedLogLoader, loads logs that were created using
+       Apache's combined log format (spackest via gates).

Added: 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CombinedLogLoader.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CombinedLogLoader.java?rev=709206&view=auto
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CombinedLogLoader.java
 (added)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CombinedLogLoader.java
 Thu Oct 30 09:56:48 2008
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding 
copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the 
"License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License 
at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express or implied.
+ * See the License for the specific language governing permissions and 
limitations under the License.
+ */
+
+package org.apache.pig.piggybank.storage.apachelog;
+
+import java.util.regex.Pattern;
+
+import org.apache.pig.piggybank.storage.RegExLoader;
+
+/**
+ * CombinedLogLoader is used to load logs written in Apache's combined log 
format, as defined by a directive like
+ * 
+ * LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" 
combined
+ * 
+ * The log filename ends up being combined_log from a line like
+ * 
+ * CustomLog logs/combined_log combined
+ * 
+ * Example:
+ * 
+ * raw = LOAD 'combined_log' USING 
org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader AS
+ * (remoteAddr, remoteLogname, user, time, method, uri, proto, status, bytes, 
referer, userAgent);
+ * 
+ */
+
+public class CombinedLogLoader extends RegExLoader {
+    // 1.2.3.4 - - [30/Sep/2008:15:07:53 -0400] "GET / HTTP/1.1" 200 3190 "-"
+    // "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; en-us) 
AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1"
+    private final static Pattern combinedLogPattern = Pattern
+        
.compile("^(\\S+)\\s+(\\S+)\\s+(\\S+)\\s+.(\\S+\\s+\\S+).\\s+\"(\\S+)\\s+(.+?)\\s+(HTTP[^\"]+)\"\\s+(\\S+)\\s+(\\S+)\\s+\"([^\"]*)\"\\s+\"(.*)\"$");
+
+    public Pattern getPattern() {
+        return combinedLogPattern;
+    }
+}

Added: 
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCombinedLogLoader.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCombinedLogLoader.java?rev=709206&view=auto
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCombinedLogLoader.java
 (added)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCombinedLogLoader.java
 Thu Oct 30 09:56:48 2008
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding 
copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the 
"License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License 
at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express or implied.
+ * See the License for the specific language governing permissions and 
limitations under the License.
+ */
+
+package org.apache.pig.piggybank.test.storage;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Properties;
+
+import junit.framework.TestCase;
+
+import org.apache.pig.PigServer;
+import org.apache.pig.PigServer.ExecType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.io.BufferedPositionedInputStream;
+import org.apache.pig.impl.io.FileLocalizer;
+import org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader;
+import org.junit.Test;
+
+public class TestCombinedLogLoader extends TestCase {
+    public static ArrayList<String[]> data = new ArrayList<String[]>();
+    static {
+        data.add(new String[] { "1.2.3.4", "-", "-", "[01/Jan/2008:23:27:45 
-0600]", "\"GET /zero.html HTTP/1.0\"", "200", "100", "\"-\"",
+            "\"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; en-us) 
AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1\"" });
+        data.add(new String[] { "1.2.3.4", "-", "-", "[01/Jan/2008:23:27:45 
-0600]", "\"GET /zero.html HTTP/1.0\"", "200", "100",
+            "\"http://myreferringsite.com\"",
+            "\"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; en-us) 
AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1\"" });
+        data.add(new String[] { "1.2.3.4", "-", "-", "[01/Jan/2008:23:27:45 
-0600]", "\"GET /zero.html HTTP/1.0\"", "200", "100", "\"-\"",
+            "\"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; en-us) 
AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1\"" });
+    }
+
+    public static ArrayList<String[]> EXPECTED = new ArrayList<String[]>();
+    static {
+
+        for (int i = 0; i < data.size(); i++) {
+            ArrayList<String> thisExpected = new ArrayList<String>();
+            for (int j = 0; j <= 2; j++) {
+                thisExpected.add(data.get(i)[j]);
+            }
+            String temp = data.get(i)[3];
+            temp = temp.replace("[", "");
+            temp = temp.replace("]", "");
+            thisExpected.add(temp);
+
+            temp = data.get(i)[4];
+
+            for (String thisOne : data.get(i)[4].split(" ")) {
+                thisOne = thisOne.replace("\"", "");
+                thisExpected.add(thisOne);
+            }
+            for (int j = 5; j <= 6; j++) {
+                thisExpected.add(data.get(i)[j]);
+            }
+            for (int j = 7; j <= 8; j++) {
+                String thisOne = data.get(i)[j];
+                thisOne = thisOne.replace("\"", "");
+                thisExpected.add(thisOne);
+            }
+
+            String[] toAdd = new String[0];
+            toAdd = (String[]) (thisExpected.toArray(toAdd));
+            EXPECTED.add(toAdd);
+        }
+    }
+
+    @Test
+    public void testInstantiation() {
+        CombinedLogLoader combinedLogLoader = new CombinedLogLoader();
+        assertNotNull(combinedLogLoader);
+    }
+
+    @Test
+    public void testLoadFromBindTo() throws Exception {
+        String filename = TestHelper.createTempFile(data, " ");
+        CombinedLogLoader combinedLogLoader = new CombinedLogLoader();
+        PigContext pigContext = new PigContext(ExecType.LOCAL, new 
Properties());
+        InputStream inputStream = FileLocalizer.open(filename, pigContext);
+        combinedLogLoader.bindTo(filename, new 
BufferedPositionedInputStream(inputStream), 0, Long.MAX_VALUE);
+
+        int tupleCount = 0;
+
+        while (true) {
+            Tuple tuple = combinedLogLoader.getNext();
+            if (tuple == null)
+                break;
+            else {
+                TestHelper.examineTuple(EXPECTED, tuple, tupleCount);
+                tupleCount++;
+            }
+        }
+        assertEquals(data.size(), tupleCount);
+    }
+
+    public void testLoadFromPigServer() throws Exception {
+        String filename = TestHelper.createTempFile(data, " ");
+        PigServer pig = new PigServer(ExecType.LOCAL);
+        filename = filename.replace("\\", "\\\\");
+        pig.registerQuery("A = LOAD 'file:" + filename + "' USING 
org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader();");
+        Iterator<?> it = pig.openIterator("A");
+
+        int tupleCount = 0;
+
+        while (it.hasNext()) {
+            Tuple tuple = (Tuple) it.next();
+            if (tuple == null)
+                break;
+            else {
+                TestHelper.examineTuple(EXPECTED, tuple, tupleCount);
+                tupleCount++;
+            }
+        }
+        assertEquals(data.size(), tupleCount);
+    }
+}


Reply via email to