Author: gates
Date: Thu Oct  9 10:40:06 2008
New Revision: 703211

URL: http://svn.apache.org/viewvc?rev=703211&view=rev
Log:
PIG-473: Added CommonLogLoader, a subclass of RegExLoader to piggybank


Added:
    
incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/
    
incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CommonLogLoader.java
    
incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java
Modified:
    incubator/pig/trunk/CHANGES.txt

Modified: incubator/pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/incubator/pig/trunk/CHANGES.txt?rev=703211&r1=703210&r2=703211&view=diff
==============================================================================
--- incubator/pig/trunk/CHANGES.txt (original)
+++ incubator/pig/trunk/CHANGES.txt Thu Oct  9 10:40:06 2008
@@ -357,3 +357,5 @@
        PIG-472: Added RegExLoader to piggybank, an abstract loader class to 
parse
        text files via regular expressions (spackest via gates)
 
+       PIG-473: Added CommonLogLoader, a subclass of RegExLoader to piggybank 
(spackest via gates)
+

Added: 
incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CommonLogLoader.java
URL: 
http://svn.apache.org/viewvc/incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CommonLogLoader.java?rev=703211&view=auto
==============================================================================
--- 
incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CommonLogLoader.java
 (added)
+++ 
incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CommonLogLoader.java
 Thu Oct  9 10:40:06 2008
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding 
copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the 
"License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License 
at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express or implied.
+ * See the License for the specific language governing permissions and 
limitations under the License.
+ */
+
+package org.apache.pig.piggybank.storage.apachelog;
+
+import java.util.regex.Pattern;
+
+import org.apache.pig.piggybank.storage.RegExLoader;
+
+/**
+ * CommonLogLoader is used to load logs written in Apache's common log format, 
as produced by a directive like
+ * 
+ * LogFormat "%h %l %u %t \"%r\" %>s %b" common
+ * 
+ * The log filename ends up being access_log from a line like
+ * 
+ * CustomLog logs/access_log common
+ * 
+ * Example:
+ * 
+ * raw = LOAD 'access_log' USING 
org.apache.pig.piggybank.storage.apachelog.CommonLogLoader AS (remoteAddr,
+ * remoteLogname, user, time, method, uri, proto, status, bytes);
+ * 
+ */
+
+public class CommonLogLoader extends RegExLoader {
+    // 81.19.151.110 - - [04/Oct/2008:13:28:23 -0600] "GET / HTTP/1.0" 200 156
+    private final static Pattern commonLogPattern = Pattern
+        
.compile("^(\\S+)\\s+(\\S+)\\s+(\\S+)\\s+.(\\S+\\s+\\S+).\\s+.(\\S+)\\s+(\\S+)\\s+(\\S+.\\S+).\\s+(\\S+)\\s+(\\S+)$");
+
+    public Pattern getPattern() {
+        return commonLogPattern;
+    }
+}

Added: 
incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java
URL: 
http://svn.apache.org/viewvc/incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java?rev=703211&view=auto
==============================================================================
--- 
incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java
 (added)
+++ 
incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java
 Thu Oct  9 10:40:06 2008
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding 
copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the 
"License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License 
at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express or implied.
+ * See the License for the specific language governing permissions and 
limitations under the License.
+ */
+
+package org.apache.pig.piggybank.test.storage;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Properties;
+
+import junit.framework.TestCase;
+
+import org.apache.pig.PigServer;
+import org.apache.pig.PigServer.ExecType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.io.BufferedPositionedInputStream;
+import org.apache.pig.impl.io.FileLocalizer;
+import org.apache.pig.piggybank.storage.apachelog.CommonLogLoader;
+import org.junit.Test;
+
+public class TestCommonLogLoader extends TestCase {
+    public static ArrayList<String[]> data = new ArrayList<String[]>();
+    static {
+        data.add(new String[] { "1.2.3.4", "-", "-", "[01/Jan/2008:23:27:45 
-0600]", "\"GET /zero.html HTTP/1.0\"", "200", "100" });
+        data.add(new String[] { "2.3.4.5", "-", "-", "[02/Feb/2008:23:27:48 
-0600]", "\"GET /one.js HTTP/1.1\"", "201", "101" });
+        data.add(new String[] { "3.4.5.6", "-", "-", "[03/Mar/2008:23:27:48 
-0600]", "\"GET /two.xml HTTP/1.2\"", "202", "102" });
+    }
+
+    public static ArrayList<String[]> EXPECTED = new ArrayList<String[]>();
+    static {
+
+        for (int i = 0; i < data.size(); i++) {
+            ArrayList<String> thisExpected = new ArrayList<String>();
+            for (int j = 0; j <= 2; j++) {
+                thisExpected.add(data.get(i)[j]);
+            }
+            String temp = data.get(i)[3];
+            temp = temp.replace("[", "");
+            temp = temp.replace("]", "");
+            thisExpected.add(temp);
+
+            temp = data.get(i)[4];
+
+            for (String thisOne : data.get(i)[4].split(" ")) {
+                thisOne = thisOne.replace("\"", "");
+                thisExpected.add(thisOne);
+            }
+            for (int j = 5; j <= 6; j++) {
+                thisExpected.add(data.get(i)[j]);
+            }
+
+            String[] toAdd = new String[0];
+            toAdd = (String[]) (thisExpected.toArray(toAdd));
+            EXPECTED.add(toAdd);
+        }
+    }
+
+    @Test
+    public void testInstantiation() {
+        CommonLogLoader commonLogLoader = new CommonLogLoader();
+        assertNotNull(commonLogLoader);
+    }
+
+    @Test
+    public void testLoadFromBindTo() throws Exception {
+        String filename = TestHelper.createTempFile(data, " ");
+        CommonLogLoader commonLogLoader = new CommonLogLoader();
+        PigContext pigContext = new PigContext(ExecType.LOCAL, new 
Properties());
+        InputStream inputStream = FileLocalizer.open(filename, pigContext);
+        commonLogLoader.bindTo(filename, new 
BufferedPositionedInputStream(inputStream), 0, Long.MAX_VALUE);
+
+        int tupleCount = 0;
+
+        while (true) {
+            Tuple tuple = commonLogLoader.getNext();
+            if (tuple == null)
+                break;
+            else {
+                TestHelper.examineTuple(EXPECTED, tuple, tupleCount);
+                tupleCount++;
+            }
+        }
+        assertEquals(data.size(), tupleCount);
+    }
+
+    public void testLoadFromPigServer() throws Exception {
+        String filename = TestHelper.createTempFile(data, " ");
+        PigServer pig = new PigServer(ExecType.LOCAL);
+        filename = filename.replace("\\", "\\\\");
+        pig.registerQuery("A = LOAD 'file:" + filename + "' USING 
org.apache.pig.piggybank.storage.apachelog.CommonLogLoader();");
+        Iterator<?> it = pig.openIterator("A");
+
+        int tupleCount = 0;
+
+        while (it.hasNext()) {
+            Tuple tuple = (Tuple) it.next();
+            if (tuple == null)
+                break;
+            else {
+                TestHelper.examineTuple(EXPECTED, tuple, tupleCount);
+                tupleCount++;
+            }
+        }
+        assertEquals(data.size(), tupleCount);
+    }
+}


Reply via email to