Author: gates Date: Thu Oct 9 10:40:06 2008 New Revision: 703211 URL: http://svn.apache.org/viewvc?rev=703211&view=rev Log: PIG-473: Added CommonLogLoader, a subclass of RegExLoader to piggybank
Added: incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/ incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CommonLogLoader.java incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java Modified: incubator/pig/trunk/CHANGES.txt Modified: incubator/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/incubator/pig/trunk/CHANGES.txt?rev=703211&r1=703210&r2=703211&view=diff ============================================================================== --- incubator/pig/trunk/CHANGES.txt (original) +++ incubator/pig/trunk/CHANGES.txt Thu Oct 9 10:40:06 2008 @@ -357,3 +357,5 @@ PIG-472: Added RegExLoader to piggybank, an abstract loader class to parse text files via regular espressions (spackest via gates) + PIG-473: Added CommonLogLoader, a subclass of RegExLoader to piggybank (spackest via gates) + Added: incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CommonLogLoader.java URL: http://svn.apache.org/viewvc/incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CommonLogLoader.java?rev=703211&view=auto ============================================================================== --- incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CommonLogLoader.java (added) +++ incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/CommonLogLoader.java Thu Oct 9 10:40:06 2008 @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. 
The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ + +package org.apache.pig.piggybank.storage.apachelog; + +import java.util.regex.Pattern; + +import org.apache.pig.piggybank.storage.RegExLoader; + +/** + * CommonLogLoader is used to load logs based on Apache's common log format, based on a format like + * + * LogFormat "%h %l %u %t \"%r\" %>s %b" common + * + * The log filename ends up being access_log from a line like + * + * CustomLog logs/access_log common + * + * Example: + * + * raw = LOAD 'access_log' USING org.apache.pig.piggybank.storage.apachelog.CommonLogLoader AS (remoteAddr, + * remoteLogname, user, time, method, uri, proto, status, bytes); + * + */ + +public class CommonLogLoader extends RegExLoader { + // 81.19.151.110 - - [04/Oct/2008:13:28:23 -0600] "GET / HTTP/1.0" 200 156 + private final static Pattern commonLogPattern = Pattern + .compile("^(\\S+)\\s+(\\S+)\\s+(\\S+)\\s+.(\\S+\\s+\\S+).\\s+.(\\S+)\\s+(\\S+)\\s+(\\S+.\\S+).\\s+(\\S+)\\s+(\\S+)$"); /* nine capture groups; the lone '.' before and after the 4th group and around groups 5-7 each consume one delimiter character, e.g. [ ] and " in the sample line above */ + + public Pattern getPattern() { /* hands back the single shared, precompiled pattern */ + return commonLogPattern; + } +} Added: incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java URL: http://svn.apache.org/viewvc/incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java?rev=703211&view=auto ============================================================================== --- 
incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java (added) +++ incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java Thu Oct 9 10:40:06 2008 @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. 
+ */ + +package org.apache.pig.piggybank.test.storage; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.Properties; + +import junit.framework.TestCase; + +import org.apache.pig.PigServer; +import org.apache.pig.PigServer.ExecType; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.PigContext; +import org.apache.pig.impl.io.BufferedPositionedInputStream; +import org.apache.pig.impl.io.FileLocalizer; +import org.apache.pig.piggybank.storage.apachelog.CommonLogLoader; +import org.junit.Test; + +public class TestCommonLogLoader extends TestCase { /* Exercises CommonLogLoader both directly, through bindTo() and getNext(), and through a LOAD query on a local PigServer. */ + public static ArrayList<String[]> data = new ArrayList<String[]>(); /* input rows, seven columns each; column 3 is a bracketed timestamp and column 4 a quoted request */ + static { + data.add(new String[] { "1.2.3.4", "-", "-", "[01/Jan/2008:23:27:45 -0600]", "\"GET /zero.html HTTP/1.0\"", "200", "100" }); + data.add(new String[] { "2.3.4.5", "-", "-", "[02/Feb/2008:23:27:48 -0600]", "\"GET /one.js HTTP/1.1\"", "201", "101" }); + data.add(new String[] { "3.4.5.6", "-", "-", "[03/Mar/2008:23:27:48 -0600]", "\"GET /two.xml HTTP/1.2\"", "202", "102" }); + } + + public static ArrayList<String[]> EXPECTED = new ArrayList<String[]>(); /* expected values per row, derived from data: columns 0-2 as-is, column 3 without '[' and ']', column 4 split on spaces with the quotes removed, columns 5-6 as-is (nine values in total) */ + static { + + for (int i = 0; i < data.size(); i++) { + ArrayList<String> thisExpected = new ArrayList<String>(); + for (int j = 0; j <= 2; j++) { + thisExpected.add(data.get(i)[j]); + } + String temp = data.get(i)[3]; + temp = temp.replace("[", ""); + temp = temp.replace("]", ""); + thisExpected.add(temp); + + temp = data.get(i)[4]; /* NOTE(review): the value assigned here is never read; the loop below indexes data directly */ + + for (String thisOne : data.get(i)[4].split(" ")) { + thisOne = thisOne.replace("\"", ""); + thisExpected.add(thisOne); + } + for (int j = 5; j <= 6; j++) { + thisExpected.add(data.get(i)[j]); + } + + String[] toAdd = new String[0]; + toAdd = (String[]) (thisExpected.toArray(toAdd)); + EXPECTED.add(toAdd); + } + } + + @Test + public void testInstantiation() { + CommonLogLoader commonLogLoader = new CommonLogLoader(); + assertNotNull(commonLogLoader); + } + + @Test + public void testLoadFromBindTo() 
throws Exception { /* binds the loader to a temp file built from data (presumably one row per line, columns joined by the " " separator; confirm in TestHelper) and checks each tuple against EXPECTED */ + String filename = TestHelper.createTempFile(data, " "); + CommonLogLoader commonLogLoader = new CommonLogLoader(); + PigContext pigContext = new PigContext(ExecType.LOCAL, new Properties()); + InputStream inputStream = FileLocalizer.open(filename, pigContext); /* NOTE(review): this stream is not explicitly closed anywhere in the test */ + commonLogLoader.bindTo(filename, new BufferedPositionedInputStream(inputStream), 0, Long.MAX_VALUE); + + int tupleCount = 0; + + while (true) { + Tuple tuple = commonLogLoader.getNext(); + if (tuple == null) + break; + else { + TestHelper.examineTuple(EXPECTED, tuple, tupleCount); + tupleCount++; + } + } + assertEquals(data.size(), tupleCount); + } + + public void testLoadFromPigServer() throws Exception { /* same check as above, but loading through a LOAD ... USING query on a local PigServer; NOTE(review): unlike the two tests above, this one carries no @Test annotation */ + String filename = TestHelper.createTempFile(data, " "); + PigServer pig = new PigServer(ExecType.LOCAL); + filename = filename.replace("\\", "\\\\"); /* doubles every backslash in the path before it is embedded in the quoted query string */ + pig.registerQuery("A = LOAD 'file:" + filename + "' USING org.apache.pig.piggybank.storage.apachelog.CommonLogLoader();"); + Iterator<?> it = pig.openIterator("A"); + + int tupleCount = 0; + + while (it.hasNext()) { + Tuple tuple = (Tuple) it.next(); + if (tuple == null) + break; + else { + TestHelper.examineTuple(EXPECTED, tuple, tupleCount); + tupleCount++; + } + } + assertEquals(data.size(), tupleCount); + } +}