gates
Thu, 09 Oct 2008 10:45:08 -0700
Author: gates Date: Thu Oct 9 10:44:15 2008 New Revision: 703213 URL: http://svn.apache.org/viewvc?rev=703213&view=rev Log: PIG-474: Added MyRegexLoader, a subclass of RegExLoader, to piggybank Added: incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MyRegExLoader.java incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMyRegExLoader.java Modified: incubator/pig/trunk/CHANGES.txt Modified: incubator/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/incubator/pig/trunk/CHANGES.txt?rev=703213&r1=703212&r2=703213&view=diff ============================================================================== --- incubator/pig/trunk/CHANGES.txt (original) +++ incubator/pig/trunk/CHANGES.txt Thu Oct 9 10:44:15 2008 @@ -359,3 +359,5 @@ PIG-473: Added CommonLogLoader, a subclass of RegExLoader to piggybank (spackest via gates) + PIG-474: Added MyRegexLoader, a subclass of RegExLoader, to piggybank (spackest via gates) + Added: incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MyRegExLoader.java URL: http://svn.apache.org/viewvc/incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MyRegExLoader.java?rev=703213&view=auto ============================================================================== --- incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MyRegExLoader.java (added) +++ incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MyRegExLoader.java Thu Oct 9 10:44:15 2008 @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ + +package org.apache.pig.piggybank.storage; + +import java.util.regex.Pattern; + +/* + * MyRegExLoader extends RegExLoader, allowing regular expressions to be passed by argument through pig latin + * via a line like + * + * A = LOAD 'file:test.txt' USING org.apache.pig.piggybank.storage.MyRegExLoader('(\\d+)!+(\\w+)~+(\\w+)'); + * + * which would parse lines like + * + * 1!!!one~i 2!!two~~ii 3!three~~~iii + * + * into arrays like + * + * {1, "one", "i"}, {2, "two", "ii"}, {3, "three", "iii"} + */ + +public class MyRegExLoader extends RegExLoader { + Pattern pattern = null; + + public MyRegExLoader(String pattern) { + this.pattern = Pattern.compile(pattern); + } + + @Override + public Pattern getPattern() { + return pattern; + } +} Added: incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMyRegExLoader.java URL: http://svn.apache.org/viewvc/incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMyRegExLoader.java?rev=703213&view=auto ============================================================================== --- incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMyRegExLoader.java (added) +++ incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMyRegExLoader.java Thu Oct 9 10:44:15 2008 @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ + +package org.apache.pig.piggybank.test.storage; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +import org.apache.pig.PigServer; +import org.apache.pig.PigServer.ExecType; +import org.apache.pig.data.Tuple; + +public class TestMyRegExLoader extends TestCase { + private static String patternString = "(\\d+)!+(\\w+)~+(\\w+)"; + private final static Pattern pattern = Pattern.compile(patternString); + public static ArrayList<String[]> data = new ArrayList<String[]>(); + static { + data.add(new String[] { "1!!!one~i" }); + data.add(new String[] { "2!!two~~ii" }); + data.add(new String[] { "3!three~~~iii" }); + } + + public void testLoadMyRegExFromPigServer() throws Exception { + ArrayList<String[]> expected = TestHelper.getExpected(data, pattern); + String filename = TestHelper.createTempFile(data, ""); + PigServer pig = new PigServer(ExecType.LOCAL); + filename = filename.replace("\\", "\\\\"); + patternString = patternString.replace("\\", "\\\\"); + String query = "A = LOAD 'file:" + filename + "' USING org.apache.pig.piggybank.storage.MyRegExLoader('" + patternString + "');"; + pig.registerQuery(query); + Iterator<?> it = pig.openIterator("A"); + + int tupleCount = 0; + + while (it.hasNext()) { + Tuple tuple = (Tuple) it.next(); + if (tuple == null) + break; + else { + TestHelper.examineTuple(expected, tuple, tupleCount); + tupleCount++; + } + } + assertEquals(data.size(), tupleCount); + } +}