Added: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestSearchTermExtractor.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestSearchTermExtractor.java?rev=782143&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestSearchTermExtractor.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestSearchTermExtractor.java Fri Jun 5 21:32:38 2009 @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ + +package org.apache.pig.piggybank.test.evaluation.util.apachelogparser; + +import java.util.HashMap; + +import junit.framework.TestCase; + +import org.apache.pig.data.DefaultTupleFactory; +import org.apache.pig.data.Tuple; +import org.apache.pig.piggybank.evaluation.util.apachelogparser.SearchEngineExtractor; +import org.apache.pig.piggybank.evaluation.util.apachelogparser.SearchTermExtractor; +import org.junit.Test; + +public class TestSearchTermExtractor extends TestCase { + private static HashMap<String, String> tests = new HashMap<String, String>(); + static { + tests.put("http://www.google.com/search?hl=en&q=a+simple+test&btnG=Google+Search", "a simple test"); + tests.put("http://www.google.co.uk/search?hl=en&q=a+simple+test&btnG=Google+Search&meta=", "a simple test"); + tests.put("http://www.google.co.jp/search?hl=ja&q=a+simple+test&btnG=Google+%E6%A4%9C%E7%B4%A2&lr=", "a simple test"); + tests.put("http://search.msn.co.uk/results.aspx?q=a+simple+test&geovar=56&FORM=REDIR", "a simple test"); + tests.put("http://search.msn.com/results.aspx?q=a+simple+test&geovar=56&FORM=REDIR", "a simple test"); + tests.put("http://www.altavista.com/web/results?itag=ody&q=a+simple+test&kgs=1&kls=0", "a simple test"); + tests.put("http://uk.altavista.com/web/results?itag=ody&q=a+simple+test&kgs=1&kls=0", "a simple test"); + tests.put("http://www.blueyonder.co.uk/blueyonder/searches/search.jsp?q=a+simple+test&cr=&sitesearch=&x=0&y=0", "a simple test"); + tests.put("http://www.alltheweb.com/search?cat=web&cs=iso88591&q=a+simple+test&rys=0&itag=crv&_sb_lang=pref", "a simple test"); + tests.put("http://search.lycos.com/?query=a+simple+test&x=0&y=0", "a simple test"); + tests.put("http://search.lycos.co.uk/cgi-bin/pursuit?query=a+simple+test&enc=utf-8&cat=slim_loc&sc=blue", "a simple test"); + tests.put("http://www.hotbot.com/index.php?query=a+simple+test&ps=&loc=searchbox&tab=web&mode=search&currProv=msn", "a simple test"); + tests.put("http://search.yahoo.com/search?p=a+simple+test&fr=FP-tab-web-t400&toggle=1&cop=&ei=UTF-8", "a simple test"); + tests.put("http://uk.search.yahoo.com/search?p=a+simple+test&fr=FP-tab-web-t340&ei=UTF-8&meta=vc%3D", "a simple test"); + tests.put("http://uk.ask.com/web?q=a+simple+test&qsrc=0&o=0&l=dir&dm=all", "a simple test"); + tests.put("http://www.mirago.co.uk/scripts/qhandler.aspx?qry=a+simple+test&x=0&y=0", "a simple test"); + tests.put("http://www.netscape.com/search/?s=a+simple+test", "a simple test"); + tests.put("http://search.aol.co.uk/web?invocationType=ns_uk&query=a%20simple%20test", "a simple test"); + tests.put("http://www.tiscali.co.uk/search/results.php?section=&from=&query=a+simple+test", "a simple test"); + tests.put("http://www.mamma.com/Mamma?utfout=1&qtype=0&query=a+simple+test&Submit=%C2%A0%C2%A0Search%C2%A0%C2%A0", "a simple test"); + tests.put("http://blogs.icerocket.com/search?q=a+simple+test", "a simple test"); + tests.put("http://blogsearch.google.com/blogsearch?hl=en&ie=UTF-8&q=a+simple+test&btnG=Search+Blogs", "a simple test"); + tests.put("http://suche.fireball.de/cgi-bin/pursuit?query=a+simple+test&x=0&y=0&cat=fb_loc&enc=utf-8", "a simple test"); + tests.put("http://suche.web.de/search/web/?allparams=&smode=&su=a+simple+test&webRb=de", "a simple test"); + tests.put("http://www.technorati.com/search/a%20simple%20test", "a simple test"); + tests.put("http://www.feedster.com/search/a%20simple%20test", "a simple test"); + tests.put("http://www.tesco.net/google/searchresults.asp?q=a+simple+test&cr=", "a simple test"); + tests + .put( + "http://gps.virgin.net/search/sitesearch?submit.x=1&start=0&format=1&num=10&restrict=site&sitefilter=site%2Fsite_filter.hts&siteresults=site%2Fsite_results.hts&sitescorethreshold=28&q=a+simple+test&scope=UK&x=0&y=0", + "a simple test"); + tests.put("http://search.bbc.co.uk/cgi-bin/search/results.pl?tab=web&go=homepage&q=a+simple+test&Search.x=0&Search.y=0&Search=Search&scope=all", + "a simple test"); + tests.put("http://search.live.com/results.aspx?q=a+simple+test&mkt=en-us&FORM=LVSP&go.x=0&go.y=0&go=Search", "a simple test"); + tests.put("http://search.mywebsearch.com/mywebsearch/AJmain.jhtml?searchfor=a+simple+test", "a simple test"); + tests.put("http://www.megasearching.net/m/search.aspx?s=a+simple+test&mkt=&orig=1", "a simple test"); + tests.put("http://www.blueyonder.co.uk/blueyonder/searches/search.jsp?q=a+simple+test&cr=&sitesearch=&x=0&y=0", "a simple test"); + tests.put("http://search.ntlworld.com/ntlworld/search.php?q=a+simple+test&cr=&x=0&y=0", "a simple test"); + tests.put("http://search.orange.co.uk/all?p=_searchbox&pt=resultgo&brand=ouk&tab=web&q=a+simple+test", "a simple test"); + tests.put("http://search.virginmedia.com/results/index.php?channel=other&q=a+simple+test&cr=&x=0&y=0", "a simple test"); + tests.put("http://as.starware.com/dp/search?src_id=305&product=unknown&qry=a+simple+test&z=Find+It", "a simple test"); + tests.put("http://aolsearch.aol.com/aol/search?invocationType=topsearchbox.webhome&query=a+simple+test", "a simple test"); + tests.put("http://www.ask.com/web?q=a+simple+test&qsrc=0&o=0&l=dir", "a simple test"); + tests.put("http://buscador.terra.es/Default.aspx?source=Search&ca=s&query=a%20simple%20test", "a simple test"); + tests.put("http://busca.orange.es/search?origen=home&destino=web&buscar=a+simple+test", "a simple test"); + tests.put("http://search.sweetim.com/search.asp?ln=en&q=a%20simple%20test", "a simple test"); + tests.put("http://search.conduit.com/Results.aspx?q=a+simple+test&hl=en&SelfSearch=1&SearchSourceOrigin=1&ctid=WEBSITE", "a simple test"); + tests.put("http://buscar.ozu.es/index.php?etq=web&q=a+simple+test", "a simple test"); + tests.put("http://buscador.lycos.es/cgi-bin/pursuit?query=a+simple+test&websearchCat=loc&cat=loc&SITE=de&enc=utf-8&ref=sboxlink", "a simple test"); + tests.put("http://search.icq.com/search/results.php?q=a+simple+test&ch_id=st&search_mode=web", "a simple test"); + tests.put("http://search.yahoo.co.jp/search?ei=UTF-8&fr=sfp_as&p=a+simple+test&meta=vc%3D", "a simple test"); + tests.put("http://www.soso.com/q?pid=s.idx&w=a+simple+test", "a simple test"); + tests.put("http://search.myway.com/search/AJmain.jhtml?searchfor=a+simple+test", "a simple test"); + tests.put("http://www.ilmotore.com/newsearch/?query=a+simple+test&where=web", "a simple test"); + tests.put("http://www.ithaki.net/ricerca.cgi?where=italia&query=a+simple+test", "a simple test"); + tests.put("http://ricerca.alice.it/ricerca?f=hpn&qs=a+simple+test", "a simple test"); + tests.put("http://it.search.yahoo.com/search?p=a+simple+test&fr=yfp-t-501&ei=UTF-8&rd=r1", "a simple test"); + tests.put("http://www.excite.it/search/web/results?l=&q=a+simple+test", "a simple test"); + tests.put("http://it.altavista.com/web/results?itag=ody&q=a+simple+test&kgs=1&kls=0", "a simple test"); + tests.put("http://cerca.lycos.it/cgi-bin/pursuit?query=a+simple+test&cat=web", "a simple test"); + tests.put("http://arianna.libero.it/search/abin/integrata.cgi?query=a+simple+test®ione=8&x=0&y=0", "a simple test"); + tests.put("http://www.thespider.it/dir/index.php?q=a+simple+test&search-btn.x=0&search-btn.y=0", "a simple test"); + tests.put("http://godado.it/engine.php?l=it&key=a+simple+test&x=0&y=0", "a simple test"); + tests.put("http://www.simpatico.ws/cgi-bin/links/search.cgi?query=a+simple+test&Vai=Go", "a simple test"); + tests + .put( + "http://www.categorico.it/ricerca.html?domains=Categorico.it&q=a+simple+test&sa=Cerca+con+Google&sitesearch=&client=pub-0499722654836507&forid=1&channel=7983145815&ie=ISO-8859-1&oe=ISO-8859-1&cof=GALT%3A%23008000%3BGL%3A1%3BDIV%3A%23336699%3BVLC%3A663399%3BAH%3Acenter%3BBGC%3AFFFFFF%3BLBGC%3A336699%3BALC%3A0000FF%3BLC%3A0000FF%3BT%3A000000%3BGFNT%3A0000FF%3BGIMP%3A0000FF%3BFORID%3A11&hl=it", + "a simple test"); + tests.put("http://www.cuil.com/search?q=a+simple+test", "a simple test"); + tests.put("http://www.google.com/search?hl=en&lr=&q=a+more%21+complex_+search%24&btnG=Search", "a more! complex_ search$"); + tests.put("http://www.google.co.uk/search?hl=en&q=a+more%21+complex_+search%24&btnG=Google+Search&meta=", "a more! complex_ search$"); + tests.put("http://www.google.co.jp/search?hl=ja&q=a+more%21+complex_+search%24&btnG=Google+%E6%A4%9C%E7%B4%A2&lr=", "a more! complex_ search$"); + tests.put("http://search.msn.com/results.aspx?q=a+more%21+complex_+search%24&FORM=QBHP", "a more! complex_ search$"); + tests.put("http://search.msn.co.uk/results.aspx?q=a+more%21+complex_+search%24&FORM=MSNH&srch_type=0&cp=65001", "a more! complex_ search$"); + tests.put("http://www.altavista.com/web/results?itag=ody&q=a+more%21+complex_+search%24&kgs=1&kls=0", "a more! complex_ search$"); + tests.put("http://uk.altavista.com/web/results?itag=ody&q=a+more%21+complex_+search%24&kgs=1&kls=0", "a more! complex_ search$"); + tests.put("http://www.blueyonder.co.uk/blueyonder/searches/search.jsp?q=a+more%21+complex_+search%24&cr=&sitesearch=&x=0&y=0", + "a more! complex_ search$"); + tests + .put("http://www.alltheweb.com/search?cat=web&cs=iso88591&q=a+more%21+complex_+search%24&rys=0&itag=crv&_sb_lang=pref", "a more! complex_ search$"); + tests.put("http://search.lycos.com/?query=a+more%21+complex_+search%24&x=0&y=0", "a more! complex_ search$"); + tests.put("http://search.lycos.co.uk/cgi-bin/pursuit?query=a+more%21+complex_+search%24&enc=utf-8&cat=slim_loc&sc=blue", "a more! complex_ search$"); + tests.put("http://www.hotbot.com/index.php?query=a+more%21+complex_+search%24&ps=&loc=searchbox&tab=web&mode=search&currProv=msn", + "a more! complex_ search$"); + tests.put("http://search.yahoo.com/search?p=a+more%21+complex_+search%24&fr=FP-tab-web-t400&toggle=1&cop=&ei=UTF-8", "a more! complex_ search$"); + tests.put("http://uk.search.yahoo.com/search?p=a+more%21+complex_+search%24&fr=FP-tab-web-t340&ei=UTF-8&meta=vc%3D", "a more! complex_ search$"); + tests.put("http://uk.ask.com/web?q=a+more%21+complex_+search%24&qsrc=0&o=0&l=dir&dm=all", "a more! complex_ search$"); + tests.put("http://www.mirago.co.uk/scripts/qhandler.aspx?qry=a+more%21+complex_+search%24&x=0&y=0", "a more! complex_ search$"); + tests.put("http://www.netscape.com/search/?s=a+more%21+complex_+search%24", "a more! complex_ search$"); + tests.put("http://search.aol.co.uk/web?query=a+more%21+complex_+search%24&x=0&y=0&isinit=true&restrict=wholeweb", "a more! complex_ search$"); + tests.put("http://www.tiscali.co.uk/search/results.php?section=&from=&query=a+more%21+complex_+search%24", "a more! complex_ search$"); + tests.put("http://www.mamma.com/Mamma?utfout=1&qtype=0&query=a+more%21+complex_+search%24&Submit=%C2%A0%C2%A0Search%C2%A0%C2%A0", + "a more! complex_ search$"); + tests.put("dud", null); + } + + @Test + public void testInstantiation() { + assertNotNull(new SearchEngineExtractor()); + } + + @Test + public void testTests() throws Exception { + SearchTermExtractor searchTermExtractor = new SearchTermExtractor(); + int testCount = 0; + Tuple input=DefaultTupleFactory.getInstance().newTuple(1); + for (String key : tests.keySet()) { + String expected = tests.get(key); + + input.set(0,key); + assertEquals(expected, searchTermExtractor.exec(input)); + testCount++; + } + assertEquals(tests.size(), testCount); + } +}
Added: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCombinedLogLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCombinedLogLoader.java?rev=782143&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCombinedLogLoader.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCombinedLogLoader.java Fri Jun 5 21:32:38 2009 @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ + +package org.apache.pig.piggybank.test.storage; + +import static org.apache.pig.ExecType.LOCAL; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; + +import junit.framework.TestCase; + +import org.apache.pig.PigServer; + +import org.apache.pig.ExecType; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.io.BufferedPositionedInputStream; +import org.apache.pig.impl.io.FileLocalizer; +import org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader; +import org.junit.Test; + +public class TestCombinedLogLoader extends TestCase { + public static ArrayList<String[]> data = new ArrayList<String[]>(); + static { + data.add(new String[] { "1.2.3.4", "-", "-", "[01/Jan/2008:23:27:45 -0600]", "\"GET /zero.html HTTP/1.0\"", "200", "100", "\"-\"", + "\"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; en-us) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1\"" }); + data.add(new String[] { "1.2.3.4", "-", "-", "[01/Jan/2008:23:27:45 -0600]", "\"GET /zero.html HTTP/1.0\"", "200", "100", + "\"http://myreferringsite.com\"", + "\"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; en-us) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1\"" }); + data.add(new String[] { "1.2.3.4", "-", "-", "[01/Jan/2008:23:27:45 -0600]", "\"GET /zero.html HTTP/1.0\"", "200", "100", "\"-\"", + "\"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; en-us) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1\"" }); + } + + public static ArrayList<DataByteArray[]> EXPECTED = new ArrayList<DataByteArray[]>(); + static { + + for (int i = 0; i < data.size(); i++) { + ArrayList<DataByteArray> thisExpected = new ArrayList<DataByteArray>(); + for (int j = 0; j <= 2; j++) { + thisExpected.add(new DataByteArray(data.get(i)[j])); + } + String temp = data.get(i)[3]; + temp = temp.replace("[", ""); + temp = temp.replace("]", ""); + thisExpected.add(new DataByteArray(temp)); + + temp = data.get(i)[4]; + + for (String thisOne : data.get(i)[4].split(" ")) { + thisOne = thisOne.replace("\"", ""); + thisExpected.add(new DataByteArray(thisOne)); + } + for (int j = 5; j <= 6; j++) { + thisExpected.add(new DataByteArray(data.get(i)[j])); + } + for (int j = 7; j <= 8; j++) { + String thisOne = data.get(i)[j]; + thisOne = thisOne.replace("\"", ""); + thisExpected.add(new DataByteArray(thisOne)); + } + + DataByteArray[] toAdd = new DataByteArray[0]; + toAdd = (DataByteArray[]) (thisExpected.toArray(toAdd)); + EXPECTED.add(toAdd); + } + } + + @Test + public void testInstantiation() { + CombinedLogLoader combinedLogLoader = new CombinedLogLoader(); + assertNotNull(combinedLogLoader); + } + + @Test + public void testLoadFromBindTo() throws Exception { + String filename = TestHelper.createTempFile(data, " "); + CombinedLogLoader combinedLogLoader = new CombinedLogLoader(); + PigServer pigServer = new PigServer(LOCAL); + InputStream inputStream = FileLocalizer.open(filename, pigServer.getPigContext()); + combinedLogLoader.bindTo(filename, new BufferedPositionedInputStream(inputStream), 0, Long.MAX_VALUE); + + int tupleCount = 0; + + while (true) { + Tuple tuple = combinedLogLoader.getNext(); + if (tuple == null) + break; + else { + TestHelper.examineTuple(EXPECTED, tuple, tupleCount); + tupleCount++; + } + } + assertEquals(data.size(), tupleCount); + } + + public void testLoadFromPigServer() throws Exception { + String filename = TestHelper.createTempFile(data, " "); + PigServer pig = new PigServer(ExecType.LOCAL); + filename = filename.replace("\\", "\\\\"); + pig.registerQuery("A = LOAD 'file:" + filename + "' USING org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader();"); + Iterator<?> it = pig.openIterator("A"); + + int tupleCount = 0; + + while (it.hasNext()) { + Tuple tuple = (Tuple) it.next(); + if (tuple == null) + break; + else { + TestHelper.examineTuple(EXPECTED, tuple, tupleCount); + tupleCount++; + } + } + assertEquals(data.size(), tupleCount); + } +} Added: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java?rev=782143&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCommonLogLoader.java Fri Jun 5 21:32:38 2009 @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ + +package org.apache.pig.piggybank.test.storage; + +import static org.apache.pig.ExecType.LOCAL; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; + +import junit.framework.TestCase; + +import org.apache.pig.PigServer; + +import org.apache.pig.ExecType; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.io.BufferedPositionedInputStream; +import org.apache.pig.impl.io.FileLocalizer; +import org.apache.pig.piggybank.storage.apachelog.CommonLogLoader; +import org.junit.Test; + +public class TestCommonLogLoader extends TestCase { + public static ArrayList<String[]> data = new ArrayList<String[]>(); + static { + data.add(new String[] { "1.2.3.4", "-", "-", "[01/Jan/2008:23:27:45 -0600]", "\"GET /zero.html HTTP/1.0\"", "200", "100" }); + data.add(new String[] { "2.3.4.5", "-", "-", "[02/Feb/2008:23:27:48 -0600]", "\"GET /one.js HTTP/1.1\"", "201", "101" }); + data.add(new String[] { "3.4.5.6", "-", "-", "[03/Mar/2008:23:27:48 -0600]", "\"GET /two.xml HTTP/1.2\"", "202", "102" }); + } + + public static ArrayList<DataByteArray[]> EXPECTED = new ArrayList<DataByteArray[]>(); + static { + + for (int i = 0; i < data.size(); i++) { + ArrayList<DataByteArray> thisExpected = new ArrayList<DataByteArray>(); + for (int j = 0; j <= 2; j++) { + thisExpected.add(new DataByteArray(data.get(i)[j])); + } + String temp = data.get(i)[3]; + temp = temp.replace("[", ""); + temp = temp.replace("]", ""); + thisExpected.add(new DataByteArray(temp)); + + temp = data.get(i)[4]; + + for (String thisOne : data.get(i)[4].split(" ")) { + thisOne = thisOne.replace("\"", ""); + thisExpected.add(new DataByteArray(thisOne)); + } + for (int j = 5; j <= 6; j++) { + thisExpected.add(new DataByteArray(data.get(i)[j])); + } + + DataByteArray[] toAdd = new DataByteArray[0]; + toAdd = (DataByteArray[]) (thisExpected.toArray(toAdd)); + EXPECTED.add(toAdd); + } + } + + @Test + public void testInstantiation() { + CommonLogLoader commonLogLoader = new CommonLogLoader(); + assertNotNull(commonLogLoader); + } + + @Test + public void testLoadFromBindTo() throws Exception { + String filename = TestHelper.createTempFile(data, " "); + CommonLogLoader commonLogLoader = new CommonLogLoader(); + PigServer pigServer = new PigServer(LOCAL); + + InputStream inputStream = FileLocalizer.open(filename, pigServer.getPigContext()); + commonLogLoader.bindTo(filename, new BufferedPositionedInputStream(inputStream), 0, Long.MAX_VALUE); + + int tupleCount = 0; + + while (true) { + Tuple tuple = commonLogLoader.getNext(); + if (tuple == null) + break; + else { + TestHelper.examineTuple(EXPECTED, tuple, tupleCount); + tupleCount++; + } + } + assertEquals(data.size(), tupleCount); + } + + public void testLoadFromPigServer() throws Exception { + String filename = TestHelper.createTempFile(data, " "); + PigServer pig = new PigServer(ExecType.LOCAL); + filename = filename.replace("\\", "\\\\"); + pig.registerQuery("A = LOAD 'file:" + filename + "' USING org.apache.pig.piggybank.storage.apachelog.CommonLogLoader();"); + Iterator<?> it = pig.openIterator("A"); + + int tupleCount = 0; + + while (it.hasNext()) { + Tuple tuple = (Tuple) it.next(); + if (tuple == null) + break; + else { + TestHelper.examineTuple(EXPECTED, tuple, tupleCount); + tupleCount++; + } + } + assertEquals(data.size(), tupleCount); + } +} Added: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestHelper.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestHelper.java?rev=782143&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestHelper.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestHelper.java Fri Jun 5 21:32:38 2009 @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ + +package org.apache.pig.piggybank.test.storage; + +import java.io.File; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.Tuple; +import org.junit.Test; + +public class TestHelper extends TestCase { + @Test + public void testTest() { + assertTrue(true); + } + + + public static ArrayList<DataByteArray[]> getExpected(ArrayList<String[]> data, Pattern pattern) { + ArrayList<DataByteArray[]> expected = new ArrayList<DataByteArray[]>(); + for (int i = 0; i < data.size(); i++) { + String string = data.get(i)[0]; + Matcher matcher = pattern.matcher(string); + matcher.groupCount(); + matcher.find(); + DataByteArray[] toAdd = new DataByteArray[] { + new DataByteArray(matcher.group(1)), + new DataByteArray(matcher.group(2)), + new DataByteArray(matcher.group(3)) }; + expected.add(toAdd); + } + + return expected; + } + + private static String join(String delimiter, String[] strings) { + String string = strings[0].toString(); + for (int i = 1; i < strings.length; i++) { + string += delimiter + strings[i].toString(); + } + return string; + } + + public static void examineTuple(ArrayList<DataByteArray[]> expectedData, Tuple tuple, int tupleCount) { + for (int i = 0; i < tuple.size(); i++) { + DataByteArray dataAtom= null; + try { + dataAtom = (DataByteArray) tuple.get(i); + } catch (ExecException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + DataByteArray expected = expectedData.get(tupleCount)[i]; + System.err.println("compare "+expected+" to "+dataAtom); + assertEquals(expected, dataAtom); + } + } + + public static String createTempFile(ArrayList<String[]> myData, String delimiter) throws Exception { + File tmpFile = File.createTempFile("test", ".txt"); + if (tmpFile.exists()) { + tmpFile.delete(); + } + PrintWriter pw = new PrintWriter(tmpFile); + for (int i = 0; i < myData.size(); i++) { + pw.println(join(delimiter, myData.get(i))); + System.err.println(join(delimiter, myData.get(i))); + } + pw.close(); + tmpFile.deleteOnExit(); + return tmpFile.getAbsolutePath(); + } +} Added: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMyRegExLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMyRegExLoader.java?rev=782143&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMyRegExLoader.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMyRegExLoader.java Fri Jun 5 21:32:38 2009 @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ + +package org.apache.pig.piggybank.test.storage; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +import org.apache.pig.PigServer; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.Tuple; +import static org.apache.pig.ExecType.LOCAL; + +public class TestMyRegExLoader extends TestCase { + private static String patternString = "(\\d+)!+(\\w+)~+(\\w+)"; + private final static Pattern pattern = Pattern.compile(patternString); + public static ArrayList<String[]> data = new ArrayList<String[]>(); + static { + data.add(new String[] { "1!!!one~i" }); + data.add(new String[] { "2!!two~~ii" }); + data.add(new String[] { "3!three~~~iii" }); + } + + public void testLoadMyRegExFromPigServer() throws Exception { + ArrayList<DataByteArray[]> expected = TestHelper.getExpected(data, pattern); + String filename = TestHelper.createTempFile(data, ""); + PigServer pig = new PigServer(LOCAL); + filename = filename.replace("\\", "\\\\"); + patternString = patternString.replace("\\", "\\\\"); + String query = "A = LOAD 'file:" + filename + "' USING org.apache.pig.piggybank.storage.MyRegExLoader('" + patternString + "');"; + pig.registerQuery(query); + Iterator<?> it = pig.openIterator("A"); + int tupleCount = 0; + while (it.hasNext()) { + Tuple tuple = (Tuple) it.next(); + if (tuple == null) + break; + else { + TestHelper.examineTuple(expected, tuple, tupleCount); + tupleCount++; + } + } + assertEquals(data.size(), tupleCount); + } +} Added: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java?rev=782143&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java Fri Jun 5 21:32:38 2009 @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ + +package org.apache.pig.piggybank.test.storage; + +import static org.apache.pig.ExecType.LOCAL; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +import org.apache.pig.PigServer; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.io.BufferedPositionedInputStream; +import org.apache.pig.impl.io.FileLocalizer; +import org.apache.pig.piggybank.storage.RegExLoader; +import org.junit.Test; + +public class TestRegExLoader extends TestCase { + private static String patternString = "(\\w+),(\\w+);(\\w+)"; + private final static Pattern pattern = Pattern.compile(patternString); + + class DummyRegExLoader extends RegExLoader { + @Override + public Pattern getPattern() { + return Pattern.compile(patternString); + } + + } + + public static ArrayList<String[]> data = new ArrayList<String[]>(); + static { + data.add(new String[] { "1,one;i" }); + data.add(new String[] { "2,two;ii" }); + data.add(new String[] { "3,three;iii" }); + } + + @Test + public void testLoadFromBindTo() throws Exception { + //String filename = TestHelper.createTempFile(data, " "); + //System.err.println(filename); + DummyRegExLoader dummyRegExLoader = new DummyRegExLoader(); + PigServer pigServer = new PigServer(LOCAL); + + String filename = TestHelper.createTempFile(data, ""); + /*org.apache.pig.test.Util.createInputFile("tmp", "", + new String[]{"1,one;i", "2,two;ii", "3,three;iii"} + ); + + String filename = input.getAbsolutePath(); + */ + InputStream inputStream = FileLocalizer.open(filename, pigServer.getPigContext()); + dummyRegExLoader.bindTo(filename, new BufferedPositionedInputStream(inputStream), 0, Long.MAX_VALUE); + ArrayList<DataByteArray[]> expected = TestHelper.getExpected(data, pattern); + int tupleCount = 0; + while (true) { + Tuple tuple = dummyRegExLoader.getNext(); + if (tuple == null) + break; + else { + TestHelper.examineTuple(expected, tuple, tupleCount); + tupleCount++; + } + } + assertEquals(data.size(), tupleCount); + } +}
