Author: olga
Date: Mon Nov 30 15:50:31 2009
New Revision: 885465

URL: http://svn.apache.org/viewvc?rev=885465&view=rev
Log:
PIG-1107: PigLineRecordReader bails out on an empty line for compressed data (ankit.modi via olgan)

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/impl/io/PigLineRecordReader.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestPigLineRecordReader.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=885465&r1=885464&r2=885465&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Mon Nov 30 15:50:31 2009
@@ -41,6 +41,9 @@
 
 BUG FIXES
 
+PIG-1107: PigLineRecordReader bails out on an empty line for compressed data
+(ankit.modi via olgan)
+
 PIG-598: Parameter substitution ($PARAMETER) should not be performed in
 comments (thejas via olgan)
 

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/io/PigLineRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/io/PigLineRecordReader.java?rev=885465&r1=885464&r2=885465&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/io/PigLineRecordReader.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/io/PigLineRecordReader.java Mon Nov 30 15:50:31 2009
@@ -133,7 +133,8 @@
 
             if (b == '\n' ) {
                 byte[] array = mBuf.toByteArray();
-                if (array[array.length-1]=='\r' && os==OS_WINDOWS) {
+                if (array.length != 0 && array[array.length-1]=='\r'
+                        && os==OS_WINDOWS) {
                     // Here we dont copy the last '\r' in the Text Value
                     value.append(array, 0, array.length - 1 );
                 } else {

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPigLineRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPigLineRecordReader.java?rev=885465&r1=885464&r2=885465&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestPigLineRecordReader.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestPigLineRecordReader.java Mon Nov 30 15:50:31 2009
@@ -614,4 +614,97 @@
             fail(e.getMessage());
         }
     }
+
+    /**
+     * This tests check if PigLineRecordReader can read a file which has an empty line
+     */
+    @Test
+    public void testEmptyLineSimpleFile() {
+        try {
+            File testFile = File.createTempFile("testPigLineRecordReader", ".txt");
+            String text = "This is a text";
+
+            PrintStream ps = new PrintStream( testFile );
+            for( int i = 0; i < LOOP_COUNT; i++ ) {
+                ps.println( text );
+                // Add an empty line
+                ps.println("");
+            }
+            ps.close();
+
+            LocalSeekableInputStream is = new LocalSeekableInputStream( testFile );
+            BufferedPositionedInputStream bpis = new BufferedPositionedInputStream( is );
+            PigLineRecordReader reader = new PigLineRecordReader( bpis, 0, Integer.MAX_VALUE );
+
+            Text value = new Text();
+            int counter = 0;
+            while( reader.next(value) ) {
+                if( counter % 2 == 0 ) {
+                    assertTrue( "Invalid Text", value.toString().compareTo(text) == 0 );
+                } else {
+                    assertTrue( "Invalid Text", value.toString().compareTo("") == 0 );
+                }
+                counter++;
+            }
+            assertEquals("Invalid number of lines", counter, LOOP_COUNT*2 );
+            testFile.deleteOnExit();
+
+        } catch (IOException e) {
+            e.printStackTrace();
+            fail( e.getMessage() );
+        } catch (SecurityException e) {
+            e.printStackTrace();
+            fail( e.getMessage() );
+        } catch (IllegalArgumentException e) {
+            e.printStackTrace();
+            fail(e.getMessage());
+        }
+    }
+
+    /**
+     * This tests check if PigLineRecordReader can read a file which has an empty line
+     */
+    @Test
+    public void testEmptyLineBZFile() {
+        try {
+            File testFile = File.createTempFile("testPigLineRecordReader", ".txt.bz2");
+            String text = "This is a text";
+
+            PrintStream ps = new PrintStream( new CBZip2OutputStream( new FileOutputStream( testFile )) );
+            for( int i = 0; i < LOOP_COUNT; i++ ) {
+                ps.println( text );
+                // Add an empty line
+                ps.println("");
+            }
+            ps.close();
+
+            LocalSeekableInputStream is = new LocalSeekableInputStream( testFile );
+            CBZip2InputStream bzis = new CBZip2InputStream( is );
+            BufferedPositionedInputStream bpis = new BufferedPositionedInputStream( bzis );
+            PigLineRecordReader reader = new PigLineRecordReader( bpis, 0, Integer.MAX_VALUE );
+
+            Text value = new Text();
+            int counter = 0;
+            while( reader.next(value) ) {
+                if( counter % 2 == 0 ) {
+                    assertTrue( "Invalid Text", value.toString().compareTo(text) == 0 );
+                } else {
+                    assertTrue( "Invalid Text", value.toString().compareTo("") == 0 );
+                }
+                counter++;
+            }
+            assertEquals("Invalid number of lines", counter, LOOP_COUNT*2 );
+            testFile.deleteOnExit();
+
+        } catch (IOException e) {
+            e.printStackTrace();
+            fail( e.getMessage() );
+        } catch (SecurityException e) {
+            e.printStackTrace();
+            fail( e.getMessage() );
+        } catch (IllegalArgumentException e) {
+            e.printStackTrace();
+            fail(e.getMessage());
+        }
+    }
 }