Author: cdouglas
Date: Tue Oct 20 23:40:16 2009
New Revision: 827839
URL: http://svn.apache.org/viewvc?rev=827839&view=rev
Log:
HADOOP-6097. Fix Path conversion in makeQualified and reset LineReader byte
count at the start of each block in Hadoop archives. Contributed by Ben Slusky,
Tom White, and Mahadev Konar
Modified:
hadoop/common/branches/branch-0.20/CHANGES.txt
hadoop/common/branches/branch-0.20/src/core/org/apache/hadoop/fs/HarFileSystem.java
hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/fs/TestHarFileSystem.java
Modified: hadoop/common/branches/branch-0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/CHANGES.txt?rev=827839&r1=827838&r2=827839&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20/CHANGES.txt Tue Oct 20 23:40:16 2009
@@ -35,6 +35,10 @@
HADOOP-5759. Fix for IllegalArgumentException when CombineFileInputFormat
is used as job InputFormat. (Amareshwari Sriramadasu via zshao)
+ HADOOP-6097. Fix Path conversion in makeQualified and reset LineReader byte
+ count at the start of each block in Hadoop archives. (Ben Slusky, Tom
+ White, and Mahadev Konar via cdouglas)
+
Release 0.20.1 - 2009-09-01
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-0.20/src/core/org/apache/hadoop/fs/HarFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/core/org/apache/hadoop/fs/HarFileSystem.java?rev=827839&r1=827838&r2=827839&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/core/org/apache/hadoop/fs/HarFileSystem.java (original)
+++ hadoop/common/branches/branch-0.20/src/core/org/apache/hadoop/fs/HarFileSystem.java Tue Oct 20 23:40:16 2009
@@ -301,19 +301,8 @@
}
URI tmpURI = fsPath.toUri();
- fsPath = new Path(tmpURI.getPath());
//change this to Har uri
- URI tmp = null;
- try {
- tmp = new URI(uri.getScheme(), harAuth, fsPath.toString(),
- tmpURI.getQuery(), tmpURI.getFragment());
- } catch(URISyntaxException ue) {
- LOG.error("Error in URI ", ue);
- }
- if (tmp != null) {
- return new Path(tmp.toString());
- }
- return null;
+ return new Path(uri.getScheme(), harAuth, tmpURI.getPath());
}
/**
@@ -425,12 +414,13 @@
// do nothing just a read.
}
FSDataInputStream aIn = fs.open(archiveIndex);
- LineReader aLin = new LineReader(aIn, getConf());
+ LineReader aLin;
String retStr = null;
// now start reading the real index file
- read = 0;
for (Store s: stores) {
+ read = 0;
aIn.seek(s.begin);
+ aLin = new LineReader(aIn, getConf());
while (read + s.begin < s.end) {
int tmp = aLin.readLine(line);
read += tmp;
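
[Editor's note on the two hunks above.] The makeQualified hunk fixes a double-encoding bug: the old code built a java.net.URI (whose multi-argument constructor percent-encodes a literal '%'), stringified it, and re-parsed the encoded string with new Path(String), so a file named "d%d" came back as "d%25d". The three-argument Path constructor takes the decoded path component verbatim. A minimal illustrative sketch, not part of the commit (the authority and file name are invented):

    import java.net.URI;
    import org.apache.hadoop.fs.Path;

    public class MakeQualifiedSketch {
      public static void main(String[] args) throws Exception {
        URI tmpURI = new Path("/user/foo/d%d").toUri();

        // Old code path: URI's multi-arg constructor escapes '%' to "%25",
        // and new Path(String) then treats the encoded string literally.
        URI tmp = new URI("har", "nn:8020", tmpURI.getPath(), null, null);
        Path doubleEncoded = new Path(tmp.toString());
        System.out.println(doubleEncoded.toUri().getPath()); // /user/foo/d%25d

        // Fixed code path: pass the decoded path component straight through.
        Path qualified = new Path("har", "nn:8020", tmpURI.getPath());
        System.out.println(qualified.toUri().getPath());     // /user/foo/d%d
      }
    }

The second hunk fixes the index-reading loop: a LineReader buffers ahead of the underlying stream, so after aIn.seek(s.begin) a reader created before the loop would serve stale buffered bytes, and a byte count carried over from the previous block would overshoot s.end. Condensed shape of the fixed loop (Store, line, and the index layout are as in the surrounding code):

    for (Store s : stores) {
      long read = 0;                            // count this block's bytes only
      aIn.seek(s.begin);                        // jump to the block start
      LineReader aLin = new LineReader(aIn, getConf()); // fresh buffer per block
      while (read + s.begin < s.end) {
        read += aLin.readLine(line);            // bytes consumed for this line
        // ... parse 'line' ...
      }
    }
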
Modified: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/fs/TestHarFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/fs/TestHarFileSystem.java?rev=827839&r1=827838&r2=827839&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/fs/TestHarFileSystem.java (original)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/fs/TestHarFileSystem.java Tue Oct 20 23:40:16 2009
@@ -57,7 +57,7 @@
private MiniDFSCluster dfscluster;
private MiniMRCluster mapred;
private FileSystem fs;
- private Path filea, fileb, filec;
+ private Path filea, fileb, filec, filed;
private Path archivePath;
protected void setUp() throws Exception {
@@ -69,6 +69,9 @@
filea = new Path(inputPath,"a");
fileb = new Path(inputPath,"b");
filec = new Path(inputPath,"c");
+ // check for har containing escape-worthy characters
+ // in their name
+ filed = new Path(inputPath, "d%d");
archivePath = new Path(fs.getHomeDirectory(), "tmp");
}
@@ -121,7 +124,14 @@
out = fs.create(filec);
out.write("c".getBytes());
out.close();
+ out = fs.create(filed);
+ out.write("d".getBytes());
+ out.close();
Configuration conf = mapred.createJobConf();
+
+ // check to see if fs.har.impl.disable.cache is true
+ boolean archivecaching = conf.getBoolean("fs.har.impl.disable.cache", false);
+ assertTrue(archivecaching);
HadoopArchives har = new HadoopArchives(conf);
String[] args = new String[3];
//check for destination not specified
@@ -179,6 +189,7 @@
Path harFilea = new Path(harPath, "a");
Path harFileb = new Path(harPath, "b");
Path harFilec = new Path(harPath, "c");
+ Path harFiled = new Path(harPath, "d%d");
FileSystem harFs = harFilea.getFileSystem(conf);
FSDataInputStream fin = harFs.open(harFilea);
byte[] b = new byte[4];
@@ -193,6 +204,11 @@
fin.read(b);
fin.close();
assertTrue("strings are equal ", (b[0] == "c".getBytes()[0]));
+ fin = harFs.open(harFiled);
+ fin.read(b);
+ fin.close();
+ assertTrue("strings are equal ", (b[0] == "d".getBytes()[0]));
+
// ok all files match
// run a map reduce job
Path outdir = new Path(fs.getHomeDirectory(), "mapout");
@@ -213,11 +229,11 @@
FileStatus[] status = fs.globStatus(new Path(outdir, "part*"));
Path reduceFile = status[0].getPath();
FSDataInputStream reduceIn = fs.open(reduceFile);
- b = new byte[6];
+ b = new byte[8];
reduceIn.read(b);
- //assuming all the 6 bytes were read.
+ //assuming all the 8 bytes were read.
Text readTxt = new Text(b);
- assertTrue("a\nb\nc\n".equals(readTxt.toString()));
+ assertTrue("a\nb\nc\nd\n".equals(readTxt.toString()));
assertTrue("number of bytes left should be -1", reduceIn.read(b) == -1);
reduceIn.close();
}
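
[Editor's note on the new assertion.] fs.har.impl.disable.cache must be true so each har:// access constructs a fresh HarFileSystem rather than reusing a cached instance with stale archive-index state. A minimal hedged sketch of the client-side pattern the test exercises (the cluster authority and archive location are invented for illustration):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HarReadSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Bypass the FileSystem cache for the har scheme.
        conf.setBoolean("fs.har.impl.disable.cache", true);

        // A har URI embeds the underlying scheme and authority:
        // har://<scheme>-<host>:<port>/<archive path>/<file in archive>
        Path harFiled =
            new Path("har://hdfs-namenode:8020/user/me/tmp/foo.har/d%d");
        FileSystem harFs = harFiled.getFileSystem(conf);

        FSDataInputStream in = harFs.open(harFiled);
        byte[] b = new byte[4];
        in.read(b);    // first byte should be 'd', per the test above
        in.close();
      }
    }
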