2009/8/12 Mathias De Maré <[email protected]>
> Thank you, that's very useful.
> In addition, I changed the way the tasks work, so they store their data in
> HBase now (since it's more suited for handling small files).
> I'm not 100% sure yet if the problems have been resolved (still doing
> extensive testing), but I think I might have gotten rid of them (and I'll
> add the 'skipping records' option in case I do get a failure).
>
Hi,
I can get everything to 'run' successfully now, but there are still some
tasks that crash.
I was thinking perhaps my Writable class is the issue, so I'll just post it
here. Does anyone notice anything that could cause a hang? In particular the
readFields and write methods could perhaps be the reason (but I just don't
see it).
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/**
 * Contains information on a URL, which other URLs link to it and if it has
 * been crawled previously.
 *
 * <p>Identity is defined by the URL alone, compared case-insensitively:
 * {@link #compareTo(Object)}, {@link #equals(Object)} and {@link #hashCode()}
 * all agree on this, so that keys which sort/group together also hash
 * (and therefore partition) together.
 *
 * <p>Serialized layout, in order: the URL (as written by
 * {@code Text.writeString}), the linking URLs (an {@code ArrayWritable} of
 * {@code Text}), the status as a 4-byte int, the seconds value as a 4-byte int.
 *
 * @author mathias
 */
public class URLInfo implements Writable, WritableComparable {

    /** Shared empty array used whenever no linking URLs are known. */
    private static final Text[] NO_LINKS = new Text[0];

    String url;
    Text[] linkedfrom;
    int urlStatus;
    int seconds;

    /**
     * Creates an empty instance: empty URL, no linking URLs, status
     * {@code Constants.URL_NEW} and a seconds value of 1. Also needed so the
     * instance can be created first and populated through
     * {@link #readFields(DataInput)}.
     */
    public URLInfo() {
        url = "";
        linkedfrom = NO_LINKS;
        urlStatus = Constants.URL_NEW;
        seconds = 1;
    }

    /**
     * Creates a fully populated instance.
     *
     * @param url the URL this record describes
     * @param linkedfrom must only contain domain names, nothing appended;
     *        {@code null} is treated as "no linking URLs"
     * @param urlStatus the crawl status of the URL
     * @param seconds the seconds value associated with the URL
     */
    public URLInfo(String url, Text[] linkedfrom, int urlStatus, int seconds) {
        this.url = url;
        // A null array would only fail later, inside write(); normalize it here.
        this.linkedfrom = (linkedfrom == null) ? NO_LINKS : linkedfrom;
        this.urlStatus = urlStatus;
        this.seconds = seconds;
    }

    /**
     * Serializes this record to {@code out}.
     *
     * @param out the sink to write to
     * @throws IOException if writing to {@code out} fails
     */
    public void write(DataOutput out) throws IOException {
        // Counterpart of Text.readString() in readFields().
        Text.writeString(out, url);
        new ArrayWritable(Text.class, linkedfrom).write(out);
        out.writeInt(urlStatus);
        out.writeInt(seconds);
    }

    /**
     * Replaces the state of this record with the one read from {@code in}.
     * Fields are read in the same order in which {@link #write(DataOutput)}
     * emits them.
     *
     * @param in the source to read from
     * @throws IOException if reading from {@code in} fails
     */
    public void readFields(DataInput in) throws IOException {
        url = Text.readString(in);

        ArrayWritable links = new ArrayWritable(Text.class);
        links.readFields(in);
        Writable[] rawLinks = links.get();
        // ArrayWritable hands back Writable[]; narrow each element to Text.
        Text[] typedLinks = new Text[rawLinks.length];
        for (int i = 0; i < rawLinks.length; i++) {
            typedLinks[i] = (Text) rawLinks[i];
        }
        linkedfrom = typedLinks;

        urlStatus = in.readInt();
        seconds = in.readInt();
    }

    /**
     * Orders records by URL, ignoring case.
     *
     * @param o the record to compare with; must be a {@code URLInfo}
     * @return a negative value, zero or a positive value if this URL sorts
     *         before, equal to or after the other one
     * @throws ClassCastException if {@code o} is not a {@code URLInfo}
     */
    public int compareTo(Object o) {
        return url.compareToIgnoreCase(((URLInfo) o).url);
    }

    public void setURLStatus(int urlStatus) {
        this.urlStatus = urlStatus;
    }

    public int getURLStatus() {
        return urlStatus;
    }

    /**
     * @param linkedfrom the new linking URLs; {@code null} is treated as
     *        "no linking URLs"
     */
    public void setLinkedFrom(Text[] linkedfrom) {
        this.linkedfrom = (linkedfrom == null) ? NO_LINKS : linkedfrom;
    }

    public Text[] getLinkedFrom() {
        return linkedfrom;
    }

    public String getURL() {
        // Strings are immutable, so no defensive copy is needed.
        return url;
    }

    public int getSeconds() {
        return seconds;
    }

    public void setSeconds(int seconds) {
        this.seconds = seconds;
    }

    /** Returns the URL of this record. */
    @Override
    public String toString() {
        return url;
    }

    /**
     * Two records are equal when their URLs are equal ignoring case, which
     * matches the ordering defined by {@link #compareTo(Object)}.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof URLInfo)) {
            return false;
        }
        return url.equalsIgnoreCase(((URLInfo) obj).url);
    }

    /**
     * Hash of the case-folded URL. Each character is folded the same way
     * {@code String.compareToIgnoreCase} folds it (upper-case, then
     * lower-case), so records that compare as equal always share a hash code.
     */
    @Override
    public int hashCode() {
        int hash = 0;
        for (int i = 0; i < url.length(); i++) {
            char folded = Character.toLowerCase(Character.toUpperCase(url.charAt(i)));
            hash = 31 * hash + folded;
        }
        return hash;
    }
}