2009/8/12 Mathias De Maré <[email protected]>

> Thank you, that's very useful.
> In addition, I changed the way the tasks work, so they store their data in
> HBase now (since it's more suited for handling small files).
> I'm not 100% sure yet if the problems have been resolved (still doing
> extensive testing), but I think I might have gotten rid of them (and I'll
> add the 'skipping records' option in case I do get a failure).
>


Hi,

I can get everything to 'run' successfully now, but there are still some
tasks that crash.

I was thinking perhaps my Writable class is the issue, so I'll just post it
here. Does anyone notice anything that could cause a hang? In particular the
readFields and write methods could perhaps be the reason (but I just don't
see it).

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

/**
 * Contains information on a URL: which other URLs link to it and whether it
 * has been crawled previously.
 *
 * <p>Serialized layout, in order: the URL (in the string format read by
 * {@link Text#readString(DataInput)}), the {@code linkedfrom} array (via
 * {@link ArrayWritable}), then {@code urlStatus} and {@code seconds} as ints.
 *
 * <p>{@link #equals(Object)} and {@link #hashCode()} consider only the URL and
 * are case-sensitive. {@link #compareTo(Object)} also considers only the URL
 * but ignores case, so two instances may compare as 0 without being equal.
 *
 * @author mathias
 */
public class URLInfo implements Writable, WritableComparable {
    /** The URL this record describes; expected to be non-null. */
    String url;
    /** Sources linking to {@link #url}; a null value is serialized as an empty array. */
    Text[] linkedfrom;
    /** Crawl status code of the URL. */
    int urlStatus;
    int seconds;

    /**
     * Creates an empty record: blank URL, no incoming links, status
     * {@code Constants.URL_NEW} and {@code seconds} set to 1. Also serves as
     * the no-argument constructor used before {@link #readFields(DataInput)}.
     */
    public URLInfo() {
        url = "";
        linkedfrom = new Text[0];
        urlStatus = Constants.URL_NEW;
        seconds = 1;
    }

    /**
     * Creates a fully populated record. The arguments are stored as given
     * (the array is not copied).
     *
     * @param url the URL this record describes
     * @param linkedfrom Must only contain domain names, nothing appended
     * @param urlStatus status code of the URL
     * @param seconds value for the {@code seconds} field
     */
    public URLInfo(String url, Text[] linkedfrom, int urlStatus, int seconds) {
        this.url = url;
        this.linkedfrom = linkedfrom;
        this.urlStatus = urlStatus;
        this.seconds = seconds;
    }

    /**
     * Serializes this record to {@code out}.
     *
     * @param out the sink to write to
     * @throws IOException if writing to {@code out} fails
     */
    public void write(DataOutput out) throws IOException {
        // Counterpart of Text.readString() used in readFields().
        Text.writeString(out, url);
        // A null array (possible through setLinkedFrom) is written as empty
        // instead of failing in the middle of a record.
        Text[] links = (linkedfrom == null) ? new Text[0] : linkedfrom;
        new ArrayWritable(Text.class, links).write(out);
        out.writeInt(urlStatus);
        out.writeInt(seconds);
    }

    /**
     * Replaces all fields of this record with values read from {@code in},
     * in the order produced by {@link #write(DataOutput)}.
     *
     * @param in the source to read from
     * @throws IOException if reading from {@code in} fails
     */
    public void readFields(DataInput in) throws IOException {
        url = Text.readString(in);

        ArrayWritable links = new ArrayWritable(Text.class);
        links.readFields(in);
        Writable[] raw = links.get();
        // ArrayWritable hands back Writable[]; narrow each element to Text.
        Text[] narrowed = new Text[raw.length];
        for (int i = 0; i < raw.length; i++) {
            narrowed[i] = (Text) raw[i];
        }
        linkedfrom = narrowed;

        urlStatus = in.readInt();
        seconds = in.readInt();
    }

    /**
     * Orders records by URL, ignoring case.
     *
     * <p>NOTE(review): {@link #equals(Object)} is case-sensitive, so a result
     * of 0 here does not imply equality; confirm which behaviour is intended.
     *
     * @param o the record to compare with; must be a {@code URLInfo}
     * @return negative, zero or positive as this URL sorts before, equal to
     *         or after the other URL
     * @throws ClassCastException if {@code o} is not a {@code URLInfo}
     */
    public int compareTo(Object o) {
        return url.compareToIgnoreCase(((URLInfo) o).url);
    }

    public void setURLStatus(int urlStatus) {
        this.urlStatus = urlStatus;
    }

    public int getURLStatus() {
        return urlStatus;
    }

    public void setLinkedFrom(Text[] linkedfrom) {
        this.linkedfrom = linkedfrom;
    }

    public Text[] getLinkedFrom() {
        return linkedfrom;
    }

    /** @return the URL this record describes */
    public String getURL() {
        // Strings are immutable, so no defensive copy is needed.
        return url;
    }

    public int getSeconds() {
        return seconds;
    }

    public void setSeconds(int seconds) {
        this.seconds = seconds;
    }

    /** @return the URL, as the textual form of this record */
    @Override
    public String toString() {
        return url;
    }

    /**
     * Two records are equal when their URLs are equal (case-sensitive); the
     * other fields are not compared.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof URLInfo)) {
            return false;
        }
        return url.equals(((URLInfo) obj).url);
    }

    /**
     * Hash of the URL, consistent with {@link #equals(Object)}. Without this
     * override equal records would get identity-based hashes, which breaks
     * hash-based collections and any hash-based partitioning of keys (e.g.
     * Hadoop's HashPartitioner).
     */
    @Override
    public int hashCode() {
        return url.hashCode();
    }

}

Reply via email to