Author: dogacan
Date: Wed Jan 21 05:09:48 2009
New Revision: 736307

URL: http://svn.apache.org/viewvc?rev=736307&view=rev
Log:
NUTCH-681 - parse-mp3 compilation problem. Patch by Wildan Maulana.

Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=736307&r1=736306&r2=736307&view=diff ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Wed Jan 21 05:09:48 2009 @@ -313,6 +313,9 @@ 117. NUTCH-678 - Hadoop 0.19 requires an update of jets3t. (julien nioche via dogacan) + +118. NUTCH-681 - parse-mp3 compilation problem. + (Wildan Maulana via dogacan) Release 0.9 - 2007-04-02 Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java?rev=736307&r1=736306&r2=736307&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java (original) +++ lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java Wed Jan 21 05:09:48 2009 @@ -37,17 +37,16 @@ // Nutch imports import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.parse.Parse; import org.apache.nutch.parse.ParseData; -import org.apache.nutch.parse.ParseException; import org.apache.nutch.parse.ParseImpl; +import org.apache.nutch.parse.ParseResult; import org.apache.nutch.parse.ParseStatus; import org.apache.nutch.parse.Parser; import org.apache.nutch.protocol.Content; - /** * A parser for MP3 audio files + * * @author Andy Hedges */ public class MP3Parser implements Parser { @@ -55,12 +54,12 @@ private 
MetadataCollector metadataCollector; private Configuration conf; - public Parse getParse(Content content) { + public ParseResult getParse(Content content) { - Parse parse = null; + ParseResult parse = null; byte[] raw = content.getContent(); File tmp = null; - + try { tmp = File.createTempFile("nutch", ".mp3"); FileOutputStream fos = new FileOutputStream(tmp); @@ -69,49 +68,50 @@ MP3File mp3 = new MP3File(tmp); if (mp3.hasID3v2Tag()) { - parse = getID3v2Parse(mp3, content.getMetadata()); + parse = getID3v2Parse(mp3, content.getMetadata(), content); } else if (mp3.hasID3v1Tag()) { - parse = getID3v1Parse(mp3, content.getMetadata()); + parse = getID3v1Parse(mp3, content.getMetadata(), content); } else { - return new ParseStatus(ParseStatus.FAILED, - ParseStatus.FAILED_MISSING_CONTENT, - "No textual content available").getEmptyParse(conf); + return new ParseStatus().getEmptyParseResult(content.getUrl(), + getConf()); } } catch (IOException e) { - return new ParseStatus(ParseStatus.FAILED, - ParseStatus.FAILED_EXCEPTION, - "Couldn't create temporary file:" + e).getEmptyParse(conf); + return new ParseStatus().getEmptyParseResult(content.getUrl(), + getConf()); } catch (TagException e) { - return new ParseStatus(ParseStatus.FAILED, - ParseStatus.FAILED_EXCEPTION, - "ID3 Tags could not be parsed:" + e).getEmptyParse(conf); - } finally{ + return new ParseStatus().getEmptyParseResult(content.getUrl(), + getConf()); + } finally { tmp.delete(); } + return parse; } - private Parse getID3v1Parse(MP3File mp3, Metadata contentMeta) - throws MalformedURLException { + private ParseResult getID3v1Parse(MP3File mp3, Metadata contentMeta, + Content content) throws MalformedURLException { ID3v1 tag = mp3.getID3v1Tag(); metadataCollector.notifyProperty("TALB-Text", tag.getAlbum()); metadataCollector.notifyProperty("TPE1-Text", tag.getArtist()); metadataCollector.notifyProperty("COMM-Text", tag.getComment()); - metadataCollector.notifyProperty("TCON-Text", "(" + tag.getGenre() + ")"); + 
metadataCollector.notifyProperty("TCON-Text", "(" + tag.getGenre() + + ")"); metadataCollector.notifyProperty("TIT2-Text", tag.getTitle()); metadataCollector.notifyProperty("TYER-Text", tag.getYear()); ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, - metadataCollector.getTitle(), - metadataCollector.getOutlinks(), - contentMeta, - metadataCollector.getData()); - return new ParseImpl(metadataCollector.getText(), parseData); + metadataCollector.getTitle(), metadataCollector.getOutlinks(), + contentMeta, metadataCollector.getData()); + ParseResult parseResult = ParseResult.createParseResult(content + .getUrl(), + new ParseImpl(metadataCollector.getText(), parseData)); + + return parseResult; } - public Parse getID3v2Parse(MP3File mp3, Metadata contentMeta) - throws IOException { - + public ParseResult getID3v2Parse(MP3File mp3, Metadata contentMeta, + Content content) throws IOException { + AbstractID3v2 tag = mp3.getID3v2Tag(); Iterator it = tag.iterator(); while (it.hasNext()) { @@ -120,23 +120,26 @@ if (!name.equals("APIC")) { Iterator itBody = frame.getBody().iterator(); while (itBody.hasNext()) { - AbstractMP3Object mp3Obj = (AbstractMP3Object) itBody.next(); + AbstractMP3Object mp3Obj = (AbstractMP3Object) itBody + .next(); String bodyName = mp3Obj.getIdentifier(); if (!bodyName.equals("Picture data")) { String bodyValue = mp3Obj.getValue().toString(); - metadataCollector.notifyProperty(name + "-" + bodyName, bodyValue); + metadataCollector.notifyProperty(name + "-" + bodyName, + bodyValue); } } } } ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, - metadataCollector.getTitle(), - metadataCollector.getOutlinks(), - contentMeta, - metadataCollector.getData()); - return new ParseImpl(metadataCollector.getText(), parseData); - } + metadataCollector.getTitle(), metadataCollector.getOutlinks(), + contentMeta, metadataCollector.getData()); + ParseResult parseResult = ParseResult.createParseResult(content + .getUrl(), + new 
ParseImpl(metadataCollector.getText(), parseData)); + return parseResult; + } public void setConf(Configuration conf) { this.conf = conf; Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java?rev=736307&r1=736306&r2=736307&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java (original) +++ lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java Wed Jan 21 05:09:48 2009 @@ -34,7 +34,7 @@ private String title = null; private String artist = null; private String album = null; - private ArrayList links = new ArrayList(); + private ArrayList<Outlink> links = new ArrayList<Outlink>(); private String text = ""; private Configuration conf; @@ -51,7 +51,7 @@ setArtist(value); if (name.indexOf("URL Link") > -1) { - links.add(new Outlink(value, "", this.conf)); + links.add(new Outlink(value, "")); } else if (name.indexOf("Text") > -1) { text += value + "\n"; } Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java?rev=736307&r1=736306&r2=736307&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java (original) +++ lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java Wed Jan 21 05:09:48 2009 @@ -71,7 +71,7 @@ protocol = new ProtocolFactory(conf).getProtocol(urlString); content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()) .getContent(); - parse = 
new ParseUtil(conf).parseByExtensionId("parse-mp3", content); + parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString); Metadata metadata = parse.getData().getParseMeta(); assertEquals("postgresql comment id3v2", metadata.get("COMM-Text")); assertEquals("postgresql composer id3v2", metadata.get("TCOM-Text")); @@ -103,7 +103,7 @@ protocol = new ProtocolFactory(conf).getProtocol(urlString); content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()) .getContent(); - parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content); + parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString); Metadata metadata = parse.getData().getParseMeta(); assertEquals("postgresql comment id3v1", metadata.get("COMM-Text")); @@ -130,7 +130,7 @@ protocol = new ProtocolFactory(conf).getProtocol(urlString); content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()) .getContent(); - parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content); + parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString); // Metadata metadata = parse.getData().getParseMeta(); if (parse.getData().getStatus().isSuccess()) { fail("Expected ParseException");