Revision: 18331 http://sourceforge.net/p/gate/code/18331 Author: adamfunk Date: 2014-09-12 20:35:28 +0000 (Fri, 12 Sep 2014) Log Message: ----------- WIP on the file location memory & the empty document issue.
Modified Paths: -------------- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java Modified: gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java =================================================================== --- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java 2014-09-12 15:45:46 UTC (rev 18330) +++ gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java 2014-09-12 20:35:28 UTC (rev 18331) @@ -25,6 +25,7 @@ import gate.gui.ResourceHelper; import gate.util.InvalidOffsetException; import java.awt.event.ActionEvent; +import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; @@ -68,10 +69,12 @@ */ public static void populateCorpus(final Corpus corpus, URL inputUrl, String encoding, List<String> contentKeys, List<String> featureKeys, int tweetsPerDoc) throws ResourceInstantiationException { + + InputStream input = null; try { - InputStream input = inputUrl.openStream(); + input = inputUrl.openStream(); - // TODO detect & handle gzipped input + // TODO Detect & handle gzipped input. TweetStreamIterable tweetSource = new TweetStreamIterable(input, contentKeys, featureKeys, false); int tweetCounter = 0; @@ -79,6 +82,8 @@ StringBuilder content = new StringBuilder(); Map<PreAnnotation, Integer> annotandaOffsets = new HashMap<PreAnnotation, Integer>(); + // TODO Suppress empty documents (generated by 0-tweet files). 
+ for (Tweet tweet : tweetSource) { if ( (tweetsPerDoc > 0) && (tweetCounter > 0) && ((tweetCounter % tweetsPerDoc) == 0) ) { closeDocument(document, content, annotandaOffsets, corpus); @@ -97,6 +102,9 @@ tweetCounter++; } // end of Tweet loop + System.out.println("CL = " + content.length()); + System.out.println("TC = " + tweetCounter); + if (content.length() > 0) { closeDocument(document, content, annotandaOffsets, corpus); } @@ -112,6 +120,18 @@ catch (Exception e) { throw new ResourceInstantiationException(e); } + finally { + if (input != null) { + try { + input.close(); + } + catch(IOException e) { + logger.warn("Error in Twitter Population", e); + } + } + + } + } Modified: gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java =================================================================== --- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java 2014-09-12 15:45:46 UTC (rev 18330) +++ gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java 2014-09-12 20:35:28 UTC (rev 18331) @@ -14,7 +14,6 @@ import gate.Gate; import gate.swing.XJFileChooser; -//TODO Get GATE to remember last location. import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.io.File; @@ -152,6 +151,8 @@ @Override public void actionPerformed(ActionEvent arg0) { XJFileChooser chooser = new XJFileChooser(); + //TODO Get GATE to remember last location. 
+ //chooser.setResource(PopulationConfig.class.getName()); chooser.setDialogTitle("Load XML configuration"); chooser.setFileSelectionMode(XJFileChooser.FILES_ONLY); int chosen = chooser.showOpenDialog(this.wrapper.dialog); Modified: gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java =================================================================== --- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java 2014-09-12 15:45:46 UTC (rev 18330) +++ gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java 2014-09-12 20:35:28 UTC (rev 18331) @@ -14,7 +14,6 @@ import gate.gui.ListEditorDialog; import gate.gui.MainFrame; import gate.swing.XJFileChooser; -// TODO Get GATE to remember last location. import gate.util.ExtensionFileFilter; import gate.util.Strings; import java.awt.Window; @@ -110,6 +109,8 @@ dialog.add(Box.createVerticalStrut(2)); chooser = new XJFileChooser(); + // TODO Fix this to get GATE to remember last location. + //chooser.setResource(PopulationDialogWrapper.class.getName()); chooser.setFileSelectionMode(XJFileChooser.FILES_ONLY); chooser.setMultiSelectionEnabled(true); chooser.setDialogTitle("Select a Twitter JSON file"); Modified: gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java =================================================================== --- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java 2014-09-12 15:45:46 UTC (rev 18330) +++ gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java 2014-09-12 20:35:28 UTC (rev 18331) @@ -75,11 +75,13 @@ @Override public boolean hasNext() { + // Should that be iterator.hasNextValue() ? return this.iterator.hasNext() || - ( (this.nestedStatuses != null) && this.nestedStatuses.hasNext()); - // should that be iterator.hasNextValue() ? 
+ (this.nested && (this.nestedStatuses != null) && this.nestedStatuses.hasNext()); + // Belt & braces: this.nested should suffice. } + @Override public Tweet next() { Tweet result = null; @@ -98,12 +100,12 @@ if (isSearchResultList(json)) { this.nestedStatuses = getStatuses(json).iterator(); this.nested = this.nestedStatuses.hasNext(); - // Set the nested flag according as there is anything in - // the statuses value array (it could be empty). + // Set the nested flag according as there is anything left + // in the statuses value array (which could be empty). } - // Test nested now: true IFF we are in a search result thingy AND - // the statuses array is non-empty. + // Now let's test nested: true IFF we are in a search result thingy AND + // the thingy's statuses array is non-empty. if (this.nested) { result = Tweet.readTweet(this.nestedStatuses.next(), contentKeys, featureKeys); // Set the nested flag again for the next call to next() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Want excitement? Manually upgrade your production database. When you want reliability, choose Perforce Perforce version control. Predictably reliable. http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs