Revision: 18970 http://sourceforge.net/p/gate/code/18970 Author: adamfunk Date: 2015-10-26 14:46:14 +0000 (Mon, 26 Oct 2015) Log Message: ----------- You can now specify a document feature to use as the document identifier in termbank listings (falling back to the source URL, and then to getName(), if the feature name is blank or the document has no such feature). The document's corpus index is appended in [] after the identifier to make uniqueness even more likely.
Cleared the remaining eclipse warnings. Modified Paths: -------------- gate/trunk/plugins/TermRaider/.classpath gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java Modified: gate/trunk/plugins/TermRaider/.classpath =================================================================== --- gate/trunk/plugins/TermRaider/.classpath 2015-10-26 10:31:39 UTC (rev 18969) +++ gate/trunk/plugins/TermRaider/.classpath 2015-10-26 14:46:14 UTC (rev 18970) @@ -1,7 +1,7 @@ <?xml version="1.0" encoding="UTF-8"?> <classpath> <classpathentry kind="src" path="src"/> - <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/> <classpathentry combineaccessrules="false" exported="true" kind="src" path="/GATE"/> + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> <classpathentry kind="output" path="classes"/> </classpath> Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java =================================================================== --- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java 2015-10-26 10:31:39 UTC (rev 18969) +++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java 2015-10-26 14:46:14 UTC (rev 18970) @@ -145,7 +145,7 @@ boolean wasLoaded = 
corpus.isDocumentLoaded(i); Document document = (Document) corpus.get(i); - addData(document); + addData(document, i); // datastore safety if (! wasLoaded) { @@ -166,7 +166,7 @@ /* BEHOLD THE GUBBINS to distinguish the various (potential) types of Pairbanks*/ - protected abstract void addData(Document document); + protected abstract void addData(Document document, int index); protected abstract void calculateScores(); Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java =================================================================== --- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java 2015-10-26 10:31:39 UTC (rev 18969) +++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java 2015-10-26 14:46:14 UTC (rev 18970) @@ -46,6 +46,7 @@ // additional CREOLE init parameters protected Set<String> inputAnnotationTypes; + protected String idDocumentFeature; // transient to allow serialization protected transient List<Action> actionsList; @@ -225,7 +226,7 @@ boolean wasLoaded = corpus.isDocumentLoaded(i); Document document = (Document) corpus.get(i); - processDocument(document); + processDocument(document, i); // datastore safety if (! 
wasLoaded) { @@ -241,7 +242,7 @@ protected abstract void resetScores(); - protected abstract void processDocument(Document document); + protected abstract void processDocument(Document document, int index); /** * This also needs to fill types and languages @@ -332,4 +333,17 @@ return this.inputAnnotationTypes; } + + @CreoleParameter(comment = "doc feature to use for identification (blank = use sourceURL)", + defaultValue = "") + public void setIdDocumentFeature(String name) { + this.idDocumentFeature = name; + } + + public String getIdDocumentFeature() { + return this.idDocumentFeature; + } + + + } Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java =================================================================== --- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java 2015-10-26 10:31:39 UTC (rev 18969) +++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java 2015-10-26 14:46:14 UTC (rev 18970) @@ -50,9 +50,9 @@ private ScoreType rawScoreST, termFrequencyST, localDocFrequencyST; - protected void processDocument(Document document) { + protected void processDocument(Document document, int index) { documentCount++; - String documentSource = Utilities.sourceOrName(document); + String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index); AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes); for (Annotation candidate : candidates) { Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java =================================================================== --- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java 2015-10-26 10:31:39 UTC (rev 18969) +++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java 2015-10-26 14:46:14 UTC (rev 18970) @@ -134,18 +134,18 @@ } - protected void processDocument(Document document) { + protected 
void processDocument(Document document, int index) { if (this.segmentAnnotationType.isEmpty() || (this.segmentAnnotationType == null)) { - processWholeDocument(document); + processWholeDocument(document, index); } else { - processDocumentSegments(document); + processDocumentSegments(document, index); } } - protected void processDocumentSegments(Document document) { - String documentSource = Utilities.sourceOrName(document); + protected void processDocumentSegments(Document document, int index) { + String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index); AnnotationSet segments = document.getAnnotations(inputASName).get(segmentAnnotationType); AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes); @@ -166,9 +166,9 @@ } - protected void processWholeDocument(Document document) { + protected void processWholeDocument(Document document, int index) { documentCount++; - String documentSource = Utilities.sourceOrName(document); + String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index); AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes); Set<Term> documentTerms = new HashSet<Term>(); Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java =================================================================== --- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java 2015-10-26 10:31:39 UTC (rev 18969) +++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java 2015-10-26 14:46:14 UTC (rev 18970) @@ -73,9 +73,9 @@ } - protected void processDocument(Document document) { + protected void processDocument(Document document, int index) { documentCount++; - String documentSource = Utilities.sourceOrName(document); + String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index); AnnotationSet candidates = 
document.getAnnotations(inputASName).get(inputAnnotationTypes); for (Annotation candidate : candidates) { Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java =================================================================== --- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java 2015-10-26 10:31:39 UTC (rev 18969) +++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java 2015-10-26 14:46:14 UTC (rev 18970) @@ -55,8 +55,9 @@ - protected void addData(Document document) { - String documentSource = Utilities.sourceOrName(document); + protected void addData(Document document, int index) { + // TODO: add support for the doc ID feature + String documentSource = Utilities.docIdentifier(document, null, index); /** Collocations that have already been processed in this document * (each collocation is a pair of IDs for a Token annotation), to avoid counting * them again. */ Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java =================================================================== --- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java 2015-10-26 10:31:39 UTC (rev 18969) +++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java 2015-10-26 14:46:14 UTC (rev 18970) @@ -55,9 +55,9 @@ - protected void processDocument(Document document) { + protected void processDocument(Document document, int index) { documentCount++; - String documentSource = Utilities.sourceOrName(document); + String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index); AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes); for (Annotation candidate : candidates) { Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java =================================================================== --- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java 2015-10-26 10:31:39 
UTC (rev 18969) +++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java 2015-10-26 14:46:14 UTC (rev 18970) @@ -83,7 +83,7 @@ private TermbankTableModel termbankTableModel; private XHTMLPanel termCloud = new XHTMLPanel(); - private JComboBox cloudType; + private JComboBox<ScoreType> cloudType; private JSlider cloudSize = new JSlider(); private List<ScoreType> scoreTypes; @@ -177,7 +177,7 @@ cloudBar.setFloatable(false); JButton btnExport = new JButton(MainFrame.getIcon("Download")); - cloudType = new JComboBox(); + cloudType = new JComboBox<ScoreType>(); Hashtable<Integer, JLabel> labelTable = new Hashtable<Integer,JLabel>(); labelTable.put(0, new JLabel(MainFrame.getIcon("Sunny"))); Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java =================================================================== --- gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java 2015-10-26 10:31:39 UTC (rev 18969) +++ gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java 2015-10-26 14:46:14 UTC (rev 18970) @@ -131,14 +131,28 @@ } - public static String sourceOrName(Document document) { - URL url = document.getSourceUrl(); - if (url == null) { - return document.getName(); + public static String docIdentifier(Document document, String feature, int index) { + String identifier = null; + if ( (feature != null) && (! 
feature.isEmpty() ) && + document.getFeatures().containsKey(feature) ) { + Object value = document.getFeatures().get(feature); + if (value != null) { + identifier = value.toString(); + } } + + if (identifier == null) { + URL url = document.getSourceUrl(); + if (url != null) { + identifier = url.toString(); + } + } - //implied else - return url.toString(); + if (identifier == null) { + identifier = document.getName(); + } + + return String.format("%s [%d]", identifier, index); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs