Revision: 17812
http://sourceforge.net/p/gate/code/17812
Author: markagreenwood
Date: 2014-04-11 12:46:02 +0000 (Fri, 11 Apr 2014)
Log Message:
-----------
applied the patched version submitted via SourceForge that prevents an invalid
index exception being thrown when processing words with multiple hyphens
Modified Paths:
--------------
gate/trunk/plugins/Lang_French/tokeniser/postprocess.jape
Modified: gate/trunk/plugins/Lang_French/tokeniser/postprocess.jape
===================================================================
--- gate/trunk/plugins/Lang_French/tokeniser/postprocess.jape 2014-04-11 12:38:20 UTC (rev 17811)
+++ gate/trunk/plugins/Lang_French/tokeniser/postprocess.jape 2014-04-11 12:46:02 UTC (rev 17812)
@@ -10,41 +10,41 @@
// this rule is apparently no more needed by the TreeTagger
-// Rule: simpleJoin
-// /* joins a final apostrophe with the preceding word, to make it the same as the
-// TreeTagger output, e.g. d' should be one Token not two */
+Rule: simpleJoin
+/* joins a final apostrophe with the preceding word, to make it the same as the
+TreeTagger output, e.g. d' should be one Token not two */
-// (
-// (
-// {Token.string == "d"}|
-// {Token.string == "D"}|
-// {Token.string == "L"}|
-// {Token.string == "l"}|
-// {Token.string == "n"}|
-// {Token.string == "N"}
-// )
-// {Token.string == "'"}
-// ):left
-// -->
-// {
-// gate.AnnotationSet toRemove = (gate.AnnotationSet)bindings.get("left");
-// outputAS.removeAll(toRemove);
-// //get the tokens
-// java.util.ArrayList tokens = new java.util.ArrayList(toRemove);
-// //define a comparator for annotations by start offset
-// Collections.sort(tokens, new gate.util.OffsetComparator());
-// String text = "";
-// Iterator tokIter = tokens.iterator();
-// while(tokIter.hasNext())
-// text += (String)((Annotation)tokIter.next()).getFeatures().get("string");
+ (
+ (
+ {Token.string == "d"}|
+ {Token.string == "D"}|
+ {Token.string == "L"}|
+ {Token.string == "l"}|
+ {Token.string == "n"}|
+ {Token.string == "N"}
+ )
+ {Token.string == "'"}
+ ):left
+-->
+{
+ gate.AnnotationSet toRemove = (gate.AnnotationSet)bindings.get("left");
+ outputAS.removeAll(toRemove);
+ //get the tokens
+ java.util.ArrayList tokens = new java.util.ArrayList(toRemove);
+ //define a comparator for annotations by start offset
+ Collections.sort(tokens, new gate.util.OffsetComparator());
+ String text = "";
+ Iterator tokIter = tokens.iterator();
+ while(tokIter.hasNext())
+ text += (String)((Annotation)tokIter.next()).getFeatures().get("string");
-// gate.FeatureMap features = Factory.newFeatureMap();
-// features.put("kind", "word");
-// features.put("string", text);
-// features.put("length", Integer.toString(text.length()));
-// features.put("orth", "artapos");
-// outputAS.add(toRemove.firstNode(), toRemove.lastNode(), "Token", features);
-// }
+ gate.FeatureMap features = Factory.newFeatureMap();
+ features.put("kind", "word");
+ features.put("string", text);
+ features.put("length", Integer.toString(text.length()));
+ features.put("orth", "artapos");
+ outputAS.add(toRemove.firstNode(), toRemove.lastNode(), "Token", features);
+}
Rule: simpleSplit
/* split compound word, to make it the same as the
@@ -69,20 +69,20 @@
features.putAll(annotation.getFeatures());
features.put("string", content.substring(startIndex, dashIndex));
features.put("length", dashIndex-startIndex);
- outputAS.add(offset, offset+dashIndex, "Token", features);
+ outputAS.add(offset+startIndex, offset+dashIndex, "Token", features);
features = Factory.newFeatureMap();
features.putAll(annotation.getFeatures());
features.put("string", "-");
features.put("length", 1);
- outputAS.add(offset+dashIndex, offset+dashIndex+1, "Token", features);
- offset += dashIndex;
+ outputAS.add(offset+dashIndex, offset+dashIndex+1, "Token", features); // <-- MODIF HERE
+ /* offset += dashIndex; */ // <-- REMOVE THIS LINE
startIndex = dashIndex + 1;
}
features = Factory.newFeatureMap();
features.putAll(annotation.getFeatures());
features.put("string", content.substring(startIndex));
features.put("length", content.length()-startIndex);
- outputAS.add(offset+1, endOffset, "Token", features);
+ outputAS.add(offset+startIndex, endOffset, "Token", features); // <-- MODIF HERE
} catch (InvalidOffsetException e) {
throw new LuckyException(e);
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Put Bad Developers to Shame
Dominate Development with Jenkins Continuous Integration
Continuously Automate Build, Test & Deployment
Start a new project now. Try Jenkins in the cloud.
http://p.sf.net/sfu/13600_Cloudbees
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs