Revision: 19184
http://sourceforge.net/p/gate/code/19184
Author: markagreenwood
Date: 2016-04-02 14:42:05 +0000 (Sat, 02 Apr 2016)
Log Message:
-----------
first stage in breaking everything is to move all PRs out of gate-core and into
the plugins that define them, i.e. ANNIE and Tools. This probably breaks other
plugins, but we'll get to that later
Modified Paths:
--------------
gate/branches/sawdust2/.classpath
gate/branches/sawdust2/plugins/ANNIE/build.xml
gate/branches/sawdust2/plugins/ANNIE/creole.xml
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/coref/Coreferencer.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/coref/PronominalCoref.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/splitter/SentenceSplitter.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/tokeniser/DefaultTokeniser.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/jape/Batch.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/jape/Constraint.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/jape/parser/ParseCpsl.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/jape/parser/ParseCpsl.jj
gate/branches/sawdust2/plugins/ANNIE/src/gate/jape/parser/ParseCpslTokenManager.java
gate/branches/sawdust2/plugins/DocumentNormalizer/.classpath
gate/branches/sawdust2/plugins/Information_Retrieval/.classpath
gate/branches/sawdust2/plugins/Text_Categorization/.classpath
gate/branches/sawdust2/plugins/Tools/.classpath
gate/branches/sawdust2/plugins/Tools/creole.xml
gate/branches/sawdust2/plugins/Tools/src/gate/creole/annotransfer/AnnotationSetTransfer.java
gate/branches/sawdust2/plugins/Tools/src/gate/creole/dumpingPR/DumpingPR.java
gate/branches/sawdust2/plugins/Tools/src/gate/creole/morph/Morph.java
gate/branches/sawdust2/src/main/gate/Factory.java
gate/branches/sawdust2/src/main/gate/creole/AbstractProcessingResource.java
gate/branches/sawdust2/src/main/gate/util/spring/DuplicateResourceFactoryBean.java
Added Paths:
-----------
gate/branches/sawdust2/plugins/ANNIE/.classpath
gate/branches/sawdust2/plugins/ANNIE/.project
gate/branches/sawdust2/plugins/ANNIE/resources/coref/
gate/branches/sawdust2/plugins/ANNIE/src/
gate/branches/sawdust2/plugins/ANNIE/src/com/
gate/branches/sawdust2/plugins/ANNIE/src/gate/
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/ANNIETransducer.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/POSTagger.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/Transducer.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/annotdelete/
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/coref/
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/gazetteer/
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/orthomatcher/
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/splitter/
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/tokeniser/
gate/branches/sawdust2/plugins/ANNIE/src/gate/fsm/
gate/branches/sawdust2/plugins/ANNIE/src/gate/gui/
gate/branches/sawdust2/plugins/ANNIE/src/gate/gui/GazetteerEditor.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/gui/jape/
gate/branches/sawdust2/plugins/ANNIE/src/gate/jape/
gate/branches/sawdust2/plugins/ANNIE/src/gate/jape/JapeFactory.java
gate/branches/sawdust2/plugins/ANNIE/src/hepple/
gate/branches/sawdust2/plugins/Tools/resources/VP/
gate/branches/sawdust2/plugins/Tools/src/gate/creole/
gate/branches/sawdust2/plugins/Tools/src/gate/creole/GazetteerListsCollector.java
gate/branches/sawdust2/plugins/Tools/src/gate/creole/VPChunker.java
gate/branches/sawdust2/plugins/Tools/src/gate/creole/annotransfer/
gate/branches/sawdust2/plugins/Tools/src/gate/creole/dumpingPR/
gate/branches/sawdust2/plugins/Tools/src/gate/creole/gazetteer/
gate/branches/sawdust2/plugins/Tools/src/gate/creole/gazetteer/FlexGazMappingTable.java
gate/branches/sawdust2/plugins/Tools/src/gate/creole/gazetteer/FlexibleGazetteer.java
gate/branches/sawdust2/plugins/Tools/src/gate/creole/gazetteer/NodePosition.java
gate/branches/sawdust2/plugins/Tools/src/gate/creole/morph/
gate/branches/sawdust2/plugins/Tools/src/gate/gui/
gate/branches/sawdust2/plugins/Tools/src/gate/gui/STreeNode.java
gate/branches/sawdust2/plugins/Tools/src/gate/gui/SyntaxTreeViewer.java
Removed Paths:
-------------
gate/branches/sawdust2/plugins/ANNIE/resources/VP/
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/gazetteer/FlexGazMappingTable.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/gazetteer/FlexibleGazetteer.java
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/gazetteer/NodePosition.java
gate/branches/sawdust2/plugins/JAPE/
gate/branches/sawdust2/src/main/com/
gate/branches/sawdust2/src/main/gate/creole/ANNIETransducer.java
gate/branches/sawdust2/src/main/gate/creole/GazetteerListsCollector.java
gate/branches/sawdust2/src/main/gate/creole/POSTagger.java
gate/branches/sawdust2/src/main/gate/creole/Transducer.java
gate/branches/sawdust2/src/main/gate/creole/VPChunker.java
gate/branches/sawdust2/src/main/gate/creole/annotdelete/
gate/branches/sawdust2/src/main/gate/creole/annotransfer/
gate/branches/sawdust2/src/main/gate/creole/coref/
gate/branches/sawdust2/src/main/gate/creole/dumpingPR/
gate/branches/sawdust2/src/main/gate/creole/gazetteer/
gate/branches/sawdust2/src/main/gate/creole/morph/
gate/branches/sawdust2/src/main/gate/creole/orthomatcher/
gate/branches/sawdust2/src/main/gate/creole/splitter/
gate/branches/sawdust2/src/main/gate/creole/tokeniser/
gate/branches/sawdust2/src/main/gate/fsm/
gate/branches/sawdust2/src/main/gate/gui/GazetteerEditor.java
gate/branches/sawdust2/src/main/gate/gui/STreeNode.java
gate/branches/sawdust2/src/main/gate/gui/SyntaxTreeViewer.java
gate/branches/sawdust2/src/main/gate/gui/jape/
gate/branches/sawdust2/src/main/gate/jape/
gate/branches/sawdust2/src/main/gate/resources/creole/coref/
gate/branches/sawdust2/src/main/hepple/
Property Changed:
----------------
gate/branches/sawdust2/
gate/branches/sawdust2/plugins/
gate/branches/sawdust2/plugins/ANNIE/
Index: gate/branches/sawdust2
===================================================================
--- gate/branches/sawdust2 2016-04-02 09:45:31 UTC (rev 19183)
+++ gate/branches/sawdust2 2016-04-02 14:42:05 UTC (rev 19184)
Property changes on: gate/branches/sawdust2
___________________________________________________________________
Modified: svn:mergeinfo
## -4,3 +4,4 ##
/gate/branches/release-8.1:18736-18747
/gate/branches/sawdust:17055-17079
/gate/tags/release-7.0:15399-15407
+/gate/trunk:19181
\ No newline at end of property
Modified: gate/branches/sawdust2/.classpath
===================================================================
--- gate/branches/sawdust2/.classpath 2016-04-02 09:45:31 UTC (rev 19183)
+++ gate/branches/sawdust2/.classpath 2016-04-02 14:42:05 UTC (rev 19184)
@@ -1,7 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src/main"/>
- <classpathentry kind="src" output="classes/test" path="src/test"/>
<classpathentry kind="con"
path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
<classpathentry exported="true" kind="con"
path="org.apache.ivyde.eclipse.cpcontainer.IVYDE_CONTAINER/?project=GATE&ivyXmlPath=ivy.xml&confs=*"/>
<classpathentry kind="output" path="classes/main"/>
Index: gate/branches/sawdust2/plugins
===================================================================
--- gate/branches/sawdust2/plugins 2016-04-02 09:45:31 UTC (rev 19183)
+++ gate/branches/sawdust2/plugins 2016-04-02 14:42:05 UTC (rev 19184)
Property changes on: gate/branches/sawdust2/plugins
___________________________________________________________________
Modified: svn:mergeinfo
## -3,4 +3,4 ##
/gate/branches/release-8.1/plugins:18736-18747
/gate/branches/sawdust/plugins:17055-17079
/gate/tags/release-7.0/plugins:15399-15407
-/gate/trunk/plugins:3-390
+/gate/trunk/plugins:3-390,19181
\ No newline at end of property
Index: gate/branches/sawdust2/plugins/ANNIE
===================================================================
--- gate/branches/sawdust2/plugins/ANNIE 2016-04-02 09:45:31 UTC (rev
19183)
+++ gate/branches/sawdust2/plugins/ANNIE 2016-04-02 14:42:05 UTC (rev
19184)
Property changes on: gate/branches/sawdust2/plugins/ANNIE
___________________________________________________________________
Modified: svn:ignore
## -1,2 +1,3 ##
-
+annie.jar
+classes
.settings
Added: gate/branches/sawdust2/plugins/ANNIE/.classpath
===================================================================
--- gate/branches/sawdust2/plugins/ANNIE/.classpath
(rev 0)
+++ gate/branches/sawdust2/plugins/ANNIE/.classpath 2016-04-02 14:42:05 UTC
(rev 19184)
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" path="src"/>
+ <classpathentry kind="con"
path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+ <classpathentry combineaccessrules="false" kind="src" path="/GATE"/>
+ <classpathentry kind="output" path="classes"/>
+</classpath>
Added: gate/branches/sawdust2/plugins/ANNIE/.project
===================================================================
--- gate/branches/sawdust2/plugins/ANNIE/.project
(rev 0)
+++ gate/branches/sawdust2/plugins/ANNIE/.project 2016-04-02 14:42:05 UTC
(rev 19184)
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>GATE-plugin-ANNIE</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
Modified: gate/branches/sawdust2/plugins/ANNIE/build.xml
===================================================================
--- gate/branches/sawdust2/plugins/ANNIE/build.xml 2016-04-02 09:45:31 UTC
(rev 19183)
+++ gate/branches/sawdust2/plugins/ANNIE/build.xml 2016-04-02 14:42:05 UTC
(rev 19184)
@@ -1,174 +1,112 @@
-<?xml version="1.0"?>
-<!-- $Id$ -->
-<project name="GATE Tokenisers" default="jar" basedir=".">
- <description>
- GATE see http://gate.ac.uk
- </description>
- <!-- set global properties for this build -->
- <!-- Prevent Ant from warning about includeantruntime not being set -->
- <property name="build.sysclasspath" value="ignore" />
+<?xml version="1.0" encoding="UTF-8"?>
- <!-- Name of the output jar file -->
- <property name="jarFile" location="heptag_wrapper.jar"/>
-
- <!-- Sources -->
- <property name="srcDir" location="src"/>
-
- <!-- Output directory for the build process -->
- <property name="outputDir" location="classes"/>
+<project basedir="." default="all" name="annie">
+ <!-- Prevent Ant from warning about includeantruntime not being set -->
+ <property name="build.sysclasspath" value="ignore" />
- <!-- Libraries directory for the build process -->
- <property name="libDir" location="lib"/>
-
- <!-- Extensions directory for the build process -->
- <property name="extDir" location="lib/ext"/>
+ <property file="build.properties" />
+ <property name="plugin.name" value="ANNIE" />
- <!-- Documentation directory -->
- <property name="docDir" location="doc"/>
-
- <!-- Use the new 1.3+ compiler -->
- <property name="build.compiler" value="modern"/>
+ <!-- Make environment variables available -->
+ <property environment="env" />
-
- <!-- tools.jar path -->
- <path id="tools.jar.path" location="../bin/tools14.jar"/>
-
- <!-- The build CLASSPATH -->
- <path id="build.class.path">
- <fileset file="../../bin/gate.jar"/>
- <fileset file="lib/heptag.jar"/>
- </path>
-
- <!-- Prints out some information messages -->
- <echo level="info">
- This is the ANT build script for a CREOLE GATE component
(http://gate.ac.uk).
- Type "ant help" for details about the targets available.
- GATE ${gate.version}, build ${gate.build}</echo>
-
- <!-- Prints out details about the targets -->
- <target name="help">
- <echo level="info">
- This is the ANT build script for gate (http://gate.ac.uk).
- The following targets are available
- -all (default target): compiles GUK and GATE classes and makes the
gate.jar archive.
-
- -compile: compiles all the .java files
- -resources: copies the resources to the output directory (${outputDir})
- -jar: creates the gate.jar archive
- -run: starts GATE
- -clean: cleans up the output from all previous builds
-
- -doc: creates the Javadoc and Java2HTML documentation
- -docclean: deletes all built documentation
- -jape: rebuilds the JAPE parser files and associated documentation
- -guk: compiles the GUK classes and builds the guk.jar archive
- -gukdemo: starts the GUK Editor (a.k.a. GATE Unicode Editor)
- -test: runs the GATE test suite
-
- -distro: make a GATE distribution (calls all build tasks in the
process)
- </echo>
- </target>
-
+ <!-- If environment variable GATE_HOME is set, use it for
+ gate.home (unless it was already set in build.properties -->
+ <condition property="gate.home" value="${env.GATE_HOME}">
+ <isset property="env.GATE_HOME" />
+ </condition>
- <!-- This target compiles all the classes -->
- <target name="compile"
- description="compile the source " >
- <!-- Compile the java code from ${srcDir} into ${buildDir} -->
- <javac srcdir="${srcDir}"
- destdir="${outputDir}"
- includes="gate/**"
- source="1.4"
- target="1.4"
- classpathref="build.class.path"/>
- </target>
-
- <!-- Make gate.jar archive -->
+ <property name="dest" value="classes" />
+ <property name="gate.home" value="../.." />
+ <property name="buildDir" location="${gate.home}/build" />
+ <property name="src" value="src" />
+ <property name="doc.dir" location="doc" />
+ <property name="javadoc.dir" location="${doc.dir}/javadoc" />
+
+ <fileset id="gatelib.classpath" dir="${gate.home}/lib">
+ <include name="*.jar" />
+ <include name="*.zip" />
+ </fileset>
+
+ <fileset id="gate.classpath" dir="${gate.home}/bin">
+ <include name="*.jar" />
+ <include name="*.zip" />
+ </fileset>
+
+ <path id="compile.classpath">
+ <fileset refid="gatelib.classpath" />
+ <fileset refid="gate.classpath" />
+ </path>
+
<target name="jar" depends="compile">
- <jar destfile="${jarFile}"
- update="false"
- index="true">
- <fileset dir="${outputDir}/"
- includes="gate/**"/>
- </jar>
+ <jar compress="false" destfile="annie.jar">
+ <fileset dir="${dest}" />
+ </jar>
</target>
-
-
- <!-- Clear all build output -->
- <target name="clean">
- <delete includeemptydirs="true">
- <fileset dir="${outputDir}" excludes="**/CVS,**/CVS/**,**/.cvsignore"/>
- </delete>
- </target>
-
- <!-- Dummy javadoc as there are no sources but gate core wants the target -->
- <target name="javadoc">
- </target>
-
- <!-- Make documentation - internal -->
- <target name="internaljavadoc">
- <javadoc access="private"
- destdir="${docDir}/javadoc/internal"
- classpathref="run.class.path"
- Extdirs="${extDir}"
- Encoding="UTF-8"
- Use="yes"
- Windowtitle="GATE JavaDoc"
- link="http://docs.oracle.com/javase/6/docs/api/"
- docencoding="UTF-8"
- charset="UTF-8"
- source="1.6"
- breakiterator="true">
- <fileset dir="${srcDir}"/>
- </javadoc>
- </target>
-
- <!-- Convert the source to HTML using the java2html utility -->
- <target name="java2html">
- <java jar="${buildDir}/lib/j2h.jar"
- fork="true"
- spawn="false">
- <jvmarg value="-Dfile.encoding=UTF-8"/>
- <arg value="-d"/> <arg file="${docDir}/java2html/"/>
- <arg value="-js"/> <arg file="${srcDir}"/>
- <arg value="-jd"/> <arg file="${docDir}/javadoc"/>
- <arg value="-m"/> <arg value="4"/>
- <arg value="-t"/> <arg value="2"/>
- <arg value="-n"/> <arg value=""GATE source""/>
- </java>
- </target>
-
- <target name="doc" depends="javadoc, internaljavadoc, java2html"/>
-
-
- <!-- Clear all built documentation -->
- <target name="docclean">
- <delete includeemptydirs="true">
- <fileset dir="${docDir}/javadoc"
excludes="**/CVS,**/CVS/**,**/.cvsignore,internal,internal/**"/>
- </delete>
- <delete includeemptydirs="true">
- <fileset dir="${docDir}/javadoc/internal"
excludes="**/CVS,**/CVS/**,**/.cvsignore"/>
- </delete>
- <delete includeemptydirs="true">
- <fileset dir="${docDir}/java2html"
excludes="**/CVS,**/CVS/**,**/.cvsignore"/>
- </delete>
- </target>
-
- <target name="test" depends="jar">
- <junit fork="true"
- showoutput="yes"
- printsummary="yes">
- <classpath refid="run.class.path"/>
- <sysproperty key="gate.config" value="${buildDir}/gate.xml"/>
- <jvmarg value="-Djava.ext.dirs=${extDir}"/>
- <formatter type="plain"
- usefile="false"/>
- <formatter type="xml"
- if="write.xml"/>
- <test name="gate.TestGate"
- outfile="testResult"
- todir="${buildDir}"/>
- </junit>
- </target>
+ <!-- Rebuild the JAPE file parser -->
+ <target name="jape">
+ <java classname="javacc" fork="true" spawn="false"
dir="${src}/gate/jape/parser">
+ <classpath>
+ <pathelement location="${buildDir}/lib/javacc.jar" />
+ </classpath>
+ <arg value="ParseCpsl.jj" />
+ </java>
+
+ <java classname="jjdoc" fork="true" spawn="false"
dir="${src}/gate/jape/parser">
+ <classpath>
+ <pathelement location="${buildDir}/lib/javacc.jar" />
+ </classpath>
+ <arg value="ParseCpsl.jj" />
+ </java>
+
+ <move file="${src}/gate/jape/parser/ParseCpsl.html"
toFile="${src}/gate/jape/parser/ParseCpslPlain.html" />
+ </target>
+
+ <target name="compile" depends="init">
+ <javac encoding="utf-8" classpathref="compile.classpath"
debug="true" deprecation="true" destdir="${dest}" nowarn="false" source="1.6"
target="1.6">
+ <src path="${src}" />
+ <compilerarg value="-Xmaxwarns" />
+ <compilerarg value="${gate.compile.maxwarnings}" />
+ <compilerarg value="-Xlint:all" />
+ </javac>
+ </target>
+
+ <!-- Build JavaDoc documentation -->
+ <target name="doc.prepare">
+ <mkdir dir="${javadoc.dir}" />
+ </target>
+
+ <target name="javadoc" depends="doc.prepare">
+ <javadoc destdir="${javadoc.dir}" packagenames="*"
classpathref="compile.classpath" encoding="UTF-8" windowtitle="${plugin.name}
JavaDoc" source="1.6">
+ <sourcepath>
+ <pathelement location="${src}" />
+ </sourcepath>
+ <link href="http://docs.oracle.com/javase/6/docs/api/"
/>
+ <link href="http://gate.ac.uk/gate/doc/javadoc/" />
+ </javadoc>
+ </target>
+
+ <target name="clean.classes">
+ <delete failonerror="false" includeemptydirs="true">
+ <fileset dir="${dest}" />
+ </delete>
+ </target>
+
+ <target name="clean" depends="clean.classes">
+ <delete file="annie.jar" />
+ </target>
+
+ <target name="init">
+ <mkdir dir="${dest}" />
+ </target>
+
+ <!-- Build everything - the code and JavaDoc -->
+ <target name="all" depends="jar, javadoc" />
+
+ <!-- targets required by the top-level build file -->
+ <target name="build" depends="jar" />
+ <target name="distro.prepare" depends="clean.classes" />
+ <target name="test" />
+
</project>
-
Modified: gate/branches/sawdust2/plugins/ANNIE/creole.xml
===================================================================
--- gate/branches/sawdust2/plugins/ANNIE/creole.xml 2016-04-02 09:45:31 UTC
(rev 19183)
+++ gate/branches/sawdust2/plugins/ANNIE/creole.xml 2016-04-02 14:42:05 UTC
(rev 19184)
@@ -2,89 +2,5 @@
<!-- $Id$ -->
<!-- creole.xml directory file for ANNIE Processing resources -->
<CREOLE-DIRECTORY>
-
- <!-- PROCESSING RESOURCES -->
-
- <!-- creole.xml for the Unicode tokeniser -->
- <RESOURCE>
- <CLASS>gate.creole.tokeniser.SimpleTokeniser</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for the Englishtokeniser -->
- <RESOURCE>
- <CLASS>gate.creole.tokeniser.DefaultTokeniser</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for gazetteer -->
- <RESOURCE>
- <CLASS>gate.creole.gazetteer.DefaultGazetteer</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for a SharedDefaultGazetteer instance -->
- <RESOURCE>
- <CLASS>gate.creole.gazetteer.SharedDefaultGazetteer</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for Hash Gazetteer (ex Natural)-->
- <RESOURCE>
- <CLASS>com.ontotext.gate.gazetteer.HashGazetteer</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for JapeTransducer -->
- <RESOURCE>
- <CLASS>gate.creole.Transducer</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for ANNIE Transducer -->
- <RESOURCE>
- <CLASS>gate.creole.ANNIETransducer</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for Sentence splitter-->
- <RESOURCE>
- <CLASS>gate.creole.splitter.SentenceSplitter</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for RegEx Sentence splitter-->
- <RESOURCE>
- <CLASS>gate.creole.splitter.RegexSentenceSplitter</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for HepTag (Mark Hepple's POS tagger)-->
- <RESOURCE>
- <CLASS>gate.creole.POSTagger</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for namematch -->
- <RESOURCE>
- <CLASS>gate.creole.orthomatcher.OrthoMatcher</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for the coreferencer -->
- <RESOURCE>
- <CLASS>gate.creole.coref.Coreferencer</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for the coreferencer -->
- <RESOURCE>
- <CLASS>gate.creole.coref.NominalCoref</CLASS>
- </RESOURCE>
-
- <!-- creole.xml for the document reset -->
- <RESOURCE>
- <CLASS>gate.creole.annotdelete.AnnotationDeletePR</CLASS>
- </RESOURCE>
-
- <!-- VISUAL RESOURCES -->
-
- <!-- creole.xml for the jape viewer -->
- <RESOURCE>
- <CLASS>gate.gui.jape.JapeViewer</CLASS>
- </RESOURCE>
-
- <!-- Gazetteer editor -->
- <RESOURCE>
- <CLASS>gate.gui.GazetteerEditor</CLASS>
- </RESOURCE>
-
+ <JAR SCAN="true">annie.jar</JAR>
</CREOLE-DIRECTORY>
Copied:
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/ANNIETransducer.java (from
rev 19180, gate/branches/sawdust2/src/main/gate/creole/ANNIETransducer.java)
===================================================================
--- gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/ANNIETransducer.java
(rev 0)
+++ gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/ANNIETransducer.java
2016-04-02 14:42:05 UTC (rev 19184)
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 1995-2012, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * Valentin Tablan, 20 Sep 2001
+ *
+ * $Id$
+ */
+package gate.creole;
+
+import gate.creole.metadata.CreoleParameter;
+import gate.creole.metadata.CreoleResource;
+import gate.creole.metadata.HiddenCreoleParameter;
+import java.net.URL;
+
+/**
+ * The ANNIE named entity transducer.
+ * This is a JAPE transducer and this class is here to allow the specification
+ * in creole.xml of a default grammar to be used in .
+ */
+@CreoleResource(name = "ANNIE NE Transducer",
+ comment = "ANNIE named entity grammar.",
+ helpURL = "http://gate.ac.uk/userguide/sec:annie:semantic-tagger",
+ icon = "ne-transducer"
+ )
+public class ANNIETransducer extends Transducer {
+
+ private static final long serialVersionUID = 7443615855994597034L;
+
+ /**
+ * The ontology parameter is not used for this PR and therefore hidden.
+ *
+ * @param ontology
+ */
+ @HiddenCreoleParameter
+ @Override
+ public void setOntology(gate.creole.ontology.Ontology ontology) {
+ super.setOntology(ontology);
+ }
+
+ /**
+ * The binaryGrammarURL parameter is not used for this PR and therefore
hidden.
+ *
+ * @param url
+ */
+ @HiddenCreoleParameter
+ @Override
+ public void setBinaryGrammarURL(URL url) {
+ super.setBinaryGrammarURL(url);
+ }
+
+
+ /**
+ * The grammarURL parameter provides the ANNIE main.jape file as a default
+ * for this PR.
+ *
+ * @param newGrammarURL
+ */
+ @CreoleParameter(
+ comment = "The URL to the grammar file.",
+ suffixes = "jape",
+ defaultValue = "resources/NE/main.jape"
+ )
+ @Override
+ public void setGrammarURL(java.net.URL newGrammarURL) {
+ super.setGrammarURL(newGrammarURL);
+ }
+
+}
\ No newline at end of file
Copied: gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/POSTagger.java
(from rev 19180, gate/branches/sawdust2/src/main/gate/creole/POSTagger.java)
===================================================================
--- gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/POSTagger.java
(rev 0)
+++ gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/POSTagger.java
2016-04-02 14:42:05 UTC (rev 19184)
@@ -0,0 +1,485 @@
+/*
+ * Copyright (c) 1995-2012, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * Valentin Tablan, 01 Feb 2000
+ *
+ * $Id$
+ */
+
+package gate.creole;
+
+import gate.Annotation;
+import gate.AnnotationSet;
+import gate.Factory;
+import gate.FeatureMap;
+import gate.Resource;
+import gate.Utils;
+import gate.creole.metadata.CreoleParameter;
+import gate.creole.metadata.CreoleResource;
+import gate.creole.metadata.Optional;
+import gate.creole.metadata.RunTime;
+import gate.util.GateRuntimeException;
+import gate.util.OffsetComparator;
+
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+/**
+ * This class is a wrapper for HepTag, Mark Hepple's POS tagger.
+ */
+@CreoleResource(name = "ANNIE POS Tagger",
+ helpURL = "http://gate.ac.uk/userguide/sec:annie:tagger",
+ comment = "Mark Hepple's Brill-style POS tagger", icon="pos-tagger")
+public class POSTagger extends AbstractLanguageAnalyser {
+
+ private static final long serialVersionUID = 7680938864165071808L;
+
+ public static final String
+ TAG_DOCUMENT_PARAMETER_NAME = "document";
+
+ public static final String
+ TAG_INPUT_AS_PARAMETER_NAME = "inputASName";
+
+ public static final String
+ TAG_LEXICON_URL_PARAMETER_NAME = "lexiconURL";
+
+ public static final String
+ TAG_RULES_URL_PARAMETER_NAME = "rulesURL";
+
+ public static final String
+ TAG_ENCODING_PARAMETER_NAME = "encoding";
+
+
+ public static final String
+ BASE_TOKEN_ANNOTATION_TYPE_PARAMETER_NAME = "baseTokenAnnotationType";
+
+ public static final String
+ OUTPUT_ANNOTATION_TYPE_PARAMETER_NAME = "outputAnnotationType";
+
+ public static final String
+ BASE_SENTENCE_ANNOTATION_TYPE_PARAMETER_NAME =
"baseSentenceAnnotationType";
+
+ public static final String
+ TAG_OUTPUT_AS_PARAMETER_NAME = "outputASName";
+
+ @RunTime
+ @Optional
+ @CreoleParameter(
+ comment = "Throw an exception when there are none of the required input
annotations",
+ defaultValue = "true")
+ public void setFailOnMissingInputAnnotations(Boolean fail) {
+ failOnMissingInputAnnotations = fail;
+ }
+ public Boolean getFailOnMissingInputAnnotations() {
+ return failOnMissingInputAnnotations;
+ }
+ protected Boolean failOnMissingInputAnnotations = true;
+
+
+ @RunTime
+ @Optional
+ @CreoleParameter(
+ comment = "Should all Tokens be POS tagged or just those within
baseSentenceAnnotationType?",
+ defaultValue = "true")
+ public void setPosTagAllTokens(Boolean allTokens) {
+ posTagAllTokens = allTokens;
+ }
+ public Boolean getPosTagAllTokens() {
+ return posTagAllTokens;
+ }
+ protected Boolean posTagAllTokens = true; // should all Tokens be POS
tagged or just those within baseSentenceAnnotationType
+
+ public POSTagger() {
+ }
+
+ protected Logger logger = Logger.getLogger(this.getClass().getName());
+
+ @Override
+ public Resource init()throws ResourceInstantiationException{
+ if(lexiconURL == null){
+ throw new ResourceInstantiationException(
+ "NoURL provided for the lexicon!");
+ }
+ if(rulesURL == null){
+ throw new ResourceInstantiationException(
+ "No URL provided for the rules!");
+ }
+ try{
+ tagger = new hepple.postag.POSTagger(lexiconURL,rulesURL, encoding);
+ }catch(Exception e){
+ throw new ResourceInstantiationException(e);
+ }
+ return this;
+ }
+
+
+ @Override
+ public void execute() throws ExecutionException{
+ //check the parameters
+ if(document == null) throw new ExecutionException(
+ "No document to process!");
+ if(inputASName != null && inputASName.equals("")) inputASName = null;
+ AnnotationSet inputAS = (inputASName == null) ?
+ document.getAnnotations() :
+ document.getAnnotations(inputASName);
+
+
+ if(baseTokenAnnotationType == null ||
baseTokenAnnotationType.trim().length()==0) {
+ throw new ExecutionException("No base Token Annotation Type
provided!");
+ }
+
+ if(outputASName != null && outputASName.equals("")) outputASName = null;
+
+ if(baseSentenceAnnotationType == null ||
baseSentenceAnnotationType.trim().length()==0) {
+ throw new ExecutionException("No base Sentence Annotation Type
provided!");
+ }
+
+ if(outputAnnotationType == null ||
outputAnnotationType.trim().length()==0) {
+ throw new ExecutionException("No AnnotationType provided to store the
new feature!");
+ }
+
+ AnnotationSet sentencesAS = inputAS.get(baseSentenceAnnotationType);
+ AnnotationSet tokensAS = inputAS.get(baseTokenAnnotationType);
+ if(sentencesAS != null && sentencesAS.size() > 0
+ && tokensAS != null && tokensAS.size() > 0){
+ long startTime = System.currentTimeMillis();
+ fireStatusChanged("POS tagging " + document.getName());
+ fireProgressChanged(0);
+ //prepare the input for HepTag
+ List<String> sentenceForTagger = new ArrayList<String>();
+ List<List<String>> sentencesForTagger = new ArrayList<List<String>>(1);
+ sentencesForTagger.add(sentenceForTagger);
+
+ //define a comparator for annotations by start offset
+ Comparator<Annotation> offsetComparator = new OffsetComparator();
+
+ //read all the tokens and all the sentences
+ List<Annotation> sentencesList = new ArrayList<Annotation>(sentencesAS);
+ Collections.sort(sentencesList, offsetComparator);
+ List<Annotation> tokensList = new ArrayList<Annotation>(tokensAS);
+ Collections.sort(tokensList, offsetComparator);
+
+ Iterator<Annotation> sentencesIter = sentencesList.iterator();
+ ListIterator<Annotation> tokensIter = tokensList.listIterator();
+
+ List<Annotation> tokensInCurrentSentence = new ArrayList<Annotation>();
+ Annotation currentToken = tokensIter.next();
+ int sentIndex = 0;
+ int sentCnt = sentencesAS.size();
+ while(sentencesIter.hasNext()){
+ Annotation currentSentence = sentencesIter.next();
+ tokensInCurrentSentence.clear();
+ sentenceForTagger.clear();
+ while(currentToken != null
+ &&
+ currentToken.getEndNode().getOffset().compareTo(
+ currentSentence.getEndNode().getOffset()) <= 0){
+ // If we're only POS tagging Tokens within
baseSentenceAnnotationType, don't add the sentence if the Tokens aren't within
the span of baseSentenceAnnotationType
+ if (posTagAllTokens || currentToken.withinSpanOf(currentSentence)) {
+ tokensInCurrentSentence.add(currentToken);
+ sentenceForTagger.add((String)currentToken.getFeatures().
+ get(TOKEN_STRING_FEATURE_NAME));
+ }
+ currentToken = (tokensIter.hasNext() ?
+ tokensIter.next() : null);
+ }
+ //run the POS tagger
+ List<List<String[]>> taggerList = tagger.runTagger(sentencesForTagger);
+ if(taggerList != null && taggerList.size() > 0){
+ List<String[]> taggerResults = taggerList.get(0);
+ //add the results
+ //make sure no malfunction occurred
+ if(taggerResults.size() != tokensInCurrentSentence.size())
+ throw new ExecutionException(
+ "POS Tagger malfunction: the output size (" +
+ taggerResults.size() +
+ ") is different from the input size (" +
+ tokensInCurrentSentence.size() + ")!");
+ Iterator<String[]> resIter = taggerResults.iterator();
+ Iterator<Annotation> tokIter = tokensInCurrentSentence.iterator();
+ while(resIter.hasNext()){
+ Annotation annot = tokIter.next();
+ addFeatures(annot, TOKEN_CATEGORY_FEATURE_NAME,
resIter.next()[1]);
+ }
+ }
+ fireProgressChanged(sentIndex++ * 100 / sentCnt);
+ }//while(sentencesIter.hasNext())
+
+ if(currentToken != null && posTagAllTokens){ // Tag remaining Tokens if
we are not considering those only within baseSentenceAnnotationType
+ //we have remaining tokens after the last sentence
+ tokensInCurrentSentence.clear();
+ sentenceForTagger.clear();
+ while(currentToken != null){
+ tokensInCurrentSentence.add(currentToken);
+ sentenceForTagger.add((String)currentToken.getFeatures().
+ get(TOKEN_STRING_FEATURE_NAME));
+ currentToken = (tokensIter.hasNext() ?
+ tokensIter.next() : null);
+ }
+ //run the POS tagger
+ List<String[]> taggerResults =
tagger.runTagger(sentencesForTagger).get(0);
+ //add the results
+ //make sure no malfunction occurred
+ if(taggerResults.size() != tokensInCurrentSentence.size())
+ throw new ExecutionException(
+ "POS Tagger malfunction: the output size (" +
+ taggerResults.size() +
+ ") is different from the input size (" +
+ tokensInCurrentSentence.size() + ")!");
+ Iterator<String[]> resIter = taggerResults.iterator();
+ Iterator<Annotation> tokIter = tokensInCurrentSentence.iterator();
+ while(resIter.hasNext()){
+ Annotation annot = tokIter.next();
+ addFeatures(annot, TOKEN_CATEGORY_FEATURE_NAME, resIter.next()[1]);
+ }
+ }//if(currentToken != null)
+ fireProcessFinished();
+ fireStatusChanged(
+ document.getName() + " tagged in " +
+ NumberFormat.getInstance().format(
+ (double)(System.currentTimeMillis() - startTime) / 1000) +
+ " seconds!");
+ }else{
+ if(failOnMissingInputAnnotations) {
+ throw new ExecutionException("No sentences or tokens to process in
document "+document.getName()+"\n" +
+ "Please run a sentence splitter "+
+ "and tokeniser first!");
+ } else {
+ Utils.logOnce(logger,Level.INFO,"POS tagger: no sentence or token
annotations in input document - see debug log for details.");
+ logger.debug("No input annotations in document "+document.getName());
+ }
+ }
+
+//OLD version
+/*
+ AnnotationSet as = inputAS.get(SENTENCE_ANNOTATION_TYPE);
+ if(as != null && as.size() > 0){
+ List sentences = new ArrayList(as);
+ Collections.sort(sentences, offsetComparator);
+ Iterator sentIter = sentences.iterator();
+ int sentIndex = 0;
+ int sentCnt = sentences.size();
+ long startTime= System.currentTimeMillis();
+ while(sentIter.hasNext()){
+start = System.currentTimeMillis();
+ Annotation sentenceAnn = (Annotation)sentIter.next();
+ AnnotationSet rangeSet = inputAS.get(
+ sentenceAnn.getStartNode().getOffset(),
+ sentenceAnn.getEndNode().getOffset());
+ if(rangeSet == null) continue;
+ AnnotationSet tokensSet = rangeSet.get(TOKEN_ANNOTATION_TYPE);
+ if(tokensSet == null) continue;
+ List tokens = new ArrayList(tokensSet);
+ Collections.sort(tokens, offsetComparator);
+
+// List tokens = (List)sentenceAnn.getFeatures().get("tokens");
+ List sentence = new ArrayList(tokens.size());
+ Iterator tokIter = tokens.iterator();
+ while(tokIter.hasNext()){
+ Annotation token = (Annotation)tokIter.next();
+ String text =
(String)token.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
+ sentence.add(text);
+ }//while(tokIter.hasNext())
+
+ //run the POSTagger over this sentence
+ List sentences4tagger = new ArrayList(1);
+ sentences4tagger.add(sentence);
+prepTime += System.currentTimeMillis() - start;
+start = System.currentTimeMillis();
+ List taggerResults = tagger.runTagger(sentences4tagger);
+posTime += System.currentTimeMillis() - start;
+start = System.currentTimeMillis();
+ //add the results to the output annotation set
+ //we only get one sentence
+ List sentenceFromTagger = (List)taggerResults.get(0);
+ if(sentenceFromTagger.size() != sentence.size()){
+ String taggerResult = "";
+ for(int i = 0; i< sentenceFromTagger.size(); i++){
+ taggerResult += ((String[])sentenceFromTagger.get(i))[1] + ", ";
+ }
+ throw new GateRuntimeException(
+ "POS Tagger malfunction: the output size (" +
+ sentenceFromTagger.size() +
+ ") is different from the input size (" +
+ sentence.size() + ")!" +
+ "\n Input: " + sentence + "\nOutput: " + taggerResult);
+ }
+ for(int i = 0; i< sentence.size(); i++){
+ String category = ((String[])sentenceFromTagger.get(i))[1];
+ Annotation token = (Annotation)tokens.get(i);
+ token.getFeatures().
+ put(TOKEN_CATEGORY_FEATURE_NAME, category);
+ }//for(i = 0; i<= sentence.size(); i++)
+postTime += System.currentTimeMillis() - start;
+ fireProgressChanged(sentIndex++ * 100 / sentCnt);
+ }//while(sentIter.hasNext())
+Out.prln("POS preparation time:" + prepTime);
+Out.prln("POS execution time:" + posTime);
+Out.prln("POS after execution time:" + postTime);
+ fireProcessFinished();
+ long endTime = System.currentTimeMillis();
+ fireStatusChanged(document.getName() + " tagged in " +
+ NumberFormat.getInstance().format(
+ (double)(endTime - startTime) / 1000) + " seconds!");
+ }else{
+ throw new GateRuntimeException("No sentences to process!\n" +
+ "Please run a sentence splitter first!");
+ }//if(as != null && as.size() > 0)
+*/
+ }
+
+
+ /**
+  * Stores a feature produced for {@code annot} on the configured output
+  * annotation.
+  * <p>
+  * When the output annotation type equals the base token type and the input
+  * and output annotation set names are the same (null and "" are treated
+  * alike), the feature is written straight onto {@code annot}. Otherwise the
+  * output set is searched for an annotation of the output type with exactly
+  * the same start and end offsets; the feature is written on the first match,
+  * or on a newly created annotation when there is none.
+  *
+  * @param annot the annotation the feature value was computed for
+  * @param featureName name of the feature to set
+  * @param featureValue value of the feature to set
+  * @throws GateRuntimeException if the new annotation cannot be added to the
+  *         output set; the original failure is kept as the cause
+  */
+ protected void addFeatures(Annotation annot, String featureName, String featureValue) throws GateRuntimeException {
+   String tempIASN = inputASName == null ? "" : inputASName;
+   String tempOASN = outputASName == null ? "" : outputASName;
+   if(outputAnnotationType.equals(baseTokenAnnotationType) && tempIASN.equals(tempOASN)) {
+     annot.getFeatures().put(featureName, featureValue);
+     return;
+   }
+
+   // compare the boxed offsets directly instead of narrowing them to int
+   Long start = annot.getStartNode().getOffset();
+   Long end = annot.getEndNode().getOffset();
+
+   AnnotationSet outputAS = (outputASName == null) ?
+       document.getAnnotations() :
+       document.getAnnotations(outputASName);
+
+   // reuse an existing annotation that covers exactly the same span
+   AnnotationSet candidates = outputAS.get(outputAnnotationType);
+   if(candidates != null) {
+     for(Annotation candidate : candidates) {
+       if(candidate.getStartNode().getOffset().equals(start)
+           && candidate.getEndNode().getOffset().equals(end)) {
+         candidate.getFeatures().put(featureName, featureValue);
+         return;
+       }
+     }
+   }
+
+   // nothing to reuse: create a new annotation carrying the feature
+   FeatureMap features = Factory.newFeatureMap();
+   features.put(featureName, featureValue);
+   try {
+     outputAS.add(start, end, outputAnnotationType, features);
+   } catch(Exception e) {
+     throw new GateRuntimeException("Invalid Offsets", e);
+   }
+ }
+
+ /**
+ * Sets the URL of the lexicon file (optional CREOLE parameter).
+ *
+ * @param newLexiconURL location of the lexicon
+ */
+ @Optional
+ @CreoleParameter(comment="The URL to the lexicon file",
defaultValue="resources/heptag/lexicon")
+ public void setLexiconURL(java.net.URL newLexiconURL) {
+ lexiconURL = newLexiconURL;
+ }
+ /** Returns the currently configured lexicon URL. */
+ public java.net.URL getLexiconURL() {
+ return lexiconURL;
+ }
+
+ /**
+ * Sets the URL of the ruleset file (optional CREOLE parameter).
+ *
+ * @param newRulesURL location of the ruleset
+ */
+ @Optional
+ @CreoleParameter(comment="The URL to the ruleset file",
defaultValue="resources/heptag/ruleset")
+ public void setRulesURL(java.net.URL newRulesURL) {
+ rulesURL = newRulesURL;
+ }
+
+ /**
+ * Sets the encoding used for reading the rules and lexicon files
+ * (optional CREOLE parameter).
+ *
+ * @param encoding the encoding name
+ */
+ @Optional
+ @CreoleParameter(comment="The encoding used for reading rules and lexicons")
+ public void setEncoding(String encoding) {
+ this.encoding = encoding;
+ }
+
+ /** Returns the currently configured ruleset URL. */
+ public java.net.URL getRulesURL() {
+ return rulesURL;
+ }
+
+ /**
+ * Sets the name of the annotation set to read from (optional run-time
+ * parameter).
+ *
+ * @param newInputASName name of the input annotation set
+ */
+ @RunTime
+ @Optional
+ @CreoleParameter(comment="The annotation set to be used as input that must
contain 'Token' and 'Sentence' annotations")
+ public void setInputASName(String newInputASName) {
+ inputASName = newInputASName;
+ }
+ /** Returns the name of the input annotation set. */
+ public String getInputASName() {
+ return inputASName;
+ }
+ /** Returns the encoding used for reading the rules and lexicon files. */
+ public String getEncoding() {
+ return this.encoding;
+ }
+
+ /** Returns the name of the base token annotation type. */
+ public String getBaseTokenAnnotationType() {
+ return this.baseTokenAnnotationType;
+ }
+
+ /** Returns the name of the base sentence annotation type. */
+ public String getBaseSentenceAnnotationType() {
+ return this.baseSentenceAnnotationType;
+ }
+
+ /** Returns the annotation type on which the new features are stored. */
+ public String getOutputAnnotationType() {
+ return this.outputAnnotationType;
+ }
+
+ /**
+ * Sets the name of the base token annotation type (run-time parameter).
+ *
+ * @param baseTokenAnnotationType the annotation type name
+ */
+ @RunTime
+ @CreoleParameter(comment="The name of the base 'Token' annotation type",
defaultValue="Token")
+ public void setBaseTokenAnnotationType(String baseTokenAnnotationType) {
+ this.baseTokenAnnotationType = baseTokenAnnotationType;
+ }
+
+ /**
+ * Sets the name of the base sentence annotation type (run-time parameter).
+ *
+ * @param baseSentenceAnnotationtype the annotation type name
+ */
+ @RunTime
+ @CreoleParameter(comment="The name of the base 'Sentence' annotation type",
defaultValue="Sentence")
+ public void setBaseSentenceAnnotationType(String baseSentenceAnnotationtype)
{
+ this.baseSentenceAnnotationType = baseSentenceAnnotationtype;
+ }
+
+ /**
+ * Sets the annotation type on which the new features are stored (run-time
+ * parameter).
+ *
+ * @param outputAnnotationType the annotation type name
+ */
+ @RunTime
+ @CreoleParameter(comment="The name of the annotation type where the new
features should be added", defaultValue="Token")
+ public void setOutputAnnotationType(String outputAnnotationType) {
+ this.outputAnnotationType = outputAnnotationType;
+ }
+
+ /** Returns the name of the output annotation set. */
+ public String getOutputASName() {
+ return this.outputASName;
+ }
+
+ /**
+ * Sets the name of the annotation set that receives the output (optional
+ * run-time parameter).
+ *
+ * @param outputASName name of the output annotation set
+ */
+ @RunTime
+ @Optional
+ @CreoleParameter(comment="The annotation set to be used as output for POS
annotations")
+ public void setOutputASName(String outputASName) {
+ this.outputASName = outputASName;
+ }
+
+ /** The underlying tagger implementation that is run over each sentence. */
+ protected hepple.postag.POSTagger tagger;
+ /** URL of the lexicon file. */
+ private java.net.URL lexiconURL;
+ /** URL of the ruleset file. */
+ private java.net.URL rulesURL;
+ /** Name of the input annotation set. */
+ private String inputASName;
+ /** Encoding used for reading the rules and lexicon files. */
+ private String encoding;
+ /** Name of the base token annotation type. */
+ private String baseTokenAnnotationType;
+ /** Name of the base sentence annotation type. */
+ private String baseSentenceAnnotationType;
+ /** Annotation type on which the new features are stored. */
+ private String outputAnnotationType;
+ /** Name of the output annotation set. */
+ private String outputASName;
+}
Copied: gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/Transducer.java
(from rev 19180, gate/branches/sawdust2/src/main/gate/creole/Transducer.java)
===================================================================
--- gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/Transducer.java
(rev 0)
+++ gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/Transducer.java
2016-04-02 14:42:05 UTC (rev 19184)
@@ -0,0 +1,632 @@
+/*
+ * Copyright (c) 1995-2012, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * Valentin Tablan, 01 Feb 2000
+ *
+ * $Id$
+ */
+package gate.creole;
+
+import gate.Controller;
+import gate.Gate;
+import gate.Resource;
+import gate.creole.metadata.CreoleParameter;
+import gate.creole.metadata.CreoleResource;
+import gate.creole.metadata.Optional;
+import gate.creole.metadata.RunTime;
+import gate.gui.MainFrame;
+import gate.jape.Batch;
+import gate.jape.DefaultActionContext;
+import gate.jape.JapeException;
+import gate.jape.JapeFactory;
+import gate.jape.constraint.AnnotationAccessor;
+import gate.jape.constraint.ConstraintPredicate;
+import gate.util.Benchmarkable;
+import gate.util.Err;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.swing.Action;
+import javax.swing.JFileChooser;
+import javax.swing.JOptionPane;
+
+/**
+ * A cascaded multi-phase transducer using the Jape language which is a variant
+ * of the CPSL language.
+ */
+@CreoleResource(name = "JAPE Transducer",
+ comment = "A module for executing Jape grammars.",
+ helpURL = "http://gate.ac.uk/userguide/chap:jape",
+ icon = "jape"
+ )
+public class Transducer
+ extends AbstractLanguageAnalyser
+ implements gate.gui.ActionsPublisher, Benchmarkable, ControllerAwarePR
+{
+ private static final long serialVersionUID = -8789395272116846595L;
+
+ /** Name of the "document" parameter. */
+ public static final String TRANSD_DOCUMENT_PARAMETER_NAME = "document";
+
+ /** Name of the parameter set through {@link #setInputASName}. */
+ public static final String TRANSD_INPUT_AS_PARAMETER_NAME = "inputASName";
+
+ /** Name of the parameter set through {@link #setOutputASName}. */
+ public static final String TRANSD_OUTPUT_AS_PARAMETER_NAME = "outputASName";
+
+ /** Name of the parameter set through {@link #setEncoding}. */
+ public static final String TRANSD_ENCODING_PARAMETER_NAME = "encoding";
+
+ /** Name of the parameter set through {@link #setGrammarURL}. */
+ public static final String TRANSD_GRAMMAR_URL_PARAMETER_NAME = "grammarURL";
+
+ /** Name of the parameter set through {@link #setBinaryGrammarURL}. */
+ public static final String TRANSD_BINARY_GRAMMAR_URL_PARAMETER_NAME =
"binaryGrammarURL";
+
+ /** Name of the parameter set through {@link #setOperators}. */
+ public static final String TRANSD_OPERATORS_PARAMETER_NAME = "operators";
+
+ /** Name of the parameter set through {@link #setAnnotationAccessors}. */
+ public static final String TRANSD_ANNOTATION_ACCESSORS_PARAMETER_NAME =
"annotationAccessors";
+
+
+ /** Actions returned (as a copy) by {@link #getActions()}; filled in by the constructor. */
+ protected List<Action> actionList;
+ /** Action context created in {@link #init()} and handed to the batch. */
+ protected DefaultActionContext actionContext;
+
+ /**
+ * Default constructor. Populates the action list with a null entry followed
+ * by the "Serialize Transducer" action; the grammar itself is loaded later
+ * by the {@link #init} method.
+ */
+ public Transducer() {
+ actionList = new ArrayList<Action>();
+ // NOTE(review): the null entry is presumably rendered as a menu separator - confirm against ActionsPublisher
+ actionList.add(null);
+ actionList.add(new SerializeTransducerAction());
+ }
+
+ /*
+ * private void writeObject(ObjectOutputStream oos) throws IOException {
+ * Out.prln("writing transducer"); oos.defaultWriteObject();
+ * Out.prln("finished writing transducer"); } // writeObject
+ */
+ /**
+  * Initialises the transducer. All required parameters must already have
+  * been set through the appropriate setXXX() methods.
+  * <p>
+  * Custom constraints are registered first. The batch is then either
+  * deserialised from {@code binaryGrammarURL} (which takes precedence) or
+  * compiled from {@code grammarURL} using {@code encoding}. Finally the
+  * action context is created and attached to the batch.
+  *
+  * @return a reference to <b>this</b>
+  * @throws ResourceInstantiationException if neither grammar URL is set, if
+  *         the encoding is missing, or if loading the grammar fails
+  */
+ @Override
+ public Resource init() throws ResourceInstantiationException {
+   try {
+     fireProgressChanged(0);
+
+     initCustomConstraints();
+
+     if(binaryGrammarURL != null) {
+       // NOTE(review): Java deserialisation can run code chosen by the stream;
+       // binary grammars should only be loaded from trusted locations.
+       java.io.InputStream rawStream = binaryGrammarURL.openStream();
+       try {
+         ObjectInputStream s = new ObjectInputStream(rawStream);
+         batch = (gate.jape.Batch)s.readObject();
+       } finally {
+         // always release the connection, even when deserialisation fails
+         rawStream.close();
+       }
+     } else if(grammarURL != null) {
+       if(encoding != null) {
+         batch = new Batch(grammarURL, encoding, new InternalStatusListener());
+         if(enableDebugging != null) {
+           batch.setEnableDebugging(enableDebugging.booleanValue());
+         } else {
+           batch.setEnableDebugging(false);
+         }
+         batch.setOntology(ontology);
+       } else {
+         throw new ResourceInstantiationException("encoding is not set!");
+       }
+     } else {
+       throw new ResourceInstantiationException(
+           "Neither grammarURL or binaryGrammarURL parameters are set!");
+     }
+   } catch(Exception e) {
+     // every failure above (including the ones thrown explicitly) is reported
+     // as a grammar loading error with the original exception as the cause
+     String message = "Error while parsing the grammar ";
+     if(grammarURL != null) message += "(" + grammarURL.toExternalForm() + ")";
+     message += ":";
+     throw new ResourceInstantiationException(message, e);
+   } finally {
+     fireProcessFinished();
+   }
+   actionContext = initActionContext();
+   batch.setActionContext(actionContext);
+   batch.addProgressListener(new IntervalProgressListener(0, 100));
+   return this;
+ }
+
+ /**
+  * Creates the action context used by this transducer. Subclasses of the
+  * Transducer PR may override this to supply their own subclass of
+  * DefaultActionContext with extra functionality.
+  *
+  * @return a new DefaultActionContext object
+  */
+ protected DefaultActionContext initActionContext() {
+   DefaultActionContext freshContext = new DefaultActionContext();
+   return freshContext;
+ }
+
+
+
+ /**
+ * Implementation of the run() method from {@link java.lang.Runnable}. This
+ * method is responsible for doing all the processing of the input document.
+ */
+ @Override
+ public void execute() throws ExecutionException {
+ interrupted = false;
+ if(document == null) throw new ExecutionException("No document provided!");
+ if(inputASName != null && inputASName.equals("")) inputASName = null;
+ if(outputASName != null && outputASName.equals("")) outputASName = null;
+ // the action context always reflects, for each document executed,
+ // the current PR features and the corpus, if present
+ actionContext.setCorpus(corpus);
+ actionContext.setPRFeatures(features);
+ actionContext.setPR(this);
+ try {
+ batch.transduce(document, inputASName == null
+ ? document.getAnnotations()
+ : document.getAnnotations(inputASName), outputASName == null
+ ? document.getAnnotations()
+ : document.getAnnotations(outputASName));
+ } catch(JapeException je) {
+ throw new ExecutionException(je);
+ }
+ }
+
+ /**
+  * Returns the actions that can be performed on this resource, as a fresh
+  * list so that callers cannot modify the internal one.
+  *
+  * @return a List of Action objects (or null values)
+  */
+ @Override
+ public List<Action> getActions() {
+   return new ArrayList<Action>(actionList);
+ }
+
+ /**
+  * Loads any custom operators and annotation accessors into the
+  * ConstraintFactory. Each configured class name is loaded, instantiated
+  * once to obtain its operator symbol or meta-property key, and registered
+  * under that name.
+  *
+  * @throws ResourceInstantiationException if a class cannot be found, does
+  *         not implement the required interface, or cannot be instantiated
+  */
+ protected void initCustomConstraints() throws ResourceInstantiationException {
+   //Load operators
+   if(operators != null) {
+     for(String opName : operators) {
+       Class<? extends ConstraintPredicate> clazz =
+           loadCustomClass(opName, ConstraintPredicate.class, "operator");
+       //instantiate an instance of the class so can get the operator string
+       try {
+         ConstraintPredicate predicate = clazz.newInstance();
+         String opSymbol = predicate.getOperator();
+         //now store it in ConstraintFactory
+         JapeFactory.getConstraintFactory().addOperator(opSymbol, clazz);
+       } catch(Exception e) {
+         throw new ResourceInstantiationException(
+             "Cannot instantiate class for operator: " + opName, e);
+       }
+     }
+   }
+
+   //Load annotationAccessors
+   if(annotationAccessors != null) {
+     for(String accessorName : annotationAccessors) {
+       Class<? extends AnnotationAccessor> clazz =
+           loadCustomClass(accessorName, AnnotationAccessor.class, "accessor");
+       //instantiate an instance of the class so can get the meta-property name string
+       try {
+         AnnotationAccessor aa = clazz.newInstance();
+         String accSymbol = (String)aa.getKey();
+         //now store it in ConstraintFactory
+         JapeFactory.getConstraintFactory().addMetaProperty(accSymbol, clazz);
+       } catch(Exception e) {
+         throw new ResourceInstantiationException(
+             "Cannot instantiate class for accessor: " + accessorName, e);
+       }
+     }
+   }
+ }
+
+ /**
+  * Loads a custom constraint class by name, trying the GATE class loader
+  * first and the current thread's context class loader second, and checks
+  * that it implements the required interface.
+  *
+  * @param className fully qualified name of the class to load
+  * @param requiredType interface the loaded class must implement
+  * @param kind "operator" or "accessor"; only used in error messages
+  * @return the loaded class, typed as a subclass of {@code requiredType}
+  * @throws ResourceInstantiationException if neither loader finds the class
+  *         or the class does not implement {@code requiredType}
+  */
+ private <T> Class<? extends T> loadCustomClass(String className,
+     Class<T> requiredType, String kind) throws ResourceInstantiationException {
+   Class<?> loaded;
+   try {
+     loaded = Class.forName(className, true, Gate.getClassLoader());
+   } catch(ClassNotFoundException e) {
+     //if couldn't find it that way, try with current thread class loader
+     try {
+       loaded = Class.forName(className, true,
+           Thread.currentThread().getContextClassLoader());
+     } catch(ClassNotFoundException e1) {
+       throw new ResourceInstantiationException(
+           "Cannot load class for " + kind + ": " + className, e1);
+     }
+   }
+   try {
+     // checked here so that a wrong type is reported the same way whichever
+     // class loader found the class
+     return loaded.asSubclass(requiredType);
+   } catch(ClassCastException cce) {
+     String label = Character.toUpperCase(kind.charAt(0)) + kind.substring(1);
+     throw new ResourceInstantiationException(label + " class '" + className
+         + "' must implement " + requiredType.getSimpleName(), cce);
+   }
+ }
+
+ /**
+ * Writes a serialized (binary) copy of this transducer's batch to the
+ * given stream and flushes it. The stream is not closed here; that is left
+ * to the caller.
+ * This is the same function used by the "Serialize Transducer" menu item,
+ * so the functionality is available from code as well as from the GUI.
+ *
+ * @param out the stream to write the batch to
+ * @throws IOException if writing or flushing fails
+ **/
+ public void serialize(ObjectOutputStream out) throws IOException {
+ out.writeObject(batch);
+ out.flush();
+ }
+
+ /**
+  * GUI action that saves the JAPE transducer to a binary file chosen by the
+  * user. The work runs on a separate low-priority thread while the GUI is
+  * locked; I/O failures are shown in an error dialog.
+  *
+  * @author niraj
+  */
+ protected class SerializeTransducerAction extends javax.swing.AbstractAction {
+
+   private static final long serialVersionUID = 4248612378452393237L;
+
+   public SerializeTransducerAction() {
+     super("Serialize Transducer");
+     putValue(SHORT_DESCRIPTION, "Serializes the Transducer as binary file");
+   }
+
+   /**
+    * Asks the user for a target file and, if one is approved, serializes the
+    * transducer into it.
+    */
+   @Override
+   public void actionPerformed(java.awt.event.ActionEvent evt) {
+     Runnable runnable = new Runnable() {
+       @Override
+       public void run() {
+         JFileChooser fileChooser = MainFrame.getFileChooser();
+         fileChooser.setFileFilter(fileChooser.getAcceptAllFileFilter());
+         fileChooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
+         fileChooser.setMultiSelectionEnabled(false);
+         if(fileChooser.showSaveDialog(null) == JFileChooser.APPROVE_OPTION) {
+           File file = fileChooser.getSelectedFile();
+           try {
+             MainFrame.lockGUI("Serializing JAPE Transducer...");
+             FileOutputStream out = new FileOutputStream(file);
+             try {
+               ObjectOutputStream s = new ObjectOutputStream(out);
+               serialize(s);
+               s.close();
+             } finally {
+               // release the file handle even when serialisation fails
+               out.close();
+             }
+           } catch(IOException ioe) {
+             JOptionPane.showMessageDialog(MainFrame.getInstance(),
+                 "Error!\n" + ioe.toString(),
+                 "GATE", JOptionPane.ERROR_MESSAGE);
+             ioe.printStackTrace(Err.getPrintWriter());
+           } finally {
+             MainFrame.unlockGUI();
+           }
+         }
+       }
+     };
+     Thread thread = new Thread(runnable, "Transduer Serialization");
+     thread.setPriority(Thread.MIN_PRIORITY);
+     thread.start();
+   }
+ }
+
+ /**
+ * Asks this transducer to stop its execution as soon as possible: sets the
+ * interrupted flag and forwards the request to the underlying batch.
+ */
+ @Override
+ public synchronized void interrupt() {
+ interrupted = true;
+ // NOTE(review): batch is only assigned in init(); an interrupt before that would dereference null - confirm callers
+ batch.interrupt();
+ }
+
+ /**
+ * Sets the grammar to be used for building this transducer. It is only
+ * used by {@link #init()} when no binary grammar URL is set.
+ *
+ * @param newGrammarURL
+ * a URL to a file containing a Jape grammar.
+ */
+ @CreoleParameter(
+ comment = "The URL to the grammar file.",
+ suffixes = "jape",
+ disjunction = "grammar",
+ priority = 1
+ )
+ public void setGrammarURL(java.net.URL newGrammarURL) {
+ grammarURL = newGrammarURL;
+ }
+
+ /**
+ * Gets the URL to the grammar used to build this transducer.
+ *
+ * @return a {@link java.net.URL} pointing to the grammar file.
+ */
+ public java.net.URL getGrammarURL() {
+ return grammarURL;
+ }
+
+ /**
+ * Sets the encoding to be used for reading the input file(s) forming the
+ * Jape grammar. Note that if the input grammar is a multi-file one then the
+ * same encoding will be used for reading all the files. Multi-file grammars
+ * with different encodings across the composing files are not supported!
+ *
+ * @param newEncoding
+ * a {@link String} representing the encoding.
+ */
+ @CreoleParameter(
+ comment = "The encoding used for reading the grammar",
+ defaultValue = "UTF-8"
+ )
+ public void setEncoding(String newEncoding) {
+ encoding = newEncoding;
+ }
+
+ /**
+ * Gets the encoding used for reading the grammar file(s).
+ *
+ * @return the encoding name
+ */
+ public String getEncoding() {
+ return encoding;
+ }
+
+ /**
+ * Sets the name of the {@link gate.AnnotationSet} to be used as input for
+ * the transducer. A null or empty name makes {@link #execute()} use the
+ * document's default annotation set.
+ *
+ * @param newInputASName
+ * the name of the input annotation set
+ */
+ @RunTime
+ @Optional
+ @CreoleParameter(
+ comment = "The annotation set to be used as input for the transducer"
+ )
+ public void setInputASName(String newInputASName) {
+ inputASName = newInputASName;
+ }
+
+ /**
+ * Gets the name of the {@link gate.AnnotationSet} used as input by this
+ * transducer.
+ *
+ * @return the name of the input annotation set
+ */
+ public String getInputASName() {
+ return inputASName;
+ }
+
+ /**
+ * Sets the name of the {@link gate.AnnotationSet} to be used as output by
+ * the transducer. A null or empty name makes {@link #execute()} use the
+ * document's default annotation set.
+ *
+ * @param newOutputASName
+ * the name of the output annotation set
+ */
+ @RunTime
+ @Optional
+ @CreoleParameter(
+ comment = "The annotation set to be used as output for the transducer"
+ )
+ public void setOutputASName(String newOutputASName) {
+ outputASName = newOutputASName;
+ }
+
+ /**
+ * Gets the name of the {@link gate.AnnotationSet} used as output by this
+ * transducer.
+ *
+ * @return the name of the output annotation set
+ */
+ public String getOutputASName() {
+ return outputASName;
+ }
+
+ /**
+ * Gets the switch used to activate the JAPE debugger.
+ *
+ * @return the current setting; may be null if it was set to null
+ */
+ public Boolean getEnableDebugging() {
+ return enableDebugging;
+ }
+
+ /**
+ * Sets the switch used to activate the JAPE debugger. The value is only
+ * stored here: {@link #init()} passes it to the batch when compiling from
+ * the grammar URL, and controllerExecutionStarted passes it to the action
+ * context.
+ *
+ * @param enableDebugging the new setting
+ */
+ @RunTime
+ @CreoleParameter(defaultValue = "false")
+ public void setEnableDebugging(Boolean enableDebugging) {
+ this.enableDebugging = enableDebugging;
+ }
+
+ /**
+ * Gets the list of class names for any custom boolean operators.
+ * Classes must implement {@link gate.jape.constraint.ConstraintPredicate}.
+ *
+ * @return the configured class names, or null if none were set
+ */
+ public List<String> getOperators() {
+ return operators;
+ }
+
+ /**
+ * Sets the list of class names for any custom boolean operators.
+ * Classes must implement {@link gate.jape.constraint.ConstraintPredicate}.
+ * The classes are loaded and registered by initCustomConstraints(), which
+ * {@link #init()} calls.
+ *
+ * @param operators the class names to register
+ */
+ @Optional
+ @CreoleParameter(
+ comment = "Class names that implement
gate.jape.constraint.ConstraintPredicate."
+ )
+ public void setOperators(List<String> operators) {
+ this.operators = operators;
+ }
+
+ /**
+ * Gets the list of class names for any custom
+ * {@link gate.jape.constraint.AnnotationAccessor}s.
+ *
+ * @return the configured class names, or null if none were set
+ */
+ public List<String> getAnnotationAccessors() {
+ return annotationAccessors;
+ }
+
+ /**
+ * Sets the list of class names for any custom
+ * {@link gate.jape.constraint.AnnotationAccessor}s.
+ * The classes are loaded and registered by initCustomConstraints(), which
+ * {@link #init()} calls.
+ *
+ * @param annotationAccessors the class names to register
+ */
+ @Optional
+ @CreoleParameter(
+ comment = "Class names that implement
gate.jape.constraint.AnnotationAccessor."
+ )
+ public void setAnnotationAccessors(List<String> annotationAccessors) {
+ this.annotationAccessors = annotationAccessors;
+ }
+
+ /**
+ * Gets the benchmark ID of this transducer's batch.
+ * NOTE(review): the batch is only assigned in init(); calling this earlier
+ * would dereference null - confirm callers.
+ */
+ @Override
+ public String getBenchmarkId() {
+ return batch.getBenchmarkId();
+ }
+
+ /**
+ * Sets the benchmark ID of this PR by storing it on the underlying batch.
+ */
+ @Override
+ public void setBenchmarkId(String benchmarkId) {
+ batch.setBenchmarkId(benchmarkId);
+ }
+
+ /**
+ * The URL to the jape file used as grammar by this transducer.
+ */
+ protected java.net.URL grammarURL;
+
+ /**
+ * The URL to the serialized jape file used as grammar by this transducer.
+ */
+ protected java.net.URL binaryGrammarURL;
+
+ /**
+ * The actual JAPE batch used for processing the document(s); assigned in
+ * init().
+ */
+ protected Batch batch;
+
+ /**
+ * The encoding used for reading the grammar file(s).
+ */
+ protected String encoding;
+
+ /**
+ * The name of the {@link gate.AnnotationSet} used as input for the
+ * transducer.
+ */
+ protected String inputASName;
+
+ /**
+ * The name of the {@link gate.AnnotationSet} used as output by the
+ * transducer.
+ */
+ protected String outputASName;
+
+ /**
+ * The ontology that will be available on the RHS of JAPE rules.
+ */
+ protected gate.creole.ontology.Ontology ontology;
+
+ /**
+ * List of class names for any custom
+ * {@link gate.jape.constraint.ConstraintPredicate}.
+ */
+ protected List<String> operators = null;
+
+ /**
+ * List of class names for any custom
+ * {@link gate.jape.constraint.AnnotationAccessor}s.
+ */
+ protected List<String> annotationAccessors = null;
+
+ /**
+ * Gets the ontology used by this transducer.
+ *
+ * @return an {@link gate.creole.ontology.Ontology} value.
+ */
+ public gate.creole.ontology.Ontology getOntology() {
+ return ontology;
+ }
+
+ /**
+ * Sets the ontology used by this transducer. If the batch already exists
+ * the ontology is also passed on to it straight away.
+ *
+ * @param ontology
+ * an {@link gate.creole.ontology.Ontology} value.
+ */
+ @RunTime
+ @Optional
+ @CreoleParameter(
+ comment = "The ontology to be used by this transducer"
+ )
+ public void setOntology(gate.creole.ontology.Ontology ontology) {
+ this.ontology = ontology;
+ //ontology is now a run-time param so we need to propagate it down to the
+ //actual SPTs included in this transducer.
+ // the batch does not exist before init(), hence the null check
+ if(batch!= null) batch.setOntology(ontology);
+ }
+
+ /**
+ * A switch used to activate the JAPE debugger.
+ */
+ protected Boolean enableDebugging = Boolean.FALSE;
+
+
+ /**
+ * Gets the URL to the serialized (binary) grammar.
+ *
+ * @return the binary grammar URL, or null if none was set
+ */
+ public java.net.URL getBinaryGrammarURL() {
+ return binaryGrammarURL;
+ }
+
+ /**
+ * Sets the URL to the serialized (binary) grammar. When this is non-null,
+ * {@link #init()} deserializes the batch from it and does not read the
+ * grammar URL.
+ *
+ * @param binaryGrammarURL a URL to a serialized grammar
+ */
+ @CreoleParameter(
+ comment = "The URL to the binary grammar file.",
+ suffixes = "jape",
+ disjunction = "grammar",
+ priority = 100
+ )
+ public void setBinaryGrammarURL(java.net.URL binaryGrammarURL) {
+ this.binaryGrammarURL = binaryGrammarURL;
+ }
+
+ // methods implementing ControllerAwarePR
+ /**
+ * Fills the action context with the controller, corpus, PR features, PR
+ * name, this PR and the debugging switch, then runs the batch's
+ * controller-started block.
+ *
+ * @param c the controller that is about to run
+ * @throws ExecutionException declared by the interface method; presumably
+ *         raised by the batch's block - confirm against Batch
+ */
+ @Override
+ public void controllerExecutionStarted(Controller c)
+ throws ExecutionException {
+ actionContext.setController(c);
+ actionContext.setCorpus(corpus);
+ actionContext.setPRFeatures(features);
+ actionContext.setPRName(this.getName());
+ actionContext.setPR(this);
+ actionContext.setDebuggingEnabled(enableDebugging);
+ batch.runControllerExecutionStartedBlock(actionContext,c,ontology);
+ }
+
+ /**
+  * Runs the batch's controller-finished block and then clears the corpus,
+  * controller and PR references held by the action context. The references
+  * are cleared even when the block throws, so a failed run does not leave
+  * the context pointing at them.
+  *
+  * @param c the controller that has finished
+  * @throws ExecutionException if the controller-finished block fails
+  */
+ @Override
+ public void controllerExecutionFinished(Controller c)
+     throws ExecutionException {
+   try {
+     batch.runControllerExecutionFinishedBlock(actionContext,c,ontology);
+   } finally {
+     actionContext.setCorpus(null);
+     actionContext.setController(null);
+     actionContext.setPR(null);
+   }
+ }
+
+ /**
+  * Runs the batch's controller-aborted block and then clears the corpus,
+  * controller and PR references held by the action context. The references
+  * are cleared even when the block throws, so a failed run does not leave
+  * the context pointing at them.
+  *
+  * @param c the controller whose execution was aborted
+  * @param t the throwable that caused the abort
+  * @throws ExecutionException if the controller-aborted block fails
+  */
+ @Override
+ public void controllerExecutionAborted(Controller c, Throwable t)
+     throws ExecutionException {
+   try {
+     batch.runControllerExecutionAbortedBlock(actionContext,c,t,ontology);
+   } finally {
+     actionContext.setCorpus(null);
+     actionContext.setController(null);
+     actionContext.setPR(null);
+   }
+ }
+
+
+}
Modified:
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/coref/Coreferencer.java
===================================================================
--- gate/branches/sawdust2/src/main/gate/creole/coref/Coreferencer.java
2016-04-02 08:23:38 UTC (rev 19180)
+++
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/coref/Coreferencer.java
2016-04-02 14:42:05 UTC (rev 19184)
@@ -19,6 +19,7 @@
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
+import gate.Factory;
import gate.FeatureMap;
import gate.ProcessingResource;
import gate.Resource;
@@ -72,7 +73,8 @@
public Resource init() throws ResourceInstantiationException {
// load all submodules
- pronominalModule.init();
+ //pronominalModule.init();
+ pronominalModule =
(PronominalCoref)Factory.createResource("gate.creole.coref.PronominalCoref");
return this;
} // init()
@@ -87,7 +89,8 @@
*/
@Override
public void reInit() throws ResourceInstantiationException {
- init();
+ //init();
+ pronominalModule.reInit();
} // reInit()
/** Set the document to run on. */
Modified:
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/coref/PronominalCoref.java
===================================================================
--- gate/branches/sawdust2/src/main/gate/creole/coref/PronominalCoref.java
2016-04-02 08:23:38 UTC (rev 19180)
+++
gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/coref/PronominalCoref.java
2016-04-02 14:42:05 UTC (rev 19184)
@@ -22,20 +22,20 @@
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
+import gate.LanguageAnalyser;
import gate.Node;
import gate.Resource;
import gate.creole.ANNIEConstants;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
-import gate.creole.Transducer;
+import gate.creole.metadata.CreoleParameter;
+import gate.creole.metadata.CreoleResource;
import gate.util.Benchmark;
import gate.util.Benchmarkable;
import gate.util.Err;
-import gate.util.Files;
import gate.util.SimpleFeatureMapImpl;
-import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
@@ -47,6 +47,7 @@
import java.util.Map;
import java.util.Set;
+@CreoleResource(isPrivate = true)
public class PronominalCoref extends AbstractLanguageAnalyser
implements ANNIEConstants,
Benchmarkable {
@@ -56,15 +57,35 @@
public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";
public static final String COREF_ANN_SET_PARAMETER_NAME =
"annotationSetName";
+
+ public static final String TRANSD_ENCODING_PARAMETER_NAME = "encoding";
+ public static final String TRANSD_GRAMMAR_URL_PARAMETER_NAME = "grammarURL";
+
/** --- */
private static final boolean DEBUG = false;
- //JAPE grammars
- private static final String QT_GRAMMAR_URL = Files.getGateResource(
- "/creole/coref/quoted_text.jape").toString();
- private static final String PLEON_GRAMMAR_URL = Files.getGateResource(
- "/creole/coref/pleonasm.jape").toString();
+ private URL qtGrammarURL;
+
+ public URL getQuotedGrammarURL() {
+ return qtGrammarURL;
+ }
+
+ @CreoleParameter(defaultValue="resources/coref/quoted_text.jape")
+ public void setQuotedGrammarURL(URL qtGrammarURL) {
+ this.qtGrammarURL = qtGrammarURL;
+ }
+
+ private URL pleonGrammarURL;
+
+ public URL getPleonasmGrammarURL() {
+ return pleonGrammarURL;
+ }
+
+ @CreoleParameter(defaultValue="resources/coref/pleonasm.jape")
+ public void setPleonasmGrammarURL(URL pleonGrammarURL) {
+ this.pleonGrammarURL = pleonGrammarURL;
+ }
//annotation types
private static final String QUOTED_TEXT_TYPE = "QuotedText";
@@ -81,9 +102,9 @@
/** --- */
private String annotationSetName;
/** --- */
- private Transducer qtTransducer;
+ private LanguageAnalyser qtTransducer;
/** --- */
- private Transducer pleonTransducer;
+ private LanguageAnalyser pleonTransducer;
/** --- */
private AnnotationSet defaultAnnotations;
/** --- */
@@ -124,19 +145,13 @@
inanimatedSet = new HashSet<String>();
//1. initialise quoted text transducer
- URL qtGrammarURL = null;
- try {
- qtGrammarURL = new URL(QT_GRAMMAR_URL);
- } catch(MalformedURLException mue) {
- throw new ResourceInstantiationException(mue);
- }
FeatureMap params = Factory.newFeatureMap();
- params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME, qtGrammarURL);
- params.put(Transducer.TRANSD_ENCODING_PARAMETER_NAME, "UTF-8");
+ params.put(TRANSD_GRAMMAR_URL_PARAMETER_NAME, qtGrammarURL);
+ params.put(TRANSD_ENCODING_PARAMETER_NAME, "UTF-8");
if (qtTransducer == null) {
features = Factory.newFeatureMap();
Gate.setHiddenAttribute(features, true);
- qtTransducer =
(Transducer)Factory.createResource("gate.creole.Transducer",
+ qtTransducer =
(LanguageAnalyser)Factory.createResource("gate.creole.Transducer",
params, features);
qtTransducer.setName("PronominalCoref-QT " + System.currentTimeMillis());
}
@@ -147,20 +162,13 @@
//2. initialise pleonastic transducer
- URL pleonGrammarURL = null;
- try {
- pleonGrammarURL = new URL(PLEON_GRAMMAR_URL);
- }
- catch(MalformedURLException mue) {
- throw new ResourceInstantiationException(mue);
- }
params = Factory.newFeatureMap();
- params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME, pleonGrammarURL);
- params.put(Transducer.TRANSD_ENCODING_PARAMETER_NAME, "UTF-8");
+ params.put(TRANSD_GRAMMAR_URL_PARAMETER_NAME, pleonGrammarURL);
+ params.put(TRANSD_ENCODING_PARAMETER_NAME, "UTF-8");
if (pleonTransducer == null) {
features = Factory.newFeatureMap();
Gate.setHiddenAttribute(features, true);
- pleonTransducer =
(Transducer)Factory.createResource("gate.creole.Transducer",
+ pleonTransducer =
(LanguageAnalyser)Factory.createResource("gate.creole.Transducer",
params, features);
pleonTransducer.setName("PronominalCoref-Pleon " +
System.currentTimeMillis());
}
Deleted: gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/gazetteer/FlexGazMappingTable.java
===================================================================
--- gate/branches/sawdust2/src/main/gate/creole/gazetteer/FlexGazMappingTable.java 2016-04-02 08:23:38 UTC (rev 19180)
+++ gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/gazetteer/FlexGazMappingTable.java 2016-04-02 14:42:05 UTC (rev 19184)
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2012, The University of Sheffield.
- *
- * This file is part of GATE (see http://gate.ac.uk/), and is free software,
- * licenced under the GNU Library General Public License, Version 2, June1991.
- *
- * A copy of this licence is included in the distribution in the file
- * licence.html, and is also available at http://gate.ac.uk/gate/licence.html.
- * $Id$
- */
-package gate.creole.gazetteer;
-
-import java.util.*;
-
-public class FlexGazMappingTable {
-
- private Map<Long, NodePosition> startMap;
- private Map<Long, NodePosition> endMap;
- private long[] tempStartOffsets;
- private long[] tempEndOffsets;
- private boolean updated;
- private int size;
-
-
- public FlexGazMappingTable() {
- startMap = new HashMap<Long, NodePosition>();
- endMap = new HashMap<Long, NodePosition>();
- tempStartOffsets = null;
- tempEndOffsets = null;
- size = 0;
- updated = false;
- }
-
-
- private void add(NodePosition mapping) {
- startMap.put(mapping.getTempStartOffset(), mapping);
- endMap.put(mapping.getTempEndOffset(), mapping);
- size++;
- updated = false;
- }
-
-
- public Collection<NodePosition> getMappings() {
- return startMap.values();
- }
-
-
- public void add(long originalStart, long originalEnd, long tempStart, long
tempEnd) {
- NodePosition mapping = new NodePosition(originalStart, originalEnd,
tempStart, tempEnd);
- add(mapping);
- }
-
-
- public int size() {
- return this.size;
- }
-
-
- public boolean isEmpty() {
- return this.size == 0;
- }
-
-
- private void update() {
- if (updated) {
- return;
- }
-
- tempStartOffsets = new long[size];
- tempEndOffsets = new long[size];
-
- int i = 0;
- for (Long key : startMap.keySet()) {
- tempStartOffsets[i] = key.longValue();
- tempEndOffsets[i] = startMap.get(key).getTempEndOffset();
- i++;
- }
-
- Arrays.sort(tempStartOffsets);
- Arrays.sort(tempEndOffsets);
- updated = true;
- }
-
-
- /** Find the start offset of the latest original annotation
- * that starts at or before this temporary annotation.
- * This method MUST return a valid original annotation
- * start offset or -1.
- * @param tempStartOffset
- * @return -1 is the error code, sorry
- */
-
- public long getBestOriginalStart(long tempStartOffset) {
- update();
- int i = Arrays.binarySearch(tempStartOffsets, tempStartOffset);
-
- // According to the binarySearch API, i = - insPt - 1
-
- if (i == -1) {
- // This means we've undershot the first original annotation
- return -1L;
- }
-
- if (i >= 0) {
- return startMap.get(tempStartOffsets[i]).getOriginalStartOffset();
- }
-
- /* Now we want the position before the insertion point
- * (we've already tested for undershooting the first
- * original annotation) */
- i = - i - 2;
- return startMap.get(tempStartOffsets[i]).getOriginalStartOffset();
- }
-
-
- /** Find the end offset of the first original annotation
- * that ends at or after this temporary annotation. This method
- * MUST return a valid original annotation end offset or -1.
- *
- * @param tempEndOffset
- * @return -1 is the error code, sorry
- */
- public long getBestOriginalEnd(long tempEndOffset) {
- update();
- int i = Arrays.binarySearch(tempEndOffsets, tempEndOffset);
-
- // Exact key is found in the array:
- if (i >= 0) {
- return endMap.get(tempEndOffsets[i]).getOriginalEndOffset();
- }
-
- /* Exact key is not in the array; according
- * to the binarySearch API, i = - insPt - 1
- * We want the insertion point, but if that is past the
- * existing end of the array, then
- * we have overshot the first input annotation */
- i = - i - 1;
- if (i >= size) {
- return -1L;
- }
-
- return endMap.get(tempEndOffsets[i]).getOriginalEndOffset();
- }
-
-
- public void dump() {
- update();
- for (int i = 0 ; i < size ; i++) {
- long start = tempStartOffsets[i];
- long end = tempEndOffsets[i];
- NodePosition m = startMap.get(start);
- System.out.format("FGMT: %d, %d : o(%d, %d) t(%d, %d)\n", start, end,
- m.getOriginalStartOffset(), m.getOriginalEndOffset(),
- m.getTempStartOffset(), m.getTempEndOffset() );
- }
-
-
- }
-
-
-}
Deleted: gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/gazetteer/FlexibleGazetteer.java
===================================================================
--- gate/branches/sawdust2/src/main/gate/creole/gazetteer/FlexibleGazetteer.java 2016-04-02 08:23:38 UTC (rev 19180)
+++ gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/gazetteer/FlexibleGazetteer.java 2016-04-02 14:42:05 UTC (rev 19184)
@@ -1,399 +0,0 @@
-/*
- * FlexibleGazetteer.java
- *
- * Copyright (c) 2004-2012, The University of Sheffield.
- *
- * This file is part of GATE (see http://gate.ac.uk/), and is free software,
- * licenced under the GNU Library General Public License, Version 2, June1991.
- *
- * A copy of this licence is included in the distribution in the file
- * licence.html, and is also available at http://gate.ac.uk/gate/licence.html.
- *
- * Niraj Aswani 02/2002
- * $Id$
- */
-package gate.creole.gazetteer;
-
-import gate.Annotation;
-import gate.AnnotationSet;
-import gate.Document;
-import gate.Factory;
-import gate.FeatureMap;
-import gate.Gate;
-import gate.ProcessingResource;
-import gate.Resource;
-import gate.Utils;
-import gate.corpora.DocumentImpl;
-import gate.creole.AbstractLanguageAnalyser;
-import gate.creole.ExecutionException;
-import gate.creole.ResourceInstantiationException;
-import gate.util.InvalidOffsetException;
-import java.util.List;
-
-
-/**
- * <p>
- * Title: Flexible Gazetteer
- * </p>
- * <p>
- * The Flexible Gazetteer provides users with the flexibility to choose
- * their own customised input and an external Gazetteer. For example,
- * the user might want to replace words in the text with their base
- * forms (which is an output of the Morphological Analyser).
- * </p>
- * <p>
- * The Flexible Gazetteer performs lookup over a document based on the
- * values of an arbitrary feature of an arbitrary annotation type, by
- * using an externally provided gazetteer. It is important to use an
- * external gazetteer as this allows the use of any type of gazetteer
- * (e.g. an Ontological gazetteer).
- * </p>
- *
- * @author niraj aswani
- * @version 1.0
- */
-public class FlexibleGazetteer extends AbstractLanguageAnalyser
- implements ProcessingResource {
-
- private static final long serialVersionUID = -1023682327651886920L;
- private static final String wrappedOutputASName = "Output";
- private static final String wrappedInputASName = "Input";
-
- // SET TO false BEFORE CHECKING IN
- private static final boolean DEBUG = false;
-
- /**
- * Does the actual loading and parsing of the lists. This method must be
- * called before the gazetteer can be used
- */
- @Override
- public Resource init() throws ResourceInstantiationException {
- if(gazetteerInst == null) { throw new ResourceInstantiationException(
- "No Gazetteer Provided!"); }
- return this;
- }
-
- /**
- * This method runs the gazetteer. It assumes that all the needed parameters
- * are set. If they are not, an exception will be fired.
- */
- @Override
- public void execute() throws ExecutionException {
- fireProgressChanged(0);
- fireStatusChanged("Checking Document...");
- if(document == null) { throw new ExecutionException(
- "No document to process!"); }
- // obtain the inputAS
- AnnotationSet inputAS = document.getAnnotations(inputASName);
- // anything in the inputFeatureNames?
- if(inputFeatureNames == null || inputFeatureNames.size() == 0) { throw new
ExecutionException(
- "No input feature names provided!"); }
- // for each input feature, create a temporary document and run the
- // gazetteer
- for(String aFeature : inputFeatureNames) {
- // find out the feature name user wants us to use
- String[] keyVal = aFeature.split("\\.");
- // if invalid feature name
- if(keyVal.length != 2) {
- System.err.println("Invalid input feature name:" + aFeature);
- continue;
- }
- // keyVal[0] = annotation type
- // keyVal[1] = feature name
- // holds mapping for newly created annotations
- FlexGazMappingTable mappingTable = new FlexGazMappingTable();
- fireStatusChanged("Creating temporary Document for feature " + aFeature);
- StringBuilder newdocString =
- new StringBuilder(document.getContent().toString());
- // sort annotations
- List<Annotation> annotations =
- Utils.inDocumentOrder(inputAS.get(keyVal[0]));
-
- // remove duplicate annotations
- // (this makes the reverse mapping much easier)
- removeOverlappingAnnotations(annotations);
- // initially no space is deducted
- int totalDeductedSpaces = 0;
- // now replace the document content with the value of the feature that
- // user has provided
- for(Annotation currentAnnotation : annotations) {
- // if there's no such feature, continue
- if(!currentAnnotation.getFeatures().containsKey(keyVal[1])) continue;
- String newTokenValue =
- currentAnnotation.getFeatures().get(keyVal[1]).toString();
- // if no value found for this feature
- if(newTokenValue == null) continue;
- // feature value found so we need to replace it
- // find the start and end offsets for this token
- long startOffset = Utils.start(currentAnnotation);
- long endOffset = Utils.end(currentAnnotation);
- // let us find the difference between the lengths of the
- // actual string and the newTokenValue
- long actualLength = endOffset - startOffset;
- long lengthDifference = actualLength - newTokenValue.length();
- // so lets find out the new startOffset and endOffset
- long newStartOffset = startOffset - totalDeductedSpaces;
- long newEndOffset = newStartOffset + newTokenValue.length();
- totalDeductedSpaces += lengthDifference;
-
- mappingTable.add(startOffset, endOffset, newStartOffset, newEndOffset);
-
- // and finally replace the actual string in the document
- // with the new document
- newdocString.replace((int)newStartOffset, (int)newStartOffset
- + (int)actualLength, newTokenValue);
- }
-
- // proceed only if there was any replacement Map
- if(mappingTable.isEmpty()) continue;
-
- /* All the binary search stuff is done inside FlexGazMappingTable
- * now, so it's guaranteed to return valid original annotation start
- * and end offsets. */
-
- // otherwise create a temporary document for the new text
- Document tempDoc = null;
- // update the status
- fireStatusChanged("Processing document with Gazetteer...");
- try {
- FeatureMap params = Factory.newFeatureMap();
- params.put("stringContent", newdocString.toString());
- // set the appropriate encoding
- if(document instanceof DocumentImpl) {
- params.put("encoding", ((DocumentImpl)document).getEncoding());
- params.put("markupAware", ((DocumentImpl)document).getMarkupAware());
- }
- FeatureMap features = Factory.newFeatureMap();
- Gate.setHiddenAttribute(features, true);
- tempDoc =
- (Document)Factory.createResource("gate.corpora.DocumentImpl",
- params, features);
-
- /* Mark the temp document with the locations of the input annotations
so
- * that we can later eliminate Lookups that are out of scope. */
- for (NodePosition mapping : mappingTable.getMappings()) {
-
tempDoc.getAnnotations(wrappedInputASName).add(mapping.getTempStartOffset(),
- mapping.getTempEndOffset(), "Input", Factory.newFeatureMap());
- }
- }
- catch(ResourceInstantiationException rie) {
- throw new ExecutionException("Temporary document cannot be created",
rie);
- }
- catch(InvalidOffsetException e) {
- throw new ExecutionException("Error duplicating Input annotations", e);
- }
- try {
- // lets create the gazetteer based on the provided gazetteer name
- gazetteerInst.setDocument(tempDoc);
- gazetteerInst.setAnnotationSetName(wrappedOutputASName);
- fireStatusChanged("Executing Gazetteer...");
- gazetteerInst.execute();
- // now the tempDoc has been looked up, we need to shift the annotations
- // from this temp document to the original document
- fireStatusChanged("Transfering new annotations to the original
one...");
- AnnotationSet originalDocOutput =
document.getAnnotations(outputASName);
-
- if (DEBUG) {
- mappingTable.dump();
- }
-
- // Now iterate over the new annotations and transfer them from the
- // temp document back to the real one
- for(Annotation currentLookup :
tempDoc.getAnnotations(wrappedOutputASName)) {
- long tempStartOffset = Utils.start(currentLookup);
- long tempEndOffset = Utils.end(currentLookup);
-
- /* Ignore annotations that fall entirely outside the input
annotations,
- * so that we don't get dodgy Lookups outside the area covered by
- * Tokens copied into a restricted working set by the AST PR
- * (for example) */
- if (coveredByInput(tempStartOffset, tempEndOffset,
tempDoc.getAnnotations(wrappedInputASName))) {
- long destinationStart =
mappingTable.getBestOriginalStart(tempStartOffset);
- long destinationEnd =
mappingTable.getBestOriginalEnd(tempEndOffset);
-
- boolean valid = (destinationStart >= 0) && (destinationEnd >= 0);
-
- if (valid) {
- // Now make sure there is no other annotation like this
- AnnotationSet testSet =
originalDocOutput.getContained(destinationStart, destinationEnd).get(
- currentLookup.getType(), currentLookup.getFeatures());
- for(Annotation annot : testSet) {
- if(Utils.start(annot) == destinationStart
- && Utils.end(annot) == destinationEnd
- && annot.getFeatures().size() ==
currentLookup.getFeatures().size()) {
- valid = false;
- break;
- }
- }
- }
-
- if(valid) {
- addToOriginal(originalDocOutput, destinationStart,
destinationEnd,
- tempStartOffset, tempEndOffset, currentLookup, tempDoc);
- }
- } // END if coveredByInput(...)
- } // END for OVER ALL THE Lookups
- }
- finally {
- gazetteerInst.setDocument(null);
- if(tempDoc != null) {
- // now remove the newDoc
- Factory.deleteResource(tempDoc);
- }
- }
- } // for
- fireProcessFinished();
- } // END execute METHOD
-
-
- /**
- * Removes the overlapping annotations. preserves the one that appears first
- * in the list. This assumes the list has been sorted already.
- *
- * @param annotations
- */
- private void removeOverlappingAnnotations(List<Annotation> annotations) {
- for(int i = 0; i < annotations.size() - 1; i++) {
- Annotation annot1 = annotations.get(i);
- Annotation annot2 = annotations.get(i + 1);
- long annot2Start = Utils.start(annot2);
- if(annot2Start >= Utils.start(annot1) && annot2Start <
Utils.end(annot1)) {
- annotations.remove(annot2);
- i--;
- continue;
- }
- }
- }
-
-
- /* We try hard not to cause InvalidOffsetExceptions, but let's have
- * some better debugging info in case they happen.
- */
- private void addToOriginal(AnnotationSet original, long originalStart, long
originalEnd,
- long tempStart, long tempEnd, Annotation tempLookup, Document tempDoc)
throws ExecutionException {
- try {
- original.add(originalStart, originalEnd, tempLookup.getType(),
tempLookup.getFeatures());
- }
- catch(InvalidOffsetException ioe) {
- String errorDetails = String.format("temp %d, %d [%s]-> original %d, %d
", tempStart, tempEnd, Utils.stringFor(tempDoc, tempLookup),
- originalStart, originalEnd);
- throw new ExecutionException(errorDetails, ioe);
- }
- }
-
-
-
- /* Is this Lookup within the scope of the input annotations? It might not
be, if Token annotations
- * have been copied by AST only over the significant sections of the
document.
- */
- private boolean coveredByInput(long tempStart, long tempEnd, AnnotationSet
tempInputAS) {
- if (tempInputAS.getCovering(wrappedInputASName, tempStart,
tempStart).isEmpty()) {
- return false;
- }
- // implied else
- if (tempInputAS.getCovering(wrappedInputASName, tempEnd,
tempEnd).isEmpty()) {
- return false;
- }
- // implied else
- return true;
- }
-
-
- /**
- * Sets the document to work on
- *
- * @param doc
- */
- @Override
- public void setDocument(gate.Document doc) {
- this.document = doc;
- }
-
- /**
- * Returns the document set up by user to work on
- *
- * @return a {@link Document}
- */
- @Override
- public gate.Document getDocument() {
- return this.document;
- }
-
- /**
- * Sets the name of annotation set that should be used for storing new
- * annotations
- *
- * @param outputASName
- */
- public void setOutputASName(String outputASName) {
- this.outputASName = outputASName;
- }
-
- /**
- * Returns the outputAnnotationSetName
- *
- * @return a {@link String} value.
- */
- public String getOutputASName() {
- return this.outputASName;
- }
-
- /**
- * sets the input AnnotationSet Name
- *
- * @param inputASName
- */
- public void setInputASName(String inputASName) {
- this.inputASName = inputASName;
- }
-
- /**
- * Returns the inputAnnotationSetName
- *
- * @return a {@link String} value.
- */
- public String getInputASName() {
- return this.inputASName;
- }
-
- /**
- * Feature names for example: Token.string, Token.root etc... Values of these
- * features should be used to replace the actual string of these features.
- * This method allows a user to set the name of such features
- *
- * @param inputs
- */
- public void setInputFeatureNames(java.util.List<String> inputs) {
- this.inputFeatureNames = inputs;
- }
-
- /**
- * Returns the feature names that are provided by the user to use their
values
- * to replace their actual strings in the document
- *
- * @return a {@link List} value.
- */
- public java.util.List<String> getInputFeatureNames() {
- return this.inputFeatureNames;
- }
-
- public Gazetteer getGazetteerInst() {
- return this.gazetteerInst;
- }
-
- public void setGazetteerInst(gate.creole.gazetteer.Gazetteer gazetteerInst) {
- this.gazetteerInst = gazetteerInst;
- }
-
- // Gazetteer Runtime parameters
- private gate.Document document;
-
- private java.lang.String outputASName;
-
- private java.lang.String inputASName;
-
- // Flexible Gazetteer parameter
- private Gazetteer gazetteerInst;
-
- private java.util.List<String> inputFeatureNames;
-}
Deleted: gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/gazetteer/NodePosition.java
===================================================================
--- gate/branches/sawdust2/src/main/gate/creole/gazetteer/NodePosition.java 2016-04-02 08:23:38 UTC (rev 19180)
+++ gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/gazetteer/NodePosition.java 2016-04-02 14:42:05 UTC (rev 19184)
@@ -1,119 +0,0 @@
-/*
- * NodePosition.java
- *
- * Copyright (c) 2004--2011, The University of Sheffield.
- *
- * This file is part of GATE (see http://gate.ac.uk/), and is free
- * software, licenced under the GNU Library General Public License,
- * Version 2, June1991.
- *
- * A copy of this licence is included in the distribution in the file
- * licence.html, and is also available at http://gate.ac.uk/gate/licence.html.
- *
- * Niraj Aswani 02/2002
- * $Id$
- *
- * 2011-11-18: AF made this immutable.
- */
-
-package gate.creole.gazetteer;
-
-import java.util.Comparator;
-
-/**
- * <p>Title: NodePosition.java </p>
- * <p>Description: This class is used to store the information about the
- * changes in the text and the addition or the subtraction of the spaces.
- * It is used by FlexibleGazetteer. </p>
- * @author Niraj Aswani
- */
-
-public class NodePosition {
-
- /** The original start offset before changes */
- private long originalStartOffset;
-
- /** The original end offset before changes */
- private long originalEndOffset;
-
- /** The new start offset after the changes */
- private long tempStartOffset;
-
- /** The new end offset after the changes */
- private long tempEndOffset;
-
- /**
- * constructor
- * @param osn - old start offset
- * @param oen - old end offset
- * @param nsn - new start offset
- * @param nen - new end offset
- */
- public NodePosition(long osn, long oen, long nsn, long nen) {
- originalStartOffset = osn;
- originalEndOffset = oen;
- tempStartOffset = nsn;
- tempEndOffset = nen;
- }
-
- /**
- * Returns the old start offset
- * @return a <tt>long</tt> value.
- */
- public long getOriginalStartOffset() {
- return originalStartOffset;
- }
-
- /**
- * Returns the old end offset
- * @return a <tt>long</tt> value.
- */
- public long getOriginalEndOffset() {
- return originalEndOffset;
- }
-
- /**
- * Returns new start offset
- * @return a <tt>long</tt> value.
- */
- public long getTempStartOffset() {
- return tempStartOffset;
- }
-
- /**
- * Returns the new end offset
- * @return a <tt>long</tt> value.
- */
- public long getTempEndOffset() {
- return tempEndOffset;
- }
-
-}
-
-
-class NodePositionComparator implements Comparator<NodePosition> {
-
- @Override
- public int compare(NodePosition arg0, NodePosition arg1) {
- long diff = arg0.getTempStartOffset() - arg1.getTempStartOffset();
- if (diff != 0L) {
- return Long.signum(diff);
- }
- // implied else
- diff = arg0.getTempEndOffset() - arg1.getTempEndOffset();
- if (diff != 0L) {
- return Long.signum(diff);
- }
- // implied else
- diff = arg0.getOriginalStartOffset() - arg1.getOriginalStartOffset();
- if (diff != 0L) {
- return Long.signum(diff);
- }
- // implied else
- diff = arg0.getOriginalEndOffset() - arg1.getOriginalEndOffset();
- return Long.signum(diff);
- }
-
-
-
-}
Modified: gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/splitter/SentenceSplitter.java
===================================================================
--- gate/branches/sawdust2/src/main/gate/creole/splitter/SentenceSplitter.java 2016-04-02 08:23:38 UTC (rev 19180)
+++ gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/splitter/SentenceSplitter.java 2016-04-02 14:42:05 UTC (rev 19184)
@@ -64,9 +64,8 @@
SPLIT_GAZ_URL_PARAMETER_NAME = "gazetteerListsURL";
public static final String
- SPLIT_TRANSD_URL_PARAMETER_NAME = "transducerURL";
+ SPLIT_TRANSD_URL_PARAMETER_NAME = "transducerURL";
-
private String benchmarkId;
@Override
@@ -110,7 +109,7 @@
features = Factory.newFeatureMap();
Gate.setHiddenAttribute(features, true);
- transducer = (Transducer)Factory.createResource(
+ transducer = (AbstractLanguageAnalyser)Factory.createResource(
"gate.creole.Transducer",
params, features);
transducer.setName("Transducer " + System.currentTimeMillis());
@@ -261,7 +260,7 @@
return transducerURL;
}
DefaultGazetteer gazetteer;
- Transducer transducer;
+ AbstractLanguageAnalyser transducer;
private java.net.URL transducerURL;
private String encoding;
private java.net.URL gazetteerListsURL;
Modified: gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/tokeniser/DefaultTokeniser.java
===================================================================
--- gate/branches/sawdust2/src/main/gate/creole/tokeniser/DefaultTokeniser.java 2016-04-02 08:23:38 UTC (rev 19180)
+++ gate/branches/sawdust2/plugins/ANNIE/src/gate/creole/tokeniser/DefaultTokeniser.java 2016-04-02 14:42:05 UTC (rev 19184)
@@ -95,7 +95,7 @@
if(DEBUG) Out.prln("Parameters for the transducer: \n" + params);
features = Factory.newFeatureMap();
Gate.setHiddenAttribute(features, true);
- transducer =
(Transducer)Factory.createResource("gate.creole.Transducer",
+ transducer =
(AbstractLanguageAnalyser)Factory.createResource("gate.creole.Transducer",
params, features);
transducer.setName("Transducer " + System.currentTimeMillis());
}
@@ -236,7 +236,7 @@
protected SimpleTokeniser tokeniser;
/** the transducer used for post-processing*/
- protected Transducer transducer;
+ protected AbstractLanguageAnalyser transducer;
private java.net.URL tokeniserRulesURL;
private String encoding;
private java.net.URL transducerGrammarURL;
Copied: gate/branches/sawdust2/plugins/ANNIE/src/gate/gui/GazetteerEditor.java (from rev 19180, gate/branches/sawdust2/src/main/gate/gui/GazetteerEditor.java)
===================================================================
--- gate/branches/sawdust2/plugins/ANNIE/src/gate/gui/GazetteerEditor.java (rev 0)
+++ gate/branches/sawdust2/plugins/ANNIE/src/gate/gui/GazetteerEditor.java 2016-04-02 14:42:05 UTC (rev 19184)
@@ -0,0 +1,1358 @@
+/*
+ * Copyright (c) 1995-2012, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * Thomas Heitz, 1 March 2010
+ *
+ * $Id$
+ */
+
+package gate.gui;
+
+import gate.Resource;
+import gate.creole.AbstractVisualResource;
+import gate.creole.ResourceInstantiationException;
+import gate.creole.gazetteer.Gazetteer;
+import gate.creole.gazetteer.GazetteerEvent;
+import gate.creole.gazetteer.GazetteerList;
+import gate.creole.gazetteer.GazetteerListener;
+import gate.creole.gazetteer.GazetteerNode;
+import gate.creole.gazetteer.LinearDefinition;
+import gate.creole.gazetteer.LinearNode;
+import gate.creole.metadata.CreoleResource;
+import gate.creole.metadata.GuiType;
+import gate.swing.XJFileChooser;
+import gate.swing.XJTable;
+import gate.util.Err;
+import gate.util.ExtensionFileFilter;
+import gate.util.Files;
+import gate.util.GateRuntimeException;
+
+import java.awt.BorderLayout;
+import java.awt.Color;
+import java.awt.Component;
+import java.awt.FlowLayout;
+import java.awt.Insets;
+import java.awt.Point;
+import java.awt.Toolkit;
+import java.awt.datatransfer.Clipboard;
+import java.awt.datatransfer.DataFlavor;
+import java.awt.datatransfer.UnsupportedFlavorException;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.KeyAdapter;
+import java.awt.event.KeyEvent;
+import java.awt.event.MouseAdapter;
+import java.awt.event.MouseEvent;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.text.Collator;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import javax.swing.AbstractAction;
+import javax.swing.Action;
+import javax.swing.ActionMap;
+import javax.swing.DefaultComboBoxModel;
+import javax.swing.InputMap;
+import javax.swing.JButton;
+import javax.swing.JCheckBox;
+import javax.swing.JComboBox;
+import javax.swing.JComponent;
+import javax.swing.JFileChooser;
+import javax.swing.JLabel;
+import javax.swing.JOptionPane;
+import javax.swing.JPanel;
+import javax.swing.JPopupMenu;
+import javax.swing.JScrollPane;
+import javax.swing.JSplitPane;
+import javax.swing.JTable;
+import javax.swing.JTextField;
+import javax.swing.JToolTip;
+import javax.swing.KeyStroke;
+import javax.swing.ListSelectionModel;
+import javax.swing.Popup;
+import javax.swing.PopupFactory;
+import javax.swing.SwingUtilities;
+import javax.swing.event.DocumentEvent;
+import javax.swing.event.DocumentListener;
+import javax.swing.event.ListSelectionEvent;
+import javax.swing.event.ListSelectionListener;
+import javax.swing.event.TableModelEvent;
+import javax.swing.event.TableModelListener;
+import javax.swing.table.AbstractTableModel;
+import javax.swing.table.DefaultTableCellRenderer;
+import javax.swing.table.DefaultTableModel;
+import javax.swing.text.BadLocationException;
+import javax.swing.text.Document;
+import javax.swing.text.JTextComponent;
+
+/**
+ * Editor for {@link gate.creole.gazetteer.Gazetteer ANNIE Gazetteer}.
+<pre>
+ Main features:
+- left table with 5 columns (List name, Major, Minor, Language, AnnotationType)
+ for the definition
+- right table with 1+n columns (Value, Feature 1...Feature n) for the lists
+- 'Save' on the context menu of the resources tree and tab
+- context menu on both tables to delete selected rows
+- drop down list with .lst files in directory
+- text fields and buttons to add a list/entry
+- for the second table, a button to filter the list and another to add columns
+- both tables sorted case insensitively on the first column by default
+- display in red the list name when the list is modified
+- for the separator character test when editing feature columns
+- make feature map ordered
+- remove feature/value columns when containing only spaces or empty
+</pre>
+*/
+@SuppressWarnings("serial")
+@CreoleResource(name="Gazetteer Editor", comment="Gazetteer viewer and
editor.", helpURL="http://gate.ac.uk/userguide/sec:gazetteers:anniegazeditor",
guiType=GuiType.LARGE, mainViewer=true,
resourceDisplayed="gate.creole.gazetteer.AbstractGazetteer")
+public class GazetteerEditor extends AbstractVisualResource
+ implements GazetteerListener, ActionsPublisher {
+
+ public GazetteerEditor() {
+ definitionTableModel = new DefaultTableModel();
+ definitionTableModel.addColumn("List name");
+ definitionTableModel.addColumn("Major");
+ definitionTableModel.addColumn("Minor");
+ definitionTableModel.addColumn("Language");
+ definitionTableModel.addColumn("Annotation type");
+ listTableModel = new ListTableModel();
+ actions = new ArrayList<Action>();
+ actions.add(new SaveAndReinitialiseGazetteerAction());
+ actions.add(new SaveAsGazetteerAction());
+ }
+
+ @Override
+ public Resource init() throws ResourceInstantiationException {
+ initGUI();
+ initListeners();
+ return this;
+ }
+
+ protected void initGUI() {
+ collator = Collator.getInstance(Locale.ENGLISH);
+ collator.setStrength(Collator.TERTIARY);
+
+ /*************************/
+ /* Definition table pane */
+ /*************************/
+
+ JPanel definitionPanel = new JPanel(new BorderLayout());
+ JPanel definitionTopPanel = new JPanel(new FlowLayout(FlowLayout.LEFT));
+ newListComboBox = new JComboBox<String>();
+ newListComboBox.setEditable(true);
+ newListComboBox.setPrototypeDisplayValue("123456789012345");
+ newListComboBox.setToolTipText(
+ "Lists available in the gazetteer directory");
+ newListButton = new JButton("Add");
+ // enable/disable button according to the text field content
+ JTextComponent listTextComponent = (JTextField)
+ newListComboBox.getEditor().getEditorComponent();
+ listTextComponent.getDocument().addDocumentListener(new DocumentListener()
{
+ @Override
+ public void insertUpdate(DocumentEvent e) { update(e); }
+ @Override
+ public void removeUpdate(DocumentEvent e) { update(e); }
+ @Override
+ public void changedUpdate(DocumentEvent e) { update(e); }
+ public void update(DocumentEvent e) {
+ Document document = e.getDocument();
+ try {
+ String value = document.getText(0, document.getLength());
+ if (value.trim().length() == 0) {
+ newListButton.setEnabled(false);
+ newListButton.setText("Add");
+ } else if (value.contains(":")) {
+ newListButton.setEnabled(false);
+ newListButton.setText("Colon Char Forbidden");
+ } else if (linearDefinition.getLists().contains(value)) {
+ // this list already exists in the gazetteer
+ newListButton.setEnabled(false);
+ newListButton.setText("Existing");
+ } else {
+ newListButton.setEnabled(true);
+ newListButton.setText("Add");
+ }
+ } catch (BadLocationException ble) {
+ ble.printStackTrace();
+ }
+ }
+ });
+ newListComboBox.getEditor().getEditorComponent()
+ .addKeyListener(new KeyAdapter() {
+ @Override
+ public void keyPressed(KeyEvent e) {
+ if (e.getKeyCode() == KeyEvent.VK_ENTER) {
+ // Enter key in the text field add the entry to the table
+ newListButton.doClick();
+ }
+ }
+ });
+ newListButton.setToolTipText("<html>Add a list in the gazetteer"
+ + " <font color=#667799><small>Enter"
+ + " </small></font></html>");
+ newListButton.setMargin(new Insets(2, 2, 2, 2));
+ newListButton.addActionListener(new AbstractAction() {
+ @Override
+ public void actionPerformed(ActionEvent e) {
+ String listName = (String) newListComboBox.getEditor().getItem();
+ newListComboBox.removeItem(listName);
+ // update the table
+ definitionTableModel.addRow(new Object[]{listName, "", "", "", ""});
+ // update the gazetteer
+ LinearNode linearNode = new LinearNode(listName, "", "", "", "");
+ linearDefinition.add(linearNode);
+ linearDefinition.getNodesByListNames().put(listName, linearNode);
+ GazetteerList gazetteerList;
+ try {
+ gazetteerList = linearDefinition.loadSingleList(listName, true);
+ } catch (ResourceInstantiationException rie) {
+ rie.printStackTrace();
+ return;
+ }
+ linearDefinition.getListsByNode().put(linearNode, gazetteerList);
+ // select the new list
+ final int row = definitionTable.rowModelToView(
+ definitionTable.getRowCount()-1);
+ final int column = definitionTable.convertColumnIndexToView(0);
+ definitionTable.setRowSelectionInterval(row, row);
+ SwingUtilities.invokeLater(new Runnable() {
+ @Override
+ public void run() {
+ // scroll to the selected new list
+ definitionTable.scrollRectToVisible(
+ definitionTable.getCellRect(row, column, true));
+ definitionTable.requestFocusInWindow();
+ }
+ });
+ }
+ });
+ definitionTopPanel.add(newListComboBox);
+ definitionTopPanel.add(newListButton);
+ definitionPanel.add(definitionTopPanel, BorderLayout.NORTH);
+ definitionTable = new XJTable() {
+ // shift + Delete keys delete the selected rows
+ @Override
+ protected void processKeyEvent(KeyEvent e) {
+ if (e.getKeyCode() == KeyEvent.VK_DELETE
+ && ((e.getModifiersEx() & KeyEvent.SHIFT_DOWN_MASK) != 0)) {
+ new DeleteSelectedLinearNodeAction().actionPerformed(null);
+ } else {
@@ Diff output truncated at 100000 characters. @@
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Transform Data into Opportunity.
Accelerate data analysis in your applications with
Intel Data Analytics Acceleration Library.
Click to learn more.
http://pubads.g.doubleclick.net/gampad/clk?id=278785471&iu=/4140
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs