parser.groovy

chenpei Wed, 27 Nov 2013 09:08:46 -0800

Author: chenpei
Date: Wed Nov 27 17:07:19 2013
New Revision: 1546113

URL: http://svn.apache.org/r1546113
Log:
CTAKES-273 - Groovy Integration - Updated sample script for Tim's usecase.
Running this script will print the parse text to the console for the files in the input directory.


Modified:
    ctakes/sandbox/groovy/parser.groovy

Modified: ctakes/sandbox/groovy/parser.groovy
URL: 
http://svn.apache.org/viewvc/ctakes/sandbox/groovy/parser.groovy?rev=1546113&r1=1546112&r2=1546113&view=diff
==============================================================================
--- ctakes/sandbox/groovy/parser.groovy (original)
+++ ctakes/sandbox/groovy/parser.groovy Wed Nov 27 17:07:19 2013
@@ -1,4 +1,19 @@
 #!/usr/bin/env groovy
+/**
+**     This assumes that you have installed Groovy and 
+**     that you have the command groovy available in your path. 
+**     On Debian/Ubuntu systems, installing Groovy should be as easy as 
apt-get install groovy.
+**     You can download groovy from http://groovy.codehaus.org/
+**     The first run may be slow since it needs to download all of the 
dependencies.
+**  Usage: $./parser.groovy [inputDir]
+**     or enable more verbose status $groovy 
-Dgroovy.grape.report.downloads=true parser.groovy [inputDir]
+**/
+@Grab(group='org.apache.ctakes',
+      module='ctakes-core',
+            version='3.1.0')
+@Grab(group='org.apache.ctakes',
+      module='ctakes-core-res',
+            version='3.1.0')                   
 @Grab(group='org.apache.ctakes',
       module='ctakes-constituency-parser',
             version='3.1.0')
@@ -6,15 +21,8 @@
       module='ctakes-constituency-parser-res',
             version='3.1.0')           
 @Grab(group='org.apache.ctakes',
-      module='ctakes-pos-tagger',
-            version='3.1.0')   
-@Grab(group='org.apache.ctakes',
-      module='ctakes-pos-tagger-res',
-            version='3.1.0')                   
-@Grab(group='org.apache.ctakes',
       module='ctakes-clinical-pipeline',
             version='3.1.0')
-                       
 import java.io.File;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -31,39 +39,60 @@ import static org.uimafit.util.JCasUtil.
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.core.ae.SentenceDetector;
 import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
 import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
-import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
 
                CollectionReader collectionReader = 
FilesCollectionReader.getCollectionReader(args[0]);
+               if(args.length < 1) {
+               System.out.println("Please specify input directory");
+               System.exit(1);
+               }
                System.out.println("Reading from directory: " + args[0]);
-               
+
+               //Download Models
+               //TODO: Seperate downloads from URL here is a hack.  
+               //Models should really be automatically downloaded from 
+               //maven central as part of ctakes-*-res projects/artifacts via 
@grab.
+               //Illustrative purposes until we have all of the *-res 
artifacts in maven central.
+               
downloadFile("http://svn.apache.org/repos/asf/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sentdetect/sd-med-model.zip","sd-med-model.zip");
+               
downloadFile("http://svn.apache.org/repos/asf/ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin","sharpacq-3.1.bin");
+
+               //Build the pipeline to run
                AggregateBuilder aggregateBuilder = new AggregateBuilder();
-               
                
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
-               
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
-               
-               /*  Need to resolve zip resoures from inside a jar first...
-               
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-                       POSTagger.class,
-                       
TypeSystemDescriptionFactory.createTypeSystemDescription(),
-                       
TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, 
BaseToken.class),
-                       POSTagger.POS_MODEL_FILE_PARAM,
-                       "org/apache/ctakes/postagger/models/mayo-pos.zip"));    
        
                
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
             SentenceDetector.class,
             SentenceDetector.SD_MODEL_FILE_PARAM,
-            "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));            
        
-               
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
-               */
+            "sd-med-model.zip"));
+               
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
                    
+               
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+                       ConstituencyParser.class,
+                       ConstituencyParser.PARAM_MODELFILE,
+            "sharpacq-3.1.bin"));
                
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(Writer.class));
                SimplePipeline.runPipeline(collectionReader, 
aggregateBuilder.createAggregate());
 
 // Custom writer class used at the end of the pipeline to write results to 
screen
 class Writer extends org.uimafit.component.JCasAnnotator_ImplBase {
   void process(JCas jcas) {
-    select(jcas, Segment).each { println "${it.coveredText} begin:${it.begin} 
end:${it.end}"  }
+       //Get each Treebanknode and print out the text and it's parse string
+    select(jcas, TopTreebankNode).each { println "${it.treebankParse} "  }
   }
+}
+
+def downloadFile(String url, String filename) {
+       System.out.println("Downloading: " + url);
+       def file = new File(filename);
+       if(file.exists()) {
+         System.out.println("File already exists:" + filename);
+         return;
+       }
+    def f = new FileOutputStream(url.tokenize("/")[-1])
+    def out = new BufferedOutputStream(f)
+    out << new URL(url).openStream()
+    out.close()
 }
\ No newline at end of file

svn commit: r1546113 - /ctakes/sandbox/groovy/parser.groovy

Reply via email to