// Added: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/cr/LetterColumnReader.java
// URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/cr/LetterColumnReader.java?rev=1905443&view=auto
// (added) Mon Nov 21 17:50:20 2022
package org.apache.ctakes.examples.cr;

import org.apache.ctakes.core.cr.AbstractFileTreeReader;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.pipeline.ProgressManager;
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.core.util.StringUtil;
import org.apache.ctakes.core.util.doc.JCasBuilder;
import org.apache.ctakes.core.util.doc.TextBySectionBuilder;
import org.apache.ctakes.core.util.regex.RegexSpanFinder;
import org.apache.log4j.Logger;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Collection reader that splits each input file into "letters" and emits one document per letter.
 * <p>
 * A letter row starts with text matching {@code Letter <number>|} and is expected to hold exactly
 * seven {@code |}-separated columns.  Column 0 is used as the letter id, column 2 as a section name,
 * column 3 as the section text and column 6 as the letter date.  Consecutive rows that share the same
 * column 0 are gathered into a single letter with several sections.
 */
@PipeBitInfo(
      name = "LetterColumnReader",
      description = "Build Patient document text from columnar Letter text.",
      role = PipeBitInfo.Role.READER
)
public class LetterColumnReader extends AbstractFileTreeReader {

   static private final Logger LOGGER = Logger.getLogger( "LetterColumnReader" );

   // Marks the beginning of every letter row in the raw file text.
   static private final Pattern LETTER_PATTERN = Pattern.compile( "\\bLetter [0-9]+\\|" );

   // Document-id prefix of the file whose letters are currently being handed out.
   private String _fileId = "";
   // Number of letters parsed from all files read so far.
   private int _letterTotal = 0;
   // Number of letters already returned by getNext.
   private int _letterCount = 0;
   // Letters of the file most recently read, and the index of the next one to return.
   private final List<Letter> _fileLetters = new ArrayList<>();
   private int _fileLetterIndex = 0;

   private JCasBuilder _jCasBuilder = new JCasBuilder();


   /**
    * Gets the number of letters parsed so far.  The value grows as more files are read,
    * it is not known up front.
    *
    * @return the number of letters parsed from the files read so far.
    */
   @Override
   public int getNoteCount() {
      return _letterTotal;
   }

   /**
    * {@inheritDoc}
    *
    * @return true if the current file still has unreturned letters or if there are unread files.
    */
   @Override
   public boolean hasNext() {
      if ( _fileLetterIndex < _fileLetters.size() ) {
         return true;
      }
      final boolean hasNext = getCurrentIndex() < getFiles().size();
      if ( !hasNext ) {
         ProgressManager.getInstance()
                        .updateProgress( _letterTotal );
      }
      return hasNext;
   }

   /**
    * {@inheritDoc}
    * <p>
    * Populates the cas with the next letter, reading further files until one provides a letter.
    */
   @Override
   public void getNext( final JCas jcas ) throws IOException, CollectionException {
      // NOTE(review): if every remaining file contains no usable letter then getFiles().get(..) below
      // throws IndexOutOfBoundsException, exactly as the original recursive form did.  hasNext() cannot
      // know whether an unread file holds letters.  Confirm the desired behavior with the pipeline owner.
      while ( _fileLetterIndex >= _fileLetters.size() ) {
         final int currentFileIndex = getCurrentIndex();
         final File file = getFiles().get( currentFileIndex );
         setCurrentIndex( currentFileIndex + 1 );
         _fileId = createDocumentID( file, getValidExtensions() );
         readFile( jcas, file );
      }
      final Letter letter = _fileLetters.get( _fileLetterIndex );
      _fileLetterIndex++;
      _letterCount++;
      ProgressManager.getInstance()
                     .updateProgress( _letterCount );
      _jCasBuilder.setDocId( _fileId + "_" + letter._id )
                  .setDocTime( letter._date )
                  .rebuild( jcas );
      final TextBySectionBuilder builder = new TextBySectionBuilder();
      letter._sections
            .forEach( p -> builder.addSection( p.getValue1(), p.getValue2() ) );
      builder.populate( jcas );
   }

   /**
    * {@inheritDoc}
    */
   @Override
   public Progress[] getProgress() {
      return new Progress[]{
            new ProgressImpl( _letterCount, _letterTotal, Progress.ENTITIES )
      };
   }


   /**
    * Reads a file, parses its letters into the per-file letter list and prepares the JCasBuilder
    * that is used for every letter of that file.  The cas itself is populated later by getNext.
    *
    * @param jCas unpopulated jcas data container.  Not modified here.
    * @param file file to be read.
    * @throws IOException should anything bad happen.
    */
   protected void readFile( JCas jCas, File file ) throws IOException {
      LOGGER.info( "Reading File " + file.getPath() );
      final String fileText = readByBuffer( file );
      _fileLetters.clear();
      _fileLetterIndex = 0;
      if ( !fileText.isEmpty() ) {
         _fileLetters.addAll( readLetters( fileText ) );
         _letterTotal += _fileLetters.size();
      }
      _jCasBuilder = getJCasBuilder( file ).setDocType( "Letter" )
                                           .nullDocText();
      ProgressManager.getInstance()
                     .updateProgress( _letterCount, _letterTotal );
      LOGGER.info( "Parsed " + _fileLetters.size() + " letters" );
   }


   /**
    * Cuts the raw text at every letter-row start and groups consecutive rows with the same id.
    *
    * @param rawText complete raw text as read from file.
    * @return letters parsed from file text.  Letters without id, date or section are dropped.
    * @throws IOException if the span finder rejects its arguments.
    */
   static private List<Letter> readLetters( final String rawText ) throws IOException {
      final List<Integer> letterStarts;
      try ( RegexSpanFinder finder = new RegexSpanFinder( LETTER_PATTERN ) ) {
         letterStarts = finder.findSpans( rawText )
                              .stream()
                              .map( Pair::getValue1 )
                              .collect( Collectors.toList() );
      } catch ( IllegalArgumentException iaE ) {
         // Keep the original exception as the cause so that the stack trace is not lost.
         throw new IOException( "Illegal Argument " + iaE.getMessage(), iaE );
      }
      if ( letterStarts.isEmpty() ) {
         return Collections.emptyList();
      }
      final List<Letter> letters = new ArrayList<>();
      Letter currentLetter = new Letter();
      final int startCount = letterStarts.size();
      for ( int i = 0; i < startCount; i++ ) {
         // A row runs to the start of the next row, the last row runs to the end of the text.
         final int rowEnd = ( i + 1 < startCount ) ? letterStarts.get( i + 1 ) : rawText.length();
         final String letterLine = rawText.substring( letterStarts.get( i ), rowEnd );
         final Letter newOrCurrent = handleLetterLine( currentLetter, letterLine );
         // handleLetterLine returns either the very same instance or a brand new Letter, so identity
         // tells us whether a new letter began.  Comparing ids threw a NullPointerException when the
         // first rows of a file were malformed and the current letter had no id yet.
         if ( newOrCurrent != currentLetter ) {
            if ( currentLetter.hasInfo() ) {
               letters.add( currentLetter );
            }
            currentLetter = newOrCurrent;
         }
      }
      if ( currentLetter.hasInfo() ) {
         letters.add( currentLetter );
      }
      return letters;
   }


   /**
    * @param letter the letter currently being populated with sections.
    * @param line   a block of text representing a letter line.
    * @return the letter provided with the text processed OR a new letter with the text processed.
    */
   static private Letter handleLetterLine( final Letter letter, final String line ) {
      final LineType lineType = letter.addLine( line );
      if ( lineType != LineType.NEXT_LETTER ) {
         return letter;
      }
      // The line is valid but belongs to another letter.  A fresh Letter has no id,
      // so it cannot answer NEXT_LETTER and this recursion is one level deep.
      return handleLetterLine( new Letter(), line );
   }


   /**
    * Reads the whole file into a String.
    * The bytes are collected first and decoded once.  Decoding every 8KB chunk on its own corrupts any
    * multi-byte character that happens to straddle a chunk boundary.
    *
    * @param file file to read
    * @return text in file
    * @throws IOException if the file could not be read or the encoding is not supported.
    */
   private String readByBuffer( final File file ) throws IOException {
      final String encoding = getValidEncoding();
      // Use 8KB as the default buffer size
      final byte[] buffer = new byte[ 8192 ];
      final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
      try ( InputStream inputStream = new BufferedInputStream( new FileInputStream( file ), buffer.length ) ) {
         int length = inputStream.read( buffer );
         while ( length >= 0 ) {
            bytes.write( buffer, 0, length );
            length = inputStream.read( buffer );
         }
      } catch ( FileNotFoundException fnfE ) {
         throw new IOException( fnfE );
      }
      if ( encoding != null && !encoding.isEmpty() && !UNKNOWN.equals( encoding ) ) {
         return bytes.toString( encoding );
      }
      // No usable encoding was configured: fall back to the platform default, as before.
      return bytes.toString();
   }


   /**
    * Outcome of offering one row to a Letter.
    */
   private enum LineType {
      // The row does not have the expected columns.
      MALFORMED,
      // The row is valid but its text column is empty.
      EMPTY,
      // The row was added to the letter as a section.
      SECTION,
      // The row is valid but carries a different id than the letter it was offered to.
      NEXT_LETTER
   }

   /**
    * One letter: an id, a date and a list of (section name, section text) pairs.
    */
   static private final class Letter {

      private String _id;
      private String _date;
      private final List<Pair<String>> _sections = new ArrayList<>();

      /**
       * @param line one letter row.
       * @return what was done with the row.
       */
      private LineType addLine( final String line ) {
         final String[] splits = StringUtil.fastSplit( line, '|' );
         if ( !isLineValid( splits ) ) {
            return LineType.MALFORMED;
         }
         if ( _id == null ) {
            // First valid row: it defines the identity of this letter.
            _id = splits[ 0 ];
         } else if ( !_id.equals( splits[ 0 ] ) ) {
            return LineType.NEXT_LETTER;
         }
         if ( splits[ 3 ].isEmpty() ) {
            // There is no letter content.
            return LineType.EMPTY;
         }
         _sections.add( new Pair<>( splits[ 2 ], splits[ 3 ] ) );
         // The date of the latest section row wins.
         _date = splits[ 6 ];
         return LineType.SECTION;
      }

      /**
       * @return true if this letter has an id, a date and at least one section.
       */
      private boolean hasInfo() {
         return _id != null && _date != null && !_sections.isEmpty();
      }

      /**
       * @param splits columns of one row.
       * @return true if there are exactly 7 columns and neither the first nor the last is blank.
       */
      static private boolean isLineValid( final String[] splits ) {
         if ( splits.length != 7 ) {
            LOGGER.debug( "Incorrect number of columns ... skipping." );
            return false;
         }
         if ( splits[ 0 ].trim()
                         .isEmpty() ) {
            LOGGER.debug( "No Letter Title ... skipping." );
            return false;
         }
         if ( splits[ 6 ].trim()
                         .isEmpty() ) {
            LOGGER.debug( "No Letter Date ... skipping." );
            return false;
         }
         return true;
      }

   }


}
// Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/GenericRunnerGui.java
// URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/GenericRunnerGui.java?rev=1905443&view=auto
// (added) Mon Nov 21 17:50:20 2022
package org.apache.ctakes.gui.generic;

import org.apache.ctakes.gui.component.DisablerPane;
import org.apache.log4j.Logger;

import javax.swing.*;
import java.awt.*;

/**
 * Can run a simple command line.
 * Shows a frame with a start button, a stop button and a log panel.  The commands behind the buttons
 * come from a parameter file named by the single program argument.
 *
 * @author SPF , chip-nlp
 * @since {9/20/2022}
 */
final public class GenericRunnerGui {

   static private final Logger LOGGER = Logger.getLogger( "GenericRunnerGui" );

   /**
    * @return a frame that exits the jvm when closed, sized to 1024 x 768.
    */
   static private JFrame createFrame() {
      final JFrame frame = new JFrame( "cTAKES Simple Program Frame" );
      frame.setDefaultCloseOperation( WindowConstants.EXIT_ON_CLOSE );
      // Use 1024 x 768 as the minimum required resolution (XGA)
      // iPhone 3 : 480 x 320 (3:2, HVGA)
      // iPhone 4 : 960 x 640  (3:2, unique to Apple)
      // iPhone 5 : 1136 x 640 (under 16:9, unique to Apple)
      // iPad 3&4 : 2048 x 1536 (4:3, QXGA)
      // iPad Mini: 1024 x 768 (4:3, XGA)
      final Dimension size = new Dimension( 1024, 768 );
      frame.setSize( size );
      frame.setMinimumSize( size );
      System.setProperty( "apple.laf.useScreenMenuBar", "true" );
      return frame;
   }


   /**
    * @param args a single argument: the path to the parameter file.  See {@code MainPanel.readParameterFile}.
    */
   public static void main( final String... args ) {
      try {
         UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() );
         UIManager.getDefaults()
                  .put( "SplitPane.border", BorderFactory.createEmptyBorder() );
         // Needed for MacOS, which sets gridlines to white by default
         UIManager.getDefaults()
                  .put( "Table.gridColor", Color.GRAY );
      } catch ( ClassNotFoundException | InstantiationException
            | IllegalAccessException | UnsupportedLookAndFeelException multE ) {
         // The default look and feel is still usable, so only report the problem.
         LOGGER.error( multE.getLocalizedMessage() );
      }
      final JFrame frame = createFrame();
      final MainPanel mainPanel = new MainPanel();
      frame.add( mainPanel );
      frame.pack();
      frame.setVisible( true );
      DisablerPane.getInstance()
                  .initialize( frame );
      // Read the parameters after the frame is visible so that problems show up in the log panel.
      mainPanel.readParameterFile( args );
      LOGGER.info( "To start, click the Green Circular button above." );
      LOGGER.info( "To stop, click the Red X button above." );
   }


}

// Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/MainPanel.java
// URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/MainPanel.java?rev=1905443&view=auto
// (added) Mon Nov 21 17:50:20 2022
package org.apache.ctakes.gui.generic;

import org.apache.ctakes.core.util.external.SystemUtil;
import org.apache.ctakes.gui.component.LoggerPanel;
import org.apache.ctakes.gui.util.IconLoader;
import org.apache.log4j.Logger;

import javax.swing.*;
import javax.swing.border.EmptyBorder;
import java.awt.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

/**
 * Panel with a toolbar holding a start and a stop button above a log panel.
 * The buttons stay disabled until {@link #readParameterFile(String...)} has attached a command to them.
 *
 * @author SPF , chip-nlp
 * @since {9/20/2022}
 */
public class MainPanel extends JPanel {

   static private final Logger LOGGER = Logger.getLogger( "MainPanel" );

   private JButton _runButton;
   private JButton _stopButton;

   MainPanel() {
      super( new BorderLayout() );
      setBorder( new EmptyBorder( 2, 2, 2, 2 ) );
      add( createToolBar(), BorderLayout.NORTH );
      add( LoggerPanel.createLoggerPanel(), BorderLayout.CENTER );
      SwingUtilities.invokeLater( new ButtonIconLoader() );
   }

   /**
    * Reads the parameter file and attaches its commands to the toolbar buttons.
    * Empty lines and lines starting with {@code //} are skipped.  The remaining lines are taken in order as:
    * application title, start command, starting directory, stop command.  Further lines are ignored.
    * Because the lines are positional, a stop command can only be given if a directory line precedes it.
    *
    * @param args exactly one argument, the path to the parameter file.
    */
   public void readParameterFile( final String... args ) {
      if ( args.length != 1 ) {
         logBadArgs( args );
         return;
      }
      final File parmFile = new File( args[ 0 ] );
      if ( !parmFile.canRead() ) {
         LOGGER.error( "Cannot read parameter file: " + args[ 0 ] );
         LOGGER.info( "Please exit the application" );
         return;
      }
      String name = "";
      String startCommand = "";
      String directory = "";
      String stopCommand = "";
      try ( BufferedReader reader = new BufferedReader( new FileReader( parmFile ) ) ) {
         String line = reader.readLine();
         while ( line != null ) {
            if ( !line.isEmpty() && !line.startsWith( "//" ) ) {
               if ( name.isEmpty() ) {
                  name = line;
               } else if ( startCommand.isEmpty() ) {
                  startCommand = line;
               } else if ( directory.isEmpty() ) {
                  directory = line;
               } else if ( stopCommand.isEmpty() ) {
                  stopCommand = line;
               } else {
                  LOGGER.warn( "Ignoring extra line: " + line );
               }
            }
            line = reader.readLine();
         }
      } catch ( IOException ioE ) {
         LOGGER.error( ioE.getMessage() );
         System.exit( -1 );
      }
      // Both buttons are created disabled and nothing visible in this class enabled them, so a click
      // could never reach the listeners.  Enable each button once it has a command to run.
      if ( startCommand.isEmpty() ) {
         LOGGER.error( "No Start Command found in parameter file: " + args[ 0 ] );
         LOGGER.info( "Please exit the application" );
      } else {
         _runButton.addActionListener( new CommandAction( "Starting", name, startCommand, directory ) );
         _runButton.setEnabled( true );
      }
      // The stop command is optional.  Without one the stop button stays disabled instead of
      // running an empty command.
      if ( !stopCommand.isEmpty() ) {
         _stopButton.addActionListener( new CommandAction( "Stopping", name, stopCommand, directory ) );
         _stopButton.setEnabled( true );
      }
   }

   /**
    * Explains in the log what arguments are expected.
    *
    * @param args the arguments that were actually given.
    */
   static private void logBadArgs( final String... args ) {
      if ( args.length > 1 ) {
         LOGGER.error( "There are too many arguments in " + String.join( " ", args ) );
      }
      LOGGER.error( "A single argument pointing to a File containing run parameters is required." );
      LOGGER.info( "The file format is:" );
      LOGGER.info( "Application Title" );
      LOGGER.info( "Start Command" );
      LOGGER.info( "Starting Directory (optional)" );
      LOGGER.info( "Stop Command (optional)" );
      LOGGER.info( "Please exit the application" );
   }


   /**
    * Creates the toolbar and, as a side effect, the (disabled) run and stop buttons.
    *
    * @return toolbar holding the run and stop buttons.
    */
   private JToolBar createToolBar() {
      final JToolBar toolBar = new JToolBar();
      toolBar.setFloatable( false );
      toolBar.setRollover( true );
      toolBar.addSeparator( new Dimension( 10, 0 ) );
      _runButton = addButton( toolBar, "Start " );
      _runButton.setEnabled( false );
      toolBar.addSeparator( new Dimension( 50, 0 ) );
      _stopButton = addButton( toolBar, "Stop " );
      _stopButton.setEnabled( false );

      toolBar.addSeparator( new Dimension( 50, 0 ) );
      toolBar.addSeparator( new Dimension( 10, 0 ) );

      return toolBar;
   }

   /**
    * @param toolBar toolbar that receives the new button and a separator after it.
    * @param toolTip tooltip for the button.
    * @return the new button.
    */
   static private JButton addButton( final JToolBar toolBar, final String toolTip ) {
      final JButton button = new JButton();
      button.setFocusPainted( false );
      button.setToolTipText( toolTip );
      toolBar.add( button );
      toolBar.addSeparator( new Dimension( 10, 0 ) );
      return button;
   }


   /**
    * Runs one command line when its button is clicked.
    * Replaces the former StartAction and StopAction, which differed only in the logged verb.
    */
   static private final class CommandAction implements ActionListener {

      private final String _verb;
      private final String _name;
      private final String _command;
      private final String _dir;

      /**
       * @param verb    word logged before the application name, "Starting" or "Stopping".
       * @param name    application title.
       * @param command command line to run.
       * @param dir     directory in which to run the command.  Ignored if null or empty.
       */
      private CommandAction( final String verb,
                             final String name,
                             final String command,
                             final String dir ) {
         _verb = verb;
         _name = name;
         _command = command;
         _dir = dir;
      }

      @Override
      public void actionPerformed( final ActionEvent event ) {
         final SystemUtil.CommandRunner runner = new SystemUtil.CommandRunner( _command );
         runner.setLogger( LOGGER );
         runner.wait( true );
         if ( _dir != null && !_dir.isEmpty() ) {
            runner.setDirectory( _dir );
         }
         LOGGER.info( _verb + " " + _name + " ..." );
         try {
            SystemUtil.run( runner );
         } catch ( IOException ioE ) {
            LOGGER.error( ioE.getMessage() );
         }
      }

   }


   /**
    * Simple Startable that loads an icon
    * <p>
    * Some icons
    * <a href="https://www.freepik.com/free-vector/no-entry-hand-sign-isolated-white_10601278.htm#query=stop%20hand&position=1&from_view=keyword">Image by macrovector</a> on Freepik
    */
   private final class ButtonIconLoader implements Runnable {

      @Override
      public void run() {
         final String dir = "org/apache/ctakes/gui/pipeline/icon/";
         final String runPng = "RunPiper.png";
         final String stopPng = "StopHand.png";

         final Icon runIcon = IconLoader.loadIcon( dir + runPng );
         final Icon stopIcon = IconLoader.loadIcon( dir + stopPng );
         _runButton.setIcon( runIcon );
         _stopButton.setIcon( stopIcon );
      }

   }


}

// Added: ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt
// URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt?rev=1905443&view=auto
// ==============================================================================
// --- ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt (added)
// +++
// ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt Mon Nov 21 17:50:20 2022
// @@ -0,0 +1,2 @@
// +Consider the note files in this directory deprecated.
// +Example clinical notes are now in the ctakes-examples-res project.
//
// Added: ctakes/trunk/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifier.java
// URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifier.java?rev=1905443&view=auto
// (added) Mon Nov 21 17:50:20 2022
package org.apache.ctakes.smokingstatus.ae;

import libsvm.svm;
import libsvm.svm_model;
import libsvm.svm_node;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.log.DotLogger;
import org.apache.ctakes.smokingstatus.type.libsvm.NominalAttributeValue;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.apache.ctakes.smokingstatus.Const.*;


/**
 * Update of original PcsClassifierAnnotator_libsvm to use UimaFit.
 * <p>
 * Builds a binary feature vector (one entry per key word, plus one "contains a date" entry),
 * asks a libsvm model for a class label and stores the matching smoker class as a
 * NominalAttributeValue named "smoking_status".
 *
 * @author SPF , chip-nlp
 * @since {6/3/2022}
 */
@PipeBitInfo(
      name = "PcsClassifier",
      description = "Uses SVM for smoking status classification.",
      role = PipeBitInfo.Role.ANNOTATOR
)
public class PcsClassifier extends JCasAnnotator_ImplBase {

   static private final Logger LOGGER = Logger.getLogger( "PcsClassifier" );

   static public final String CASED_PARAM = "CaseSensitive";
   static public final String CASED_DESC = "yes/no for case sensitivity.";
   @ConfigurationParameter(
         name = CASED_PARAM,
         description = CASED_DESC,
         mandatory = false,
         defaultValue = "yes"
   )
   private String _caseSensitive;

   static public final String STOP_WORDS_PARAM = "StopWordsPath";
   static public final String STOP_WORDS_DESC = "Path to file containing stop words.";
   @ConfigurationParameter(
         name = STOP_WORDS_PARAM,
         description = STOP_WORDS_DESC
   )
   private String _stopWordsPath;

   static public final String KEY_WORDS_PARAM = "KeyWordsPath";
   static public final String KEY_WORDS_DESC = "Path to file containing key words.";
   @ConfigurationParameter(
         name = KEY_WORDS_PARAM,
         description = KEY_WORDS_DESC
   )
   private String _keyWordsPath;

   static public final String MODEL_PARAM = "ModelPath";
   static public final String MODEL_DESC = "Path to file containing the model.";
   @ConfigurationParameter(
         name = MODEL_PARAM,
         description = MODEL_DESC
   )
   private String _modelPath;

   // Maps the integer class label predicted by the model to its smoker class name.
   static private final Map<Integer, String> SMOKER_CODES = new HashMap<>();

   static private final Pattern SPACE_PATTERN = Pattern.compile( "\\s+" );
   static private final Pattern TEXT_CLEANER_PATTERN = Pattern.compile( "[.?!:;()',\"{}<>#+]" );
   static private final String[] DATE_REGEXES = {
         "19\\d\\d", "19\\d\\ds", "20\\d\\d", "20\\d\\ds", "[1-9]0s", "\\d{1,2}[/-]\\d{1,2}",
         "\\d{1,2}[/-]\\d{4}", "\\d{1,2}[/-]\\d{1,2}[/-]\\d{2}", "\\d{1,2}[/-]\\d{1,2}[/-]\\d{4}" };

   static private final Collection<Pattern> DATE_PATTERNS = new ArrayList<>();

   static {
      for ( String regex : DATE_REGEXES ) {
         DATE_PATTERNS.add( Pattern.compile( regex ) );
      }
      SMOKER_CODES.put( CLASS_CURR_SMOKER_INT, CLASS_CURR_SMOKER );
      SMOKER_CODES.put( CLASS_PAST_SMOKER_INT, CLASS_PAST_SMOKER );
      SMOKER_CODES.put( CLASS_SMOKER_INT, CLASS_SMOKER );
   }

   private boolean _isCaseSensitive = true;
   private final Collection<String> _stopWords = new HashSet<>();
   // The position of a key word in this list is its position in the feature vector.
   private final List<String> _keyWords = new ArrayList<>();
   // Trained lib_svm model.
   private svm_model _model;


   /**
    * {@inheritDoc}
    * <p>
    * Loads the stop words, the key words and the svm model.
    */
   @Override
   public void initialize( final UimaContext context ) throws ResourceInitializationException {
      super.initialize( context );
      LOGGER.info( "Initializing ..." );
      try ( DotLogger dotter = new DotLogger() ) {
         // run long initialization process.  Caught Exception may be of some other type.
         if ( _caseSensitive.equalsIgnoreCase( "no" )
               || _caseSensitive.equalsIgnoreCase( "false" ) ) {
            _isCaseSensitive = false;
         }
         parseFile( _stopWordsPath, _isCaseSensitive, _stopWords );
         parseFile( _keyWordsPath, _isCaseSensitive, _keyWords );
         _model = svm.svm_load_model( FileLocator.getFile( _modelPath )
                                                 .getPath() );
      } catch ( IOException ioE ) {
         throw new ResourceInitializationException( ioE );
      }
   }

   /**
    * {@inheritDoc}
    * <p>
    * Classifies the whole document text and adds one NominalAttributeValue named "smoking_status".
    */
   @Override
   public void process( final JCas jcas ) throws AnalysisEngineProcessException {
      LOGGER.info( "Processing ..." );
      try ( DotLogger dotter = new DotLogger() ) {
         final List<Double> features = createFeatures( jcas );
         // Cannot access sentence by SentenceAnnotator or RecordSentence.  The document text is the sentence.
         features.add( createDateFeature( jcas.getDocumentText() ) );
         // set the libSVM feature vector.  libsvm indices are 1-based.
         final svm_node[] svm_nodes = new svm_node[ features.size() ];
         for ( int j = 0; j < features.size(); j++ ) {
            svm_nodes[ j ] = new svm_node();
            svm_nodes[ j ].index = j + 1;
            svm_nodes[ j ].value = features.get( j );
         }
         // 1:CURRENT_SMOKER, 2:PAST_SMOKER, 3:SMOKER
         final double classLabel = svm.svm_predict( _model, svm_nodes );
         // string value.
         // note that the original code would cast to integer, which is equivalent to floor but poor form.
         final int intClassLabel = Double.valueOf( classLabel )
                                         .intValue();
         final String classValue = SMOKER_CODES.get( intClassLabel );
         LOGGER.info( "classLabel=" + classLabel + " intClassLabel" + intClassLabel + " classValue=" + classValue );
         if ( classValue == null ) {
            // The stored value stays null as before, but a label without a name should not pass silently.
            LOGGER.warn( "No smoker class is known for predicted label " + intClassLabel );
         }
         final NominalAttributeValue nominalAttributeValue = new NominalAttributeValue( jcas );
         nominalAttributeValue.setAttributeName( "smoking_status" );
         nominalAttributeValue.setNominalValue( classValue );
         nominalAttributeValue.addToIndexes();
      } catch ( IOException ioE ) {
         throw new AnalysisEngineProcessException( ioE );
      }
   }

   /**
    * @param text the document text.
    * @return 1.0 if any whitespace-separated token of the cleaned text fully matches a date pattern, else 0.0.
    */
   static private double createDateFeature( final String text ) {
      final String cleanText = TEXT_CLEANER_PATTERN.matcher( text )
                                                   .replaceAll( " " )
                                                   .trim();
      for ( String textToken : SPACE_PATTERN.split( cleanText ) ) {
         final boolean isDate = DATE_PATTERNS.stream()
                                             .anyMatch( p -> p.matcher( textToken )
                                                              .matches() );
         if ( isDate ) {
            final double dateInfo = 1.0;
            LOGGER.info( "***dateInfo|" + textToken + "|" + dateInfo );
            return dateInfo;
         }
      }
      return 0.0;
   }

   /**
    * @param jcas ye olde ...
    * @return one value per key word, in key word order: 1.0 if the key word occurs in the document, else 0.0.
    * Key words containing '_' are compared to bigrams, all others to unigrams.  Case is ignored.
    */
   private List<Double> createFeatures( final JCas jcas ) {
      final List<String> unigrams = createUnigrams( jcas );
      final List<String> bigrams = new ArrayList<>();
      for ( int i = 0; i < unigrams.size() - 1; i++ ) {
         bigrams.add( unigrams.get( i ) + "_" + unigrams.get( i + 1 ) );
      }
      final List<Double> features = new ArrayList<>( _keyWords.size() + 1 );
      // unigram & bigram keywords
      for ( String keyWord : _keyWords ) {
         final boolean isBigram = keyWord.contains( "_" );
         final List<String> grams = isBigram ? bigrams : unigrams;
         double value = 0.0;
         if ( grams.stream()
                   .anyMatch( keyWord::equalsIgnoreCase ) ) {
            value = 1.0;
            final String matches = grams.stream()
                                        .filter( keyWord::equalsIgnoreCase )
                                        .collect( Collectors.joining( " ; " ) );
            LOGGER.info( "keyWord=" + keyWord + ( isBigram ? " bigram=" : " unigram=" ) + matches );
         }
         features.add( value );
      }
      return features;
   }

   /**
    * @param jcas ye olde ...
    * @return the lowercase word tokens of the document in document order, without stop words.
    */
   private List<String> createUnigrams( final JCas jcas ) {
      final List<String> unigrams = new ArrayList<>();
      final Collection<WordToken> wordTokens = JCasUtil.select( jcas, WordToken.class );
      for ( WordToken token : wordTokens ) {
         final String tokenText = token.getCoveredText();
         if ( tokenText == null || tokenText.isEmpty() ) {
            continue;
         }
         // TODO - The token text is ALWAYS turned to lowercase, whatever _isCaseSensitive says, while the
         //  stop words are only lowercased when not case sensitive.
         //  NOTE(review): the original comments say the model was trained on text prepared this way;
         //  confirm before changing it.
         // Runs of 2 or more hyphens separate words, in cases like: Tobacco--quit in 1980.
         // Locale.ROOT keeps the lowercasing independent of the default locale of the jvm.
         final String cleanText = tokenText.toLowerCase( Locale.ROOT )
                                           .replaceAll( "-{2,}", " " )
                                           .trim();
         Arrays.stream( SPACE_PATTERN.split( cleanText ) )
               .filter( t -> !_stopWords.contains( t ) )
               .forEach( unigrams::add );
      }
      return unigrams;
   }

   /**
    * Adds every line of a file to a collection.  Blank lines are added too: for the key words the
    * line position is the feature position, so no line may be skipped.
    *
    * @param filePath        path to the file, resolved by FileLocator.
    * @param isCaseSensitive false to lowercase each line before adding it.
    * @param collection      receives the lines.
    * @throws IOException if the file cannot be read.  The original exception is kept as the cause.
    */
   static private void parseFile( final String filePath,
                                  final boolean isCaseSensitive,
                                  final Collection<String> collection ) throws IOException {
      try ( BufferedReader reader
                  = new BufferedReader(
            new InputStreamReader(
                  FileLocator.getAsStream( filePath ) ) ) ) {
         String line = reader.readLine();
         while ( line != null ) {
            collection.add( isCaseSensitive ? line : line.toLowerCase( Locale.ROOT ) );
            line = reader.readLine();
         }
      } catch ( IOException ioE ) {
         throw new IOException( "Couldn't read " + filePath + " " + ioE.getMessage(), ioE );
      }
   }


}

// Modified: ctakes/trunk/ctakes-ytex-web/pom.xml
// URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-ytex-web/pom.xml?rev=1905443&r1=1905442&r2=1905443&view=diff
// ==============================================================================
// --- ctakes/trunk/ctakes-ytex-web/pom.xml (original)
// +++ ctakes/trunk/ctakes-ytex-web/pom.xml Mon Nov 21 17:50:20 2022
// @@ -132,32 +132,63 @@
//  <build>
//  <!-- dirty hack to get resources into the classpath (because the *-res dependencies are empty) -->
// - <resources>
// - <resource>
// - <directory>${project.basedir}/../ctakes-ytex-res/src/main/resources</directory>
// - </resource>
// - <resource>
// - <directory>
// - ${project.basedir}/../ctakes-ytex/target/classes
// - </directory>
// - <excludes>
// - <exclude>**/*.class</exclude>
// - </excludes>
// - </resource>
// - </resources>
// + <!-- REMOVED 04/28/2022 in favor of maven-resources-plugin method below.
SPF --> + <!-- <resources>--> + <!-- <resource>--> + <!-- <directory>${project.basedir}/../ctakes-ytex-res/src/main/resources</directory>--> + <!-- </resource>--> + <!-- <resource>--> + <!-- <directory>--> + <!-- ${project.basedir}/../ctakes-ytex/target/classes--> + <!-- </directory>--> + <!-- <excludes>--> + <!-- <exclude>**/*.class</exclude>--> + <!-- </excludes>--> + <!-- </resource>--> + <!-- </resources>--> <!-- dirty hack to get test resources into the classpath (because the *-res dependencies are empty) --> - <testResources> - <testResource> - <directory> - ${project.basedir}/../ctakes-ytex/target/test-classes - </directory> - <excludes> - <exclude>**/*.class</exclude> - </excludes> - </testResource> - </testResources> + <!-- <testResources>--> + <!-- <testResource>--> + <!-- <directory>--> + <!-- ${project.basedir}/../ctakes-ytex/target/test-classes--> + <!-- </directory>--> + <!-- <excludes>--> + <!-- <exclude>**/*.class</exclude>--> + <!-- </excludes>--> + <!-- </testResource>--> + <!-- </testResources>--> <plugins> + + <!-- ctakes-ytex-res is a separate module with its own code repo. + ytex-web wants its resources. There is a dirty hack above, but below is a + different method that should produce a usable result. + Using the plugin instead of redirecting resources allows maven to appropriately + build a classpath. --> + <plugin> + <artifactId>maven-resources-plugin</artifactId> + <version>3.0.2</version> + <executions> + <execution> + <id>copy-resources</id> + <phase>compile</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>${basedir}/target/classes</outputDirectory> + <resources> + <resource> + <directory>${basedir}/../ctakes-ytex-res/src/main/resources</directory> + <filtering>true</filtering> + </resource> + </resources> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> <groupId>org.eclipse.jetty</groupId> <artifactId>jetty-maven-plugin</artifactId>
