Santhosh has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/372120 )

Change subject: Add a sentence segment binary utility for debugging purposes
......................................................................

Add a sentence segment binary utility for debugging purposes

Change-Id: I9cee55f28c219d7026935ae2837fa070c7c77338
---
A bin/segment
1 file changed, 28 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver refs/changes/20/372120/1

diff --git a/bin/segment b/bin/segment
new file mode 100755
index 0000000..72724cc
--- /dev/null
+++ b/bin/segment
@@ -0,0 +1,28 @@
+#!/usr/bin/env node
+var script, xhtmlSource, xhtml, result, segmenter,
+       fs = require( 'fs' ),
+       Segmenter = require( __dirname + '/../lib/segmentation/CXSegmenter' ).CXSegmenter,
+       LinearDoc = require( __dirname + '/../lib/lineardoc' );
+
+function normalize( html ) {
+       var normalizer = new LinearDoc.Normalizer();
+       normalizer.init();
+       normalizer.write( html.replace( /(\r\n|\n|\t|\r)/gm, '' ) );
+       return normalizer.getHtml();
+}
+
+script = process.argv[ 1 ];
+if ( process.argv.length !== 3 ) {
+       process.stderr.write(
+               'Usage: node ' + script + ' xhtmlSource\n' +
+               'xhtml must be wrapped in a block element such as <p>...</p> or <div>..</div>.\n'
+       );
+       process.exit( 1 );
+}
+
+xhtmlSource = process.argv[ 2 ];
+xhtml = fs.readFileSync( xhtmlSource, 'utf8' );
+segmenter = new Segmenter( xhtml, 'en' );
+segmenter.segment();
+result = normalize( segmenter.getSegmentedContent() );
+process.stdout.write( result + '\n' );

-- 
To view, visit https://gerrit.wikimedia.org/r/372120
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9cee55f28c219d7026935ae2837fa070c7c77338
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to