jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/372120 )

Change subject: Add a sentence segment binary utility for debugging purposes
......................................................................


Add a sentence segment binary utility for debugging purposes

Change-Id: I9cee55f28c219d7026935ae2837fa070c7c77338
---
A bin/segment
1 file changed, 28 insertions(+), 0 deletions(-)

Approvals:
  Catrope: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/bin/segment b/bin/segment
new file mode 100755
index 0000000..0ccb3a5
--- /dev/null
+++ b/bin/segment
@@ -0,0 +1,28 @@
+#!/usr/bin/env node
+var script, xhtmlSource, xhtml, result, segmenter,
+       fs = require( 'fs' ),
+       Segmenter = require( __dirname + '/../lib/segmentation/CXSegmenter' ).CXSegmenter,
+       LinearDoc = require( __dirname + '/../lib/lineardoc' );
+
+function normalize( html ) {
+       var normalizer = new LinearDoc.Normalizer();
+       normalizer.init();
+       normalizer.write( html.replace( /(\r\n|\n|\t|\r)/gm, '' ) );
+       return normalizer.getHtml();
+}
+
+html = fs.readFileSync( '/dev/stdin', 'utf8' );
+if ( html.trim() === '' ) {
+       script = process.argv[ 1 ];
+       process.stderr.write(
+               'Usage: node ' + script + ' < file\n' +
+               'Input must be wrapped in a block element such as <p>...</p> or <div>..</div>.\n'
+       );
+       process.exit( 1 );
+
+}
+
+segmenter = new Segmenter( html, 'en' );
+segmenter.segment();
+result = normalize( segmenter.getSegmentedContent() );
+process.stdout.write( result + '\n' );

-- 
To view, visit https://gerrit.wikimedia.org/r/372120
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I9cee55f28c219d7026935ae2837fa070c7c77338
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <[email protected]>
Gerrit-Reviewer: Catrope <[email protected]>
Gerrit-Reviewer: Divec <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: Santhosh <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to