updated language pack builder

Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/95a79848
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/95a79848
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/95a79848

Branch: refs/heads/master
Commit: 95a79848e2a8aeb44c5d8628b8c447497280545f
Parents: 2c2dce4
Author: Matt Post <p...@cs.jhu.edu>
Authored: Wed Oct 12 10:04:04 2016 -0400
Committer: Matt Post <p...@cs.jhu.edu>
Committed: Wed Oct 12 10:04:04 2016 -0400

----------------------------------------------------------------------
 scripts/language-pack/README.template | 20 ++++++---
 scripts/language-pack/build_lp.sh     | 66 ++++++++++++++++++------------
 scripts/language-pack/copy_model.py   | 19 +++++----
 3 files changed, 64 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/95a79848/scripts/language-pack/README.template
----------------------------------------------------------------------
diff --git a/scripts/language-pack/README.template b/scripts/language-pack/README.template
index b996e0b..03041ca 100644
--- a/scripts/language-pack/README.template
+++ b/scripts/language-pack/README.template
@@ -1,8 +1,8 @@
 Apache Joshua Language Pack
 ===========================
 
-Thanks for downloading the Apache Joshua language pack for
-<SOURCE>--<TARGET>. This language pack provides a machine translation
+Thanks for downloading the Apache Joshua <SOURCE>--<TARGET>
+language pack. This language pack provides a machine translation
 system for automatically translating sentences from <SOURCE> to
 sentences in <TARGET>. Joshua language packs have no external
 dependencies, and can be run straight from the provided JAR file.
@@ -22,17 +22,21 @@ Quick Start
 -----------
 To run the language pack, invoke the command
 
-    joshua [[JOSHUA] OPTIONS ... ]
+    joshua [OPTIONS ...]
 
 The Joshua decoder will start running, accepting input from STDIN and writing to
 STDOUT. Joshua expects its input in the form of a single sentence per line. Each
 sentence should first be piped through `prepare.sh`, which normalizes and
-tokenizes the input. 
+tokenizes the input for the language pack's source language.
 
     cat sentences.txt | prepare.sh | joshua > output.txt
 
-Joshua can also be run in server mode, implementing either a direct TCP-IP
-interface, or implementing a Google-translate style RESTful API. To run Joshua as a TCP-IP server, add the option
+It takes some time (sometimes as much as a minute) to load all of the models
+into memory, which means there is high latency from startup until the first
+translation. To reduce this time, Joshua can also be run in server mode,
+implementing either a direct TCP-IP interface, or implementing a
+Google-translate style RESTful API. To run Joshua as a TCP-IP server, add the
+option
 
     joshua -server-port 5674
 
@@ -54,6 +58,10 @@ balance speed and accuracy. These and many other runtime options can be changed
 with the following arguments and parameters to the Joshua invocation
 demonstrated above.
 
+- `-v 1`
+
+   Be more verbose in output.
+
 -  `-threads N`
 
    N is the number of simultaneous decoding threads to launch. If this option is

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/95a79848/scripts/language-pack/build_lp.sh
----------------------------------------------------------------------
diff --git a/scripts/language-pack/build_lp.sh b/scripts/language-pack/build_lp.sh
index a51376d..68cd086 100755
--- a/scripts/language-pack/build_lp.sh
+++ b/scripts/language-pack/build_lp.sh
@@ -5,16 +5,18 @@
 
 langpair=$1
 config=$2
-credits=$3
-benchmark=$4
+mem=$3
+credits=$4
+benchmark=$5
 
 date=$(date +%Y-%m-%d)
 
-if [[ -z $4 ]]; then
-    echo "Usage: $0 langpair config credits-file benchmark-file"
+if [[ -z $5 ]]; then
+    echo "Usage: $0 langpair config mem credits-file benchmark-file"
     echo "where"
     echo "  langpair is the language pair, (e.g., es-en)"
     echo "  config is the tuned Joshua config, (1/tune/joshua.config.final)"
+    echo "  mem is the amount of memory the decoder needs"
     echo "  credits-file is a file describing how the model was built (1/CREDITS"
     echo "  benchmark-file is a file describing model performance on test sets (1/BENCHMARK)"
     exit 1
@@ -26,46 +28,56 @@ set -e
 JOSHUA=$(dirname $0)/../..
 date=$(date +%Y-%m-%d)
 dest=releases/apache-joshua-$langpair-$date
-source=$(echo $langpair | cut -d- -f1)
-target=$(echo $langpair | cut -d- -f2)
+source_abbr=$(echo $langpair | cut -d- -f1)
+target_abbr=$(echo $langpair | cut -d- -f2)
+source=$(iso639 $source_abbr)
+target=$(iso639 $target_abbr)
 
 # Create the jar file
 (cd $JOSHUA && mvn clean compile assembly:single)
 
-# Copy over critical infrastructure files
-[[ ! -d "$dest/target" ]] && mkdir -p "$dest/target"
-[[ ! -d "$dest/bin" ]] && mkdir -p "$dest/bin"
-cp $JOSHUA/target/joshua-*-jar-with-dependencies.jar $dest/target
-
-# Copy over the web demonstration
-cp -a $JOSHUA/demo $dest/web
-
 # Create the bundle
 # ... --copy-config-options "-lower-case true -project-case true"
 $JOSHUA/scripts/language-pack/copy_model.py \
     --force \
     --verbose \
+    --mem $mem \
     --copy-config-options \
-      '-top-n 1 -output-format %S -mark-oovs false -lowercase true -projectcase true' \
+      '-top-n 1 -output-format %S -mark-oovs false -lower-case -project-case' \
     $config \
     $dest
 
+copy_template() {
+  cat $1 \
+    | perl -pe "s/<SOURCE>/$source/g" \
+    | perl -pe "s/<TARGET>/$target/g" \
+    | perl -pe "s/<SRC>/$source_abbr/g" \
+    | perl -pe "s/<TRG>/$target_abbr/g" \
+    | perl -pe "s/<MEM>/$mem/g" \
+    | perl -pe "s/<DATE>/$date/g" \
+    > $2
+}
+
+# Copy over critical infrastructure files
+[[ ! -d "$dest/target" ]] && mkdir -p "$dest/target"
+cp $JOSHUA/target/joshua-*-jar-with-dependencies.jar $dest/target
+
+# Copy over the web demonstration
+cp -a $JOSHUA/demo $dest/web
+
 # Copy over preprocessing scripts
-cp -a $langpair/$modelno/scripts $dest/scripts
+cp -a $JOSHUA/scripts/preparation $dest/scripts
+copy_template "$JOSHUA/scripts/language-pack/prepare.sh" "$dest/prepare.sh"
+chmod 555 $dest/prepare.sh
 
 # Copy the credits file
-cat $credits \
-    > $dest/CREDITS
+cat $credits > $dest/CREDITS
+chmod 444 $dest/CREDITS
 
 # Summarize test set performance for the README
-cat $benchmark \
-    > $dest/BENCHMARK
+cat $benchmark > $dest/BENCHMARK
+chmod 444 $dest/BENCHMARK
 
 # Create the README
-cat $JOSHUA/scripts/language-pack/README.template \
-    | perl -pe "s/<SOURCE>/$source/g" \
-    | perl -pe "s/<TARGET>/$target/g" \
-    | perl -pe "s/<DATE>/$date/g" \
-    > $dest/README
-
-
+copy_template "$JOSHUA/scripts/language-pack/README.template" "$dest/README"
+chmod 444 $dest/README

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/95a79848/scripts/language-pack/copy_model.py
----------------------------------------------------------------------
diff --git a/scripts/language-pack/copy_model.py b/scripts/language-pack/copy_model.py
index 1ff55bb..68ab8ef 100755
--- a/scripts/language-pack/copy_model.py
+++ b/scripts/language-pack/copy_model.py
@@ -54,8 +54,9 @@ FILE_TYPE_TOKENS = ['lm', 'tm']
 FILE_TYPE_OPTIONS = ['-path', '-lm_file']
 
 OUTPUT_CONFIG_FILE_NAME = 'joshua.config'
-BUNDLE_RUNNER_FILE_NAME = 'joshua'
-BUNDLE_RUNNER_TEXT = """#!/bin/bash
+
+def bundle_runner_text(mem):
+    text = """#!/bin/bash
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
@@ -85,7 +86,7 @@ NUM_ARGS=0
 E_OPTERROR=1
 
 ## memory usage; default is 4 GB
-mem=4g
+mem=%s
 
 if [[ $1 == "-m" ]]; then
     mem=$2
@@ -102,8 +103,10 @@ exec java -mx${mem} \
     -Dfile.encoding=utf8 \
     -Djava.library.path=./lib \
     -cp ./target/joshua-*-jar-with-dependencies.jar \
-    org.apache.joshua.decoder.JoshuaDecoder -c joshua.config "$@"
-"""
+    org.apache.joshua.decoder.JoshuaDecoder -c joshua.config -v 0 "$@"
+""" % mem
+
+    return text
 
 
 LineParts = namedtuple('LineParts', ['config', 'comment'])
@@ -369,8 +372,8 @@ def handle_args(clargs):
              ' \'-top-n 0 -output-format %%S -mark-oovs false\''
     )
     parser.add_argument(
-        '--server-port', dest='server_port', type=int, default=5674,
-        help='specify the port to be used when running Joshua as a server'
+        '-m', '--mem', default='4g',
+        help='default amount of memory for Joshua. Defaults to 4g'
     )
     parser.add_argument(
         '-v', '--verbose', action='store_true',
@@ -489,7 +492,7 @@ def collect_operations(opts):
     # Write the scripts that run Joshua using the configuration and
     # resource in the bundle, and make their mode world-readable, and
     # world-executable.
-    for file_name, file_text in [[BUNDLE_RUNNER_FILE_NAME, BUNDLE_RUNNER_TEXT],]:
+    for file_name, file_text in [['joshua', bundle_runner_text(opts.mem)],]:
         path = os.path.join(opts.dest_dir, file_name)
         operations.append(
             (write_string_to_file, (path, file_text),

Reply via email to