Added: uima/site/trunk/uima-website/docs/d/uima-ducc-3.0.0/duccbook.html URL: http://svn.apache.org/viewvc/uima/site/trunk/uima-website/docs/d/uima-ducc-3.0.0/duccbook.html?rev=1858151&view=auto ============================================================================== --- uima/site/trunk/uima-website/docs/d/uima-ducc-3.0.0/duccbook.html (added) +++ uima/site/trunk/uima-website/docs/d/uima-ducc-3.0.0/duccbook.html Thu Apr 25 17:32:43 2019 @@ -0,0 +1,26880 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + "http://www.w3.org/TR/html4/loose.dtd"> +<html > +<head><title>Distributed UIMA Cluster Computing</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="generator" content="TeX4ht (http://www.cse.ohio-state.edu/~gurari/TeX4ht/)"> +<meta name="originator" content="TeX4ht (http://www.cse.ohio-state.edu/~gurari/TeX4ht/)"> +<!-- html --> +<meta name="src" content="duccbook.tex"> +<meta name="date" content="2019-04-02 14:27:00"> +<link rel="stylesheet" type="text/css" href="duccbook.css"> +</head><body +> + + +<div class="maketitle"> + + + + + +<h2 class="titleHead">Distributed UIMA Cluster Computing</h2> +<div class="author" ><span +class="cmr-12">Written and maintained by the Apache</span> +<br /> <span +class="cmr-12">UIMA</span><sup class="textsuperscript"><span +class="cmr-9">TM</span></sup><span +class="cmr-12"> Development Community</span><br /><br /><br /> +<br /> <span +class="cmr-12">Version 3.0.0</span></div> +<br /> +<div class="date" ></div> + + +</div> +<!--l. 18--><p class="noindent" >Copyright <span +class="cmsy-10">©</span>  2012 The Apache Software Foundation +<!--l. 20--><p class="noindent" >Copyright <span +class="cmsy-10">©</span>  2012 International Business Machines Corporation + <!--l. 
23--><p class="noindent" ><span class="paragraphHead"><a + id="x1-1000"></a><span +class="cmbx-10">License and Disclaimer</span></span> + The ASF licenses this documentation to you under the Apache License, Version 2.0 (the “License”); you may not + use this documentation except in compliance with the License. You may obtain a copy of the License + at + <!--l. 28--><p class="noindent" ><a +href="http://www.apache.org/licenses/LICENSE-2.0" class="url" ><span +class="cmtt-10">http://www.apache.org/licenses/LICENSE-2.0</span></a> + <!--l. 30--><p class="noindent" >Unless required by applicable law or agreed to in writing, this documentation and its contents are distributed under + the License on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + or implied. See the License for the specific language governing permissions and limitations under the + License. + <!--l. 35--><p class="noindent" ><span class="paragraphHead"><a + id="x1-2000"></a><span +class="cmbx-10">Trademarks</span></span> + All terms mentioned in the text that are known to be trademarks or service marks have been appropriately capitalized. + Use of such terms in this book should not be regarded as affecting the validity of the trademark or service + mark. +<!--l. 
47--><p class="noindent" >Publication date: April 2019 + + +<h2 class="likechapterHead"><a + id="x1-3000"></a>Table of Contents</h2> <div class="tableofcontents"> +<span class="partToc" >I  <a +href="#x1-5000I" id="QQ2-1-5">DUCC Concepts</a></span> +<br /><span class="chapterToc" >1 <a +href="#x1-60001" id="QQ2-1-6">DUCC Overview</a></span> +<br /> <span class="sectionToc" >1.1 <a +href="#x1-70001.1" id="QQ2-1-7">What is DUCC?</a></span> +<br /> <span class="sectionToc" >1.2 <a +href="#x1-80001.2" id="QQ2-1-8">DUCC Job Model</a></span> +<br /> <span class="sectionToc" >1.3 <a +href="#x1-90001.3" id="QQ2-1-9">DUCC From UIMA to Full Scale-out</a></span> +<br /> <span class="sectionToc" >1.4 <a +href="#x1-140001.4" id="QQ2-1-18">Error Management </a></span> +<br /> <span class="sectionToc" >1.5 <a +href="#x1-150001.5" id="QQ2-1-19">Cluster and Job Management</a></span> +<br /> <span class="sectionToc" >1.6 <a +href="#x1-160001.6" id="QQ2-1-20">Security Measures</a></span> +<br />  <span class="subsectionToc" >1.6.1 <a +href="#x1-170001.6.1" id="QQ2-1-21">ducc_ling</a></span> +<br /> <span class="sectionToc" >1.7 <a +href="#x1-180001.7" id="QQ2-1-22">Security Issues</a></span> +<br /><span class="chapterToc" >2 <a +href="#x1-190002" id="QQ2-1-23">Glossary</a></span> +<br /><span class="partToc" >II  <a +href="#x1-20000II" id="QQ2-1-24">Ducc Users Guide</a></span> +<br /><span class="chapterToc" >3 <a +href="#x1-210003" id="QQ2-1-25">Command Line Interface</a></span> +<br /> <span class="sectionToc" >3.1 <a +href="#x1-230003.1" id="QQ2-1-27">The DUCC Job Descriptor</a></span> +<br /> <span class="sectionToc" >3.2 <a +href="#x1-240003.2" id="QQ2-1-28">Operating System Limit Support</a></span> +<br /> <span class="sectionToc" >3.3 <a +href="#x1-250003.3" id="QQ2-1-29">Command Line Forms</a></span> +<br /> <span class="sectionToc" >3.4 <a +href="#x1-260003.4" id="QQ2-1-30">DUCC Commands</a></span> +<br /> <span class="sectionToc" >3.5 <a +href="#x1-270003.5" 
id="QQ2-1-31">ducc_submit</a></span> +<br /> <span class="sectionToc" >3.6 <a +href="#x1-320003.6" id="QQ2-1-36">ducc_cancel</a></span> +<br /> <span class="sectionToc" >3.7 <a +href="#x1-370003.7" id="QQ2-1-41">ducc_reserve</a></span> +<br /> <span class="sectionToc" >3.8 <a +href="#x1-420003.8" id="QQ2-1-46">ducc_unreserve</a></span> +<br /> <span class="sectionToc" >3.9 <a +href="#x1-470003.9" id="QQ2-1-51">ducc_process_submit</a></span> +<br /> <span class="sectionToc" >3.10 <a +href="#x1-520003.10" id="QQ2-1-56">ducc_process_cancel</a></span> +<br /> <span class="sectionToc" >3.11 <a +href="#x1-570003.11" id="QQ2-1-61">ducc_services</a></span> +<br />  <span class="subsectionToc" >3.11.1 <a +href="#x1-610003.11.1" id="QQ2-1-65">Common Options</a></span> +<br />  <span class="subsectionToc" >3.11.2 <a +href="#x1-620003.11.2" id="QQ2-1-66">ducc_services –register [specification file] [options]</a></span> +<br />  <span class="subsectionToc" >3.11.3 <a +href="#x1-630003.11.3" id="QQ2-1-67">ducc_services –start options</a></span> +<br />  <span class="subsectionToc" >3.11.4 <a +href="#x1-640003.11.4" id="QQ2-1-68">ducc_services –stop options</a></span> +<br />  <span class="subsectionToc" >3.11.5 <a +href="#x1-650003.11.5" id="QQ2-1-69">ducc_services –enable options</a></span> +<br />  <span class="subsectionToc" >3.11.6 <a +href="#x1-660003.11.6" id="QQ2-1-70">ducc_services –disable options</a></span> + + +<br />  <span class="subsectionToc" >3.11.7 <a +href="#x1-670003.11.7" id="QQ2-1-71">ducc_services –observe_references options</a></span> +<br />  <span class="subsectionToc" >3.11.8 <a +href="#x1-680003.11.8" id="QQ2-1-72">ducc_services –ignore_references options</a></span> +<br />  <span class="subsectionToc" >3.11.9 <a +href="#x1-690003.11.9" id="QQ2-1-73">ducc_services –modify options</a></span> +<br />  <span class="subsectionToc" >3.11.10 <a +href="#x1-700003.11.10" id="QQ2-1-74">ducc_services –query options</a></span> +<br /> <span class="sectionToc" 
>3.12 <a +href="#x1-720003.12" id="QQ2-1-76">viaducc and java_viaducc</a></span> +<br /> <span class="sectionToc" >3.13 <a +href="#x1-760003.13" id="QQ2-1-80">ducc_status</a></span> +<br /> <span class="sectionToc" >3.14 <a +href="#x1-810003.14" id="QQ2-1-85">ducc_watcher</a></span> +<br /><span class="chapterToc" >4 <a +href="#x1-870004" id="QQ2-1-91">The DUCC Public API</a></span> +<br /> <span class="sectionToc" >4.1 <a +href="#x1-880004.1" id="QQ2-1-92">Overview Of The DUCC API</a></span> +<br /> <span class="sectionToc" >4.2 <a +href="#x1-890004.2" id="QQ2-1-93">Compiling and Running With the DUCC API</a></span> +<br /> <span class="sectionToc" >4.3 <a +href="#x1-900004.3" id="QQ2-1-94">Java API</a></span> +<br /><span class="chapterToc" >5 <a +href="#x1-910005" id="QQ2-1-95">Service Management</a></span> +<br /> <span class="sectionToc" >5.1 <a +href="#x1-920005.1" id="QQ2-1-96">Overview.</a></span> +<br /> <span class="sectionToc" >5.2 <a +href="#x1-930005.2" id="QQ2-1-97">Service Types.</a></span> +<br /> <span class="sectionToc" >5.3 <a +href="#x1-940005.3" id="QQ2-1-98">Service Instance IDs</a></span> +<br /> <span class="sectionToc" >5.4 <a +href="#x1-950005.4" id="QQ2-1-99">Service References and Endpoints</a></span> +<br /> <span class="sectionToc" >5.5 <a +href="#x1-960005.5" id="QQ2-1-100">Application Broker for UIMA-AS Services</a></span> +<br /> <span class="sectionToc" >5.6 <a +href="#x1-970005.6" id="QQ2-1-101">Service Management Policies</a></span> +<br /> <span class="sectionToc" >5.7 <a +href="#x1-990005.7" id="QQ2-1-103">Service Pingers</a></span> +<br />  <span class="subsectionToc" >5.7.1 <a +href="#x1-1000005.7.1" id="QQ2-1-104">The Pinger API</a></span> +<br />  <span class="subsectionToc" >5.7.2 <a +href="#x1-1030005.7.2" id="QQ2-1-107">Declaring a Pinger in A Service</a></span> +<br />  <span class="subsectionToc" >5.7.3 <a +href="#x1-1040005.7.3" id="QQ2-1-108">Implementing a Pinger</a></span> +<br />  <span class="subsectionToc" 
>5.7.4 <a +href="#x1-1050005.7.4" id="QQ2-1-110">Building And Testing Your Pinger</a></span> +<br />  <span class="subsectionToc" >5.7.5 <a +href="#x1-1100005.7.5" id="QQ2-1-115">Globally Registered Pingers</a></span> +<br /> <span class="sectionToc" >5.8 <a +href="#x1-1110005.8" id="QQ2-1-116">Sample Pinger</a></span> +<br />  <span class="subsectionToc" >5.8.1 <a +href="#x1-1120005.8.1" id="QQ2-1-117">Using the Sample Pinger</a></span> +<br />  <span class="subsectionToc" >5.8.2 <a +href="#x1-1130005.8.2" id="QQ2-1-118">Understanding Sample Pinger</a></span> +<br />  <span class="subsectionToc" >5.8.3 <a +href="#x1-1260005.8.3" id="QQ2-1-131">Calculating New Deployments in the Pinger</a></span> +<br />  <span class="subsectionToc" >5.8.4 <a +href="#x1-1370005.8.4" id="QQ2-1-142">Summary of Sample Pinger</a></span> +<br /><span class="chapterToc" >6 <a +href="#x1-1380006" id="QQ2-1-143">Job Logs</a></span> +<br /><span class="chapterToc" >7 <a +href="#x1-1430007" id="QQ2-1-148">Job Error Handler</a></span> +<br /><span class="chapterToc" >8 <a +href="#x1-1480008" id="QQ2-1-153">DUCC Web Server</a></span> +<br /> <span class="sectionToc" >8.1 <a +href="#x1-1530008.1" id="QQ2-1-160">Common Links</a></span> +<br /> <span class="sectionToc" >8.2 <a +href="#x1-1540008.2" id="QQ2-1-161">Login</a></span> +<br /> <span class="sectionToc" >8.3 <a +href="#x1-1550008.3" id="QQ2-1-162">Jobs Page</a></span> +<br /> <span class="sectionToc" >8.4 <a +href="#x1-1560008.4" id="QQ2-1-164">Job Details Page</a></span> +<br />  <span class="subsectionToc" >8.4.1 <a +href="#x1-1570008.4.1" id="QQ2-1-165">Processes</a></span> + + +<br />  <span class="subsectionToc" >8.4.2 <a +href="#x1-1580008.4.2" id="QQ2-1-167">Work Items</a></span> +<br />  <span class="subsectionToc" >8.4.3 <a +href="#x1-1590008.4.3" id="QQ2-1-169">Performance</a></span> +<br />  <span class="subsectionToc" >8.4.4 <a +href="#x1-1600008.4.4" id="QQ2-1-171">Specification</a></span> +<br />  <span 
class="subsectionToc" >8.4.5 <a +href="#x1-1610008.4.5" id="QQ2-1-173">Files</a></span> +<br /> <span class="sectionToc" >8.5 <a +href="#x1-1620008.5" id="QQ2-1-174">Reservations Page</a></span> +<br /> <span class="sectionToc" >8.6 <a +href="#x1-1630008.6" id="QQ2-1-176">Managed Reservation Details Page</a></span> +<br />  <span class="subsectionToc" >8.6.1 <a +href="#x1-1640008.6.1" id="QQ2-1-177">Processes</a></span> +<br />  <span class="subsectionToc" >8.6.2 <a +href="#x1-1650008.6.2" id="QQ2-1-178">Specification</a></span> +<br />  <span class="subsectionToc" >8.6.3 <a +href="#x1-1660008.6.3" id="QQ2-1-179">Files</a></span> +<br /> <span class="sectionToc" >8.7 <a +href="#x1-1670008.7" id="QQ2-1-180">Services Page</a></span> +<br /> <span class="sectionToc" >8.8 <a +href="#x1-1680008.8" id="QQ2-1-181">Service Details Page</a></span> +<br />  <span class="subsectionToc" >8.8.1 <a +href="#x1-1690008.8.1" id="QQ2-1-182">Deployments</a></span> +<br />  <span class="subsectionToc" >8.8.2 <a +href="#x1-1700008.8.2" id="QQ2-1-183">Registry</a></span> +<br />  <span class="subsectionToc" >8.8.3 <a +href="#x1-1710008.8.3" id="QQ2-1-184">Files</a></span> +<br />  <span class="subsectionToc" >8.8.4 <a +href="#x1-1720008.8.4" id="QQ2-1-185">History</a></span> +<br /> <span class="sectionToc" >8.9 <a +href="#x1-1730008.9" id="QQ2-1-186">System Pages</a></span> +<br />  <span class="subsectionToc" >8.9.1 <a +href="#x1-1740008.9.1" id="QQ2-1-187">Administration</a></span> +<br />  <span class="subsectionToc" >8.9.2 <a +href="#x1-1750008.9.2" id="QQ2-1-188">Broker</a></span> +<br />  <span class="subsectionToc" >8.9.3 <a +href="#x1-1760008.9.3" id="QQ2-1-189">Classes</a></span> +<br />  <span class="subsectionToc" >8.9.4 <a +href="#x1-1770008.9.4" id="QQ2-1-190">Daemons</a></span> +<br />  <span class="subsectionToc" >8.9.5 <a +href="#x1-1780008.9.5" id="QQ2-1-191">Machines</a></span> +<br /> <span class="sectionToc" >8.10 <a +href="#x1-1790008.10" 
id="QQ2-1-192">Visualization</a></span> +<br /> <span class="sectionToc" >8.11 <a +href="#x1-1800008.11" id="QQ2-1-194">JSON</a></span> +<br /><span class="partToc" >III  <a +href="#x1-181000III" id="QQ2-1-195">Programming Model And Applications</a></span> +<br /><span class="chapterToc" >9 <a +href="#x1-1820009" id="QQ2-1-196">Building and Testing Jobs</a></span> +<br /> <span class="sectionToc" >9.1 <a +href="#x1-1830009.1" id="QQ2-1-197">Overview</a></span> +<br />  <span class="subsectionToc" >9.1.1 <a +href="#x1-1840009.1.1" id="QQ2-1-198">Basic Job Process Threading Model</a></span> +<br />  <span class="subsectionToc" >9.1.2 <a +href="#x1-1850009.1.2" id="QQ2-1-199">Alternate Pipeline Threading Model</a></span> +<br />  <span class="subsectionToc" >9.1.3 <a +href="#x1-1860009.1.3" id="QQ2-1-200">Overriding UIMA Configuration Parameters</a></span> +<br /> <span class="sectionToc" >9.2 <a +href="#x1-1870009.2" id="QQ2-1-201">Collection Segmentation and Artifact Extraction</a></span> +<br /> <span class="sectionToc" >9.3 <a +href="#x1-1880009.3" id="QQ2-1-202">CAS Consumer Changes for DUCC</a></span> +<br /> <span class="sectionToc" >9.4 <a +href="#x1-1890009.4" id="QQ2-1-203">Job Development for an Existing Pipeline Design</a></span> +<br /> <span class="sectionToc" >9.5 <a +href="#x1-1900009.5" id="QQ2-1-204">Job Development for a New Pipeline Design</a></span> +<br />  <span class="subsectionToc" >9.5.1 <a +href="#x1-1910009.5.1" id="QQ2-1-205">Collection Reader (CR) Characteristics</a></span> +<br />  <span class="subsectionToc" >9.5.2 <a +href="#x1-1920009.5.2" id="QQ2-1-206">DUCC built-in Flow Controller</a></span> +<br />  <span class="subsectionToc" >9.5.3 <a +href="#x1-1930009.5.3" id="QQ2-1-207">Workitem Feature Structure</a></span> +<br />  <span class="subsectionToc" >9.5.4 <a +href="#x1-1940009.5.4" id="QQ2-1-208">Deployment Descriptor (DD) Jobs</a></span> + + +<br />  <span class="subsectionToc" >9.5.5 <a +href="#x1-1950009.5.5" 
id="QQ2-1-209">Debugging</a></span> +<br /><span class="chapterToc" >10 <a +href="#x1-19600010" id="QQ2-1-210">Sample Application: Raw Text Processing</a></span> +<br /> <span class="sectionToc" >10.1 <a +href="#x1-19700010.1" id="QQ2-1-211">Application Function and Design</a></span> +<br /> <span class="sectionToc" >10.2 <a +href="#x1-19800010.2" id="QQ2-1-212">Configuration Parameters</a></span> +<br /> <span class="sectionToc" >10.3 <a +href="#x1-19900010.3" id="QQ2-1-213">Set up a working directory</a></span> +<br /> <span class="sectionToc" >10.4 <a +href="#x1-20000010.4" id="QQ2-1-214">Download and Install OpenNLP</a></span> +<br /> <span class="sectionToc" >10.5 <a +href="#x1-20100010.5" id="QQ2-1-215">Get some Input Text</a></span> +<br /> <span class="sectionToc" >10.6 <a +href="#x1-20200010.6" id="QQ2-1-216">Run the Job</a></span> +<br /> <span class="sectionToc" >10.7 <a +href="#x1-20300010.7" id="QQ2-1-217">Job Output</a></span> +<br /> <span class="sectionToc" >10.8 <a +href="#x1-20400010.8" id="QQ2-1-218">Job Performance Details</a></span> +<br /><span class="chapterToc" >11 <a +href="#x1-20500011" id="QQ2-1-221">Sample Application: CAS Input Processing</a></span> +<br /> <span class="sectionToc" >11.1 <a +href="#x1-20600011.1" id="QQ2-1-222">Application Function and Design</a></span> +<br /> <span class="sectionToc" >11.2 <a +href="#x1-20700011.2" id="QQ2-1-223">Configuration Parameters</a></span> +<br /> <span class="sectionToc" >11.3 <a +href="#x1-20800011.3" id="QQ2-1-224">Run the Job</a></span> +<br /> <span class="sectionToc" >11.4 <a +href="#x1-20900011.4" id="QQ2-1-225">Job Performance Details</a></span> +<br /> <span class="sectionToc" >11.5 <a +href="#x1-21000011.5" id="QQ2-1-227">Limiting Job Resources</a></span> +<br /><span class="partToc" >IV  <a +href="#x1-211000IV" id="QQ2-1-228">Ducc Administrators Guide</a></span> +<br /><span class="chapterToc" >12 <a +href="#x1-21200012" id="QQ2-1-229">Installation, Configuration, and 
Verification</a></span> +<br /> <span class="sectionToc" >12.1 <a +href="#x1-21300012.1" id="QQ2-1-230">Overview</a></span> +<br /> <span class="sectionToc" >12.2 <a +href="#x1-21400012.2" id="QQ2-1-231">Software Prerequisites</a></span> +<br /> <span class="sectionToc" >12.3 <a +href="#x1-21500012.3" id="QQ2-1-232">Building from Source</a></span> +<br /> <span class="sectionToc" >12.4 <a +href="#x1-21600012.4" id="QQ2-1-233">Documentation</a></span> +<br /> <span class="sectionToc" >12.5 <a +href="#x1-21700012.5" id="QQ2-1-234">Single System Installation and Verification</a></span> +<br /> <span class="sectionToc" >12.6 <a +href="#x1-21800012.6" id="QQ2-1-235">Minimal Hardware Requirements for Single System Installation</a></span> +<br /> <span class="sectionToc" >12.7 <a +href="#x1-21900012.7" id="QQ2-1-236">Single System Installation</a></span> +<br /> <span class="sectionToc" >12.8 <a +href="#x1-22000012.8" id="QQ2-1-237">Initial System Verification</a></span> +<br />  <span class="subsectionToc" >12.8.1 <a +href="#x1-22100012.8.1" id="QQ2-1-238">Submitting a test job</a></span> +<br />  <span class="subsectionToc" >12.8.2 <a +href="#x1-22200012.8.2" id="QQ2-1-239">Registering a test service</a></span> +<br />  <span class="subsectionToc" >12.8.3 <a +href="#x1-22300012.8.3" id="QQ2-1-240">To stop DUCC</a></span> +<br /> <span class="sectionToc" >12.9 <a +href="#x1-22400012.9" id="QQ2-1-241">Add additional nodes to the DUCC cluster</a></span> +<br /> <span class="sectionToc" >12.10 <a +href="#x1-22500012.10" id="QQ2-1-242">Ducc_ling Configuration - Running with credentials of submitting user</a></span> +<br /> <span class="sectionToc" >12.11 <a +href="#x1-22600012.11" id="QQ2-1-243">CGroups Installation and Configuration</a></span> +<br /> <span class="sectionToc" >12.12 <a +href="#x1-22700012.12" id="QQ2-1-244">Full DUCC Verification</a></span> +<br /> <span class="sectionToc" >12.13 <a +href="#x1-22800012.13" id="QQ2-1-245">Enable DUCC webserver 
login</a></span> +<br /> <span class="sectionToc" >12.14 <a +href="#x1-22900012.14" id="QQ2-1-246">DUCC webserver user data restricted access</a></span> +<br /> <span class="sectionToc" >12.15 <a +href="#x1-23000012.15" id="QQ2-1-247">DUCC daemons monitoring and notification</a></span> +<br /><span class="chapterToc" >13 <a +href="#x1-23100013" id="QQ2-1-248">Administration</a></span> + + +<br /> <span class="sectionToc" >13.1 <a +href="#x1-23200013.1" id="QQ2-1-249">WebServer Authentication</a></span> +<br />  <span class="subsectionToc" >13.1.1 <a +href="#x1-23300013.1.1" id="QQ2-1-250">Example Implementation</a></span> +<br />  <span class="subsectionToc" >13.1.2 <a +href="#x1-23400013.1.2" id="QQ2-1-251">IAuthenticationManager</a></span> +<br />  <span class="subsectionToc" >13.1.3 <a +href="#x1-23500013.1.3" id="QQ2-1-252">IAuthenticationResult</a></span> +<br />  <span class="subsectionToc" >13.1.4 <a +href="#x1-23600013.1.4" id="QQ2-1-253">Example ANT script to build jar</a></span> +<br />  <span class="subsectionToc" >13.1.5 <a +href="#x1-23700013.1.5" id="QQ2-1-254">Example ducc.properties entries</a></span> +<br />  <span class="subsectionToc" >13.1.6 <a +href="#x1-23800013.1.6" id="QQ2-1-255">Example ducc.administrators</a></span> +<br /> <span class="sectionToc" >13.2 <a +href="#x1-23900013.2" id="QQ2-1-256">Properties</a></span> +<br /> <span class="sectionToc" >13.3 <a +href="#x1-24000013.3" id="QQ2-1-257">Properties merging</a></span> +<br /> <span class="sectionToc" >13.4 <a +href="#x1-24100013.4" id="QQ2-1-258">ducc.properties</a></span> +<br /> <span class="sectionToc" >13.5 <a +href="#x1-24200013.5" id="QQ2-1-259">default.ducc.properties</a></span> +<br /> <span class="sectionToc" >13.6 <a +href="#x1-24300013.6" id="QQ2-1-260">Resource Manager Configuration: Classes and Nodepools</a></span> +<br />  <span class="subsectionToc" >13.6.1 <a +href="#x1-24400013.6.1" id="QQ2-1-261">Nodepools</a></span> +<br />  <span class="subsectionToc" >13.6.2 <a 
+href="#x1-24800013.6.2" id="QQ2-1-271">Class Definitions</a></span> +<br />  <span class="subsectionToc" >13.6.3 <a +href="#x1-24900013.6.3" id="QQ2-1-273">Validation</a></span> +<br /> <span class="sectionToc" >13.7 <a +href="#x1-25200013.7" id="QQ2-1-276">Ducc Node Definitions</a></span> +<br /> <span class="sectionToc" >13.8 <a +href="#x1-25300013.8" id="QQ2-1-278">Ducc User Definitions</a></span> +<br /> <span class="sectionToc" >13.9 <a +href="#x1-25400013.9" id="QQ2-1-280">DUCC Database Integration</a></span> +<br />  <span class="subsectionToc" >13.9.1 <a +href="#x1-25500013.9.1" id="QQ2-1-281">Overview</a></span> +<br />  <span class="subsectionToc" >13.9.2 <a +href="#x1-26000013.9.2" id="QQ2-1-286">Database Scripting Utilities</a></span> +<br />  <span class="subsectionToc" >13.9.3 <a +href="#x1-26100013.9.3" id="QQ2-1-287">Database Configuration</a></span> +<br /> <span class="sectionToc" >13.10 <a +href="#x1-26200013.10" id="QQ2-1-288">Administrative Commands</a></span> +<br />  <span class="subsectionToc" >13.10.1 <a +href="#x1-26300013.10.1" id="QQ2-1-289">autostart.py</a></span> +<br />  <span class="subsectionToc" >13.10.2 <a +href="#x1-26600013.10.2" id="QQ2-1-292">db_autostart_delete.py</a></span> +<br />  <span class="subsectionToc" >13.10.3 <a +href="#x1-27100013.10.3" id="QQ2-1-297">db_autostart_query.py</a></span> +<br />  <span class="subsectionToc" >13.10.4 <a +href="#x1-27400013.10.4" id="QQ2-1-300">start_ducc</a></span> +<br />  <span class="subsectionToc" >13.10.5 <a +href="#x1-28100013.10.5" id="QQ2-1-307">stop_ducc</a></span> +<br />  <span class="subsectionToc" >13.10.6 <a +href="#x1-28700013.10.6" id="QQ2-1-313">check_ducc</a></span> +<br />  <span class="subsectionToc" >13.10.7 <a +href="#x1-29200013.10.7" id="QQ2-1-318">build_duccling</a></span> +<br />  <span class="subsectionToc" >13.10.8 <a +href="#x1-29300013.10.8" id="QQ2-1-319">ducc_disk_info</a></span> +<br />  <span class="subsectionToc" >13.10.9 <a +href="#x1-29700013.10.9" 
id="QQ2-1-323">ducc_gather_logs</a></span> +<br />  <span class="subsectionToc" >13.10.10 <a +href="#x1-30100013.10.10" id="QQ2-1-327">ducc_post_install</a></span> +<br />  <span class="subsectionToc" >13.10.11 <a +href="#x1-30400013.10.11" id="QQ2-1-330">ducc_update</a></span> +<br />  <span class="subsectionToc" >13.10.12 <a +href="#x1-30900013.10.12" id="QQ2-1-335">rm_reconfigure</a></span> +<br />  <span class="subsectionToc" >13.10.13 <a +href="#x1-31200013.10.13" id="QQ2-1-338">rm_qoccupancy</a></span> +<br />  <span class="subsectionToc" >13.10.14 <a +href="#x1-31500013.10.14" id="QQ2-1-341">vary_off</a></span> +<br />  <span class="subsectionToc" >13.10.15 <a +href="#x1-31800013.10.15" id="QQ2-1-344">vary_on</a></span> + + +<br />  <span class="subsectionToc" >13.10.16 <a +href="#x1-32100013.10.16" id="QQ2-1-347">ducc_properties_manager</a></span> +<br />  <span class="subsectionToc" >13.10.17 <a +href="#x1-32600013.10.17" id="QQ2-1-352">db_create</a></span> +<br />  <span class="subsectionToc" >13.10.18 <a +href="#x1-32800013.10.18" id="QQ2-1-354">db_loader</a></span> +<br />  <span class="subsectionToc" >13.10.19 <a +href="#x1-33400013.10.19" id="QQ2-1-360">db_tool</a></span> +<br />  <span class="subsectionToc" >13.10.20 <a +href="#x1-33900013.10.20" id="QQ2-1-365">ducc_get_process_swap_usage</a></span> +<br /> <span class="sectionToc" >13.11 <a +href="#x1-34300013.11" id="QQ2-1-369">Administrative Tasks</a></span> +<br />  <span class="subsectionToc" >13.11.1 <a +href="#x1-34400013.11.1" id="QQ2-1-370">Add Node</a></span> +<br />  <span class="subsectionToc" >13.11.2 <a +href="#x1-34600013.11.2" id="QQ2-1-372">Remove Node</a></span> +<br />  <span class="subsectionToc" >13.11.3 <a +href="#x1-34800013.11.3" id="QQ2-1-374">Notes</a></span> +<br /><span class="chapterToc" >14 <a +href="#x1-34900014" id="QQ2-1-375">Resource Management</a></span> +<br /> <span class="sectionToc" >14.1 <a +href="#x1-35000014.1" id="QQ2-1-376">Overview</a></span> +<br /> <span 
class="sectionToc" >14.2 <a +href="#x1-35100014.2" id="QQ2-1-377">Preemption vs Eviction</a></span> +<br /> <span class="sectionToc" >14.3 <a +href="#x1-35200014.3" id="QQ2-1-378">Scheduling Policies</a></span> +<br /> <span class="sectionToc" >14.4 <a +href="#x1-35300014.4" id="QQ2-1-379">Allotment</a></span> +<br /> <span class="sectionToc" >14.5 <a +href="#x1-35400014.5" id="QQ2-1-380">Priority vs Weight</a></span> +<br /> <span class="sectionToc" >14.6 <a +href="#x1-35700014.6" id="QQ2-1-383">Node Pools</a></span> +<br /> <span class="sectionToc" >14.7 <a +href="#x1-35800014.7" id="QQ2-1-384">Scheduling Classes</a></span> +<br /><span class="chapterToc" >15 <a +href="#x1-35900015" id="QQ2-1-385">Service Management</a></span> +<br /><span class="chapterToc" >16 <a +href="#x1-36000016" id="QQ2-1-386">DUCC Web Server Customization</a></span> +<br /> <span class="sectionToc" >16.1 <a +href="#x1-36100016.1" id="QQ2-1-387">Server Side</a></span> +<br /> <span class="sectionToc" >16.2 <a +href="#x1-36200016.2" id="QQ2-1-388">Client Side</a></span> +<br /> <span class="sectionToc" >16.3 <a +href="#x1-36300016.3" id="QQ2-1-389">Build and Install</a></span> +<br /><span class="chapterToc" >17 <a +href="#x1-36400017" id="QQ2-1-390">Reliable DUCC</a></span> +<br /> <span class="sectionToc" >17.1 <a +href="#x1-36500017.1" id="QQ2-1-391">Introduction</a></span> +<br /> <span class="sectionToc" >17.2 <a +href="#x1-36600017.2" id="QQ2-1-392">Configuring Host Machines</a></span> +<br /> <span class="sectionToc" >17.3 <a +href="#x1-36700017.3" id="QQ2-1-393">Configuring DUCC</a></span> +<br /> <span class="sectionToc" >17.4 <a +href="#x1-36800017.4" id="QQ2-1-394">Webserver</a></span> +<br /> <span class="sectionToc" >17.5 <a +href="#x1-36900017.5" id="QQ2-1-395">Database</a></span> +<br /><span class="chapterToc" >18 <a +href="#x1-37000018" id="QQ2-1-396">Simulation and System Testing</a></span> +<br /> <span class="sectionToc" >18.1 <a +href="#x1-37100018.1" 
id="QQ2-1-397">Cluster Simulation</a></span> +<br />  <span class="subsectionToc" >18.1.1 <a +href="#x1-37200018.1.1" id="QQ2-1-398">Overview</a></span> +<br />  <span class="subsectionToc" >18.1.2 <a +href="#x1-37300018.1.2" id="QQ2-1-399">Node Configuration</a></span> +<br />  <span class="subsectionToc" >18.1.3 <a +href="#x1-37400018.1.3" id="QQ2-1-400">Setting up Test Mode</a></span> +<br />  <span class="subsectionToc" >18.1.4 <a +href="#x1-37500018.1.4" id="QQ2-1-401">Starting a Simulated Cluster</a></span> +<br />  <span class="subsectionToc" >18.1.5 <a +href="#x1-37900018.1.5" id="QQ2-1-405">Stopping a Simulated Cluster</a></span> +<br /> <span class="sectionToc" >18.2 <a +href="#x1-38300018.2" id="QQ2-1-409">Job Simulation</a></span> +<br />  <span class="subsectionToc" >18.2.1 <a +href="#x1-38400018.2.1" id="QQ2-1-410">Overview</a></span> + + +<br />  <span class="subsectionToc" >18.2.2 <a +href="#x1-38500018.2.2" id="QQ2-1-411">Job meta-descriptors</a></span> +<br />  <span class="subsectionToc" >18.2.3 <a +href="#x1-38600018.2.3" id="QQ2-1-412"><span +class="cmti-10">Prepare </span>Descriptors</a></span> +<br />  <span class="subsectionToc" >18.2.4 <a +href="#x1-38700018.2.4" id="QQ2-1-413">Services</a></span> +<br />  <span class="subsectionToc" >18.2.5 <a +href="#x1-38900018.2.5" id="QQ2-1-415">Generating a Job Set</a></span> +<br />  <span class="subsectionToc" >18.2.6 <a +href="#x1-39000018.2.6" id="QQ2-1-416">Running the Test Driver</a></span> +<br /> <span class="sectionToc" >18.3 <a +href="#x1-39100018.3" id="QQ2-1-417">Pre-Packaged Tests</a></span> +<br /><span class="chapterToc" >19 <a +href="#x1-39200019" id="QQ2-1-418">State Directory</a></span> +<br /> <span class="sectionToc" >19.1 <a +href="#x1-39300019.1" id="QQ2-1-419">Overview</a></span> +<br /> <span class="sectionToc" >19.2 <a +href="#x1-39400019.2" id="QQ2-1-420">Backup</a></span> +<br /> <span class="sectionToc" >19.3 <a +href="#x1-39500019.3" 
id="QQ2-1-421">Sub-directories</a></span> +<br />  <span class="subsectionToc" >19.3.1 <a +href="#x1-39600019.3.1" id="QQ2-1-422">agents</a></span> +<br />  <span class="subsectionToc" >19.3.2 <a +href="#x1-39700019.3.2" id="QQ2-1-423">daemons</a></span> +<br />  <span class="subsectionToc" >19.3.3 <a +href="#x1-39800019.3.3" id="QQ2-1-424">database</a></span> +<br /> <span class="sectionToc" >19.4 <a +href="#x1-39900019.4" id="QQ2-1-425">Files</a></span> +<br />  <span class="subsectionToc" >19.4.1 <a +href="#x1-40000019.4.1" id="QQ2-1-426">cassandra.pid</a></span> +<br />  <span class="subsectionToc" >19.4.2 <a +href="#x1-40100019.4.2" id="QQ2-1-427">duccling.version</a></span> +<br />  <span class="subsectionToc" >19.4.3 <a +href="#x1-40200019.4.3" id="QQ2-1-428">orchestrator.properties</a></span> +<br />  <span class="subsectionToc" >19.4.4 <a +href="#x1-40300019.4.4" id="QQ2-1-429">orchestrator-state.json</a></span> +<br />  <span class="subsectionToc" >19.4.5 <a +href="#x1-40400019.4.5" id="QQ2-1-430">sm.properties</a></span> +<br /><span class="chapterToc" >20 <a +href="#x1-40500020" id="QQ2-1-431">Understanding the DUCC logs</a></span> +<br /> <span class="sectionToc" >20.1 <a +href="#x1-40600020.1" id="QQ2-1-432">Overview</a></span> +<br /> <span class="sectionToc" >20.2 <a +href="#x1-40700020.2" id="QQ2-1-433">Resource Manager Log (rm.log)</a></span> +<br />  <span class="subsectionToc" >20.2.1 <a +href="#x1-40800020.2.1" id="QQ2-1-434">Bootstrap Configuration</a></span> +<br />  <span class="subsectionToc" >20.2.2 <a +href="#x1-41300020.2.2" id="QQ2-1-439">Node Arrival and Missed Heartbeats</a></span> +<br />  <span class="subsectionToc" >20.2.3 <a +href="#x1-41600020.2.3" id="QQ2-1-442">Node Occupancy</a></span> +<br />  <span class="subsectionToc" >20.2.4 <a +href="#x1-41700020.2.4" id="QQ2-1-443">Job Arrival and Status Updates</a></span> +<br />  <span class="subsectionToc" >20.2.5 <a +href="#x1-42000020.2.5" id="QQ2-1-446">Calculation Of Job 
Caps</a></span> +<br />  <span class="subsectionToc" >20.2.6 <a +href="#x1-42100020.2.6" id="QQ2-1-447">The “how much” calculations</a></span> +<br />  <span class="subsectionToc" >20.2.7 <a +href="#x1-42200020.2.7" id="QQ2-1-448">The “what of” calculations</a></span> +<br />  <span class="subsectionToc" >20.2.8 <a +href="#x1-42300020.2.8" id="QQ2-1-449">Defragmentation</a></span> +<br />  <span class="subsectionToc" >20.2.9 <a +href="#x1-42400020.2.9" id="QQ2-1-450">Published Schedule</a></span> +<br /> <span class="sectionToc" >20.3 <a +href="#x1-42700020.3" id="QQ2-1-453">Service Manager Log (sm.log)</a></span> +<br />  <span class="subsectionToc" >20.3.1 <a +href="#x1-42800020.3.1" id="QQ2-1-454">Bootstrap configuration</a></span> +<br />  <span class="subsectionToc" >20.3.2 <a +href="#x1-43300020.3.2" id="QQ2-1-459">Receipt and analysis of Orchestrator State</a></span> +<br />  <span class="subsectionToc" >20.3.3 <a +href="#x1-43400020.3.3" id="QQ2-1-460">CLI Requests</a></span> +<br />  <span class="subsectionToc" >20.3.4 <a +href="#x1-43500020.3.4" id="QQ2-1-461">Dispatching / Startup of Service Instances</a></span> +<br />  <span class="subsectionToc" >20.3.5 <a +href="#x1-43600020.3.5" id="QQ2-1-462">Progression of Service State</a></span> + + +<br />  <span class="subsectionToc" >20.3.6 <a +href="#x1-43700020.3.6" id="QQ2-1-463">Starting and Logging Pingers</a></span> +<br />  <span class="subsectionToc" >20.3.7 <a +href="#x1-43800020.3.7" id="QQ2-1-464">Publishing State</a></span> +<br /> <span class="sectionToc" >20.4 <a +href="#x1-43900020.4" id="QQ2-1-465">Orchestrator Log (or.log)</a></span> +<br /> <span class="sectionToc" >20.5 <a +href="#x1-44000020.5" id="QQ2-1-466">Process Manager Log (pm.log)</a></span> +<br /> <span class="sectionToc" >20.6 <a +href="#x1-44100020.6" id="QQ2-1-467">Agent Log (hostname.agent.log)</a></span> +</div> + + +<h2 class="likechapterHead"><a + id="x1-4000"></a>List of Figures</h2><div class="tableofcontents"><span 
class="lofToc" >1.1 <a +href="#x1-10001r1">Standard UIMA Pipeline</a></span><br /><span class="lofToc" >1.2 <a +href="#x1-11001r2">UIMA Pipeline As Scaled by +UIMA-AS</a></span><br /><span class="lofToc" >1.3 <a +href="#x1-12001r3">UIMA Pipeline As Automatically Scaled Out By DUCC</a></span><br /><span class="lofToc" >1.4 <a +href="#x1-13001r4">UIMA Pipeline +With User-Supplied DD as Automatically Scaled Out By DUCC</a></span><br /><span class="lofToc" >5.1 <a +href="#x1-104001r1">Sample UIMA-AS +Service Pinger</a></span><br /><span class="lofToc" >8.1 <a +href="#x1-148001r1">Sample Webserver Page</a></span><br /><span class="lofToc" >8.2 <a +href="#x1-152001r2">Preferences Page</a></span><br /><span class="lofToc" >8.3 <a +href="#x1-155001r3">Jobs Page</a></span><br /><span class="lofToc" >8.4 <a +href="#x1-157004r4">Processes +Tab</a></span><br /><span class="lofToc" >8.5 <a +href="#x1-158001r5">Work Items Tab</a></span><br /><span class="lofToc" >8.6 <a +href="#x1-159001r6">Performance Tab</a></span><br /><span class="lofToc" >8.7 <a +href="#x1-160001r7">Specification Tab</a></span><br /><span class="lofToc" >8.8 <a +href="#x1-162001r8">Reservations +Page</a></span><br /><span class="lofToc" >8.9 <a +href="#x1-179001r9">Visualization</a></span><br /><span class="lofToc" >10.1 <a +href="#x1-204001r1">OpenNLP Process Measurements</a></span><br /><span class="lofToc" >10.2 <a +href="#x1-204002r2">OpenNLP +Process Breakdown</a></span><br /><span class="lofToc" >11.1 <a +href="#x1-209001r1">CAS Input Processing Performance</a></span><br /><span class="lofToc" >13.1 <a +href="#x1-245004r1">Nodepool +Example</a></span><br /><span class="lofToc" >13.2 <a +href="#x1-245007r2">Nodepools: Overlapping Pools are Incorrect</a></span><br /><span class="lofToc" >13.3 <a +href="#x1-245008r3">Nodepools: Multiple +top-level Nodepools</a></span><br /><span class="lofToc" >13.4 <a +href="#x1-247007r4">Sample Nodepool Configuration</a></span><br /><span class="lofToc" >13.5 <a 
+href="#x1-247008r5">Sample Nodepool +regex specification</a></span><br /><span class="lofToc" >13.6 <a +href="#x1-247009r6">Sample file contents for jobdriver.nodes.regex </a></span><br /><span class="lofToc" >13.7 <a +href="#x1-248001r7">Sample +Class Configuration</a></span><br /><span class="lofToc" >13.8 <a +href="#x1-252001r8">Sample Node Configuration</a></span><br /><span class="lofToc" >13.9 <a +href="#x1-253001r9">Sample User Registration</a></span><br /> +</div> + + + + +<!--l. 83--><p class="noindent" > + + +<h1 class="partHead"><span class="titlemark">Part I<br /></span><a + id="x1-5000I"></a>DUCC Concepts</h1> +<!--l. 22--><p class="noindent" ><a name='DUCC_OVERVIEW'></a> + + +<h2 class="chapterHead"><span class="titlemark">Chapter 1</span><br /><a + id="x1-60001"></a>DUCC Overview</h2> +<h3 class="sectionHead"><span class="titlemark">1.1 </span> <a + id="x1-70001.1"></a>What is DUCC?</h3> +<!--l. 28--><p class="noindent" >DUCC stands for Distributed UIMA Cluster Computing. DUCC is a cluster management system providing +tooling, management, and scheduling facilities to automate the scale-out of applications written to the UIMA +framework. +<!--l. 32--><p class="noindent" >Core UIMA provides a generalized framework for applications that process unstructured information such as human +language, but does not provide a scale-out mechanism. UIMA-AS provides a scale-out mechanism to distribute UIMA +pipelines over a cluster of computing resources, but does not provide job or cluster management of the resources. +DUCC defines a formal job model that closely maps to a standard UIMA pipeline. Around this job model +DUCC provides cluster management services to automate the scale-out of UIMA pipelines over computing +clusters. +<!--l. 39--><p class="noindent" >As of DUCC version 3.0.0 both UIMAv2 and UIMAv3 applications are supported. Although DUCC distributes a UIMA-AS +runtime, it only uses UIMA classes specified by the user’s application classpath. +<!--l. 
44--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">1.2 </span> <a + id="x1-80001.2"></a>DUCC Job Model</h3> +<!--l. 46--><p class="noindent" >The Job Model defines the steps necessary to scale-up a UIMA pipeline using DUCC. The goal of DUCC is to +scale-up any UIMA pipeline, including pipelines that must be deployed across multiple machines using shared +services. +<!--l. 50--><p class="noindent" >The DUCC Job model consists of standard UIMA components: a Collection Reader (CR), a CAS Multiplier (CM), +application logic as implemented by one or more Analysis Engines (AE), and a CAS Consumer (CC). +<!--l. 54--><p class="noindent" >The Collection Reader builds input CASs and forwards them to the UIMA pipelines. In the DUCC model, the CR is run in a +process separate from the rest of the pipeline. In fact, in all but the smallest clusters it is run on a different physical machine +than the rest of the pipeline. To achieve scalability, the CR must create very small CASs that do not contain application +data, but which contain references to data; for instance, file names. Ideally, the CR should be runnable in a process +not much larger than the smallest Java virtual machine. Later sections demonstrate methods for achieving +this. +<!--l. 62--><p class="noindent" >Each pipeline must contain at least one CAS Multiplier which receives the CASs from the CR. The CMs encapsulate the +knowledge of how to receive the data references in the small CASs received from the CRs and deliver the referenced data to +the application pipeline. DUCC packages the CM, AE(s), and CC into a single process, multiple instances of which are then +deployed over the cluster. +<!--l. 68--><p class="noindent" >A DUCC job therefore consists of a small specification containing the following items: + <ul class="itemize1"> + <li class="itemize">The name of a resource containing the CR descriptor. + </li> + <li class="itemize">The name of a resource containing the CM descriptor. 
+ </li> + <li class="itemize">The name of a resource containing the AE descriptor. + </li> + <li class="itemize">The name of a resource containing the CC descriptor. + + + </li> + <li class="itemize">Other information required to parameterize the above and identify the job such as log directory, working + directory, desired scale-out, classpath, etc. These are described in detail in subsequent sections.</li></ul> +<!--l. 80--><p class="noindent" >On job submission, DUCC creates a single process executing the CR and one or more processes containing the analysis +pipeline. +<!--l. 83--><p class="noindent" >DUCC provides other facilities in support of scale-out: + <ul class="itemize1"> + <li class="itemize">The ability to reserve all or part of a node in the cluster. + </li> + <li class="itemize">Automated management of services required in support of jobs. + </li> + <li class="itemize">The ability to schedule and execute arbitrary processes on nodes in the cluster. + </li> + <li class="itemize">Debugging tools and support. + </li> + <li class="itemize">A web server to display and manage work and cluster status. + </li> + <li class="itemize">A CLI and a Java API to support the above.</li></ul> +<!--l. 94--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">1.3 </span> <a + id="x1-90001.3"></a>DUCC From UIMA to Full Scale-out</h3> +<!--l. 96--><p class="noindent" >In this section we demonstrate the progression of a simple UIMA pipeline to a fully scaled-out job running under +DUCC. +<!--l. 99--><p class="noindent" ><span class="paragraphHead"><a + id="x1-100001.3"></a><span +class="cmbx-10">UIMA Pipelines</span></span> +A normal UIMA pipeline contains a Collection Reader (CR), one or more Analysis Engines (AE) connected in a pipeline, and +a CAS Consumer (CC) as shown in <a +href="#x1-10001r1">Figure  1.1</a>. +<!--l. 104--><p class="noindent" ><hr class="figure"><div class="figure" +> + + +<a + id="x1-10001r1"></a> + + + +<!--l. 
106--><p class="noindent" ><img +src="images/uima-pipeline.jpg" alt="PIC" +> +<br /> <div class="caption" +><span class="id">Figure 1.1: </span><span +class="content">Standard UIMA Pipeline</span></div><!--tex4ht:label?: x1-10001r1 --> + + +<!--l. 109--><p class="noindent" ></div><hr class="endfigure"> +<!--l. 111--><p class="noindent" ><span class="paragraphHead"><a + id="x1-110001.3"></a><span +class="cmbx-10">UIMA-AS Scaled Pipeline</span></span> +With UIMA-AS the CR is separated into a discrete process and a CAS Multiplier (CM) is introduced into the pipeline as an +interface between the CR and the pipeline, as shown in <a +href="#x1-11001r2">Figure  1.2</a> below. Multiple pipelines are serviced by the CR and are +scaled-out over a computing cluster. The difficulty with this model is that each user is individually responsible for finding and +scheduling computing nodes, installing communication software such as ActiveMQ, and generally managing the distributed +job and associated hardware. +<!--l. 121--><p class="noindent" ><hr class="figure"><div class="figure" +> + + +<a + id="x1-11001r2"></a> + + + +<!--l. 123--><p class="noindent" ><img +src="images/uima-as-pipeline.png" alt="PIC" +> +<br /> <div class="caption" +><span class="id">Figure 1.2: </span><span +class="content">UIMA Pipeline As Scaled by UIMA-AS</span></div><!--tex4ht:label?: x1-11001r2 --> + + +<!--l. 126--><p class="noindent" ></div><hr class="endfigure"> +<!--l. 128--><p class="noindent" ><span class="paragraphHead"><a + id="x1-120001.3"></a><span +class="cmbx-10">UIMA Pipeline Scaled By DUCC</span></span> +DUCC is a UIMA and UIMA-AS-aware cluster manager. To scale out work under DUCC the developer tells DUCC what +the parts of the application are, and DUCC does the work to build the scale-out via UIMA/AS, to find and +schedule resources, to deploy the parts of the application over the cluster, and to manage the jobs while it +executes. +<!--l. 
134--><p class="noindent" >On job submission, the CR is wrapped with a DUCC main class and launched as a Job Driver (or JD). The DUCC main +class establishes communication with other DUCC components and instantiates the CR. If the CR initializes +successfully, and indicates that there are greater than 0 work items to process, the specified CM, AE and CC +components are assembled into an aggregate, wrapped with a DUCC main class, and launched as a Job Process (or +JP). +<!--l. 140--><p class="noindent" >The JP will replicate the aggregate as many times as specified, each aggregate instance running in a single thread. When the +aggregate initializes, and whenever an aggregate thread needs work, the JP wrapper will fetch the next work item from the +JD, as shown in <a +href="#x1-12001r3">Figure  1.3</a> below. +<!--l. 145--><p class="noindent" ><hr class="figure"><div class="figure" +> + + +<a + id="x1-12001r3"></a> + + + +<!--l. 147--><p class="noindent" ><img +src="images/ducc-sequential.png" alt="PIC" +> +<br /> <div class="caption" +><span class="id">Figure 1.3: </span><span +class="content">UIMA Pipeline As Automatically Scaled Out By DUCC</span></div><!--tex4ht:label?: x1-12001r3 --> + + +<!--l. 150--><p class="noindent" ></div><hr class="endfigure"> +<!--l. 152--><p class="noindent" ><span class="paragraphHead"><a + id="x1-130001.3"></a><span +class="cmbx-10">UIMA Pipeline with User-Supplied DD Scaled By DUCC</span></span> +Application programmers may supply their own Deployment Descriptors to control intra-process threading and scale-out. If a +DD is specified in the job parameters, DUCC will launch each JP with the specified UIMA-AS service instantiated in-process, +as depicted in <a +href="#x1-13001r4">Figure  1.4</a> below. In this case the user can still specify how many work items to deliver to the service +concurrently. +<!--l. 160--><p class="noindent" ><hr class="figure"><div class="figure" +> + + +<a + id="x1-13001r4"></a> + + + +<!--l. 
162--><p class="noindent" ><img +src="images/ducc-parallel.png" alt="PIC" +> +<br /> <div class="caption" +><span class="id">Figure 1.4: </span><span +class="content">UIMA Pipeline With User-Supplied DD as Automatically Scaled Out By DUCC</span></div><!--tex4ht:label?: x1-13001r4 --> + + +<!--l. 165--><p class="noindent" ></div><hr class="endfigure"> +<h3 class="sectionHead"><span class="titlemark">1.4 </span> <a + id="x1-140001.4"></a>Error Management </h3> +<!--l. 169--><p class="noindent" >DUCC provides a number of facilities to assist error management: + <ul class="itemize1"> + <li class="itemize">DUCC captures exceptions in the JPs and delivers them to the Job Drivers. The JD wrappers implement logic + to enforce error thresholds, to identify and log errors, and to reflect job problems in the DUCC Web Server. + Error thresholds are configurable both globally and on a per-job basis. + </li> + <li class="itemize">Error and timeout thresholds are implemented for both the initialization phase of a pipeline and the execution + phase. + </li> + <li class="itemize">Retry-after-error is supported: if a process has a failure on some CAS after initialization is successful, the + process is terminated and all affected CASs are retried, up to some configurable threshold. + </li> + <li class="itemize">To avoid disrupting existing workloads by a job that will fail to run, DUCC ensures that JD and JP processes + can successfully initialize before fully scaling out a job. + </li> + <li class="itemize">Various error conditions encountered while a job is running will prevent a problematic job from continuing + scale out, and can result in termination of the job.</li></ul> +<!--l. 191--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">1.5 </span> <a + id="x1-150001.5"></a>Cluster and Job Management</h3> +<!--l. 
192--><p class="noindent" >DUCC supports management of multiple jobs and multiple users in a distributed cluster: + <dl class="description"><dt class="description"> +<span +class="cmbx-10">Multiple User Support</span> </dt><dd +class="description">When properly configured, DUCC runs all work under the identity of the submitting + user. Logs are written with the user’s credentials into the user’s file space designated at job submission. + </dd><dt class="description"> +<span +class="cmbx-10">Fair-Share Scheduling</span> </dt><dd +class="description">DUCC provides a Fair-Share scheduler to equitably share resources among multiple users. + The scheduler also supports semi-permanent reservation of machines. + </dd><dt class="description"> +<span +class="cmbx-10">Service Management</span> </dt><dd +class="description">DUCC provides a Service Manager capable of automatically starting, stopping, and + otherwise managing and querying both UIMA-AS and non-UIMA-AS services in support of jobs. + </dd><dt class="description"> +<span +class="cmbx-10">Job Lifetime Management and Orchestration</span> </dt><dd +class="description">DUCC includes an Orchestrator to manage the lifetimes of all + entities in the system. + </dd><dt class="description"> +<span +class="cmbx-10">Node Sharing</span> </dt><dd +class="description">DUCC allocates processes for one or more users on a node, each with a specified amount of memory. + DUCC’s preferred mechanism for constraining memory use is Linux Control Groups, or CGroups. For nodes + that do not support CGroups, DUCC agents monitor RAM use and kill processes that exceed their share size + by a settable fudge factor. + + + </dd><dt class="description"> +<span +class="cmbx-10">DUCC Agents</span> </dt><dd +class="description">DUCC Agents manage each node’s local resources and all processes started by DUCC. Each node in a + cluster has exactly one Agent. 
The Agent + <ul class="itemize1"> + <li class="itemize">Monitors and reports node capabilities (memory, etc) and performance data (CPU busy, swap, etc). + </li> + <li class="itemize">Starts, stops, and monitors all processes on behalf of users. + </li> + <li class="itemize">Patrols the node for “foreign” (non-DUCC) processes, reporting them to the Web Server, and optionally + reaping them. + </li> + <li class="itemize">Ensures job processes do not exceed their declared memory requirements through the use of Linux + CGroups.</li></ul> + </dd><dt class="description"> +<span +class="cmbx-10">DUCC Web server</span> </dt><dd +class="description">DUCC provides a web server displaying all aspects of the system: + <ul class="itemize1"> + <li class="itemize">All jobs in the system, their current state, resource usage, etc. + </li> + <li class="itemize">All reserved resources and associated information (owner, etc.), including the ability to request and cancel + reservations. + </li> + <li class="itemize">All services, including the ability to start, stop, and modify service definitions. + </li> + <li class="itemize">All nodes in the system and their status, usage, etc. + </li> + <li class="itemize">The status of all DUCC management processes. + </li> + <li class="itemize">Access to documentation.</li></ul> + </dd><dt class="description"> +<span +class="cmbx-10">Cluster Management Support</span> </dt><dd +class="description">DUCC provides system management support to: + <ul class="itemize1"> + <li class="itemize">Start, stop, and query full DUCC systems. + </li> + <li class="itemize">Start, stop, and quiesce individual DUCC components. + </li> + <li class="itemize">Add and delete nodes from the DUCC system. + </li> + <li class="itemize">Discover DUCC processes (e.g. after partial failures). + </li> + <li class="itemize">Find and kill errant job processes belonging to individual users. 
+ </li> + <li class="itemize">Monitor and display inter-DUCC messages.</li></ul> + </dd></dl> + + +<!--l. 261--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">1.6 </span> <a + id="x1-160001.6"></a>Security Measures</h3> +<!--l. 262--><p class="noindent" >The following DUCC security measures are provided: + <dl class="description"><dt class="description"> +<span +class="cmbx-10">user credentials</span> </dt><dd +class="description">DUCC instantiates user processes using a setuid root executable named ducc_ling. See more at + <a +href="#x1-170001.6.1"><span +class="cmti-10">ducc</span><span +class="cmti-10">_ling</span></a>. + </dd><dt class="description"> +<span +class="cmbx-10">command line interface</span> </dt><dd +class="description">The CLI employs HTTP to send requests to the DUCC controller. The CLI creates and + employs public and private security keys in the user’s home directory for authentication of HTTP requests. + The controller validates requests via these same security keys. + </dd><dt class="description"> +<span +class="cmbx-10">webserver</span> </dt><dd +class="description">The webserver facilitates operational control and therefore authentication is desirable. + <ul class="itemize1"> + <li class="itemize">Each user has the ability to control certain aspects of only his/her active submissions. + </li> + <li class="itemize">Each administrator has the ability to control certain aspects of any user’s active submissions, as well as + modification of some DUCC operational characteristics.</li></ul> + <!--l. 280--><p class="noindent" >A simple interface is provided so that an installation can plug-in a site specific authentication mechanism comprising + userid and password. + </dd><dt class="description"> +<span +class="cmbx-10">ActiveMQ</span> </dt><dd +class="description">DUCC uses ActiveMQ for administrative communication. AMQ authentication is used to prevent arbitrary + processes from participating. 
But when testing DUCC on a simulated cluster the AMQ broker runs without any access + restrictions so that it can be used as an application broker for UIMA-AS services used in simulation tests. See + <a +href="#x1-37500018.1.4"><span +class="cmti-10">start</span><span +class="cmti-10">_sim</span></a>.</dd></dl> +<!--l. 290--><p class="noindent" > +<h4 class="subsectionHead"><span class="titlemark">1.6.1 </span> <a + id="x1-170001.6.1"></a>ducc_ling</h4> +<!--l. 292--><p class="noindent" >ducc_ling contains the following functions, which the security-conscious may verify by examining the source in +$DUCC_HOME/duccling. All sensitive operations are performed only AFTER switching userids, to prevent unauthorized +root access to the system. + <ul class="itemize1"> + <li class="itemize">Changes its real and effective userid to that of the user invoking the job. + </li> + <li class="itemize">Optionally redirects its stdout and stderr to the DUCC log for the current job. + </li> + <li class="itemize">Optionally redirects its stdio to a port set by the CLI, when a job is submitted. + </li> + <li class="itemize">“Nice”s itself to a “worse” priority than the default, to reduce the chances that a runaway DUCC job could + monopolize a system. + </li> + <li class="itemize">Optionally sets user limits. + </li> + <li class="itemize">Prints the effective limits for a job to both the user’s log, and the DUCC agent’s log. + + + </li> + <li class="itemize">Changes to the user’s working directory, as specified by the job. + </li> + <li class="itemize">Optionally establishes LD_LIBRARY_PATH for the job from the environment variable <span +class="cmtt-10">DUCC</span><span +class="cmtt-10">_LD</span><span +class="cmtt-10">_LIBRARY</span><span +class="cmtt-10">_PATH</span> + if set in the DUCC job specification. (Secure Linux systems will prevent LD_LIBRARY_PATH from being set + by a program with root authority, so this is done AFTER changing userids). 
+ </li> + <li class="itemize">ONLY user <span +class="cmti-10">ducc </span>may use the ducc_ling program in a privileged way. Ducc_ling contains checks to prevent even + user <span +class="cmti-10">root </span>from using it for privileged operations. + </li></ul> +<!--l. 317--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">1.7 </span> <a + id="x1-180001.7"></a>Security Issues</h3> +<!--l. 318--><p class="noindent" >The following DUCC security issues should be considered: + <dl class="description"><dt class="description"> +<span +class="cmbx-10">submit transmission ’sniffed’</span> </dt><dd +class="description">In the event that the DUCC submit command is ’sniffed’ then the user + authentication mechanism is compromised and user masquerading is possible. That is, the userid encryption + mechanism can be exploited such that user A can submit a job pretending to be user B. + </dd><dt class="description"> +<span +class="cmbx-10">user </span><span +class="cmbxti-10">ducc </span><span +class="cmbx-10">password compromised</span> </dt><dd +class="description">In the event that the <span +class="cmti-10">ducc </span>user password is compromised then the root + privileged command <span +class="cmbx-10">ducc</span><span +class="cmbx-10">_ling </span>can be used to become any other user except root. + </dd><dt class="description"> +<span +class="cmbx-10">user </span><span +class="cmbxti-10">root </span><span +class="cmbx-10">password compromised</span> </dt><dd +class="description">In the event that the <span +class="cmti-10">root </span>user password is compromised DUCC provides no + protection. That is, compromising the root user is equivalent to compromising the DUCC user password.</dd></dl> +<!--l. 
22--><p class="noindent" ><a name='DUCC_TERMINOLOGY'></a> + + +<h2 class="chapterHead"><span class="titlemark">Chapter 2</span><br /><a + id="x1-190002"></a>Glossary</h2> + <dl class="description"><dt class="description"> +<span +class="cmbx-10">Agent</span> </dt><dd +class="description">DUCC Agent processes run on every node in the system. The Agent receives orders to start and stop processes + on each node. Agents monitor nodes, sending heartbeat packets with node statistics to interested components + (such as the RM and web-server). If CGroups are installed in the cluster, the Agent is responsible for managing + the CGroups for each job process. All processes other than the DUCC management processes are managed + as children of the agents. + </dd><dt class="description"> +<span +class="cmbx-10">Autostarted Service</span> </dt><dd +class="description">An autostarted service is a registered service that is started automatically by DUCC when + the DUCC system is booted. + </dd><dt class="description"> +<span +class="cmbx-10">Dependent Service or Job</span> </dt><dd +class="description">A dependent service or job is a service or job that specifies one or more service + dependencies in their job specification. The service or job is dependent upon the referenced service being + operational before being started by DUCC. + </dd><dt class="description"> +<span +class="cmbx-10">DUCC</span> </dt><dd +class="description">Distributed UIMA Cluster Computing. + </dd><dt class="description"> +<span +class="cmbx-10">DUCC-MON</span> </dt><dd +class="description">DUCC-MON is the DUCC web-server. + </dd><dt class="description"> +<span +class="cmbx-10">Job</span> </dt><dd +class="description">A DUCC job consists of the components required to deploy and execute a UIMA pipeline over a computing + cluster. It consists of a JD to run the Collection Reader, a set of JPs to run the UIMA AEs, and a Job + Specification to describe how the parts fit together. 
+ </dd><dt class="description"> +<span +class="cmbx-10">Job Driver (JD)</span> </dt><dd +class="description">The Job Driver is a thin wrapper that encapsulates a Job’s Collection Reader. The JD executes + as a process that is scheduled and deployed by DUCC. + </dd><dt class="description"> +<span +class="cmbx-10">Job Process (JP)</span> </dt><dd +class="description">The Job Process is a thin wrapper that encapsulates a job’s pipeline components. The JP + executes in a process that is scheduled and deployed by DUCC. + </dd><dt class="description"> +<span +class="cmbx-10">Job Specification</span> </dt><dd +class="description">The Job Specification is a collection of properties that describe work to be scheduled and + deployed by DUCC. It identifies the UIMA components (CR, AE, etc) that comprise the job and the system-wide + properties of the job (CLASSPATHs, RAM requirements, etc). + </dd><dt class="description"> +<span +class="cmbx-10">Machine</span> </dt><dd +class="description">A physical computing resource managed by the DUCC Resource Manager. + </dd><dt class="description"> +<span +class="cmbx-10">Managed Reservation</span> </dt><dd +class="description">A DUCC managed reservation comprises an arbitrary process that is deployed on the + computing cluster within a <span +class="cmti-10">share </span>assigned by the DUCC scheduler. + </dd><dt class="description"> +<span +class="cmbx-10">Node</span> </dt><dd +class="description">See Machine. + </dd><dt class="description"> +<span +class="cmbx-10">Orchestrator (OR)</span> </dt><dd +class="description">The Orchestrator manages the life cycle of all entities within DUCC. + </dd><dt class="description"> +<span +class="cmbx-10">Process</span> </dt><dd +class="description">A process is one physical process executing on a machine in the DUCC cluster. DUCC jobs are comprised + of one or more processes (JDs and JPs). Each process is assigned one or more <span +class="cmti-10">shares </span>by the DUCC scheduler. 
+ </dd><dt class="description"> +<span +class="cmbx-10">Process Manager (PM)</span> </dt><dd +class="description">The Process Manager coordinates distribution of work among the Agents. + </dd><dt class="description"> + + +<span +class="cmbx-10">Registered Service</span> </dt><dd +class="description">A registered service is a service that is registered with DUCC. DUCC saves the service + specification and fully manages the service, ensuring it is running when needed, and shut down when not. + </dd><dt class="description"> +<span +class="cmbx-10">Resource Manager (RM)</span> </dt><dd +class="description">The Resource Manager schedules physical resources for DUCC work. + </dd><dt class="description"> +<span +class="cmbx-10">Service Endpoint</span> </dt><dd +class="description">In DUCC, the service endpoint provides a unique identifier for a service. In the case of UIMA-AS + services, the endpoint also serves as a well-known address for contacting the service. + </dd><dt class="description"> +<span +class="cmbx-10">Service Instance</span> </dt><dd +class="description">A service instance is one physical process which runs a CUSTOM or UIMA-AS service. UIMA-AS + services are usually scaled-out with multiple instances implementing the same underlying service logic. + </dd><dt class="description"> +<span +class="cmbx-10">Service Manager (SM)</span> </dt><dd +class="description">The Service Manager manages the life-cycles of UIMA-AS and CUSTOM services. It + coordinates registration of services, starting and stopping of services, and ensures that services are available + and remain available for the lifetime of the jobs. + </dd><dt class="description"> +<span +class="cmbx-10">Share Quantum</span> </dt><dd +class="description">The DUCC scheduler abstracts the nodes in the cluster as a single large conglomerate of resources: + memory, processor cores, etc. The scheduler logically decomposes the collection of resources into some number + of equal-sized atomic units. 
Each unit of work requiring resources is apportioned one or more of these atomic + units. The smallest possible atomic unit is called the <span +class="cmti-10">share quantum</span>, or simply, <span +class="cmti-10">share</span>. + </dd><dt class="description"> +<span +class="cmbx-10">Weighted Fair Share</span> </dt><dd +class="description">A weighted fair share calculation is used to apportion resources equitably to the outstanding + work in the system. In a non-weighted fair-share system, all work requests are given equal consideration to all + resources. To provide some (“more important”) work more than equal resources, weights are used to bias the + allotment of shares in favor of some classes of work. + </dd><dt class="description"> +<span +class="cmbx-10">Work Items</span> </dt><dd +class="description">A DUCC work item is one unit of work to be completed in a single DUCC process. It is usually + initiated by the submission of a single CAS from the JD to one of the JPs. It could be thought of as a single + “question” to be answered by a UIMA analytic, or a single “task” to complete. Usually each DUCC JP executes + many work items per job. + </dd><dt class="description"> +<span +class="cmbx-10">$DUCC</span><span +class="cmbx-10">_HOME</span> </dt><dd +class="description">The root of the installed DUCC runtime, e.g. /home/ducc/ducc_runtime. It need not be set in + the environment, although the examples in this document assume that it has been. + </dd></dl> + + +<!--l. 87--><p class="noindent" > + + +<h1 class="partHead"><span class="titlemark">Part II<br /></span><a + id="x1-20000II"></a>Ducc Users Guide</h1> +<!--l. 23--><p class="noindent" ><a name='DUCC_CLI'></a> + + +<h2 class="chapterHead"><span class="titlemark">Chapter 3</span><br /><a + id="x1-210003"></a>Command Line Interface</h2> +<!--l. 
28--><p class="noindent" ><span class="paragraphHead"><a + id="x1-220003"></a><span +class="cmbx-10">Overview</span></span> +The DUCC CLI is the primary means of communication with DUCC. Work is submitted, work is canceled, work is +monitored, and work is queried with this interface. +<!--l. 32--><p class="noindent" >All parameters may be passed to all the CLI commands in the form of Unix-like “long-form” (key, value) pairs, in which the +key is proceeded by the characters “<span +class="cmsy-10">--</span>”. As well, the parameters may be saved in a standard Java Properties file, without +the leading “<span +class="cmsy-10">--</span>” characters. Both a properties file and command-line parameters may be passed to each CLI. +When both are present, the parameters on the command line take precedence. Take, for example the following +simple job properties file, call it <span +class="cmtt-10">1.job</span>, where the environment variable “DH” has been set to the location of +$DUCC_HOME. + + +<div class="verbatim" id="verbatim-1"> +description                    Test job 1 + <br /> + <br />classpath                      ${DH}/lib/uima-ducc/examples/* + <br />environment                    AE_INIT_TIME=5 AE_INIT_RANGE=5 LD_LIBRARY_PATH=/a/nother/path + <br />scheduling_class               normal + <br /> + <br />driver_descriptor_CR           org.apache.uima.ducc.test.randomsleep.FixedSleepCR + <br />driver_descriptor_CR_overrides jobfile=${DH}/lib/examples/simple/1.inputs compression=10 + <br />error_rate=0.0 + <br /> + <br />driver_jvm_args                -Xmx500M + <br /> + <br />process_descriptor_AE          org.apache.uima.ducc.test.randomsleep.FixedSleepAE + <br />process_memory_size            4 + <br />process_jvm_args               -Xmx100M + <br />process_pipeline_count         2 + <br />process_per_item_time_max      5 + <br />process_deployments_max        999 + <br /></div> +<!--l. 59--><p class="nopar" > +<!--l. 
61--><p class="noindent" >This can be submitted, overriding the scheduling class and memory, thus: + + +<div class="verbatim" id="verbatim-2"> +ducc_submit --specification 1.job --process_memory_size 16 --scheduling_class high</div> +<!--l. 64--><p class="nopar" > +<!--l. 66--><p class="noindent" >The DUCC CLI parameters are now described in detail. +<!--l. 68--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">3.1 </span> <a + id="x1-230003.1"></a>The DUCC Job Descriptor</h3> +<!--l. 69--><p class="noindent" >The DUCC Job Descriptor includes properties to enable automated management and scale-out over large computing clusters. +The job descriptor includes + <ul class="itemize1"> + <li class="itemize">References to the various UIMA components required by the job (CR, CM, AE, CC, and maybe DD) + </li> + <li class="itemize">Scale-out requirements: number of processes, number of threads per process, etc + </li> + <li class="itemize">Environment requirements: log directory, working directory, environment variables, etc, + </li> + <li class="itemize">JVM parameters + </li> + <li class="itemize">Scheduling class + </li> + <li class="itemize">Error-handling preferences: acceptable failure counts, timeouts, etc + </li> + <li class="itemize">Debugging and monitoring requirements and preferences</li></ul> +<!--l. 81--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">3.2 </span> <a + id="x1-240003.2"></a>Operating System Limit Support</h3> +<!--l. 82--><p class="noindent" >The CLI supports specification of operating system limits applied to the various job processes. To specify a limit, pass the +name of the limit and its value in the <span +class="cmti-10">environment </span>specified in the job. Limits are named with the string +“DUCC_RLIMIT_name” where “name” is the name of a specific limit. 
Supported limits include: + <ul class="itemize1"> + <li class="itemize">DUCC_RLIMIT_CORE + </li> + <li class="itemize">DUCC_RLIMIT_CPU + </li> + <li class="itemize">DUCC_RLIMIT_DATA + </li> + <li class="itemize">DUCC_RLIMIT_FSIZE + + + </li> + <li class="itemize">DUCC_RLIMIT_MEMLOCK + </li> + <li class="itemize">DUCC_RLIMIT_NOFILE + </li> + <li class="itemize">DUCC_RLIMIT_NPROC + </li> + <li class="itemize">DUCC_RLIMIT_RSS + </li> + <li class="itemize">DUCC_RLIMIT_STACK + </li> + <li class="itemize">DUCC_RLIMIT_AS + </li> + <li class="itemize">DUCC_RLIMIT_LOCKS + </li> + <li class="itemize">DUCC_RLIMIT_SIGPENDING + </li> + <li class="itemize">DUCC_RLIMIT_MSGQUEUE + </li> + <li class="itemize">DUCC_RLIMIT_NICE + </li> + <li class="itemize">DUCC_RLIMIT_STACK + </li> + <li class="itemize">DUCC_RLIMIT_RTPRIO</li></ul> +<!--l. 104--><p class="noindent" >See the Linux documentation for details on the meanings of these limits and their values. +<!--l. 106--><p class="noindent" >For example, to set the maximum number of open files allowed in any job process, specify an environment similar to this +when submitting the job: + + +<div class="verbatim" id="verbatim-3"> +     ducc_submit .... --environment="DUCC_RLIMIT_NOFILE=1024" ...</div> +<!--l. 110--><p class="nopar" > +<!--l. 112--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">3.3 </span> <a + id="x1-250003.3"></a>Command Line Forms</h3> +<!--l. 113--><p class="noindent" >The Command Line Interface is provided in several forms: +<!--l. 116--><p class="noindent" > + <dl class="enumerate-enumitem"><dt class="enumerate-enumitem"> + 1. </dt><dd +class="enumerate-enumitem">A wrapper script around the uima-ducc-cli.jar. + </dd><dt class="enumerate-enumitem"> + 2. </dt><dd +class="enumerate-enumitem">Direct invocation of each command’s <span +class="cmtt-10">class </span>with the <span +class="cmtt-10">java </span>command.</dd></dl> +<!--l. 
120--><p class="noindent" >When using the scripts the full execution environment is established silently. When invoking a command’s <span +class="cmtt-10">class </span>directly, the +java <span +class="cmtt-10">CLASSPATH </span>must include the uima-ducc-cli.jar, as illustrated in the wrapper scripts. +<!--l. 124--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">3.4 </span> <a + id="x1-260003.4"></a>DUCC Commands</h3> +<!--l. 125--><p class="noindent" >The following commands are provided: + <dl class="description"><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_submit</span> </dt><dd +class="description">Submit a job for execution. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_cancel</span> </dt><dd +class="description">Cancel a job in progress. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_reserve</span> </dt><dd +class="description">Request a reservation of a machine. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_unreserve</span> </dt><dd +class="description">Cancel a reservation. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_monitor</span> </dt><dd +class="description">Monitor the progress of a job that is already submitted. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_process</span><span +class="cmbx-10">_submit</span> </dt><dd +class="description">Submit an arbitrary process (managed reservation) for execution. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_process</span><span +class="cmbx-10">_cancel</span> </dt><dd +class="description">Cancel an arbitrary process. 
+ </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_services</span> </dt><dd +class="description">Register, unregister, start, stop, modify, disable, enable, ignore references, observe references, and + query a service. + </dd><dt class="description"> +<span +class="cmbx-10">viaducc</span> </dt><dd +class="description">This is a script wrapper to facilitate execution of Eclipse workspaces as DUCC jobs as well as general + execution of arbitrary processes in DUCC-managed resources.</dd></dl> + + +<!--l. 140--><p class="noindent" >The following sections describe these commands in detail. +<!--l. 22--><p class="noindent" ><a name='DUCC_CLI_SUBMIT'></a> +<!--l. 25--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">3.5 </span> <a + id="x1-270003.5"></a>ducc_submit</h3> +<!--l. 28--><p class="noindent" ><span class="paragraphHead"><a + id="x1-280003.5"></a><span +class="cmbx-10">Description:</span></span> +The submit CLI is used to submit work for execution by DUCC. DUCC assigns a unique id to the job and schedules it for +execution. The submitter may optionally request that the progress of the job be monitored, in which case the state of the job +as it progresses through its lifetime is printed on the console. +<!--l. 33--><p class="noindent" ><span class="paragraphHead"><a + id="x1-290003.5"></a><span +class="cmbx-10">Usage:</span></span> + <dl class="description"><dt class="description"> +<span +class="cmbx-10">Script wrapper</span> </dt><dd +class="description">$DUCC_HOME/bin/ducc_submit <span +class="cmti-10">options</span> + </dd><dt class="description"> +<span +class="cmbx-10">Java Main</span> </dt><dd +class="description">java -cp $DUCC_HOME/lib/uima-ducc-cli.jar org.apache.uima.ducc.cli.DuccJobSubmit <span +class="cmti-10">options</span></dd></dl> +<!--l. 
39--><p class="noindent" ><span class="paragraphHead"><a + id="x1-300003.5"></a><span +class="cmbx-10">Options:</span></span> + <dl class="description"><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">all</span><span +class="cmbx-10">_in</span><span +class="cmbx-10">_one </span><span +class="cmmi-10">&lt;</span><span +class="cmbx-10">local </span><span +class="cmsy-10">| </span><span +class="cmbx-10">remote </span><span +class="cmmi-10">&gt;</span> </dt><dd +class="description">Run driver and pipeline in a single process. If <span +class="cmti-10">local </span>is specified, the process is + executed on the local machine, for example, in the current Eclipse session. If <span +class="cmti-10">remote </span>is specified, the job is + submitted to DUCC as a <span +class="cmti-10">managed reservation </span>and run on some (presumably larger) machine allocated by + DUCC. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">attach</span><span +class="cmbx-10">_console</span> </dt><dd +class="description">If specified, redirect remote stdout and stderr to the local submitting console. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">cancel</span><span +class="cmbx-10">_on</span><span +class="cmbx-10">_interrupt</span> </dt><dd +class="description">If specified, the job is monitored and will be canceled if the submit command is + interrupted, e.g. with CTRL-C. This option always implies <span +class="cmsy-10">--</span><span +class="cmti-10">wait</span><span +class="cmti-10">_for</span><span +class="cmti-10">_completion</span>. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">classpath [path-string]</span> </dt><dd
[... 25449 lines stripped ...]
