Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Tika Wiki" for change 
notification.

The "GrobidJournalParser" page has been changed by ChrisMattmann:
https://wiki.apache.org/tika/GrobidJournalParser?action=diff&rev1=4&rev2=5

Comment:
- update example from Tika app

              "org.apache.tika.parser.CompositeParser",
              "org.apache.tika.parser.journal.JournalParser"
          ],
-         "X-TIKA:content": "<html 
xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta 
name=\"access_permission:extract_for_accessibility\" content=\"true\" />\n<meta 
name=\"meta:save-date\" content=\"2006-02-15T21:16:01Z\" />\n<meta 
name=\"access_permission:modify_annotations\" content=\"true\" />\n<meta 
name=\"Creation-Date\" content=\"2006-02-15T21:13:58Z\" />\n<meta 
name=\"grobid:header_Address\" content=\"Pasadena, CA 91109, USA Los Angeles, 
CA 90089, USA\" />\n<meta name=\"access_permission:fill_in_form\" 
content=\"true\" />\n<meta name=\"created\" content=\"Wed Feb 15 13:13:58 PST 
2006\" />\n<meta name=\"grobid:header_FullAffiliations\" 
content=\"[Affiliation{name='null', url='null', institutions=[California 
Institute of Technology], departments=null, laboratories=[Jet Propulsion 
Laboratory], country='USA', postCode='91109', postBox='null', region='CA', 
settlement='Pasadena', addrLine='null', marker='1', addressString='null', 
affiliationString='null', failAffiliation=false}, Affiliation{name='null', 
url='null', institutions=[University of Southern California], 
departments=[Computer Science Department], laboratories=null, country='USA', 
postCode='90089', postBox='null', region='CA', settlement='Los Angeles', 
addrLine='null', marker='2', addressString='null', 
affiliationString='null',..snip..",
+         "X-TIKA:content": "<html 
xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta 
name=\"access_permission:extract_for_accessibility\" content=\"true\" />\n<meta 
name=\"meta:save-date\" content=\"2006-02-15T21:16:01Z\" />\n<meta 
name=\"grobid:header_Affiliation\" content=\"1 Jet Propulsion Laboratory 
California Institute of Technology; 2 Computer Science Department University of 
Southern California\" />\n<meta name=\"Content-Length\" content=\"200435\" 
/>\n<meta name=\"dcterms:created\" content=\"2006-02-15T21:13:58Z\" />\n<meta 
name=\"Author\" content=\"End User Computing Services\" />\n<meta name=\"date\" 
content=\"2006-02-15T21:16:01Z\" />\n<meta 
name=\"access_permission:can_modify\" content=\"true\" />\n<meta 
name=\"creator\" content=\"End User Computing Services\" />\n<meta 
name=\"access_permission:modify_annotations\" content=\"true\" />\n<meta 
name=\"Creation-Date\" content=\"2006-02-15T21:13:58Z\" />\n<meta 
name=\"grobid:header_Address\" content=\"Pasadena, CA 91109 USA Los Angeles, CA 
90089 USA \" />\n<meta name=\"meta:author\" content=\"End User Computing 
Services\" />\n<meta name=\"created\" content=\"Wed Feb 15 13:13:58 PST 2006\" 
/>\n<meta name=\"access_permission:fill_in_form\" content=\"true\" />\n<meta 
name=\"grobid:header_FullAffiliations\" content=\"[Affiliation {orgName=Jet 
Propulsion Laboratory California Institute of Technology , address=Pasadena, CA 
91109 USA},Affiliation {orgName=Computer Science Department University of 
Southern California , address=Los Angeles, CA 90089 USA}[Affiliation 
{orgName=Jet Propulsion Laboratory California Institute of Technology , 
address=Pasadena, CA 91109 USA},Affiliation {orgName=Computer Science 
Department University of Southern California , address=Los Angeles, CA 90089 
USA}]\" />\n<meta name=\"grobid:header_Class\" 
content=\"org.apache.tika.metadata.Metadata\" />\n<meta name=\"dc:format\" 
content=\"application/pdf; version=1.4\" />\n<meta 
name=\"access_permission:can_print\" content=\"true\" />\n<meta 
name=\"Company\" content=\"ACM\" />\n<meta name=\"xmp:CreatorTool\" 
content=\"Acrobat PDFMaker 6.0 for Word\" />\n<meta name=\"resourceName\" 
content=\"ICSE06.pdf\" />\n<meta name=\"Last-Save-Date\" 
content=\"2006-02-15T21:16:01Z\" />\n<meta name=\"dc:title\" 
content=\"Proceedings Template - WORD\" />\n<meta 
name=\"grobid:header_TEIJSONSource\" 
content=\"{&quot;TEI&quot;:{&quot;text&quot;:{&quot;xml:lang&quot;:&quot;en&quot;},&quot;teiHeader&quot;:{&quot;fileDesc&quot;:{&quot;titleStmt&quot;:{&quot;title&quot;:{&quot;content&quot;:&quot;A
 Software Architecture-Based Framework for Highly Distributed and Data 
Intensive Scientific 
Applications&quot;,&quot;level&quot;:&quot;a&quot;,&quot;type&quot;:&quot;main&quot;}},&quot;sourceDesc&quot;:{&quot;biblStruct&quot;:{&quot;analytic&quot;:{&quot;author&quot;:[{&quot;persName&quot;:{&quot;forename&quot;:[{&quot;content&quot;:&quot;Chris&quot;,&quot;type&quot;:&quot;first&quot;},{&quot;content&quot;:&quot;A&quot;,&quot;type&quot;:&quot;middle&quot;}],&quot;surname&quot;:&quot;Mattmann&quot;},&quot;affiliation&quot;:[{&quot;address&quot;:{&quot;region&quot;:&quot;CA&quot;,&quot;postCode&quot;:91109,&quot;settlement&quot;:&quot;Pasadena&quot;,&quot;country&quot;:{&quot;content&quot;:&quot;USA&quot;,&quot;key&quot;:&quot;US&quot;}},&quot;orgName&quot;:[{&quot;content&quot;:&quot;Jet
 Propulsion 
Laboratory&quot;,&quot;type&quot;:&quot;laboratory&quot;},{&quot;content&quot;:&quot;California
 Institute of 
Technology&quot;,&quot;type&quot;:&quot;institution&quot;}]},{&quot;address&quot;:{&quot;region&quot;:&quot;CA&quot;,&quot;postCode&quot;:90089,&quot;settlement&quot;:&quot;Los
 
Angeles&quot;,&quot;country&quot;:{&quot;content&quot;:&quot;USA&quot;,&quot;key&quot;:&quot;US&quot;}},&quot;orgName&quot;:[{&quot;content&quot;:&quot;Computer
 Science 
Department&quot;,&quot;type&quot;:&quot;department&quot;},{&quot;content&quot;:&quot;University
 of Southern 
California&quot;,&quot;type&quot;:&quot;institution&quot;}]}]},{&quot;persName&quot;:{&quot;forename&quot;:[{&quot;content&quot;:&quot;Daniel&quot;,&quot;type&quot;:&quot;first&quot;},{&quot;content&quot;:&quot;J&quot;,&quot;type&quot;:&quot;middle&quot;}],&quot;surname&quot;:&quot;Crichton&quot;},&quot;affiliation&quot;:{&quot;address&quot;:{&quot;region&quot;:&quot;CA&quot;,&quot;postCode&quot;:91109,&quot;settlement&quot;:&quot;Pasadena&quot;,&quot;country&quot;:{&quot;content&quot;:&quot;USA&quot;,&quot;key&quot;:&quot;US&quot;}},&quot;orgName&quot;:[{&quot;content&quot;:&quot;Jet
 Propulsion 
Laboratory&quot;,&quot;type&quot;:&quot;laboratory&quot;},{&quot;content&quot;:&quot;California
 Institute of 
Technology&quot;,&quot;type&quot;:&quot;institution&quot;}]}},{&quot;persName&quot;:{&quot;forename&quot;:{&quot;content&quot;:&quot;Nenad&quot;,&quot;type&quot;:&quot;first&quot;},&quot;surname&quot;:&quot;Medvidovic&quot;},&quot;affiliation&quot;:{&quot;address&quot;:{&quot;region&quot;:&quot;CA&quot;,&quot;postCode&quot;:90089,&quot;settlement&quot;:&quot;Los
 
Angeles&quot;,&quot;country&quot;:{&quot;content&quot;:&quot;USA&quot;,&quot;key&quot;:&quot;US&quot;}},&quot;orgName&quot;:[{&quot;content&quot;:&quot;Computer
 Science 
Department&quot;,&quot;type&quot;:&quot;department&quot;},{&quot;content&quot;:&quot;University
 of Southern 
California&quot;,&quot;type&quot;:&quot;institution&quot;}]}},{&quot;persName&quot;:{&quot;forename&quot;:{&quot;content&quot;:&quot;Steve&quot;,&quot;type&quot;:&quot;first&quot;},&quot;surname&quot;:&quot;Hughes&quot;},&quot;affiliation&quot;:{&quot;address&quot;:{&quot;region&quot;:&quot;CA&quot;,&quot;postCode&quot;:91109,&quot;settlement&quot;:&quot;Pasadena&quot;,&quot;country&quot;:{&quot;content&quot;:&quot;USA&quot;,&quot;key&quot;:&quot;US&quot;}},&quot;orgName&quot;:[{&quot;content&quot;:&quot;Jet
 Propulsion 
Laboratory&quot;,&quot;type&quot;:&quot;laboratory&quot;},{&quot;content&quot;:&quot;California
 Institute of 
Technology&quot;,&quot;type&quot;:&quot;institution&quot;}]}}],&quot;title&quot;:{&quot;content&quot;:&quot;A
 Software Architecture-Based Framework for Highly Distributed and Data 
Intensive Scientific 
Applications&quot;,&quot;level&quot;:&quot;a&quot;,&quot;type&quot;:&quot;main&quot;}},&quot;monogr&quot;:{&quot;imprint&quot;:{&quot;date&quot;:&quot;&quot;}}}},&quot;publicationStmt&quot;:{&quot;availability&quot;:{&quot;licence&quot;:&quot;&quot;,&quot;status&quot;:&quot;unknown&quot;},&quot;publisher&quot;:&quot;&quot;}},&quot;profileDesc&quot;:{&quot;abstract&quot;:{&quot;p&quot;:&quot;Modern
 scientific research is increasingly conducted by virtual communities of 
scientists distributed around the world. The data volumes created by these 
communities are extremely large, and growing rapidly. The management of the 
resulting highly distributed, virtual data systems is a complex task, 
characterized by a number of formidable technical challenges, many of which are 
of a software engineering nature. In this paper we describe our experience over 
the past seven years in constructing and deploying OODT, a software framework 
that supports large, distributed, virtual scientific communities. We outline 
the key software engineering challenges that we faced, and addressed, along the 
way. We argue that a major contributor to the success of OODT was its explicit 
focus on software architecture. We describe several large-scale, real-world 
deployments of OODT, and the manner in which OODT helped us to address the 
domain-specific challenges induced by each 
deployment.&quot;},&quot;textClass&quot;:{&quot;keywords&quot;:{&quot;term&quot;:[&quot;D2
 Software Engineering, D211 Domain Specific 
Architectures&quot;,&quot;Keywords&quot;,&quot;OODT, Data Management, Software 
Architecture&quot;]}}},&quot;xml:lang&quot;:&quot;en&quot;},&quot;xmlns&quot;:&quot;http://www.tei-c.org/ns/1.0&quot;}}\"
 />\n<meta name=\"access_permission:assemble_document\" content=\"true\" 
/>\n<meta name=\"dcterms:modified\" content=\"2006-02-15T21:16:01Z\" />\n<meta 
name=\"meta:creation-date\" content=\"2006-02-15T21:13:58Z\" />\n<meta 
name=\"dc:creator\" content=\"End User Computing Services\" />\n<meta 
name=\"pdf:PDFVersion\" content=\"1.4\" />\n<meta name=\"Last-Modified\" 
content=\"2006-02-15T21:16:01Z\" />\n<meta name=\"SourceModified\" 
content=\"D:20060215211344\" />\n<meta name=\"X-Parsed-By\" 
content=\"org.apache.tika.parser.CompositeParser\" />\n<meta 
name=\"X-Parsed-By\" content=\"org.apache.tika.parser.journal.JournalParser\" 
/>\n<meta name=\"modified\" content=\"2006-02-15T21:16:01Z\" />\n<meta 
name=\"access_permission:can_print_degraded\" content=\"true\" />\n<meta 
name=\"xmpTPg:NPages\" content=\"10\" />\n<meta name=\"pdf:encrypted\" 
content=\"false\" />\n<meta name=\"access_permission:extract_content\" 
content=\"true\" />\n<meta name=\"grobid:header_Keyword\" content=\"&quot;D2 
Software Engineering, D211 Domain Specific Architectures&quot;\" />\n<meta 
name=\"producer\" content=\"Acrobat Distiller 6.0 (Windows)\" />\n<meta 
name=\"grobid:header_Title\" content=\"A Software Architecture-Based Framework 
for Highly Distributed and Data Intensive Scientific Applications\" />\n<meta 
name=\"grobid:header_TEIXMLSource\" content=\"&lt;?xml version=&quot;1.0&quot; 
encoding=&quot;UTF-8&quot;?&gt;\n&lt;?xml-model 
href=&quot;file:///Users/mattmann/git/grobid/grobid-home/schemas/rng/Grobid.rng&quot;
 schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;?&gt;\n&lt;TEI 
xmlns=&quot;http://www.tei-c.org/ns/1.0&quot;&gt;\n\t&lt;teiHeader 
xml:lang=&quot;en&quot;&gt;\n\t\t&lt;fileDesc&gt;\n\t\t\t&lt;titleStmt&gt;\n\t\t\t\t&lt;title
 level=&quot;a&quot; type=&quot;main&quot;&gt;A Software Architecture-Based 
Framework for Highly Distributed and Data Intensive Scientific 
Applications&lt;/title&gt;\n\t\t\t&lt;/titleStmt&gt;\n\t\t\t&lt;publicationStmt&gt;\n\t\t\t\t&lt;publisher/&gt;\n\t\t\t\t&lt;availability
 
status=&quot;unknown&quot;&gt;&lt;licence/&gt;&lt;/availability&gt;\n\t\t\t&lt;/publicationStmt&gt;\n\t\t\t&lt;sourceDesc&gt;\n\t\t\t\t&lt;biblStruct&gt;\n\t\t\t\t\t&lt;analytic&gt;\n\t\t\t\t\t\t&lt;author&gt;\n\t\t\t\t\t\t\t&lt;persName&gt;\n\t\t\t\t\t\t\t\t&lt;forename
 type=&quot;first&quot;&gt;Chris&lt;/forename&gt;\n\t\t\t\t\t\t\t\t&lt;forename 
type=&quot;middle&quot;&gt;A&lt;/forename&gt;\n\t\t\t\t\t\t\t\t&lt;surname&gt;Mattmann&lt;/surname&gt;\n\t\t\t\t\t\t\t&lt;/persName&gt;\n\t\t\t\t\t\t\t&lt;affiliation&gt;\n\t\t\t\t\t\t\t\t&lt;orgName
 type=&quot;laboratory&quot;&gt;Jet Propulsion 
Laboratory&lt;/orgName&gt;\n\t\t\t\t\t\t\t\t&lt;orgName 
type=&quot;institution&quot;&gt;California Institute of 
Technology&lt;/orgName&gt;\n\t\t\t\t\t\t\t\t&lt;address&gt;\n\t\t\t\t\t\t\t\t\t&lt;postCode&gt;91109&lt;/postCode&gt;\n\t\t\t\t\t\t\t\t\t&lt;settlement&gt;Pasadena&lt;/settlement&gt;\n\t\t\t\t\t\t\t\t\t&lt;region&gt;CA&lt;/region&gt;\n\t\t\t\t\t\t\t\t\t&lt;country
 
key=&quot;US&quot;&gt;USA&lt;/country&gt;\n\t\t\t\t\t\t\t\t&lt;/address&gt;\n\t\t\t\t\t\t\t&lt;/affiliation&gt;\n\t\t\t\t\t\t\t&lt;affiliation&gt;\n\t\t\t\t\t\t\t\t&lt;orgName
 type=&quot;department&quot;&gt;Computer Science 
Department&lt;/orgName&gt;\n\t\t\t\t\t\t\t\t&lt;orgName 
type=&quot;institution&quot;&gt;University of Southern 
California&lt;/orgName&gt;\n\t\t\t\t\t\t\t\t&lt;address&gt;\n\t\t\t\t\t\t\t\t\t&lt;postCode&gt;90089&lt;/postCode&gt;\n\t\t\t\t\t\t\t\t\t&lt;settlement&gt;Los
 
Angeles&lt;/settlement&gt;\n\t\t\t\t\t\t\t\t\t&lt;region&gt;CA&lt;/region&gt;\n\t\t\t\t\t\t\t\t\t&lt;country
 
key=&quot;US&quot;&gt;USA&lt;/country&gt;\n\t\t\t\t\t\t\t\t&lt;/address&gt;\n\t\t\t\t\t\t\t&lt;/affiliation&gt;\n\t\t\t\t\t\t&lt;/author&gt;\n\t\t\t\t\t\t&lt;author&gt;\n\t\t\t\t\t\t\t&lt;persName&gt;\n\t\t\t\t\t\t\t\t&lt;forename
 
type=&quot;first&quot;&gt;Daniel&lt;/forename&gt;\n\t\t\t\t\t\t\t\t&lt;forename 
type=&quot;middle&quot;&gt;J&lt;/forename&gt;\n\t\t\t\t\t\t\t\t&lt;surname&gt;Crichton&lt;/surname&gt;\n\t\t\t\t\t\t\t&lt;/persName&gt;\n\t\t\t\t\t\t\t&lt;affiliation&gt;\n\t\t\t\t\t\t\t\t&lt;orgName
 type=&quot;laboratory&quot;&gt;Jet Propulsion 
Laboratory&lt;/orgName&gt;\n\t\t\t\t\t\t\t\t&lt;orgName 
type=&quot;institution&quot;&gt;California Institute of 
Technology&lt;/orgName&gt;\n\t\t\t\t\t\t\t\t&lt;address&gt;\n\t\t\t\t\t\t\t\t\t&lt;postCode&gt;91109&lt;/postCode&gt;\n\t\t\t\t\t\t\t\t\t&lt;settlement&gt;Pasadena&lt;/settlement&gt;\n\t\t\t\t\t\t\t\t\t&lt;region&gt;CA&lt;/region&gt;\n\t\t\t\t\t\t\t\t\t&lt;country
 
key=&quot;US&quot;&gt;USA&lt;/country&gt;\n\t\t\t\t\t\t\t\t&lt;/address&gt;\n\t\t\t\t\t\t\t&lt;/affiliation&gt;\n\t\t\t\t\t\t&lt;/author&gt;\n\t\t\t\t\t\t&lt;author&gt;\n\t\t\t\t\t\t\t&lt;persName&gt;\n\t\t\t\t\t\t\t\t&lt;forename
 
type=&quot;first&quot;&gt;Nenad&lt;/forename&gt;\n\t\t\t\t\t\t\t\t&lt;surname&gt;Medvidovic&lt;/surname&gt;\n\t\t\t\t\t\t\t&lt;/persName&gt;\n\t\t\t\t\t\t\t&lt;affiliation&gt;\n\t\t\t\t\t\t\t\t&lt;orgName
 type=&quot;department&quot;&gt;Computer Science 
Department&lt;/orgName&gt;\n\t\t\t\t\t\t\t\t&lt;orgName 
type=&quot;institution&quot;&gt;University of Southern 
California&lt;/orgName&gt;\n\t\t\t\t\t\t\t\t&lt;address&gt;\n\t\t\t\t\t\t\t\t\t&lt;postCode&gt;90089&lt;/postCode&gt;\n\t\t\t\t\t\t\t\t\t&lt;settlement&gt;Los
 
Angeles&lt;/settlement&gt;\n\t\t\t\t\t\t\t\t\t&lt;region&gt;CA&lt;/region&gt;\n\t\t\t\t\t\t\t\t\t&lt;country
 
key=&quot;US&quot;&gt;USA&lt;/country&gt;\n\t\t\t\t\t\t\t\t&lt;/address&gt;\n\t\t\t\t\t\t\t&lt;/affiliation&gt;\n\t\t\t\t\t\t&lt;/author&gt;\n\t\t\t\t\t\t&lt;author&gt;\n\t\t\t\t\t\t\t&lt;persName&gt;\n\t\t\t\t\t\t\t\t&lt;forename
 
type=&quot;first&quot;&gt;Steve&lt;/forename&gt;\n\t\t\t\t\t\t\t\t&lt;surname&gt;Hughes&lt;/surname&gt;\n\t\t\t\t\t\t\t&lt;/persName&gt;\n\t\t\t\t\t\t\t&lt;affiliation&gt;\n\t\t\t\t\t\t\t\t&lt;orgName
 type=&quot;laboratory&quot;&gt;Jet Propulsion 
Laboratory&lt;/orgName&gt;\n\t\t\t\t\t\t\t\t&lt;orgName 
type=&quot;institution&quot;&gt;California Institute of 
Technology&lt;/orgName&gt;\n\t\t\t\t\t\t\t\t&lt;address&gt;\n\t\t\t\t\t\t\t\t\t&lt;postCode&gt;91109&lt;/postCode&gt;\n\t\t\t\t\t\t\t\t\t&lt;settlement&gt;Pasadena&lt;/settlement&gt;\n\t\t\t\t\t\t\t\t\t&lt;region&gt;CA&lt;/region&gt;\n\t\t\t\t\t\t\t\t\t&lt;country
 
key=&quot;US&quot;&gt;USA&lt;/country&gt;\n\t\t\t\t\t\t\t\t&lt;/address&gt;\n\t\t\t\t\t\t\t&lt;/affiliation&gt;\n\t\t\t\t\t\t&lt;/author&gt;\n\t\t\t\t\t\t&lt;title
 level=&quot;a&quot; type=&quot;main&quot;&gt;A Software Architecture-Based 
Framework for Highly Distributed and Data Intensive Scientific 
Applications&lt;/title&gt;\n\t\t\t\t\t&lt;/analytic&gt;\n\t\t\t\t\t&lt;monogr&gt;\n\t\t\t\t\t\t&lt;imprint&gt;\n\t\t\t\t\t\t\t&lt;date/&gt;\n\t\t\t\t\t\t&lt;/imprint&gt;\n\t\t\t\t\t&lt;/monogr&gt;\n\t\t\t\t&lt;/biblStruct&gt;\n\t\t\t&lt;/sourceDesc&gt;\n\t\t&lt;/fileDesc&gt;\n\t\t&lt;profileDesc&gt;\n\t\t\t&lt;textClass&gt;\n\t\t\t\t&lt;keywords&gt;\n\t\t\t\t\t&lt;term&gt;D2
 Software Engineering, D211 Domain Specific 
Architectures&lt;/term&gt;\n\t\t\t\t\t&lt;term&gt;Keywords&lt;/term&gt;\n\t\t\t\t\t&lt;term&gt;OODT,
 Data Management, Software 
Architecture&lt;/term&gt;\n\t\t\t\t&lt;/keywords&gt;\n\t\t\t&lt;/textClass&gt;\n\t\t\t&lt;abstract&gt;\n\t\t\t\t&lt;p&gt;Modern
 scientific research is increasingly conducted by virtual communities of 
scientists distributed around the world. The data volumes created by these 
communities are extremely large, and growing rapidly. The management of the 
resulting highly distributed, virtual data systems is a complex task, 
characterized by a number of formidable technical challenges, many of which are 
of a software engineering nature. In this paper we describe our experience over 
the past seven years in constructing and deploying OODT, a software framework 
that supports large, distributed, virtual scientific communities. We outline 
the key software engineering challenges that we faced, and addressed, along the 
way. We argue that a major contributor to the success of OODT was its explicit 
focus on software architecture. We describe several large-scale, real-world 
deployments of OODT, and the manner in which OODT helped us to address the 
domain-specific challenges induced by each 
deployment.&lt;/p&gt;\n\t\t\t&lt;/abstract&gt;\n\t\t&lt;/profileDesc&gt;\n\t&lt;/teiHeader&gt;\n\t&lt;text
 xml:lang=&quot;en&quot;&gt;\n\t&lt;/text&gt;\n&lt;/TEI&gt;\n\" />\n<meta 
name=\"Content-Type\" content=\"application/pdf\" />\n<meta 
name=\"grobid:header_Authors\" content=\"Chris A Mattmann 1,2 Daniel J Crichton 
1 Nenad  Medvidovic 2 Steve  Hughes 1 \" />\n<title>Proceedings Template - 
WORD</title>\n</head>\n<body><div class=\"page\"><p />\n<p>A Software 
Architecture-Based Framework for Highly \nDistributed and Data Intensive 
Scientific Applications \n</p>\n<p> \nChris A. Mattmann1, 2        Daniel J. 
Crichton1        Nenad Medvidovic2        Steve Hughes1 \n</p>\n<p> \n1Jet 
Propulsion Laboratory \n</p>\n<p>California Institute of Technology \nPasadena, 
CA 91109, USA \n</p>\n<p>{dan.crichton,mattmann,steve.hughes}@jpl.nasa.gov 
\n</p>\n<p>2Computer Science Department \nUniversity of Southern California  
\n</p>\n<p>Los Angeles, CA 90089, USA \n{mattmann,neno}@usc.edu \n</p>\n<p> 
\nABSTRACT \nModern scientific research is increasingly conducted by virtual 
\ncommunities of scientists distributed around the world. The data \nvolumes 
created by these communities are extremely large, and \ngrowing rapidly. The 
management of the resulting highly \ndistributed, virtual data systems is 
a..snip",
-         "X-TIKA:parse_time_millis": "11529",
+         "X-TIKA:parse_time_millis": "4302",
          "access_permission:assemble_document": "true",
          "access_permission:can_modify": "true",
          "access_permission:can_print": "true",
@@ -84, +84 @@

          "dc:title": "Proceedings Template - WORD",
          "dcterms:created": "2006-02-15T21:13:58Z",
          "dcterms:modified": "2006-02-15T21:16:01Z",
-         "grobid:header_Abstract": "Modern scientific research is increasingly 
conducted by virtual communities of scientists distributed around the world. 
The data volumes created by these communities are extremely large, and growing 
rapidly. The management of the resulting highly distributed, virtual data 
systems is a complex task, characterized by a number of formidable technical 
challenges, many of which are of a software engineering nature. In this paper 
we describe our experience over the past seven years in constructing and 
deploying OODT, a software framework that supports large, distributed, virtual 
scientific communities. We outline the key software engineering challenges that 
we faced, and addressed, along the way. We argue that a major contributor to 
the success of OODT was its explicit focus on software architecture. We 
describe several large-scale, real-world deployments of OODT, and the manner in 
which OODT helped us to address the domain-specific challenges induced by each 
deployment.",
-         "grobid:header_AbstractHeader": "ABSTRACT",
-         "grobid:header_Address": "Pasadena, CA 91109, USA Los Angeles, CA 
90089, USA",
+         "grobid:header_Address": "Pasadena, CA 91109 USA Los Angeles, CA 
90089 USA ",
-         "grobid:header_Affiliation": "1 Jet Propulsion Laboratory California 
Institute of Technology ; 2 Computer Science Department University of Southern 
California",
+         "grobid:header_Affiliation": "1 Jet Propulsion Laboratory California 
Institute of Technology; 2 Computer Science Department University of Southern 
California",
-         "grobid:header_Authors": "Chris A. Mattmann 1, 2 Daniel J. Crichton 1 
Nenad Medvidovic 2 Steve Hughes 1",
+         "grobid:header_Authors": "Chris A Mattmann 1,2 Daniel J Crichton 1 
Nenad  Medvidovic 2 Steve  Hughes 1 ",
+         "grobid:header_Class": "org.apache.tika.metadata.Metadata",
+         "grobid:header_FullAffiliations": "[Affiliation {orgName=Jet 
Propulsion Laboratory California Institute of Technology , address=Pasadena, CA 
91109 USA},Affiliation {orgName=Computer Science Department University of 
Southern California , address=Los Angeles, CA 90089 USA}[Affiliation 
{orgName=Jet Propulsion Laboratory California Institute of Technology , 
address=Pasadena, CA 91109 USA},Affiliation {orgName=Computer Science 
Department University of Southern California , address=Los Angeles, CA 90089 
USA}]",
+         "grobid:header_Keyword": "\"D2 Software Engineering, D211 Domain 
Specific Architectures\"",
+         "grobid:header_TEIJSONSource": 
"{\"TEI\":{\"text\":{\"xml:lang\":\"en\"},\"teiHeader\":{\"fileDesc\":{\"titleStmt\":{\"title\":{\"content\":\"A
 Software Architecture-Based Framework for Highly Distributed and Data 
Intensive Scientific 
Applications\",\"level\":\"a\",\"type\":\"main\"}},\"sourceDesc\":{\"biblStruct\":{\"analytic\":{\"author\":[{\"persName\":{\"forename\":[{\"content\":\"Chris\",\"type\":\"first\"},{\"content\":\"A\",\"type\":\"middle\"}],\"surname\":\"Mattmann\"},\"affiliation\":[{\"address\":{\"region\":\"CA\",\"postCode\":91109,\"settlement\":\"Pasadena\",\"country\":{\"content\":\"USA\",\"key\":\"US\"}},\"orgName\":[{\"content\":\"Jet
 Propulsion Laboratory\",\"type\":\"laboratory\"},{\"content\":\"California 
Institute of 
Technology\",\"type\":\"institution\"}]},{\"address\":{\"region\":\"CA\",\"postCode\":90089,\"settlement\":\"Los
 
Angeles\",\"country\":{\"content\":\"USA\",\"key\":\"US\"}},\"orgName\":[{\"content\":\"Computer
 Science Department\",\"type\":\"department\"},{\"content\":\"University of 
Southern 
California\",\"type\":\"institution\"}]}]},{\"persName\":{\"forename\":[{\"content\":\"Daniel\",\"type\":\"first\"},{\"content\":\"J\",\"type\":\"middle\"}],\"surname\":\"Crichton\"},\"affiliation\":{\"address\":{\"region\":\"CA\",\"postCode\":91109,\"settlement\":\"Pasadena\",\"country\":{\"content\":\"USA\",\"key\":\"US\"}},\"orgName\":[{\"content\":\"Jet
 Propulsion Laboratory\",\"type\":\"laboratory\"},{\"content\":\"California 
Institute of 
Technology\",\"type\":\"institution\"}]}},{\"persName\":{\"forename\":{\"content\":\"Nenad\",\"type\":\"first\"},\"surname\":\"Medvidovic\"},\"affiliation\":{\"address\":{\"region\":\"CA\",\"postCode\":90089,\"settlement\":\"Los
 
Angeles\",\"country\":{\"content\":\"USA\",\"key\":\"US\"}},\"orgName\":[{\"content\":\"Computer
 Science Department\",\"type\":\"department\"},{\"content\":\"University of 
Southern 
California\",\"type\":\"institution\"}]}},{\"persName\":{\"forename\":{\"content\":\"Steve\",\"type\":\"first\"},\"surname\":\"Hughes\"},\"affiliation\":{\"address\":{\"region\":\"CA\",\"postCode\":91109,\"settlement\":\"Pasadena\",\"country\":{\"content\":\"USA\",\"key\":\"US\"}},\"orgName\":[{\"content\":\"Jet
 Propulsion Laboratory\",\"type\":\"laboratory\"},{\"content\":\"California 
Institute of 
Technology\",\"type\":\"institution\"}]}}],\"title\":{\"content\":\"A Software 
Architecture-Based Framework for Highly Distributed and Data Intensive 
Scientific 
Applications\",\"level\":\"a\",\"type\":\"main\"}},\"monogr\":{\"imprint\":{\"date\":\"\"}}}},\"publicationStmt\":{\"availability\":{\"licence\":\"\",\"status\":\"unknown\"},\"publisher\":\"\"}},\"profileDesc\":{\"abstract\":{\"p\":\"Modern
 scientific research is increasingly conducted by virtual communities of 
scientists distributed around the world. The data volumes created by these 
communities are extremely large, and growing rapidly. The management of the 
resulting highly distributed, virtual data systems is a complex task, 
characterized by a number of formidable technical challenges, many of which are 
of a software engineering nature. In this paper we describe our experience over 
the past seven years in constructing and deploying OODT, a software framework 
that supports large, distributed, virtual scientific communities. We outline 
the key software engineering challenges that we faced, and addressed, along the 
way. We argue that a major contributor to the success of OODT was its explicit 
focus on software architecture. We describe several large-scale, real-world 
deployments of OODT, and the manner in which OODT helped us to address the 
domain-specific challenges induced by each 
deployment.\"},\"textClass\":{\"keywords\":{\"term\":[\"D2 Software 
Engineering, D211 Domain Specific Architectures\",\"Keywords\",\"OODT, Data 
Management, Software 
Architecture\"]}}},\"xml:lang\":\"en\"},\"xmlns\":\"http://www.tei-c.org/ns/1.0\"}}",
+         "grobid:header_TEIXMLSource": "<?xml version=\"1.0\" 
encoding=\"UTF-8\"?>\n<?xml-model 
href=\"file:///Users/mattmann/git/grobid/grobid-home/schemas/rng/Grobid.rng\" 
schematypens=\"http://relaxng.org/ns/structure/1.0\"?>\n<TEI 
xmlns=\"http://www.tei-c.org/ns/1.0\">\n\t<teiHeader 
xml:lang=\"en\">\n\t\t<fileDesc>\n\t\t\t<titleStmt>\n\t\t\t\t<title level=\"a\" 
type=\"main\">A Software Architecture-Based Framework for Highly Distributed 
and Data Intensive Scientific 
Applications</title>\n\t\t\t</titleStmt>\n\t\t\t<publicationStmt>\n\t\t\t\t<publisher/>\n\t\t\t\t<availability
 
status=\"unknown\"><licence/></availability>\n\t\t\t</publicationStmt>\n\t\t\t<sourceDesc>\n\t\t\t\t<biblStruct>\n\t\t\t\t\t<analytic>\n\t\t\t\t\t\t<author>\n\t\t\t\t\t\t\t<persName>\n\t\t\t\t\t\t\t\t<forename
 type=\"first\">Chris</forename>\n\t\t\t\t\t\t\t\t<forename 
type=\"middle\">A</forename>\n\t\t\t\t\t\t\t\t<surname>Mattmann</surname>\n\t\t\t\t\t\t\t</persName>\n\t\t\t\t\t\t\t<affiliation>\n\t\t\t\t\t\t\t\t<orgName
 type=\"laboratory\">Jet Propulsion 
Laboratory</orgName>\n\t\t\t\t\t\t\t\t<orgName type=\"institution\">California 
Institute of 
Technology</orgName>\n\t\t\t\t\t\t\t\t<address>\n\t\t\t\t\t\t\t\t\t<postCode>91109</postCode>\n\t\t\t\t\t\t\t\t\t<settlement>Pasadena</settlement>\n\t\t\t\t\t\t\t\t\t<region>CA</region>\n\t\t\t\t\t\t\t\t\t<country
 
key=\"US\">USA</country>\n\t\t\t\t\t\t\t\t</address>\n\t\t\t\t\t\t\t</affiliation>\n\t\t\t\t\t\t\t<affiliation>\n\t\t\t\t\t\t\t\t<orgName
 type=\"department\">Computer Science 
Department</orgName>\n\t\t\t\t\t\t\t\t<orgName type=\"institution\">University 
of Southern 
California</orgName>\n\t\t\t\t\t\t\t\t<address>\n\t\t\t\t\t\t\t\t\t<postCode>90089</postCode>\n\t\t\t\t\t\t\t\t\t<settlement>Los
 
Angeles</settlement>\n\t\t\t\t\t\t\t\t\t<region>CA</region>\n\t\t\t\t\t\t\t\t\t<country
 
key=\"US\">USA</country>\n\t\t\t\t\t\t\t\t</address>\n\t\t\t\t\t\t\t</affiliation>\n\t\t\t\t\t\t</author>\n\t\t\t\t\t\t<author>\n\t\t\t\t\t\t\t<persName>\n\t\t\t\t\t\t\t\t<forename
 type=\"first\">Daniel</forename>\n\t\t\t\t\t\t\t\t<forename 
type=\"middle\">J</forename>\n\t\t\t\t\t\t\t\t<surname>Crichton</surname>\n\t\t\t\t\t\t\t</persName>\n\t\t\t\t\t\t\t<affiliation>\n\t\t\t\t\t\t\t\t<orgName
 type=\"laboratory\">Jet Propulsion 
Laboratory</orgName>\n\t\t\t\t\t\t\t\t<orgName type=\"institution\">California 
Institute of 
Technology</orgName>\n\t\t\t\t\t\t\t\t<address>\n\t\t\t\t\t\t\t\t\t<postCode>91109</postCode>\n\t\t\t\t\t\t\t\t\t<settlement>Pasadena</settlement>\n\t\t\t\t\t\t\t\t\t<region>CA</region>\n\t\t\t\t\t\t\t\t\t<country
 
key=\"US\">USA</country>\n\t\t\t\t\t\t\t\t</address>\n\t\t\t\t\t\t\t</affiliation>\n\t\t\t\t\t\t</author>\n\t\t\t\t\t\t<author>\n\t\t\t\t\t\t\t<persName>\n\t\t\t\t\t\t\t\t<forename
 
type=\"first\">Nenad</forename>\n\t\t\t\t\t\t\t\t<surname>Medvidovic</surname>\n\t\t\t\t\t\t\t</persName>\n\t\t\t\t\t\t\t<affiliation>\n\t\t\t\t\t\t\t\t<orgName
 type=\"department\">Computer Science 
Department</orgName>\n\t\t\t\t\t\t\t\t<orgName type=\"institution\">University 
of Southern 
California</orgName>\n\t\t\t\t\t\t\t\t<address>\n\t\t\t\t\t\t\t\t\t<postCode>90089</postCode>\n\t\t\t\t\t\t\t\t\t<settlement>Los
 
Angeles</settlement>\n\t\t\t\t\t\t\t\t\t<region>CA</region>\n\t\t\t\t\t\t\t\t\t<country
 
key=\"US\">USA</country>\n\t\t\t\t\t\t\t\t</address>\n\t\t\t\t\t\t\t</affiliation>\n\t\t\t\t\t\t</author>\n\t\t\t\t\t\t<author>\n\t\t\t\t\t\t\t<persName>\n\t\t\t\t\t\t\t\t<forename
 
type=\"first\">Steve</forename>\n\t\t\t\t\t\t\t\t<surname>Hughes</surname>\n\t\t\t\t\t\t\t</persName>\n\t\t\t\t\t\t\t<affiliation>\n\t\t\t\t\t\t\t\t<orgName
 type=\"laboratory\">Jet Propulsion 
Laboratory</orgName>\n\t\t\t\t\t\t\t\t<orgName type=\"institution\">California 
Institute of 
Technology</orgName>\n\t\t\t\t\t\t\t\t<address>\n\t\t\t\t\t\t\t\t\t<postCode>91109</postCode>\n\t\t\t\t\t\t\t\t\t<settlement>Pasadena</settlement>\n\t\t\t\t\t\t\t\t\t<region>CA</region>\n\t\t\t\t\t\t\t\t\t<country
 
key=\"US\">USA</country>\n\t\t\t\t\t\t\t\t</address>\n\t\t\t\t\t\t\t</affiliation>\n\t\t\t\t\t\t</author>\n\t\t\t\t\t\t<title
 level=\"a\" type=\"main\">A Software Architecture-Based Framework for Highly 
Distributed and Data Intensive Scientific 
Applications</title>\n\t\t\t\t\t</analytic>\n\t\t\t\t\t<monogr>\n\t\t\t\t\t\t<imprint>\n\t\t\t\t\t\t\t<date/>\n\t\t\t\t\t\t</imprint>\n\t\t\t\t\t</monogr>\n\t\t\t\t</biblStruct>\n\t\t\t</sourceDesc>\n\t\t</fileDesc>\n\t\t<profileDesc>\n\t\t\t<textClass>\n\t\t\t\t<keywords>\n\t\t\t\t\t<term>D2
 Software Engineering, D211 Domain Specific 
Architectures</term>\n\t\t\t\t\t<term>Keywords</term>\n\t\t\t\t\t<term>OODT, 
Data Management, Software 
Architecture</term>\n\t\t\t\t</keywords>\n\t\t\t</textClass>\n\t\t\t<abstract>\n\t\t\t\t<p>Modern
 scientific research is increasingly conducted by virtual communities of 
scientists distributed around the world. The data volumes created by these 
communities are extremely large, and growing rapidly. The management of the 
resulting highly distributed, virtual data systems is a complex task, 
characterized by a number of formidable technical challenges, many of which are 
of a software engineering nature. In this paper we describe our experience over 
the past seven years in constructing and deploying OODT, a software framework 
that supports large, distributed, virtual scientific communities. We outline 
the key software engineering challenges that we faced, and addressed, along the 
way. We argue that a major contributor to the success of OODT was its explicit 
focus on software architecture. We describe several large-scale, real-world 
deployments of OODT, and the manner in which OODT helped us to address the 
domain-specific challenges induced by each 
deployment.</p>\n\t\t\t</abstract>\n\t\t</profileDesc>\n\t</teiHeader>\n\t<text 
xml:lang=\"en\">\n\t</text>\n</TEI>\n",
-         "grobid:header_BeginPage": "-1",
-         "grobid:header_Class": "class org.grobid.core.data.BiblioItem",
-         "grobid:header_Email": 
"{dan.crichton,mattmann,steve.hughes}@jpl.nasa.gov ; {mattmann,neno}@usc.edu",
-         "grobid:header_EndPage": "-1",
-         "grobid:header_Error": "true",
-         "grobid:header_FirstAuthorSurname": "Mattmann",
-         "grobid:header_FullAffiliations": "[Affiliation{name='null', 
url='null', institutions=[California Institute of Technology], 
departments=null, laboratories=[Jet Propulsion Laboratory], country='USA', 
postCode='91109', postBox='null', region='CA', settlement='Pasadena', 
addrLine='null', marker='1', addressString='null', affiliationString='null', 
failAffiliation=false}, Affiliation{name='null', url='null', 
institutions=[University of Southern California], departments=[Computer Science 
Department], laboratories=null, country='USA', postCode='90089', 
postBox='null', region='CA', settlement='Los Angeles', addrLine='null', 
marker='2', addressString='null', affiliationString='null', 
failAffiliation=false}]",
-         "grobid:header_FullAuthors": "[Chris A Mattmann, Daniel J Crichton, 
Nenad Medvidovic, Steve Hughes]",
-         "grobid:header_Item": "-1",
-         "grobid:header_Keyword": "Categories and Subject Descriptors D2 
Software Engineering, D211 Domain Specific Architectures Keywords OODT, Data 
Management, Software Architecture",
-         "grobid:header_Keywords": "[D2 Software Engineering, D211 Domain 
Specific Architectures  (type:subject-headers), Keywords  
(type:subject-headers), OODT, Data Management, Software Architecture  
(type:subject-headers)]",
-         "grobid:header_Language": "en",
-         "grobid:header_NbPages": "-1",
-         "grobid:header_OriginalAuthors": "Chris A. Mattmann 1, 2 Daniel J. 
Crichton 1 Nenad Medvidovic 2 Steve Hughes 1",
          "grobid:header_Title": "A Software Architecture-Based Framework for 
Highly Distributed and Data Intensive Scientific Applications",
          "meta:author": "End User Computing Services",
          "meta:creation-date": "2006-02-15T21:13:58Z",

Reply via email to