Xikui Wang has uploaded a new change for review.

  https://asterix-gerrit.ics.uci.edu/1269

Change subject: Introduce XML Adaptor & Parser
......................................................................

Introduce XML Adaptor & Parser

1. Add a file system XML adaptor.
2. Add an XML parser that converts XML to JSON and then parses it with ADMDataParser.

Change-Id: Ia36101a0761973a9edb96b42d3dcc117661301da
---
A asterixdb/asterix-app/data/xml/ER.xml
A asterixdb/asterix-app/data/xml/HSA.xml
A asterixdb/asterix-app/data/xml/STA.xml
A asterixdb/asterix-app/data/xml/small_ER.xml
M asterixdb/asterix-app/src/test/resources/runtimets/only.xml
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.1.ddl.aql
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.2.update.aql
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.3.sleep.aql
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.4.update.aql
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.5.query.aql
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.6.ddl.aql
A 
asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/xml-adaptor/xml-adaptor.1.adm
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
A 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/XMLFileRecordReader.java
A 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/XMLFileParser.java
A 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/XMLFileParserFactory.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/StreamRecordReaderProvider.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
M 
asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedMetadataUtil.java
21 files changed, 482 insertions(+), 3 deletions(-)


  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/69/1269/1

diff --git a/asterixdb/asterix-app/data/xml/ER.xml 
b/asterixdb/asterix-app/data/xml/ER.xml
new file mode 100644
index 0000000..e6db4e8
--- /dev/null
+++ b/asterixdb/asterix-app/data/xml/ER.xml
@@ -0,0 +1,45 @@
+<?xml version = "1.0" encoding = "UTF-8"?>
+<alert xmlns = "urn:oasis:names:tc:emergency:cap:1.2">
+    <identifier>TRI13970876.2</identifier>
+    <sender>[email protected]</sender>
+    <sent>2003-06-11T20:56:00-07:00</sent>
+    <status>Actual</status>
+    <msgType>Update</msgType>
+    <scope>Public</scope>
+    
<references>[email protected],TRI13970876.1,2003-06-11T20:30:00-07:00</references>
+    <info>
+        <category>Geo</category>
+        <event>Earthquake</event>
+        <urgency>Past</urgency>
+        <severity>Minor</severity>
+        <certainty>Observed</certainty>
+        <senderName>Southern California Seismic Network (TriNet) operated by 
Caltech and USGS</senderName>
+        <headline>EQ 3.4 Imperial County CA</headline>
+        <description>A minor earthquake measuring 3.4 on the Richter scale 
occurred near Brawley, California at 8:30 PM Pacific Daylight Time on 
Wednesday, June 11, 2003. (This event has now been reviewed by a 
seismologist)</description>
+        <web>http://www.trinet.org/scsn/scsn.html</web>
+        <parameter>
+            <valueName>EventID</valueName>
+            <value>13970876</value>
+        </parameter>
+        <parameter>
+            <valueName>Version</valueName>
+            <value>1</value>
+        </parameter>
+        <parameter>
+            <valueName>Magnitude</valueName>
+            <value>3.4 Ml</value>
+        </parameter>
+        <parameter>
+            <valueName>Depth</valueName>
+            <value>11.8 mi.</value>
+        </parameter>
+        <parameter>
+            <valueName>Quality</valueName>
+            <value>Excellent</value>
+        </parameter>
+        <area>
+            <areaDesc>1 mi. WSW of Brawley, CA; 11 mi. N of El Centro, CA; 30 
mi. E of OCOTILLO (quarry); 1 mi. N of the Imperial Fault</areaDesc>
+            <circle>32.9525,-115.5527 0</circle>
+        </area>
+    </info>
+</alert>
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/xml/HSA.xml 
b/asterixdb/asterix-app/data/xml/HSA.xml
new file mode 100644
index 0000000..8043ea6
--- /dev/null
+++ b/asterixdb/asterix-app/data/xml/HSA.xml
@@ -0,0 +1,33 @@
+<?xml version = "1.0" encoding = "UTF-8"?>
+<alert xmlns = "urn:oasis:names:tc:emergency:cap:1.2">
+    <identifier>43b080713727</identifier>
+    <sender>[email protected]</sender>
+    <sent>2003-04-02T14:39:01-05:00</sent>
+    <status>Actual</status>
+    <msgType>Alert</msgType>
+    <scope>Public</scope>
+    <info>
+        <category>Security</category>
+        <event>Homeland Security Advisory System Update</event>
+        <urgency>Immediate</urgency>
+        <severity>Severe</severity>
+        <certainty>Likely</certainty>
+        <senderName>U.S. Government, Department of Homeland 
Security</senderName>
+        <headline>Homeland Security Sets Code ORANGE</headline>
+        <description>The Department of Homeland Security has elevated the 
Homeland Security Advisory System threat level to ORANGE / High in response to 
intelligence which may indicate a heightened threat of terrorism.</description>
+        <instruction> A High Condition is declared when there is a high risk 
of terrorist attacks. In addition to the Protective Measures taken in the 
previous Threat Conditions, Federal departments and agencies should consider 
agency-specific Protective Measures in accordance with their existing 
plans.</instruction>
+        <web>http://www.dhs.gov/dhspublic/display?theme=29</web>
+        <parameter>
+            <valueName>HSAS</valueName>
+            <value>ORANGE</value>
+        </parameter>
+        <resource>
+            <resourceDesc>Image file (GIF)</resourceDesc>
+            <mimeType>image/gif</mimeType>
+            <uri>http://www.dhs.gov/dhspublic/getAdvisoryImage</uri>
+        </resource>
+        <area>
+            <areaDesc>U.S. nationwide and interests worldwide</areaDesc>
+        </area>
+    </info>
+</alert>
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/xml/STA.xml 
b/asterixdb/asterix-app/data/xml/STA.xml
new file mode 100644
index 0000000..fe96743
--- /dev/null
+++ b/asterixdb/asterix-app/data/xml/STA.xml
@@ -0,0 +1,43 @@
+<?xml version = "1.0" encoding = "UTF-8"?>
+<alert xmlns = "urn:oasis:names:tc:emergency:cap:1.2">
+    <identifier>KSTO1055887203</identifier>
+    <sender>[email protected]</sender>
+    <sent>2003-06-17T14:57:00-07:00</sent>
+    <status>Actual</status>
+    <msgType>Alert</msgType>
+    <scope>Public</scope>
+    <info>
+        <category>Met</category>
+        <event>SEVERE THUNDERSTORM</event>
+        <responseType>Shelter</responseType>
+        <urgency>Immediate</urgency>
+        <severity>Severe</severity>
+        <certainty>Observed</certainty>
+        <eventCode>
+            <valueName>SAME</valueName>
+            <value>SVR</value>
+        </eventCode>
+        <expires>2003-06-17T16:00:00-07:00</expires>
+        <senderName>NATIONAL WEATHER SERVICE SACRAMENTO CA</senderName>
+        <headline>SEVERE THUNDERSTORM WARNING</headline>
+        <description> AT 254 PM PDT...NATIONAL WEATHER SERVICE DOPPLER RADAR 
INDICATED A SEVERE THUNDERSTORM OVER SOUTH CENTRAL ALPINE COUNTY...OR ABOUT 18 
MILES SOUTHEAST OF KIRKWOOD...MOVING SOUTHWEST AT 5 MPH. HAIL...INTENSE RAIN 
AND STRONG DAMAGING WINDS ARE LIKELY WITH THIS STORM.</description>
+        <instruction>TAKE COVER IN A SUBSTANTIAL SHELTER UNTIL THE STORM 
PASSES.</instruction>
+        <contact>BARUFFALDI/JUSKIE</contact>
+        <area>
+            <areaDesc>EXTREME NORTH CENTRAL TUOLUMNE COUNTY IN CALIFORNIA, 
EXTREME NORTHEASTERN CALAVERAS COUNTY IN CALIFORNIA, SOUTHWESTERN ALPINE COUNTY 
IN CALIFORNIA</areaDesc>
+            <polygon>38.47,-120.14 38.34,-119.95 38.52,-119.74 38.62,-119.89 
38.47,-120.14</polygon>
+            <geocode>
+                <valueName>SAME</valueName>
+                <value>006109</value>
+            </geocode>
+            <geocode>
+                <valueName>SAME</valueName>
+                <value>006009</value>
+            </geocode>
+            <geocode>
+                <valueName>SAME</valueName>
+                <value>006003</value>
+            </geocode>
+        </area>
+    </info>
+</alert>
\ No newline at end of file
diff --git a/asterixdb/asterix-app/data/xml/small_ER.xml 
b/asterixdb/asterix-app/data/xml/small_ER.xml
new file mode 100644
index 0000000..fd25fd5
--- /dev/null
+++ b/asterixdb/asterix-app/data/xml/small_ER.xml
@@ -0,0 +1,19 @@
+<?xml version = "1.0" encoding = "UTF-8"?>
+<alert xmlns = "urn:oasis:names:tc:emergency:cap:1.2">
+    <identifier>TRI13970876.3</identifier>
+    <info>
+        <category>Geo</category>
+        <parameter>
+            <valueName>EventID</valueName>
+            <value>13970876</value>
+        </parameter>
+        <parameter>
+            <valueName>Magnitude</valueName>
+            <value>3.4 Ml</value>
+        </parameter>
+        <area>
+            <areaDesc>1 mi. WSW of Brawley, CA; 11 mi. N of El Centro, CA; 30 
mi. E of OCOTILLO (quarry); 1 mi. N of the Imperial Fault</areaDesc>
+            <circle>32.9525,-115.5527 0</circle>
+        </area>
+    </info>
+</alert>
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/only.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/only.xml
index 12b0d83..26406cf 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/only.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/only.xml
@@ -19,5 +19,10 @@
  !-->
 <test-suite xmlns="urn:xml.testframework.asterix.apache.org" 
ResultOffsetPath="results" QueryOffsetPath="queries">
   <test-group name="failed">
+    <test-case FilePath="feeds">
+      <compilation-unit name="xml-adaptor">
+        <output-dir compare="Text">xml-adaptor</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
 </test-suite>
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.1.ddl.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.1.ddl.aql
new file mode 100644
index 0000000..7b58373
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.1.ddl.aql
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse xml_feeds if exists;
+create dataverse xml_feeds;
+use dataverse xml_feeds;
+
+create type XML_Alert as open{
+ identifier: string
+}
+
+create dataset XML_Alerts(XML_Alert)
+primary key identifier;
+
+create feed XML_Alert_Feeds using localfs
+(("path"="asterix_nc1://data/xml/"),("format"="xml"),("type-name"="XML_Alert"));
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.2.update.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.2.update.aql
new file mode 100644
index 0000000..c5d91d2
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.2.update.aql
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse xml_feeds;
+connect feed XML_Alert_Feeds to dataset XML_Alerts;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.3.sleep.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.3.sleep.aql
new file mode 100644
index 0000000..5af9639
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.3.sleep.aql
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+1000
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.4.update.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.4.update.aql
new file mode 100644
index 0000000..c69b966
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.4.update.aql
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse xml_feeds;
+disconnect feed XML_Alert_Feeds from dataset XML_Alerts;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.5.query.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.5.query.aql
new file mode 100644
index 0000000..4dee200
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.5.query.aql
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse xml_feeds;
+count(for $i in dataset XML_Alerts return $i);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.6.ddl.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.6.ddl.aql
new file mode 100644
index 0000000..25b2094
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.6.ddl.aql
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse xml_feeds;
+drop dataverse xml_feeds if exists;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/xml-adaptor/xml-adaptor.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/xml-adaptor/xml-adaptor.1.adm
new file mode 100644
index 0000000..00750ed
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/xml-adaptor/xml-adaptor.1.adm
@@ -0,0 +1 @@
+3
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
index e90b2e7..3a5a891 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -266,6 +266,11 @@
         <output-dir compare="Text">issue_230_feeds</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="feeds">
+      <compilation-unit name="xml-adaptor">
+        <output-dir compare="Text">xml-adaptor</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
   <test-group name="upsert">
     <test-case FilePath="upsert">
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
index 33f9673..1a2aa4c 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
@@ -88,7 +88,7 @@
 
     @Override
     public String toString() {
-        return String.valueOf(value, 0, size == 0 ? 0 : size - 1);
+        return String.valueOf(value, 0, size);
     }
 
     public void endRecord() throws IOException {
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/XMLFileRecordReader.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/XMLFileRecordReader.java
new file mode 100644
index 0000000..08eb410
--- /dev/null
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/XMLFileRecordReader.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.external.input.record.reader.stream;
+
+import org.apache.asterix.external.api.AsterixInputStream;
+import org.apache.asterix.external.util.ExternalDataConstants;
+
+import java.io.IOException;
+/** Stream record reader that accumulates input until a lone line feed chunk or end of stream is read. */
+public class XMLFileRecordReader extends StreamRecordReader {
+
+    protected boolean newRecordFormed;
+    protected boolean prevCharLF = false;
+
+    public XMLFileRecordReader(AsterixInputStream inputStream) {
+        super(inputStream);
+    }
+    /** Fills {@code record} with the next chunk group; returns false when no non-empty record can be formed. */
+    @Override
+    public boolean hasNext() throws IOException {
+        newRecordFormed = false;
+        record.reset();
+        prevCharLF = false;
+        while (!newRecordFormed && !done) {
+            bufferLength = reader.read(inputBuffer);
+            boolean endOfStream = bufferLength == -1;
+            // NOTE(review): a 1-char LF read is presumably the stream's marker between files -- confirm.
+            if (endOfStream || (bufferLength == 1 && inputBuffer[0] == ExternalDataConstants.BYTE_LF)) {
+                newRecordFormed = record.size() > 0; // never hand an empty record to the parser
+                if (endOfStream && !newRecordFormed) {
+                    return false; // nothing buffered and nothing left to read
+                }
+            } else {
+                record.append(inputBuffer, 0, bufferLength);
+            }
+        }
+        return newRecordFormed;
+    }
+}
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/XMLFileParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/XMLFileParser.java
new file mode 100644
index 0000000..6e4b9af
--- /dev/null
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/XMLFileParser.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.external.parser;
+
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.input.record.CharArrayRecord;
+import org.apache.asterix.om.types.ARecordType;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.XML;
+
+import java.io.DataOutput;
+import java.io.IOException;
+/** Record parser that converts an XML document to JSON text and delegates to an {@link ADMDataParser}. */
+public class XMLFileParser extends AbstractDataParser implements IRecordDataParser<char[]> {
+
+    ARecordType recordType;
+    ADMDataParser admDataParser;
+    CharArrayRecord charArrayRecord; // reused for every record to hold the intermediate JSON text
+    /** Stores the record type and the ADM parser that receives the converted JSON text. */
+    public XMLFileParser(ARecordType recordType, ADMDataParser admDataParser) {
+        this.recordType = recordType;
+        this.admDataParser = admDataParser;
+        charArrayRecord = new CharArrayRecord();
+    }
+    /** Parses one XML record into {@code out}; throws IOException if the "alert" object cannot be extracted. */
+    @Override
+    public void parse(IRawRecord<? extends char[]> record, DataOutput out) throws IOException {
+        try {
+            JSONObject xmlObj = XML.toJSONObject(record.toString());
+            String jsonStr = xmlObj.getJSONObject("alert").toString(4); // NOTE(review): root name is hard-coded
+            charArrayRecord.set(jsonStr.toCharArray());
+            charArrayRecord.endRecord();
+            admDataParser.parse(charArrayRecord, out);
+        } catch (JSONException e) {
+            throw new IOException(e); // previously constructed but never thrown, hiding bad records
+        }
+    }
+}
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/XMLFileParserFactory.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/XMLFileParserFactory.java
new file mode 100644
index 0000000..206fff3
--- /dev/null
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/XMLFileParserFactory.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.external.parser.factory;
+
+import org.apache.asterix.external.api.IExternalDataSourceFactory;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IRecordDataParserFactory;
+import org.apache.asterix.external.api.IStreamDataParser;
+import org.apache.asterix.external.parser.ADMDataParser;
+import org.apache.asterix.external.parser.XMLFileParser;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.xml.sax.SAXException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/** Factory for {@link XMLFileParser} instances that consume {@code char[]} records. */
+public class XMLFileParserFactory implements IRecordDataParserFactory<char[]> {
+
+    private ARecordType recordType;
+    private Map<String, String> configuration;
+
+    /** Builds a parser whose ADM delegate is told whether the configured data source is a stream. */
+    @Override
+    public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) throws HyracksDataException {
+        // == is null-safe for enum constants, unlike calling equals() on the looked-up value.
+        boolean isStream = ExternalDataUtils.getDataSourceType(configuration)
+                == IExternalDataSourceFactory.DataSourceType.STREAM;
+        return new XMLFileParser(recordType, new ADMDataParser(recordType, isStream));
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration) {
+        this.configuration = configuration;
+    }
+
+    @Override
+    public void setRecordType(ARecordType recordType) {
+        this.recordType = recordType;
+    }
+
+    @Override
+    public Class<?> getRecordClass() {
+        return char[].class;
+    }
+
+    @Override
+    public void setMetaType(ARecordType metaType) {
+        // Intentionally empty: this factory keeps no meta type.
+    }
+}
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
index ebe3276..ea94f3c 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
@@ -29,6 +29,7 @@
 import org.apache.asterix.external.parser.factory.RSSParserFactory;
 import 
org.apache.asterix.external.parser.factory.RecordWithMetadataParserFactory;
 import org.apache.asterix.external.parser.factory.TweetParserFactory;
+import org.apache.asterix.external.parser.factory.XMLFileParserFactory;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
 
@@ -73,6 +74,8 @@
                 return new RSSParserFactory();
             case ExternalDataConstants.FORMAT_RECORD_WITH_METADATA:
                 return new RecordWithMetadataParserFactory();
+            case ExternalDataConstants.FORMAT_XML:
+                return new XMLFileParserFactory();
             default:
                 try {
                     return (IDataParserFactory) 
Class.forName(parser).newInstance();
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/StreamRecordReaderProvider.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/StreamRecordReaderProvider.java
index d11e97f..71e8b06 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/StreamRecordReaderProvider.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/StreamRecordReaderProvider.java
@@ -27,6 +27,7 @@
 import 
org.apache.asterix.external.input.record.reader.stream.QuotedLineRecordReader;
 import 
org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader;
 import 
org.apache.asterix.external.input.record.reader.stream.StreamRecordReader;
+import 
org.apache.asterix.external.input.record.reader.stream.XMLFileRecordReader;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -35,7 +36,8 @@
     public enum Format {
         SEMISTRUCTURED,
         CSV,
-        LINE_SEPARATED
+        LINE_SEPARATED,
+        XML_FILE
     }
 
     public static Format getReaderFormat(Map<String, String> configuration) 
throws AsterixException {
@@ -51,6 +53,8 @@
                 case ExternalDataConstants.FORMAT_DELIMITED_TEXT:
                 case ExternalDataConstants.FORMAT_CSV:
                     return Format.CSV;
+                case ExternalDataConstants.FORMAT_XML:
+                    return Format.XML_FILE;
             }
             throw new AsterixException("Unknown format: " + format);
         }
@@ -74,6 +78,8 @@
                 return new SemiStructuredRecordReader(inputStream,
                         
configuration.get(ExternalDataConstants.KEY_RECORD_START),
                         
configuration.get(ExternalDataConstants.KEY_RECORD_END));
+            case XML_FILE:
+                return new XMLFileRecordReader(inputStream);
             default:
                 throw new HyracksDataException("Unknown format: " + format);
         }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index c5167c1..a9f805c 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -32,6 +32,8 @@
     public static final String KEY_STREAM = "stream";
     // used to specify the dataverse of the adapter
     public static final String KEY_DATAVERSE = "dataverse";
+    // optional schema provided by datasource, used for data type inference
+    public static final String KEY_DATASOURCE_SCHEMA = "datasource-schema";
     // used to specify the socket addresses when reading data from sockets
     public static final String KEY_SOCKETS = "sockets";
     // specify whether the socket address points to an NC or an IP
@@ -153,6 +155,7 @@
     public static final String FORMAT_LINE_SEPARATED = "line-separated";
     public static final String FORMAT_HDFS_WRITABLE = "hdfs-writable";
     public static final String FORMAT_KV = "kv";
+    public static final String FORMAT_XML = "xml";
 
     /**
      * input streams
diff --git 
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedMetadataUtil.java
 
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedMetadataUtil.java
index a6d5c48..3c3cb6b 100644
--- 
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedMetadataUtil.java
+++ 
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedMetadataUtil.java
@@ -323,7 +323,6 @@
             ARecordType adapterOutputType = getOutputType(feed, configuration, 
ExternalDataConstants.KEY_TYPE_NAME);
             ARecordType metaType = getOutputType(feed, configuration, 
ExternalDataConstants.KEY_META_TYPE_NAME);
             ExternalDataUtils.prepareFeed(configuration, 
feed.getDataverseName(), feed.getFeedName());
-            ExternalDataUtils.prepareFeed(configuration, 
feed.getDataverseName(), feed.getFeedName());
             // Get adapter from metadata dataset <Metadata dataverse>
             DatasourceAdapter adapterEntity = 
MetadataManager.INSTANCE.getAdapter(mdTxnCtx,
                     MetadataConstants.METADATA_DATAVERSE_NAME, adapterName);

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/1269
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia36101a0761973a9edb96b42d3dcc117661301da
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Xikui Wang <[email protected]>

Reply via email to