Xikui Wang has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/1269
Change subject: Introduce XML Adaptor & Parser ...................................................................... Introduce XML Adaptor & Parser 1. Add file system XML adaptor. 2. Add xml parser based on XML to JSON and ADMParser. Change-Id: Ia36101a0761973a9edb96b42d3dcc117661301da --- A asterixdb/asterix-app/data/xml/ER.xml A asterixdb/asterix-app/data/xml/HSA.xml A asterixdb/asterix-app/data/xml/STA.xml A asterixdb/asterix-app/data/xml/small_ER.xml M asterixdb/asterix-app/src/test/resources/runtimets/only.xml A asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.1.ddl.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.2.update.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.3.sleep.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.4.update.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.5.query.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.6.ddl.aql A asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/xml-adaptor/xml-adaptor.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java A asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/XMLFileRecordReader.java A asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/XMLFileParser.java A asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/XMLFileParserFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/StreamRecordReaderProvider.java M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java M asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedMetadataUtil.java 21 files changed, 482 insertions(+), 3 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/69/1269/1 diff --git a/asterixdb/asterix-app/data/xml/ER.xml b/asterixdb/asterix-app/data/xml/ER.xml new file mode 100644 index 0000000..e6db4e8 --- /dev/null +++ b/asterixdb/asterix-app/data/xml/ER.xml @@ -0,0 +1,45 @@ +<?xml version = "1.0" encoding = "UTF-8"?> +<alert xmlns = "urn:oasis:names:tc:emergency:cap:1.2"> + <identifier>TRI13970876.2</identifier> + <sender>[email protected]</sender> + <sent>2003-06-11T20:56:00-07:00</sent> + <status>Actual</status> + <msgType>Update</msgType> + <scope>Public</scope> + <references>[email protected],TRI13970876.1,2003-06-11T20:30:00-07:00</references> + <info> + <category>Geo</category> + <event>Earthquake</event> + <urgency>Past</urgency> + <severity>Minor</severity> + <certainty>Observed</certainty> + <senderName>Southern California Seismic Network (TriNet) operated by Caltech and USGS</senderName> + <headline>EQ 3.4 Imperial County CA</headline> + <description>A minor earthquake measuring 3.4 on the Richter scale occurred near Brawley, California at 8:30 PM Pacific Daylight Time on Wednesday, June 11, 2003. (This event has now been reviewed by a seismologist)</description> + <web>http://www.trinet.org/scsn/scsn.html</web> + <parameter> + <valueName>EventID</valueName> + <value>13970876</value> + </parameter> + <parameter> + <valueName>Version</valueName> + <value>1</value> + </parameter> + <parameter> + <valueName>Magnitude</valueName> + <value>3.4 Ml</value> + </parameter> + <parameter> + <valueName>Depth</valueName> + <value>11.8 mi.</value> + </parameter> + <parameter> + <valueName>Quality</valueName> + <value>Excellent</value> + </parameter> + <area> + <areaDesc>1 mi. 
WSW of Brawley, CA; 11 mi. N of El Centro, CA; 30 mi. E of OCOTILLO (quarry); 1 mi. N of the Imperial Fault</areaDesc> + <circle>32.9525,-115.5527 0</circle> + </area> + </info> +</alert> \ No newline at end of file diff --git a/asterixdb/asterix-app/data/xml/HSA.xml b/asterixdb/asterix-app/data/xml/HSA.xml new file mode 100644 index 0000000..8043ea6 --- /dev/null +++ b/asterixdb/asterix-app/data/xml/HSA.xml @@ -0,0 +1,33 @@ +<?xml version = "1.0" encoding = "UTF-8"?> +<alert xmlns = "urn:oasis:names:tc:emergency:cap:1.2"> + <identifier>43b080713727</identifier> + <sender>[email protected]</sender> + <sent>2003-04-02T14:39:01-05:00</sent> + <status>Actual</status> + <msgType>Alert</msgType> + <scope>Public</scope> + <info> + <category>Security</category> + <event>Homeland Security Advisory System Update</event> + <urgency>Immediate</urgency> + <severity>Severe</severity> + <certainty>Likely</certainty> + <senderName>U.S. Government, Department of Homeland Security</senderName> + <headline>Homeland Security Sets Code ORANGE</headline> + <description>The Department of Homeland Security has elevated the Homeland Security Advisory System threat level to ORANGE / High in response to intelligence which may indicate a heightened threat of terrorism.</description> + <instruction> A High Condition is declared when there is a high risk of terrorist attacks. In addition to the Protective Measures taken in the previous Threat Conditions, Federal departments and agencies should consider agency-specific Protective Measures in accordance with their existing plans.</instruction> + <web>http://www.dhs.gov/dhspublic/display?theme=29</web> + <parameter> + <valueName>HSAS</valueName> + <value>ORANGE</value> + </parameter> + <resource> + <resourceDesc>Image file (GIF)</resourceDesc> + <mimeType>image/gif</mimeType> + <uri>http://www.dhs.gov/dhspublic/getAdvisoryImage</uri> + </resource> + <area> + <areaDesc>U.S. 
nationwide and interests worldwide</areaDesc> + </area> + </info> +</alert> \ No newline at end of file diff --git a/asterixdb/asterix-app/data/xml/STA.xml b/asterixdb/asterix-app/data/xml/STA.xml new file mode 100644 index 0000000..fe96743 --- /dev/null +++ b/asterixdb/asterix-app/data/xml/STA.xml @@ -0,0 +1,43 @@ +<?xml version = "1.0" encoding = "UTF-8"?> +<alert xmlns = "urn:oasis:names:tc:emergency:cap:1.2"> + <identifier>KSTO1055887203</identifier> + <sender>[email protected]</sender> + <sent>2003-06-17T14:57:00-07:00</sent> + <status>Actual</status> + <msgType>Alert</msgType> + <scope>Public</scope> + <info> + <category>Met</category> + <event>SEVERE THUNDERSTORM</event> + <responseType>Shelter</responseType> + <urgency>Immediate</urgency> + <severity>Severe</severity> + <certainty>Observed</certainty> + <eventCode> + <valueName>SAME</valueName> + <value>SVR</value> + </eventCode> + <expires>2003-06-17T16:00:00-07:00</expires> + <senderName>NATIONAL WEATHER SERVICE SACRAMENTO CA</senderName> + <headline>SEVERE THUNDERSTORM WARNING</headline> + <description> AT 254 PM PDT...NATIONAL WEATHER SERVICE DOPPLER RADAR INDICATED A SEVERE THUNDERSTORM OVER SOUTH CENTRAL ALPINE COUNTY...OR ABOUT 18 MILES SOUTHEAST OF KIRKWOOD...MOVING SOUTHWEST AT 5 MPH. 
HAIL...INTENSE RAIN AND STRONG DAMAGING WINDS ARE LIKELY WITH THIS STORM.</description> + <instruction>TAKE COVER IN A SUBSTANTIAL SHELTER UNTIL THE STORM PASSES.</instruction> + <contact>BARUFFALDI/JUSKIE</contact> + <area> + <areaDesc>EXTREME NORTH CENTRAL TUOLUMNE COUNTY IN CALIFORNIA, EXTREME NORTHEASTERN CALAVERAS COUNTY IN CALIFORNIA, SOUTHWESTERN ALPINE COUNTY IN CALIFORNIA</areaDesc> + <polygon>38.47,-120.14 38.34,-119.95 38.52,-119.74 38.62,-119.89 38.47,-120.14</polygon> + <geocode> + <valueName>SAME</valueName> + <value>006109</value> + </geocode> + <geocode> + <valueName>SAME</valueName> + <value>006009</value> + </geocode> + <geocode> + <valueName>SAME</valueName> + <value>006003</value> + </geocode> + </area> + </info> +</alert> \ No newline at end of file diff --git a/asterixdb/asterix-app/data/xml/small_ER.xml b/asterixdb/asterix-app/data/xml/small_ER.xml new file mode 100644 index 0000000..fd25fd5 --- /dev/null +++ b/asterixdb/asterix-app/data/xml/small_ER.xml @@ -0,0 +1,19 @@ +<?xml version = "1.0" encoding = "UTF-8"?> +<alert xmlns = "urn:oasis:names:tc:emergency:cap:1.2"> + <identifier>TRI13970876.3</identifier> + <info> + <category>Geo</category> + <parameter> + <valueName>EventID</valueName> + <value>13970876</value> + </parameter> + <parameter> + <valueName>Magnitude</valueName> + <value>3.4 Ml</value> + </parameter> + <area> + <areaDesc>1 mi. WSW of Brawley, CA; 11 mi. N of El Centro, CA; 30 mi. E of OCOTILLO (quarry); 1 mi. 
N of the Imperial Fault</areaDesc> + <circle>32.9525,-115.5527 0</circle> + </area> + </info> +</alert> \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/only.xml b/asterixdb/asterix-app/src/test/resources/runtimets/only.xml index 12b0d83..26406cf 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/only.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/only.xml @@ -19,5 +19,10 @@ !--> <test-suite xmlns="urn:xml.testframework.asterix.apache.org" ResultOffsetPath="results" QueryOffsetPath="queries"> <test-group name="failed"> + <test-case FilePath="feeds"> + <compilation-unit name="xml-adaptor"> + <output-dir compare="Text">xml-adaptor</output-dir> + </compilation-unit> + </test-case> </test-group> </test-suite> diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.1.ddl.aql new file mode 100644 index 0000000..7b58373 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.1.ddl.aql @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +drop dataverse xml_feeds if exists; +create dataverse xml_feeds; +use dataverse xml_feeds; + +create type XML_Alert as open{ + identifier: string +} + +create dataset XML_Alerts(XML_Alert) +primary key identifier; + +create feed XML_Alert_Feeds using localfs +(("path"="asterix_nc1://data/xml/"),("format"="xml"),("type-name"="XML_Alert")); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.2.update.aql new file mode 100644 index 0000000..c5d91d2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.2.update.aql @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +use dataverse xml_feeds; +connect feed XML_Alert_Feeds to dataset XML_Alerts; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.3.sleep.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.3.sleep.aql new file mode 100644 index 0000000..5af9639 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.3.sleep.aql @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +1000 \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.4.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.4.update.aql new file mode 100644 index 0000000..c69b966 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.4.update.aql @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +use dataverse xml_feeds; +disconnect feed XML_Alert_Feeds from dataset XML_Alerts; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.5.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.5.query.aql new file mode 100644 index 0000000..4dee200 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.5.query.aql @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +use dataverse xml_feeds; +count(for $i in dataset XML_Alerts return $i); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.6.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.6.ddl.aql new file mode 100644 index 0000000..25b2094 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/feeds/xml-adaptor/xml-adaptor.6.ddl.aql @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +use dataverse xml_feeds; +drop dataverse xml_feeds if exists; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/xml-adaptor/xml-adaptor.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/xml-adaptor/xml-adaptor.1.adm new file mode 100644 index 0000000..00750ed --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/xml-adaptor/xml-adaptor.1.adm @@ -0,0 +1 @@ +3 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml index e90b2e7..3a5a891 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml @@ -266,6 +266,11 @@ <output-dir compare="Text">issue_230_feeds</output-dir> </compilation-unit> </test-case> + <test-case FilePath="feeds"> + <compilation-unit name="xml-adaptor"> + <output-dir compare="Text">xml-adaptor</output-dir> + </compilation-unit> + </test-case> </test-group> <test-group name="upsert"> <test-case FilePath="upsert"> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java index 33f9673..1a2aa4c 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java @@ -88,7 +88,7 @@ @Override public String toString() { - return String.valueOf(value, 0, size == 0 ? 
0 : size - 1); + return String.valueOf(value, 0, size); } public void endRecord() throws IOException { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/XMLFileRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/XMLFileRecordReader.java new file mode 100644 index 0000000..08eb410 --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/XMLFileRecordReader.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.asterix.external.input.record.reader.stream; + +import org.apache.asterix.external.api.AsterixInputStream; +import org.apache.asterix.external.util.ExternalDataConstants; + +import java.io.IOException; +/** Record reader for the "xml" format: hasNext() appends every buffer read from the stream to the current record until a read returns -1 or yields exactly one LF character. */ +public class XMLFileRecordReader extends StreamRecordReader { + + protected boolean newRecordFormed; + protected boolean prevCharLF = false; + + public XMLFileRecordReader(AsterixInputStream inputStream) { + super(inputStream); + } + + @Override public boolean hasNext() throws IOException { + newRecordFormed = false; + record.reset(); + prevCharLF = false; + while (!newRecordFormed) { + if (done) + return false; + + bufferLength = reader.read(inputBuffer); + + if((bufferLength == 1 && inputBuffer[0]==ExternalDataConstants.BYTE_LF) || bufferLength == -1){ + newRecordFormed = true; +// record.endRecord(); + } else{ + record.append(inputBuffer, 0, bufferLength); + } + } + return newRecordFormed; + } +} diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/XMLFileParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/XMLFileParser.java new file mode 100644 index 0000000..6e4b9af --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/XMLFileParser.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.external.parser; + +import org.apache.asterix.external.api.IRawRecord; +import org.apache.asterix.external.api.IRecordDataParser; +import org.apache.asterix.external.input.record.CharArrayRecord; +import org.apache.asterix.om.types.ARecordType; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.XML; + +import java.io.DataOutput; +import java.io.IOException; +/** Parses one XML record: converts it to JSON with org.json's XML.toJSONObject, takes the "alert" object, and passes its JSON text to the wrapped ADMDataParser; conversion failures are rethrown as IOException. */ +public class XMLFileParser extends AbstractDataParser implements IRecordDataParser<char[]> { + + ARecordType recordType; + ADMDataParser admDataParser; + CharArrayRecord charArrayRecord; + + public XMLFileParser(ARecordType recordType, ADMDataParser admDataParser){ + this.recordType = recordType; + this.admDataParser = admDataParser; + charArrayRecord = new CharArrayRecord(); + } + + @Override + public void parse(IRawRecord<? 
extends char[]> record, DataOutput out) throws IOException { + try { + JSONObject xmlObj = XML.toJSONObject(record.toString()); + String jsonStr = xmlObj.getJSONObject("alert").toString(4); + charArrayRecord.set(jsonStr.toCharArray()); + charArrayRecord.endRecord(); + admDataParser.parse(charArrayRecord,out); + } catch (JSONException e) { + throw new IOException(e); /* report XML/JSON conversion failures to the caller, keeping the cause */ + } + } +} diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/XMLFileParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/XMLFileParserFactory.java new file mode 100644 index 0000000..206fff3 --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/XMLFileParserFactory.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.asterix.external.parser.factory; + +import org.apache.asterix.external.api.IExternalDataSourceFactory; +import org.apache.asterix.external.api.IRecordDataParser; +import org.apache.asterix.external.api.IRecordDataParserFactory; +import org.apache.asterix.external.api.IStreamDataParser; +import org.apache.asterix.external.parser.ADMDataParser; +import org.apache.asterix.external.parser.XMLFileParser; +import org.apache.asterix.external.util.ExternalDataConstants; +import org.apache.asterix.external.util.ExternalDataUtils; +import org.apache.asterix.om.types.ARecordType; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Factory for {@link XMLFileParser} instances over char[] records; each parser wraps a new ADMDataParser built from the configured record type. + */ +public class XMLFileParserFactory implements IRecordDataParserFactory<char[]> { + + private ARecordType recordType; + private String XML_Template; + private Map<String, String> configuration; + + @Override + public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) throws HyracksDataException { + return new XMLFileParser(recordType, new ADMDataParser(recordType, + ExternalDataUtils.getDataSourceType(configuration).equals(IExternalDataSourceFactory.DataSourceType.STREAM))); + } + + @Override + public void configure(Map<String, String> configuration) { + this.configuration = configuration; + } + + @Override + public void setRecordType(ARecordType recordType) { + this.recordType = recordType; + } + + @Override + public Class<?> getRecordClass() { + return char[].class; + } + + @Override + public void setMetaType(ARecordType metaType) { + /* intentionally empty: the meta type argument is ignored */ + } +} diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java index ebe3276..ea94f3c 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java @@ -29,6 +29,7 @@ import org.apache.asterix.external.parser.factory.RSSParserFactory; import org.apache.asterix.external.parser.factory.RecordWithMetadataParserFactory; import org.apache.asterix.external.parser.factory.TweetParserFactory; +import org.apache.asterix.external.parser.factory.XMLFileParserFactory; import org.apache.asterix.external.util.ExternalDataConstants; import org.apache.asterix.external.util.ExternalDataUtils; @@ -73,6 +74,8 @@ return new RSSParserFactory(); case ExternalDataConstants.FORMAT_RECORD_WITH_METADATA: return new RecordWithMetadataParserFactory(); + case ExternalDataConstants.FORMAT_XML: + return new XMLFileParserFactory(); default: try { return (IDataParserFactory) Class.forName(parser).newInstance(); diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/StreamRecordReaderProvider.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/StreamRecordReaderProvider.java index d11e97f..71e8b06 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/StreamRecordReaderProvider.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/StreamRecordReaderProvider.java @@ -27,6 +27,7 @@ import org.apache.asterix.external.input.record.reader.stream.QuotedLineRecordReader; import org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader; import org.apache.asterix.external.input.record.reader.stream.StreamRecordReader; +import org.apache.asterix.external.input.record.reader.stream.XMLFileRecordReader; 
import org.apache.asterix.external.util.ExternalDataConstants; import org.apache.asterix.external.util.ExternalDataUtils; import org.apache.hyracks.api.exceptions.HyracksDataException; @@ -35,7 +36,8 @@ public enum Format { SEMISTRUCTURED, CSV, - LINE_SEPARATED + LINE_SEPARATED, + XML_FILE } public static Format getReaderFormat(Map<String, String> configuration) throws AsterixException { @@ -51,6 +53,8 @@ case ExternalDataConstants.FORMAT_DELIMITED_TEXT: case ExternalDataConstants.FORMAT_CSV: return Format.CSV; + case ExternalDataConstants.FORMAT_XML: + return Format.XML_FILE; } throw new AsterixException("Unknown format: " + format); } @@ -74,6 +78,8 @@ return new SemiStructuredRecordReader(inputStream, configuration.get(ExternalDataConstants.KEY_RECORD_START), configuration.get(ExternalDataConstants.KEY_RECORD_END)); + case XML_FILE: + return new XMLFileRecordReader(inputStream); default: throw new HyracksDataException("Unknown format: " + format); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java index c5167c1..a9f805c 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java @@ -32,6 +32,8 @@ public static final String KEY_STREAM = "stream"; // used to specify the dataverse of the adapter public static final String KEY_DATAVERSE = "dataverse"; + // optional schema provided by datasource, used for data type inference + public static final String KEY_DATASOURCE_SCHEMA = "datasource-schema"; // used to specify the socket addresses when reading data from sockets public static final String KEY_SOCKETS = "sockets"; // specify whether the socket address points to an NC or an IP @@ -153,6 +155,7 @@ public static final 
String FORMAT_LINE_SEPARATED = "line-separated"; public static final String FORMAT_HDFS_WRITABLE = "hdfs-writable"; public static final String FORMAT_KV = "kv"; + public static final String FORMAT_XML = "xml"; /** * input streams diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedMetadataUtil.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedMetadataUtil.java index a6d5c48..3c3cb6b 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedMetadataUtil.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedMetadataUtil.java @@ -323,7 +323,6 @@ ARecordType adapterOutputType = getOutputType(feed, configuration, ExternalDataConstants.KEY_TYPE_NAME); ARecordType metaType = getOutputType(feed, configuration, ExternalDataConstants.KEY_META_TYPE_NAME); ExternalDataUtils.prepareFeed(configuration, feed.getDataverseName(), feed.getFeedName()); - ExternalDataUtils.prepareFeed(configuration, feed.getDataverseName(), feed.getFeedName()); // Get adapter from metadata dataset <Metadata dataverse> DatasourceAdapter adapterEntity = MetadataManager.INSTANCE.getAdapter(mdTxnCtx, MetadataConstants.METADATA_DATAVERSE_NAME, adapterName); -- To view, visit https://asterix-gerrit.ics.uci.edu/1269 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ia36101a0761973a9edb96b42d3dcc117661301da Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Xikui Wang <[email protected]>
