Xikui Wang has uploaded a new change for review.
https://asterix-gerrit.ics.uci.edu/1416
Change subject: WIP - Load parser from classpath in ParserFactoryProvider
......................................................................
WIP - Load parser from classpath in ParserFactoryProvider
Change the behavior of ParserFactoryProvider so that it can load parser
factories from the classpath.
Change-Id: I2ac039fe3daaf0636cf004289bd0c8a3229197a9
---
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
A
asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory
M
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
M
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
A
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java
12 files changed, 175 insertions(+), 24 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/16/1416/1
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
index 1fc97c9..3dd3903 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
@@ -58,4 +58,6 @@
* @param metaType
*/
public void setMetaType(ARecordType metaType);
+
+ /**
+ * Returns the format names served by this parser factory.
+ * ParserFactoryProvider registers the factory class under each returned
+ * name and rejects a name that is claimed by more than one factory.
+ *
+ * @return the format names this factory handles
+ */
+ public String[] getFormats();
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
index efc9574..b0a1db2 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
@@ -30,6 +30,7 @@
public class ADMDataParserFactory extends
AbstractRecordStreamParserFactory<char[]> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "adm", "json", "semi-structured" };
@Override
public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext
ctx) {
@@ -56,4 +57,9 @@
public void setMetaType(ARecordType metaType) {
}
+    @Override
+    public String[] getFormats() {
+        // Hand out a copy so callers cannot modify the shared static array.
+        return formats.clone();
+    }
+
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
index f724b48..a502457 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
@@ -34,6 +34,7 @@
public class DelimitedDataParserFactory extends
AbstractRecordStreamParserFactory<char[]> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "csv", "delimited-text" };
@Override
public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext
ctx) throws HyracksDataException {
@@ -97,4 +98,9 @@
public void setMetaType(ARecordType metaType) {
}
+    @Override
+    public String[] getFormats() {
+        // Hand out a copy so callers cannot modify the shared static array.
+        return formats.clone();
+    }
+
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
index a4c8679..796eb9d 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
@@ -34,6 +34,7 @@
public class HiveDataParserFactory implements
IRecordDataParserFactory<Writable> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "hive", "hive-parser"};
private Map<String, String> configuration;
private ARecordType recordType;
private String hiveSerdeClassName;
@@ -71,4 +72,8 @@
public void setMetaType(ARecordType metaType) {
}
+    @Override
+    public String[] getFormats() {
+        // Hand out a copy so callers cannot modify the shared static array.
+        return formats.clone();
+    }
+
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
index 7465455..25308f2 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
@@ -31,6 +31,7 @@
public class RSSParserFactory implements
IRecordDataParserFactory<SyndEntryImpl> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "rss" };
private ARecordType recordType;
@Override
@@ -58,4 +59,9 @@
public void setMetaType(ARecordType metaType) {
}
+    @Override
+    public String[] getFormats() {
+        // Hand out a copy so callers cannot modify the shared static array.
+        return formats.clone();
+    }
+
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
index 96c592a..af9aab9 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
@@ -37,6 +37,7 @@
public class RecordWithMetadataParserFactory<I, O> implements
IRecordDataParserFactory<I> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "record-with-metadata" };
private ARecordType metaType;
private ARecordType recordType;
private IRecordDataParserFactory<O> recordParserFactory;
@@ -82,6 +83,11 @@
}
@Override
+    public String[] getFormats() {
+        // Hand out a copy so callers cannot modify the shared static array.
+        return formats.clone();
+    }
+
+ @Override
public Class<?> getRecordClass() {
return converterFactory.getInputClass();
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
index 3539f6e..771f56a 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
@@ -31,6 +31,7 @@
public class TweetParserFactory implements IRecordDataParserFactory<String> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "twitter-status" };
private ARecordType recordType;
@Override
@@ -59,4 +60,9 @@
// do nothing
}
+    @Override
+    public String[] getFormats() {
+        // Hand out a copy so callers cannot modify the shared static array.
+        return formats.clone();
+    }
+
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
index ebe3276..cbcbded 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
@@ -18,6 +18,13 @@
*/
package org.apache.asterix.external.provider;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
import java.util.Map;
import org.apache.asterix.common.exceptions.AsterixException;
@@ -31,8 +38,15 @@
import org.apache.asterix.external.parser.factory.TweetParserFactory;
import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.commons.io.IOUtils;
+import org.codehaus.jettison.json.JSONArray;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONObject;
public class ParserFactoryProvider {
+
+ private static final String RESOURCE =
"META-INF/services/org.apache.asterix.external.api.IDataParserFactory";
+ private static Map<String, Class> factories = null;
private ParserFactoryProvider() {
}
@@ -54,32 +68,60 @@
return parserFactory;
}
+    /**
+     * Creates a parser factory by calling {@code newInstance()} on the given class.
+     *
+     * @param clazz
+     *            the factory class to instantiate
+     * @return the new instance, cast to {@code IDataParserFactory}
+     * @throws AsterixException
+     *             if the class cannot be instantiated or accessed, or if the new
+     *             instance is not an {@code IDataParserFactory}
+     */
+    protected static IDataParserFactory getInstance(Class clazz) throws AsterixException {
+        try {
+            Object created = clazz.newInstance();
+            return (IDataParserFactory) created;
+        } catch (IllegalAccessException | InstantiationException | ClassCastException cause) {
+            String message = "Cannot create: " + clazz.getSimpleName();
+            throw new AsterixException(message, cause);
+        }
+    }
+
@SuppressWarnings("rawtypes")
public static IDataParserFactory getDataParserFactory(String parser)
throws AsterixException {
- switch (parser) {
- case ExternalDataConstants.FORMAT_ADM:
- case ExternalDataConstants.FORMAT_JSON:
- case ExternalDataConstants.FORMAT_SEMISTRUCTURED:
- return new ADMDataParserFactory();
- case ExternalDataConstants.FORMAT_DELIMITED_TEXT:
- case ExternalDataConstants.FORMAT_CSV:
- return new DelimitedDataParserFactory();
- case ExternalDataConstants.FORMAT_HIVE:
- case ExternalDataConstants.PARSER_HIVE:
- return new HiveDataParserFactory();
- case ExternalDataConstants.FORMAT_TWEET:
- return new TweetParserFactory();
- case ExternalDataConstants.FORMAT_RSS:
- return new RSSParserFactory();
- case ExternalDataConstants.FORMAT_RECORD_WITH_METADATA:
- return new RecordWithMetadataParserFactory();
- default:
- try {
- return (IDataParserFactory)
Class.forName(parser).newInstance();
- } catch (IllegalAccessException | ClassNotFoundException |
InstantiationException
- | ClassCastException e) {
- throw new AsterixException("Unknown format: " + parser, e);
- }
+
+ if (factories == null) {
+ factories = initFactories();
}
+
+ if (factories.containsKey(parser)) {
+ return getInstance(factories.get(parser));
+ }
+
+ try {
+ // ideally, this should not happen
+ return (IDataParserFactory) Class.forName(parser).newInstance();
+ } catch (IllegalAccessException | ClassNotFoundException |
InstantiationException | ClassCastException e) {
+ throw new AsterixException("Unknown format: " + parser, e);
+ }
+ }
+
+    /**
+     * Builds the lookup table from format name to parser factory class.
+     * <p>
+     * Every classpath resource found under the RESOURCE path is read as a list of
+     * fully qualified factory class names, one per line. Each listed class is
+     * instantiated once so that its {@code getFormats()} can be queried, and the
+     * class is then registered under every format name it reports. Blank lines
+     * and text following a {@code #} are skipped, as in the
+     * {@code java.util.ServiceLoader} provider-configuration file format.
+     *
+     * @return a new map from format name to the factory class serving it
+     * @throws AsterixException
+     *             if a resource cannot be read, if a listed class cannot be loaded
+     *             or instantiated or is not an {@code IDataParserFactory}, or if
+     *             two factories report the same format name
+     */
+    protected static Map<String, Class> initFactories() throws AsterixException {
+        // Named differently from the static "factories" field to avoid shadowing it.
+        Map<String, Class> formatToFactory = new HashMap<>();
+        ClassLoader cl = ParserFactoryProvider.class.getClassLoader();
+        final Charset encoding = Charset.forName("UTF-8");
+        try {
+            Enumeration<URL> urls = cl.getResources(RESOURCE);
+            for (URL url : Collections.list(urls)) {
+                String config;
+                // try-with-resources closes the stream even when reading fails.
+                try (InputStream is = url.openStream()) {
+                    config = IOUtils.toString(is, encoding);
+                }
+                // "\\r?\\n" also accepts resource files saved with CRLF line endings.
+                for (String line : config.split("\\r?\\n")) {
+                    int commentStart = line.indexOf('#');
+                    String className = (commentStart >= 0 ? line.substring(0, commentStart) : line).trim();
+                    if (className.isEmpty()) {
+                        // Blank or comment-only line: nothing to register.
+                        continue;
+                    }
+                    final Class<?> clazz = Class.forName(className);
+                    String[] formats = ((IDataParserFactory) clazz.newInstance()).getFormats();
+                    for (String format : formats) {
+                        if (formatToFactory.containsKey(format)) {
+                            throw new AsterixException("Duplicate format " + format);
+                        }
+                        formatToFactory.put(format, clazz);
+                    }
+                }
+            }
+        } catch (IOException | ClassNotFoundException | InstantiationException | IllegalAccessException
+                | ClassCastException e) {
+            // ClassCastException: a listed class does not implement IDataParserFactory.
+            throw new AsterixException(e);
+        }
+        return formatToFactory;
+    }
}
diff --git
a/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory
b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory
new file mode 100644
index 0000000..840f619
--- /dev/null
+++
b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory
@@ -0,0 +1,6 @@
+org.apache.asterix.external.parser.factory.ADMDataParserFactory
+org.apache.asterix.external.parser.factory.DelimitedDataParserFactory
+org.apache.asterix.external.parser.factory.HiveDataParserFactory
+org.apache.asterix.external.parser.factory.RecordWithMetadataParserFactory
+org.apache.asterix.external.parser.factory.RSSParserFactory
+org.apache.asterix.external.parser.factory.TweetParserFactory
\ No newline at end of file
diff --git
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
index 17b83c2..d8cc3bb 100644
---
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
+++
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
@@ -34,6 +34,7 @@
public class ClassAdParserFactory implements IRecordDataParserFactory<char[]> {
private static final long serialVersionUID = 1L;
+ private static final String[] formats = { "line-separated" };
public static final String KEY_OLD_FORMAT = "old-format";
public static final String KEY_EVALUATE = "evaluate";
public static final String KEY_KEEP_EXPR = "keep-expr";
@@ -121,4 +122,9 @@
public void setMetaType(ARecordType metaType) {
}
+    @Override
+    public String[] getFormats() {
+        // Hand out a copy so callers cannot modify the shared static array.
+        return formats.clone();
+    }
+
}
diff --git
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
index 5b23094..91919d1 100644
---
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
+++
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
@@ -18,6 +18,7 @@
*/
package org.apache.asterix.external.parser.factory;
+import java.util.ArrayList;
import java.util.Map;
import java.util.TreeMap;
@@ -37,6 +38,7 @@
public class TestRecordWithPKParserFactory<T> implements
IRecordDataParserFactory<RecordWithPK<T>> {
private static final long serialVersionUID = 1L;
+ private static final ArrayList<String> formats = new ArrayList<>();
private ARecordType recordType;
private IRecordDataParserFactory<char[]> recordParserFactory;
private String format;
@@ -49,6 +51,7 @@
public void configure(Map<String, String> configuration) throws
AsterixException {
TreeMap<String, String> parserConf = new TreeMap<String, String>();
format = configuration.get(ExternalDataConstants.KEY_RECORD_FORMAT);
+ formats.add(format);
parserConf.put(ExternalDataConstants.KEY_FORMAT, format);
recordParserFactory =
(IRecordDataParserFactory<char[]>)
ParserFactoryProvider.getDataParserFactory(null, parserConf);
@@ -75,4 +78,10 @@
@Override
public void setMetaType(ARecordType metaType) {
}
+
+    @Override
+    public String[] getFormats() {
+        // toArray() without an argument returns Object[], and casting that to
+        // String[] throws ClassCastException; request a String[] explicitly.
+        return formats.toArray(new String[0]);
+    }
+
}
diff --git
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java
new file mode 100644
index 0000000..effb7cd
--- /dev/null
+++
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser.test;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IDataParserFactory;
+import org.apache.asterix.external.parser.factory.ADMDataParserFactory;
+import org.apache.asterix.external.parser.factory.DelimitedDataParserFactory;
+import org.apache.asterix.external.parser.factory.HiveDataParserFactory;
+import org.apache.asterix.external.parser.factory.RSSParserFactory;
+import org.apache.asterix.external.parser.factory.TweetParserFactory;
+import org.apache.asterix.external.provider.ParserFactoryProvider;
+import org.junit.Assert;
+import org.junit.Test;
+
+
+/**
+ * Verifies that ParserFactoryProvider resolves format names to the expected
+ * parser factory classes.
+ */
+public class ParserFactoryProviderLoadParserTest {
+
+    /**
+     * Looks up each listed format name and checks the type of the returned
+     * factory. Every format is asserted on its own so that a failure names the
+     * format that was resolved incorrectly.
+     */
+    @Test
+    public void test() throws AsterixException {
+        assertFactoryType("csv", DelimitedDataParserFactory.class);
+        assertFactoryType("adm", ADMDataParserFactory.class);
+        assertFactoryType("rss", RSSParserFactory.class);
+        assertFactoryType("hive", HiveDataParserFactory.class);
+        assertFactoryType("twitter-status", TweetParserFactory.class);
+    }
+
+    /** Asserts that the factory returned for {@code format} is an instance of {@code expected}. */
+    private static void assertFactoryType(String format, Class<?> expected) throws AsterixException {
+        IDataParserFactory factory = ParserFactoryProvider.getDataParserFactory(format);
+        Assert.assertTrue("Unexpected parser factory for format '" + format + "': " + factory,
+                expected.isInstance(factory));
+    }
+}
--
To view, visit https://asterix-gerrit.ics.uci.edu/1416
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I2ac039fe3daaf0636cf004289bd0c8a3229197a9
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Xikui Wang <[email protected]>