Xikui Wang has uploaded a new change for review.
https://asterix-gerrit.ics.uci.edu/1339
Change subject: Fix ASTERIXDB-1609 and OrderedList bug in TweetParser
......................................................................
Fix ASTERIXDB-1609 and OrderedList bug in TweetParser
1. For ASTERIXDB-1609, add a UNION type check in writeField and add a
new case that handles the ORDEREDLIST type.
2. For the OrderedList bug, replace UnorderedListBuilder with
OrderedListBuilder.
Change-Id: Ia27148cb10206b93dabf7655aed68f3004f96dfd
---
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/TweetParser.java
1 file changed, 43 insertions(+), 23 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/39/1339/1
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/TweetParser.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/TweetParser.java
index 8d483dc..fc69d27 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/TweetParser.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/TweetParser.java
@@ -22,14 +22,16 @@
import org.apache.asterix.builders.IARecordBuilder;
import org.apache.asterix.builders.IAsterixListBuilder;
import org.apache.asterix.builders.ListBuilderFactory;
+import org.apache.asterix.builders.OrderedListBuilder;
import org.apache.asterix.builders.RecordBuilderFactory;
-import org.apache.asterix.builders.UnorderedListBuilder;
import org.apache.asterix.external.api.IRawRecord;
import org.apache.asterix.external.api.IRecordDataParser;
import org.apache.asterix.om.base.AMutablePoint;
import org.apache.asterix.om.base.ANull;
+import org.apache.asterix.om.types.AOrderedListType;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.AUnionType;
import org.apache.asterix.om.types.BuiltinType;
import org.apache.asterix.om.types.IAType;
import org.apache.asterix.om.util.container.IObjectPool;
@@ -60,46 +62,69 @@
aPoint = new AMutablePoint(0, 0);
}
- private void parseUnorderedList(JSONArray jArray, DataOutput output)
throws IOException, JSONException {
+ private void parseJSONArray(JSONArray jArray, DataOutput output,
AOrderedListType orderedListType)
+ throws IOException, JSONException {
ArrayBackedValueStorage itemBuffer = getTempBuffer();
- UnorderedListBuilder unorderedListBuilder = (UnorderedListBuilder)
getUnorderedListBuilder();
+ OrderedListBuilder orderedList = (OrderedListBuilder)
getOrderedListBuilder();
- unorderedListBuilder.reset(null);
+ orderedList.reset(orderedListType);
for (int iter1 = 0; iter1 < jArray.length(); iter1++) {
itemBuffer.reset();
- if (writeField(jArray.get(iter1), null,
itemBuffer.getDataOutput())) {
- unorderedListBuilder.addItem(itemBuffer);
+ if (writeField(jArray.get(iter1), orderedListType.getItemType(),
itemBuffer.getDataOutput())) {
+ orderedList.addItem(itemBuffer);
}
}
- unorderedListBuilder.write(output, true);
+ orderedList.write(output, true);
}
private boolean writeField(Object fieldObj, IAType fieldType, DataOutput
out) throws IOException, JSONException {
boolean writeResult = true;
- if (fieldType != null) {
- switch (fieldType.getTypeTag()) {
+ IAType chkFieldType;
+ if (fieldType instanceof AUnionType) {
+ chkFieldType = ((AUnionType) fieldType).getActualType();
+ } else {
+ chkFieldType = fieldType;
+ }
+ if (chkFieldType != null) {
+ switch (chkFieldType.getTypeTag()) {
case STRING:
- out.write(BuiltinType.ASTRING.getTypeTag().serialize());
+ out.write(fieldType.getTypeTag().serialize());
utf8Writer.writeUTF8(fieldObj.toString(), out);
break;
case INT64:
- aInt64.setValue((long) fieldObj);
+ out.write(fieldType.getTypeTag().serialize());
+ if (fieldObj instanceof Integer) {
+ out.writeLong(((Integer) fieldObj).longValue());
+ } else {
+ out.writeLong((Long) fieldObj);
+ }
int64Serde.serialize(aInt64, out);
break;
case INT32:
- out.write(BuiltinType.AINT32.getTypeTag().serialize());
+ out.write(fieldType.getTypeTag().serialize());
out.writeInt((Integer) fieldObj);
break;
case DOUBLE:
- out.write(BuiltinType.ADOUBLE.getTypeTag().serialize());
+ out.write(fieldType.getTypeTag().serialize());
out.writeDouble((Double) fieldObj);
break;
case BOOLEAN:
- out.write(BuiltinType.ABOOLEAN.getTypeTag().serialize());
+ out.write(fieldType.getTypeTag().serialize());
out.writeBoolean((Boolean) fieldObj);
break;
case RECORD:
- writeRecord((JSONObject) fieldObj, out, (ARecordType)
fieldType);
+ if (((JSONObject) fieldObj).length() != 0) {
+ writeRecord((JSONObject) fieldObj, out, (ARecordType)
chkFieldType);
+ } else {
+ writeResult = false;
+ }
+ break;
+ case ORDEREDLIST:
+ if (((JSONArray) fieldObj).length() != 0) {
+ parseJSONArray((JSONArray) fieldObj, out,
((AOrderedListType) chkFieldType));
+ } else {
+ writeResult = false;
+ }
break;
default:
writeResult = false;
@@ -124,7 +149,7 @@
utf8Writer.writeUTF8((String) fieldObj, out);
} else if (fieldObj instanceof JSONArray) {
if (((JSONArray) fieldObj).length() != 0) {
- parseUnorderedList((JSONArray) fieldObj, out);
+ parseJSONArray((JSONArray) fieldObj, out, null);
} else {
writeResult = false;
}
@@ -190,7 +215,6 @@
}
} else {
//open record type
- int closedFieldCount = 0;
IAType curFieldType = null;
for (String attrName : JSONObject.getNames(obj)) {
if (obj.isNull(attrName) || obj.length() == 0) {
@@ -210,12 +234,8 @@
recBuilder.addField(fieldNameBuffer, fieldValueBuffer);
} else {
recBuilder.addField(attrIdx, fieldValueBuffer);
- closedFieldCount++;
}
}
- }
- if (curRecType != null && closedFieldCount < curFNames.length) {
- throw new HyracksDataException("Non-null field is null");
}
}
recBuilder.write(out, true);
@@ -225,8 +245,8 @@
return recordBuilderPool.allocate(ATypeTag.RECORD);
}
- private IAsterixListBuilder getUnorderedListBuilder() {
- return listBuilderPool.allocate(ATypeTag.UNORDEREDLIST);
+ private IAsterixListBuilder getOrderedListBuilder() {
+ return listBuilderPool.allocate(ATypeTag.ORDEREDLIST);
}
private ArrayBackedValueStorage getTempBuffer() {
--
To view, visit https://asterix-gerrit.ics.uci.edu/1339
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia27148cb10206b93dabf7655aed68f3004f96dfd
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Xikui Wang <[email protected]>