Jianfeng Jia created ASTERIXDB-1472:
---------------------------------------
Summary: Exception when ingesting the data with filter on a field
Key: ASTERIXDB-1472
URL: https://issues.apache.org/jira/browse/ASTERIXDB-1472
Project: Apache AsterixDB
Issue Type: Bug
Components: Storage
Environment: master code:
commit 2dff79736e6f3f877149317d02395dbd12e16a20
Date: Thu Jun 2 23:13:52 2016 -0700
Reporter: Jianfeng Jia
Assignee: Murtadha Hubail
Here is the AQL script that reproduces the problem:
{code}
// Start from a clean state: drop any existing "twitter" dataverse, recreate it,
// and select it for the statements that follow.
drop dataverse twitter if exists;
create dataverse twitter if not exists;
// NOTE(review): the next statement has no terminating ';' in the report - confirm
// whether it was lost in transcription before replaying the script.
use dataverse twitter
// Record type for the author of a tweet; used by the "user" field of typeTweet.
create type typeUser if not exists as open {
id: int64,
name: string,
screen_name : string,
lang : string,
location: string,
// Declared as a date here, whereas typeTweet.create_at is a datetime.
create_at: date,
description: string,
followers_count: int32,
friends_count: int32,
// NOTE(review): spelled "statues_count" (possibly meant "statuses_count");
// presumably it has to match the field name in the ingested data - verify.
statues_count: int64
}
// Record type for the place attached to a tweet; used by the "place" field of
// typeTweet, where it is declared with a trailing '?'.
create type typePlace if not exists as open{
country : string,
country_code : string,
full_name : string,
id : string,
name : string,
place_type : string,
bounding_box : rectangle
}
// Record type holding state / county / city identifiers and names; used by the
// "geo_tag" field of typeTweet. stateID, countyID and cityID are the keys of the
// state_idx, county_idx and city_idx indexes created on ds_tweet.
create type typeGeoTag if not exists as open {
stateID: int32,
stateName: string,
countyID: int32,
countyName: string,
// Only the two city fields carry the '?' marker.
cityID: int32?,
cityName: string?
}
// Record type stored in the ds_tweet dataset and parsed by the MessageFeed feed.
create type typeTweet if not exists as open{
// Filter field of ds_tweet; also the key of the time_idx btree index.
create_at : datetime,
// Primary key of ds_tweet.
id: int64,
// Key of the text_idx keyword index; the field name is written quoted.
"text": string,
in_reply_to_status : int64,
in_reply_to_user : int64,
favorite_count : int64,
// Key of the location_idx rtree index.
coordinate: point?,
retweet_count : int64,
lang : string,
is_retweet: boolean,
hashtags : {{ string }} ?,
user_mentions : {{ int64 }} ? ,
user : typeUser,
place : typePlace?,
// Nested record whose stateID / countyID / cityID fields are indexed on ds_tweet.
geo_tag: typeGeoTag
}
// Dataset of typeTweet records keyed on id, with a filter declared on create_at
// (the statement is wrapped over two lines). The filter is the feature named in
// the issue summary.
create dataset ds_tweet(typeTweet) if not exists primary key id with filter on
create_at;
// NOTE(review): the commented-out line below looks like a fragment of the grammar
// for an optional compaction-policy clause; the create dataset statement above
// does not specify a compaction policy.
//"using" "compaction" "policy" CompactionPolicy ( Configuration )? )?
// Secondary indexes on ds_tweet: one keyword index, one rtree index and four
// btree indexes (three of them on fields nested under geo_tag).
create index text_idx if not exists on ds_tweet("text") type keyword;
create index location_idx if not exists on ds_tweet(coordinate) type rtree;
create index time_idx if not exists on ds_tweet(create_at) type btree;
create index state_idx if not exists on ds_tweet(geo_tag.stateID) type btree;
create index county_idx if not exists on ds_tweet(geo_tag.countyID) type btree;
create index city_idx if not exists on ds_tweet(geo_tag.cityID) type btree;
// Feed that uses the localfs adapter to read the ADM file at the given path,
// with typeTweet as the record type.
create feed MessageFeed using localfs(
("path"="128.195.52.77:///home/jianfeng/data/head20m.adm"),
("format"="adm"),
("type-name"="typeTweet"));
// wait-for-completion-feed is set to "true" before the feed is connected.
set wait-for-completion-feed "true";
// Connecting the feed starts the ingestion into ds_tweet during which the
// exception is reported.
connect feed MessageFeed to dataset ds_tweet;
{code}
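The exception seems to be related to the merge phase: it is thrown from PrefixMergePolicy.isMergeLagging while an LSM R-tree flush operation is exiting its components: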
{code}
java.lang.IllegalStateException
at org.apache.hyracks.storage.am.lsm.common.impls.PrefixMergePolicy.isMergeLagging(PrefixMergePolicy.java:151)
at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.exitComponents(LSMHarness.java:211)
at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:437)
at org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:105)
at org.apache.hyracks.storage.am.lsm.rtree.impls.LSMRTreeFlushOperation.call(LSMRTreeFlushOperation.java:74)
at org.apache.hyracks.storage.am.lsm.rtree.impls.LSMRTreeFlushOperation.call(LSMRTreeFlushOperation.java:34)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:744)
{code}
I have uploaded a small sample data set
[here|https://drive.google.com/open?id=0B423M7wGZj9ddlN2Zk1SZmFEOGs].
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)