Taewoo Kim created ASTERIXDB-1880:
-------------------------------------
Summary: Unequal number of valid ... exception during a similarity
join query
Key: ASTERIXDB-1880
URL: https://issues.apache.org/jira/browse/ASTERIXDB-1880
Project: Apache AsterixDB
Issue Type: Bug
Reporter: Taewoo Kim
On an 8-node cluster, the similarity join query below fails with the following
exception. There is a keyword index on the summary field.
{code}
Unequal number of valid Dictionary BTree, Inverted Lists, Deleted BTree, and
Bloom Filter files found. Aborting cleanup. [HyracksDataException]
{code}
{code}
/* Reproduction query: counts the (oid, iid) records produced by a three-way join.
   $p ranges over AmazonReviewProductID; $o and $i both range over AmazonReviewNoDup.
   Join conditions:
     - $p.asin equals $o.asin, with $p.id restricted to 6450..7449 inclusive
     - Jaccard similarity of the word tokens of $o.summary and $i.summary is at least 0.8
     - $o.id is less than $i.id, so a record is never paired with itself and each
       unordered pair is produced at most once
   Both the equality predicate and the similarity predicate carry an indexnl hint
   (presumably requesting index nested-loop joins; confirm against the AsterixDB docs). */
use dataverse exp;
count(
for $p in dataset
"AmazonReviewProductID"
for $o in dataset
"AmazonReviewNoDup"
for $i in dataset
"AmazonReviewNoDup"
where $p.asin /* +indexnl */ = $o.asin and $p.id >=
int64("6450")
and $p.id <=
int64("7449")
and /* +indexnl */ similarity-jaccard(word-tokens($o.summary),
word-tokens($i.summary)) >= 0.8 and $o.id < $i.id
return {"oid":$o.id, "iid":$i.id}
);
{code}
DDL used to create the dataverse, types, datasets, and keyword index:
{code}
/* Start from a clean slate: drop the exp dataverse if it already exists,
   recreate it, and make it the current dataverse for the statements that follow. */
drop dataverse exp if exists;
create dataverse exp;
use dataverse exp;
/* Open record type for reviews: only the uuid id field is declared. */
create type AmazonReviewType as open {
id: uuid
}
/* Review dataset keyed on id, with the key autogenerated. */
create dataset AmazonReviewNoDup(AmazonReviewType) primary key id autogenerated;
/* Keyword index on summary. The field is not declared in AmazonReviewType, so the
   index itself declares it as an optional string and is marked enforced. */
create index AmazonReviewNoDup_summary_kw_idx
on AmazonReviewNoDup(summary:string?) type keyword enforced;
/* Closed record type holding exactly two fields: an int64 id and a string asin. */
create type AmazonProductIDType as closed {
id: int64,
asin: string
}
/* Dataset of product id records keyed on id; the key is not autogenerated. */
create dataset AmazonReviewProductID(AmazonProductIDType) primary key id;
{code}
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)