Taewoo Kim created ASTERIXDB-1880:
-------------------------------------

             Summary: Unequal number of valid ... exception during a similarity join query
                 Key: ASTERIXDB-1880
                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1880
             Project: Apache AsterixDB
          Issue Type: Bug
            Reporter: Taewoo Kim


On an 8-node cluster, the similarity join query shown below fails with the 
following exception. There is a keyword index on the summary field.

{code}
Unequal number of valid Dictionary BTree, Inverted Lists, Deleted BTree, and 
Bloom Filter files found. Aborting cleanup. [HyracksDataException]
{code}

{code}
use dataverse exp;
count(
for $p in dataset
"AmazonReviewProductID"
for $o in dataset
"AmazonReviewNoDup"
for $i in dataset
"AmazonReviewNoDup"
where $p.asin /* +indexnl */ = $o.asin and $p.id >=
int64("6450")
and $p.id <=
int64("7449")
and /* +indexnl */ similarity-jaccard(word-tokens($o.summary), 
word-tokens($i.summary)) >= 0.8 and $o.id < $i.id
return {"oid":$o.id, "iid":$i.id}
);
{code}


DDL used to create the dataverse, types, datasets, and keyword index:
{code}
drop dataverse exp if exists;
create dataverse exp;
use dataverse exp;

create type AmazonReviewType as open {
        id: uuid
}

create dataset AmazonReviewNoDup(AmazonReviewType) primary key id autogenerated;

create index AmazonReviewNoDup_summary_kw_idx 
on AmazonReviewNoDup(summary:string?) type keyword enforced;

create type AmazonProductIDType as closed {
        id: int64,
        asin: string
}

create dataset AmazonReviewProductID(AmazonProductIDType) primary key id;
{code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to