Hey all, I have a fairly odd case of duplicate documents in our Solr index
(see attached XML sample). The index is roughly 35k documents. The only
way I've found to fix the problem is to run a delete statement by id, which
deletes both; I can then re-index that one document. This happened
previously, but it ended up being an issue with case-sensitivity; this
time the ids appear identical!
Any assistance in tracking this down would be appreciated! I can provide any
other logs if necessary.
Thanks,
Dan
Sample Select Query:
<?xml version="1.0" encoding="UTF-8" ?>
<response>
  <lst name="responseHeader">
    <int name="status">0</int>
    <int name="QTime">0</int>
  </lst>
  <result name="response" numFound="2" start="0">
    <doc>
      <arr name="categoryId">
        <int>151</int>
        <int>962</int>
        <int>1493</int>
        <int>1830</int>
      </arr>
      <arr name="finish">
        <str>N/A</str>
      </arr>
      <bool name="hasDigiCast">false</bool>
      <bool name="hasDigiVista">false</bool>
      <str name="id">hr-802waclighting</str>
      <arr name="inStock">
        <bool>false</bool>
      </arr>
      <bool name="isNew">false</bool>
      <bool name="isTopSeller">true</bool>
      <str name="manufacturer">wac lighting</str>
      <arr name="masterFinish">
        <str>not applicable</str>
      </arr>
      <date name="modifiedDate">2007-10-15T23:10:01.510Z</date>
      <bool name="onSale">false</bool>
      <int name="popularity">1683</int>
      <arr name="price">
        <float>53.91</float>
      </arr>
      <date name="productAddDate">2007-07-05T00:00:00Z</date>
      <str name="productID">HR-802</str>
      <str name="productTitle">Low Voltage Miniature Housing for Recessed
Lighting Fixture</str>
      <str name="series">low voltage miniature housings</str>
      <arr name="sku">
        <str />
      </arr>
      <str name="theme" />
      <arr name="upc">
        <str />
      </arr>
    </doc>
    <doc>
      <arr name="categoryId">
        <int>151</int>
        <int>962</int>
        <int>1493</int>
        <int>1830</int>
      </arr>
      <arr name="finish">
        <str>N/A</str>
      </arr>
      <bool name="hasDigiCast">false</bool>
      <bool name="hasDigiVista">false</bool>
      <str name="id">hr-802waclighting</str>
      <arr name="inStock">
        <bool>false</bool>
      </arr>
      <bool name="isNew">false</bool>
      <bool name="isTopSeller">true</bool>
      <str name="manufacturer">wac lighting</str>
      <arr name="masterFinish">
        <str>not applicable</str>
      </arr>
      <date name="modifiedDate">2007-11-02T15:33:21.154Z</date>
      <bool name="onSale">false</bool>
      <int name="popularity">1683</int>
      <arr name="price">
        <float>53.91</float>
      </arr>
      <date name="productAddDate">2007-07-05T00:00:00Z</date>
      <str name="productID">HR-802</str>
      <str name="productTitle">Low Voltage Miniature Housing for Recessed
Lighting Fixture</str>
      <str name="series">low voltage miniature housings</str>
      <arr name="sku">
        <str />
      </arr>
      <str name="theme" />
      <arr name="upc">
        <str />
      </arr>
    </doc>
  </result>
</response>
Schema.xml
<field name="id" type="string" indexed="true" stored="true"/>
<field name="sku" type="textTight" indexed="true" stored="true"
multiValued="true"/>
<field name="upc" type="textTight" indexed="true" stored="true"
multiValued="true"/>
.
<!-- field to use to determine and enforce document uniqueness. -->
<uniqueKey>id</uniqueKey>
<!-- field for the QueryParser to use when an explicit fieldname is absent
-->
<defaultSearchField>text</defaultSearchField>
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
<solrQueryParser defaultOperator="OR"/>
--
View this message in context:
http://www.nabble.com/SOLR-1.2---Duplicate-Documents---tf4762687.html#a13621332
Sent from the Solr - User mailing list archive at Nabble.com.