Hello,

I'm using the latest version of Riddle from GitHub, Sphinx 0.9.9-release
(r2117), and xmlpipe2 as the data source for Sphinx.
I use group by to implement facets (similar to Thinking Sphinx, but
for an xmlpipe2 data source).
There is a problem: group by works incorrectly for "int", "bool" and
"multi" attributes, but it works OK for float attributes.
Here is an example of output:

grouping by has_menu -- bool:

>> MongoRestaurant.facets("")[0]
=> {:status=>0, :total_found=>1, :attribute_names=>["total_likes",
"neighborhood_ids", "lon", "has_delivery", "offer_type_ids",
"has_reservation", "cuisine_ids", "lat", "source_ids", "name_sort",
"address_zipcode", "has_menu", "offer_ids", "restaurant_ids",
"price_range", "score", "city_ids", "total_checkins",
"reviews_avg_score", "bh_id", "@groupby",
"@count"], :attributes=>{"offer_type_ids"=>1073741825, "lon"=>5,
"offer_ids"=>1073741825, "@groupby"=>1, "restaurant_ids"=>1,
"source_ids"=>1073741825, "reviews_avg_score"=>5, "total_checkins"=>1,
"total_likes"=>1, "@count"=>1, "address_zipcode"=>1,
"has_delivery"=>4, "city_ids"=>1, "has_menu"=>4,
"cuisine_ids"=>1073741825, "neighborhood_ids"=>1073741825, "bh_id"=>1,
"score"=>5, "price_range"=>3, "name_sort"=>3, "lat"=>5,
"has_reservation"=>4}, :words=>{"400456007"=>{:docs=>70, :hits=>70}}, 
:time=>0.0, :fields=>["classnamecrc32",
"name", "description", "offer_text", "offer_type_value",
"cuisine_name"], :matches=>[{:doc=>598000, :attributes=>{"offer_type_ids"=>[],
"lon"=>-1.29096734523773, "offer_ids"=>[], "@groupby"=>19700101,
"restaurant_ids"=>598000, "source_ids"=>[], "reviews_avg_score"=>0.0,
"total_checkins"=>0, "total_likes"=>0, "@count"=>70,
"address_zipcode"=>10022, "has_delivery"=>1, "city_ids"=>18819,
"has_menu"=>1, "cuisine_ids"=>[], "neighborhood_ids"=>[16, 22, 56,
60], "bh_id"=>598000, "score"=>0.0, "price_range"=>69,
"name_sort"=>68, "lat"=>0.711285769939423,
"has_reservation"=>0}, :index=>0, :weight=>1273}], :total=>1}

grouping by total_likes -- integer:

=> {:status=>0, :total_found=>1, :attribute_names=>["total_likes",
"neighborhood_ids", "lon", "has_delivery", "offer_type_ids",
"has_reservation", "cuisine_ids", "lat", "source_ids", "name_sort",
"address_zipcode", "has_menu", "offer_ids", "restaurant_ids",
"price_range", "score", "city_ids", "total_checkins",
"reviews_avg_score", "bh_id", "@groupby",
"@count"], :attributes=>{"offer_type_ids"=>1073741825, "lon"=>5,
"offer_ids"=>1073741825, "@groupby"=>1, "restaurant_ids"=>1,
"source_ids"=>1073741825, "reviews_avg_score"=>5, "total_checkins"=>1,
"total_likes"=>1, "@count"=>1, "address_zipcode"=>1,
"has_delivery"=>4, "city_ids"=>1, "has_menu"=>4,
"cuisine_ids"=>1073741825, "neighborhood_ids"=>1073741825, "bh_id"=>1,
"score"=>5, "price_range"=>3, "name_sort"=>3, "lat"=>5,
"has_reservation"=>4}, :words=>{"400456007"=>{:docs=>70, :hits=>70}}, 
:time=>0.001, :fields=>["classnamecrc32",
"name", "description", "offer_text", "offer_type_value",
"cuisine_name"], :matches=>[{:doc=>598000, :attributes=>{"offer_type_ids"=>[],
"lon"=>-1.29096734523773, "offer_ids"=>[], "@groupby"=>19700101,
"restaurant_ids"=>598000, "source_ids"=>[], "reviews_avg_score"=>0.0,
"total_checkins"=>0, "total_likes"=>0, "@count"=>70,
"address_zipcode"=>10022, "has_delivery"=>1, "city_ids"=>18819,
"has_menu"=>1, "cuisine_ids"=>[], "neighborhood_ids"=>[16, 22, 56,
60], "bh_id"=>598000, "score"=>0.0, "price_range"=>69,
"name_sort"=>68, "lat"=>0.711285769939423,
"has_reservation"=>0}, :index=>0, :weight=>1273}], :total=>1}


grouping by reviews_avg_score -- float:

=> {:status=>0, :total_found=>9, :attribute_names=>["total_likes",
"neighborhood_ids", "lon", "has_delivery", "offer_type_ids",
"has_reservation", "cuisine_ids", "lat", "source_ids", "name_sort",
"address_zipcode", "has_menu", "offer_ids", "restaurant_ids",
"price_range", "score", "city_ids", "total_checkins",
"reviews_avg_score", "bh_id", "@groupby",
"@count"], :attributes=>{"offer_type_ids"=>1073741825, "lon"=>5,
"offer_ids"=>1073741825, "@groupby"=>1, "restaurant_ids"=>1,
"source_ids"=>1073741825, "reviews_avg_score"=>5, "total_checkins"=>1,
"total_likes"=>1, "@count"=>1, "address_zipcode"=>1,
"has_delivery"=>4, "city_ids"=>1, "has_menu"=>4,
"cuisine_ids"=>1073741825, "neighborhood_ids"=>1073741825, "bh_id"=>1,
"score"=>5, "price_range"=>3, "name_sort"=>3, "lat"=>5,
"has_reservation"=>4}, :words=>{"400456007"=>{:docs=>70, :hits=>70}}, 
:time=>0.001, :fields=>["classnamecrc32",
"name", "description", "offer_text", "offer_type_value",
"cuisine_name"], :matches=>[{:doc=>598261, :attributes=>{"offer_type_ids"=>[],
"lon"=>-1.29073655605316, "offer_ids"=>[], "@groupby"=>20040816,
"restaurant_ids"=>598261, "source_ids"=>[], "reviews_avg_score"=>10.0,
"total_checkins"=>0, "total_likes"=>0, "@count"=>5,
"address_zipcode"=>10028, "has_delivery"=>1, "city_ids"=>18819,
"has_menu"=>1, "cuisine_ids"=>[21], "neighborhood_ids"=>[8, 22, 23,
57], "bh_id"=>598261, "score"=>0.0, "price_range"=>69,
"name_sort"=>64, "lat"=>0.711645185947418,
"has_reservation"=>0}, :index=>0, :weight=>1273},
{:doc=>598904, :attributes=>{"offer_type_ids"=>[11],
"lon"=>-1.29076039791107, "offer_ids"=>[622122], "@groupby"=>20040804,
"restaurant_ids"=>598904, "source_ids"=>[15],
"reviews_avg_score"=>9.0, "total_checkins"=>13, "total_likes"=>1,
"@count"=>3, "address_zipcode"=>10021, "has_delivery"=>1,
"city_ids"=>18819, "has_menu"=>0, "cuisine_ids"=>[],
"neighborhood_ids"=>[22, 23, 28, 57], "bh_id"=>598904,
"score"=>2.4300000667572, "price_range"=>69, "name_sort"=>26,
"lat"=>0.711563467979431,
"has_reservation"=>0}, :index=>1, :weight=>1273},
{:doc=>598488, :attributes=>{"offer_type_ids"=>[], "lon"=>0.0,
"offer_ids"=>[], "@groupby"=>20040722, "restaurant_ids"=>598488,
"source_ids"=>[], "reviews_avg_score"=>8.0, "total_checkins"=>0,
"total_likes"=>0, "@count"=>3, "address_zipcode"=>10012,
"has_delivery"=>1, "city_ids"=>18819, "has_menu"=>1,
"cuisine_ids"=>[33], "neighborhood_ids"=>[2, 9, 22, 61],
"bh_id"=>598488, "score"=>0.0, "price_range"=>69, "name_sort"=>37,
"lat"=>0.0, "has_reservation"=>0}, :index=>2, :weight=>1273},
{:doc=>599149, :attributes=>{"offer_type_ids"=>[],
"lon"=>-1.29116952419281, "offer_ids"=>[], "@groupby"=>20040628,
"restaurant_ids"=>599149, "source_ids"=>[], "reviews_avg_score"=>7.0,
"total_checkins"=>22, "total_likes"=>0, "@count"=>2,
"address_zipcode"=>10017, "has_delivery"=>1, "city_ids"=>18819,
"has_menu"=>0, "cuisine_ids"=>[28], "neighborhood_ids"=>[6, 22, 60],
"bh_id"=>599149, "score"=>0.0, "price_range"=>69, "name_sort"=>45,
"lat"=>0.711319506168365,
"has_reservation"=>0}, :index=>3, :weight=>1273},
{:doc=>598304, :attributes=>{"offer_type_ids"=>[],
"lon"=>-1.52945172786713, "offer_ids"=>[], "@groupby"=>20040604,
"restaurant_ids"=>598304, "source_ids"=>[], "reviews_avg_score"=>6.0,
"total_checkins"=>115, "total_likes"=>0, "@count"=>5,
"address_zipcode"=>60604, "has_delivery"=>1, "city_ids"=>6335,
"has_menu"=>1, "cuisine_ids"=>[], "neighborhood_ids"=>[240],
"bh_id"=>598304, "score"=>0.0, "price_range"=>69, "name_sort"=>0,
"lat"=>0.73090934753418,
"has_reservation"=>0}, :index=>4, :weight=>1273},
{:doc=>598791, :attributes=>{"offer_type_ids"=>[],
"lon"=>-1.29123413562775, "offer_ids"=>[], "@groupby"=>20040511,
"restaurant_ids"=>598791, "source_ids"=>[], "reviews_avg_score"=>5.0,
"total_checkins"=>12, "total_likes"=>6, "@count"=>1,
"address_zipcode"=>10018, "has_delivery"=>1, "city_ids"=>18819,
"has_menu"=>1, "cuisine_ids"=>[2], "neighborhood_ids"=>[7, 22, 60],
"bh_id"=>598791, "score"=>0.0, "price_range"=>70, "name_sort"=>7,
"lat"=>0.711277902126312,
"has_reservation"=>0}, :index=>5, :weight=>1273},
{:doc=>598474, :attributes=>{"offer_type_ids"=>[],
"lon"=>-1.52936661243439, "offer_ids"=>[], "@groupby"=>20040416,
"restaurant_ids"=>598474, "source_ids"=>[], "reviews_avg_score"=>4.0,
"total_checkins"=>925, "total_likes"=>5, "@count"=>1,
"address_zipcode"=>60611, "has_delivery"=>1, "city_ids"=>6335,
"has_menu"=>1, "cuisine_ids"=>[], "neighborhood_ids"=>[217, 287],
"bh_id"=>598474, "score"=>0.0, "price_range"=>71, "name_sort"=>10,
"lat"=>0.731164395809174,
"has_reservation"=>0}, :index=>6, :weight=>1273},
{:doc=>598689, :attributes=>{"offer_type_ids"=>[],
"lon"=>-1.29161155223846, "offer_ids"=>[], "@groupby"=>20040110,
"restaurant_ids"=>598689, "source_ids"=>[], "reviews_avg_score"=>2.0,
"total_checkins"=>2, "total_likes"=>0, "@count"=>2,
"address_zipcode"=>10007, "has_delivery"=>1, "city_ids"=>18819,
"has_menu"=>1, "cuisine_ids"=>[], "neighborhood_ids"=>[2, 17, 22, 65],
"bh_id"=>598689, "score"=>0.0, "price_range"=>69, "name_sort"=>15,
"lat"=>0.71059387922287,
"has_reservation"=>0}, :index=>7, :weight=>1273},
{:doc=>598000, :attributes=>{"offer_type_ids"=>[],
"lon"=>-1.29096734523773, "offer_ids"=>[], "@groupby"=>19700101,
"restaurant_ids"=>598000, "source_ids"=>[], "reviews_avg_score"=>0.0,
"total_checkins"=>0, "total_likes"=>0, "@count"=>48,
"address_zipcode"=>10022, "has_delivery"=>1, "city_ids"=>18819,
"has_menu"=>1, "cuisine_ids"=>[], "neighborhood_ids"=>[16, 22, 56,
60], "bh_id"=>598000, "score"=>0.0, "price_range"=>69,
"name_sort"=>68, "lat"=>0.711285769939423,
"has_reservation"=>0}, :index=>8, :weight=>1273}], :total=>9}



You can easily see that grouping by :has_menu and :total_likes
returns only one result (:total_found=>1). This is incorrect: there are
records with :has_menu == false, total_likes = 1, total_likes = 2, etc.
Only grouping by reviews_avg_score returns correct results.


Example of the XML data source:
<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>
<sphinx:schema>
<sphinx:field name="classnamecrc32"/>
<sphinx:field name="name"/>
<sphinx:field name="description"/>
<sphinx:field name="offer_text"/>
<sphinx:field name="offer_type_value"/>
<sphinx:field name="cuisine_name"/>
<sphinx:attr name="address_zipcode" type="int"/>
<sphinx:attr name="restaurant_ids" type="int"/>
<sphinx:attr name="lat" type="float"/>
<sphinx:attr name="has_delivery" type="bool"/>
<sphinx:attr name="source_ids" type="multi"/>
<sphinx:attr name="lon" type="float"/>
<sphinx:attr name="has_reservation" type="bool"/>
<sphinx:attr name="offer_type_ids" type="multi"/>
<sphinx:attr name="price_range" type="str2ordinal"/>
<sphinx:attr name="has_menu" type="bool"/>
<sphinx:attr name="score" type="float"/>
<sphinx:attr name="neighborhood_ids" type="multi"/>
<sphinx:attr name="cuisine_ids" type="multi"/>
<sphinx:attr name="total_checkins" type="int"/>
<sphinx:attr name="offer_ids" type="multi"/>
<sphinx:attr name="reviews_avg_score" type="float"/>
<sphinx:attr name="city_ids" type="int"/>
<sphinx:attr name="name_sort" type="str2ordinal"/>
<sphinx:attr name="total_likes" type="int"/>
<sphinx:attr name="bh_id" type="int"/>
</sphinx:schema>
<sphinx:document id="599105">
<classnamecrc32>400456007</classnamecrc32>
<name><![CDATA[Subway]]></name>
<description><![CDATA[test]]></description>
<offer_text><![CDATA[]]></offer_text>
<offer_type_value><![CDATA[]]></offer_type_value>
<cuisine_name><![CDATA[]]></cuisine_name>
<address_zipcode>60622</address_zipcode>
<restaurant_ids>599105</restaurant_ids>
<lat>0.731224661851994</lat>
<has_delivery>1</has_delivery>
<source_ids></source_ids>
<lon>-1.53024979754365</lon>
<has_reservation>0</has_reservation>
<offer_type_ids></offer_type_ids>
<price_range>1</price_range>
<has_menu>1</has_menu>
<score>0.0</score>
<neighborhood_ids>201,202,284</neighborhood_ids>
<cuisine_ids></cuisine_ids>
<total_checkins>5</total_checkins>
<offer_ids></offer_ids>
<reviews_avg_score>0</reviews_avg_score>
<city_ids>6335</city_ids>
<name_sort>Subway</name_sort>
<total_likes>0</total_likes>
<bh_id>599105</bh_id>
</sphinx:document>
</sphinx:docset>


Thanks, Slava

-- 
You received this message because you are subscribed to the Google Groups 
"Thinking Sphinx" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/thinking-sphinx?hl=en.

Reply via email to