Hi,

I am trying to use a correlated subquery with an 'order by random() limit 2' to 
pick upto two random rows for each value in the outer query. I am not sure if I 
am doing this correctly, but the number of rows I am getting seems to vary 
randomly which doesn't make sense to me. If i replace the order by random() 
with order by product I always get the expected number of rows. I have tried to 
create a simplified version of the code below to replicate the issue.

I am experiencing the problem on sqlite 3.22.0, but I have tried on 
sqliteonline.com which I think is using 3.27.2 and am seeing similar results.

Thanks,
Kevin

---

create table if not exists test_productattribs (product text primary key, attr, 
val);                                                       
                                                                                
                                                            
insert or ignore into test_productattribs values                                
                                                            
  ('1', 'area', 'a'),                                                           
                                                           
  ('2', 'area', 'b'),                                                           
                                                            
  ('3', 'area', 'a'),                                                           
                                                           
  ('4', 'area', 'a')                                                            
                                                            
;                                                                               
                                                           
                                                                                
                                                            
--In the real query, this is done inside the with, but it does not seem 
relevant                                                            
--to the issue.                                                                 
                                                           
create table if not exists                                                      
                                                            
  test_productarea                                                              
                                                           
as select                                                                       
                                                            
  product,                                                                      
                                                           
  val as area                                                                   
                                                            
from                                                                            
                                                           
  test_productattribs                                                           
                                                            
where                                                                           
                                                           
  attr='area'                                                                   
                                                           
;                                                                               
                                                            
                                                                                
                                                            
--I have two areas, 'a' and 'b'. I limit to two random products from each area  
                                                            
--As area 'b' only has one product, I always expect to get 3 rows, 1 for area 
'b', and 2 for                                               
--area 'a'.                                                                     
                                                            
with                                                                            
                                                           
  dareas as (select distinct                                                    
                                                            
    area                                                                        
                                                           
  from                                                                          
                                                            
    test_productarea)                                                           
                                                            
  select                                                                        
                                                           
    da.area,                                                                    
                                                            
    pa.product                                                                  
                                                           
  from                                                                          
                                                            
    dareas as da                                                                
                                                           
    left join test_productarea as pa                                            
                                                            
      --if I don't order by random, but instead by product, I always get the 
correct number of rows                                        
      on pa.product in (select product from test_productarea where da.area=area 
order by random() limit 2)                                 
-- In the real query, this order by is required to see a different number of 
rows to the number in the                                     
-- limit. However, it seems it is not required in the small test dataset.       
                                                           
--  order by                                                                    
                                                            
--    da.area,                                                                  
                                                           
--    pa.product                                                                
                                                            
;                                                                   
_______________________________________________
sqlite-users mailing list
sqlite-users@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users

Reply via email to