#11088: Aggregation problem, table JOINed twice
---------------------------+------------------------------------------------
 Reporter:  [email protected]  |      Status:  new              
Milestone:  1.1            |   Component:  ORM aggregation  
  Version:  1.1-beta-1     |    Keywords:  aggregation, join
    Stage:  Unreviewed     |   Has_patch:  0                
---------------------------+------------------------------------------------
 I don't know if I'm doing something wrong but I've tried to compile all
 the information needed to track this one.

 === Models ===
 video/models.py
  {{{
 class Video(models.Model):
     name = models.TextField()
     dir = models.TextField()
     videoclass = models.ForeignKey('Videoclass')
     status = models.CharField(max_length=24)
     date = models.DateTimeField()
     duration = models.FloatField()
     width = models.IntegerField()
     height = models.IntegerField()
     displayname = models.TextField()
     published = models.IntegerField()
     views = models.IntegerField()
     ratesum = models.IntegerField()
     ratenum = models.IntegerField()
 }}}


 statistics/models.py

 {{{
 class VisitorAction(models.Model):
     name = models.CharField(max_length = 200)

     def __unicode__(self):
         return self.name


 class VisitorLog(models.Model):
     visitor = models.ForeignKey("Visitor")
     video = models.ForeignKey(Video)
     action = models.ForeignKey(VisitorAction)
     seek_video = models.FloatField()
     event_time = models.DateTimeField(default = datetime.datetime.now)
     domain = models.CharField(max_length = 128)
 }}}


 === Datas ===
 statistics_visitorlog contains 100k+ entries

 video_video contains 400 entries

 statistics_visitoraction contains 6 entries ('play', 'seek' ... etc)

 === The QS ===
 {{{
 >>> from video.models import *
 >>> from django.db.models import Count
 >>> Video.objects.annotate(play_log =
 Count('visitorlog')).filter(visitorlog__action__name =
 'play').order_by('-play_log')[0:10]
 }}}
 [Kill the mysqld because it takes forever and have a nice backtrace]
 {{{
 >>> from django.db import connection
 >>> connection.queries[-1]
 {'time': '7.180', 'sql': u'SELECT `video_video`.`id`,
 `video_video`.`name`, `video_video`.`dir`, `video_video`.`videoclass_id`,
 `video_video`.`status`, `video_video`.`date`, `video_video`.`duration`,
 `video_video`.`width`, `video_video`.`height`,
 `video_video`.`displayname`, `video_video`.`published`,
 `video_video`.`views`, `video_video`.`ratesum`, `video_video`.`ratenum`,
 COUNT(`statistics_visitorlog`.`id`) AS `play_log` FROM `video_video` LEFT
 OUTER JOIN `statistics_visitorlog` ON (`video_video`.`id` =
 `statistics_visitorlog`.`video_id`) INNER JOIN `statistics_visitorlog` T4
 ON (`video_video`.`id` = T4.`video_id`) INNER JOIN
 `statistics_visitoraction` ON (T4.`action_id` =
 `statistics_visitoraction`.`id`) WHERE `statistics_visitoraction`.`name` =
 play  GROUP BY `video_video`.`id` ORDER BY play_log DESC LIMIT 10'}
 }}}

 === The sql request ===

 {{{
 SELECT   `video_video`.`id`           ,
             `video_video`.`name`         ,
             `video_video`.`dir`          ,
             `video_video`.`videoclass_id`,
             `video_video`.`status`       ,
             `video_video`.`date`         ,
             `video_video`.`duration`     ,
             `video_video`.`width`        ,
             `video_video`.`height`       ,
             `video_video`.`displayname`  ,
             `video_video`.`published`    ,
             `video_video`.`views`        ,
             `video_video`.`ratesum`      ,
             `video_video`.`ratenum`      ,
             COUNT(`statistics_visitorlog`.`id`) AS `play_log`
    FROM     `video_video`
             LEFT OUTER JOIN `statistics_visitorlog`
             ON
                      (
                               `video_video`.`id` =
 `statistics_visitorlog`.`video_id`
                      )
             INNER JOIN `statistics_visitorlog` T4
             ON
                      (
                               `video_video`.`id` = T4.`video_id`
                      )
             INNER JOIN `statistics_visitoraction`
             ON
                      (
                               T4.`action_id` =
 `statistics_visitoraction`.`id`
                      )
    WHERE    `statistics_visitoraction`.`name` = 'play'
    GROUP BY `video_video`.`id`
 }}}

 It takes forever and burns my cpu. However, if I remove the LEFT OUTER
 JOIN on statistics_visitorlog (which seems to be joined twice), and fix
 the select to call the proper JOINed table:

 {{{
 SELECT   `video_video`.`id`           ,
             `video_video`.`name`         ,
             `video_video`.`dir`          ,
             `video_video`.`videoclass_id`,
             `video_video`.`status`       ,
             `video_video`.`date`         ,
             `video_video`.`duration`     ,
             `video_video`.`width`        ,
             `video_video`.`height`       ,
             `video_video`.`displayname`  ,
             `video_video`.`published`    ,
             `video_video`.`views`        ,
             `video_video`.`ratesum`      ,
             `video_video`.`ratenum`      ,
             COUNT(T4.`id`) AS `play_log`
    FROM     `video_video`
             INNER JOIN `statistics_visitorlog` T4
             ON
                      (
                               `video_video`.`id` = T4.`video_id`
                      )
             INNER JOIN `statistics_visitoraction`
             ON
                      (
                               T4.`action_id` =
 `statistics_visitoraction`.`id`
                      )
    WHERE    `statistics_visitoraction`.`name` = 'play'
    GROUP BY `video_video`.`id`
 }}}

 this returns in 0.6 sec with all the datas needed to fill the objects.

-- 
Ticket URL: <http://code.djangoproject.com/ticket/11088>
Django <http://code.djangoproject.com/>
The Web framework for perfectionists with deadlines.
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Django updates" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to 
[email protected]
For more options, visit this group at 
http://groups.google.com/group/django-updates?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to