#35256: Strange/duplicate join clauses on queries with filter and annotations
-------------------------------------+-------------------------------------
               Reporter:  Bálint     |          Owner:  nobody
  Balina                             |
                   Type:             |         Status:  new
  Uncategorized                      |
              Component:  Database   |        Version:  4.1
  layer (models, ORM)                |       Keywords:  annotate filter
               Severity:  Normal     |  join
           Triage Stage:             |      Has patch:  0
  Unreviewed                         |
    Needs documentation:  0          |    Needs tests:  0
Patch needs improvement:  0          |  Easy pickings:  0
                  UI/UX:  0          |
-------------------------------------+-------------------------------------
 The filtering/annotation on the same data behaves differently based on the
 order of operations.

 See the below example models, code, and generate sql:


 {{{

 class PriceList(models.Model):
     id = models.IntegerField(...)
     customer = models.ForeignKey(...)

 class PriceListItem(models.Model):
     id = models.IntegerField(...)
     price_list = models.ForeignKey("PriceList", models.CASCADE)
     product = models.ForeignKey("Product", models.PROTECT,
 related_name="price_list_items")
     price = models.DecimalField(...)

 class Product(models.Model):
     id = models.IntegerField(...)
     sku = models.CharField(...)

 # retrieve all products, with prices related to a certain customer.
 # The price should be null, if it is not specified for the customer, but
 the product should still be retrieved.

 q1 =
 
Product.objects.annotate(price=F("price_list_items__price")).filter(Q(price_list_items__price_list__isnull=True)
 | Q(price_list_items__price_list__customer_id=1)).values("id", "price")

 # sql (WRONG):
 """
 SELECT "product"."id", t4."price"
 FROM "product"
 LEFT OUTER JOIN "price_list_item" ON ("product"."id" =
 "price_list_item"."product_id")
 LEFT OUTER JOIN "price_list" ON ("price_list_item"."price_list_id" =
 "price_list"."id")
 LEFT OUTER JOIN "price_list_item" t4 ON ("product"."id" = t4."product_id")
 LEFT OUTER JOIN "price_list" t5 ON (t4."price_list_id" = t5."id")
 WHERE (t4."price_list_id" IS NULL OR  t5."customer_id" = 1))
 """

 q2 = Product.objects.filter(Q(price_list_items__price_list__isnull=True) |
 
Q(price_list_items__price_list__customer_id=1)).annotate(price=F("price_list_items__price")).values("id",
 "price")

 # sql:
 """
 SELECT "product"."id", price_list_item."price"
 FROM "product"
 LEFT OUTER JOIN "price_list_item" ON ("product"."id" =
 "price_list_item"."product_id")
 LEFT OUTER JOIN "price_list" ON ("price_list_item"."price_list_id" =
 "price_list"."id")
 WHERE (price_list_item."price_list_id" IS NULL OR
 price_list."customer_id" = 1))
 """

 }}}


 I would expect the 2nd output from both queries. This caused some quite
 hard-to-detect bugs for us.

 It works as expected for ForignKeys, e.g. the other direction:

 {{{

 q3 =
 
PriceListItem.objects.annotate(sku=F("product__sku")).filter(product__sku__icontains="x").values("id",
 "sku")

 # sql
 """
 SELECT "price_list_item"."id", "product"."sku" AS "sku" FROM
 "price_list_item" INNER JOIN "product" ON ("price_list_item"."product_id"
 = "product"."id") WHERE (UPPER("product"."sku"::text) LIKE UPPER('%x%'))
 """

 q4 =
 
PriceListItem.objects.filter(product__sku__icontains="x").annotate(sku=F("product__sku")).values("id",
 "sku")

 # sql
 """
 SELECT "price_list_item"."id", "product"."sku" AS "sku" FROM
 "price_list_item" INNER JOIN "product" ON ("price_list_item"."product_id"
 = "product"."id") WHERE (UPPER("product"."sku"::text) LIKE UPPER('%x%'))
 """


 }}}


 **To Summarize:**
 Filtering on a reverse relation does not reuse joins, if it was already
 declared in annotations, but adds more joins causing invalid queires
 because the extra joins do not have proper filtering.

 The ORM generates this:
 {{{
 SELECT "product"."id", t4."price"
 FROM "product"
 LEFT OUTER JOIN "price_list_item" ON ("product"."id" =
 "price_list_item"."product_id")
 LEFT OUTER JOIN "price_list" ON ("price_list_item"."price_list_id" =
 "price_list"."id")
 LEFT OUTER JOIN "price_list_item" t4 ON ("product"."id" = t4."product_id")
 LEFT OUTER JOIN "price_list" t5 ON (t4."price_list_id" = t5."id")
 WHERE (t4."price_list_id" IS NULL OR  t5."customer_id" = 1))
 }}}

 Instead of this:
 {{{
 SELECT "product"."id", price_list_item."price"
 FROM "product"
 LEFT OUTER JOIN "price_list_item" ON ("product"."id" =
 "price_list_item"."product_id")
 LEFT OUTER JOIN "price_list" ON ("price_list_item"."price_list_id" =
 "price_list"."id")
 WHERE (price_list_item."price_list_id" IS NULL OR
 price_list."customer_id" = 1))
 }}}

 Tested on django 4.1.7
-- 
Ticket URL: <https://code.djangoproject.com/ticket/35256>
Django <https://code.djangoproject.com/>
The Web framework for perfectionists with deadlines.

-- 
You received this message because you are subscribed to the Google Groups 
"Django updates" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/django-updates/0107018de63aeb9b-31f41bf8-f14d-4321-b918-2adb2901d1d3-000000%40eu-central-1.amazonses.com.

Reply via email to