#35099: Combining QuerySets with "|" or "&" produce side effects affecting 
further
queries
-------------------------------------+-------------------------------------
               Reporter:  Alan       |          Owner:  nobody
                   Type:  Bug        |         Status:  new
              Component:  Database   |        Version:  5.0
  layer (models, ORM)                |
               Severity:  Normal     |       Keywords:
           Triage Stage:             |      Has patch:  0
  Unreviewed                         |
    Needs documentation:  0          |    Needs tests:  0
Patch needs improvement:  0          |  Easy pickings:  0
                  UI/UX:  0          |
-------------------------------------+-------------------------------------
 Hello everyone.

 Combining some queries with  "|" or "&" somehow affects queries involved
 in the operation, leading to malformed SQL and unexpected results.

 Here are details and steps to reproduce. Apologise, for maybe a bit
 confusing model names, I copied them from production.
 {{{
 class SiteUser(models.Model):
     pass

 class Notification(models.Model):
     user = models.ForeignKey(to=SiteUser, on_delete=models.CASCADE)


 class PayoutRequest(models.Model):
     requester = models.ForeignKey(to=SiteUser, on_delete=models.CASCADE)
 }}}

 Test:
 {{{
 from django.test import TestCase
 from django.db.models import OuterRef, Exists
 from reproduce.models import Notification, SiteUser, PayoutRequest


 class Reproduce(TestCase):

     def test(self):
         u01 = SiteUser.objects.create()
         u02 = SiteUser.objects.create()
         u03 = SiteUser.objects.create()

         Notification.objects.create(user=u01)
         PayoutRequest.objects.create(requester=u01)
         Notification.objects.create(user=u02)
         PayoutRequest.objects.create(requester=u03)

         are_active = SiteUser.objects.all().distinct()
         got_money = SiteUser.objects.filter(
             Exists(PayoutRequest.objects.filter(requester=OuterRef('pk')))
         ).distinct()
         whatever_query = SiteUser.objects.all().distinct()

         # Execute queries first time
         need_help = are_active.exclude(pk__in=got_money)
         notified =
 Notification.objects.filter(user__in=need_help).values_list('user_id',
 flat=True)
         query_before = str(notified.query)

         self.assertEqual(len(notified), 1)  # correct

         whatever_query | got_money  # Touch "got_money" with any other
 query

         # Execute same queries second time
         need_help = are_active.exclude(pk__in=got_money)
         notified =
 Notification.objects.filter(user__in=need_help).values_list('user_id',
 flat=True)
         query_after = str(notified.query)

         print(query_before)
         print(query_after)
         self.assertEqual(len(notified), 1)  # expected 1, got 0
         self.assertEqual(query_before, query_after)  # false
 }}}

 As you can see, merely touching the `got_money` query with any other query
 leads to modifying the results of the same queries executed after that.
 This test case probably may be simplified even further, but unfortunately,
 I have no more time resources to dig much deeper.

 I had another queries built using simple .filter() and .exclude(). Those
 were not affected by combining.
 I found only this query `got_money` using `Exists()` and `OuterRef()` to
 be affected. There might be more of which I am not aware of.

 The reason for this I don't know, but `query_before` and `query_after`
 differs.
 `query_before` correctly separates subqueries using W0, U0, V0 aliases,
 while the `query_after` uses a single U0 alias for all subqueries, leading
 to incorrect results.

 Before
 {{{
 SELECT
   "reproduce_notification"."user_id"
 FROM
   "reproduce_notification"
 WHERE
   "reproduce_notification"."user_id" IN (
     SELECT
       DISTINCT W0."id"
     FROM
       "reproduce_siteuser" W0
     WHERE
       NOT (
         W0."id" IN (
           SELECT
             DISTINCT V0."id"
           FROM
             "reproduce_siteuser" V0
           WHERE
             EXISTS(
               SELECT
                 1 AS "a"
               FROM
                 "reproduce_payoutrequest" U0
               WHERE
                 U0."requester_id" = (V0."id")
               LIMIT
                 1
             )
         )
       )
   )
 }}}

 After
 {{{
 SELECT
   "reproduce_notification"."user_id"
 FROM
   "reproduce_notification"
 WHERE
   "reproduce_notification"."user_id" IN (
     SELECT
       DISTINCT U0."id"
     FROM
       "reproduce_siteuser" U0
     WHERE
       NOT (
         U0."id" IN (
           SELECT
             DISTINCT U0."id"
           FROM
             "reproduce_siteuser" U0
           WHERE
             EXISTS(
               SELECT
                 1 AS "a"
               FROM
                 "reproduce_payoutrequest" U0
               WHERE
                 U0."requester_id" = (U0."id")
               LIMIT
                 1
             )
         )
       )
   )
 }}}

-- 
Ticket URL: <https://code.djangoproject.com/ticket/35099>
Django <https://code.djangoproject.com/>
The Web framework for perfectionists with deadlines.

-- 
You received this message because you are subscribed to the Google Groups 
"Django updates" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/django-updates/0107018cefb81ba8-b17a2d82-dc0a-4141-bbe4-e0a7fc2a27d9-000000%40eu-central-1.amazonses.com.

Reply via email to