Re: More than one UNIQUE key when matching items..

Ron Clarke Mon, 22 Mar 2021 06:23:15 -0700

Hey thanks for working out a solution to this deceptive problem.   One of
those you expect to be simple, but then all of a sudden it isn't.


Best regards
Ron





On Sat, 20 Mar 2021 at 19:01, Allan Kamau <[email protected]> wrote:

>
>
>
>
>
>
> On Sat, Mar 20, 2021 at 6:52 PM Ron Clarke <[email protected]> wrote:
>
>> /*
>> I'm trying to port a system from SQL server, and at the same time better
>> learn postgreSQL.
>>
>> I've come across a problem that is easily solved in that world, but I am
>> struggling to find an approach in postgres that works.
>>
>> We have 2 sets of events A and B (sets), they have a shared number
>> (ncode), both have unique Id's
>>
>> We want to link items of set A to those of set B, but each item of each
>> set can only be linked once.    That is we do not want to link all set 'A'
>> items to all set 'B' Items with the same code.
>>
>> In SQL Server this is easy, we insert the records into a temporary table
>> with separate Unique indexes on the id for set a and the ids for set b and
>> put the 'ignore_dup_key' on which tells SQL Server to ignore duplicate rows
>> and carry on.
>>
>> The nearest to this at first appears to be the ON CONFLICT ON CONSTRAINT
>> IGNORE in Postgres. But this only works with a single constraint, at a time
>> i.e. we can't set the ON CONFLICT ON CONSTRAINT IGNORE to work with
>> multiple UNIQUE indexes.
>>
>> To show the problem:
>>
>> I'm using PostgreSQL version 11.
>>
>> */
>> -- source data
>> WITH sd AS (
>> SELECT iid, s, ncode FROM (
>> VALUES (1, 'A', 10),
>>        (2, 'A', 30),
>>        (3, 'A', 10),
>>        (4, 'B', 10),
>>        (5, 'B', 20),
>>        (6, 'B', 10)
>> )
>> AS tx (iid, s, ncode))
>> SELECT iid, s, ncode FROM sd
>>
>> /*     The target result would be :
>>
>>   id:1, A, 10 this matches id:4, B, 10
>>   id:3, A, 10 this matches id:6, B, 10
>> */
>>
>> --  Example to get the *wrong *answer, i.e. both sets of links
>>
>> WITH
>> sd (i, s, n ) AS (
>> SELECT iid, s, ncode FROM (
>> VALUES (1, 'A', 10),
>>   (2, 'A', 30),
>>   (3, 'A', 10),
>>   (4, 'B', 10),
>>   (5, 'B', 20),
>>   (6, 'B', 10)
>> )
>> AS tx (iid, s, ncode))
>> ,
>> x AS ( SELECT
>>
>>     ax.i as ia,
>>     ax.s as sa,
>>     ax.n as na,
>>     bx.i as ib,
>>     bx.s as sb,
>>     bx.n as nb,
>> ROW_NUMBER () OVER (
>>
>> PARTITION BY bx.i
>>
>> ORDER BY
>>
>> ax.i ) as rx
>>
>>     FROM sd AS ax
>>     INNER JOIN sd AS bx ON ax.n = bx.n and ax.i != bx.i and bx.s = 'B'
>>     WHERE ax.s = 'A'
>> )
>> SELECT ia,ib, na, rx FROM x
>> ;
>>
>>
>> /*   I've tried using a recursive CTE where I'm trying to exclude results
>> from the result set that have already been identified, but I can't get an
>> allowed syntax.
>>   Doesn't seem to allow joins to the recursive term to exclude results.
>> */
>>
>>
>> /*   I've tried Unique and Exclusion constraints on temporary table, e.g
>> */
>> --     similar Example to get the wrong answer, i.e. both sets of links
>>
>> DROP TABLE IF EXISTS links ;
>>
>> CREATE TEMPORARY TABLE links
>> (    mid serial ,
>> ia int ,
>> -- ia int UNIQUE,
>> ib int ,
>> -- ib int UNIQUE,
>>     EXCLUDE USING gist (ia WITH =, ib WITH =)
>>
>>   ) ;
>>
>> WITH
>> sd (i, s, n ) AS (
>> SELECT iid, side, ncode FROM (
>> VALUES (1, 'A', 10),
>>   (2, 'A', 30),
>>   (3, 'A', 10),
>>   (4, 'B', 10),
>>   (5, 'B', 20),
>>   (6, 'B', 10)
>> )
>> AS tx (iid, side, ncode))
>> ,
>> x AS (
>> SELECT
>>     ax.i as ia,
>> ax.s as sa,
>> ax.n as na,
>> bx.i as ib,
>> bx.s as sb,
>> bx.n as nb,
>> ROW_NUMBER () OVER (
>> PARTITION BY bx.i
>> ORDER BY
>> ax.i
>> ) as rx
>>     FROM sd AS ax
>>     INNER JOIN sd AS bx ON ax.n = bx.n and ax.i != bx.i and bx.s = 'B'
>>     WHERE ax.s = 'A'
>> )
>> -- SELECT * FROM x
>> INSERT INTO links(ia,ib)
>> SELECT ia, ib FROM x
>> ON CONFLICT ON CONSTRAINT links_ia_ib_excl DO NOTHING;
>>
>>   --
>> SELECT * from links;
>>
>> /*   I've also tried and failed to use array(ia,ib) within or as computed
>> column of an Exclusion constraint of && s on temporary table, e.g
>>   but can't find any syntax that doesn't result in an error
>>   */
>>
>>
>> DROP TABLE IF EXISTS links ;
>>
>> CREATE TEMPORARY TABLE links
>> (    mid serial ,
>> ia int ,
>> -- ia int UNIQUE,
>> ib int ,
>> -- ib int UNIQUE,
>>     ix int[],
>>   EXCLUDE USING gist (ix WITH &&)
>>   ) ;
>>
>> -- This gives me:
>> -- ERROR:  data type integer[] has no default operator class for access
>> method "gist"
>>
>> -- I have the btree_gist extension installed
>>
>>
>> /*
>>
>> I appreciate I could create a cursor from a list of proposed links and
>> step through each one, checking if the id value has been "used up"
>> but I am trying to keep this as a set based operation to give me the
>> results in one statement.
>>
>> There are some similar questions w.r.t. duplicate detection, but these
>> again seem to be solved by evaluating each proposed record individually.
>>     If that's just what I have to do then so be it. There is probably a
>> simple 'postgreSQL' freindly approach I'm still yet to discover having spent
>> too long in Sybase and SQL Server worlds.
>>
>> Thanks for looking at this
>>
>> */
>>
>>
>>
>
>
>
> Hi Ron,
>
> How about the code below.
> It may require testing with more data.
>
>
>
>
>
>
> WITH _sd AS (
>     SELECT iid, s, ncode FROM (
>     VALUES (1, 'A', 10),
>            (2, 'A', 30),
>            (3, 'A', 10),
>            (4, 'B', 10),
>            (5, 'B', 20),
>            (6, 'B', 10)
>     )
>     AS tx (iid, s, ncode)
> )
> --SELECT a.iid, a.s, a.ncode FROM _sd a;
> ,_sd__ab AS
> (
>     SELECT
>         a.iid as iid__a, a.s AS s__a, a.ncode AS ncode__a
>         ,b.iid as iid__b, b.s AS s__b, b.ncode AS ncode__b
>     FROM _sd a
>     JOIN _sd b ON b.ncode=a.ncode AND b.s>a.s
> )
> ,_sd__ab__dist AS
> (
>     SELECT
>         DISTINCT ON(
>             a.iid__a
>             ,a.iid__b
>         )
>         a.iid__a, a.s__a, a.ncode__a
>         ,a.iid__b, a.s__b, a.ncode__b
>     FROM _sd__ab a
>     ORDER BY
>         a.iid__a, a.iid__b, a.s__a, a.ncode__a
>         , a.s__b, a.ncode__b
> )
> ,_sd__ab__dist2 AS
> (
>     SELECT
>         a.iid__a, a.s__a, a.ncode__a
>         ,a.iid__b, a.s__b, a.ncode__b
>         ,a.iid__a__b__row_number1
>         ,a.iid__a__b__row_number2
>     FROM
>     (
>         SELECT
>             a.iid__a, a.s__a, a.ncode__a
>             ,a.iid__b, a.s__b, a.ncode__b
>             ,ROW_NUMBER()OVER(PARTITION BY a.ncode__a,a.iid__a ORDER BY
> a.iid__b)AS iid__a__b__row_number1
>             ,ROW_NUMBER()OVER(PARTITION BY a.ncode__a,a.iid__b ORDER BY
> a.iid__a)AS iid__a__b__row_number2
>         FROM _sd__ab__dist a
>     )a
> )
> SELECT a.*,ROW_NUMBER()OVER(ORDER BY a.iid__a)AS row_number FROM
> _sd__ab__dist2 a WHERE a.iid__a__b__row_number1=iid__a__b__row_number2
> ;
>
>
> Yields
>  iid__a | s__a | ncode__a | iid__b | s__b | ncode__b |
> iid__a__b__row_number1 | iid__a__b__row_number2 | row_number
>
> --------+------+----------+--------+------+----------+------------------------+------------------------+------------
>       1 | A    |       10 |      4 | B    |       10 |
>  1 |                      1 |          1
>       3 | A    |       10 |      6 | B    |       10 |
>  2 |                      2 |          2
> (2 rows)
>
> Time: 2.394 ms
>
> -Allan.
>
>>
>>
>>
>

Re: More than one UNIQUE key when matching items..

Reply via email to