On Sat, Mar 20, 2021 at 6:52 PM Ron Clarke <rclark...@gmail.com> wrote:

> /*
> I'm trying to port a system from SQL server, and at the same time better
> learn postgreSQL.
>
> I've come across a problem that is easily solved in that world, but I am
> struggling to find an approach in postgres that works.
>
> We have 2 sets of events A and B (sets), they have a shared number
> (ncode), both have unique Id's
>
> We want to link items of set A to those of set B, but each item of each
> set can only be linked once.    That is we do not want to link all set 'A'
> items to all set 'B' Items with the same code.
>
> In SQL Server this is easy, we insert the records into a temporary table
> with separate Unique indexes on the id for set a and the ids for set b and
> put the 'ignore_dup_key' on which tells SQL Server to ignore duplicate rows
> and carry on.
>
> The nearest to this at first appears to be the ON CONFLICT ON CONSTRAINT
> IGNORE in Postgres. But this only works with a single constraint, at a time
> i.e. we can't set the ON CONFLICT ON CONSTRAINT IGNORE to work with
> multiple UNIQUE indexes.
>
> To show the problem:
>
> I'm using PostgreSQL version 11.
>
> */
> -- source data
> WITH sd AS (
> SELECT iid, s, ncode FROM (
> VALUES (1, 'A', 10),
>        (2, 'A', 30),
>        (3, 'A', 10),
>        (4, 'B', 10),
>        (5, 'B', 20),
>        (6, 'B', 10)
> )
> AS tx (iid, s, ncode))
> SELECT iid, s, ncode FROM sd
>
> /*     The target result would be :
>
>   id:1, A, 10 this matches id:4, B, 10
>   id:3, A, 10 this matches id:6, B, 10
> */
>
> --  Example to get the *wrong *answer, i.e. both sets of links
>
> WITH
> sd (i, s, n ) AS (
> SELECT iid, s, ncode FROM (
> VALUES (1, 'A', 10),
>   (2, 'A', 30),
>   (3, 'A', 10),
>   (4, 'B', 10),
>   (5, 'B', 20),
>   (6, 'B', 10)
> )
> AS tx (iid, s, ncode))
> ,
> x AS ( SELECT
>
>     ax.i as ia,
>     ax.s as sa,
>     ax.n as na,
>     bx.i as ib,
>     bx.s as sb,
>     bx.n as nb,
> ROW_NUMBER () OVER (
>
> PARTITION BY bx.i
>
> ORDER BY
>
> ax.i ) as rx
>
>     FROM sd AS ax
>     INNER JOIN sd AS bx ON ax.n = bx.n and ax.i != bx.i and bx.s = 'B'
>     WHERE ax.s = 'A'
> )
> SELECT ia,ib, na, rx FROM x
> ;
>
>
> /*   I've tried using a recursive CTE where I'm trying to exclude results
> from the result set that have already been identified, but I can't get an
> allowed syntax.
>   Doesn't seem to allow joins to the recursive term to exclude results.
> */
>
>
> /*   I've tried Unique and Exclusion constraints on temporary table, e.g */
> --     similar Example to get the wrong answer, i.e. both sets of links
>
> DROP TABLE IF EXISTS links ;
>
> CREATE TEMPORARY TABLE links
> (    mid serial ,
> ia int ,
> -- ia int UNIQUE,
> ib int ,
> -- ib int UNIQUE,
>     EXCLUDE USING gist (ia WITH =, ib WITH =)
>
>   ) ;
>
> WITH
> sd (i, s, n ) AS (
> SELECT iid, side, ncode FROM (
> VALUES (1, 'A', 10),
>   (2, 'A', 30),
>   (3, 'A', 10),
>   (4, 'B', 10),
>   (5, 'B', 20),
>   (6, 'B', 10)
> )
> AS tx (iid, side, ncode))
> ,
> x AS (
> SELECT
>     ax.i as ia,
> ax.s as sa,
> ax.n as na,
> bx.i as ib,
> bx.s as sb,
> bx.n as nb,
> ROW_NUMBER () OVER (
> PARTITION BY bx.i
> ORDER BY
> ax.i
> ) as rx
>     FROM sd AS ax
>     INNER JOIN sd AS bx ON ax.n = bx.n and ax.i != bx.i and bx.s = 'B'
>     WHERE ax.s = 'A'
> )
> -- SELECT * FROM x
> INSERT INTO links(ia,ib)
> SELECT ia, ib FROM x
> ON CONFLICT ON CONSTRAINT links_ia_ib_excl DO NOTHING;
>
>   --
> SELECT * from links;
>
> /*   I've also tried and failed to use array(ia,ib) within or as computed
> column of an Exclusion constraint of && s on temporary table, e.g
>   but can't find any syntax that doesn't result in an error
>   */
>
>
> DROP TABLE IF EXISTS links ;
>
> CREATE TEMPORARY TABLE links
> (    mid serial ,
> ia int ,
> -- ia int UNIQUE,
> ib int ,
> -- ib int UNIQUE,
>     ix int[],
>   EXCLUDE USING gist (ix WITH &&)
>   ) ;
>
> -- This gives me:
> -- ERROR:  data type integer[] has no default operator class for access
> method "gist"
>
> -- I have the btree_gist extension installed
>
>
> /*
>
> I appreciate I could create a cursor from a list of proposed links and
> step through each one, checking if the id value has been "used up"
> but I am trying to keep this as a set based operation to give me the
> results in one statement.
>
> There are some similar questions w.r.t. duplicate detection, but these
> again seem to be solved by evaluating each proposed record individually.
>     If that's just what I have to do then so be it. There is probably a
> simple 'postgreSQL' freindly approach I'm still yet to discover having spent
> too long in Sybase and SQL Server worlds.
>
> Thanks for looking at this
>
> */
>
>
>



Hi Ron,

How about the code below.
It may require testing with more data.






WITH _sd AS (
    SELECT iid, s, ncode FROM (
    VALUES (1, 'A', 10),
           (2, 'A', 30),
           (3, 'A', 10),
           (4, 'B', 10),
           (5, 'B', 20),
           (6, 'B', 10)
    )
    AS tx (iid, s, ncode)
)
--SELECT a.iid, a.s, a.ncode FROM _sd a;
,_sd__ab AS
(
    SELECT
        a.iid as iid__a, a.s AS s__a, a.ncode AS ncode__a
        ,b.iid as iid__b, b.s AS s__b, b.ncode AS ncode__b
    FROM _sd a
    JOIN _sd b ON b.ncode=a.ncode AND b.s>a.s
)
,_sd__ab__dist AS
(
    SELECT
        DISTINCT ON(
            a.iid__a
            ,a.iid__b
        )
        a.iid__a, a.s__a, a.ncode__a
        ,a.iid__b, a.s__b, a.ncode__b
    FROM _sd__ab a
    ORDER BY
        a.iid__a, a.iid__b, a.s__a, a.ncode__a
        , a.s__b, a.ncode__b
)
,_sd__ab__dist2 AS
(
    SELECT
        a.iid__a, a.s__a, a.ncode__a
        ,a.iid__b, a.s__b, a.ncode__b
        ,a.iid__a__b__row_number1
        ,a.iid__a__b__row_number2
    FROM
    (
        SELECT
            a.iid__a, a.s__a, a.ncode__a
            ,a.iid__b, a.s__b, a.ncode__b
            ,ROW_NUMBER()OVER(PARTITION BY a.ncode__a,a.iid__a ORDER BY
a.iid__b)AS iid__a__b__row_number1
            ,ROW_NUMBER()OVER(PARTITION BY a.ncode__a,a.iid__b ORDER BY
a.iid__a)AS iid__a__b__row_number2
        FROM _sd__ab__dist a
    )a
)
SELECT a.*,ROW_NUMBER()OVER(ORDER BY a.iid__a)AS row_number FROM
_sd__ab__dist2 a WHERE a.iid__a__b__row_number1=iid__a__b__row_number2
;


Yields
 iid__a | s__a | ncode__a | iid__b | s__b | ncode__b |
iid__a__b__row_number1 | iid__a__b__row_number2 | row_number
--------+------+----------+--------+------+----------+------------------------+------------------------+------------
      1 | A    |       10 |      4 | B    |       10 |
 1 |                      1 |          1
      3 | A    |       10 |      6 | B    |       10 |
 2 |                      2 |          2
(2 rows)

Time: 2.394 ms

-Allan.

>
>
>

Reply via email to