Excuse the top posting. This perhaps: create table srcdata ( CLS1 text not null, CLS2 integer not null, START integer not null, END integer not null ); insert into srcdata values ('ABC1',100,0,1); insert into srcdata values ('ABC1',100,1,1); insert into srcdata values ('ABC1',100,1,3); insert into srcdata values ('ABC1',100,1,3); insert into srcdata values ('ABC1',100,3,4); insert into srcdata values ('ABC1',100,4,3); insert into srcdata values ('ABC1',100,5,6); insert into srcdata values ('ABC1',100,6,20); insert into srcdata values ('ABC1',100,6,20); insert into srcdata values ('ABC1',500,4,19); insert into srcdata values ('ABC1',500,19,4); insert into srcdata values ('ABC2',300,4,4);
sqlite> select * from srcdata; CLS1 CLS2 START END ---------- ---------- ---------- ---------- ABC1 100 0 1 ABC1 100 1 1 ABC1 100 1 3 ABC1 100 1 3 ABC1 100 3 4 ABC1 100 4 3 ABC1 100 5 6 ABC1 100 6 20 ABC1 100 6 20 ABC1 500 4 19 ABC1 500 19 4 ABC2 300 4 4 with src (cls1, cls2, start, end) as ( select distinct cls1, cls2, min(start, end), max(start, end) from srcdata ), c (cls1, cls2, start, end) as ( select cls1, cls2, start, end from src union select c.cls1, c.cls2, s.start, c.end from src as s, c where c.cls1 == s.cls1 and c.cls2 == s.cls2 and c.start == s.end and s.start != c.start ), u (cls1, cls2, start, end) as ( select cls1, cls2, start, end from c as o where not exists ( select * from c where cls1 == o.cls1 and cls2 == o.cls2 and ( ( o.start between start + 1 and end and o.start between start and end - 1 ) or ( o.end between start + 1 and end and o.end between start and end - 1 ) ) ) ) select * from u order by cls1, cls2, start, end ; cls1 cls2 start end ---------- ---------- ---------- ---------- ABC1 100 0 4 ABC1 100 5 20 ABC1 500 4 19 ABC2 300 4 4 So in the CTE "src" makes sure that start < end and removes duplicate rows "c" finds and adds coalesced range rows "u" finds the rows in c where there is no containing row If you have any significant amount of data the following procedure is probably many many times faster since the various bits of the single CTE version are not materialized (though if you can figure out how to force that, it will do the same thing): drop table if exists temp.src; drop table if exists temp.coal; create temporary table src ( cls1 text not null, cls2 text not null, start integer not null, end integer not null ); insert into temp.src select distinct cls1, cls2, min(start, end) as start, max(start, end) as end from srcdata ; create index temp.idx_src on src (cls1, cls2, end); create temporary table coal ( cls1 text not null, cls2 text not null, start integer not null, end integer not null ); insert into temp.coal with c as ( select cls1, cls2, start, end from temp.src union select c.cls1, c.cls2, s.start, c.end from temp.src as s, c where c.cls1 == s.cls1 and c.cls2 == s.cls2 and c.start == s.end and s.start != c.start ) select cls1, cls2, start, end from c ; create index temp.idx_coal on coal (cls1, cls2); select cls1, cls2, start, end from temp.coal as o where not exists ( select 1 from temp.coal as c where cls1 == o.cls1 and cls2 == o.cls2 and ( ( o.start between start + 1 and end and o.start between start and end - 1 ) or ( o.end between start + 1 and end and o.end between start and end - 1 ) ) ) order by cls1, cls2, start, end ; drop table if exists temp.src; drop table if exists temp.coal; -- The fact that there's a Highway to Hell but only a Stairway to Heaven says a lot about anticipated traffic volume. >-----Original Message----- >From: sqlite-users <sqlite-users-boun...@mailinglists.sqlite.org> On >Behalf Of Golding Robert >Sent: Wednesday, 29 January, 2020 08:02 >To: sqlite-users@mailinglists.sqlite.org >Subject: [sqlite] single table data collapse with constraints > >Hi, > >I am emailing to ask if someone could advise me how to create a query or >queries which will collapse some data based on a limited number of >constraints. >I am currently attempting to complete this task using DB Browser for >SQLite. I have tried to write a WITH RECLUSIVE statement as I think this >is the requirement but am struggling with both the abstraction and >syntax. > > >I have data contained with a single table of structure: >CLS1 field (text) >CLS2 field (integer) >START field (integer) >END field (integer > >I need to collapse the data based on the matching of values in fields >CLS1 and CLS2; the final constraint is that if END and START values are >continuous of the another record then they can be collapsed. Therefore >records should only be collapsed if gaps do not exist. The results then >need to written to a new table, leaving the original data as is. > >Input data: assumptions > > 1. Data may or may not be ordered > 2. Duplicates may or may not exist > 3. Start and end values could be the same > 4. Start values are normally lower that the end value, however the >high value could be in the start field > 5. Assume that there is no overlap in terms of start and end values >(namely if they can be joined then one will stop where the next starts) > >Input data: example >CLS1,CLS2,START,END >ABC1,100,0,1 >ABC1,100,1,1 (start and end values could be the same, in the first >instance assume that they may be dissolved if possible, if they cannot >the record need to be retained) >ABC1,100,1,3 >ABC1,100,1,3 (duplicates may or may not be present, if present then they >can be dissolved into a single instance) >ABC1,100,3,4 >ABC1,100,4,3 >ABC1,100,5,6 >ABC1,100,6,20 >ABC1,100,6,20(duplicates may or may not be present, if present then they >can be dissolved into a single instance) >ABC1,500,4,19 >ABC1,500,19,4 (start and end values could be inverted where Start is high >and End id low (this is not the norm but it is legitimate) in this case >start and end values may be inverted, and in this case dissolved into a >single instance) >ABC2,300,4,4 (start and end values could be the same, in the first >instance assume that they may be dissolved if possible, if they cannot >the record need to be retained) > > > >Output data: collapsed/merged expected output >CLS1,CLS2,START,END >ABC1,100,0,4 >ABC1,100,5,20 >ABC1,500,4,19 >ABC2,300,4,4 > > >I would be extremely grateful if anybody could help me with this issue. > > >Regards, > >Rob > > > >************************************************************************* >************************************************************************* >************** > >The content of this email (and any attachment) is confidential. It may >also be legally privileged or otherwise protected from disclosure. >This email should not be used by anyone who is not an original intended >recipient, nor may it be copied or disclosed to anyone who is not an >original intended recipient. > >If you have received this email by mistake please notify us by emailing >the sender, and then delete the email and any copies from your system. > >Liability cannot be accepted for statements made which are clearly the >sender's own and not made on behalf of Network Rail. >Network Rail Infrastructure Limited registered in England and Wales No. >2904587, registered office Network Rail, 2nd Floor, One Eversholt Street, >London, NW1 2DN > >************************************************************************* >************************************************************************* >************** >_______________________________________________ >sqlite-users mailing list >sqlite-users@mailinglists.sqlite.org >http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users _______________________________________________ sqlite-users mailing list sqlite-users@mailinglists.sqlite.org http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users