Friday, March 22, 2019

postgreSQL: collapse contiguous ranges

Source: Efficiently select beginning and end of multiple contiguous ranges in Postgresql query
with recursive
data as (
	select * from (
    values	('foo', 2, 3),
		   	('foo', 3, 4),
			('foo', 4, 5),
			('foo', 10, 11),
			('foo', 11, 13),
			('foo', 13, 15),
			('bar', 1, 2),
			('bar', 2, 4),
			('bar', 7, 8)
	) as baz (name, first, last)
),
recur (name, first, last) as (
    select name, first, last, last-first as span from data
    union all
    select name, data.first, data.last, recur.span+data.last-data.first as span
    from data join recur using (name)
    where data.first = recur.last
)
select name, start, start + span as end, span from (
    select name, (last-span) as start, max(span) as span from (
         select name, first, last, max(span) as span 
        from recur
        group by name, first, last
    ) as z
    group by name, (last-span)
) as z

┌──────┬───────┬─────┬──────┐
│ name │ start │ end │ span │
├──────┼───────┼─────┼──────┤
│ bar  │     1 │   4 │    3 │
│ bar  │     7 │   8 │    1 │
│ foo  │    10 │  15 │    5 │
│ foo  │     2 │   5 │    3 │
└──────┴───────┴─────┴──────┘
(4 rows)

Or if we do not have a "last" column.
with recursive
data as (
    select * from (
        values ('foo', 2),
               ('foo', 3),
               ('foo', 4),
               ('foo', 10),
               ('foo', 11),
               ('foo', 13),
               ('bar', 1),
               ('bar', 2),
               ('bar', 7)
    ) as baz (name, first)
),
recur (name, first) as (
    select name, first, 1 as span from data
    union all
    select name, data.first, recur.span+1 as span
    from data
    join recur using (name)
    where data.first = recur.first + 1
)
select name, start, start + span - 1 as end, span from (
    select name, (first+1-span) as start, max(span) as span from (
        select name, first, max(span) as span 
        from recur
        group by name, first
    ) as z
    group by name, start
) as z
order by name, start

┌──────┬───────┬─────┬──────┐
│ name │ start │ end │ span │
├──────┼───────┼─────┼──────┤
│ bar  │     1 │   2 │    2 │
│ bar  │     7 │   7 │    1 │
│ foo  │     2 │   4 │    3 │
│ foo  │    10 │  11 │    2 │
│ foo  │    13 │  13 │    1 │
└──────┴───────┴─────┴──────┘
(5 rows)

The problem is that with recursive tends to be slow when dealing with millions of rows.
How to find the boundaries of groups of contiguous sequential numbers? proposes a faster approach.
with
data as (
    select * from (
        values ('foo', 2),
               ('foo', 3),
               ('foo', 4),
               ('foo', 10),
               ('foo', 11),
               ('foo', 13),
               ('bar', 1),
               ('bar', 2),
               ('bar', 7)
    ) as baz (name, first)
),
island as (
    select first - row_number() over (order by name, first) as grp,
    name, first
    from data
)
select name, min(first) as start, max(first) as end
from island
group by name, grp
order by name, start

┌──────┬───────┬─────┐
│ name │ start │ end │
├──────┼───────┼─────┤
│ bar  │     1 │   2 │
│ bar  │     7 │   7 │
│ foo  │     2 │   4 │
│ foo  │    10 │  11 │
│ foo  │    13 │  13 │
└──────┴───────┴─────┘
(5 rows)