英文:
Build Timeline with overlapping records - SQL-DB2-IBM
问题 {#heading}
| Userid | 总时间 | |--------|----------| | User1 | 04:45:00 | | User2 | 03:45:00 |
英文:
I have a table in which I keep information related to working periods and tasks and I want to get the total amount of working time per user. But every user can work on multiple taks at the same time.
So, the result is that I have overlapped tasks.
This is the table I have
| Userid | Task_id | Start_datetime | End_datetime | Total_Time | |--------|---------|---------------------|---------------------|------------| | User1 | Task1 | 2023-08-09 08:00:00 | 2023-08-09 09:00:00 | 01:00:00 | | User1 | Task2 | 2023-08-09 08:15:00 | 2023-08-09 10:00:00 | 01:45:00 | | User2 | Task1 | 2023-08-09 08:30:00 | 2023-08-09 10:00:00 | 01:30:00 | | User2 | Task2 | 2023-08-09 09:00:00 | 2023-08-09 11:30:00 | 02:30:00 | | User1 | Task3 | 2023-08-09 11:15:00 | 2023-08-09 13:00:00 | 02:45:00 | | User2 | Task3 | 2023-08-09 15:15:00 | 2023-08-09 16:00:00 | 00:45:00 | | User2 | Task1 | 2023-08-09 15:20:00 | 2023-08-09 16:00:00 | 00:40:00 |
If I try to get the total amount of time worked per user I get this, but this is not real because users are multitasks
| Userid | Total_worked_Time per day | |--------|---------------------------| | User1 | 05:30:00 | | User2 | 05:25:00 |
What I would like to get is the total amount of time that every user has been busy.
So the result would be:
| Userid | Total Time | |--------|------------| | User1 | 04:45:00 | | User2 | 03:45:00 |
I've trying some queries and reading about CTE and Overlapping but I can't get the correct result.
答案1 {#1}
得分: 1
表格中的用户1任务3的总时间是错误的,应该是01:45:00。
这里有一个解决方案:
with table1 (Userid, Task_id, Start, End, Total_Time) as (
VALUES
('User1', 'Task1', timestamp '2023-08-09 08:00:00', timestamp '2023-08-09 09:00:00', time '01:00:00'),
('User1', 'Task2', '2023-08-09 08:15:00', '2023-08-09 10:00:00', '01:45:00'),
('User2', 'Task1', '2023-08-09 08:30:00', '2023-08-09 10:00:00', '01:30:00'),
('User2', 'Task2', '2023-08-09 09:00:00', '2023-08-09 11:30:00', '02:30:00'),
('User1', 'Task3', '2023-08-09 11:15:00', '2023-08-09 13:00:00', '02:45:00'),
('User2', 'Task3', '2023-08-09 15:15:00', '2023-08-09 16:00:00', '00:45:00'),
('User2', 'Task1', '2023-08-09 15:20:00', '2023-08-09 16:00:00', '00:40:00')
),
-- 在两个任务共享完全相同的时间段的情况下
distinct_periods as (
select distinct userid, start, end from table1
),
-- 扩展时间段
periods (n, userid, start, end) as (
-- 以不重叠的时间段开始
select 1, userid, start, end from distinct_periods t1
where not exists (
select * from distinct_periods t2
where t2.userid = t1.userid
and (t1.start > t2.start and t1.start <= t2.end
or t1.start = t2.start and t1.end > t2.end)
)
-- 扩展与之重叠的时间段
union all
select n+1, periods.userid, periods.start, distinct_periods.end
from periods, distinct_periods
where
distinct_periods.userid = periods.userid
and distinct_periods.start between periods.start and periods.end
and distinct_periods.end > periods.end
),
-- 添加一个按结束日期降序排列的排名,以便最宽的时间段具有排名1
with_rank as (
select periods.*, rank() over(partition by userid, start order by end desc) rank from periods order by userid, start
)
-- 对排名为1的时间段长度求和
select
userid,
-- 以下行与示例链接中的不同,由于IBM i和DB2 LUW之间的差异
time '00:00:00' + sum(timestampdiff(4, char(end - start))) minutes
as time_busy
from with_rank where rank = 1 group by userid
| USERID | TIME_BUSY | |-------:|----------:| | User1 | 03:45:00 | | User2 | 03:45:00 |
fiddle 英文:
Total time for User 1 task 3 is wrong in your table, should be 01:45:00
Here is a solution
with table1 (Userid, Task_id, Start, End, Total_Time) as (
VALUES
('User1', 'Task1', timestamp '2023-08-09 08:00:00', timestamp '2023-08-09 09:00:00', time '01:00:00'),
('User1', 'Task2', '2023-08-09 08:15:00', '2023-08-09 10:00:00', '01:45:00'),
('User2', 'Task1', '2023-08-09 08:30:00', '2023-08-09 10:00:00', '01:30:00'),
('User2', 'Task2', '2023-08-09 09:00:00', '2023-08-09 11:30:00', '02:30:00'),
('User1', 'Task3', '2023-08-09 11:15:00', '2023-08-09 13:00:00', '02:45:00'),
('User2', 'Task3', '2023-08-09 15:15:00', '2023-08-09 16:00:00', '00:45:00'),
('User2', 'Task1', '2023-08-09 15:20:00', '2023-08-09 16:00:00', '00:40:00')
),
-- is case when two tasks share the exact same period
distinct_periods as (
select distinct userid, start, end from table1
),
-- extend periods
periods (n, userid, start, end) as (
-- start with periods that are not preceded by an overlaping period
select 1, userid, start, end from distinct_periods t1
where not exists (
select * from distinct_periods t2
where t2.userid = t1.userid
and (t1.start > t2.start and t1.start <= t2.end
or t1.start = t2.start and t1.end > t2.end)
)
-- extend with those that overlap
union all
select n+1, periods.userid, periods.start, distinct_periods.end
from periods, distinct_periods
where
distinct_periods.userid = periods.userid
and distinct_periods.start between periods.start and periods.end
and distinct_periods.end > periods.end
-- and n < 10
),
-- add a rank by end date descending so that the widest period has rank 1
with_rank as (
select periods.*, rank() over(partition by userid, start order by end desc) rank from periods order by userid, start
)
-- sum the lengths of periods of rank 1
select
userid,
-- the following line is not the same in the fiddle due to differences between DB2 for IBMi and DB2 LUW
time '00:00:00' + sum(timestampdiff(4, char(end - start))) minutes
as time_busy
from with_rank where rank = 1 group by userid
| USERID | TIME_BUSY | |-------:|----------:| | User1 | 03:45:00 | | User2 | 03:45:00 |
答案2 {#2}
得分: 0
这是翻译好的部分:
"That's awesome. Thank you!
I have added an additional column in order to group results by date too.
So, solution would be like this:
with table1 (Userid, Task_id, Start, End, Total_Time) as (
VALUES
('User1', 'Task1', timestamp '2023-08-09 08:00:00', timestamp '2023-08-09 09:00:00', time '01:00:00'),
('User1', 'Task2', '2023-08-09 08:15:00', '2023-08-09 10:00:00', '01:45:00'),
('User2', 'Task1', '2023-08-09 08:30:00', '2023-08-09 10:00:00', '01:30:00'),
('User2', 'Task2', '2023-08-09 09:00:00', '2023-08-09 11:30:00', '02:30:00'),
('User1', 'Task3', '2023-08-09 11:15:00', '2023-08-09 13:00:00', '02:45:00'),
('User2', 'Task3', '2023-08-09 15:15:00', '2023-08-09 16:00:00', '00:45:00'),
('User2', 'Task1', '2023-08-09 15:20:00', '2023-08-09 16:00:00', '00:40:00')
),
-- is case when two tasks share the exact same period
distinct_periods as (
select distinct userid, start, end, cast(start as date) as workday from table1
),
-- extend periods
periods (n, userid, start, end,workday) as (
-- start with periods that are not preceded by an overlapping period
select 1, userid, start, end, workday from distinct_periods t1
where not exists (
select * from distinct_periods t2
where t2.userid = t1.userid
and (t1.start > t2.start and t1.start <= t2.end
or t1.start = t2.start and t1.end > t2.end)
and (t1.workday=t2.workday)
)
-- extend with those that overlap
union all
select n+1, periods.userid, periods.start, distinct_periods.end, periods.workday
from periods, distinct_periods
where
distinct_periods.userid = periods.userid
and distinct_periods.start between periods.start and periods.end
and distinct_periods.end > periods.end
-- and n < 10
),
-- add a rank by end date descending so that the widest period has rank 1
with_rank as (
select periods.*, rank() over(partition by userid, workday, start order by end desc) rank from periods order by userid, start
)
-- sum the lengths of periods of rank 1
select
workday, userid,
-- the following line is not the same in the fiddle due to differences between DB2 for IBMi and DB2 LUW
time '00:00:00' + sum(timestampdiff(4, char(end - start))) minutes
as time_busy
from with_rank where rank = 1 group by workday, userid
order by workday, userid"
英文:
That's awsesome. Thank you!
I have added an additional column in order to group results by date too.
So, solution would be like this:
with table1 (Userid, Task_id, Start, End, Total_Time) as (
VALUES
('User1', 'Task1', timestamp '2023-08-09 08:00:00', timestamp '2023-08-09 09:00:00', time '01:00:00'),
('User1', 'Task2', '2023-08-09 08:15:00', '2023-08-09 10:00:00', '01:45:00'),
('User2', 'Task1', '2023-08-09 08:30:00', '2023-08-09 10:00:00', '01:30:00'),
('User2', 'Task2', '2023-08-09 09:00:00', '2023-08-09 11:30:00', '02:30:00'),
('User1', 'Task3', '2023-08-09 11:15:00', '2023-08-09 13:00:00', '02:45:00'),
('User2', 'Task3', '2023-08-09 15:15:00', '2023-08-09 16:00:00', '00:45:00'),
('User2', 'Task1', '2023-08-09 15:20:00', '2023-08-09 16:00:00', '00:40:00')
),
-- is case when two tasks share the exact same period
distinct_periods as (
select distinct userid, start, end, cast(start as date) as workday from table1
),
-- extend periods
periods (n, userid, start, end,workday) as (
-- start with periods that are not preceded by an overlaping period
select 1, userid, start, end, workday from distinct_periods t1
where not exists (
select * from distinct_periods t2
where t2.userid = t1.userid
and (t1.start > t2.start and t1.start <= t2.end
or t1.start = t2.start and t1.end > t2.end)
and (t1.workday=t2.workday)
)
-- extend with those that overlap
union all
select n+1, periods.userid, periods.start, distinct_periods.end, periods.workday
from periods, distinct_periods
where
distinct_periods.userid = periods.userid
and distinct_periods.start between periods.start and periods.end
and distinct_periods.end > periods.end
-- and n < 10
),
-- add a rank by end date descending so that the widest period has rank 1
with_rank as (
select periods.*, rank() over(partition by userid, workday, start order by end desc) rank from periods order by userid, start
)
-- sum the lengths of periods of rank 1
select
workday, userid,
-- the following line is not the same in the fiddle due to differences between DB2 for IBMi and DB2 LUW
time '00:00:00' + sum(timestampdiff(4, char(end - start))) minutes
as time_busy
from with_rank where rank = 1 group by workday, userid
order by workday, userid