Обсуждение: Any thoughts on a better approach to this query?
Formatted query attached in addition to placing it inline. The commentary is inline with the query. Basically I've already solved this problem but was wondering if someone has a different perspective; or simply observations.
TIA,
David J.
/*
For a given id there are multiple linked values of differing types.
Types "A" and "B" are important and, if present, should be explicitly assigned.
It is possible that more than one link is associated with a given type.
If either A or B is lacking an explicit value it is assigned a value
from:
1. any extra As that are present
2. any extra Bs that are present
3. any extra non-A/B values that are present
The final result contains values for A and B and
and array of values for whatever links went unused.
*/
WITH demo AS (
-- A sample record where B needs to be assigned and ends
-- up using the excess A
SELECT * FROM (
VALUES (1,1,'A'), (1,2,'A'), (1,3,'C'), (1,4,'D')
) vals (id, link, type)
)
, link_allocation_1 AS (
SELECT id,
-- Grab the first A
(SELECT link
FROM demo
AND type = 'A' LIMIT 1
) AS type_a_first_link,
-- Grab the first B
(SELECT link
FROM demo
AND type = 'B' LIMIT 1
) AS type_b_first_link,
-- Any additional As and Bs are placed into an array
-- and appended to an array constructed from all of the non-A/B
ARRAY(
SELECT link
FROM demo
AND type = 'A'
ORDER BY link
OFFSET 1) ||
ARRAY(
SELECT link
FROM demo
AND type = 'B'
ORDER BY link
OFFSET 1) ||
ARRAY(
SELECT link
FROM demo
AND type NOT IN ('A','B')
ORDER BY link) AS unassigned_links
FROM (SELECT DISTINCT id FROM demo) master
)
, allocate_unassigned_links AS (
SELECT *,
-- Determine how many allocations from the "extra" array are required
-- so that we can trim slice them out of the final result
CASE WHEN type_a_first_link IS NULL
THEN 1
ELSE 0 END +
CASE WHEN type_b_first_link IS NULL
THEN 1
ELSE 0
END AS reassign_count,
-- A always gets the first extra if needed
CASE WHEN type_a_first_link IS NULL
THEN unassigned_links[1]
ELSE type_a_first_link
END AS actual_a_link,
-- B gets the first extra unless A took it in which case it gets the second one
CASE WHEN type_b_first_link IS NULL THEN
CASE WHEN type_a_first_link IS NOT NULL
THEN unassigned_links[1]
ELSE unassigned_links[2]
END
ELSE type_b_first_link
END AS actual_b_link
FROM link_allocation_1
)
SELECT id,
-- For A and B flag is the value was pulled from the extras
type_a_first_link IS NULL AS a_link_is_missing,
actual_a_link,
type_b_first_link IS NULL AS b_link_is_missing,
actual_b_link,
-- Now slice off the first portion of the extras array based upon the assignment count
unassigned_links[1+reassign_count:array_length(unassigned_links,1)] AS final_unassigned_links,
-- output the unsliced array for visual comparison
unassigned_links AS pre_allocation_unassigned_links
FROM allocate_unassigned_links
Output =>
id a_link_is_missing actual_a_link b_link_is_missing actual_b_link final_unassigned_links pre_allocation_unassigned_links
1 False 1 True 2 {3,4} {2,3,4}
Вложения
>?Formatted query attached in addition to placing it inline. The commentary is inline with the query. Basically I've already solved this problem but was wondering if someone has a different perspective; or simply observations.
>
>TIA,
>
>David J.
>
>/*
>For a given id there are multiple linked values of differing types.
>Types "A" and "B" are important and, if present, should be explicitly assigned.
>It is possible that more than one link is associated with a given type.
>If either A or B is lacking an explicit value it is assigned a value
>from:
>1. any extra As that are present
>2. any extra Bs that are present
>3. any extra non-A/B values that are present
>
>The final result contains values for A and B and
>and array of values for whatever links went unused.
>*/
>WITH demo AS (
> -- A sample record where B needs to be assigned and ends
> -- up using the excess A
> SELECT * FROM (
> VALUES (1,1,'A'), (1,2,'A'), (1,3,'C'), (1,4,'D')
> ) vals (id, link, type)
>)
>, link_allocation_1 AS (
> SELECT id,
>
> -- Grab the first A
> (SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'A' LIMIT 1
> ) AS type_a_first_link,
>
> -- Grab the first B
> (SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'B' LIMIT 1
> ) AS type_b_first_link,
>
> -- Any additional As and Bs are placed into an array
> -- and appended to an array constructed from all of the non-A/B
> ARRAY(
> SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'A'
> ORDER BY link
> OFFSET 1) ||
> ARRAY(
> SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'B'
> ORDER BY link
> OFFSET 1) ||
> ARRAY(
> SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type NOT IN ('A','B')
> ORDER BY link) AS unassigned_links
> FROM (SELECT DISTINCT id FROM demo) master
>)
>, allocate_unassigned_links AS (
> SELECT *,
> -- Determine how many allocations from the "extra" array are required
> -- so that we can trim slice them out of the final result
> CASE WHEN type_a_first_link IS NULL
> THEN 1
> ELSE 0 END +
> CASE WHEN type_b_first_link IS NULL
> THEN 1
> ELSE 0
> END AS reassign_count,
> -- A always gets the first extra if needed
> CASE WHEN type_a_first_link IS NULL
> THEN unassigned_links[1]
> ELSE type_a_first_link
> END AS actual_a_link,
> -- B gets the first extra unless A took it in which case it gets the second one
> CASE WHEN type_b_first_link IS NULL THEN
> CASE WHEN type_a_first_link IS NOT NULL
> THEN unassigned_links[1]
> ELSE unassigned_links[2]
> END
> ELSE type_b_first_link
> END AS actual_b_link
> FROM link_allocation_1
>)
>SELECT id,
>-- For A and B flag is the value was pulled from the extras
> type_a_first_link IS NULL AS a_link_is_missing,
> actual_a_link,
> type_b_first_link IS NULL AS b_link_is_missing,
> actual_b_link,
>-- Now slice off the first portion of the extras array based upon the assignment count
> unassigned_links[1+reassign_count:array_length(unassigned_links,1)] AS final_unassigned_links,
>-- output the unsliced array for visual comparison
> unassigned_links AS pre_allocation_unassigned_links
> FROM allocate_unassigned_links
>
>
>Output =>
>id
>a_link_is_missing actual_a_link
>b_link_is_missing actual_b_link
>final_unassigned_links pre_allocation_unassigned_links
>1
>False 1
>True 2
>{3,4} {2,3,4}
>
I would do it this way:
-- helper to remove nulls from arrays
-- could be merged into a array_agg_notnulls aggregate function
create function array_not_nulls (a anyarray) returns anyarray as $$
SELECT array_agg (u) from (select unnest($1) u)foo where u is not null;
$$ language sql immutable strict;
WITH demo AS (
-- A sample record where B needs to be assigned and ends
-- up using the excess A
SELECT * FROM (
VALUES
(1,1,'A'), (1,2,'A'), (1,3,'C'), (1,4,'D'),
(2,1,'e'), (2,2,'B'), (2,3,'C'), (2,4,'D'),
(3,1,'A'), (3,2,'y'), (3,3,'z'), (3,4,'B'),
(4,1,'B'), (4,2,'B'), (4,3,'z'), (4,4,'z'), (4,5,'B'),
(5,1,'x'), (5,2,'y'), (5,3,'z'), (5,4,'q')
) vals (id, link, type)
),
PREP AS (
select
id,
bool_or (type='A') as has_a,
bool_or (type='B') as has_b,
count(case when type='A' then 1 end ) as a_ct,
count(case when type='B' then 1 end ) as b_ct,
array_not_nulls(array_agg(case when type='A' then link end ))as ar_a,
array_not_nulls(array_agg(case when type='B' then link end ))as ar_b,
array_not_nulls(array_agg(case when type NOT IN ('A', 'B') then link end)) as ar_others
from demo
GROUP BY id
)
SELECT
id,
a_ct,
b_ct,
ar_a,
ar_b,
ar_others,
coalesce (ar_a[1], case when b_ct > 1 then ar_b[2] else ar_others[1] end ) as link_a,
coalesce (ar_b[1], case when a_ct > 1 then ar_a[2] when a_ct = 0 then ar_others[2] else ar_others[1] end) as link_b,
-- unused others
case when a_ct + b_ct >=2 then ar_others
else ar_others[3 - (a_ct + b_ct) : array_length(ar_others,1)]
end
||
-- unused A & B
case when has_a AND has_b then ar_a[2:a_ct] || ar_b[2:b_ct]
when a_ct > 2 then ar_a[3:a_ct]
when b_ct > 2 then ar_b[3:b_ct]
end as unused
FROM PREP
order by id
regards,
Marc Mamin
>
>TIA,
>
>David J.
>
>/*
>For a given id there are multiple linked values of differing types.
>Types "A" and "B" are important and, if present, should be explicitly assigned.
>It is possible that more than one link is associated with a given type.
>If either A or B is lacking an explicit value it is assigned a value
>from:
>1. any extra As that are present
>2. any extra Bs that are present
>3. any extra non-A/B values that are present
>
>The final result contains values for A and B and
>and array of values for whatever links went unused.
>*/
>WITH demo AS (
> -- A sample record where B needs to be assigned and ends
> -- up using the excess A
> SELECT * FROM (
> VALUES (1,1,'A'), (1,2,'A'), (1,3,'C'), (1,4,'D')
> ) vals (id, link, type)
>)
>, link_allocation_1 AS (
> SELECT id,
>
> -- Grab the first A
> (SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'A' LIMIT 1
> ) AS type_a_first_link,
>
> -- Grab the first B
> (SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'B' LIMIT 1
> ) AS type_b_first_link,
>
> -- Any additional As and Bs are placed into an array
> -- and appended to an array constructed from all of the non-A/B
> ARRAY(
> SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'A'
> ORDER BY link
> OFFSET 1) ||
> ARRAY(
> SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'B'
> ORDER BY link
> OFFSET 1) ||
> ARRAY(
> SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type NOT IN ('A','B')
> ORDER BY link) AS unassigned_links
> FROM (SELECT DISTINCT id FROM demo) master
>)
>, allocate_unassigned_links AS (
> SELECT *,
> -- Determine how many allocations from the "extra" array are required
> -- so that we can trim slice them out of the final result
> CASE WHEN type_a_first_link IS NULL
> THEN 1
> ELSE 0 END +
> CASE WHEN type_b_first_link IS NULL
> THEN 1
> ELSE 0
> END AS reassign_count,
> -- A always gets the first extra if needed
> CASE WHEN type_a_first_link IS NULL
> THEN unassigned_links[1]
> ELSE type_a_first_link
> END AS actual_a_link,
> -- B gets the first extra unless A took it in which case it gets the second one
> CASE WHEN type_b_first_link IS NULL THEN
> CASE WHEN type_a_first_link IS NOT NULL
> THEN unassigned_links[1]
> ELSE unassigned_links[2]
> END
> ELSE type_b_first_link
> END AS actual_b_link
> FROM link_allocation_1
>)
>SELECT id,
>-- For A and B flag is the value was pulled from the extras
> type_a_first_link IS NULL AS a_link_is_missing,
> actual_a_link,
> type_b_first_link IS NULL AS b_link_is_missing,
> actual_b_link,
>-- Now slice off the first portion of the extras array based upon the assignment count
> unassigned_links[1+reassign_count:array_length(unassigned_links,1)] AS final_unassigned_links,
>-- output the unsliced array for visual comparison
> unassigned_links AS pre_allocation_unassigned_links
> FROM allocate_unassigned_links
>
>
>Output =>
>id
>a_link_is_missing actual_a_link
>b_link_is_missing actual_b_link
>final_unassigned_links pre_allocation_unassigned_links
>1
>False 1
>True 2
>{3,4} {2,3,4}
>
I would do it this way:
-- helper to remove nulls from arrays
-- could be merged into a array_agg_notnulls aggregate function
create function array_not_nulls (a anyarray) returns anyarray as $$
SELECT array_agg (u) from (select unnest($1) u)foo where u is not null;
$$ language sql immutable strict;
WITH demo AS (
-- A sample record where B needs to be assigned and ends
-- up using the excess A
SELECT * FROM (
VALUES
(1,1,'A'), (1,2,'A'), (1,3,'C'), (1,4,'D'),
(2,1,'e'), (2,2,'B'), (2,3,'C'), (2,4,'D'),
(3,1,'A'), (3,2,'y'), (3,3,'z'), (3,4,'B'),
(4,1,'B'), (4,2,'B'), (4,3,'z'), (4,4,'z'), (4,5,'B'),
(5,1,'x'), (5,2,'y'), (5,3,'z'), (5,4,'q')
) vals (id, link, type)
),
PREP AS (
select
id,
bool_or (type='A') as has_a,
bool_or (type='B') as has_b,
count(case when type='A' then 1 end ) as a_ct,
count(case when type='B' then 1 end ) as b_ct,
array_not_nulls(array_agg(case when type='A' then link end ))as ar_a,
array_not_nulls(array_agg(case when type='B' then link end ))as ar_b,
array_not_nulls(array_agg(case when type NOT IN ('A', 'B') then link end)) as ar_others
from demo
GROUP BY id
)
SELECT
id,
a_ct,
b_ct,
ar_a,
ar_b,
ar_others,
coalesce (ar_a[1], case when b_ct > 1 then ar_b[2] else ar_others[1] end ) as link_a,
coalesce (ar_b[1], case when a_ct > 1 then ar_a[2] when a_ct = 0 then ar_others[2] else ar_others[1] end) as link_b,
-- unused others
case when a_ct + b_ct >=2 then ar_others
else ar_others[3 - (a_ct + b_ct) : array_length(ar_others,1)]
end
||
-- unused A & B
case when has_a AND has_b then ar_a[2:a_ct] || ar_b[2:b_ct]
when a_ct > 2 then ar_a[3:a_ct]
when b_ct > 2 then ar_b[3:b_ct]
end as unused
FROM PREP
order by id
regards,
Marc Mamin
array_not_nulls(array_agg(case when type='A' then link end ))as ar_a,
array_not_nulls(array_agg(case when type='B' then link end ))as ar_b,
array_not_nulls(array_agg(case when type NOT IN ('A', 'B') then link end)) as ar_others
Yeah. I'm not sure why I didn't tend toward the array_agg approach since I have made use of it previously. It cleans up a bit but not as much as I was hoping - but also not seeing. The preponderance of case statements
and array slicing is still present. I image trying to write some kind of "shift" function but that doesn't mesh well with set theory; it seems to be a procedural concept which I suspect would be considerably worse performing.
Thanks for the suggestion and once I get back on that project I'm intended to do a barebones performance comparison of the two which I'll follow up with.
David J.