有趣的問題!我想我已經找到了一種在SQL Server中解決它的方法。不過這是相當複雜:
- 創建計算組合
- 調用存儲過程由1,這是每一個組合,創造組合的列表中爲每個
ids
- Increate的次數存儲過程先前發現
- 插入新組合成一個結果表
下面是完整的代碼:
set nocount on
-- Helper table for sp_Combinations
if OBJECT_ID('tempdb..#combos') is not null
drop table #combos
create table #combos (
comboid int default 1,
basecomboid int,
combosize int default 1,
val int,
primary key (comboid, val))
if OBJECT_ID('dbo.sp_Combinations') is null
exec ('create procedure dbo.sp_Combinations as select 1')
go
-- Creates a list of all combinations of values in #combos,
-- based on a starting list with comboid = 1
alter procedure dbo.sp_Combinations
as begin
delete from #combos where comboid <> 1
declare @n int
select @n = count(*) from #combos
update #combos set combosize = (select count(*) from #combos)
if @n = 1
return
-- Add individual numbers
insert #combos
(comboid, basecomboid, val)
select row_number() over (order by val) + 1
, 1
, val
from #combos
where comboid = 1
declare @k int
set @k = 1
while @k + 1 < @n
begin
declare @max_combo_id int
select @max_combo_id = max(comboid) from #combos
-- Add one new member to each combination of size @k
-- The new member must be larger than all existing members
-- F.e. for @n = 4 and @k = 1:
-- (1) --> (1,2) (1,3) (1,4)
-- (2) --> (2,3) (2,4)
-- (3) --> (3,4)
-- (4) --> no new combinations
;with b as (
select val
from #combos
where comboid = 1
), g as (
select comboid
, max(val) as maxval
from #combos c
where combosize = @k
group by
comboid
)
insert #combos (comboid, basecomboid, combosize, val)
select row_number() over (order by g.comboid) + @max_combo_id
, g.comboid
, @k + 1
, b.val
from b
join g
on b.val > g.maxval
-- Add the members of the base combo
insert #combos (comboid, basecomboid, combosize, val)
select l.comboid
, l.basecomboid
, l.combosize
, b.val
from #combos l
join #combos b
on b.comboid = l.basecomboid
where l.combosize = @k + 1
set @k = @k + 1
end
end
go
go
-- Input table
declare @t table (ids int, idk int)
insert into @t (ids, idk) values
(1, 2), (1, 3), (1, 4),
(2, 2), (2, 3), (2, 4),
(3, 2), (3, 5), (3, 4),
(4, 2), (4, 3);
-- Valid combinations with number of occurrences
declare @combinations table (comboid int, val int, cnt int)
-- Iterate over all ids
declare cur cursor for select distinct ids from @t
open cur
declare @ids int
while 1=1
begin
fetch from cur into @ids
if @@FETCH_STATUS <> 0
break
-- Calculate combinations for this ids
truncate table #combos
insert into #combos (comboid, val) select 1, idk from @t where ids = @ids
exec dbo.sp_Combinations
-- Increase count of existing combinations
update u
set cnt = cnt + 1
from @combinations u
where exists
(
select *
from @combinations a
full join
#combos b
on a.val = b.val
where a.comboid = u.comboid
group by
a.comboid
having max(case when a.val is null then 1 else 0 end) = 0
and max(case when b.val is null then 1 else 0 end) = 0
)
-- Insert new combinations
declare @max_combo_id int
select @max_combo_id = isnull(max(comboid),0) from @combinations
insert @combinations
(comboid, val, cnt)
select n.comboid + @max_combo_id
, n.val
, 1
from #combos n
where not exists
(
select *
from @combinations a
full join
#combos b
on a.val = b.val
where b.comboid = n.comboid
group by
b.comboid
having max(case when a.val is null then 1 else 0 end) = 0
and max(case when b.val is null then 1 else 0 end) = 0
)
end
close cur
deallocate cur
-- Display result
select x.r as combination
, cnt as occurrences
from (
select distinct comboid
, cnt
from @combinations
) a
cross apply
(
select val as [text()]
from @combinations c
where c.comboid = a.comboid
for xml path('')
) x(r)
where a.cnt > 1
order by
len(x.r) desc
, cnt desc
對於示例數據集,這個打印:
combination occurrences
234 2
23 3
24 3
34 2
2 4
3 3
4 3
維塔利 - 你的意思IDK價值的最大#?例如,如果他在尋找2個idk的共同組合的數量,這將是相對容易的。如果尋找共同的2或3個idk,它仍然是可行的(但是對於更大的數據集和直接的SQL來說會變得內存密集)。 – Andrew 2010-08-01 20:19:57
這是一個通過編程語言的微不足道的問題,但是對於大多數數據庫(假設無限制的idk)是直接SQL的困難(並且效率低下)。如果你只能使用SQL,考慮利用DBMS中的存儲過程邏輯來執行一些/所有的邏輯 – Andrew 2010-08-01 20:28:33
我不會那樣做。這就是一些專門從事套裝理論的數學家可能會回答的問題。我會試着「拼合」組合,然後對它們進行計數。但由於這些是整數,所以對於任何SQL來說組合的數量都太大。是否在一定範圍內(如1-9),我只是爲了它的樂趣寫一個SELECT語句能夠做到這一點。 – hol 2010-08-01 21:14:35