2010-08-01 13 views
2

我有一個有兩個int列的表,它們的組合是唯一的。在SQL中計數n元組

ids idk 
--------- 
1 2 
1 3 
1 4 

2 2 
2 3 
2 4 

3 2 
3 5 
3 4 

4 2 
4 3 

我要的是得到最大的集idks的是常見的至少2個IDS。

的結果,這裏應該是這樣的:

(2,3,4) - 2x 
(2,4) - 3x 
(2,3) - 3x 
(3,4) - 2x 
(2,5) - 1x 

正如你看到的,共同idks的數量是最重要的事情。如果數字相同#(2,4)=#(2,5),那麼標準就是發生次數。

在我看來,它似乎需要編程的算法,但我可能是錯的,SQL是有能力的。

+0

維塔利 - 你的意思IDK價值的最大#?例如,如果他在尋找2個idk的共同組合的數量,這將是相對容易的。如果尋找共同的2或3個idk,它仍然是可行的(但是對於更大的數據集和直接的SQL來說會變得內存密集)。 – Andrew 2010-08-01 20:19:57

+0

這是一個通過編程語言的微不足道的問題,但是對於大多數數據庫(假設無限制的idk)是直接SQL的困難(並且效率低下)。如果你只能使用SQL,考慮利用DBMS中的存儲過程邏輯來執行一些/所有的邏輯 – Andrew 2010-08-01 20:28:33

+1

我不會那樣做。這就是一些專門從事套裝理論的數學家可能會回答的問題。我會試着「拼合」組合,然後對它們進行計數。但由於這些是整數,所以對於任何SQL來說組合的數量都太大。是否在一定範圍內(如1-9),我只是爲了它的樂趣寫一個SELECT語句能夠做到這一點。 – hol 2010-08-01 21:14:35

回答

2

有趣的問題!我想我已經找到了一種在SQL Server中解決它的方法。不過這是相當複雜:

  • 創建計算組合
  • 調用存儲過程由1,這是每一個組合,創造組合的列表中爲每個ids
  • Increate的次數存儲過程先前發現
  • 插入新組合成一個結果表

下面是完整的代碼:

set nocount on 

-- Helper table for sp_Combinations 
if OBJECT_ID('tempdb..#combos') is not null 
    drop table #combos 
create table #combos (
    comboid int default 1, 
    basecomboid int, 
    combosize int default 1, 
    val int, 
    primary key (comboid, val)) 


if OBJECT_ID('dbo.sp_Combinations') is null 
    exec ('create procedure dbo.sp_Combinations as select 1') 
go 
-- Creates a list of all combinations of values in #combos, 
-- based on a starting list with comboid = 1 
alter procedure dbo.sp_Combinations 
as begin 
    delete from #combos where comboid <> 1 

    declare @n int 
    select @n = count(*) from #combos 

    update #combos set combosize = (select count(*) from #combos) 

    if @n = 1 
     return 

    -- Add individual numbers 
    insert #combos 
      (comboid, basecomboid, val) 
    select row_number() over (order by val) + 1 
    ,  1 
    ,  val 
    from #combos 
    where comboid = 1 

    declare @k int 
    set @k = 1 
    while @k + 1 < @n 
     begin 
     declare @max_combo_id int 
     select @max_combo_id = max(comboid) from #combos 

     -- Add one new member to each combination of size @k 
     -- The new member must be larger than all existing members 
     -- F.e. for @n = 4 and @k = 1: 
     --  (1) --> (1,2) (1,3) (1,4) 
     --  (2) --> (2,3) (2,4) 
     --  (3) --> (3,4) 
     --  (4) --> no new combinations 
     ;with b as (
      select val 
      from #combos 
      where comboid = 1 
     ), g as (
      select comboid 
      ,  max(val) as maxval 
      from #combos c 
      where combosize = @k 
      group by 
        comboid 
     ) 
     insert #combos (comboid, basecomboid, combosize, val) 
     select row_number() over (order by g.comboid) + @max_combo_id 
     ,  g.comboid 
     ,  @k + 1 
     ,  b.val 
     from b 
     join g 
     on  b.val > g.maxval 

     -- Add the members of the base combo 
     insert #combos (comboid, basecomboid, combosize, val) 
     select l.comboid 
     ,  l.basecomboid 
     ,  l.combosize 
     ,  b.val 
     from #combos l 
     join #combos b 
     on  b.comboid = l.basecomboid 
     where l.combosize = @k + 1 

     set @k = @k + 1 
     end 
end 
go 
go 

-- Input table 
declare @t table (ids int, idk int) 
insert into @t (ids, idk) values 
    (1, 2), (1, 3), (1, 4), 
    (2, 2), (2, 3), (2, 4), 
    (3, 2), (3, 5), (3, 4), 
    (4, 2), (4, 3); 

-- Valid combinations with number of occurrences 
declare @combinations table (comboid int, val int, cnt int) 

-- Iterate over all ids  
declare cur cursor for select distinct ids from @t 
open cur 
declare @ids int 
while 1=1 
    begin 
    fetch from cur into @ids 
    if @@FETCH_STATUS <> 0 
     break 

    -- Calculate combinations for this ids 
    truncate table #combos 
    insert into #combos (comboid, val) select 1, idk from @t where ids = @ids 
    exec dbo.sp_Combinations 

    -- Increase count of existing combinations 
    update u 
    set  cnt = cnt + 1 
    from @combinations u 
    where exists 
      (
      select * 
      from @combinations a 
      full join 
        #combos b 
      on  a.val = b.val 
      where a.comboid = u.comboid 
      group by 
        a.comboid 
      having max(case when a.val is null then 1 else 0 end) = 0 
        and max(case when b.val is null then 1 else 0 end) = 0 
      ) 

    -- Insert new combinations 
    declare @max_combo_id int 
    select @max_combo_id = isnull(max(comboid),0) from @combinations 

    insert @combinations 
      (comboid, val, cnt) 
    select n.comboid + @max_combo_id 
    ,  n.val 
    ,  1 
    from #combos n 
    where not exists 
      (
      select * 
      from @combinations a 
      full join 
        #combos b 
      on  a.val = b.val 
      where b.comboid = n.comboid 
      group by 
        b.comboid 
      having max(case when a.val is null then 1 else 0 end) = 0 
        and max(case when b.val is null then 1 else 0 end) = 0 
      ) 
    end 

close cur 
deallocate cur 

-- Display result 
select x.r as combination 
,  cnt as occurrences 
from (
     select distinct comboid 
     ,  cnt 
     from @combinations 
     ) a 
cross apply 
     (
     select val as [text()] 
     from @combinations c 
     where c.comboid = a.comboid 
     for xml path('') 
     ) x(r) 
where a.cnt > 1 
order by 
     len(x.r) desc 
,  cnt desc 

對於示例數據集,這個打印:

combination occurrences 
234   2 
23   3 
24   3 
34   2 
2   4 
3   3 
4   3