2013-05-21 77 views
1

問題:查找聚集在一起的數據點組。我有一個記錄事件的表:

-- Event log table described in the question: each row records one event
-- (which item, which user, and when).
CREATE TABLE #events
(
    intRowId  INT IDENTITY(1, 1),  -- auto-generated row number (seed 1, step 1)
    intItemId INT,                 -- item the event refers to
    intUserId INT,                 -- user the event was registered against
    datEvent  DATETIME             -- time of the event
)

這是一個有幾百萬行的大表,記錄了數萬名用戶對數千個項目的活動。

我想查找一組選定的十個 itemID,但僅限於它們以特定模式出現的情況:我要找出這樣的行——這十個項目全部都有針對同一個用戶 ID 註冊的事件,而且這些事件在時間上彼此接近,比如在 5 分鐘之內。

我完全不知道該如何著手。我猜想其中某處會用到分區(PARTITION),但任何幫助——哪怕只是指出一個起點——我都將不勝感激。

乾杯, 馬特

+0

入門提示:先按用戶 ID 過濾,再使用 CTE 按 5 分鐘間隔分組。 –

+0

如果您可以顯示一些示例數據和預期結果,這將有所幫助。 – Kaf

+0

事件是否總是需要以十個一組的形式出現?順便說一下,提供一些示例數據會有所幫助。 –

回答

1

比方說,你想統計項目 ID 爲 1、2、……、10 的事件。首先創建一個表 EventByItems:

-- Summary table: one row per (user, event time), with one counter column for
-- each of the ten item IDs of interest (1..10). All ten columns are written
-- out so the statement can be executed as-is.
CREATE TABLE EventByItems
(
    intRowId int identity(1,1),
    intUserId int,
    datEvent datetime,
    intItem1 int,   -- number of events for item 1 at this user/time
    intItem2 int,
    intItem3 int,
    intItem4 int,
    intItem5 int,
    intItem6 int,
    intItem7 int,
    intItem8 int,
    intItem9 int,
    intItem10 int
)

然後使用查詢來填充本表:

-- Fill EventByItems from the raw event log: for every (user, event time)
-- count how many events were logged for each of item IDs 1..10.
-- Conditional aggregation yields the same per-item counts as pivoting on
-- intItemId and summing, without depending on PIVOT's implicit grouping.
-- Events for other item IDs still produce a row (with all counters = 0).
INSERT INTO EventByItems
    (intUserId, datEvent,
     intItem1, intItem2, intItem3, intItem4, intItem5,
     intItem6, intItem7, intItem8, intItem9, intItem10)
SELECT
    intUserId,
    datEvent,
    SUM(CASE WHEN intItemId = 1  THEN 1 ELSE 0 END),
    SUM(CASE WHEN intItemId = 2  THEN 1 ELSE 0 END),
    SUM(CASE WHEN intItemId = 3  THEN 1 ELSE 0 END),
    SUM(CASE WHEN intItemId = 4  THEN 1 ELSE 0 END),
    SUM(CASE WHEN intItemId = 5  THEN 1 ELSE 0 END),
    SUM(CASE WHEN intItemId = 6  THEN 1 ELSE 0 END),
    SUM(CASE WHEN intItemId = 7  THEN 1 ELSE 0 END),
    SUM(CASE WHEN intItemId = 8  THEN 1 ELSE 0 END),
    SUM(CASE WHEN intItemId = 9  THEN 1 ELSE 0 END),
    SUM(CASE WHEN intItemId = 10 THEN 1 ELSE 0 END)
FROM #events
GROUP BY intUserId, datEvent

現在我們可以對該表做一些工作。例如,我們可以根據您的邏輯更新它來填補空白。或者我們可以做這樣的查詢:

-- Return every EventByItems row E for which each of the ten items was seen
-- either on E itself or on another row of the same user within 5 minutes
-- of E (before OR after).
--
-- ABS() is required: DATEDIFF(MINUTE, N.datEvent, E.datEvent) is negative
-- whenever N is later than E, so a bare "<= 5" would accept rows arbitrarily
-- far in the future. Note that DATEDIFF counts minute boundaries crossed,
-- not elapsed seconds.
SELECT
    E.intRowId,
    E.intUserId,
    E.datEvent
FROM
    EventByItems AS E
WHERE
    (E.intItem1 > 0 OR EXISTS (SELECT *
          FROM EventByItems AS N
          WHERE N.intUserId = E.intUserId
          AND N.intItem1 > 0
          AND ABS(DATEDIFF(MINUTE, N.datEvent, E.datEvent)) <= 5
          AND N.intRowId != E.intRowId))
    AND
    (E.intItem2 > 0 OR EXISTS (SELECT *
          FROM EventByItems AS N
          WHERE N.intUserId = E.intUserId
          AND N.intItem2 > 0
          AND ABS(DATEDIFF(MINUTE, N.datEvent, E.datEvent)) <= 5
          AND N.intRowId != E.intRowId))
    AND
    (E.intItem3 > 0 OR EXISTS (SELECT *
          FROM EventByItems AS N
          WHERE N.intUserId = E.intUserId
          AND N.intItem3 > 0
          AND ABS(DATEDIFF(MINUTE, N.datEvent, E.datEvent)) <= 5
          AND N.intRowId != E.intRowId))
    AND
    (E.intItem4 > 0 OR EXISTS (SELECT *
          FROM EventByItems AS N
          WHERE N.intUserId = E.intUserId
          AND N.intItem4 > 0
          AND ABS(DATEDIFF(MINUTE, N.datEvent, E.datEvent)) <= 5
          AND N.intRowId != E.intRowId))
    AND
    (E.intItem5 > 0 OR EXISTS (SELECT *
          FROM EventByItems AS N
          WHERE N.intUserId = E.intUserId
          AND N.intItem5 > 0
          AND ABS(DATEDIFF(MINUTE, N.datEvent, E.datEvent)) <= 5
          AND N.intRowId != E.intRowId))
    AND
    (E.intItem6 > 0 OR EXISTS (SELECT *
          FROM EventByItems AS N
          WHERE N.intUserId = E.intUserId
          AND N.intItem6 > 0
          AND ABS(DATEDIFF(MINUTE, N.datEvent, E.datEvent)) <= 5
          AND N.intRowId != E.intRowId))
    AND
    (E.intItem7 > 0 OR EXISTS (SELECT *
          FROM EventByItems AS N
          WHERE N.intUserId = E.intUserId
          AND N.intItem7 > 0
          AND ABS(DATEDIFF(MINUTE, N.datEvent, E.datEvent)) <= 5
          AND N.intRowId != E.intRowId))
    AND
    (E.intItem8 > 0 OR EXISTS (SELECT *
          FROM EventByItems AS N
          WHERE N.intUserId = E.intUserId
          AND N.intItem8 > 0
          AND ABS(DATEDIFF(MINUTE, N.datEvent, E.datEvent)) <= 5
          AND N.intRowId != E.intRowId))
    AND
    (E.intItem9 > 0 OR EXISTS (SELECT *
          FROM EventByItems AS N
          WHERE N.intUserId = E.intUserId
          AND N.intItem9 > 0
          AND ABS(DATEDIFF(MINUTE, N.datEvent, E.datEvent)) <= 5
          AND N.intRowId != E.intRowId))
    AND
    (E.intItem10 > 0 OR EXISTS (SELECT *
          FROM EventByItems AS N
          WHERE N.intUserId = E.intUserId
          AND N.intItem10 > 0
          AND ABS(DATEDIFF(MINUTE, N.datEvent, E.datEvent)) <= 5
          AND N.intRowId != E.intRowId))
+0

就是這樣,謝謝。對不起,沒有提供任何樣本數據。 –

0

好的,下面是一個可以實現你需求的可運行示例。我假設事件不必以十個一組的形式出現。

但是這個解決方案非常粗糙,而且運行速度會很慢,尤其是當項目/用戶的數量增加時。無論如何,請不要因此給我太多反對票,我希望你能找到一個合適的解決方案 :)。

使用一個存放預先篩選出的事件的臨時表會有助於提升我這個方案的性能,但你真正需要的是類似 Oracle 中的窗口函數。

-- Recreate the sample event table from scratch.
-- The OBJECT_ID guard lets the script run cleanly the first time, when
-- #events does not exist yet (an unconditional DROP TABLE raises an error).
IF OBJECT_ID('tempdb..#events') IS NOT NULL
    DROP TABLE #events
GO

-- Same shape as the event log in the question: item, user, and event time,
-- plus an auto-generated row number.
create table #events
( intRowId int identity(1,1),
    intItemId int,
    intUserId int,
    datEvent datetime)
GO

-- Sample data for user 1. Rows tagged "--groupN" are the clusters the example
-- is expected to pick up for items 1, 2 and 3.
-- IDs are written as int literals (no implicit varchar -> int conversion) and
-- timestamps use the ISO 8601 'YYYY-MM-DDThh:mm:ss' form, which SQL Server
-- parses the same way regardless of the session's language / DATEFORMAT.
insert into #events (intUserId, intItemId, datEvent)
select 1, 1, '2013-05-01T10:25:00' union all --group1
select 1, 2, '2013-05-01T10:25:00' union all --group1
select 1, 3, '2013-05-01T10:26:00' union all --group1
select 1, 7, '2013-05-01T10:25:00' union all
select 1, 8, '2013-05-01T10:25:00' union all
select 1, 9, '2013-05-01T10:26:00' union all
select 1, 1, '2013-05-01T10:50:00' union all --group2
select 1, 2, '2013-05-01T10:52:00' union all --group2
select 1, 3, '2013-05-01T10:59:00' union all
select 1, 1, '2013-05-01T11:10:00' union all --group3
select 1, 1, '2013-05-01T11:12:00' union all --group3
select 1, 3, '2013-05-01T11:17:00' union all --group3
select 1, 2, '2013-05-01T11:25:00' union all
select 1, 1, '2013-05-01T11:31:00' union all
select 1, 7, '2013-05-01T11:32:00' union all
select 1, 2, '2013-05-01T11:50:00' union all --group4
select 1, 2, '2013-05-01T11:50:00' union all --group4
select 1, 3, '2013-05-01T11:50:00' union all --group4
select 1, 1, '2013-05-01T11:56:00'
GO

-- Build #temp: every pair (e1, e2) of events of user 1 on items 1..3 where
-- e2 happens at or after e1 and fewer than 6 minute boundaries later.
-- The OBJECT_ID guard avoids an error when #temp does not exist yet.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp
GO
select
    e1.intRowId as intRowId_1, e1.intItemId as intItemId_1, e1.intUserId as intUserId_1, e1.datEvent as datEvent_1
    ,e2.intRowId as intRowId_2, e2.intItemId as intItemId_2, e2.intUserId as intUserId_2, e2.datEvent as datEvent_2
into #temp
from #events e1
join #events e2
    on e1.intUserId = e2.intUserId
    -- e1 strictly first; ties on the timestamp are broken by row id so each
    -- unordered pair is stored once instead of in both directions
    and (e1.datEvent < e2.datEvent
         or (e1.datEvent = e2.datEvent and e1.intRowId < e2.intRowId))
where e1.intUserId = 1              -- e2's user follows from the join condition
    and e1.intItemId in (1,2,3)
    and e2.intItemId in (1,2,3)
    -- column name spelled datEvent consistently so the query also works
    -- under a case-sensitive collation
    and datediff(minute, e1.datEvent, e2.datEvent) < 6
-- no ORDER BY: row order of a SELECT ... INTO target is not preserved for
-- later reads, so the consuming query does its own ordering
GO

-- List each event that takes part in at least one pair stored in #temp.
-- UNION (without ALL) removes duplicates across both sides of the pairs,
-- which is the same result as DISTINCT over a UNION ALL.
SELECT
    intRowId_1  AS intRowId,
    intItemId_1 AS intItemId,
    intUserId_1 AS intUserId,
    datEvent_1  AS datEvent
FROM #temp

UNION

SELECT
    intRowId_2,
    intItemId_2,
    intUserId_2,
    datEvent_2
FROM #temp

ORDER BY
    datEvent,
    intRowId