2013-11-26 26 views
0

我想查找數據庫中 80% 或更多的值位於集合 (6, 12, 24) 中的所有字段,也就是計算數據庫中每個字段的值落在集合 (6, 12, 24) 中的比例。

我是否可以將 sys.tables 與 sys.columns 聯接起來,然後計算每個字段的這一比例來做到這一點?

舉個例子,這可能適用於下面這樣的列表:

6 - Half a Dozen 
12 - A Dozen 
24 - Two Dozen 

所以用偽代碼來描述,大致是這樣:

select all the tables names in sys.tables and look in each number column in sys.columns for that table where 80% of the values are in the set (6,12,24). 

回答

2

借鑑我在下面這個答案中所用的方法:

https://stackoverflow.com/questions/20156747/number-of-null-values-for-every-column-on-ssis/20156981#20156981

你需要看看類似的方法。在這裏,我使用系統元數據來發現哪些列是整數(如果您需要考慮浮點數,則將其納入第一個過濾器中)。

-- Suppress the "n rows affected" messages produced by every statement below.
SET NOCOUNT ON;

-- Dynamic SQL buffers.
DECLARE
    -- the template after its placeholders have been filled in for one column
    @query nvarchar(max)
    -- the per-column probe, containing <SCHEMA/>, <TABLE_NAME/> and
    -- <COLUMN_NAME/> placeholders
,   @template nvarchar(max);


-- Probe template. Once the placeholders are substituted, the statement
-- returns exactly one row for the probed column:
--   rc    : number of rows whose value is one of the target values (6, 12, 24)
--   cname : the text substituted for <COLUMN_NAME/>
--   tname : the text substituted for <SCHEMA/>.<TABLE_NAME/>
-- COUNT_BIG is used rather than COUNT because COUNT returns int and raises an
-- arithmetic overflow once more than 2,147,483,647 rows match.
SELECT
    @template = N'
    SELECT COUNT_BIG(*) AS rc
    , ''<COLUMN_NAME/>'' AS cname
    , ''<SCHEMA/>.<TABLE_NAME/>'' AS tname
    FROM
     <SCHEMA/>.<TABLE_NAME/> T
    WHERE
     T.<COLUMN_NAME/> IN (6, 12, 24)';


-- Enumerate every integer-typed column of every base table.
-- * LOCAL keeps the cursor scoped to this batch, so a failed run cannot leave
--   a global cursor named CSR behind that breaks the next run.
-- * FAST_FORWARD requests a forward-only, read-only cursor, which is all the
--   loop below needs.
-- * Views are excluded: the report that follows only has row counts for
--   objects in sys.tables, so probing view columns is wasted work.
-- If floating point or decimal columns must be considered, add their type
-- names to the DATA_TYPE list.
DECLARE
    CSR CURSOR LOCAL FAST_FORWARD
FOR
SELECT
    ISC.TABLE_SCHEMA
,   ISC.TABLE_NAME
,   ISC.COLUMN_NAME
FROM
    INFORMATION_SCHEMA.COLUMNS AS ISC
    INNER JOIN
        INFORMATION_SCHEMA.TABLES AS IST
        ON IST.TABLE_SCHEMA = ISC.TABLE_SCHEMA
        AND IST.TABLE_NAME = ISC.TABLE_NAME
WHERE
    IST.TABLE_TYPE = 'BASE TABLE'
    AND ISC.DATA_TYPE IN ('bigint', 'int', 'smallint', 'tinyint');


-- Cursor variables for the current schema / table / column, plus their
-- QUOTENAME'd forms (QUOTENAME returns nvarchar(258)).
DECLARE
    @table_schema sysname
,   @table_name sysname
,   @column_name sysname
,   @schema_quoted nvarchar(258)
,   @table_quoted nvarchar(258)
,   @column_quoted nvarchar(258);

-- One row per probed column.
--   instance_count : rows whose value is in the target set
--   column_name    : bracket-quoted column name, e.g. [qty]
--   table_schema   : bracket-quoted schema.table, e.g. [dbo].[orders]
-- The widths fit the longest possible quoted names (258, and 258 + 1 + 258).
DECLARE
    @RESULTS TABLE
(
    instance_count bigint NOT NULL
,   column_name nvarchar(258) NOT NULL
,   table_schema nvarchar(517) NOT NULL
);

OPEN
    CSR;

WHILE (1 = 1)
BEGIN
    FETCH NEXT
    FROM
        CSR
    INTO
        @table_schema
    ,   @table_name
    ,   @column_name;

    -- Test the status immediately after the fetch it belongs to.
    IF (@@FETCH_STATUS <> 0)
        BREAK;

    -- Make the names safe for use as identifiers.
    SET @schema_quoted = QUOTENAME(@table_schema);
    SET @table_quoted = QUOTENAME(@table_name);
    SET @column_quoted = QUOTENAME(@column_name);

    -- The template uses each name twice: inside a string literal (reported
    -- back as cname / tname) and as an identifier. Fill the string-literal
    -- occurrences first, as Unicode literals with embedded apostrophes
    -- doubled, so names containing ' or non-code-page characters neither
    -- break the statement nor get altered.
    SET @query = REPLACE(
        @template
    ,   N'''<SCHEMA/>.<TABLE_NAME/>'''
    ,   N'N''' + REPLACE(@schema_quoted + N'.' + @table_quoted, N'''', N'''''') + N''''
    );
    SET @query = REPLACE(
        @query
    ,   N'''<COLUMN_NAME/>'''
    ,   N'N''' + REPLACE(@column_quoted, N'''', N'''''') + N''''
    );

    -- Then fill the remaining (identifier) occurrences.
    SET @query = REPLACE(@query, N'<SCHEMA/>', @schema_quoted);
    SET @query = REPLACE(@query, N'<TABLE_NAME/>', @table_quoted);
    SET @query = REPLACE(@query, N'<COLUMN_NAME/>', @column_quoted);

    BEGIN TRY
        -- The probe returns (rc, cname, tname); store it in the table variable.
        INSERT INTO
            @RESULTS (instance_count, column_name, table_schema)
        EXECUTE (@query);
    END TRY
    BEGIN CATCH
        -- Best effort: report why the probe failed and which statement it
        -- was, then carry on with the next column.
        PRINT ERROR_MESSAGE();
        PRINT @query;
    END CATCH
END

CLOSE CSR;
DEALLOCATE CSR;


-- Report every probed column where at least 80% of the table's rows hold one
-- of the target values.
--
-- ROW_COUNTS    : total rows per user table, summed over its partitions.
--                 Only the heap (index_id 0) or clustered index (index_id 1)
--                 is counted so each row is counted once. sys.partitions.rows
--                 is documented as approximate, so ratios can be slightly off
--                 on tables with in-flight changes.
-- COLUMN_RATIOS : probe results matched to their table's row count, with the
--                 unrounded ratio. NULLIF turns a zero-row table into a NULL
--                 ratio instead of a divide-by-zero error.
--
-- The filter uses the unrounded ratio; rounding to two decimals happens only
-- for display. Drop the WHERE clause to see the ratio for every probed column
-- (zero-row tables then show NULL).
WITH ROW_COUNTS AS
(
    SELECT
        s.name AS table_schema
    ,   t.name AS table_name
    ,   SUM(p.rows) AS TotalRowCount
    FROM
        sys.schemas AS s
        INNER JOIN
            sys.tables AS t
            ON t.schema_id = s.schema_id
        INNER JOIN
            sys.partitions AS p
            ON p.object_id = t.object_id
    WHERE
        p.index_id IN (0, 1) -- 0 = heap, 1 = clustered index
        AND p.rows IS NOT NULL
    GROUP BY
        s.name
    ,   t.name
)
, COLUMN_RATIOS AS
(
    SELECT
        RC.table_schema
    ,   RC.table_name
    ,   R.column_name
    ,   R.instance_count
    ,   RC.TotalRowCount
        -- "* 1.0" forces decimal rather than integer division
    ,   R.instance_count * 1.0 / NULLIF(RC.TotalRowCount, 0) AS instance_ratio
    FROM
        ROW_COUNTS AS RC
        INNER JOIN
            @RESULTS AS R
            -- the probe results store the name as [schema].[table]
            ON R.table_schema = QUOTENAME(RC.table_schema) + N'.' + QUOTENAME(RC.table_name)
)
SELECT
    CR.table_schema
,   CR.table_name
,   CR.column_name
,   CR.instance_count
,   CR.TotalRowCount
,   CAST(CR.instance_ratio AS decimal(18,2)) AS InstancePercentage
FROM
    COLUMN_RATIOS AS CR
WHERE
    CR.instance_ratio >= 0.8;

如果刪除過濾條件(WHERE 子句),就能看到每個字段中找到的值所占的百分比。目前,對於零行的表,百分比會顯示為 NULL。

+0

+1,非常好的答案 – Lamak