Sql server 2012 数据库中字段的使用概率在集合(6、12、24)中

Sql server 2012 数据库中字段的使用概率在集合(6、12、24)中,sql-server-2012,Sql Server 2012,我想找到数据库中的所有字段,其中80%或更多的值位于集合6,12,24中 我是否可以使用sys.tables对sys.columns执行此操作,以构建联接,然后查看每个字段内部的概率 以下列表中列出了可能适用的示例: 6 - Half a Dozen 12 - A Dozen 24 - Two Dozen 因此,在过程语言中,该表看起来像这样: select all the tables names in sys.tables and look in each number column in

我想找到数据库中的所有字段,其中80%或更多的值位于集合6,12,24中

我是否可以使用sys.tables对sys.columns执行此操作,以构建联接,然后查看每个字段内部的概率

以下列表中列出了可能适用的示例:

6 - Half a Dozen
12 - A Dozen
24 - Two Dozen
因此,在过程语言中,该表看起来像这样:

select all the tables names in sys.tables and look in each number column in sys.columns for that table where 80% of the values are in the set (6,12,24).

抄袭我对这个答案的看法

您需要考虑一种类似的方法。在这里,我使用系统元数据来发现哪些列是整数,如果需要考虑浮点数,请将其考虑到第一个过滤器中

SET NOCOUNT ON;

DECLARE
    -- actual query 
    @query nvarchar(max)
    -- templated query
,   @template nvarchar(max);


-- Return column & schema/table combo anywhere
-- there is data in the column that starts like the
-- search key.
SELECT
    @template = N'
    SELECT COUNT(1) AS rc
    , ''<COLUMN_NAME/>'' AS cname
    , ''<SCHEMA/>.<TABLE_NAME/>'' AS tname 
    FROM 
        <SCHEMA/>.<TABLE_NAME/> T 
    WHERE 
        T.<COLUMN_NAME/> IN (6, 12, 24)';


DECLARE 
    CSR CURSOR
FOR
-- Iterate through all the columns
SELECT
    ISC.TABLE_SCHEMA
,   ISC.TABLE_NAME
,   ISC.COLUMN_NAME
,   ISC.IS_NULLABLE
FROM
    INFORMATION_SCHEMA.COLUMNS ISC
WHERE
    (
        ISC.DATA_TYPE IN ('bigint', 'int', 'smallint', 'tinyint')
    );


-- Cursor variables for capturing candidate schemas, tables and columns
DECLARE
    @table_schema sysname
,   @table_name sysname
,   @column_name sysname
,   @nullable varchar(3);

DECLARE
    @RESULTS TABLE
(
    instance_count bigint NOT NULL
,   column_name sysname NOT NULL
,   table_schema nvarchar(500) NOT NULL
);

OPEN
    CSR;

FETCH NEXT
FROM
    CSR
INTO
    @table_schema
,   @table_name
,   @column_name
,   @nullable;

WHILE (@@FETCH_STATUS = 0)
BEGIN
    -- stub in actual names, make 'em safe via quotename function
    SET @query = REPLACE(@template, '<SCHEMA/>', quotename(@table_schema));
    SET @query = REPLACE(@query, '<TABLE_NAME/>', quotename(@table_name));
    SET @query = REPLACE(@query, '<COLUMN_NAME/>', quotename(@column_name));

    BEGIN TRY
        -- Dump results into a table variable
        INSERT INTO
            @RESULTS
        EXECUTE(@query);

    END TRY
    BEGIN CATCH
        -- print failing query
        PRINT @query;

    END CATCH
    FETCH NEXT
    FROM
        CSR
    INTO
        @table_schema
    ,   @table_name
    ,   @column_name
    ,   @nullable;
END
CLOSE CSR;
DEALLOCATE CSR;


WITH ROW_COUNTS AS
(
    SELECT
        s.[Name] as table_schema
    ,   t.[name] as table_name
    ,   SUM(p.rows) as TotalRowCount
    FROM
        sys.schemas s
        LEFT OUTER JOIN 
            sys.tables t
            ON s.schema_id = t.schema_id
        LEFT OUTER JOIN 
            sys.partitions p
            ON t.object_id = p.object_id
        LEFT OUTER JOIN  
            sys.allocation_units a
            ON p.partition_id = a.container_id
    WHERE
        p.index_id  in(0,1) -- 0 heap table , 1 table with clustered index
        AND p.rows is not null
        AND a.type = 1  -- row-data only , not LOB
    GROUP BY 
        s.[Name]
    ,   t.[name]
)
SELECT
    RC.table_schema
,   RC.table_name
,   R.column_name
,   R.instance_count
,   RC.TotalRowCount
    -- ensure we don't divide by zero and perform floating division
,   CAST(R.instance_count / (NULLIF(RC.TotalRowCount, 0) * 1.0) AS decimal(18,2)) AS InstancePercentage
FROM
    ROW_COUNTS AS RC
    INNER JOIN
        @results R
        ON R.table_schema = quotename(RC.table_schema) + '.' + quotename(table_name)
WHERE
    CAST(R.instance_count / (NULLIF(RC.TotalRowCount, 0) * 1.0) AS decimal(18,2)) > .8;
如果删除过滤器,则会看到找到的值的百分比。它当前会为行数为零的表清除百分比