Warning: file_get_contents(/data/phpspider/zhask/data//catemap/5/sql/77.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/python-3.x/16.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
在 SQL Server 中,为较大的二进制值生成较短唯一值的最佳方法是什么?_Sql_Sql Server_Wcf_Tsql - Fatal编程技术网

在 SQL Server 中,为较大的二进制值生成较短唯一值的最佳方法是什么?

在sql server中为大字节创建一个狭义唯一值的最佳方法是什么?,sql,sql-server,wcf,tsql,Sql,Sql Server,Wcf,Tsql,我将数百万条记录放入支持WCF应用程序的表中。应用程序基本上检查记录的存在,如果没有找到,则插入该特定记录 它检查存在性的关键字段之一是VARBINARY256 我目前正在使用HASHBYTES,通过使用SHA2_256算法将字节数减少到32字节,如下所示 CREATE TABLE BlobTable ( BlobID INT, Blob VARBINARY(256), BlobHash VARBINARY(32)) DECLARE @Bin VARBINARY(256) = CRYPT_GE

我将数百万条记录放入支持WCF应用程序的表中。应用程序基本上检查记录的存在,如果没有找到,则插入该特定记录

用于检查存在性的关键字段之一是 VARBINARY(256) 类型

我目前正在使用HASHBYTES,通过使用SHA2_256算法将字节数减少到32字节,如下所示

-- Lookup table: each blob is stored next to its SHA2_256 hash so that the
-- existence check can compare the 32-byte hash instead of the 256-byte blob.
CREATE TABLE dbo.BlobTable ( BlobID INT, Blob VARBINARY(256), BlobHash VARBINARY(32))
-- Sample input: a random 256-byte value and its 32-byte hash.
DECLARE @Bin VARBINARY(256) = CRYPT_GEN_RANDOM(256)
DECLARE @BinHash VARBINARY(32) = HASHBYTES('SHA2_256', @Bin)
-- Receives the stored blob when a row with the same hash already exists;
-- stays NULL otherwise. (Was mistakenly declared as a second @Bin.)
DECLARE @_Bin VARBINARY(256)

-- Existence check by hash only; READPAST skips rows locked by other sessions.
SELECT @_Bin = Blob FROM dbo.BlobTable WITH (ROWLOCK, READPAST) WHERE BlobHash = @BinHash
-- Insert only when no row with this hash was found.
IF (@_Bin IS NULL)
BEGIN
    INSERT INTO dbo.BlobTable (Blob, BlobHash) VALUES (@Bin, @BinHash)
END
有没有办法降低上面 SELECT 语句的查询成本?有没有办法为 VARBINARY(256) 字段生成一个长度不超过 VARBINARY(16)、但仍然可以避免重复的唯一值?


谢谢

我认为这里不需要 ROWLOCK/READPAST 提示。性能的关键是索引。我用下面的 T-SQL 做了一次性能测试,在 10 个线程的测试工具下观察到的速率约为 5K/秒。复合唯一约束的键可以快速判定行不存在,并且在不太可能发生的哈希冲突情况下仍然允许插入不同的 blob。请注意,由于没有使用可序列化隔离,如果不同的线程试图同时插入同一个 blob,可能会出现重复键冲突,因此您的代码需要处理这种情况

-- Table under test: an identity surrogate key plus the raw blob and its hash.
-- The clustered primary key covers BlobID only.
CREATE TABLE dbo.BlobTable
(
    BlobID   INT IDENTITY(1, 1),
    Blob     VARBINARY(256),
    BlobHash VARBINARY(32),
    CONSTRAINT PK_BlobTable PRIMARY KEY CLUSTERED (BlobID)
);
GO

-- Load 3M rows of random test data, then add the unique constraint.
-- Row source: 4 rows -> 4^4 = 256 rows -> 256^3 = 16,777,216 numbered rows,
-- of which the first 3,000,000 are used.
WITH
    four_rows AS (
        SELECT v.n
        FROM (VALUES (0), (0), (0), (0)) AS v(n)
    ),
    rows_256 AS (
        SELECT 0 AS n
        FROM four_rows AS a
        CROSS JOIN four_rows AS b
        CROSS JOIN four_rows AS c
        CROSS JOIN four_rows AS d
    ),
    numbered AS (
        SELECT ROW_NUMBER() OVER (ORDER BY a.n) AS num
        FROM rows_256 AS a
        CROSS JOIN rows_256 AS b
        CROSS JOIN rows_256 AS c
    )
INSERT INTO dbo.BlobTable WITH (TABLOCKX) (Blob, BlobHash)
SELECT
    Blobs.Blob,
    HASHBYTES('SHA2_256', Blobs.Blob)
FROM (
    SELECT CRYPT_GEN_RANDOM(256) AS Blob
    FROM numbered
    WHERE numbered.num <= 3000000
) AS Blobs;

-- Rebuild statistics from a full scan of the freshly loaded table.
UPDATE STATISTICS dbo.BlobTable WITH FULLSCAN;

-- The hash leads the key; including Blob means two different blobs that
-- share a hash are still accepted as distinct rows.
ALTER TABLE dbo.BlobTable
    ADD CONSTRAINT UQ_BlobTable1_Blob_BlobHash UNIQUE NONCLUSTERED (BlobHash, Blob);

CHECKPOINT;
GO

-- Test-harness procedure: generates one random 256-byte blob, hashes it with
-- SHA2_256, and inserts the pair only when no row with the same hash AND the
-- same blob exists. Takes no parameters and returns no result set.
-- The existence check and the insert are a single statement but use no
-- locking hints, so concurrent callers inserting the same blob can still get
-- a duplicate-key error from a unique constraint on (BlobHash, Blob);
-- callers must handle that error.
CREATE PROC dbo.usp_insert_BlobTable
AS
SET NOCOUNT ON;

DECLARE @Bin VARBINARY(256) = CRYPT_GEN_RANDOM(256);
DECLARE @BinHash VARBINARY(32) = HASHBYTES('SHA2_256', @Bin);

INSERT INTO dbo.BlobTable (Blob, BlobHash)
SELECT @Bin, @BinHash
WHERE NOT EXISTS(
    SELECT *
    FROM dbo.BlobTable
    WHERE
        -- Compare the hash first (leading key column), then the full blob
        -- so that a hash collision does not block a genuinely new blob.
        BlobHash = @BinHash
        AND Blob = @Bin
    );

不要使用不适用于您的问题的标记MD5将获得20个字节,但我的感觉是,这将增加冲突的机会,因为我们谈论的是数百万条记录。我把它编入了索引。但是,只要加上唯一的约束,我就会拒绝那一行。如果所有256个字节都不同,我确实希望插入。不考虑散列,因为我使用它只是为了提高性能,但它的冲突对受支持的应用程序不好?看起来很有趣…谢谢分享。那么我可能应该使用MD5。在你的情况下,安全性不是一个问题,但我应该指出,MD5被认为是坏的。我需要在几分之一秒内支持大约1100个WCF服务调用。这意味着这个insert语句将被多次调用,我真的需要事务非常短。据我所知,检查32字节比检查256字节所需的时间要少。我正在select查询中使用ROWLOCK、READPAST提示检查记录的存在。@user1144852,我根据您的评论更新了我的答案。
-- Table under test: an identity surrogate key plus the raw blob and its hash.
-- The clustered primary key covers BlobID only.
CREATE TABLE dbo.BlobTable
(
    BlobID   INT IDENTITY(1, 1),
    Blob     VARBINARY(256),
    BlobHash VARBINARY(32),
    CONSTRAINT PK_BlobTable PRIMARY KEY CLUSTERED (BlobID)
);
GO

-- Load 3M rows of random test data, then add the unique constraint.
-- Row source: 4 rows -> 4^4 = 256 rows -> 256^3 = 16,777,216 numbered rows,
-- of which the first 3,000,000 are used.
WITH
    four_rows AS (
        SELECT v.n
        FROM (VALUES (0), (0), (0), (0)) AS v(n)
    ),
    rows_256 AS (
        SELECT 0 AS n
        FROM four_rows AS a
        CROSS JOIN four_rows AS b
        CROSS JOIN four_rows AS c
        CROSS JOIN four_rows AS d
    ),
    numbered AS (
        SELECT ROW_NUMBER() OVER (ORDER BY a.n) AS num
        FROM rows_256 AS a
        CROSS JOIN rows_256 AS b
        CROSS JOIN rows_256 AS c
    )
INSERT INTO dbo.BlobTable WITH (TABLOCKX) (Blob, BlobHash)
SELECT
    Blobs.Blob,
    HASHBYTES('SHA2_256', Blobs.Blob)
FROM (
    SELECT CRYPT_GEN_RANDOM(256) AS Blob
    FROM numbered
    WHERE numbered.num <= 3000000
) AS Blobs;

-- Rebuild statistics from a full scan of the freshly loaded table.
UPDATE STATISTICS dbo.BlobTable WITH FULLSCAN;

-- The hash leads the key; including Blob means two different blobs that
-- share a hash are still accepted as distinct rows.
ALTER TABLE dbo.BlobTable
    ADD CONSTRAINT UQ_BlobTable1_Blob_BlobHash UNIQUE NONCLUSTERED (BlobHash, Blob);

CHECKPOINT;
GO

-- Test-harness procedure: generates one random 256-byte blob, hashes it with
-- SHA2_256, and inserts the pair only when no row with the same hash AND the
-- same blob exists. Takes no parameters and returns no result set.
-- The existence check and the insert are a single statement but use no
-- locking hints, so concurrent callers inserting the same blob can still get
-- a duplicate-key error from a unique constraint on (BlobHash, Blob);
-- callers must handle that error.
CREATE PROC dbo.usp_insert_BlobTable
AS
SET NOCOUNT ON;

DECLARE @Bin VARBINARY(256) = CRYPT_GEN_RANDOM(256);
DECLARE @BinHash VARBINARY(32) = HASHBYTES('SHA2_256', @Bin);

INSERT INTO dbo.BlobTable (Blob, BlobHash)
SELECT @Bin, @BinHash
WHERE NOT EXISTS(
    SELECT *
    FROM dbo.BlobTable
    WHERE
        -- Compare the hash first (leading key column), then the full blob
        -- so that a hash collision does not block a genuinely new blob.
        BlobHash = @BinHash
        AND Blob = @Bin
    );