TSQL不为每行生成新值

TSQL不为每行生成新值,tsql,Tsql,我正在尝试匿名化数据库中的所有数据,因此我正在重命名其中的所有人。我之前问了一个类似的问题,并被告知使用NewID强制为每个更新的行创建一个新值,但在这种情况下,它似乎不起作用 我做错了什么 -- Create Table Customer CREATE TABLE #FirstName ( ID int, FirstName nvarchar(255) NULL, Gender nvarchar(255) NULL ) CREATE TABLE #LastName

我正在尝试对数据库中的所有数据进行匿名化处理,因此要给其中的每个人重新命名。我之前问过一个类似的问题,得到的建议是使用 NEWID() 来强制为每个被更新的行生成一个新值,但在这种情况下它似乎不起作用。

我哪里做错了?

-- Staging table for candidate first names; ID is supplied by the loaded data
-- (no IDENTITY is declared here).
CREATE TABLE #FirstName (
    ID        int,
    FirstName nvarchar(255) NULL,
    Gender    nvarchar(255) NULL
);

-- Staging table for candidate last names, keyed the same way.
CREATE TABLE #LastName (
    ID       int,
    LastName nvarchar(255)
);

-- Load the first-name list from a comma-separated text file, one record per
-- line, starting at the very first line of the file.
BULK INSERT #FirstName
FROM 'C:\Users\jhollon\Desktop\tmp\names\firstnames.lined.txt'
WITH (FIRSTROW = 1, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n');

-- Load the last-name list using the same file layout.
BULK INSERT #LastName
FROM 'C:\Users\jhollon\Desktop\tmp\names\lastnames.lined.txt'
WITH (FIRSTROW = 1, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n');

-- Disabled experiment: look up a first name by a randomly generated ID.
/*SELECT FirstName FROM #FirstName WHERE ID = (
    SELECT RandomNumber FROM (
        SELECT ABS(CHECKSUM(NewID())) % 1500 AS RandomNumber FROM tblTenant WHERE Sex = '1'
        ) AS A
    );*/

-- Rename tenants with Sex = '2' to "LastName, FIRSTNAME", taking the first
-- name from #FirstName IDs 1501-2000 and the last name from #LastName IDs 1-200.
-- NOTE(review): nothing inside the scalar subquery references a column of
-- tblTenant, so SQL Server may evaluate it once and reuse that single result
-- for every updated row -- all matching tenants then receive the same name.
-- If a random ID matches no row, the subquery is empty and TenantName is set
-- to NULL.
UPDATE tblTenant SET TenantName = ( 
    SELECT LastName + ', ' + FirstName FROM 
        (SELECT UPPER(FirstName) as FirstName FROM #FirstName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 500 + 1501)) AS A,
        (SELECT LastName FROM #LastName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 200 + 1)) as B
) WHERE Sex = '2';

-- Same rename for tenants with Sex = '1', but the first name comes from
-- #FirstName IDs 1-500. The uncorrelated-subquery caveat above applies here too.
-- NOTE(review): presumably the two ID ranges hold names for different genders
-- -- confirm against the data file.
UPDATE tblTenant SET TenantName = ( 
    SELECT LastName + ', ' + FirstName FROM 
        (SELECT UPPER(FirstName) as FirstName FROM #FirstName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 500 + 1)) AS A,
        (SELECT LastName FROM #LastName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 200 + 1)) as B
) WHERE Sex = '1';

-- Clean up both staging tables once the rename is done.
DROP TABLE #FirstName, #LastName;

我不确定是否理解了您的问题,但如果您希望 ID 是唯一值,可以将其设为标识列(IDENTITY)。例如:


没错。该子查询只会被计算一次,这正是所谓"可缓存标量子查询"(cacheable scalar subquery)的预期行为。

请尝试在派生表中使用 NEWID() 的写法(通过 CROSS APPLY):


下面的代码表明,如果内部表与外部表之间没有关联,那么使用上面的 CROSS APPLY 答案时,无法保证新名称与旧名称不同。更好的做法是在名字的 CROSS APPLY 中加上 WHERE F.Id <> T.Id,并按 NEWID() 排序。

-- Demonstration script: builds a tiny tblTenant table, derives the first-name
-- and last-name pools from it, and then picks a random name pair per tenant
-- with CROSS APPLY so the old and new names can be compared side by side.
USE tempdb
GO
-- Start from a clean slate so the script can be re-run in the same session;
-- without dropping the temp tables, SELECT ... INTO below fails on a second run.
IF OBJECT_ID('tblTenant') IS NOT NULL
  DROP TABLE tblTenant
IF OBJECT_ID('tempdb..#FirstNames') IS NOT NULL
  DROP TABLE #FirstNames
IF OBJECT_ID('tempdb..#LastNames') IS NOT NULL
  DROP TABLE #LastNames
GO
CREATE TABLE tblTenant
(
  Id        int,
  FirstName nvarchar(20),
  LastName  nvarchar(20),
  Gender    bit
)
-- Explicit column list keeps the insert valid if the table definition changes.
INSERT INTO tblTenant (Id, FirstName, LastName, Gender)
VALUES (1, 'Bob'   , 'Marley', 1),
       (2, 'Boz'   , 'Skaggs', 1)

-- Name pools are taken from the tenants themselves.
SELECT DISTINCT FirstName
INTO #FirstNames
FROM tblTenant

SELECT DISTINCT LastName
INTO #LastNames
FROM tblTenant

-- There is a probability > 0 that a tenant's new name = tenant's old name:
-- the predicate on T.Id only references the outer row, it does not exclude
-- the tenant's current name from the pool.
SELECT
  OldFirst = T.FirstName,
  OldLast  = T.LastName,
  NewFirst = F.FirstName,
  NewLast  = L.LastName
FROM
  tblTenant T

  CROSS APPLY
  (
    SELECT TOP 1 UPPER(FirstName) AS FirstName
    FROM #FirstNames
    WHERE CHECKSUM(NEWID()) <> T.Id  -- column is declared as Id; match its case
    ORDER BY NEWID()
  ) F

  CROSS APPLY
  (
    SELECT TOP 1 LastName
    FROM #LastNames
    WHERE CHECKSUM(NEWID()) <> T.Id
    ORDER BY NEWID()
  ) L

注:Sex='1' 和 Sex='2' 是客户曾尝试把开发外包出去时遗留下来的,那次外包以惨败告终。我知道它本应是布尔值或枚举。ID 是唯一的;我比较懒,不想去研究如何在 CSV 导入时让标识列生效,所以用命令行工具给每行加了行号。——你因为我误解了你的要求就给我的回答投了反对票,而起因用你自己的话说是你太懒?——效果很好,但运行花了很长时间,可能是因为它会针对每一行重新调用。——使用上述方法时,租户的新名称可能与旧名称相同,发生这种情况的概率 = (1/LastNameCount) × (1/FirstNameCount),原因是 CROSS APPLY 中的内部表与外部表没有关联。请参阅下面的代码。——即使发生这种情况也没关系,我的程序可以处理重复的租户名称。——租户名称保持不变也可以接受吗?名称未被更改才是问题所在……您声明的目标是对数据进行匿名化。
-- Give every row of tblTenant a randomly chosen "LastName, FIRSTNAME" pair
-- (no WHERE clause: all tenants are updated).
-- The predicate comparing against tenant.ID references the outer row, which
-- ties each APPLY to the tenant being updated; the intent is that a name is
-- picked per tenant instead of once for the whole statement.
UPDATE tenant
SET TenantName = pickedLast.LastName + ', ' + pickedFirst.FirstName
FROM tblTenant AS tenant
CROSS APPLY (
    SELECT TOP 1 UPPER(FirstName) AS FirstName
    FROM #FirstName
    WHERE CHECKSUM(NEWID()) <> tenant.ID
    ORDER BY NEWID()          -- random order, so TOP 1 is a random row
) AS pickedFirst
CROSS APPLY (
    SELECT TOP 1 LastName
    FROM #LastName
    WHERE CHECKSUM(NEWID()) <> tenant.ID
    ORDER BY NEWID()
) AS pickedLast;
-- Demonstration script: builds a tiny tblTenant table, derives the first-name
-- and last-name pools from it, and then picks a random name pair per tenant
-- with CROSS APPLY so the old and new names can be compared side by side.
USE tempdb
GO
-- Start from a clean slate so the script can be re-run in the same session;
-- without dropping the temp tables, SELECT ... INTO below fails on a second run.
IF OBJECT_ID('tblTenant') IS NOT NULL
  DROP TABLE tblTenant
IF OBJECT_ID('tempdb..#FirstNames') IS NOT NULL
  DROP TABLE #FirstNames
IF OBJECT_ID('tempdb..#LastNames') IS NOT NULL
  DROP TABLE #LastNames
GO
CREATE TABLE tblTenant
(
  Id        int,
  FirstName nvarchar(20),
  LastName  nvarchar(20),
  Gender    bit
)
-- Explicit column list keeps the insert valid if the table definition changes.
INSERT INTO tblTenant (Id, FirstName, LastName, Gender)
VALUES (1, 'Bob'   , 'Marley', 1),
       (2, 'Boz'   , 'Skaggs', 1)

-- Name pools are taken from the tenants themselves.
SELECT DISTINCT FirstName
INTO #FirstNames
FROM tblTenant

SELECT DISTINCT LastName
INTO #LastNames
FROM tblTenant

-- There is a probability > 0 that a tenant's new name = tenant's old name:
-- the predicate on T.Id only references the outer row, it does not exclude
-- the tenant's current name from the pool.
SELECT
  OldFirst = T.FirstName,
  OldLast  = T.LastName,
  NewFirst = F.FirstName,
  NewLast  = L.LastName
FROM
  tblTenant T

  CROSS APPLY
  (
    SELECT TOP 1 UPPER(FirstName) AS FirstName
    FROM #FirstNames
    WHERE CHECKSUM(NEWID()) <> T.Id  -- column is declared as Id; match its case
    ORDER BY NEWID()
  ) F

  CROSS APPLY
  (
    SELECT TOP 1 LastName
    FROM #LastNames
    WHERE CHECKSUM(NEWID()) <> T.Id
    ORDER BY NEWID()
  ) L