Sql 从合并的联接结果集中消除空字段

Sql 从合并的联接结果集中消除空字段,sql,sql-server,tsql,null,Sql,Sql Server,Tsql,Null,我创建了以下视图 user_details_merged: SELECT DISTINCT coalesce(own.user_name, join_user_name.user_name) AS user_name, coalesce(own.email, join_mail.email) AS email, coalesce(own.first_name, join_name.first_name) AS first_name, coalesce(own.last_name, join_name.last_

我创建了以下视图 user_details_merged:

-- Self-joins user_details three times so that each NULL column of a row can be
-- filled from another row that shares one of its non-NULL values
-- (user_name, email, or the first_name/last_name pair).
SELECT DISTINCT
    COALESCE(base.user_name,  src_user.user_name)  AS user_name,
    COALESCE(base.email,      src_mail.email)      AS email,
    COALESCE(base.first_name, src_name.first_name) AS first_name,
    COALESCE(base.last_name,  src_name.last_name)  AS last_name
FROM user_details AS base

-- Candidate rows for user_name: only consulted when the base row has none;
-- matched on email or on the full first_name/last_name pair.
LEFT JOIN user_details AS src_user
    ON  base.user_name IS NULL
    AND (
            (base.email IS NOT NULL AND src_user.email = base.email)
         OR (    base.first_name IS NOT NULL
             AND base.last_name  IS NOT NULL
             AND src_user.first_name = base.first_name
             AND src_user.last_name  = base.last_name)
        )

-- Candidate rows for email: only consulted when the base row has none;
-- matched on user_name or on the full first_name/last_name pair.
LEFT JOIN user_details AS src_mail
    ON  base.email IS NULL
    AND (
            (base.user_name IS NOT NULL AND src_mail.user_name = base.user_name)
         OR (    base.first_name IS NOT NULL
             AND base.last_name  IS NOT NULL
             AND src_mail.first_name = base.first_name
             AND src_mail.last_name  = base.last_name)
        )

-- Candidate rows for the name pair: only consulted when the base row has
-- neither first_name nor last_name; matched on email or on user_name.
LEFT JOIN user_details AS src_name
    ON  base.first_name IS NULL
    AND base.last_name  IS NULL
    AND (
            (base.email     IS NOT NULL AND src_name.email     = base.email)
         OR (base.user_name IS NOT NULL AND src_name.user_name = base.user_name)
        )

-- Ascending is the default sort direction, so it is not spelled out.
ORDER BY user_name, email, first_name, last_name
这将从以下位置合并我的列:

user_name | email | first_name | last_name
a             b       NULL         NULL
NULL          b        c            d
a            NULL      e            f
NULL          x        y            z

我想要的是:

user_name | email | first_name | last_name
NULL          x        y            z
a             b        c            d
a             b        e            f
如果存在另一行数据相同且信息更完整,就不应保留包含 NULL 的那一行;但如果没有其他行能提供更多信息,则仍应保留该行(例如 NULL x y z)。

这里的第二个视图正是我所需要的:

-- Second pass over user_details_merged: returns the distinct rows of a that
-- find at least one join partner b under the ON condition below.
-- The first three rules in the ON clause depend on a only; the last one
-- compares a with the joined b.
SELECT DISTINCT a.user_name,a.email,a.first_name,a.last_name FROM
user_details_merged a
LEFT JOIN user_details_merged b
ON
(
    -- Rule 1: a has a user_name, or no row exists that has a user_name and
    -- agrees with a on email, first_name and last_name.
    -- ISNULL(a.col, b.col) makes a NULL column of a compare against the other
    -- row's own value, so it matches any row whose column is non-NULL.
    -- The alias b inside each subquery is the subquery's own table reference;
    -- it shadows the outer joined b.
    (
    a.user_name IS NOT NULL OR 
    NOT EXISTS (SELECT user_name FROM user_details_merged b WHERE b.user_name IS NOT NULL AND 
        b.email=ISNULL(a.email,b.email) AND 
        b.first_name=isnull(a.first_name,b.first_name) AND
        b.last_name=isnull(a.last_name,b.last_name))
    )

    AND

    -- Rule 2: a has an email, or no row exists that has an email and agrees
    -- with a on user_name, first_name and last_name.
    (
    a.email IS NOT NULL OR 
    NOT EXISTS (SELECT email FROM user_details_merged b WHERE b.email IS NOT NULL AND 
        b.user_name=ISNULL(a.user_name,b.user_name) AND 
        b.first_name=isnull(a.first_name,b.first_name) AND
        b.last_name=isnull(a.last_name,b.last_name))
    )

    AND

    -- Rule 3: a has both first_name and last_name, or no row exists that has
    -- an email and agrees with a on user_name and email.
    -- NOTE(review): this subquery tests b.email IS NOT NULL rather than the
    -- name columns; possibly copied from rule 2 -- confirm the intent.
    (
    (a.first_name IS NOT NULL AND a.last_name IS NOT NULL) OR 
    NOT EXISTS (SELECT email FROM user_details_merged b WHERE b.email IS NOT NULL AND 
        b.user_name=ISNULL(a.user_name,b.user_name) AND 
        b.email=ISNULL(a.email,b.email))
        -- AND b.first_name=isnull(a.first_name,b.first_name) AND b.last_name=isnull(a.last_name,b.last_name))
    )

    -- Reject a partner b that equals a in all four columns. When the
    -- comparison is UNKNOWN (a NULL is involved and no column differs), the
    -- ON condition is not satisfied either.
    AND NOT (a.first_name = b.first_name AND a.last_name = b.last_name AND a.email = b.email AND a.user_name = b.user_name)

)

-- Drops the rows of a for which the LEFT JOIN produced no partner (all b
-- columns NULL), so only rows that passed the ON condition remain.
WHERE coalesce(b.user_name,b.email,b.first_name,b.last_name) IS NOT NULL
主要问题是,数据来源 user_details 视图是由许多不同的表通过 UNION 合并而成的。有些表只包含用户名和电子邮件,有些只包含电子邮件和名字/姓氏,等等。这就是为什么没有唯一键;而且由于使用了 UNION,我无法为该视图创建索引。这导致最后一个视图无法在一小时内执行完。我当前的解决方法是使用一个存储过程,把 user_details_merged 视图的数据存入临时表,再让上面的第二个视图从该临时表读取数据。这样,对 8000 行数据,我可以把执行时间缩短到 7 秒。

还有其他建议吗?


多谢各位,

-如果我这次做对了,您可以使用以下方法解决:

 -- Pairs every row that has email, first_name and last_name with every row
 -- of user_details, takes user_name from the paired row when the first row
 -- has none, and collapses duplicate result rows.
 SELECT DISTINCT
        ISNULL(full_row.USER_NAME, any_row.USER_NAME),
        full_row.EMAIL,
        full_row.FIRST_NAME,
        full_row.LAST_NAME
   FROM user_details AS full_row
  CROSS JOIN user_details AS any_row
  WHERE full_row.LAST_NAME  IS NOT NULL
    AND full_row.FIRST_NAME IS NOT NULL
    AND full_row.EMAIL      IS NOT NULL

尝试对 user_details 使用外联接(OUTER JOIN)。

哎哟!这是一个混乱的数据模型。最好的解决方案是修复数据模型,从而不再需要如此复杂的查询。然而,由于应用程序的依赖关系,这样做往往更加复杂,所以我假设数据模型只能维持现状。

我先用问题中的 4 行示例设计了另一种解决方案。然后又添加了两行只有 first_name 和 last_name 有值的数据,这暴露出上面的查询遗漏的一种场景。我还把这 6 行数据反复复制到超过 12K 行,因为该数据模型似乎允许出现这种情况。结果上面的查询运行了 2 个多小时,最后我只好放弃并中止了它。而我的解决方案在同样的 12K 行上运行,不到一秒就返回了预期结果。闲话少说,脚本如下:

-- =================================================================================
-- BEGIN: SETUP TEST DATA
-- =================================================================================
-- Rebuilds dbo.user_details from scratch, loads six sample rows and then
-- doubles the table eleven times (6 * 2^11 = 12288 rows) for performance tests.
SET NOCOUNT ON;

-- Every reference is schema-qualified so that the existence check, the DROP
-- and the CREATE all address the same object regardless of the caller's
-- default schema.
IF OBJECT_ID('dbo.user_details', 'U') IS NOT NULL DROP TABLE dbo.user_details;
GO

CREATE TABLE dbo.user_details (
    user_name   char(1) NULL,
    email       char(1) NULL,
    first_name  char(1) NULL,
    last_name   char(1) NULL
);
GO

-- Explicit column lists keep the load independent of the table's column order.
INSERT INTO dbo.user_details (user_name, email, first_name, last_name)
SELECT seed.user_name, seed.email, seed.first_name, seed.last_name
  FROM (
        -- Zero-row anchor branch: contributes no rows but gives the derived
        -- table its column names (the literal branches below are unnamed).
        SELECT user_name, email, first_name, last_name
          FROM dbo.user_details
         WHERE 1 = 2
        UNION ALL SELECT 'a',   'b',    NULL,   NULL
        UNION ALL SELECT NULL,  'b',    'c',    'd'
        UNION ALL SELECT 'a',   NULL,   'e',    'f'
        UNION ALL SELECT NULL,  'x',    'y',    'z'
        UNION ALL SELECT NULL,  NULL,   'y',    'z'
        UNION ALL SELECT NULL,  NULL,   'a',    'z'
       ) AS seed;
GO

--/*
-- TURN 6 ROWS INTO OVER 12K ROWS TO TEST PERFORMANCE
-- Each pass re-inserts the table into itself, doubling the row count.
DECLARE @count int;
SET @count = 0;
WHILE @count < 11
  BEGIN
    INSERT INTO dbo.user_details (user_name, email, first_name, last_name)
    SELECT user_name, email, first_name, last_name
      FROM dbo.user_details;

    SET @count = @count + 1;
  END;
--*/
-- =================================================================================
-- END: SETUP TEST DATA
-- =================================================================================


-- =================================================================================
-- BEGIN: NEW SOLUTION FINAL: <1sec on 12288 rows
-- =================================================================================
-- Strategy: split user_details into four de-duplicated temp tables, delete the
-- rows that a more complete row makes redundant, then stitch the survivors
-- back together with a UNION.
-- Column names are spelled exactly as declared (last_name, not last_Name) so
-- the script also resolves under a case-sensitive collation.
-- To preview what a DELETE below would remove, run its FROM/JOIN with
-- SELECT * in place of DELETE <alias>.
IF OBJECT_ID('tempdb..#useremail', 'U') IS NOT NULL DROP TABLE #useremail;
IF OBJECT_ID('tempdb..#email', 'U') IS NOT NULL DROP TABLE #email;
IF OBJECT_ID('tempdb..#user', 'U') IS NOT NULL DROP TABLE #user;
IF OBJECT_ID('tempdb..#name', 'U') IS NOT NULL DROP TABLE #name;


-- GET YOUR UNIQUE user_name AND email KEY
SELECT DISTINCT d.user_name, d.email
  INTO #useremail
  FROM user_details AS d;


-- GET YOUR UNIQUE email VALUES
SELECT DISTINCT d.email, d.first_name, d.last_name
  INTO #email
  FROM user_details AS d
 WHERE d.email IS NOT NULL;


-- GET YOUR UNIQUE user_name VALUES
SELECT DISTINCT d.user_name, d.first_name, d.last_name
  INTO #user
  FROM user_details AS d
 WHERE d.user_name IS NOT NULL;


-- GET YOUR UNIQUE first_name AND last_name VALUES NOT PART OF THE KEY
SELECT DISTINCT d.first_name, d.last_name
  INTO #name
  FROM user_details AS d
 WHERE d.first_name IS NOT NULL
   AND d.last_name  IS NOT NULL
   AND d.user_name  IS NULL
   AND d.email      IS NULL;


-- CLEAN UP YOUR UNIQUE user_name AND email KEY
-- A half-filled key (only user_name or only email) is dropped when a key with
-- both values exists for the same user_name or email.
DELETE partial_key
  FROM #useremail AS partial_key
 INNER JOIN (
        SELECT user_name, email
          FROM #useremail
         WHERE user_name IS NOT NULL
           AND email IS NOT NULL
       ) AS full_key
    ON (partial_key.user_name = full_key.user_name AND partial_key.email     IS NULL)
    OR (partial_key.email     = full_key.email     AND partial_key.user_name IS NULL);


-- CLEAN UP YOUR UNIQUE email VALUES
-- An email row without a name is dropped when the same email has a named row.
DELETE unnamed
  FROM #email AS unnamed
 INNER JOIN (
        SELECT email
          FROM #email
         WHERE first_name IS NOT NULL
           AND last_name  IS NOT NULL
       ) AS named
    ON unnamed.email = named.email
   AND unnamed.first_name IS NULL
   AND unnamed.last_name  IS NULL;


-- CLEAN UP YOUR UNIQUE user_name VALUES
-- A user_name row without a name is dropped when the same user_name has a
-- named row.
DELETE unnamed
  FROM #user AS unnamed
 INNER JOIN (
        SELECT user_name
          FROM #user
         WHERE first_name IS NOT NULL
           AND last_name  IS NOT NULL
       ) AS named
    ON unnamed.user_name = named.user_name
   AND unnamed.first_name IS NULL
   AND unnamed.last_name  IS NULL;


-- CLEAN UP YOUR UNIQUE #name VALUES
-- A name-only row is dropped when the same name pair is already attached to a
-- user_name or to an email.
DELETE n
  FROM #name AS n
 INNER JOIN #user AS u
    ON n.first_name = u.first_name
   AND n.last_name  = u.last_name;

DELETE n
  FROM #name AS n
 INNER JOIN #email AS e
    ON n.first_name = e.first_name
   AND n.last_name  = e.last_name;


-- GET YOUR DATA
-- UNION (not UNION ALL) is intentional: a key that has both user_name and
-- email can reach the same name through both branches. Key rows where both
-- columns are NULL match neither equality join and therefore drop out.
SELECT k.user_name
      ,k.email
      ,u.first_name
      ,u.last_name
  FROM #useremail AS k
 INNER JOIN #user AS u
    ON k.user_name = u.user_name
 UNION
SELECT k.user_name
      ,k.email
      ,e.first_name
      ,e.last_name
  FROM #useremail AS k
 INNER JOIN #email AS e
    ON k.email = e.email
 UNION
SELECT NULL AS [user_name]
      ,NULL AS [email]
      ,n.first_name
      ,n.last_name
  FROM #name AS n;
-- =================================================================================
-- END: NEW SOLUTION FINAL
-- =================================================================================

对不起,我不明白你的问题。哪一组按最大列排列?你想完成什么?嗨。您可以通过选择 MAX(user_name)、email、MAX(first_name)、MAX(last_name) 并按 email 分组(GROUP BY email)来消除包含 NULL 的行。但这将删除 user_name 为 NULL 的所有行,或者丢掉同一个 user_name 下的不同结果。看起来您正在尝试对信息去重。最大的问题是弄清楚你的 ID 是什么。从我所看到的来看,email 是共同点。你不把它当作键有什么原因吗?@Heidi 是的,这是某种去重,更确切地说是去冗余。请查看我在上面所做的编辑。每列中都有许多重复项,它们都不可能作为唯一键。遗憾的是,我不得不处理来自第三方的合并表。我自己绝不会设计出如此糟糕的表结构;对不起,这样不行。即使存在 a b c d,我仍然会得到 NULL b c d,并且我必须检查每一列。+1,因为提供了 DDL 和数据。你听说过 sqlfiddle.com 吗?谢谢你提到 sqlfiddle.com。它看起来是一个非常有用的小工具。
-- =================================================================================
-- BEGIN: SETUP TEST DATA
-- =================================================================================
-- Rebuilds dbo.user_details from scratch, loads six sample rows and then
-- doubles the table eleven times (6 * 2^11 = 12288 rows) for performance tests.
SET NOCOUNT ON;

-- Every reference is schema-qualified so that the existence check, the DROP
-- and the CREATE all address the same object regardless of the caller's
-- default schema.
IF OBJECT_ID('dbo.user_details', 'U') IS NOT NULL DROP TABLE dbo.user_details;
GO

CREATE TABLE dbo.user_details (
    user_name   char(1) NULL,
    email       char(1) NULL,
    first_name  char(1) NULL,
    last_name   char(1) NULL
);
GO

-- Explicit column lists keep the load independent of the table's column order.
INSERT INTO dbo.user_details (user_name, email, first_name, last_name)
SELECT seed.user_name, seed.email, seed.first_name, seed.last_name
  FROM (
        -- Zero-row anchor branch: contributes no rows but gives the derived
        -- table its column names (the literal branches below are unnamed).
        SELECT user_name, email, first_name, last_name
          FROM dbo.user_details
         WHERE 1 = 2
        UNION ALL SELECT 'a',   'b',    NULL,   NULL
        UNION ALL SELECT NULL,  'b',    'c',    'd'
        UNION ALL SELECT 'a',   NULL,   'e',    'f'
        UNION ALL SELECT NULL,  'x',    'y',    'z'
        UNION ALL SELECT NULL,  NULL,   'y',    'z'
        UNION ALL SELECT NULL,  NULL,   'a',    'z'
       ) AS seed;
GO

--/*
-- TURN 6 ROWS INTO OVER 12K ROWS TO TEST PERFORMANCE
-- Each pass re-inserts the table into itself, doubling the row count.
DECLARE @count int;
SET @count = 0;
WHILE @count < 11
  BEGIN
    INSERT INTO dbo.user_details (user_name, email, first_name, last_name)
    SELECT user_name, email, first_name, last_name
      FROM dbo.user_details;

    SET @count = @count + 1;
  END;
--*/
-- =================================================================================
-- END: SETUP TEST DATA
-- =================================================================================


-- =================================================================================
-- BEGIN: NEW SOLUTION FINAL: <1sec on 12288 rows
-- =================================================================================
-- Strategy: split user_details into four de-duplicated temp tables, delete the
-- rows that a more complete row makes redundant, then stitch the survivors
-- back together with a UNION.
-- Column names are spelled exactly as declared (last_name, not last_Name) so
-- the script also resolves under a case-sensitive collation.
-- To preview what a DELETE below would remove, run its FROM/JOIN with
-- SELECT * in place of DELETE <alias>.
IF OBJECT_ID('tempdb..#useremail', 'U') IS NOT NULL DROP TABLE #useremail;
IF OBJECT_ID('tempdb..#email', 'U') IS NOT NULL DROP TABLE #email;
IF OBJECT_ID('tempdb..#user', 'U') IS NOT NULL DROP TABLE #user;
IF OBJECT_ID('tempdb..#name', 'U') IS NOT NULL DROP TABLE #name;


-- GET YOUR UNIQUE user_name AND email KEY
SELECT DISTINCT d.user_name, d.email
  INTO #useremail
  FROM user_details AS d;


-- GET YOUR UNIQUE email VALUES
SELECT DISTINCT d.email, d.first_name, d.last_name
  INTO #email
  FROM user_details AS d
 WHERE d.email IS NOT NULL;


-- GET YOUR UNIQUE user_name VALUES
SELECT DISTINCT d.user_name, d.first_name, d.last_name
  INTO #user
  FROM user_details AS d
 WHERE d.user_name IS NOT NULL;


-- GET YOUR UNIQUE first_name AND last_name VALUES NOT PART OF THE KEY
SELECT DISTINCT d.first_name, d.last_name
  INTO #name
  FROM user_details AS d
 WHERE d.first_name IS NOT NULL
   AND d.last_name  IS NOT NULL
   AND d.user_name  IS NULL
   AND d.email      IS NULL;


-- CLEAN UP YOUR UNIQUE user_name AND email KEY
-- A half-filled key (only user_name or only email) is dropped when a key with
-- both values exists for the same user_name or email.
DELETE partial_key
  FROM #useremail AS partial_key
 INNER JOIN (
        SELECT user_name, email
          FROM #useremail
         WHERE user_name IS NOT NULL
           AND email IS NOT NULL
       ) AS full_key
    ON (partial_key.user_name = full_key.user_name AND partial_key.email     IS NULL)
    OR (partial_key.email     = full_key.email     AND partial_key.user_name IS NULL);


-- CLEAN UP YOUR UNIQUE email VALUES
-- An email row without a name is dropped when the same email has a named row.
DELETE unnamed
  FROM #email AS unnamed
 INNER JOIN (
        SELECT email
          FROM #email
         WHERE first_name IS NOT NULL
           AND last_name  IS NOT NULL
       ) AS named
    ON unnamed.email = named.email
   AND unnamed.first_name IS NULL
   AND unnamed.last_name  IS NULL;


-- CLEAN UP YOUR UNIQUE user_name VALUES
-- A user_name row without a name is dropped when the same user_name has a
-- named row.
DELETE unnamed
  FROM #user AS unnamed
 INNER JOIN (
        SELECT user_name
          FROM #user
         WHERE first_name IS NOT NULL
           AND last_name  IS NOT NULL
       ) AS named
    ON unnamed.user_name = named.user_name
   AND unnamed.first_name IS NULL
   AND unnamed.last_name  IS NULL;


-- CLEAN UP YOUR UNIQUE #name VALUES
-- A name-only row is dropped when the same name pair is already attached to a
-- user_name or to an email.
DELETE n
  FROM #name AS n
 INNER JOIN #user AS u
    ON n.first_name = u.first_name
   AND n.last_name  = u.last_name;

DELETE n
  FROM #name AS n
 INNER JOIN #email AS e
    ON n.first_name = e.first_name
   AND n.last_name  = e.last_name;


-- GET YOUR DATA
-- UNION (not UNION ALL) is intentional: a key that has both user_name and
-- email can reach the same name through both branches. Key rows where both
-- columns are NULL match neither equality join and therefore drop out.
SELECT k.user_name
      ,k.email
      ,u.first_name
      ,u.last_name
  FROM #useremail AS k
 INNER JOIN #user AS u
    ON k.user_name = u.user_name
 UNION
SELECT k.user_name
      ,k.email
      ,e.first_name
      ,e.last_name
  FROM #useremail AS k
 INNER JOIN #email AS e
    ON k.email = e.email
 UNION
SELECT NULL AS [user_name]
      ,NULL AS [email]
      ,n.first_name
      ,n.last_name
  FROM #name AS n;
-- =================================================================================
-- END: NEW SOLUTION FINAL
-- =================================================================================