Sql server 如何格式化/清除源数据以导入SQL Server SSN和电话号码

Sql server 如何格式化/清除源数据以导入SQL Server SSN和电话号码,sql-server,data-migration,dml,Sql Server,Data Migration,Dml,由于我的数据库存储SSN和电话号码的格式化数据,因此我需要先获取传入数据的方法,而不管其格式如何,并对其进行格式化,以匹配我的数据库在这些字段中存储数据的方式。我要迁移的数据由应用程序由最终用户从外部应用程序临时导入到临时表中,然后进行重构和操作,以便插入到我的客户机数据库中 我在处理没有正则表达式的数据时遇到问题。如何在SQLServer中完成像这样的DML任务?下面是我的两种数据类型所需的输出。我正在努力将源数据转换成这些输出格式 数据存储插入所需的输出格式 SSN:123-45-6789

由于我的数据库以格式化的形式存储SSN和电话号码,因此我需要一种方法,先获取传入的数据(无论其格式如何),再将其格式化,以匹配我的数据库在这些字段中存储数据的方式。我要迁移的数据由最终用户通过应用程序从外部应用程序临时导入到临时表中,然后经过重构和处理,以便插入到我的客户端数据库中

我在不使用正则表达式的情况下处理这些数据时遇到了问题。如何在SQL Server中完成这类DML任务?下面是我的两种数据类型所需的输出格式。我正在努力将源数据转换成这些输出格式

数据存储插入所需的输出格式

SSN:123-45-6789

SSN:如果是8个字符,则用前导零填充

SSN:如果少于8个字符,则用问号填充,只保留最后四位,例如“???-??-1234”(别问为什么)

电话:123-456-7890

示例代码

-- Sample query: reshapes raw SSN and phone strings into display formats.
-- The CTE is inline test data standing in for the imported staging rows.
WITH fakeCSVData (SSN, Phone) AS
(
    SELECT raw.SSN, raw.Phone
    FROM (VALUES
        ('111223333', '(444) 4444444'),
        ('211222121', '101 232-4545'),
        ('12334556',  '(191) 330-4345'),
        ('41531',     '(039) 084-8309'),
        ('220981278', '(298) 372-9234'),
        ('222013450', '(78) 909-7790'),
        ('123456789', '(717)_272-7277'),
        ('113344556', '210-973-2123'),
        ('808768252', '(219) 362-1895'),
        ('3456',      '895 536-5356'),
        ('204874556', '(909) 544-9124'),
        ('80832934',  '0271932132')
    ) AS raw (SSN, Phone)
)
SELECT
    -- SSN: the first matching shape wins; anything unmatched yields NULL.
    CASE
        -- Already formatted as 3-2-4: pass through.
        WHEN t.ssn_trimmed LIKE '[0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9][0-9][0-9]'
            THEN t.ssn_trimmed
        -- Formatted as 2-2-4: prepend a single zero.
        WHEN t.ssn_trimmed LIKE '[0-9][0-9]-[0-9][0-9]-[0-9][0-9][0-9][0-9]'
            THEN RIGHT(REPLICATE('0', 1) + t.ssn_trimmed, 11)
        -- Nine bare digits: insert the dashes.
        WHEN t.ssn_trimmed LIKE '[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]'
            THEN SUBSTRING(t.ssn_trimmed, 1, 3)
                 + '-' + SUBSTRING(t.ssn_trimmed, 4, 2)
                 + '-' + SUBSTRING(t.ssn_trimmed, 6, 4)
        -- Eight bare digits: insert the dashes, then prepend a single zero.
        WHEN t.ssn_trimmed LIKE '[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]'
            THEN RIGHT(
                     REPLICATE('0', 1)
                     + SUBSTRING(t.ssn_trimmed, 1, 2)
                     + '-' + SUBSTRING(t.ssn_trimmed, 3, 2)
                     + '-' + SUBSTRING(t.ssn_trimmed, 5, 4),
                     11)
        -- Anything else ending in four digits: mask all but the last four.
        WHEN RIGHT(t.ssn_trimmed, 4) LIKE '%[0-9][0-9][0-9][0-9]'
            THEN '???-??-' + RIGHT(t.ssn_trimmed, 4)
        ELSE NULL
    END AS SocSecNo,
    -- Phone: parentheses are removed, spaces become dashes, and the result is
    -- cut to 12 characters; an empty result becomes NULL. Other separators
    -- (e.g. '_') and missing dashes are passed through unchanged.
    NULLIF(LEFT(REPLACE(LTRIM(RTRIM(t.phone_no_parens)), ' ', '-'), 12), '') AS Phone
FROM fakeCSVData AS csv
CROSS APPLY (VALUES (
    LTRIM(RTRIM(csv.SSN)),
    REPLACE(REPLACE(csv.Phone, ')', ''), '(', '')
)) AS t (ssn_trimmed, phone_no_parens);
SocSecNo    | Phone
--------------------------
111-22-3333 | 444-4444444
211-22-2121 | 101-232-4545
012-33-4556 | 191-330-4345
???-??-1531 | 039-084-8309
220-98-1278 | 298-372-9234
222-01-3450 | 78-909-7790
123-45-6789 | 717_272-7277
???-??-4556 | 210-973-2123
808-76-8252 | 219-362-1895
???-??-3456 | 895-536-5356
204-87-4556 | 909-544-9124
080-83-2934 | 0271932132
样本代码的当前输出

-- Sample query: reshapes raw SSN and phone strings into display formats.
-- The CTE is inline test data standing in for the imported staging rows.
WITH fakeCSVData (SSN, Phone) AS
(
    SELECT raw.SSN, raw.Phone
    FROM (VALUES
        ('111223333', '(444) 4444444'),
        ('211222121', '101 232-4545'),
        ('12334556',  '(191) 330-4345'),
        ('41531',     '(039) 084-8309'),
        ('220981278', '(298) 372-9234'),
        ('222013450', '(78) 909-7790'),
        ('123456789', '(717)_272-7277'),
        ('113344556', '210-973-2123'),
        ('808768252', '(219) 362-1895'),
        ('3456',      '895 536-5356'),
        ('204874556', '(909) 544-9124'),
        ('80832934',  '0271932132')
    ) AS raw (SSN, Phone)
)
SELECT
    -- SSN: the first matching shape wins; anything unmatched yields NULL.
    CASE
        -- Already formatted as 3-2-4: pass through.
        WHEN t.ssn_trimmed LIKE '[0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9][0-9][0-9]'
            THEN t.ssn_trimmed
        -- Formatted as 2-2-4: prepend a single zero.
        WHEN t.ssn_trimmed LIKE '[0-9][0-9]-[0-9][0-9]-[0-9][0-9][0-9][0-9]'
            THEN RIGHT(REPLICATE('0', 1) + t.ssn_trimmed, 11)
        -- Nine bare digits: insert the dashes.
        WHEN t.ssn_trimmed LIKE '[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]'
            THEN SUBSTRING(t.ssn_trimmed, 1, 3)
                 + '-' + SUBSTRING(t.ssn_trimmed, 4, 2)
                 + '-' + SUBSTRING(t.ssn_trimmed, 6, 4)
        -- Eight bare digits: insert the dashes, then prepend a single zero.
        WHEN t.ssn_trimmed LIKE '[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]'
            THEN RIGHT(
                     REPLICATE('0', 1)
                     + SUBSTRING(t.ssn_trimmed, 1, 2)
                     + '-' + SUBSTRING(t.ssn_trimmed, 3, 2)
                     + '-' + SUBSTRING(t.ssn_trimmed, 5, 4),
                     11)
        -- Anything else ending in four digits: mask all but the last four.
        WHEN RIGHT(t.ssn_trimmed, 4) LIKE '%[0-9][0-9][0-9][0-9]'
            THEN '???-??-' + RIGHT(t.ssn_trimmed, 4)
        ELSE NULL
    END AS SocSecNo,
    -- Phone: parentheses are removed, spaces become dashes, and the result is
    -- cut to 12 characters; an empty result becomes NULL. Other separators
    -- (e.g. '_') and missing dashes are passed through unchanged.
    NULLIF(LEFT(REPLACE(LTRIM(RTRIM(t.phone_no_parens)), ' ', '-'), 12), '') AS Phone
FROM fakeCSVData AS csv
CROSS APPLY (VALUES (
    LTRIM(RTRIM(csv.SSN)),
    REPLACE(REPLACE(csv.Phone, ')', ''), '(', '')
)) AS t (ssn_trimmed, phone_no_parens);
SocSecNo    | Phone
--------------------------
111-22-3333 | 444-4444444
211-22-2121 | 101-232-4545
012-33-4556 | 191-330-4345
???-??-1531 | 039-084-8309
220-98-1278 | 298-372-9234
222-01-3450 | 78-909-7790
123-45-6789 | 717_272-7277
???-??-4556 | 210-973-2123
808-76-8252 | 219-362-1895
???-??-3456 | 895-536-5356
204-87-4556 | 909-544-9124
080-83-2934 | 0271932132

我一直在想,如果我有一个简单的方法,首先从传入的源数据中删除所有非数字字符,那么我就可以根据需要格式化字符串。。。但我没有发现任何SQL Server本机函数可以做到这一点。

有点难看,但这可能会有所帮助

-- Formats raw SSN and phone strings into the stored display formats
-- (SSN: 000-00-0000, phone: 000-000-0000).
--
-- Approach: strip the known separator characters, convert what is left to
-- bigint, and let FORMAT() apply a mask chosen by digit count via CHOOSE().
-- In a mask, '0' is a digit placeholder; '?' and '-' are copied literally.
--
-- SSN rules:
--   9 digits   -> 000-00-0000
--   8 digits   -> padded with one leading zero (012-33-4556)
--   5-7 digits -> only the last four digits are kept (???-??-1531)
--   1-4 digits -> missing positions are shown as '?'
-- Phone rules:
--   10 digits  -> 000-000-0000; shorter values show '?' in the leading positions.
--
-- CHOOSE returns NULL (no mask) for empty or over-long input, and TRY_CONVERT
-- returns NULL when characters other than digits remain after clean-up; in
-- either case the output column is expected to be NULL.
WITH fakeCSVData AS
(
    SELECT '111223333' AS SSN, '(444) 4444444'  AS Phone UNION ALL
    SELECT '211222121' AS SSN, '101 232-4545'   AS Phone UNION ALL
    SELECT '12334556'  AS SSN, '(191) 330-4345' AS Phone UNION ALL
    SELECT '41531'     AS SSN, '(039) 084-8309' AS Phone UNION ALL
    SELECT '220981278' AS SSN, '(298) 372-9234' AS Phone UNION ALL
    SELECT '222013450' AS SSN, '(78) 909-7790'  AS Phone UNION ALL
    SELECT '123456789' AS SSN, '(717)_272-7277' AS Phone UNION ALL
    SELECT '113344556' AS SSN, '210-973-2123'   AS Phone UNION ALL
    SELECT '808768252' AS SSN, '(219) 362-1895' AS Phone UNION ALL
    SELECT '3456'      AS SSN, '895 536-5356'   AS Phone UNION ALL
    SELECT '204874556' AS SSN, '(909) 544-9124' AS Phone UNION ALL
    SELECT '80832934'  AS SSN, '0271932132'     AS Phone
)
SELECT
      NewSSN = FORMAT(
                   TRY_CONVERT(bigint,
                       -- For 5-7 digits only the last four may be shown; trim
                       -- first, otherwise FORMAT would emit the surplus digits.
                       CASE WHEN LEN(B.SsnDigits) BETWEEN 5 AND 7
                            THEN RIGHT(B.SsnDigits, 4)
                            ELSE B.SsnDigits
                       END),
                   CHOOSE(LEN(B.SsnDigits)
                         ,'???-??-???0'
                         ,'???-??-??00'
                         ,'???-??-?000'
                         ,'???-??-0000'
                         ,'???-??-0000'   -- 5 digits: last four only
                         ,'???-??-0000'   -- 6 digits: last four only
                         ,'???-??-0000'   -- 7 digits: last four only
                         ,'000-00-0000'   -- 8 digits: leading zero
                         ,'000-00-0000'))
    , NewPhn = FORMAT(
                   TRY_CONVERT(bigint, B.PhnDigits),
                   CHOOSE(LEN(B.PhnDigits)
                         ,'???-???-???0'
                         ,'???-???-??00'
                         ,'???-???-?000'
                         ,'???-???-0000'
                         ,'???-??0-0000'
                         ,'???-?00-0000'
                         ,'???-000-0000'
                         ,'??0-000-0000'
                         ,'?00-000-0000'
                         ,'000-000-0000'))
FROM fakeCSVData AS A
-- Remove the separators seen in the source data (space, parentheses, dash,
-- underscore) from both columns. Extend this list, or swap in a UDF that
-- strips every non-digit, if other separators show up.
CROSS APPLY (VALUES (
      REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(A.SSN,   ' ', ''), '(', ''), ')', ''), '-', ''), '_', '')
    , REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(A.Phone, ' ', ''), '(', ''), ')', ''), '-', ''), '_', '')
)) AS B (SsnDigits, PhnDigits);
返回

编辑

您可能会注意到,CROSS APPLY 会清理 Phone 字符串。这部分可能需要一些维护,甚至可以改用一个 UDF 来剔除所有非数字字符