Sql 如何替换字符串中的所有特殊字符
我有一个包含以下列的表:Sql 如何替换字符串中的所有特殊字符,sql,sql-server,string,tsql,Sql,Sql Server,String,Tsql,我有一个包含以下列的表: dbo.SomeInfo - Id - Name - InfoCode 现在我需要将上表的InfoCode更新为 Update dbo.SomeInfo Set InfoCode= REPLACE(Replace(RTRIM(LOWER(Name)),' ','-'),':','') 这会将所有空格替换为名称的-&小写 当我检查信息代码时,我看到一些名称带有一些特殊字符,如 Cathe Friedrich''s Low Impact coffeyfit
dbo.SomeInfo
- Id
- Name
- InfoCode
现在我需要将上表的InfoCode更新为
-- Derives InfoCode from Name for every row of dbo.SomeInfo (no WHERE clause):
-- lowercase the name, drop trailing spaces, turn each space into '-' and remove ':'.
UPDATE dbo.SomeInfo
SET InfoCode = REPLACE(
                   REPLACE(RTRIM(LOWER(Name)), ' ', '-'),
                   ':', ''
               )
这会将名称转换为小写,
并将其中的所有空格替换为 -
当我检查信息代码时,我看到一些名称带有一些特殊字符,如
Cathe Friedrich''s Low Impact
coffeyfit-cardio-box-&-burn
Jillian Michaels: Cardio
然后,我将根据以下内容手动编写更新sql
-- Hand-written correction for one row, located by its exact Name value.
UPDATE dbo.SomeInfo
SET    InfoCode = 'cathe-friedrichs-low-impact'
WHERE  Name = 'Cathe Friedrich''s Low Impact '
现在,这个解决方案对我来说不现实。我查看了以下与Regex和其他相关的链接
The dancer's-workout®----starter package
有些名称的值如下所示:
Sleek Technique™
The Dancer's-workout®
如何编写能够处理所有此类特殊字符的更新sql?您可以使用 NGrams8k 将字符串拆分为单个字符,然后只保留允许的字符,而不是逐个替换每个不可接受的字符:
-- Rebuilds each sample string one character at a time: ASCII letters and digits
-- are kept, every other character is replaced by '-'.
-- dbo.NGrams8k is defined elsewhere; it is called here with a token size of 1 and
-- is expected to return one row per character with columns token and position.
SELECT (
           SELECT '' + CASE
                           -- The binary collation makes the bracket range compare by code point.
                           -- The range is written A-Za-z on purpose: a binary 'A-z' range would
                           -- also let through [ \ ] ^ _ and the backtick, which sit between Z and a.
                           WHEN N.token COLLATE Latin1_General_BIN LIKE '[A-Za-z0-9]' THEN N.token
                           ELSE '-'
                       END
           FROM dbo.NGrams8k(V.S, 1) AS N
           ORDER BY N.position
           FOR XML PATH('')   -- concatenates the per-character rows back into one string
       )
FROM (VALUES ('Sleek Technique™'), ('The Dancer''s-workout®')) AS V(S);
我在这里使用 COLLATE,
因为在我实例的默认排序规则下,'™' 会被忽略,所以我改用了二进制排序规则。您可能希望再次使用 COLLATE,
在子查询之外将字符串切换回原始排序规则。您可以使用 NGrams8k 将字符串拆分为单个字符,然后只保留允许的字符,而不是逐个替换每个不可接受的字符:
-- Rebuilds each sample string one character at a time: ASCII letters and digits
-- are kept, every other character is replaced by '-'.
-- dbo.NGrams8k is defined elsewhere; it is called here with a token size of 1 and
-- is expected to return one row per character with columns token and position.
SELECT (
           SELECT '' + CASE
                           -- The binary collation makes the bracket range compare by code point.
                           -- The range is written A-Za-z on purpose: a binary 'A-z' range would
                           -- also let through [ \ ] ^ _ and the backtick, which sit between Z and a.
                           WHEN N.token COLLATE Latin1_General_BIN LIKE '[A-Za-z0-9]' THEN N.token
                           ELSE '-'
                       END
           FROM dbo.NGrams8k(V.S, 1) AS N
           ORDER BY N.position
           FOR XML PATH('')   -- concatenates the per-character rows back into one string
       )
FROM (VALUES ('Sleek Technique™'), ('The Dancer''s-workout®')) AS V(S);
我在这里使用 COLLATE,
因为在我实例的默认排序规则下,'™' 会被忽略,所以我改用了二进制排序规则。您可能需要再次使用 COLLATE,
在子查询之外将字符串切换回原始排序规则。您可以为类似的内容创建一个用户定义的函数
然后在更新中使用UDF
-- dbo.LowerDashString
-- Converts a name into a lowercase, dash-separated code.
--   @str     text to convert
--   returns  the lowercased text with single quotes removed, every character
--            other than a-z replaced by '-', and runs of dashes collapsed to one.
--            Digits are treated like any other non-letter. A leading or trailing
--            dash is kept. A NULL input yields '' because the loop never runs.
CREATE FUNCTION [dbo].LowerDashString (@str varchar(255))
RETURNS varchar(255)
AS
BEGIN
    DECLARE @result varchar(255) = '';
    DECLARE @chr varchar(1);
    DECLARE @pos int = 1;

    -- lowercase the input and remove the single-quotes
    SET @str = REPLACE(LOWER(@str), '''', '');

    -- loop through the characters
    -- while replacing anything that's not a letter to a dash
    -- (LEN ignores trailing spaces, so trailing blanks never reach the loop)
    WHILE @pos <= LEN(@str)
    BEGIN
        SET @chr = SUBSTRING(@str, @pos, 1);

        -- Compare under a binary collation: with a dictionary collation the
        -- range a-z follows sort order and can also match accented letters.
        IF @chr COLLATE Latin1_General_BIN LIKE '[a-z]'
            SET @result += @chr;
        ELSE
            SET @result += '-';

        SET @pos += 1;
    END;

    -- On SQL Server 2017 and beyond, edge dashes could be stripped here with
    -- TRIM('-' FROM @result); it is left out to keep the output unchanged.

    -- multiple dashes to one dash
    WHILE @result LIKE '%--%'
        SET @result = REPLACE(@result, '--', '-');

    RETURN @result;
END;
GO
结果:
Id InfoCode
-- -----------------------------
1 cathe-friedrichs-low-impact
2 coffeyfit-cardio-box-burn
3 jillian-michaels-cardio
4 sleek-technique-
5 the-dancers-workout-
您可以为类似的内容创建一个用户定义的函数
然后在更新中使用UDF
-- dbo.LowerDashString
-- Converts a name into a lowercase, dash-separated code.
--   @str     text to convert
--   returns  the lowercased text with single quotes removed, every character
--            other than a-z replaced by '-', and runs of dashes collapsed to one.
--            Digits are treated like any other non-letter. A leading or trailing
--            dash is kept. A NULL input yields '' because the loop never runs.
CREATE FUNCTION [dbo].LowerDashString (@str varchar(255))
RETURNS varchar(255)
AS
BEGIN
    DECLARE @result varchar(255) = '';
    DECLARE @chr varchar(1);
    DECLARE @pos int = 1;

    -- lowercase the input and remove the single-quotes
    SET @str = REPLACE(LOWER(@str), '''', '');

    -- loop through the characters
    -- while replacing anything that's not a letter to a dash
    -- (LEN ignores trailing spaces, so trailing blanks never reach the loop)
    WHILE @pos <= LEN(@str)
    BEGIN
        SET @chr = SUBSTRING(@str, @pos, 1);

        -- Compare under a binary collation: with a dictionary collation the
        -- range a-z follows sort order and can also match accented letters.
        IF @chr COLLATE Latin1_General_BIN LIKE '[a-z]'
            SET @result += @chr;
        ELSE
            SET @result += '-';

        SET @pos += 1;
    END;

    -- On SQL Server 2017 and beyond, edge dashes could be stripped here with
    -- TRIM('-' FROM @result); it is left out to keep the output unchanged.

    -- multiple dashes to one dash
    WHILE @result LIKE '%--%'
        SET @result = REPLACE(@result, '--', '-');

    RETURN @result;
END;
GO
结果:
Id InfoCode
-- -----------------------------
1 cathe-friedrichs-low-impact
2 coffeyfit-cardio-box-burn
3 jillian-michaels-cardio
4 sleek-technique-
5 the-dancers-workout-
此方法完全可嵌入:
首先,我们需要一个带有一些测试数据的模型表:
-- Mock-up table variable holding the sample names used by the query that follows.
DECLARE @SomeInfo TABLE
(
    Id       INT IDENTITY,
    InfoCode VARCHAR(100)
);

INSERT INTO @SomeInfo (InfoCode)
VALUES
    ('Cathe Friedrich''s Low Impact'),
    ('coffeyfit-cardio-box-&-burn'),
    ('Jillian Michaels: Cardio'),
    ('Sleek Technique™'),
    ('The Dancer''s-workout®');
--This is the query
--It reads @SomeInfo and returns one FinalResult per row, ordered by Id.
WITH cte AS
(
    --Recursive walk: one output row per character position of the lowercased text
    SELECT 1 AS position
          ,si.Id
          ,LOWER(si.InfoCode) AS SourceText
          ,SUBSTRING(LOWER(si.InfoCode),1,1) AS OneChar
    FROM @SomeInfo si
    UNION ALL
    SELECT cte.position +1
          ,cte.Id
          ,cte.SourceText
          ,SUBSTRING(LOWER(cte.SourceText),cte.position+1,1) AS OneChar
    FROM cte
    WHERE position < DATALENGTH(SourceText) --stops after the last character
)
,Cleaned AS
(
    --Re-assemble each string, keeping letters and digits and turning everything else into '-'
    SELECT cte.Id
          ,(
             SELECT CASE WHEN ASCII(cte2.OneChar) BETWEEN 65 AND 90 --A-Z
                           OR ASCII(cte2.OneChar) BETWEEN 97 AND 122--a-z
                           OR ASCII(cte2.OneChar) BETWEEN 48 AND 57 --0-9
                           --You can easily add more ranges
                         THEN cte2.OneChar ELSE '-'
                         --You can easily nest another CASE to deal with special characters like the single quote in your examples...
                    END
             FROM cte AS cte2
             WHERE cte2.Id=cte.Id
             ORDER BY cte2.position
             FOR XML PATH('')
           ) AS normalised
    FROM cte
    GROUP BY cte.Id
)
,NoDoubleHyphens AS
(
    --Collapse runs of hyphens: each '-' becomes '<>', adjacent '><' pairs vanish, the survivor turns back into '-'
    --Id is carried along so the final result can be ordered
    SELECT Id
          ,REPLACE(REPLACE(REPLACE(normalised,'-','<>'),'><',''),'<>','-') AS normalised2
    FROM Cleaned
)
,NoLeadingHyphen AS
(
    --After collapsing there is at most one hyphen at the start; drop it
    SELECT Id
          ,CASE WHEN LEFT(normalised2,1)='-' THEN SUBSTRING(normalised2,2,LEN(normalised2)) ELSE normalised2 END AS normalised3
    FROM NoDoubleHyphens
)
--Drop the single trailing hyphen, if any
SELECT CASE WHEN RIGHT(normalised3,1)='-' THEN SUBSTRING(normalised3,1,LEN(normalised3)-1) ELSE normalised3 END AS FinalResult
FROM NoLeadingHyphen
ORDER BY Id --keeps the output in the same order as the source rows
OPTION (MAXRECURSION 0); --lifts the default cap of 100 recursion levels so longer strings work; the WHERE above ends the recursion
此方法完全可嵌入:
首先,我们需要一个带有一些测试数据的模型表:
-- Mock-up table variable holding the sample names used by the query that follows.
DECLARE @SomeInfo TABLE
(
    Id       INT IDENTITY,
    InfoCode VARCHAR(100)
);

INSERT INTO @SomeInfo (InfoCode)
VALUES
    ('Cathe Friedrich''s Low Impact'),
    ('coffeyfit-cardio-box-&-burn'),
    ('Jillian Michaels: Cardio'),
    ('Sleek Technique™'),
    ('The Dancer''s-workout®');
--This is the query
--It reads @SomeInfo and returns one FinalResult per row, ordered by Id.
WITH cte AS
(
    --Recursive walk: one output row per character position of the lowercased text
    SELECT 1 AS position
          ,si.Id
          ,LOWER(si.InfoCode) AS SourceText
          ,SUBSTRING(LOWER(si.InfoCode),1,1) AS OneChar
    FROM @SomeInfo si
    UNION ALL
    SELECT cte.position +1
          ,cte.Id
          ,cte.SourceText
          ,SUBSTRING(LOWER(cte.SourceText),cte.position+1,1) AS OneChar
    FROM cte
    WHERE position < DATALENGTH(SourceText) --stops after the last character
)
,Cleaned AS
(
    --Re-assemble each string, keeping letters and digits and turning everything else into '-'
    SELECT cte.Id
          ,(
             SELECT CASE WHEN ASCII(cte2.OneChar) BETWEEN 65 AND 90 --A-Z
                           OR ASCII(cte2.OneChar) BETWEEN 97 AND 122--a-z
                           OR ASCII(cte2.OneChar) BETWEEN 48 AND 57 --0-9
                           --You can easily add more ranges
                         THEN cte2.OneChar ELSE '-'
                         --You can easily nest another CASE to deal with special characters like the single quote in your examples...
                    END
             FROM cte AS cte2
             WHERE cte2.Id=cte.Id
             ORDER BY cte2.position
             FOR XML PATH('')
           ) AS normalised
    FROM cte
    GROUP BY cte.Id
)
,NoDoubleHyphens AS
(
    --Collapse runs of hyphens: each '-' becomes '<>', adjacent '><' pairs vanish, the survivor turns back into '-'
    --Id is carried along so the final result can be ordered
    SELECT Id
          ,REPLACE(REPLACE(REPLACE(normalised,'-','<>'),'><',''),'<>','-') AS normalised2
    FROM Cleaned
)
,NoLeadingHyphen AS
(
    --After collapsing there is at most one hyphen at the start; drop it
    SELECT Id
          ,CASE WHEN LEFT(normalised2,1)='-' THEN SUBSTRING(normalised2,2,LEN(normalised2)) ELSE normalised2 END AS normalised3
    FROM NoDoubleHyphens
)
--Drop the single trailing hyphen, if any
SELECT CASE WHEN RIGHT(normalised3,1)='-' THEN SUBSTRING(normalised3,1,LEN(normalised3)-1) ELSE normalised3 END AS FinalResult
FROM NoLeadingHyphen
ORDER BY Id --keeps the output in the same order as the source rows
OPTION (MAXRECURSION 0); --lifts the default cap of 100 recursion levels so longer strings work; the WHERE above ends the recursion