Sql server 如何使用函数从SQL Server中的字符串中剥离所有HTML(em/strong除外)?
我有这个功能:Sql server 如何使用函数从SQL Server中的字符串中剥离所有HTML(em/strong除外)?,sql-server,sql-server-2005,function,substring,Sql Server,Sql Server 2005,Function,Substring,我有这个功能: CREATE FUNCTION [dbo].[udf_StripHTML] (@HTMLText VARCHAR(MAX)) RETURNS VARCHAR(MAX) AS BEGIN DECLARE @Start INT DECLARE @End INT DECLARE @Length INT SET @Start = CHARINDEX('<',@HTMLText) SET @End = CHARINDEX('>',@HTMLText,CHARINDEX('<
-- udf_StripHTML
--   Deletes every '<' ... '>' span from @HTMLText, working left to right,
--   and returns the remainder with leading/trailing spaces trimmed.
--   A '<' that has no '>' after it ends the scan; everything from that
--   point on is returned untouched. NULL input yields NULL.
CREATE FUNCTION [dbo].[udf_StripHTML]
(@HTMLText VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
    DECLARE @TagOpen INT, @TagClose INT

    -- Locate the first tag: '<' and the first '>' at or after it.
    SELECT @TagOpen = CHARINDEX('<', @HTMLText)
    SELECT @TagClose = CHARINDEX('>', @HTMLText, @TagOpen)

    -- @TagClose > @TagOpen also covers "no closing bracket" (CHARINDEX = 0).
    WHILE @TagOpen > 0 AND @TagClose > @TagOpen
    BEGIN
        -- Cut the tag out, brackets included, then look for the next one.
        SELECT @HTMLText = STUFF(@HTMLText, @TagOpen, @TagClose - @TagOpen + 1, '')
        SELECT @TagOpen = CHARINDEX('<', @HTMLText)
        SELECT @TagClose = CHARINDEX('>', @HTMLText, @TagOpen)
    END

    RETURN LTRIM(RTRIM(@HTMLText))
END
创建函数[dbo].[udf\u StripHTML]
(@HTMLText-VARCHAR(最大值))
返回VARCHAR(最大值)
作为
开始
声明@Start INT
声明@End INT
声明@Length INT
设置@Start=CHARINDEX('<',@HTMLText) 设置@End=CHARINDEX('>',@HTMLText,CHARINDEX('<……
可以通过CLR函数进行XSLT转换(例如,使用MDS程序集中的转换功能)。
更新
以下是使用XSLT转换方法的答案:
更新2
那么剩下的唯一选择就是使用正则表达式。同样是通过CLR函数
安装MDS组件
编辑:下面是我用来解决问题的方法,因为我的HTML非常具体。我原来的问题的正确答案是丹尼斯接受的
我使用的数据非常具体
我需要移除的标签都是大写的,例如:<A HREF=...>、</A>
我想保留的标签都是小写的,例如:<em>、<strong>
因此,我可以用PATINDEX而不是CHARINDEX轻松完成这项工作:
-- udf_StripHTMLlinks
--   Removes only the tags that the pattern below recognises: a '<' followed
--   by '/' or an upper-case letter, followed by '/', an upper-case letter,
--   a space or '>'. The search is forced to be case-sensitive through the
--   explicit COLLATE, so lower-case tags such as <em> are left in place.
--   Each matched tag is deleted up to and including the next '>'; the
--   result is returned with leading/trailing spaces trimmed.
ALTER FUNCTION [dbo].[udf_StripHTMLlinks]
(@HTMLText VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
    DECLARE @UpperTagPattern VARCHAR(100)
    DECLARE @TagOpen INT, @TagClose INT

    -- Letters are listed one by one rather than as a range.
    SELECT @UpperTagPattern = '%<[/ABCDEFGHIJKLMNOPQRSTUVWXYZ][/ABCDEFGHIJKLMNOPQRSTUVWXYZ >]%'

    SELECT @TagOpen = PATINDEX(@UpperTagPattern, @HTMLText COLLATE SQL_Latin1_General_CP1_CS_AS)
    SELECT @TagClose = CHARINDEX('>', @HTMLText, @TagOpen)

    -- Stops when no upper-case tag is left or the tag found is never closed.
    WHILE @TagOpen > 0 AND @TagClose > @TagOpen
    BEGIN
        SELECT @HTMLText = STUFF(@HTMLText, @TagOpen, @TagClose - @TagOpen + 1, '')
        SELECT @TagOpen = PATINDEX(@UpperTagPattern, @HTMLText COLLATE SQL_Latin1_General_CP1_CS_AS)
        SELECT @TagClose = CHARINDEX('>', @HTMLText, @TagOpen)
    END

    RETURN LTRIM(RTRIM(@HTMLText))
END
ALTER函数[dbo].[udf\u StripHTMLlinks]
(@HTMLText-VARCHAR(最大值))
返回VARCHAR(最大值)
作为
开始
声明@Start INT
声明@End INT
声明@Length INT
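设置@Start=PATINDEX('%<[/ABCDEFGHIJKLMNOPQRSTUVWXYZ][/ABCDEFGHIJKLMNOPQRSTUVWXYZ >]%',@HTMLText COLLATE SQL_Latin1_General_CP1_CS_AS)
设置@End=CHARINDEX('>',@HTMLText,PATINDEX('%<[/ABCDEFGHIJKLMNOPQRSTUVWXYZ][/ABCDEFGHIJKLMNOPQRSTUVWXYZ >]%',@HTMLText COLLATE SQL_Latin1_General_CP1_CS_AS))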
设置@Length=(@End-@Start)+1
当@Start>0时
和@End>0
和@Length>0
开始
设置@HTMLText=STUFF(@HTMLText,@Start,@Length,'')
设置@Start=PATINDEX('%<[/ABCDEFGHIJKLMNOPQRSTUVWXYZ][/ABCDEFGHIJKLMNOPQRSTUVWXYZ >]%',@HTMLText COLLATE SQL_Latin1_General_CP1_CS_AS)
设置@End=CHARINDEX('>',@HTMLText,PATINDEX('%<[/ABCDEFGHIJKLMNOPQRSTUVWXYZ][/ABCDEFGHIJKLMNOPQRSTUVWXYZ >]%',@HTMLText COLLATE SQL_Latin1_General_CP1_CS_AS))
设置@Length=(@End-@Start)+1
结束
返回LTRIM(RTRIM(@HTMLText))
结束
感谢您的输入。您可以使用如下SQL函数:
-- StripOutHTML
--   Converts an HTML fragment to text.
--
--   Parameters
--     @HTMLText            the markup to clean; NULL yields NULL.
--     @stripDisallowedOnly 1    = keep <b>, <u> and <i> tags (opening or
--                                 closing, any letter case, with or without
--                                 attributes) and strip every other tag;
--                          0 or NULL = strip every tag.
--   Returns
--     The text with entities decoded, <P>/<br> turned into CR/LF, tags
--     removed as described above, leading tab/CR/LF/space removed and
--     trailing spaces removed.
--
--   Processing order (entities are decoded BEFORE tags are stripped, so
--   encoded markup such as '&lt;i&gt;' is treated like a real tag):
--     1. '&amp;'  -> '&'   (repeated, to undo double encoding)
--     2. '&lt;'   -> '<',  '&gt;' -> '>',  '&nbsp;' -> ' '
--     3. '<P>', '</P>', '<br>', '<br/>', '<br />' -> CR/LF
--     4. tag removal
--     5. trimming
ALTER FUNCTION [dbo].[StripOutHTML]
(
    @HTMLText VARCHAR(max),
    @stripDisallowedOnly BIT
)
returns VARCHAR(max)
AS
BEGIN
    DECLARE @Start INT
    DECLARE @End INT
    DECLARE @Length INT
    DECLARE @TagName VARCHAR(MAX)
    DECLARE @FirstKept INT
    DECLARE @trimchars VARCHAR(10)

    -- Step 1: decode '&amp;' first, and keep going until none is left, because
    -- other entities may be double encoded (e.g. '&amp;lt;' must become
    -- '&lt;' so that step 2 can see it). Every pass shortens the string, so
    -- the loop always terminates.
    WHILE CHARINDEX('&amp;', @HTMLText) > 0
        SET @HTMLText = REPLACE(@HTMLText, '&amp;', '&')

    -- Step 2: remaining entities. The replacement characters cannot form a
    -- new entity, so a single pass is enough.
    SET @HTMLText = REPLACE(@HTMLText, '&lt;', '<')
    SET @HTMLText = REPLACE(@HTMLText, '&gt;', '>')
    SET @HTMLText = REPLACE(@HTMLText, '&nbsp;', ' ')

    -- Step 3: paragraph and line-break tags become a real CR/LF pair.
    -- (CHAR(13) + CHAR(10) must be an expression, not a quoted string.)
    SET @HTMLText = REPLACE(@HTMLText, '<P>', '<br>')
    SET @HTMLText = REPLACE(@HTMLText, '</P>', '<br>')
    SET @HTMLText = REPLACE(@HTMLText, '<br>', CHAR(13) + CHAR(10))
    SET @HTMLText = REPLACE(@HTMLText, '<br/>', CHAR(13) + CHAR(10))
    SET @HTMLText = REPLACE(@HTMLText, '<br />', CHAR(13) + CHAR(10))

    -- Step 4: walk the '<' ... '>' spans from left to right.
    SET @Start = CHARINDEX('<', @HTMLText)
    SET @End = CHARINDEX('>', @HTMLText, @Start)

    -- Ends when there is no further '<', or the '<' found is never closed.
    WHILE @Start > 0 AND @End > @Start
    BEGIN
        SET @Length = @End - @Start + 1

        -- Isolate the bare, upper-cased tag name: take the text between the
        -- brackets, drop a leading '/', and cut at the first character that
        -- is not a letter or digit (space, '/', '=', ...). The appended
        -- space guarantees PATINDEX finds a cut position.
        SET @TagName = UPPER(SUBSTRING(@HTMLText, @Start + 1, @Length - 2))
        IF LEFT(@TagName, 1) = '/'
            SET @TagName = SUBSTRING(@TagName, 2, LEN(@TagName))
        SET @TagName = LEFT(@TagName, PATINDEX('%[^A-Z0-9]%', @TagName + ' ') - 1)

        IF @stripDisallowedOnly = 1 AND @TagName IN ('B', 'U', 'I')
        BEGIN
            -- Allowed tag: leave it in place and resume after its '>'.
            -- The whole name is compared, so <BODY>, <IMG>, <IFRAME>, <UL>
            -- etc. are NOT mistaken for <B>, <I> or <U>.
            SET @Start = CHARINDEX('<', @HTMLText, @End + 1)
        END
        ELSE
        BEGIN
            -- Remove the tag; the text that followed it now begins at @Start.
            SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '')
            SET @Start = CHARINDEX('<', @HTMLText, @Start)
        END

        SET @End = CHARINDEX('>', @HTMLText, @Start)
    END

    -- Step 5: remove any leading tab / line feed / carriage return / space.
    SET @trimchars = CHAR(9) + CHAR(10) + CHAR(13) + CHAR(32)
    SET @FirstKept = PATINDEX('%[^' + @trimchars + ']%', @HTMLText)
    IF @FirstKept = 0
        -- Nothing but trim characters (or already empty): result is empty.
        SET @HTMLText = ''
    ELSE IF @FirstKept > 1
        SET @HTMLText = SUBSTRING(@HTMLText, @FirstKept, LEN(@HTMLText))

    RETURN LTRIM(RTRIM(@HTMLText))
END
我需要删除的是类似 <A HREF=...>adsf</A> 这样的标签;其中的属性没有加引号,HREF 也没有。
SQL Server 并不适合做这类事情,不过我很确定你可以在提交处理之前,先对输入的 HTML 字符串运行 HTML Tidy。
这是用新数据替换旧的错误数据的工作的一部分,而旧数据非常具体(即:HTML 标记都是大写的,而我想保留的是小写的)。通过使用 PATINDEX 而不是 CHARINDEX,我可以更轻松地做到这一点——请查看我的答案。
我不知道你在说什么。
我最初的问题没有说得足够具体。我实际上不需要它来处理所有 HTML,只需处理当前存储在数据库中的 HTML。幸运的是,数据库中我不想要的 HTML 都是大写字母,而我想保留的 HTML 都是小写字母,所以我仍然是删除所有 HTML,但只删除用大写字母书写的部分。我应该在原始问题中说得更具体一些。
-- Tag-removal loop (fragment). @Start, @End, @Length, @HTMLText and
-- @stripDisallowedOnly are not declared or initialised in this fragment;
-- presumably @Start/@End hold the positions of a '<' and its following '>'
-- and @Length the span between them -- confirm against the enclosing code.
-- The loop runs while all three counters are positive.
WHILE ( @Start > 0
AND @End > 0
AND @Length > 0 )
BEGIN
IF @stripDisallowedOnly = 1
BEGIN
-- Remove the span unless its first 2 (opening) or 3 (closing) characters,
-- upper-cased, are '<B', '</B', '<U', '</U', '<I' or '</I'.
-- NOTE(review): this is a prefix comparison, so any tag whose name merely
-- starts with B, U or I (e.g. '<BODY', '<UL', '<IMG') is kept as well.
IF ( Upper(Substring(@HTMLText, @Start, 2)) <> '<B' )
AND ( Upper(Substring(@HTMLText, @Start, 3)) <> '</B' )
AND ( Upper(Substring(@HTMLText, @Start, 2)) <> '<U' )
AND ( Upper(Substring(@HTMLText, @Start, 3)) <> '</U' )
AND ( Upper(Substring(@HTMLText, @Start, 2)) <> '<I' )
AND ( Upper(Substring(@HTMLText, @Start, 3)) <> '</I' )
BEGIN
SET @HTMLText = Stuff(@HTMLText, @Start, @Length, '')
END
ELSE
BEGIN
-- Span is kept: zeroing @Length makes the search below resume at @End
-- instead of at @End - (span length).
SET @Length = 0
END
END
ELSE
BEGIN
-- Flag is not 1: every span is removed.
SET @HTMLText = Stuff(@HTMLText, @Start, @Length, '')
END
-- Find the next '<' starting at @End - @Length, then the first '>' at or
-- after that '<', and recompute the span length for the next iteration.
SET @Start = Charindex('<', @HTMLText, @End - @Length)
SET @End = Charindex('>', @HTMLText, Charindex('<', @HTMLText,
@Start)
)
SET @Length = ( @End - @Start ) + 1
END