SQL用户定义的函数,用于剥离HTML标记并替换HTML实体
我正在尝试编写一个UDF(实际上我正在将我在web上找到的一些代码改编成一个函数)来完成标题中描述的工作 代码如下:SQL用户定义的函数,用于剥离HTML标记并替换HTML实体,sql,sql-server,sql-server-2008,Sql,Sql Server,Sql Server 2008,我正在尝试编写一个UDF(实际上我正在将我在web上找到的一些代码改编成一个函数)来完成标题中描述的工作 代码如下: declare @txt varchar(max), @start int, @end int, @len int set @txt = '<p class="answer">Informamos que a documentação <strong>deve ser impre
-- Purpose: strip every <...> tag from @txt, then decode a fixed list of named
-- HTML entities into their single-character equivalents and return the
-- trimmed result as a one-row, one-column result set.
--
-- Fixes relative to the previous version:
--   * The search strings are the entity names ('&ecirc;'), not the already
--     decoded characters, so each REPLACE actually does something.
--   * The '&apos;' literal is restored (it had degraded to an unterminated
--     string literal).
--   * Every REPLACE runs under an explicit case-sensitive collation. Under a
--     case-insensitive default collation '&Ecirc;' matches the '&ecirc;'
--     search string and is decoded to the lowercase character.
DECLARE @txt   varchar(max),
        @start int,
        @end   int,
        @len   int;

-- Sample input: markup plus lowercase and uppercase named entities.
SET @txt = '<p class="answer">Informamos que a documenta&ccedil;&atilde;o <strong>deve ser impressa e enviada fisicamente pela AG&Ecirc;NCIA</strong>, contendo confere com oringinal por funcion&aacute;rio CAIXA.</p>';

-- Locate the first tag: @start is the position of '<', @end the position of
-- the first '>' at or after it, @len the tag length including both brackets.
SET @start = CHARINDEX('<', @txt);
SET @end   = CHARINDEX('>', @txt, @start);
SET @len   = (@end - @start) + 1;

-- Remove one tag per iteration. The loop stops when no '<' is left or when a
-- '<' has no closing '>' (CHARINDEX returns 0).
WHILE @start > 0 AND @end > 0 AND @len > 0
BEGIN
    SET @txt   = STUFF(@txt, @start, @len, '');
    SET @start = CHARINDEX('<', @txt);
    SET @end   = CHARINDEX('>', @txt, @start);
    SET @len   = (@end - @start) + 1;
END;

-- Punctuation and symbols.
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&nbsp;',  ' ');       -- space
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&ldquo;', CHAR(34));  -- "
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&rdquo;', CHAR(34));  -- "
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&lsquo;', CHAR(39));  -- '
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&rsquo;', CHAR(39));  -- '
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&ndash;', CHAR(150)); -- en dash
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&mdash;', CHAR(151)); -- em dash
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&ordm;',  CHAR(186)); -- masculine ordinal
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&ordf;',  CHAR(170)); -- feminine ordinal
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&sect;',  CHAR(167)); -- section sign
--------------------------------------------------------------
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&quot;',  CHAR(34));  -- "
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&apos;',  CHAR(39));  -- '
--------------------------------------------------------------
-- Lowercase accented letters.
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&agrave;', 'à');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&aacute;', 'á');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&atilde;', 'ã');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&acirc;',  'â');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&auml;',   'ä');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&eacute;', 'é');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&ecirc;',  'ê');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&iacute;', 'í');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&oacute;', 'ó');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&otilde;', 'õ');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&oslash;', 'ø');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&uacute;', 'ú');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&uuml;',   'ü');
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&ccedil;', 'ç');
--------------------------------------------------------------
-- Uppercase accented letters (only distinguishable from the lowercase
-- entities because the comparison is case-sensitive).
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Agrave;', CHAR(192)); -- À
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Aacute;', CHAR(193)); -- Á
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Atilde;', CHAR(195)); -- Ã
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Acirc;',  CHAR(194)); -- Â
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Auml;',   CHAR(196)); -- Ä
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Eacute;', CHAR(201)); -- É
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Ecirc;',  CHAR(202)); -- Ê
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Iacute;', CHAR(205)); -- Í
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Oacute;', CHAR(211)); -- Ó
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Otilde;', CHAR(213)); -- Õ
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Oslash;', CHAR(216)); -- Ø
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Uacute;', CHAR(218)); -- Ú
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Uuml;',   CHAR(220)); -- Ü
SET @txt = REPLACE(@txt COLLATE SQL_Latin1_General_CP1_CS_AS, '&Ccedil;', CHAR(199)); -- Ç

-- Return the cleaned text without leading or trailing spaces.
SELECT LTRIM(RTRIM(@txt));
声明@txt varchar(max)、@start int、@end int、@len int
设置@txt='提供文件和资料&阿蒂尔德;o开发pela AG和Ecirc财务环境管理公司;NCIA,contendo Conference公司的原始职务和职责;里约热内卢CAIXA。
'
设置@start=charindex(“”,@txt,@start)
设置@len=(@end-@start)+1
而@start>0和@end>0和@len>0
开始
设置@txt=stuff(@txt、@start、@len、,)
设置@start=charindex(“”,@txt,@start)
设置@len=(@end-@start)+1
结束
设置@txt=REPLACE(@txt,,“”)——空格
设置@txt=REPLACE(@txt,&ldquo;字符(34))--”
设置@txt=REPLACE(@txt,&rdquo;字符(34))--”
设置@txt=REPLACE(@txt,&lsquo;,CHAR(39))--'
设置@txt=REPLACE(@txt',CHAR(39))--'
设置@txt=REPLACE(@txt,&ndash;',CHAR(150))--
设置@txt=REPLACE(@txt,&mdash;',CHAR(151))---
设置@txt=REPLACE(@txt,º;',CHAR(186))--º
设置@txt=REPLACE(@txt,ª;',CHAR(170))--ª
设置@txt=REPLACE(@txt,§;',CHAR(167))--
--------------------------------------------------------------
设置@txt=REPLACE(@txt,";',CHAR(34))--”
设置@txt=REPLACE(@txt,';',CHAR(39))--'
--------------------------------------------------------------
设置@txt=REPLACE(@txt,à;','a')--a
设置@txt=REPLACE(@txt,á;','á')--á
设置@txt=REPLACE(@txt,ã;','ã')--ã
设置@txt=REPLACE(@txt,â;,'a')--
设置@txt=REPLACE(@txt,ä;','ä')--ä
设置@txt=REPLACE(@txt,é;','é')--é
设置@txt=REPLACE(@txt,ê;','ê')--ê
设置@txt=REPLACE(@txt,í;','í')--í
设置@txt=REPLACE(@txt,ó;','ó')--ó
设置@txt=REPLACE(@txt,õ;','õ')--õ
设置@txt=REPLACE(@txt,ø;','ø')--ø
设置@txt=REPLACE(@txt,ú;,'ú')--ú
设置@txt=REPLACE(@txt,ü;,'u')--u
SET@txt=REPLACE(@txt,ç;','ç')--ç
--------------------------------------------------------------
SET@txt=REPLACE(@txt,À;',CHAR(192))--
SET@txt=REPLACE(@txt,Á;',CHAR(193))--
SET@txt=REPLACE(@txt,Ã;',CHAR(195))--
SET@txt=REPLACE(@txt,Â;',CHAR(194))--
SET@txt=REPLACE(@txt,Ä;',CHAR(196))--
SET@txt=REPLACE(@txt,É;',CHAR(201))--
设置@txt=REPLACE(@txt,Ê;',CHAR(202))--
设置@txt=REPLACE(@txt,Í;',CHAR(205))--
SET@txt=REPLACE(@txt,Ó;',CHAR(211))--
SET@txt=REPLACE(@txt,Õ;',CHAR(213))--
SET@txt=REPLACE(@txt,Ø;',CHAR(216))--
设置@txt=REPLACE(@txt,Ú;',CHAR(218))--
设置@txt=REPLACE(@txt,Ü;',CHAR(220))--
SET@txt=REPLACE(@txt,Ç;',CHAR(199))--
选择LTRIM(RTRIM(@txt))
它可以剥离HTML标记,但只能正确转换小写的HTML实体;遇到大写实体时不起作用,例如单词 AG&Ecirc;NCIA(AGÊNCIA)中的 &Ecirc;,结果打印出来的是 AGêNCIA。
有什么办法可以让它正常工作吗?
编辑:PS:我无法像 @dzomba 建议的那样更改数据库的排序规则。
如果需要区分实体名称的大小写,就必须把数据库排序规则设置为区分大小写。更新!下面是一个变通方法,我认为它可以完成这项工作。我手头没有 SQL Server 来测试,但我几乎可以肯定它能正常工作:
-- Purpose: strip every <...> tag from @txt, then decode a fixed list of named
-- HTML entities case-sensitively WITHOUT changing the database collation.
-- The text is placed in a table variable whose column is declared with a
-- case-sensitive collation, so REPLACE on that column tells '&Ecirc;' apart
-- from '&ecirc;'.
--
-- Fixes relative to the previous version:
--   * DECLARE @table now has the required TABLE keyword.
--   * The trailing DROP TABLE @table is removed: a table variable cannot be
--     dropped; it goes out of scope automatically at the end of the batch.
--   * The search strings are the entity names ('&ecirc;'), not the already
--     decoded characters, and the '&apos;' literal is restored (it had
--     degraded to an unterminated string literal).
DECLARE @txt   varchar(max);
DECLARE @start int;
DECLARE @end   int;
DECLARE @len   int;

-- Sample input: markup plus lowercase and uppercase named entities.
SET @txt = '<p class="answer">Informamos que a documenta&ccedil;&atilde;o <strong>deve ser impressa e enviada fisicamente pela AG&Ecirc;NCIA</strong>, contendo confere com oringinal por funcion&aacute;rio CAIXA.</p>';

-- Locate the first tag: @start is the position of '<', @end the position of
-- the first '>' at or after it, @len the tag length including both brackets.
SET @start = CHARINDEX('<', @txt);
SET @end   = CHARINDEX('>', @txt, @start);
SET @len   = (@end - @start) + 1;

-- Remove one tag per iteration. The loop stops when no '<' is left or when a
-- '<' has no closing '>' (CHARINDEX returns 0).
WHILE @start > 0 AND @end > 0 AND @len > 0
BEGIN
    SET @txt   = STUFF(@txt, @start, @len, '');
    SET @start = CHARINDEX('<', @txt);
    SET @end   = CHARINDEX('>', @txt, @start);
    SET @len   = (@end - @start) + 1;
END;

-- Single-row work table; the column collation makes every REPLACE below
-- case-sensitive.
DECLARE @table TABLE (txtColumn varchar(max) COLLATE SQL_Latin1_General_CP1_CS_AS);

INSERT INTO @table (txtColumn)
SELECT @txt;

-- The UPDATEs below intentionally have no WHERE clause: @table holds exactly
-- the one row inserted above.

-- Punctuation and symbols.
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&nbsp;',  ' ');       -- space
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&ldquo;', CHAR(34));  -- "
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&rdquo;', CHAR(34));  -- "
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&lsquo;', CHAR(39));  -- '
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&rsquo;', CHAR(39));  -- '
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&ndash;', CHAR(150)); -- en dash
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&mdash;', CHAR(151)); -- em dash
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&ordm;',  CHAR(186)); -- masculine ordinal
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&ordf;',  CHAR(170)); -- feminine ordinal
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&sect;',  CHAR(167)); -- section sign
-------------------------------------------------------------
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&quot;',  CHAR(34));  -- "
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&apos;',  CHAR(39));  -- '
--------------------------------------------------------------
-- Lowercase accented letters.
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&agrave;', 'à');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&aacute;', 'á');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&atilde;', 'ã');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&acirc;',  'â');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&auml;',   'ä');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&eacute;', 'é');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&ecirc;',  'ê');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&iacute;', 'í');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&oacute;', 'ó');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&otilde;', 'õ');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&oslash;', 'ø');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&uacute;', 'ú');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&uuml;',   'ü');
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&ccedil;', 'ç');
-------------------------------------------------------------
-- Uppercase accented letters.
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Agrave;', CHAR(192)); -- À
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Aacute;', CHAR(193)); -- Á
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Atilde;', CHAR(195)); -- Ã
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Acirc;',  CHAR(194)); -- Â
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Auml;',   CHAR(196)); -- Ä
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Eacute;', CHAR(201)); -- É
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Ecirc;',  CHAR(202)); -- Ê
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Iacute;', CHAR(205)); -- Í
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Oacute;', CHAR(211)); -- Ó
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Otilde;', CHAR(213)); -- Õ
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Oslash;', CHAR(216)); -- Ø
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Uacute;', CHAR(218)); -- Ú
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Uuml;',   CHAR(220)); -- Ü
UPDATE @table SET txtColumn = REPLACE(txtColumn, '&Ccedil;', CHAR(199)); -- Ç

-- Return the cleaned text without leading or trailing spaces.
SELECT LTRIM(RTRIM(txtColumn)) FROM @table;
-- No cleanup needed: the table variable is released automatically when the
-- batch ends.
declare@txt varchar(最大值)
声明@start int
声明@end int
声明@lenint
set@txt='文件信息ç;ã;o开发环境财政信息公司Ê;NCIA,内容为原始财务职能会议á;rio CAIXA。
'
设置@start=charindex(“”,@txt,@start)
设置@len=(@end-@start)+1
而@start>0和@end>0和@len>0
开始
设置@txt=stuff(@txt、@start、@len、,)
设置@start=charindex(“”,@txt,@start)
设置@len=(@end-@start)+1
结束
声明@table(txtColumn varchar(max)COLLATE SQL\u Latin1\u General\u CP1\u CS\u AS)--使列区分大小写
插入@table(txtColumn)
选择@txt
更新@table set txtColumn=REPLACE(txtColumn,,“”)——空格
更新@table set txtColumn=REPLACE(txtColumn,&ldquo;字符(34))--”
更新@table set txtColumn=REPLACE(txtColumn,&rdquo;字符(34))--”
更新@table set txtColumn=REPLACE(txtColumn,&lsquo;字符(39))--'
更新@table set txtColumn=REPLACE(txtColumn,'rsquo;字符(39))--'
更新@table set txtColumn=REPLACE(txtColumn,&ndash;',CHAR(150))--
UPDATE@table set txtColumn=REPLACE(txtColumn,“&mdash;”,CHAR(151))---
更新@table set txtColumn=REPLACE(txtColumn,º;',CHAR(186))--
更新@table set txtColumn=REPLACE(txtColumn,ª;',CHAR(170))--ª
更新@table set txtColumn=REPLACE(txtColumn,§;,CHAR(167))--
-------------------------------------------------------------
更新@table set txtColumn=REPLACE(txtColumn,";',CHAR(34))--”
更新@table set txtColumn=REPLACE(txtColumn,';',CHAR(39))--'
--------------------------------------------------------------
更新@table set txtColumn=REPLACE(txtColumn,“à;”,“a”)--a
更新@table set txtColumn=REPLACE(txtColumn,'aacute;','á')--á
更新@table set txtColumn=REPLACE(txtColumn,ã;','ã')--ã
UPDATE@table set txtColumn=REPLACE(txtColumn,â;,'a')--
更新@table set txtColumn=REPLACE(txtColumn,ä;','ä')--ä
更新@table set txtColumn=REPLACE(txtColumn,'eacute;','é')--é
更新@table set txtColumn=REPLACE(txtColumn,'ecirc;','ê')--
更新@table set txtColumn=REPLACE(txtColumn,í;,'í')--í
更新@table set txtColumn=REPLACE(txtColumn,'oacute;','ó')--
更新@table set txtColumn=REPLACE(txtColumn,õ;','õ')--õ
更新@table set txtColumn=REPLACE(txtColumn,“ø;”,“ø”)