SQL用户定义的函数,用于剥离HTML标记并替换HTML实体

SQL用户定义的函数,用于剥离HTML标记并替换HTML实体,sql,sql-server,sql-server-2008,Sql,Sql Server,Sql Server 2008,我正在尝试编写一个UDF(实际上我正在将我在web上找到的一些代码改编成一个函数)来完成标题中描述的工作 代码如下: declare @txt varchar(max), @start int, @end int, @len int set @txt = '<p class=&#34;answer&#34;>Informamos que a documenta&ccedil;&atilde;o <strong>deve ser impre

我正在尝试编写一个UDF(实际上我正在将我在web上找到的一些代码改编成一个函数)来完成标题中描述的工作

代码如下:

-- Strips HTML tags from @txt and then decodes a fixed list of HTML entities
-- into their single-character equivalents. The cleaned, trimmed text is
-- returned by the final SELECT.
--
-- NOTE: HTML entity names are case-sensitive ('&ecirc;' is ê, '&Ecirc;' is Ê).
-- REPLACE compares using the collation of its input, so under a
-- case-insensitive database collation the lowercase pass would also consume
-- the uppercase entities and 'AG&Ecirc;NCIA' would come out as 'AGêNCIA'.
-- The letter-entity replacements below therefore force a case-sensitive
-- collation on the input; the database collation itself is left untouched.
-- NOTE(review): Latin1_General_CS_AS is a code page 1252 collation, which is
-- what the CHAR(192)..CHAR(220) values below already presume - confirm that
-- this matches the database's default code page.
declare @txt varchar(max), @start int, @end int, @len int

set @txt = '<p class=&#34;answer&#34;>Informamos que a documenta&ccedil;&atilde;o <strong>deve ser impressa e enviada fisicamente pela AG&Ecirc;NCIA</strong>, contendo confere com oringinal por funcion&aacute;rio CAIXA.</p>'

-- Locate the first tag: from the first '<' up to the next '>' after it.
set @start = charindex('<',@txt)
set @end = charindex('>',@txt,@start)
set @len = (@end - @start) + 1

-- Delete one tag per iteration; the loop stops as soon as no '<' is left
-- or no closing '>' follows it.
while @start > 0 and @end > 0 and @len > 0
begin
    set @txt = stuff(@txt,@start,@len,'')
    set @start = charindex('<',@txt)
    set @end = charindex('>',@txt,@start)
    set @len = (@end - @start) + 1
end

-- Punctuation and symbol entities (matching behaviour unchanged).
SET @txt = REPLACE(@txt,'&nbsp;',' ') --space
SET @txt = REPLACE(@txt,'&ldquo;',CHAR(34)) --"
SET @txt = REPLACE(@txt,'&rdquo;',CHAR(34)) --"
SET @txt = REPLACE(@txt,'&lsquo;',CHAR(39)) --'
SET @txt = REPLACE(@txt,'&rsquo;',CHAR(39)) --'
SET @txt = REPLACE(@txt,'&ndash;',CHAR(150)) -- –
SET @txt = REPLACE(@txt,'&mdash;',CHAR(151)) -- —
SET @txt = REPLACE(@txt,'&ordm;',CHAR(186)) -- º
SET @txt = REPLACE(@txt,'&ordf;',CHAR(170)) -- ª
SET @txt = REPLACE(@txt,'&sect;',CHAR(167)) -- §
--------------------------------------------------------------
-- Numeric entities.
SET @txt = REPLACE(@txt,'&#34;',CHAR(34)) --"
SET @txt = REPLACE(@txt,'&#39;',CHAR(39)) --'
--------------------------------------------------------------

-- Lowercase accented letters. Case-sensitive matching is essential here:
-- this pass runs before the uppercase one and must leave '&Ecirc;' etc. alone.
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&agrave;','à') --à
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&aacute;','á') --á
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&atilde;','ã') --ã
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&acirc;','â') --â
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&auml;','ä') --ä
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&eacute;','é') --é
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&ecirc;','ê') --ê
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&iacute;','í') --í
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&oacute;','ó') --ó
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&otilde;','õ') --õ
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&oslash;','ø') --ø
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&uacute;','ú') --ú
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&uuml;','ü') --ü
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&ccedil;','ç') --ç
--------------------------------------------------------------
-- Uppercase accented letters (same case-sensitive matching, for symmetry).
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Agrave;',CHAR(192)) --À
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Aacute;',CHAR(193)) --Á
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Atilde;',CHAR(195)) --Ã
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Acirc;',CHAR(194)) --Â
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Auml;',CHAR(196)) --Ä
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Eacute;',CHAR(201)) --É
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Ecirc;',CHAR(202)) --Ê
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Iacute;',CHAR(205)) --Í
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Oacute;',CHAR(211)) --Ó
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Otilde;',CHAR(213)) --Õ
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Oslash;',CHAR(216)) --Ø
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Uacute;',CHAR(218)) --Ú
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Uuml;',CHAR(220)) --Ü
SET @txt = REPLACE(@txt COLLATE Latin1_General_CS_AS,'&Ccedil;',CHAR(199)) --Ç

-- Return the cleaned text without leading/trailing spaces.
select LTRIM(RTRIM(@txt))
声明@txt varchar(max)、@start int、@end int、@len int
设置@txt='

提供文件和资料&阿蒂尔德;o开发pela AG和Ecirc财务环境管理公司;NCIA,contendo Conference公司的原始职务和职责;里约热内卢CAIXA。

' 设置@start=charindex(“”,@txt,@start) 设置@len=(@end-@start)+1 而@start>0和@end>0和@len>0 开始 设置@txt=stuff(@txt、@start、@len、,) 设置@start=charindex(“”,@txt,@start) 设置@len=(@end-@start)+1 结束 设置@txt=REPLACE(@txt,,“”)——空格 设置@txt=REPLACE(@txt,&ldquo;字符(34))--” 设置@txt=REPLACE(@txt,&rdquo;字符(34))--” 设置@txt=REPLACE(@txt,&lsquo;,CHAR(39))--' 设置@txt=REPLACE(@txt',CHAR(39))--' 设置@txt=REPLACE(@txt,&ndash;',CHAR(150))-- 设置@txt=REPLACE(@txt,&mdash;',CHAR(151))--- 设置@txt=REPLACE(@txt,º;',CHAR(186))--º 设置@txt=REPLACE(@txt,ª;',CHAR(170))--ª 设置@txt=REPLACE(@txt,§;',CHAR(167))-- -------------------------------------------------------------- 设置@txt=REPLACE(@txt,";',CHAR(34))--” 设置@txt=REPLACE(@txt,';',CHAR(39))--' -------------------------------------------------------------- 设置@txt=REPLACE(@txt,à;','a')--a 设置@txt=REPLACE(@txt,á;','á')--á 设置@txt=REPLACE(@txt,ã;','ã')--ã 设置@txt=REPLACE(@txt,â;,'a')-- 设置@txt=REPLACE(@txt,ä;','ä')--ä 设置@txt=REPLACE(@txt,é;','é')--é 设置@txt=REPLACE(@txt,ê;','ê')--ê 设置@txt=REPLACE(@txt,í;','í')--í 设置@txt=REPLACE(@txt,ó;','ó')--ó 设置@txt=REPLACE(@txt,õ;','õ')--õ 设置@txt=REPLACE(@txt,ø;','ø')--ø 设置@txt=REPLACE(@txt,ú;,'ú')--ú 设置@txt=REPLACE(@txt,ü;,'u')--u SET@txt=REPLACE(@txt,ç;','ç')--ç -------------------------------------------------------------- SET@txt=REPLACE(@txt,À;',CHAR(192))-- SET@txt=REPLACE(@txt,Á;',CHAR(193))-- SET@txt=REPLACE(@txt,Ã;',CHAR(195))-- SET@txt=REPLACE(@txt,Â;',CHAR(194))-- SET@txt=REPLACE(@txt,Ä;',CHAR(196))-- SET@txt=REPLACE(@txt,É;',CHAR(201))-- 设置@txt=REPLACE(@txt,Ê;',CHAR(202))-- 设置@txt=REPLACE(@txt,Í;',CHAR(205))-- SET@txt=REPLACE(@txt,Ó;',CHAR(211))-- SET@txt=REPLACE(@txt,Õ;',CHAR(213))-- SET@txt=REPLACE(@txt,Ø;',CHAR(216))-- 设置@txt=REPLACE(@txt,Ú;',CHAR(218))-- 设置@txt=REPLACE(@txt,Ü;',CHAR(220))-- SET@txt=REPLACE(@txt,Ç;',CHAR(199))-- 选择LTRIM(RTRIM(@txt))
它能剥离HTML标记,但只能正确转换小写的HTML实体:当遇到大写实体(例如单词 AG&Ecirc;NCIA,即 AGÊNCIA 中的 &Ecirc;)时不起作用,输出的是 AGêNCIA,而不是 AGÊNCIA。

有什么办法可以让它正常工作吗?


编辑:PS:我无法像@dzomba所建议的那样更改我的数据库排序规则。

如果需要区分大小写的匹配,您需要将数据库排序规则设置为区分大小写。

更新!这是一个解决方法,但我认为这可以完成工作。我没有sql server来测试它,但我几乎可以肯定它工作得很好

-- Workaround for decoding HTML entities case-sensitively without changing
-- the database collation: the text is copied into a table variable whose
-- column carries a case-sensitive collation, and every REPLACE is run against
-- that column, so '&ecirc;' no longer matches '&Ecirc;'.
declare @txt varchar(max)
declare @start int
declare @end int
declare @len int

set @txt = '<p class=&#34;answer&#34;>Informamos que a documenta&ccedil;&atilde;o <strong>deve ser impressa e enviada fisicamente pela AG&Ecirc;NCIA</strong>, contendo confere com oringinal por funcion&aacute;rio CAIXA.</p>'

-- Locate the first tag: from the first '<' up to the next '>' after it.
set @start = charindex('<',@txt)
set @end = charindex('>',@txt,@start)
set @len = (@end - @start) + 1

-- Delete one tag per iteration until no complete tag is left.
while @start > 0 and @end > 0 and @len > 0
begin
    set @txt = stuff(@txt,@start,@len,'')
    set @start = charindex('<',@txt)
    set @end = charindex('>',@txt,@start)
    set @len = (@end - @start) + 1
end

-- A table variable must be declared with the TABLE keyword.
DECLARE @table TABLE (txtColumn varchar(max) COLLATE SQL_Latin1_General_CP1_CS_AS) --make the column case sensitive

INSERT INTO @table (txtColumn)
SELECT @txt

-- @table holds exactly the one row inserted above, so the UPDATE statements
-- below intentionally have no WHERE clause.
UPDATE @table set txtColumn = REPLACE(txtColumn,'&nbsp;',' ') --space
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ldquo;',CHAR(34)) --"
UPDATE @table set txtColumn = REPLACE(txtColumn,'&rdquo;',CHAR(34)) --"
UPDATE @table set txtColumn = REPLACE(txtColumn,'&lsquo;',CHAR(39)) --'
UPDATE @table set txtColumn = REPLACE(txtColumn,'&rsquo;',CHAR(39)) --'
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ndash;',CHAR(150)) -- –
UPDATE @table set txtColumn = REPLACE(txtColumn,'&mdash;',CHAR(151)) -- —
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ordm;',CHAR(186)) -- º
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ordf;',CHAR(170)) -- ª
UPDATE @table set txtColumn = REPLACE(txtColumn,'&sect;',CHAR(167)) -- §
-------------------------------------------------------------
-- Numeric entities.
UPDATE @table set txtColumn = REPLACE(txtColumn,'&#34;',CHAR(34)) --"
UPDATE @table set txtColumn = REPLACE(txtColumn,'&#39;',CHAR(39)) --'
--------------------------------------------------------------

-- Lowercase accented letters.
UPDATE @table set txtColumn = REPLACE(txtColumn,'&agrave;','à') --à
UPDATE @table set txtColumn = REPLACE(txtColumn,'&aacute;','á') --á
UPDATE @table set txtColumn = REPLACE(txtColumn,'&atilde;','ã') --ã
UPDATE @table set txtColumn = REPLACE(txtColumn,'&acirc;','â') --â
UPDATE @table set txtColumn = REPLACE(txtColumn,'&auml;','ä') --ä
UPDATE @table set txtColumn = REPLACE(txtColumn,'&eacute;','é') --é
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ecirc;','ê') --ê
UPDATE @table set txtColumn = REPLACE(txtColumn,'&iacute;','í') --í
UPDATE @table set txtColumn = REPLACE(txtColumn,'&oacute;','ó') --ó
UPDATE @table set txtColumn = REPLACE(txtColumn,'&otilde;','õ') --õ
UPDATE @table set txtColumn = REPLACE(txtColumn,'&oslash;','ø') --ø
UPDATE @table set txtColumn = REPLACE(txtColumn,'&uacute;','ú') --ú
UPDATE @table set txtColumn = REPLACE(txtColumn,'&uuml;','ü') --ü
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ccedil;','ç') --ç
-------------------------------------------------------------
-- Uppercase accented letters.
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Agrave;',CHAR(192)) --À
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Aacute;',CHAR(193)) --Á
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Atilde;',CHAR(195)) --Ã
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Acirc;',CHAR(194)) --Â
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Auml;',CHAR(196)) --Ä
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Eacute;',CHAR(201)) --É
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Ecirc;',CHAR(202)) --Ê
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Iacute;',CHAR(205)) --Í
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Oacute;',CHAR(211)) --Ó
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Otilde;',CHAR(213)) --Õ
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Oslash;',CHAR(216)) --Ø
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Uacute;',CHAR(218)) --Ú
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Uuml;',CHAR(220)) --Ü
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Ccedil;',CHAR(199)) --Ç

-- Return the cleaned text without leading/trailing spaces.
SELECT LTRIM(RTRIM(txtColumn)) FROM @table

-- No DROP TABLE here: a table variable cannot be dropped; it goes out of
-- scope automatically when the batch ends.
declare@txt varchar(最大值)
声明@start int
声明@end int
声明@lenint
set@txt='

文件信息ç;ã;o开发环境财政信息公司Ê;NCIA,内容为原始财务职能会议á;rio CAIXA。

' 设置@start=charindex(“”,@txt,@start) 设置@len=(@end-@start)+1 而@start>0和@end>0和@len>0 开始 设置@txt=stuff(@txt、@start、@len、,) 设置@start=charindex(“”,@txt,@start) 设置@len=(@end-@start)+1 结束 声明@table(txtColumn varchar(max)COLLATE SQL\u Latin1\u General\u CP1\u CS\u AS)--使列区分大小写 插入@table(txtColumn) 选择@txt 更新@table set txtColumn=REPLACE(txtColumn,,“”)——空格 更新@table set txtColumn=REPLACE(txtColumn,&ldquo;字符(34))--” 更新@table set txtColumn=REPLACE(txtColumn,&rdquo;字符(34))--” 更新@table set txtColumn=REPLACE(txtColumn,&lsquo;字符(39))--' 更新@table set txtColumn=REPLACE(txtColumn,'rsquo;字符(39))--' 更新@table set txtColumn=REPLACE(txtColumn,&ndash;',CHAR(150))-- UPDATE@table set txtColumn=REPLACE(txtColumn,“&mdash;”,CHAR(151))--- 更新@table set txtColumn=REPLACE(txtColumn,º;',CHAR(186))-- 更新@table set txtColumn=REPLACE(txtColumn,ª;',CHAR(170))--ª 更新@table set txtColumn=REPLACE(txtColumn,§;,CHAR(167))-- ------------------------------------------------------------- 更新@table set txtColumn=REPLACE(txtColumn,";',CHAR(34))--” 更新@table set txtColumn=REPLACE(txtColumn,';',CHAR(39))--' -------------------------------------------------------------- 更新@table set txtColumn=REPLACE(txtColumn,“à;”,“a”)--a 更新@table set txtColumn=REPLACE(txtColumn,'aacute;','á')--á 更新@table set txtColumn=REPLACE(txtColumn,ã;','ã')--ã UPDATE@table set txtColumn=REPLACE(txtColumn,â;,'a')-- 更新@table set txtColumn=REPLACE(txtColumn,ä;','ä')--ä 更新@table set txtColumn=REPLACE(txtColumn,'eacute;','é')--é 更新@table set txtColumn=REPLACE(txtColumn,'ecirc;','ê')-- 更新@table set txtColumn=REPLACE(txtColumn,í;,'í')--í 更新@table set txtColumn=REPLACE(txtColumn,'oacute;','ó')-- 更新@table set txtColumn=REPLACE(txtColumn,õ;','õ')--õ 更新@table set txtColumn=REPLACE(txtColumn,“ø;”,“ø”)