SQL文本过滤
我想从数据集中过滤出几个关键短语。不幸的是,到目前为止,我能想到的唯一算法是嵌套的replace语句,例如:SQL文本过滤,sql,sql-server,tsql,Sql,Sql Server,Tsql,我想从数据集中过滤出几个关键短语。不幸的是,到目前为止,我能想到的唯一算法是嵌套的replace语句,例如: SELECT REPLACE( REPLACE(FIELDNAME,'</u>','') ,'<u>','') 选择 替换( 替换(字段名,,“”) ,'','') 其中FIELDNAME是存储在表中的原始HTML代码。正如你所看到的,这是可怕的。有更好的想法吗?我认为在TSQL中没有更好的方法 如果您
SELECT
REPLACE(
REPLACE(FIELDNAME,'</u>','')
,'<u>','')
选择
替换(
替换(字段名,'</u>','')
,'<u>','')
其中FIELDNAME是存储在表中的原始HTML代码。正如你所看到的,这是可怕的。有更好的想法吗?我认为在TSQL中没有更好的方法
如果您在SQL层之上有另一个环境(例如asp.net),您可能会更幸运地在这方面进行过滤。我认为TSQL中没有更好的方法
如果您在SQL层之上还有另一个环境(例如asp.net),在那一层进行过滤可能会更顺利。此类字符串操作最好交由SQLCLR处理。曾有一场讨论涉及一个类似但更复杂的问题:根据规则列表“清理”HTML文本(您需要在该网站注册,但仅此而已)。讨论中包括了几种使用T-SQL的方法,以及一种使用SQLCLR的方法,后者是迄今为止最快的。由于其中的VB.Net/SQLCLR解决方案是我编写的,我将它附在下面。以下是它实现的文本替换/转换:
导入系统
导入系统数据
导入System.Data.SqlClient
导入System.Data.SqlTypes
导入Microsoft.SqlServer.Server
部分公共类UserDefinedFunctions
公共枚举状态
空间1
实体
HTMLTag
标准
字1
剧本
风格
结束枚举
枚举子状态
没有一个
结束开始
尾端斜线
结束枚举
常量字符空间为整数=32
常量字符作为整数=38
常量字符斜杠为整数=47
常量字符为整数=60
常量CharGT为整数=62
常量字符为整数=65
常量字符为整数=88
常量CharZ为整数=90
常量字符a为整数=97
常量字符b为整数=98
常量字符作为整数=110
常量字符p为整数=112
常量字符作为整数=115
常量Char_z为整数=122
常量CharDash为整数=45
常量CharSemiC为整数=59
_
公共共享函数HTMLCleaner(ByVal字符作为SqlTypes.SqlBytes)作为SqlTypes.SqlBytes
作为字节的Dim b
尺寸i为整数,j为整数
变暗为字节()
将状态变暗为状态=状态。空间1
将子状态标注为子状态=子状态。无
Dim strAccum As String=“”
重拨出(0到字符长度-1)
对于i=0到字符长度-1
b=字符(i)
选择案例状态
案例说明。规范
选择案例b
案例CharA至CharZ,Char_a至Char_z
Out(j)=b
j=j+1
状态=状态。规范
格值空间
Out(j)=b
j=j+1
State=States.Space1
案例查拉姆
状态=状态。实体“跳过输出”
箱字符
Out(j)=CharSpace
j=j+1
State=States.HTMLTag
Case CharDash
Out(j)=b
j=j+1
状态=状态。规范
其他情况
状态=状态。规范“跳过输出”
结束选择
案例2.1
选择案例b
格值空间
“放弃前导空格和多个空格”
案例查拉姆
状态=状态。实体“跳过输出”
箱字符
Out(j)=CharSpace
j=j+1
State=States.HTMLTag
Case CharDash
Out(j)=b
j=j+1
状态=状态。规范
案例CharA至CharZ,Char_a至Char_z
Out(j)=b
j=j+1
State=States.Word1
其他情况
状态=状态。规范“跳过输出”
结束选择
案例1.1
选择案例b
格值空间
'单字符字,从输出缩回:'
j=j-1
State=States.Space1
案例查拉姆
状态=状态。实体“跳过输出”
箱字符
Out(j)=CharSpace
j=j+1
State=States.HTMLTag
Case CharDash
Imports System
Imports System.Data
Imports System.Data.SqlClient
Imports System.Data.SqlTypes
Imports Microsoft.SqlServer.Server
''' <summary>
''' SQLCLR scalar functions that operate on single-byte-per-character text
''' passed in as <see cref="SqlTypes.SqlBytes"/>: an HTML stripper
''' (<c>HTMLCleaner</c>) and two pass-through copies (<c>HTMLCopy</c>,
''' <c>HTMLCopy2</c>).
''' </summary>
Partial Public Class UserDefinedFunctions

    ''' <summary>Top-level states of the HTMLCleaner scanner.</summary>
    Public Enum States
        Space1      ' start of input or just after a space: further spaces are dropped
        Entity      ' between "&" and the closing ";"
        HTMLTag     ' between "<" and the closing ">"
        Norm        ' inside ordinary text
        Word1       ' exactly one letter of a new word has been written
        Script      ' inside a <script> element, output suppressed
        Style       ' inside a <style> element, output suppressed
    End Enum

    ''' <summary>Progress towards recognising a closing tag while in Script/Style.</summary>
    Enum SubStates
        None        ' no part of "</" seen
        EndBegin    ' "<" seen
        EndSlash    ' "</" seen, collecting the tag name up to ">"
    End Enum

    ' ASCII codes of the bytes the scanner cares about.
    Const CharSpace As Integer = 32
    Const CharAmp As Integer = 38
    Const CharSlash As Integer = 47
    Const CharLT As Integer = 60
    Const CharGT As Integer = 62
    Const CharA As Integer = 65
    Const CharX As Integer = 88
    Const CharZ As Integer = 90
    Const Char_a As Integer = 97
    Const Char_b As Integer = 98
    Const Char_n As Integer = 110
    Const Char_p As Integer = 112
    Const Char_s As Integer = 115
    Const Char_z As Integer = 122
    Const CharDash As Integer = 45
    Const CharSemiC As Integer = 59

    ''' <summary>
    ''' Tests whether the text collected between "&lt;" (or "&lt;/") and "&gt;"
    ''' names the given element. Only the leading token is compared, so
    ''' attributes are ignored, and the comparison is case-insensitive.
    ''' </summary>
    ''' <param name="accum">Raw text collected from inside the tag.</param>
    ''' <param name="name">Element name to compare against, e.g. "SCRIPT".</param>
    ''' <returns>True when the tag's leading token equals <paramref name="name"/>.</returns>
    Private Shared Function TagNameMatches(ByVal accum As String, ByVal name As String) As Boolean
        Dim text As String = accum.Trim()
        ' The name ends at the first whitespace or "/" (attributes, "<x/>").
        ' A closing tag such as "/b" therefore yields an empty name and never matches.
        Dim cut As Integer = text.IndexOfAny(New Char() {" "c, ChrW(9), ChrW(10), ChrW(13), "/"c})
        If cut >= 0 Then text = text.Substring(0, cut)
        Return String.Equals(text, name, StringComparison.OrdinalIgnoreCase)
    End Function

    ''' <summary>
    ''' Strips HTML markup from a byte string. Letters, dashes and single
    ''' spaces are kept; tags become a space; <c>&amp;nbsp;</c> becomes a space
    ''' and any other entity becomes "X"; the contents of script and style
    ''' elements are dropped; every other byte is discarded. Runs of spaces
    ''' are collapsed and leading/trailing spaces removed. A one-letter word
    ''' that is followed by a space is removed as well.
    ''' </summary>
    ''' <param name="chars">Input bytes, one byte per character.</param>
    ''' <returns>
    ''' The cleaned bytes, or <c>SqlBytes.Null</c> when the input is Nothing or SQL NULL.
    ''' </returns>
    <Microsoft.SqlServer.Server.SqlFunction( _
        DataAccess:=DataAccessKind.None _
        , IsDeterministic:=True _
        , IsPrecise:=True)> _
    Public Shared Function HTMLCleaner(ByVal chars As SqlTypes.SqlBytes) As SqlTypes.SqlBytes
        ' chars.Length throws on a SQL NULL, so answer NULL with NULL up front.
        If chars Is Nothing OrElse chars.IsNull Then Return SqlTypes.SqlBytes.Null

        Dim b As Byte
        Dim i As Integer
        Dim j As Integer = 0                    ' next write position in Out
        Dim total As Integer = CInt(chars.Length)
        ' At most one byte is written per input byte, so the input length is enough.
        Dim Out(total - 1) As Byte
        Dim State As States = States.Space1
        Dim Substate As SubStates = SubStates.None
        Dim strAccum As String = ""             ' entity / tag name being collected
        Dim closingName As String

        For i = 0 To total - 1
            b = chars(i)
            Select Case State
                Case States.Norm
                    Select Case b
                        Case CharA To CharZ, Char_a To Char_z
                            Out(j) = b
                            j = j + 1
                            State = States.Norm
                        Case CharSpace
                            Out(j) = b
                            j = j + 1
                            State = States.Space1
                        Case CharAmp
                            State = States.Entity   ' skip output
                        Case CharLT
                            Out(j) = CharSpace
                            j = j + 1
                            State = States.HTMLTag
                        Case CharDash
                            Out(j) = b
                            j = j + 1
                            State = States.Norm
                        Case Else
                            State = States.Norm     ' skip output
                    End Select

                Case States.Space1
                    Select Case b
                        Case CharSpace
                            ' discard leading and repeated spaces
                        Case CharAmp
                            State = States.Entity   ' skip output
                        Case CharLT
                            Out(j) = CharSpace
                            j = j + 1
                            State = States.HTMLTag
                        Case CharDash
                            Out(j) = b
                            j = j + 1
                            State = States.Norm
                        Case CharA To CharZ, Char_a To Char_z
                            Out(j) = b
                            j = j + 1
                            State = States.Word1
                        Case Else
                            State = States.Norm     ' skip output
                    End Select

                Case States.Word1
                    Select Case b
                        Case CharSpace
                            ' single-letter word: take it back out of the output
                            j = j - 1
                            State = States.Space1
                        Case CharAmp
                            State = States.Entity   ' skip output
                        Case CharLT
                            Out(j) = CharSpace
                            j = j + 1
                            State = States.HTMLTag
                        Case CharDash
                            Out(j) = b
                            j = j + 1
                            State = States.Norm
                        Case CharA To CharZ, Char_a To Char_z
                            Out(j) = b
                            j = j + 1
                            State = States.Norm
                        Case Else
                            State = States.Norm     ' skip output
                    End Select

                Case States.Entity
                    Select Case b
                        Case CharSemiC
                            ' end of entity
                            If String.Equals(strAccum, "nbsp", StringComparison.Ordinal) Then
                                Out(j) = CharSpace
                                j = j + 1
                                State = States.Space1
                            Else
                                ' any other entity is represented by "X"
                                Out(j) = CharX
                                j = j + 1
                                State = States.Norm
                            End If
                            ' always clear, otherwise the next entity/tag is polluted
                            strAccum = ""
                        Case CharSpace
                            ' A space cannot occur inside an entity, so this was a bare "&".
                            ' Drop what was collected and resume instead of swallowing
                            ' the rest of the input while waiting for a ";".
                            Out(j) = b
                            j = j + 1
                            State = States.Space1
                            strAccum = ""
                        Case CharLT
                            ' Same reasoning: a tag starts, so the "&" was not an entity.
                            Out(j) = CharSpace
                            j = j + 1
                            State = States.HTMLTag
                            strAccum = ""
                        Case Else
                            ' ChrW is required: "&" on a Byte would append its decimal
                            ' text ("110") rather than the character ("n").
                            strAccum = strAccum & ChrW(b)
                    End Select

                Case States.HTMLTag
                    If b = CharGT Then
                        If TagNameMatches(strAccum, "SCRIPT") Then
                            State = States.Script
                            Substate = SubStates.None
                        ElseIf TagNameMatches(strAccum, "STYLE") Then
                            State = States.Style
                            Substate = SubStates.None
                        Else
                            Out(j) = CharSpace
                            j = j + 1
                            State = States.Space1
                        End If
                        strAccum = ""
                    Else
                        ' accumulate the tag text
                        strAccum = strAccum & ChrW(b)
                    End If

                Case States.Script, States.Style
                    ' Both elements are skipped the same way; only the closing name differs.
                    If State = States.Script Then
                        closingName = "SCRIPT"
                    Else
                        closingName = "STYLE"
                    End If
                    Select Case Substate
                        Case SubStates.None
                            ' a closing tag starts with "<"
                            If b = CharLT Then Substate = SubStates.EndBegin
                        Case SubStates.EndBegin
                            If b = CharSlash Then
                                Substate = SubStates.EndSlash
                                strAccum = ""
                            ElseIf b <> CharLT Then
                                ' not "</"; a repeated "<" keeps us waiting for the slash
                                Substate = SubStates.None
                            End If
                        Case SubStates.EndSlash
                            If b = CharGT Then
                                If TagNameMatches(strAccum, closingName) Then
                                    ' end of the element found; output nothing
                                    State = States.Norm
                                End If
                                ' on a false alarm we simply keep scanning
                                Substate = SubStates.None
                                strAccum = ""
                            Else
                                ' accumulate the closing tag's name
                                strAccum = strAccum & ChrW(b)
                            End If
                    End Select

                Case Else
                    System.Diagnostics.Debug.Assert(False, "Unknown scanner state")
            End Select

            ' Tags emit a space on "<" and on ">", so collapse doubled spaces
            ' and drop a leading one after every step.
            If j > 1 AndAlso Out(j - 1) = CharSpace AndAlso Out(j - 2) = CharSpace Then
                j = j - 1   ' roll back the last character
            ElseIf j = 1 AndAlso Out(0) = CharSpace Then
                j = 0       ' overwrite leading space
            End If
        Next

        ' remove any trailing space
        If j > 0 AndAlso Out(j - 1) = CharSpace Then j = j - 1

        ' trim off the unused tail (an upper bound of -1 yields an empty array)
        ReDim Preserve Out(j - 1)
        Return New SqlBytes(Out)
    End Function

    ''' <summary>
    ''' Returns a new SqlBytes holding a copy of the input's bytes.
    ''' </summary>
    ''' <param name="chars">Input bytes.</param>
    ''' <returns>
    ''' A copy of the first <c>chars.Length</c> bytes, or <c>SqlBytes.Null</c>
    ''' when the input is Nothing or SQL NULL.
    ''' </returns>
    <Microsoft.SqlServer.Server.SqlFunction( _
        DataAccess:=DataAccessKind.None _
        , IsDeterministic:=True _
        , IsPrecise:=True)> _
    Public Shared Function HTMLCopy2(ByVal chars As SqlTypes.SqlBytes) As SqlTypes.SqlBytes
        If chars Is Nothing OrElse chars.IsNull Then Return SqlTypes.SqlBytes.Null

        Dim total As Integer = CInt(chars.Length)
        ' Read the Buffer property once instead of once per byte.
        Dim source As Byte() = chars.Buffer
        Dim copy(total - 1) As Byte
        Array.Copy(source, copy, total)
        Return New SqlBytes(copy)
    End Function

    ''' <summary>
    ''' Returns the input's bytes wrapped in a new SqlBytes, without copying
    ''' when the internal buffer is exactly as long as the value.
    ''' </summary>
    ''' <param name="chars">Input bytes.</param>
    ''' <returns>
    ''' A SqlBytes with the same logical content, or <c>SqlBytes.Null</c>
    ''' when the input is Nothing or SQL NULL.
    ''' </returns>
    <Microsoft.SqlServer.Server.SqlFunction( _
        DataAccess:=DataAccessKind.None _
        , IsDeterministic:=True _
        , IsPrecise:=True)> _
    Public Shared Function HTMLCopy(ByVal chars As SqlTypes.SqlBytes) As SqlTypes.SqlBytes
        If chars Is Nothing OrElse chars.IsNull Then Return SqlTypes.SqlBytes.Null

        Dim raw As Byte() = chars.Buffer
        ' The internal buffer may be longer than the logical value; wrapping it
        ' directly would then expose the unused tail as data.
        If raw.LongLength = chars.Length Then Return New SqlTypes.SqlBytes(raw)
        Return New SqlTypes.SqlBytes(chars.Value)
    End Function
End Class