Vbscript 将除介词外的字符串大写

Vbscript 将除介词外的字符串大写,vbscript,asp-classic,vb6,Vbscript,Asp Classic,Vb6,我使用下面的代码从文本框中获取输入的字符串,并将其转换为大写字母,但诸如(the、and、an、as、to或on)等词除外 问题#1:我希望字符串的第一个单词始终大写,无论该单词是什么 问题#2:当字符串重新组合在一起时,单词间距不正确 xText = queryForHTML xTextSplit = split(xText, " ") for each item in xTextSplit xWord = item if lcase(item) = "the"

我使用下面的代码从文本框中获取输入的字符串,并将其转换为大写字母,但诸如(the、and、an、as、to或on)等词除外

问题#1:我希望字符串的第一个单词始终大写,无论该单词是什么

问题#2:当字符串重新组合在一起时,单词间距不正确

' Normalises the casing of the text held in queryForHTML:
'   * the first word always has its first letter upper-cased, whatever the word is;
'   * every later occurrence of a "noise" word (the, and, an, as, to, is, on) is lower-cased;
'   * all other words are left exactly as they were typed.
' Tokens are edited in place and glued back together with Join, so the spacing of the
' input (including runs of several spaces) is reproduced exactly and no stray leading
' space is introduced.
' NOTE(review): xIndex and xFirstWordDone are new working variables - add them to the
' page's Dim list if the page runs under Option Explicit.
xText = queryForHTML
xTextSplit = Split(xText, " ")
xFirstWordDone = False

For xIndex = 0 To UBound(xTextSplit)

    xWord = xTextSplit(xIndex)

    ' Empty tokens come from leading, trailing or repeated spaces; keep them untouched.
    If Len(xWord) > 0 Then

        If Not xFirstWordDone Then
            ' The first real word is always capitalised, even if it is a noise word.
            xWord = UCase(Left(xWord, 1)) & Mid(xWord, 2)
            xFirstWordDone = True
        Else
            Select Case LCase(xWord)
                Case "the", "and", "an", "as", "to", "is", "on"
                    xWord = LCase(xWord)
            End Select
        End If

        xTextSplit(xIndex) = xWord

    End If

Next

' xCompleteWord is still assigned so any later code that reads it keeps working.
xCompleteWord = Join(xTextSplit, " ")
queryForHTML = xCompleteWord
基本思想是使用一个正则表达式来匹配任何“word”字符序列(
[a-zA-Z0-9]
),对于每个序列,调用一个函数,该函数接收字符串匹配的参数、包含单词的捕获组、在字符串中找到单词的位置以及完整的源字符串


如果单词位于位置0,则大写。如果单词是“noise”单词,则将其小写,否则,该单词将大写。

以下代码基于
GetStringTypeW()
Win32 API函数,该函数提供有关字符串中每个字符的信息。在这里您只需要关心字符是大写还是小写。您的代码的问题在于,它只适用于用空格分隔单词的最简单情况。但是单词也可能被标点符号隔开。此外,还有许多Unicode字符没有“大写”和“小写”的概念

我利用GetStringTypeW()来完成这项工作,而不是编写这种枯燥、容易出错的代码。我遍历数组中的每个元素,其中每个元素对应于字符串中相同位置的一个字符。我有一个标志
bInWord
,它存储当前位置是否在单词内部。如果我们遇到一个大写或小写字符,而该标志尚未设置,我们就设置它,并将当前位置保存为单词的开头。此外,如果我们遇到一个大写字符,并且已经知道当前处于单词内部,那么就立即通过写入返回的字符串,把该字符变成小写。 当我们遇到非字母字符,或到达字符串末尾,并且设置了
bInWord
时,我们将刚结束的这个单词与“不应首字母大写”的单词列表进行比较。如果匹配,并且第一个字符是大写,那么我们将用小写字符覆盖该字符。如果不匹配,并且第一个字符是小写,我们将用大写字符覆盖该字符

Option Explicit

' Win32 import used to classify the characters of a Unicode string.
'   dwInfoType - which kind of type information is requested (CT_CTYPE1 below).
'   lpSrcStr   - address of the first character (callers pass StrPtr of a String).
'   cchSrc     - number of characters to classify.
'   lpCharType - first element of an Integer array that receives one entry per character.
' The caller in this module treats a return value of zero as failure.
Private Declare Function GetStringTypeW Lib "Kernel32.dll" _
    (ByVal dwInfoType As Long, ByVal lpSrcStr As Long, ByVal cchSrc As Long, ByRef lpCharType As Integer) As Long

' Value passed as dwInfoType.
Private Const CT_CTYPE1 As Long = &H1&

' Bit flags tested against the per-character entries written to lpCharType.
Private Const C1_UPPER As Long = &H1&       ' upper-case letter
Private Const C1_LOWER As Long = &H2&       ' lower-case letter
Private Const C1_DIGIT As Long = &H4&       ' decimal digit
Private Const C1_SPACE As Long = &H8&       ' space character
Private Const C1_PUNCT As Long = &H10&      ' punctuation
Private Const C1_CNTRL As Long = &H20&      ' control character
Private Const C1_BLANK As Long = &H40&      ' blank character
Private Const C1_XDIGIT As Long = &H80&     ' hexadecimal digit
Private Const C1_ALPHA As Long = &H100&     ' any linguistic character: alphabetical, syllabary or ideographic
Private Const C1_DEFINED As Long = &H200&   ' defined character that is none of the other C1_* types

Private Function ProperCaseWords(ByRef in_sText As String) As String
' Returns a copy of <in_sText> in which every word (a run of upper/lower case characters,
' as classified by GetStringTypeW) is "proper cased":
'   * every character after the first one of a word is made lower case;
'   * the first character is made upper case, unless the word is in the custom list of
'     words that stay lower case ("in", "on", "an", ...);
'   * the very first word of the string is always capitalised, even if it is in that list.
' Characters that are neither upper nor lower case are copied unchanged and end the current word.
' If the string is empty, or GetStringTypeW() reports failure, the original text is returned as is.
' <in_sText> itself is never modified.

    Dim lTextLen            As Long
    Dim aiCharType()        As Integer
    Dim lPos                As Long
    Dim lPosStartWord       As Long
    Dim bInWord             As Boolean
    Dim bFirstCharUCase     As Boolean
    Dim bFirstWordDone      As Boolean
    Dim bWantUCase          As Boolean
    Dim sWord               As String

    ' Output buffer contains a copy of the original string; all edits are made to this copy.
    ProperCaseWords = in_sText

    lTextLen = Len(in_sText)

    ' Nothing to classify in an empty string - do not rely on the API call failing for a zero length.
    If lTextLen = 0 Then
        Exit Function
    End If

    ' Resize the character type buffer to be one more than the string.
    ReDim aiCharType(1 To lTextLen + 1)

    ' Retrieve string type data about this Unicode string into <aiCharType()>.
    ' If it fails, then we just return the original string.
    ' Note that the last element in the array is not filled by this function, and will contain zero.
    ' This is deliberate, so a word that runs right up to the end of the string is still terminated.
    If GetStringTypeW(CT_CTYPE1, StrPtr(ProperCaseWords), lTextLen, aiCharType(1)) = 0 Then
        Exit Function
    End If

    ' We start outside a word, and have not yet completed the first word.
    bInWord = False
    bFirstWordDone = False

    ' Iterate through the entire array, including the last element which corresponds to no character.
    For lPos = 1 To lTextLen + 1

        If (aiCharType(lPos) And C1_LOWER) = C1_LOWER Then
        ' Lower case characters: only interesting when they start a new word.
            If Not bInWord Then
                bFirstCharUCase = False
                lPosStartWord = lPos
                bInWord = True
            End If
        ElseIf (aiCharType(lPos) And C1_UPPER) = C1_UPPER Then
        ' Upper case characters.
            If bInWord Then
            ' Past the first character of a word, so this character should be lower case.
                Mid$(ProperCaseWords, lPos, 1) = LCase$(Mid$(ProperCaseWords, lPos, 1))
            Else
                bFirstCharUCase = True
                lPosStartWord = lPos
                bInWord = True
            End If
        ElseIf bInWord Then
        ' Non lower or upper case character (or the final zero element) while inside a word:
        ' the word has just ended, so decide the case of its first character.
            bInWord = False

            If bFirstWordDone Then
                ' Retrieve the word, deliberately with a lower case first character.
                ' All other characters in the word have already been made lower case above.
                sWord = Mid$(ProperCaseWords, lPosStartWord, lPos - lPosStartWord)
                If bFirstCharUCase Then
                    Mid$(sWord, 1, 1) = LCase$(Mid$(sWord, 1, 1))
                End If

                ' Compare our word against a lower-case word list.
                Select Case sWord
                Case "in", "on", "an", "to", "and", "the", "with", "that", "is" ' <=== CUSTOM LIST OF WORDS
                    bWantUCase = False
                Case Else
                    bWantUCase = True
                End Select
            Else
                ' The first word of the string is always capitalised, whatever it is.
                bWantUCase = True
                bFirstWordDone = True
            End If

            ' Only touch the output when the first character is not already in the wanted case.
            If bWantUCase <> bFirstCharUCase Then
                If bWantUCase Then
                    Mid$(ProperCaseWords, lPosStartWord, 1) = UCase$(Mid$(ProperCaseWords, lPosStartWord, 1))
                Else
                    Mid$(ProperCaseWords, lPosStartWord, 1) = LCase$(Mid$(ProperCaseWords, lPosStartWord, 1))
                End If
            End If
        End If

    Next lPos

End Function
选项显式
私有声明函数GetStringTypeW Lib“Kernel32.dll”(_
ByVal是一种类型,只要_
ByVal lpSrcStr,只要_
ByVal cchSrc,只要_
ByRef LPChartType为整数_
)只要
私有Const CT_CTYPE1的长度=&H1
Private Const C1_大写,长度=&H1'大写
Private Const C1_LOWER As Long=&H2'小写
私有常量C1_数字,长度=&H4'十进制数字
Private Const C1_空格长度=&H8'空格字符
Private Const C1_point As Long=&H10'标点符号
Private Const C1_CNTRL作为Long=&H20'控制字符
Private Const C1_为空,长度=&H40'为空字符
私有常量C1_XDIGIT的长度=&H80'十六进制数字
Private Const C1_ALPHA As Long=&H100'任何语言字符:字母、音节或表意文字
Private Const C1_定义为Long=&H200'定义的字符,但不是其他C1_*类型之一
私有函数ProperCaseWords(ByRef在_sText中作为字符串)作为字符串
暗淡的和长的一样长
Dim aiCharType()作为整数
暗LPO与长LPO一样
暗淡的lPosStartWord如长
作为布尔值的双字
Dim bFirstCharUCase作为布尔值
暗剑如弦
'输出缓冲区包含原始字符串的副本。
ProperCaseWords=in_sText
lTextLen=Len(英寸)
'将字符类型缓冲区的大小调整为比字符串大一个。
重拨aiCharType(1至lTextLen+1)
'将有关此Unicode字符串的字符串类型数据检索到中。
'如果失败,那么我们只返回原始字符串。
'请注意,数组中的最后一个元素不是由此函数填充的,它将包含零。
'这是经过深思熟虑的,因此我们可以处理最后一个单词位于字符串末尾的情况。
如果(GetStringTypeW(CT_CTYPE1,strprtr(ProperCaseWords),lTextLen,aiCharType(1))=0,那么
退出功能
如果结束
“我们从一个词外开始。
bInWord=False
'遍历整个数组,包括不对应任何字符的最后一个元素。
对于LPO=1到lTextLen+1
如果(aiCharType(lPos)和C1_LOWER)=C1_LOWER,则
'小写字符。
如果不是的话
bFirstCharUCase=False
lPosStartWord=lPos
bInWord=True
如果结束
ElseIf(Aichart类型(lPos)和C1_UPPER)=C1_UPPER然后
'大写字符。
如果是的话
'如果我们已经在单词中,即超过了第一个字符,那么我们知道字符*应该是小写的。
Mid$(PROPERCASEWORD,LPO,1)=LCase$(Mid$(PROPERCASEWORD,LPO,1))
其他的
bFirstCharUCase=True
lPosStartWord=lPos
bInWord=True
如果结束
其他的
'非小写或大写字符。还包括最后一个(零)元素。
如果是的话
“如果我们在一个单词中,并且最新的字符不是按字母顺序排列的,那么我们现在检查它是什么单词,然后
'决定第一个字符是大写还是小写。
bInWord=False
'从字符串中检索单词,并故意使第一个字符小写。
'
Option Explicit

' Win32 import used to classify the characters of a Unicode string.
'   dwInfoType - which kind of type information is requested (CT_CTYPE1 below).
'   lpSrcStr   - address of the first character (callers pass StrPtr of a String).
'   cchSrc     - number of characters to classify.
'   lpCharType - first element of an Integer array that receives one entry per character.
' The caller in this module treats a return value of zero as failure.
Private Declare Function GetStringTypeW Lib "Kernel32.dll" _
    (ByVal dwInfoType As Long, ByVal lpSrcStr As Long, ByVal cchSrc As Long, ByRef lpCharType As Integer) As Long

' Value passed as dwInfoType.
Private Const CT_CTYPE1 As Long = &H1&

' Bit flags tested against the per-character entries written to lpCharType.
Private Const C1_UPPER As Long = &H1&       ' upper-case letter
Private Const C1_LOWER As Long = &H2&       ' lower-case letter
Private Const C1_DIGIT As Long = &H4&       ' decimal digit
Private Const C1_SPACE As Long = &H8&       ' space character
Private Const C1_PUNCT As Long = &H10&      ' punctuation
Private Const C1_CNTRL As Long = &H20&      ' control character
Private Const C1_BLANK As Long = &H40&      ' blank character
Private Const C1_XDIGIT As Long = &H80&     ' hexadecimal digit
Private Const C1_ALPHA As Long = &H100&     ' any linguistic character: alphabetical, syllabary or ideographic
Private Const C1_DEFINED As Long = &H200&   ' defined character that is none of the other C1_* types

Private Function ProperCaseWords(ByRef in_sText As String) As String
' Returns a copy of <in_sText> in which every word (a run of upper/lower case characters,
' as classified by GetStringTypeW) is "proper cased":
'   * every character after the first one of a word is made lower case;
'   * the first character is made upper case, unless the word is in the custom list of
'     words that stay lower case ("in", "on", "an", ...);
'   * the very first word of the string is always capitalised, even if it is in that list.
' Characters that are neither upper nor lower case are copied unchanged and end the current word.
' If the string is empty, or GetStringTypeW() reports failure, the original text is returned as is.
' <in_sText> itself is never modified.

    Dim lTextLen            As Long
    Dim aiCharType()        As Integer
    Dim lPos                As Long
    Dim lPosStartWord       As Long
    Dim bInWord             As Boolean
    Dim bFirstCharUCase     As Boolean
    Dim bFirstWordDone      As Boolean
    Dim bWantUCase          As Boolean
    Dim sWord               As String

    ' Output buffer contains a copy of the original string; all edits are made to this copy.
    ProperCaseWords = in_sText

    lTextLen = Len(in_sText)

    ' Nothing to classify in an empty string - do not rely on the API call failing for a zero length.
    If lTextLen = 0 Then
        Exit Function
    End If

    ' Resize the character type buffer to be one more than the string.
    ReDim aiCharType(1 To lTextLen + 1)

    ' Retrieve string type data about this Unicode string into <aiCharType()>.
    ' If it fails, then we just return the original string.
    ' Note that the last element in the array is not filled by this function, and will contain zero.
    ' This is deliberate, so a word that runs right up to the end of the string is still terminated.
    If GetStringTypeW(CT_CTYPE1, StrPtr(ProperCaseWords), lTextLen, aiCharType(1)) = 0 Then
        Exit Function
    End If

    ' We start outside a word, and have not yet completed the first word.
    bInWord = False
    bFirstWordDone = False

    ' Iterate through the entire array, including the last element which corresponds to no character.
    For lPos = 1 To lTextLen + 1

        If (aiCharType(lPos) And C1_LOWER) = C1_LOWER Then
        ' Lower case characters: only interesting when they start a new word.
            If Not bInWord Then
                bFirstCharUCase = False
                lPosStartWord = lPos
                bInWord = True
            End If
        ElseIf (aiCharType(lPos) And C1_UPPER) = C1_UPPER Then
        ' Upper case characters.
            If bInWord Then
            ' Past the first character of a word, so this character should be lower case.
                Mid$(ProperCaseWords, lPos, 1) = LCase$(Mid$(ProperCaseWords, lPos, 1))
            Else
                bFirstCharUCase = True
                lPosStartWord = lPos
                bInWord = True
            End If
        ElseIf bInWord Then
        ' Non lower or upper case character (or the final zero element) while inside a word:
        ' the word has just ended, so decide the case of its first character.
            bInWord = False

            If bFirstWordDone Then
                ' Retrieve the word, deliberately with a lower case first character.
                ' All other characters in the word have already been made lower case above.
                sWord = Mid$(ProperCaseWords, lPosStartWord, lPos - lPosStartWord)
                If bFirstCharUCase Then
                    Mid$(sWord, 1, 1) = LCase$(Mid$(sWord, 1, 1))
                End If

                ' Compare our word against a lower-case word list.
                Select Case sWord
                Case "in", "on", "an", "to", "and", "the", "with", "that", "is" ' <=== CUSTOM LIST OF WORDS
                    bWantUCase = False
                Case Else
                    bWantUCase = True
                End Select
            Else
                ' The first word of the string is always capitalised, whatever it is.
                bWantUCase = True
                bFirstWordDone = True
            End If

            ' Only touch the output when the first character is not already in the wanted case.
            If bWantUCase <> bFirstCharUCase Then
                If bWantUCase Then
                    Mid$(ProperCaseWords, lPosStartWord, 1) = UCase$(Mid$(ProperCaseWords, lPosStartWord, 1))
                Else
                    Mid$(ProperCaseWords, lPosStartWord, 1) = LCase$(Mid$(ProperCaseWords, lPosStartWord, 1))
                End If
            End If
        End If

    Next lPos

End Function