用python验证XMPP jid？_Python_Validation_Xmpp

用python验证XMPP jid？

python validation xmpp

用python验证XMPP jid？,python,validation,xmpp,Python,Validation,Xmpp,验证xmpp jid的正确方法是什么？语法已经描述过了，但我不太懂。而且，它看起来相当复杂，所以使用一个库来完成它似乎是一个好主意我目前正在使用xmpppy，但我似乎找不到如何用它来验证jid。感谢您的帮助首先，当前JID的最佳参考是我只是想在这里给你正则表达式，但有点失控，实现了所有规范： import re import sys import socket import encodings.idna import stringprep # These characters aren'

验证 XMPP JID 的正确方法是什么？相关语法在规范中已有描述，但我不太看得懂。而且它看起来相当复杂，所以用一个现成的库来完成验证似乎是个好主意。

我目前正在使用xmpppy，但我似乎找不到如何用它来验证jid。感谢您的帮助

首先，目前关于 JID 的最佳参考是 RFC 6122。

我本来只想在这里给你一个正则表达式，结果有点收不住，把整个规范都实现了：

import re
import sys
import socket
import encodings.idna
import stringprep

# ASCII code points that are not allowed in a domain label used in XMPP:
# everything below 0x80 except letters, digits and hyphen-minus (0x2d).
# Built from explicit inclusive ranges instead of ``range(...) + [...]`` so
# that it does not depend on ``range()`` returning a list.
BAD_DOMAIN_ASCII = "".join(
    chr(code)
    for first, last in (
        (0x00, 0x2c),  # control characters, space, !"#$%&'()*+,
        (0x2e, 0x2f),  # . /
        (0x3a, 0x40),  # : ; < = > ? @
        (0x5b, 0x60),  # [ \ ] ^ _ `
        (0x7b, 0x7f),  # { | } ~ DEL
    )
    for code in range(first, last + 1)
)

def bidi(chars):
    """Check the stringprep bidirectional-text requirements for *chars*.

    *chars* is a sequence of single characters (a text string works).
    Returns ``None`` when the requirements hold.

    Raises:
        UnicodeError: if the text contains a RandALCat character (table
            D.1) together with an LCat character (table D.2), or contains
            a RandALCat character without both starting and ending with
            one.
    """
    # Materialise the flags in a real list: it is indexed below, and the
    # checks only need to run once rather than once per RandALCat character.
    rand_al = [stringprep.in_table_d1(c) for c in chars]
    if not any(rand_al):
        # No right-to-left characters, so nothing further to verify.
        return

    # 1) The characters in section 5.8 MUST be prohibited.  This is table
    #    C.8, which callers are expected to have checked already.

    # 2) If a string contains any RandALCat character, the string MUST NOT
    #    contain any LCat character.
    if any(stringprep.in_table_d2(c) for c in chars):
        raise UnicodeError("Violation of BIDI requirement 2")

    # 3) If a string contains any RandALCat character, a RandALCat
    #    character MUST be the first character of the string, and a
    #    RandALCat character MUST be the last character of the string.
    if not rand_al[0] or not rand_al[-1]:
        raise UnicodeError("Violation of BIDI requirement 3")

def nodeprep(u):
    """Apply the Nodeprep stringprep profile to the node part of a JID.

    Steps, in order: drop characters from table B.1, case-fold with table
    B.2, normalise with NFKC, reject prohibited characters, then run the
    bidirectional check.

    Args:
        u: the node (the part before ``@``) as text.

    Returns:
        The prepared node as a text string.

    Raises:
        UnicodeError: if a prohibited character is present or the
            bidirectional rules are violated.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3.
    text_type = type(u"")

    mapped = []
    for c in text_type(u):
        # Table B.1: characters that are mapped to nothing.
        if stringprep.in_table_b1(c):
            continue
        # Table B.2: case folding (may expand to more than one character).
        mapped.append(stringprep.map_table_b2(c))

    # NFKC.  ``stringprep.unicodedata`` is the database object the stringprep
    # module itself uses (CPython binds it to the Unicode 3.2 tables), so the
    # normalisation stays consistent with the table lookups.  The separator
    # is a text literal so an empty result is still text, not a byte string.
    chars = stringprep.unicodedata.normalize("NFKC", u"".join(mapped))

    prohibited_tables = (
        stringprep.in_table_c11,
        stringprep.in_table_c12,
        stringprep.in_table_c21,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
    )
    for c in chars:
        # Besides the stringprep tables, a node may not contain the
        # characters that delimit or could be confused with JID syntax.
        if any(check(c) for check in prohibited_tables) or c in "\"&'/:<>@":
            raise UnicodeError("Invalid node character")

    bidi(chars)

    return chars

def resourceprep(res):
    """Apply the Resourceprep stringprep profile to the resource of a JID.

    Steps, in order: drop characters from table B.1, normalise with NFKC,
    reject prohibited characters, then run the bidirectional check.  Unlike
    the node profile there is no case folding, and table C.1.1 (ASCII
    space) is not prohibited.

    Args:
        res: the resource (the part after ``/``) as text.

    Returns:
        The prepared resource as a text string.

    Raises:
        UnicodeError: if a prohibited character is present or the
            bidirectional rules are violated.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3.
    text_type = type(u"")

    # Table B.1: characters that are mapped to nothing.
    kept = [c for c in text_type(res) if not stringprep.in_table_b1(c)]

    # NFKC, using the same database object as the stringprep tables.  The
    # separator is a text literal so an empty result is still text.
    chars = stringprep.unicodedata.normalize("NFKC", u"".join(kept))

    prohibited_tables = (
        stringprep.in_table_c12,
        stringprep.in_table_c21,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
    )
    for c in chars:
        if any(check(c) for check in prohibited_tables):
            raise UnicodeError("Invalid resource character")

    bidi(chars)

    return chars

# First-pass split of a JID into optional node, domain and optional resource.
# ``\Z`` is used instead of ``$`` because ``$`` also matches just before a
# trailing newline, which would let "user@host/res\n" through.
_JID_RE = re.compile(
    r"^(?:([^\"&'/:<>@]{1,1023})@)?"  # node, followed by "@"
    r"([^/@]{1,1023})"                # domain
    r"(?:/(.{1,1023}))?\Z"            # "/" followed by resource
)


def _is_ip_literal(domain):
    """Return True when *domain* parses as an IPv4 or IPv6 address."""
    for family in (socket.AF_INET, socket.AF_INET6):
        try:
            socket.inet_pton(family, domain)
        except (socket.error, ValueError, TypeError):
            # socket.error: not an address of this family.
            # ValueError / TypeError: the text could not even be handed to
            # inet_pton (embedded NUL, or non-ASCII text on Python 2), so it
            # is certainly not an IP literal.
            continue
        return True
    return False


def _jid_part_fits(part):
    """Return True when *part* is 1 to 1023 bytes long encoded as UTF-8."""
    return 0 < len(part.encode("utf-8")) <= 1023


def parse_jid(jid):
    """Validate a JID and split it into its prepared parts.

    Args:
        jid: the JID as text, in the form ``[node@]domain[/resource]``.

    Returns:
        ``False`` when the JID is invalid.  Otherwise a tuple
        ``(node, domain, resource)`` in which ``node`` and ``resource`` are
        ``None`` when absent and otherwise the output of ``nodeprep`` /
        ``resourceprep``.  ``domain`` is returned unchanged for IP literals
        and as the dot-joined nameprep'd labels for host names.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3.
    text_type = type(u"")

    # first pass
    m = _JID_RE.match(jid)
    if not m:
        return False

    (node, domain, resource) = m.groups()

    # NOTE(review): a bare IPv6 address is accepted here; RFC 6122 appears to
    # expect IPv6 literals in square brackets -- confirm before relying on it.
    if not _is_ip_literal(domain):
        # domain name: validate each label separately
        dom = []
        for label in domain.split("."):
            try:
                label = encodings.idna.nameprep(text_type(label))
                # Called only for its validation; the ASCII form is discarded.
                encodings.idna.ToASCII(label)
            except UnicodeError:
                return False

            # UseSTD3ASCIIRules is set, but Python's nameprep doesn't
            # enforce it.
            # a) Verify the absence of non-LDH ASCII code points.
            if any(c in BAD_DOMAIN_ASCII for c in label):
                return False
            # b) Verify the absence of leading and trailing hyphen-minus.
            if label.startswith("-") or label.endswith("-"):
                return False
            dom.append(label)
        domain = ".".join(dom)

    try:
        if node is not None:
            node = nodeprep(node)
        if resource is not None:
            resource = resourceprep(resource)
        # The regex only bounds the raw character count.  Preparation can
        # shrink a part to nothing, and the 1023 limit is checked here in
        # bytes, so re-check every part that is present.
        present = [p for p in (node, domain, resource) if p is not None]
        if not all(_jid_part_fits(p) for p in present):
            return False
    except UnicodeError:
        return False

    return node, domain, resource

if __name__ == "__main__":
    # Command-line check: validate the JID given as the first argument and
    # print either the parsed (node, domain, resource) tuple or "FAIL".
    if len(sys.argv) < 2:
        sys.exit("usage: %s JID" % sys.argv[0])
    results = parse_jid(sys.argv[1])
    if not results:
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2 and is valid syntax on Python 3.
        print("FAIL")
    else:
        print(results)

重新导入
导入系统
导入套接字
导入编码.idna
导入stringprep
#在使用的域名中不允许使用这些字符
#在XMPP中
BAD_DOMAIN_ASCII=”“.join（[chr（c）表示范围（0,0x2d）内的c）+
[0x2e，0x2f]+
范围（0x3a、0x41）+
范围（0x5b、0x61）+
范围（0x7b，0x80）]）
#检查双向字符有效性
def bidi（字符）：
RandAL=map（stringprep.in_表_d1，字符）
对于RandAL中的c：
如果是c：
#字符串中有一个RandAL字符。必须进一步执行
#测试：
#1）必须禁止使用第5.8节中的字符。
#这是已经检查过的表C.8
#2）如果字符串包含任何RandALCat字符，则该字符串
#不能包含任何LCat字符。
如果过滤器（stringprep.in_表_d2，字符）：
提出错误（“违反BIDI要求2”）
#3）如果字符串包含任何RandALCat字符，则
#RandALCat字符必须是
#字符串，并且RandALCat字符必须是最后一个
#字符串的字符。
如果不是RandAL[0]或不是RandAL[-1]：
提出错误（“违反BIDI要求3”）
def nodeprep（u）：
字符=列表（unicode（u））
i=0
而我则是：
c=字符[i]
#化为乌有
如果stringprep.在表b1（c）中：
德尔查尔斯[i]
其他：
#箱折
chars[i]=stringprep.map\u table\u b2（c）
i+=1
#NFKC
chars=stringprep.unicodedata.normalize（“NFKC”，“join（chars））
对于以字符表示的c：
如果（表11（c）中的stringprep.或
表12（c）中的stringprep.或
表c21（c）中的stringprep.或
表c22（c）中的stringprep.或
表c3（c）中的stringprep.或
表c4（c）中的stringprep.或
表c5（c）中的stringprep.或
表c6（c）中的stringprep.或
表7（c）中的stringprep.或
表8（c）中的stringprep.或
表9（c）中的stringprep.或
在“\”和“/：@”中的c：
引发Unicode错误（“无效节点字符”）
比迪（查尔斯）
返回字符
def资源准备（res）：
字符=列表（unicode（res））
i=0
而我则是：
c=字符[i]
#化为乌有
如果stringprep.在表b1（c）中：
德尔查尔斯[i]
其他：
i+=1
#NFKC
chars=stringprep.unicodedata.normalize（“NFKC”，“join（chars））
对于以字符表示的c：
如果（表12（c）中的stringprep.或
表c21（c）中的stringprep.或
表c22（c）中的stringprep.或
表c3（c）中的stringprep.或
表c4（c）中的stringprep.或
表c5（c）中的stringprep.或
表c6（c）中的stringprep.或
表7（c）中的stringprep.或
表8（c）中的stringprep.或
stringprep.in_表_c9（c））：
引发Unicode错误（“无效节点字符”）
比迪（查尔斯）
返回字符
def parse_jid（jid）：
#第一关
m=重新匹配（“^（？（[^\”&“/：@]{11023}”）？（[^/@]{11023}）（？：/（{11023}））？$”，jid）
如果不是m：
返回错误
（节点、域、资源）=m.groups（）
尝试：
#ipv4地址？
socket.inet\u pton（socket.AF\u inet，域）
除socket.error外：
#ipv6地址？
尝试：
socket.inet\u pton（socket.AF\u INET6，域）
除socket.error外：
#域名
dom=[]
对于域中的标签。拆分（“.”）：
尝试：
label=encodings.idna.nameprep（unicode（标签））
编码.idna.ToASCII（标签）
除Unicode错误外：
返回错误
#UseSTD3ASCIIRules已设置，但Python的nameprep没有强制执行它。
#a）验证是否存在非LDH ASCII码点；就是
对于标签中的c：
如果c在坏的\u域\u ASCII中：
返回错误
#验证是否缺少前导和尾随连字符减号
如果标签[0]='-'或标签[-1]='-'：
返回错误
dom.append（标签）
domain=“.”.join（dom）
尝试：
如果节点不是“无”：
node=nodeprep（节点）
如果资源不是无：
资源=资源准备（资源）
除Unicode错误外：
返回错误
返回节点、域、资源
如果名称=“\uuuuu main\uuuuuuuu”：
results=parse_jid（sys.argv[1]）
如果没有结果：
打印“失败”
其他：
打印结果

是的，工作量确实很大。这一切都有充分的理由，但如果 PRECIS 工作组取得成果，我们希望将来能把它简化一些。

抱歉这么晚才来提问；我本来打算按照您的方式实现它，但我想知道逐个代码点进行迭代是否真的适用于 stringprep。在 RFC 中，他们讨论的是“字符”，而字符不一定等同于代码点（例如组合变音符号）。还是我遗漏了一些 Unicode 术语方面的内容？stringprep RFC 是在 IETF 对 Unicode 的看法发生细微变化（以解决这类问题）之前编写的。当 RFC 说“字符”时，在大多数地方它的意思是“代码点”。我们正在努力