
在 Python 中使用正则表达式按罗马数字拆分文本,python,regex,Python,Regex,我需要按罗马数字序号拆分文本。这是我的文本: This is the part (a) of question number one. i. This is sub part one of part (a) question one ii. This is sub part two of part (a) question one iii. This is sub part three of part (a) question one 实际上,这是一道题目的一部分。我希望它能按如下方式拆分: Thi…


This is the part (a) of question number one. i. This is sub part one of part (a) question one ii. This is sub part two of part (a) question one iii. This is sub part three of part (a) question one

text = This is the part (a) of question number one. i. This is sub part one of part (a) question one ii. This is sub part two of part (a) question one iii. This is sub part three of part (a) question one
for m in re.split(r' [a-z]+\. ',text):


text = 'This is the part (a) of question number one. i. This is sub part one of part (a) question one ii. This is sub part two of part (a) question one iii. This is sub part three of part (a) question one'

for m in re.split(r' [MDCLXVI]+\. ', text, flags=re.IGNORECASE):


This is the part (a) of question number




class RomanError(Exception):
    """Base class for every error raised by the Roman numeral helpers."""


class OutOfRangeError(RomanError):
    """Raised when a number lies outside the convertible range."""


class NotIntegerError(RomanError):
    """Raised when a number with a fractional part is given."""


class InvalidRomanNumeralError(RomanError):
    """Error type reserved for malformed Roman numeral strings."""

def toRoman(n):
    """Convert an integer to its Roman numeral representation.

    The numeral is built greedily from the largest symbol downwards, using
    the subtractive pairs (CM, CD, XC, XL, IX, IV). Values of 4000 and above
    are written with repeated ``M`` (e.g. 4000 -> ``MMMM``).

    Args:
        n: The number to convert; must satisfy ``0 < n < 5000`` and have no
            fractional part.

    Returns:
        The upper-case Roman numeral as a string.

    Raises:
        OutOfRangeError: If ``n`` is not strictly between 0 and 5000.
        NotIntegerError: If ``n`` has a fractional part.
    """
    # Exceptions are raised with the call form: the old ``raise Exc, "msg"``
    # statement is Python 2-only syntax and a SyntaxError on Python 3.
    if not (0 < n < 5000):
        raise OutOfRangeError("number out of range (must be 1..4999)")
    if int(n) != n:
        raise NotIntegerError("decimals can not be converted")

    # Ordered from largest to smallest so the greedy loop below always
    # consumes the biggest symbol that still fits.
    romanNumeralMap = (
        ('M', 1000), ('CM', 900), ('D', 500), ('CD', 400),
        ('C', 100), ('XC', 90), ('L', 50), ('XL', 40),
        ('X', 10), ('IX', 9), ('V', 5), ('IV', 4), ('I', 1),
    )

    pieces = []
    for numeral, integer in romanNumeralMap:
        while n >= integer:
            pieces.append(numeral)
            n -= integer
    return "".join(pieces)

>>> pat=' (?:'+'|'.join([int_to_roman(i).lower() for i in range(1,21)])+')\. '
>>> pat
' (?:i|ii|iii|iv|v|vi|vii|viii|ix|x|xi|xii|xiii|xiv|xv|xvi|xvii|xviii|xix|xx)\\. '

>>> print '\n'.join(re.split(pat, txt))
>>> pat=re.compile('''\
... [ ]                 # one space
... m{0,4}              # thousands - 0 to 4 M's
... (?:cm|cd|d?c{0,3})  # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 C's),
...                     #            or 500-800 (D, followed by 0 to 3 C's)
... (?:xc|xl|l?x{0,3})  # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 X's),
...                     #        or 50-80 (L, followed by 0 to 3 X's)
... (?:ix|iv|v?i{0,3})  # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 I's),
...                     #        or 5-8 (V, followed by 0 to 3 I's)
... [.][ ]                # full stop then a space''', re.X)
>>> print '\n'.join(pat.split(txt))
>>> print '\n'.join(re.split(pat, txt))
>>> print '\n'.join(pat.split(txt))
