Python中的复杂数据提取
我需要一些帮助来开始编写一个程序。我每周都会参加几场在线扑克锦标赛。我使用的网站会记录每手牌的历史,并将其以 .txt 文件的形式保存到我的硬盘上。不幸的是,数据的格式比较粗糙。我想编写一个程序,逐手记录牌局,并告诉我每手牌赢了或输了多少。我在下面粘贴了一手牌的样本,希望从每手牌中提取以下信息:
标签:python, data-extraction
盲注和前注。向下滚动示例,可以看到“玩家8下小盲注(250)”,然后是“玩家1下大盲注(500)”。在这之上列出了每位玩家的前注,例如“玩家 Hero 下前注(50)”。在这种情况下,小盲注=250,大盲注=500,前注=50
我的筹码量
HandNumber = 000001
BigBlind = 500
Ante = 50
Players = 8
StackSize = 17595
Hand = 10c7h
Position = 6 # small blind = 1; add 5 since I'm 5 positions removed
Profit = -50
我的经验水平:我学习了大约6个月的Python开发、数据科学和SQL在线课程。我对类有一些了解,但没有太多自己编写类的经验。我写过一些程序,用正则表达式从财务报表中提取数据。
最简单的解决办法是先用正则表达式把各手牌局分割开,再用更多的正则表达式从每手牌中提取信息。我会创建一个类来保存所有这些信息,然后您可以使用数据库或json来存储这些信息。
def split_file(file_handle):
    """Yield one regex match per poker hand found in a hand-history file.

    Args:
        file_handle: A text-mode file-like object; its whole content is
            read with a single ``read()`` call.

    Yields:
        ``re.Match`` objects exposing the named groups ``game_start``,
        ``game``, ``summary`` and ``game_end``.
    """
    # The timestamp groups are confined to one line and both body groups
    # are non-greedy.  With a greedy ``.*`` under DOTALL, ``summary`` ran
    # on to the *last* "Game ended at:" line and ``game_end`` swallowed the
    # rest of the text, so a file holding several hands produced a single
    # merged match whose end time could not be parsed.
    pat_str = (
        r'^Game started at: (?P<game_start>[^\n]*)\n'
        r'(?P<game>.*?)\n'
        r'^------ Summary ------\n'
        r'(?P<summary>.*?)\n'
        r'^Game ended at: (?P<game_end>[^\n]*)$'
    )
    pat = re.compile(pat_str, flags=re.MULTILINE | re.DOTALL)
    text = file_handle.read()
    yield from pat.finditer(text)
class Pokergame:
    """A single poker hand built from one match produced by ``split_file``.

    The constructor sets four attributes:
        game_start, game_end: ``datetime.datetime`` values parsed from the
            ``'game_start'`` and ``'game_end'`` entries of ``game_info``.
        game_info: Whatever ``_parse_game`` returns for the ``'game'`` entry.
        summary: Whatever ``_parse_summary`` returns for the ``'summary'``
            entry.
    """

    def __init__(self, game_info, playername='Hero'):
        # Both timestamps share one layout, e.g. "2016/3/12 16:51:49".
        timestamp_format = "%Y/%m/%d %H:%M:%S"
        to_datetime = datetime.datetime.strptime

        self.game_start = to_datetime(game_info['game_start'], timestamp_format)
        self.game_end = to_datetime(game_info['game_end'], timestamp_format)

        hand_text = game_info['game']
        self.game_info = _parse_game(hand_text, playername)

        summary_text = game_info['summary']
        self.summary = _parse_summary(summary_text, playername)
def _parse_game(game_str, playername):
    """Extract one player's seat, stack and dealt cards from a hand.

    Args:
        game_str: Text of a single hand (the part before the summary).
        playername: Player name as written in the history; it is matched
            literally.

    Returns:
        A dict with the keys ``'seat'`` and ``'stack'`` (digit strings, or
        ``None`` when no seat line for the player is found), ``'cards'``
        (tuple of card strings in the order they appear) and ``'text'``
        (the unmodified ``game_str``).
    """
    # Escape the name so regex metacharacters in it are taken literally.
    name = re.escape(playername)

    # Raw f-strings: "\d" and "\(" are invalid escapes in a plain literal.
    seat_match = re.search(rf'Seat (\d+): {name} \((\d+)\).', game_str)
    # Fall back to None so a hand without the player's seat line does not
    # leave these two values undefined.
    seat, stack = seat_match.groups() if seat_match else (None, None)

    pattern_cards = rf'Player {name} received card: \[(?P<card>\w+)\]'
    cards = tuple(m['card'] for m in re.finditer(pattern_cards, game_str))

    return {
        'seat': seat,
        'stack': stack,
        'cards': cards,
        'text': game_str,
    }
def _parse_summary(summary_str, playername):
    """Return the summary text of a hand unchanged.

    Placeholder: ``playername`` is accepted but not used, and no parsing
    of ``summary_str`` takes place.
    """
    unparsed_summary = summary_str
    return unparsed_summary
# Build one Pokergame per hand found in ``hand_text``.
games = []
with StringIO(hand_text) as file_handle:
    games.extend(Pokergame(match) for match in split_file(file_handle))
def split_file(file_handle):
    """Yield one regex match per poker hand found in a hand-history file.

    Args:
        file_handle: A text-mode file-like object; its whole content is
            read with a single ``read()`` call.

    Yields:
        ``re.Match`` objects exposing the named groups ``game_start``,
        ``game``, ``summary`` and ``game_end``.
    """
    # The timestamp groups are confined to one line and both body groups
    # are non-greedy.  With a greedy ``.*`` under DOTALL, ``summary`` ran
    # on to the *last* "Game ended at:" line and ``game_end`` swallowed the
    # rest of the text, so a file holding several hands produced a single
    # merged match whose end time could not be parsed.
    pat_str = (
        r'^Game started at: (?P<game_start>[^\n]*)\n'
        r'(?P<game>.*?)\n'
        r'^------ Summary ------\n'
        r'(?P<summary>.*?)\n'
        r'^Game ended at: (?P<game_end>[^\n]*)$'
    )
    pat = re.compile(pat_str, flags=re.MULTILINE | re.DOTALL)
    text = file_handle.read()
    yield from pat.finditer(text)
class Pokergame:
    """A single poker hand built from one match produced by ``split_file``.

    The constructor sets four attributes:
        game_start, game_end: ``datetime.datetime`` values parsed from the
            ``'game_start'`` and ``'game_end'`` entries of ``game_info``.
        game_info: Whatever ``_parse_game`` returns for the ``'game'`` entry.
        summary: Whatever ``_parse_summary`` returns for the ``'summary'``
            entry.
    """

    def __init__(self, game_info, playername='Hero'):
        # Both timestamps share one layout, e.g. "2016/3/12 16:51:49".
        timestamp_format = "%Y/%m/%d %H:%M:%S"
        to_datetime = datetime.datetime.strptime

        self.game_start = to_datetime(game_info['game_start'], timestamp_format)
        self.game_end = to_datetime(game_info['game_end'], timestamp_format)

        hand_text = game_info['game']
        self.game_info = _parse_game(hand_text, playername)

        summary_text = game_info['summary']
        self.summary = _parse_summary(summary_text, playername)
def _parse_game(game_str, playername):
    """Extract one player's seat, stack and dealt cards from a hand.

    Args:
        game_str: Text of a single hand (the part before the summary).
        playername: Player name as written in the history; it is matched
            literally.

    Returns:
        A dict with the keys ``'seat'`` and ``'stack'`` (digit strings, or
        ``None`` when no seat line for the player is found), ``'cards'``
        (tuple of card strings in the order they appear) and ``'text'``
        (the unmodified ``game_str``).
    """
    # Escape the name so regex metacharacters in it are taken literally.
    name = re.escape(playername)

    # Raw f-strings: "\d" and "\(" are invalid escapes in a plain literal.
    seat_match = re.search(rf'Seat (\d+): {name} \((\d+)\).', game_str)
    # Fall back to None so a hand without the player's seat line does not
    # leave these two values undefined.
    seat, stack = seat_match.groups() if seat_match else (None, None)

    pattern_cards = rf'Player {name} received card: \[(?P<card>\w+)\]'
    cards = tuple(m['card'] for m in re.finditer(pattern_cards, game_str))

    return {
        'seat': seat,
        'stack': stack,
        'cards': cards,
        'text': game_str,
    }
def _parse_summary(summary_str, playername):
    """Return the summary text of a hand unchanged.

    Placeholder: ``playername`` is accepted but not used, and no parsing
    of ``summary_str`` takes place.
    """
    unparsed_summary = summary_str
    return unparsed_summary
# Build one Pokergame per hand found in ``hand_text``.
games = []
with StringIO(hand_text) as file_handle:
    games.extend(Pokergame(match) for match in split_file(file_handle))
games[0].game_info
{'cards': ('10c', '7h'),
'seat': '3',
'stack': '17595',
'text': "Game ID: 1094127759 250/500 $5,000 GTD, ...\nPlayer Player4 mucks cards"}