Python 3.x 在docx文件中搜索一个单词,并将该文件复制到关键字文件夹中
我正在做一项任务:我有一个文本文件,其中包含特定的关键字,如C、Angular、Python、Rest、MySQL,并且必须为这些关键字创建文件夹。在此之后,我必须在简历中搜索这些关键字,如果找到了,就将该简历复制到对应的每个文件夹中。例如,A拥有C和Angular技能,因此他/她的简历将同时放在这两个文件夹中。我已完成文件夹创建,我需要帮助在.docx文件中搜索单词并将文件复制到相应的文件夹。我已在网上查找资料,但无法继续。有谁能给我一些如何在文档中搜索单词/字符串的线索吗?文件 这是我的代码:文件夹创建,以及读取docx。希望这有助于:Python 3.x 在docx文件中搜索一个单词,并将该文件复制到关键字文件夹中,python-3.x,python-2.7,python-docx,Python 3.x,Python 2.7,Python Docx。我正在做一项任务:我有一个文本文件,其中包含特定的关键字,如C、Angular、Python、Rest、MySQL,并且必须为这些关键字创建文件夹。在此之后,我必须在简历中搜索这些关键字,如果找到了,就将该简历复制到对应的每个文件夹中。例如,A拥有C和Angular技能,因此他/她的简历将同时放在这两个文件夹中。我已完成文件夹创建,我需要帮助在.docx文件中搜索单词并将文件复制到相应的文件夹。我已在网上查找资料,但无法继续。有谁能给我一些如何在文档中搜索单词/字符串的线索吗?文件 这是我的代码:文件夹创建,以及读取docx
from docx import Document
from shutil import copyfile
import os, re, random
# Folder which contains all the resumes
ALL_RESUMES = "all_resumes/"
# The folder which will contain the separated resumes
SEGREGATED_RESUMES = "topic_wise_resumes/"


def get_keywords(keywords_file, create_new=False):
    """
    Read the keywords and make sure every keyword has its own output folder.

    Only the first line of ``keywords_file`` is read. It is split on commas
    and each entry is stripped and lower-cased so later comparisons are
    case-insensitive. Blank entries (e.g. from a trailing comma) are skipped.

    Args:
        keywords_file: Path of the text file with comma-separated keywords.
        create_new: Unused; kept so existing callers keep working.

    Returns:
        The lower-cased keywords, in the order they appear in the file.
    """
    # Context manager so the handle is closed even if reading fails
    with open(keywords_file, "r") as keyword_source:
        first_line = keyword_source.readline()

    keywords = []
    for raw_entry in first_line.split(","):
        keyword = raw_entry.strip().lower()
        if not keyword:
            continue
        keywords.append(keyword)
        # Check/create the keyword's own folder, not just the parent:
        # testing only the parent would stop after the first keyword.
        os.makedirs(os.path.join(SEGREGATED_RESUMES, keyword), exist_ok=True)
    return keywords
def _build_keyword_pattern(keywords):
    """Compile a regex matching any keyword as a whole term; None if there are no keywords."""
    terms = {keyword.strip().lower() for keyword in keywords if keyword.strip()}
    if not terms:
        return None
    # Longest first so that e.g. "c++" is preferred over "c" at the same spot
    ordered = sorted(terms, key=lambda term: (-len(term), term))
    alternatives = "|".join(re.escape(term) for term in ordered)
    # Lookarounds rather than \b so terms ending in punctuation ("c++", "c#")
    # still match, while "rest" no longer matches inside "interest".
    return re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")


def _document_text(document):
    """Return the text of the body paragraphs and top-level table cells, joined by spaces."""
    pieces = [paragraph.text for paragraph in document.paragraphs]
    for table in document.tables:
        for row in table.rows:
            pieces.extend(cell.text for cell in row.cells)
    return " ".join(pieces)


def segregate_resumes(keywords):
    """
    Copy every resume into the folder of each keyword it mentions.

    Each ``.docx`` file in ALL_RESUMES is opened with python-docx, its text
    is lower-cased and searched for the keywords as whole terms. The file is
    then copied once into SEGREGATED_RESUMES/<keyword>/ for every distinct
    keyword found; the folder is created if it does not exist yet.

    Args:
        keywords: Iterable of keyword strings. Matching is case-insensitive
            and folder names are the lower-cased keywords. Nothing is copied
            when no non-blank keyword is given.
    """
    keyword_pattern = _build_keyword_pattern(keywords)
    if keyword_pattern is None:
        return

    for filename in os.listdir(ALL_RESUMES):
        # Only .docx files are read; anything else in the folder is ignored
        if not filename.endswith(".docx"):
            continue
        source_path = os.path.join(ALL_RESUMES, filename)
        resume_text = _document_text(Document(source_path)).lower()
        # A set, so a keyword mentioned many times causes a single copy
        for keyword in sorted(set(keyword_pattern.findall(resume_text))):
            target_dir = os.path.join(SEGREGATED_RESUMES, keyword)
            os.makedirs(target_dir, exist_ok=True)
            copyfile(source_path, os.path.join(target_dir, filename))
def create_sample_resumes(keywords, num=5):
    """
    Create small sample resumes for testing the segregation.

    Writes ``resume0.docx`` .. ``resume<num-1>.docx`` into ALL_RESUMES. Each
    file has a heading and one paragraph naming two randomly chosen keywords.

    Args:
        keywords: Sequence of keywords to pick the two skills from.
        num: Number of sample resumes to write.

    Raises:
        ValueError: Raised by ``random.sample`` when fewer than two keywords
            are supplied.
    """
    # The output folder has to exist before the documents can be saved into it
    os.makedirs(ALL_RESUMES, exist_ok=True)
    for i in range(num):
        document = Document()
        document.add_heading('RESUME{}'.format(i))
        skills_ = random.sample(keywords, 2)
        document.add_paragraph("I have skills - {} and {}".format(*skills_))
        document.save(os.path.join(ALL_RESUMES, "resume{}.docx".format(i)))
# Driver: read the keyword list (get_keywords also sets up the keyword folders)
keywords = get_keywords("ResumeKeyword.txt")
print(keywords)
# Optional: uncomment to generate sample resumes for a test run
# create_sample_resumes(keywords)
# Copy each resume into the folder of every keyword it mentions
segregate_resumes(keywords)
from docx import Document
# Open the resume and print the text of each paragraph in turn
document = Document('A.docx')
# Keyword the asker wants to look for; not used by the loop below
word = "Angular"
for paragraph in document.paragraphs:
    print(paragraph.text)
from docx import Document
from shutil import copyfile
import os, re, random
# Folder which contains all the resumes
ALL_RESUMES = "all_resumes/"
# The folder which will contain the separated resumes
SEGREGATED_RESUMES = "topic_wise_resumes/"


def get_keywords(keywords_file, create_new=False):
    """
    Read the keywords and make sure every keyword has its own output folder.

    Only the first line of ``keywords_file`` is read. It is split on commas
    and each entry is stripped and lower-cased so later comparisons are
    case-insensitive. Blank entries (e.g. from a trailing comma) are skipped.

    Args:
        keywords_file: Path of the text file with comma-separated keywords.
        create_new: Unused; kept so existing callers keep working.

    Returns:
        The lower-cased keywords, in the order they appear in the file.
    """
    # Context manager so the handle is closed even if reading fails
    with open(keywords_file, "r") as keyword_source:
        first_line = keyword_source.readline()

    keywords = []
    for raw_entry in first_line.split(","):
        keyword = raw_entry.strip().lower()
        if not keyword:
            continue
        keywords.append(keyword)
        # Check/create the keyword's own folder, not just the parent:
        # testing only the parent would stop after the first keyword.
        os.makedirs(os.path.join(SEGREGATED_RESUMES, keyword), exist_ok=True)
    return keywords
def _build_keyword_pattern(keywords):
    """Compile a regex matching any keyword as a whole term; None if there are no keywords."""
    terms = {keyword.strip().lower() for keyword in keywords if keyword.strip()}
    if not terms:
        return None
    # Longest first so that e.g. "c++" is preferred over "c" at the same spot
    ordered = sorted(terms, key=lambda term: (-len(term), term))
    alternatives = "|".join(re.escape(term) for term in ordered)
    # Lookarounds rather than \b so terms ending in punctuation ("c++", "c#")
    # still match, while "rest" no longer matches inside "interest".
    return re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")


def _document_text(document):
    """Return the text of the body paragraphs and top-level table cells, joined by spaces."""
    pieces = [paragraph.text for paragraph in document.paragraphs]
    for table in document.tables:
        for row in table.rows:
            pieces.extend(cell.text for cell in row.cells)
    return " ".join(pieces)


def segregate_resumes(keywords):
    """
    Copy every resume into the folder of each keyword it mentions.

    Each ``.docx`` file in ALL_RESUMES is opened with python-docx, its text
    is lower-cased and searched for the keywords as whole terms. The file is
    then copied once into SEGREGATED_RESUMES/<keyword>/ for every distinct
    keyword found; the folder is created if it does not exist yet.

    Args:
        keywords: Iterable of keyword strings. Matching is case-insensitive
            and folder names are the lower-cased keywords. Nothing is copied
            when no non-blank keyword is given.
    """
    keyword_pattern = _build_keyword_pattern(keywords)
    if keyword_pattern is None:
        return

    for filename in os.listdir(ALL_RESUMES):
        # Only .docx files are read; anything else in the folder is ignored
        if not filename.endswith(".docx"):
            continue
        source_path = os.path.join(ALL_RESUMES, filename)
        resume_text = _document_text(Document(source_path)).lower()
        # A set, so a keyword mentioned many times causes a single copy
        for keyword in sorted(set(keyword_pattern.findall(resume_text))):
            target_dir = os.path.join(SEGREGATED_RESUMES, keyword)
            os.makedirs(target_dir, exist_ok=True)
            copyfile(source_path, os.path.join(target_dir, filename))
def create_sample_resumes(keywords, num=5):
    """
    Create small sample resumes for testing the segregation.

    Writes ``resume0.docx`` .. ``resume<num-1>.docx`` into ALL_RESUMES. Each
    file has a heading and one paragraph naming two randomly chosen keywords.

    Args:
        keywords: Sequence of keywords to pick the two skills from.
        num: Number of sample resumes to write.

    Raises:
        ValueError: Raised by ``random.sample`` when fewer than two keywords
            are supplied.
    """
    # The output folder has to exist before the documents can be saved into it
    os.makedirs(ALL_RESUMES, exist_ok=True)
    for i in range(num):
        document = Document()
        document.add_heading('RESUME{}'.format(i))
        skills_ = random.sample(keywords, 2)
        document.add_paragraph("I have skills - {} and {}".format(*skills_))
        document.save(os.path.join(ALL_RESUMES, "resume{}.docx".format(i)))
# Driver: read the keyword list (get_keywords also sets up the keyword folders)
keywords = get_keywords("ResumeKeyword.txt")
print(keywords)
# Optional: uncomment to generate sample resumes for a test run
# create_sample_resumes(keywords)
# Copy each resume into the folder of every keyword it mentions
segregate_resumes(keywords)