python3 fuzzywuzzy 不返回数组的索引值
我正在尝试修改python3 fuzzyfuzzy不返回数组的索引值,python,arrays,python-2.7,python-3.x,fuzzywuzzy,Python,Arrays,Python 2.7,Python 3.x,Fuzzywuzzy,我正在尝试修改fuzzyfuzzy库。模块进程返回分数和数组元素。但我希望它返回元素的索引以及分数、项目、索引组。 以下是我尝试过的: #!/usr/bin/env python # encoding: utf-8 from fuzzywuzzy import fuzz from fuzzywuzzy import utils import heapq import logging from functools import partial default_scorer = fuzz.WRa
我正在尝试修改 fuzzywuzzy 库。其中的 process 模块返回分数和数组元素,但我希望它同时返回元素的索引,即返回由分数、项目、索引组成的元组。以下是我尝试过的:
#!/usr/bin/env python
# encoding: utf-8
from fuzzywuzzy import fuzz
from fuzzywuzzy import utils
import heapq
import logging
from functools import partial
# Scorer used by the functions below when the caller does not pass ``scorer``.
default_scorer = fuzz.WRatio
# Processor applied to the query and to each choice when the caller does not
# pass ``processor``.
default_processor = utils.full_process
def extractWithoutOrder(query, choices, processor=default_processor, scorer=default_scorer, score_cutoff=0):
    """Lazily score every choice against ``query`` and yield the matches.

    Results are yielded in iteration order of ``choices`` (no sorting).

    Args:
        query: The value to match against the choices.
        choices: Either an object with an ``items()`` method (treated as a
            mapping of key -> choice) or any other iterable of choices.
            ``None`` or an empty sized container yields nothing.
        processor: Callable applied to the query and to each choice before
            scoring. ``None`` means "use the values unchanged".
        scorer: Callable ``scorer(processed_query, processed_choice)``
            returning a score.
        score_cutoff: Results scoring below this value are not yielded.

    Yields:
        ``(choice, score, key, index)`` when ``choices`` has ``items()``,
        otherwise ``(choice, score, index)``. ``index`` is the zero-based
        position of the choice in iteration order.
    """
    def no_process(x):
        return x

    # End the generator with a plain ``return``: ``raise StopIteration``
    # inside a generator is turned into RuntimeError by PEP 479.
    try:
        if choices is None or len(choices) == 0:
            return
    except TypeError:
        # ``choices`` has no len(); it is simply iterated below.
        pass

    if processor is None:
        processor = no_process

    processed_query = processor(query)

    if len(processed_query) == 0:
        logging.warning(u"Applied processor reduces input query to empty string, "
                        "all comparisons will have score 0. "
                        "[Query: \'{0}\']".format(query))

    unicode_scorers = [fuzz.UWRatio, fuzz.UQRatio]
    force_ascii_scorers = [fuzz.WRatio, fuzz.QRatio,
                           fuzz.token_set_ratio, fuzz.token_sort_ratio,
                           fuzz.partial_token_set_ratio, fuzz.partial_token_sort_ratio]

    # Don't run full_process twice: for these scorers it is applied once
    # through ``pre_processor`` below instead.
    if scorer in force_ascii_scorers + unicode_scorers \
            and processor == utils.full_process:
        processor = no_process

    # Only process the query once instead of for every choice, and tell the
    # scorer not to process its inputs again.
    if scorer in unicode_scorers:
        pre_processor = partial(utils.full_process, force_ascii=False)
        scorer = partial(scorer, full_process=False)
    elif scorer in force_ascii_scorers:
        pre_processor = partial(utils.full_process, force_ascii=True)
        scorer = partial(scorer, full_process=False)
    else:
        pre_processor = no_process
    processed_query = pre_processor(processed_query)

    # Detect a dictionary-like object up front. Only the ``items`` lookup is
    # guarded, so an AttributeError raised by the processor or scorer while
    # scoring is not mistaken for "choices is a list".
    try:
        keyed_choices = choices.items()
    except AttributeError:
        keyed_choices = None

    if keyed_choices is not None:
        for index, (key, choice) in enumerate(keyed_choices):
            processed = pre_processor(processor(choice))
            score = scorer(processed_query, processed)
            if score >= score_cutoff:
                yield (choice, score, key, index)
    else:
        # It's a list (or other plain iterable); just iterate over it.
        for index, choice in enumerate(choices):
            processed = pre_processor(processor(choice))
            score = scorer(processed_query, processed)
            if score >= score_cutoff:
                yield (choice, score, index)
def extract(query, choices, processor=default_processor, scorer=default_scorer, limit=5):
    """Return the results of ``extractWithoutOrder`` ordered by score, highest first.

    Args:
        query: The value to match against the choices.
        choices: Choices forwarded to ``extractWithoutOrder``.
        processor: Processor forwarded to ``extractWithoutOrder``.
        scorer: Scorer forwarded to ``extractWithoutOrder``.
        limit: Maximum number of results to return; ``None`` returns all
            results.

    Returns:
        A list of the tuples yielded by ``extractWithoutOrder``.
    """
    def by_score(result):
        # The score is the second field of every yielded tuple.
        return result[1]

    scored = extractWithoutOrder(query, choices, processor, scorer)
    if limit is None:
        return sorted(scored, key=by_score, reverse=True)
    return heapq.nlargest(limit, scored, key=by_score)
当我尝试实现它时,得到的仍然是 fuzzywuzzy 原先返回的结果:
# NOTE(review): presumably ``process`` is meant to be the modified module
# shown above, saved as process.py -- confirm which module this import
# actually resolves to.
import process as p
box=['ness', 'apple','banana','carrot','duck','eagle','fish','gate','hitler']
# Query the list for the best matches to 'b'.
p.extract('b',box)
[('banana', 90), ('apple', 0), ('carrot', 0), ('duck', 0), ('eagle', 0)]
但我期望它返回的是:
[('banana', 90, 2), ('apple', 0, 1), ('carrot', 0, 3), ('duck', 0, 4), ('eagle', 0, 5)]
[('banana', 90, 2), ('apple', 0, 1), ('carrot', 0, 3), ('duck', 0, 4), ('eagle', 0, 5)]
请给我一些建议。对于正在寻找答案的人:可以把字典传给 process.extract,返回结果中会带上对应的键(即索引):
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
box = ['apple', 'banana', 'carrot', 'duck', 'eagle']
# Key every choice by its list position so that the key reported with each
# match is the element's index.
box_dict = dict(enumerate(box))
process.extract("b", box_dict, scorer=fuzz.WRatio)
# O/P -> [("banana", 90, 1), ('apple', 0, 0), ('carrot', 0, 2), ('duck', 0, 3), ('eagle', 0, 4)]
作为 fuzzywuzzy 的替代方案,您可以使用 rapidfuzz(我是作者),它会返回索引:
from rapidfuzz import process as p
box = ['ness', 'apple', 'banana', 'carrot', 'duck', 'eagle', 'fish', 'gate', 'hitler']
p.extract('b', box)
返回