Python 前导字长

Python 前导字长,python,Python,我必须创建一个函数,该函数接受单个参数 word,并返回文本中出现在 word 前面的那些单词的平均长度(以字符为单位)。如果 word 恰好是文本中的第一个单词,则该次出现的前一个单词的长度应计为零。例如: >>> average_length("the") 4.4 >>> average_length('whale') False >>> average_length('ship.') 3.0


>>> average_length("the")
>>> average_length('whale')

def average_length(word):
    """Return the average length of the words that directly precede ``word``.

    The text is the hard-coded excerpt below, split on whitespace, so
    punctuation stays attached to the words (``"Ishmael."`` is one word).

    Args:
        word: The exact whitespace-delimited token to look for.

    Returns:
        The mean length, in characters, of the word found immediately before
        each occurrence of ``word`` (as a float). An occurrence that is the
        first word of the text contributes a length of zero. Returns ``False``
        when ``word`` does not occur in the text at all.
    """
    text = "Call me Ishmael. Some years ago - never mind how long..........."
    words = text.split()

    preceding_lengths = []
    # Nothing precedes the first word, so its "previous word" has length 0.
    previous = ""
    for current in words:
        if current == word:
            preceding_lengths.append(len(previous))
        previous = current

    if not preceding_lengths:
        return False
    # float() keeps true division even on interpreters where int / int floors.
    return sum(preceding_lengths) / float(len(preceding_lengths))




from collections import defaultdict
# Map each word to the list of lengths of the words that immediately precede it.
prec = defaultdict(list)
text = "Call me Ishmael. Some years ago..".split()

# Nothing precedes the very first word, so its predecessor counts as length 0.
if text:
    prec[text[0]].append(0)

# Two iterators over the same list; advancing the second one by a single step
# makes zip() yield consecutive (previous, current) pairs.
first, second = iter(text), iter(text)
next(second, None)
for one, two in zip(first, second):  # pairwise
    prec[two].append(len(one))

# Average preceding-word length for every word seen in the text.
# float() keeps true division even where int / int would floor.
avg_prec_len = {}
for key in prec:
    # prec[key] is a list of lengths
    avg_prec_len[key] = sum(prec[key]) / float(len(prec[key]))

def match_previous(lst, word):
    """Average the lengths of the words found directly before ``word``.

    Args:
        lst: A non-empty, sliceable sequence of words.
        word: The word whose predecessors are measured.

    Returns:
        A float: total length of the preceding words divided by the number of
        occurrences of ``word``. An occurrence at position 0 counts as an
        occurrence with a preceding length of zero. Returns ``False`` when
        ``word`` never occurs.

    Raises:
        IndexError: If ``lst`` is empty.
    """
    first_word = lst[0]
    # A match in first position has no predecessor: it adds to the occurrence
    # count but contributes nothing to the length total.
    hits = 1.0 if first_word == word else 0.0
    length_total = 0.0
    # Walk the sequence as consecutive (before, after) pairs.
    for before, after in zip(lst, lst[1:]):
        if word == after:
            length_total += len(before)
            hits += 1
    if not hits:
        return False
    return length_total / hits

In [41]: text = "Call me Ishmael. Some years ago - never mind how long precisely - having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. It is a way I have of driving off the spleen and regulating the circulation. Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul; whenever I find myself involuntarily pausing before coffin warehouses, and bringing up the rear of every funeral I meet; and especially whenever my hypos get such an upper hand of me, that it requires a strong moral principle to prevent me from deliberately stepping into the street, and methodically knocking people's hats off - then, I account it high time to get to sea as soon as I can. This is my substitute for pistol and ball. With a philosophical flourish Cato throws himself upon his sword; I quietly take to the ship. There is nothing surprising in this. If they but knew it, almost all men in their degree, some time or other, cherish very nearly the same feelings towards the ocean with me."

In [42]: match_previous(text.split(),"the")
Out[42]: 4.4

In [43]: match_previous(text.split(),"ship.")
Out[43]: 3.0

In [44]: match_previous(text.split(),"whale")
Out[44]: False

In [45]: match_previous(text.split(),"Call")
Out[45]: 0.0


def match_previous(lst, word):
    """Tracing variant: average length of the words preceding ``word``.

    Prints a line-by-line trace of every comparison while computing the same
    result as the non-printing version.

    Args:
        lst: A non-empty, sliceable sequence of words.
        word: The word whose predecessors are measured.

    Returns:
        Total preceding-word length divided by the number of occurrences of
        ``word`` (a float), or ``False`` if ``word`` never occurs. A match on
        the first element counts as an occurrence with zero preceding length.

    Raises:
        IndexError: If ``lst`` is empty.
    """
    hits = 0.0
    length_total = 0.0
    if lst[0] == word:
        print("First word matches.")
        hits += 1
    # step starts at 1; the index reported on a match (step - 1) is therefore
    # the position within the list *without* its first element.
    for step, (before, after) in enumerate(zip(lst, lst[1:]), 1):
        print("On iteration {}.\nprevious_word = {} and current_word = {}.".format(step, before, after))
        if word == after:
            length_total += len(before)
            hits += 1
            print("We found a match at index {} in our list of words.".format(step - 1))
        print("Updating previous_word from {} to {}.".format(before, after))
    if not hits:
        return False
    return length_total / hits

In [59]: match_previous(["bar","foo","foobar","hello", "world","bar"],"bar")
First word matches.
On iteration 1.
previous_word = bar and current_word = foo.
Updating previous_word from bar to foo.
On iteration 2.
previous_word = foo and current_word = foobar.
Updating previous_word from foo to foobar.
On iteration 3.
previous_word = foobar and current_word = hello.
Updating previous_word from foobar to hello.
On iteration 4.
previous_word = hello and current_word = world.
Updating previous_word from hello to world.
On iteration 5.
previous_word = world and current_word = bar.
We found a match at index 4 in our list of words.
Updating previous_word from world to bar.
Out[59]: 2.5

def match_previous(lst, word):
    """Iterator-based variant: average length of the words preceding ``word``.

    Because it only calls ``iter``/``next`` on ``lst``, any iterable of words
    works here, including generators that cannot be indexed or sliced.

    Args:
        lst: An iterable of words.
        word: The word whose predecessors are measured.

    Returns:
        Total preceding-word length divided by the number of occurrences of
        ``word`` (a float). A match on the first element counts as an
        occurrence with zero preceding length. Returns ``False`` when ``word``
        never occurs, which includes an empty ``lst``.
    """
    matches_count = total_length_sum = 0.0
    # create an iterator
    _iterator = iter(lst)
    # pull first word from iterator; an empty input has no occurrences at all
    try:
        previous_word = next(_iterator)
    except StopIteration:
        return False
    if previous_word == word:
        matches_count += 1
    # _iterator will give us all bar the first word we consumed with next(_iterator)
    for current_word in _iterator:
        if word == current_word:
            total_length_sum += len(previous_word)
            matches_count += 1
        # always update to keep track of the word just seen
        previous_word = current_word
    # matches_count of 0 means no word in the input matched "word"
    return total_length_sum / matches_count if matches_count else False

In [61]: l = [1,2,3,4]

In [62]: it = iter(l)

In [63]: next(it)
Out[63]: 1

In [64]: next(it)
Out[64]: 2
# consumed two of four so we are left with two
In [65]: list(it)
Out[65]: [3, 4]



In [70]: for tup in match_previous_generator("the","Call", "whale", "ship."):
   ....:     print(tup)
('the', 4.4)
('Call', 0.0)
('whale', False)
('ship.', 3.0)


from __future__ import division  # int / int should result in float

# Sample input: the text to scan and the word whose predecessors we measure.
text = "Lorem ipsum dolor sit amet dolor ..."
word = "dolor"

# Whitespace-split the text into words.
words = text.split()

# Positions at which the chosen word occurs.
indices = [pos for pos, candidate in enumerate(words) if candidate == word]

# Position of the word just before each occurrence (-1 when the occurrence
# is the very first word).
preceding_indices = [pos - 1 for pos in indices]

# Look the predecessors up; a -1 position means "no predecessor", which is
# represented by an empty string so that it contributes length 0.
preceding_words = ["" if pos == -1 else words[pos] for pos in preceding_indices]

# Mean length of the predecessors.
mean = sum(map(len, preceding_words)) / len(preceding_words)

# Expected: "ipsum" (5) and "amet" (4) precede "dolor", so (5 + 4) / 2 = 4.5.
assert mean == 4.5

def mean_length_of_preceding_words(word, text):
    """Return the mean length of the words that directly precede ``word``.

    Args:
        word: The exact whitespace-delimited token to look for.
        text: The text to scan; it is split on whitespace.

    Returns:
        Sum of the predecessor lengths divided by the number of occurrences.
        An occurrence at the start of the text has an empty predecessor of
        length 0.

    Raises:
        ZeroDivisionError: If ``word`` does not occur in ``text``.
    """
    tokens = text.split()
    lengths = []
    for position, token in enumerate(tokens):
        if token != word:
            continue
        # Position 0 has nothing before it, so use an empty predecessor.
        predecessor = tokens[position - 1] if position else ""
        lengths.append(len(predecessor))
    return sum(lengths) / len(lengths)
# "Lorem" is the first word, so its only predecessor is the empty string: mean 0.0.
assert mean_length_of_preceding_words("Lorem", "Lorem ipsum dolor sit amet dolor ...") == 0.0
# "dolor" follows "ipsum" (5 chars) and "amet" (4 chars): (5 + 4) / 2 == 4.5.
assert mean_length_of_preceding_words("dolor", "Lorem ipsum dolor sit amet dolor ...") == 4.5
# "E" never occurs in "A B C D", so the list of preceding words is empty and
# this call raises: the average length of zero words does not exist.
mean_length_of_preceding_words("E", "A B C D")  # ZeroDivisionError - average length of zero words does not exist


def mean_length_of_preceding_words(word, text):
    """Return the mean length of the words that directly precede ``word``.

    Args:
        word: The exact whitespace-delimited token to look for.
        text: The text to scan; it is split on whitespace.

    Returns:
        Sum of the predecessor lengths divided by the number of occurrences,
        where an occurrence at the start of the text contributes a length of
        0. Returns ``False`` when ``word`` does not occur in ``text``.
    """
    tokens = text.split()
    # One entry per occurrence: the length of the token just before it,
    # or 0 when the occurrence is the first token.
    lengths = [
        len(tokens[position - 1]) if position > 0 else 0
        for position, token in enumerate(tokens)
        if token == word
    ]
    if not lengths:
        return False
    return sum(lengths) / len(lengths)

# A word that is absent from the text yields False.
assert mean_length_of_preceding_words("E", "A B C D") is False




def average_length(text, word):
    """Return the average length of the words that directly precede ``word``.

    The text is split on whitespace, stand-alone ``-`` tokens are dropped, and
    the characters ``, . ? ! ' " :`` are stripped from both ends of every
    remaining token before comparison. ``word`` itself is compared as given,
    so it should be passed without that surrounding punctuation.

    Args:
        text: The text to scan.
        word: The (already punctuation-free) word to look for.

    Returns:
        The mean length (float) of the stripped word preceding each occurrence
        of ``word``; an occurrence at the start of the text counts a
        predecessor length of 0. Returns ``False`` when there is no
        occurrence.
    """
    # The leading '' is a sentinel predecessor for the first real word.
    words = [''] + [w.strip(''',.?!'":''') for w in text.split() if w != '-']
    match = [len(prev) for prev, curr in zip(words[:-1], words[1:]) if curr == word]
    # Decide on the matches themselves rather than on `word in words`: the
    # sentinel '' is always in `words`, so a membership test would let
    # word == '' through and then divide by zero.
    if not match:
        return False
    return 1.0 * sum(match) / len(match)

