Python 如何通过排除库来修改代码_Python_Python 3.x

Python 如何通过排除库来修改代码

python python-3.x

Python 如何通过排除库来修改代码,python,python-3.x,Python,Python 3.x,我正在编写一个程序来执行以下操作：读取给定的文件名，并打印统计信息的快速摘要打印字长频率表和图表打印字长频率图。打印空白线路打印相对频率的图形表示每个单词的长度以下是用于测试代码的文本文件数据： This is before the start and should be ignored. So should this and this *** START OF SYNTHETIC TEST CASE *** a blah ba ba *** END OF SYNTHETIC

我正在编写一个程序来执行以下操作：

读取给定的文件名，并打印统计信息的快速摘要

打印字长频率表和图表

打印单词长度频率图，然后打印一个空行

打印每种单词长度的相对频率的图形表示

以下是用于测试代码的文本文件数据：

This is before the start and should be ignored.
So should this
and this


*** START OF SYNTHETIC TEST CASE ***
a blah ba ba
*** END OF SYNTHETIC TEST CASE ***

This is after the end and should be ignored too.
Have a nice day.

以下是我目前的代码：

import os
from collections import Counter

# Header line and row template used when printing the length/frequency table.
TABLE_TITLE = " Len  Freq"
FREQ_TABLE_TEMPLATE = "{:>4}{:>6}"

# Header line and row template used when printing the horizontal percentage graph.
GRAPH_TITLE = " Len  Freq Graph"
GRAPH_LINE_TEMPLATE = "{:>4}{:>5}% {}"


def get_filename():
    """Keep prompting until the user names an existing file; return that name."""
    while True:
        candidate = input("Please enter filename: ")
        if os.path.isfile(candidate):
            return candidate
        print(candidate, "not found...")

def get_words_from_file(filename):
    """Read filename via open_and_read and return the result of strip_data on it."""
    return strip_data(open_and_read(filename))

def open_and_read(filename):
    """
    Return the lines of filename that lie between the section markers.

    Collection starts on the line after the first one beginning with
    "*** START" and stops at the first line beginning with "*** END OF".
    Neither marker line is included in the result.

    Returns an empty list when no start marker is found (previously this
    raised IndexError from popping an empty list).
    """
    processed_data = []
    inside_section = False

    # The context manager guarantees the file is closed, even on error.
    with open(filename, 'r', encoding='utf-8') as infile:
        for line in infile:
            if line.startswith("*** END OF"):
                break
            if not inside_section:
                # Skip everything up to and including the start marker itself.
                inside_section = line.startswith("*** START")
                continue
            processed_data.append(line)

    return processed_data


def strip_data(raw_data):
    """Split raw_data into words with get_words, then filter them with remove_punctuation."""
    return remove_punctuation(get_words(raw_data))


def get_words(raw_data):
    """
    Split every line of raw_data on single spaces and trim edge punctuation.

    Each resulting word is wrapped in its own one-element list, so the
    return value is a list of lists rather than a flat list of strings.
    """
    edge_chars = '\n"-:\';,.'
    wrapped_words = []
    for line in raw_data:
        for token in line.strip(edge_chars).split(' '):
            wrapped_words.append(token.strip(edge_chars).split(' '))
    return wrapped_words

def remove_punctuation(stripped_list):
    """
    Flatten stripped_list (a list of lists of strings) into one list.

    Only strings made up entirely of alphabetic characters are kept, and
    each kept string is converted to lower case.
    """
    return [
        chunk.lower()
        for piece in stripped_list
        for chunk in piece
        if chunk.isalpha()
    ]


def avg_word_length(words):
    """
    Return the mean length of the strings in words.

    Returns 0 for an empty list instead of dividing by zero, mirroring the
    "no words" convention used by max_word_length.
    """
    if not words:
        return 0
    return sum(len(word) for word in words) / len(words)

def max_word_length(words):
    """Return the length of the longest word in words, or 0 for an empty list."""
    return max((len(word) for word in words), default=0)


def max_frequency(words):
    """
    Return how many times the most frequent word occurs in words.

    Returns 0 when words is empty; indexing the empty most_common() result
    would otherwise raise IndexError.
    """
    top = Counter(words).most_common(1)
    if not top:
        return 0
    return top[0][1]


def length_freq(words):
    """
    Count how many words there are of each length.

    Returns a list of (length, count) tuples: first the lengths that occur,
    most frequent first, then a (length, 0) entry for every length between
    1 and the longest word that never occurs. An empty words list yields an
    empty list.
    """
    if not words:
        # max() of an empty sequence would raise ValueError below.
        return []

    words_length = [len(word) for word in words]
    len_freq = Counter(words_length).most_common()

    # Build the membership set once; each candidate length is visited a
    # single time, so the zero entries appended below never need re-testing.
    seen_lengths = {length for length, _ in len_freq}
    for length in range(1, max(words_length)):
        if length not in seen_lengths:
            len_freq.append((length, 0))

    return len_freq


def print_length_table(words):
    """Print a blank line, the table title, then one row per length in ascending order."""
    rows = length_freq(words)
    rows.sort()

    print()
    print(TABLE_TITLE)
    for length, count in rows:
        print(FREQ_TABLE_TEMPLATE.format(length, count))


def print_length_graph_hori(words):
    """
    Print the horizontal graph: a blank line, the graph title, then one line
    per word length showing its percentage and a bar of that many "=" signs.
    """
    print()
    print(GRAPH_TITLE)
    for length, percent in get_percentage(words):
        print(GRAPH_LINE_TEMPLATE.format(length, percent, "=" * percent))


def get_percentage(words):
    """
    Return a list of (length, percent) tuples sorted by length.

    percent is the share of words that have that length, as a whole number
    rounded down.
    """
    total = len(words)
    # Integer arithmetic gives the exact floor; the float form
    # int(count / total * 100) can under-report, e.g. int(29 / 100 * 100) == 28.
    return sorted(
        (length, count * 100 // total) for length, count in length_freq(words)
    )

def print_length_graph_vert(words):
    """
    Print a vertical bar chart of the percentage of words at each length.

    One row is printed per percentage point, from the value returned by
    to_next_10 down to 1; a column shows " ** " on every row at or below its
    percentage. The bottom axis numbers the columns from 01 upward.
    """
    relative_freq = get_percentage(words)
    bars = [pair[1] for pair in relative_freq]
    top_of_scale = to_next_10(bars)
    column_count = len(relative_freq)

    print("\n% frequency")
    for level in range(top_of_scale, 0, -1):
        # Single-digit levels get one extra leading space.
        label = ("  {}  " if level < 10 else " {}  ").format(level)
        cells = "".join(" ** " if int(bar) >= level else " " * 4 for bar in bars)
        print(label + cells)

    axis = "".join(
        (" 0{} " if index < 9 else " {} ").format(index + 1)
        for index in range(column_count)
    )
    print(" " * 5 + axis)
    print(" " * (column_count * 4 - 7) + "word length")


def to_next_10(bars):
    """
    Return the smallest multiple of 10 that is strictly greater than the
    largest value in bars (so 50 becomes 60, 7 becomes 10).

    bars is expected to hold integers and is not modified.
    """
    largest = sorted(bars)[-1]
    return (largest // 10 + 1) * 10


def print_results(words):
    """Print the word summary, then the length table and both graphs for words."""
    mean_len = avg_word_length(words)
    longest = max_word_length(words)
    top_count = max_frequency(words)

    summary = (
        "Word summary (all words):",
        " Number of words = {}".format(len(words)),
        " Avg word length = {:.2f}".format(mean_len),
        " Max word length = {}".format(longest),
        " Max frequency = {}".format(top_count),
    )
    print()
    for line in summary:
        print(line)

    print_length_table(words)
    print_length_graph_hori(words)
    print_length_graph_vert(words)


def main():
    """Ask for a file name, extract its words and print the full report."""
    text = get_filename()
    print(" {} loaded ok.".format(text))
    words = get_words_from_file(text)
    print_results(words)


# Run only when executed as a script, so that importing this module does not
# block on the filename prompt.
if __name__ == "__main__":
    main()

我现在需要更改代码以强制执行以下规则：

我只能导入

re

和

os

库，不能使用其他任何库。

代码现在必须使用模式

"[a-z]+[-'][a-z]+|[a-z]+[']?|[a-z]+"

如果不允许使用 collections 模块，您可以自己重新实现 Counter 类中您用到的那部分（即 __init__() 方法和 most_common() 方法）

我不明白正则表达式应该用来做什么

编辑：好的，这是一个 collections.Counter 的极简实现

class MyCounter(object):
    """Minimal stand-in for collections.Counter that needs no imports."""

    def __init__(self, iterable):
        """
        initialize a counter object with something iterable
        """
        self._data = dict()

        # set up a dictionary that counts how many of each item we have
        for item in iterable:
            self._data[item] = self._data.get(item, 0) + 1

    def most_common(self, n=None):
        """
        Return a list of (item, count) pairs, highest count first.

        Items with equal counts keep the order in which they were first
        seen (relies on insertion-ordered dicts, Python 3.7+).
        If n is None the whole list is returned, otherwise the first n pairs.
        """
        # dict.items() is a view in Python 3: it cannot be indexed or
        # reordered in place, so sorted() is used to build a real list.
        # sorted() is a builtin, so no import beyond re/os is required.
        ranked = sorted(
            self._data.items(), key=lambda pair: pair[1], reverse=True
        )

        if n is None:
            return ranked
        return ranked[:n]

##############################################################################
## the code from here down is not part of the solution, it is proof that the
## solution works
import unittest
from collections import Counter

class MyCounterTest(unittest.TestCase):
    """Checks that MyCounter gives the same answers as collections.Counter."""

    def test_single_most_common(self):
        """
        check when we have a single most-common value
        """
        sample = ['a', 'a', 'b', 'c']
        # The reference Counter is checked first to illustrate the expected
        # behaviour, then MyCounter must produce the same result.
        for counter_class in (Counter, MyCounter):
            item, count = counter_class(sample).most_common(n=1)[0]
            self.assertEqual(item, 'a')
            self.assertEqual(count, 2)

    def test_with_none(self):
        """
        check that most_common() without n returns one entry per distinct item
        """
        sample = ['a', 'a', 'b', 'c']
        for counter_class in (Counter, MyCounter):
            self.assertEqual(len(counter_class(sample).most_common()), 3)

# Run the unit tests only when this file is executed directly.
if __name__ == '__main__':
    unittest.main()

你在上面看到的是我的代码。我不知道如何模拟库函数，因此我提出了这个问题。如果您没有注意到，我会将其放在标题下：

这是我目前的代码：

阅读以选择表达式所示的单词

class MyCounter(object):
    """Minimal stand-in for collections.Counter that needs no imports."""

    def __init__(self, iterable):
        """
        initialize a counter object with something iterable
        """
        self._data = dict()

        # set up a dictionary that counts how many of each item we have
        for item in iterable:
            self._data[item] = self._data.get(item, 0) + 1

    def most_common(self, n=None):
        """
        Return a list of (item, count) pairs, highest count first.

        Items with equal counts keep the order in which they were first
        seen (relies on insertion-ordered dicts, Python 3.7+).
        If n is None the whole list is returned, otherwise the first n pairs.
        """
        # dict.items() is a view in Python 3: it cannot be indexed or
        # reordered in place, so sorted() is used to build a real list.
        # sorted() is a builtin, so no import beyond re/os is required.
        ranked = sorted(
            self._data.items(), key=lambda pair: pair[1], reverse=True
        )

        if n is None:
            return ranked
        return ranked[:n]

##############################################################################
## the code from here down is not part of the solution, it is proof that the
## solution works
import unittest
from collections import Counter

class MyCounterTest(unittest.TestCase):
    """Checks that MyCounter gives the same answers as collections.Counter."""

    def test_single_most_common(self):
        """
        check when we have a single most-common value
        """
        sample = ['a', 'a', 'b', 'c']
        # The reference Counter is checked first to illustrate the expected
        # behaviour, then MyCounter must produce the same result.
        for counter_class in (Counter, MyCounter):
            item, count = counter_class(sample).most_common(n=1)[0]
            self.assertEqual(item, 'a')
            self.assertEqual(count, 2)

    def test_with_none(self):
        """
        check that most_common() without n returns one entry per distinct item
        """
        sample = ['a', 'a', 'b', 'c']
        for counter_class in (Counter, MyCounter):
            self.assertEqual(len(counter_class(sample).most_common()), 3)

# Run the unit tests only when this file is executed directly.
if __name__ == '__main__':
    unittest.main()