Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/python-3.x/19.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 如何通过排除库来修改代码_Python_Python 3.x - Fatal编程技术网

Python 如何通过排除库来修改代码

Python 如何通过排除库来修改代码,python,python-3.x,Python,Python 3.x,我正在编写一个程序来执行以下操作: 读取给定的文件名,并打印统计信息的快速摘要 打印字长频率表和图表 打印字长频率图。打印空白 线路 打印相对频率的图形表示 每个单词的长度 以下是用于测试代码的文本文件数据: This is before the start and should be ignored. So should this and this *** START OF SYNTHETIC TEST CASE *** a blah ba ba *** END OF SYNTHETIC

我正在编写一个程序来执行以下操作:

  • 读取给定的文件名,并打印统计信息的快速摘要
  • 打印字长频率表和图表
  • 打印单词长度频率图,然后打印一个空行
  • 打印每种单词长度的相对频率的图形表示
  • 以下是用于测试代码的文本文件数据:

    This is before the start and should be ignored.
    So should this
    and this
    
    
    *** START OF SYNTHETIC TEST CASE ***
    a blah ba ba
    *** END OF SYNTHETIC TEST CASE ***
    
    This is after the end and should be ignored too.
    Have a nice day.
    
    以下是我目前的代码:

    import os
    from collections import Counter
    
    # Heading and per-row format used when printing the length/frequency table.
    TABLE_TITLE = " Len  Freq"
    FREQ_TABLE_TEMPLATE = "{:>4}{:>6}"
    
    # Heading and per-row format used when printing the horizontal graph;
    # the three fields are filled with length, percentage and the bar string.
    GRAPH_TITLE = " Len  Freq Graph"
    GRAPH_LINE_TEMPLATE = "{:>4}{:>5}% {}"
    
    
    def get_filename():
        """
        Keeps asking the user for a filename until the answer names an
        existing file, printing a "not found" notice after each rejected entry.
        Returns the accepted filename exactly as it was typed.
        """
        while True:
            candidate = input("Please enter filename: ")
            if os.path.isfile(candidate):
                return candidate
            print(candidate, "not found...")
    
    def get_words_from_file(filename):
        """
        Reads the marked section of filename with open_and_read and hands the
        resulting lines to strip_data.
        Returns whatever strip_data returns.
        """
        return strip_data(open_and_read(filename))
    
    def open_and_read(filename):
        """
        Reads filename (utf-8) and collects the lines that lie between the
        first line starting with "*** START" and the first following line
        starting with "*** END OF". Neither marker line is included.
        Returns a list of the collected lines (line endings kept).
        Returns an empty list if the file has no start marker.
        """
        should_add = False
        processed_data = []

        # the with-block closes the file even if reading fails part-way
        with open(filename, 'r', encoding='utf-8') as infile:
            for line in infile:
                if line.startswith("*** START"):
                    if not should_add:
                        # first start marker: begin collecting, but leave the
                        # marker line itself out of the result
                        should_add = True
                        continue
                elif line.startswith("*** END OF"):
                    break
                if should_add:
                    processed_data.append(line)

        return processed_data
    
    
    def strip_data(raw_data):
        """
        Passes raw_data through get_words and then remove_punctuation.
        Returns the list produced by remove_punctuation.
        """
        return remove_punctuation(get_words(raw_data))
    
    
    def get_words(raw_data):
        """
        Takes a list of text lines, raw_data.
        Each line has newline/quote/dash/colon/semicolon/comma/full-stop
        characters stripped from both ends and is split on single spaces;
        every resulting piece is stripped and split the same way again.
        Returns a list of lists: one small list per piece.
        """
        edge_chars = '\n"-:\';,.'
        return [
            piece.strip(edge_chars).split(' ')
            for line in raw_data
            for piece in line.strip(edge_chars).split(' ')
        ]
    
    def remove_punctuation(stripped_list):
        """
        Takes a list of iterables, stripped_list, and walks every element of
        every iterable. Elements made up only of letters are kept, lower-cased;
        everything else is dropped.
        Returns a flat list, in the original order.
        """
        return [
            chunk.lower()
            for piece in stripped_list
            for chunk in piece
            if chunk.isalpha()
        ]
    
    
    def avg_word_length(words):
        """
        Takes a list, words, and works out the average length of its entries.
        Returns the average as a float,
        or 0.0 if there are no words in the list.
        """
        if not words:
            # an empty list has no average; avoid dividing by zero
            return 0.0
        return sum(len(word) for word in words) / len(words)
    
    def max_word_length(words):
        """
        Finds the longest entry in the list words and returns its length.
        An empty list gives 0.
        """
        return max((len(word) for word in words), default=0)
    
    
    def max_frequency(words):
        """
        Counts how often each entry of the list words occurs.
        Returns the count of the most frequent entry,
        or 0 if there are no words in the list.
        """
        # default=0 keeps an empty word list from raising
        return max(Counter(words).values(), default=0)
    
    
    def length_freq(words):
        """
        Takes a list(words) and counts how many words there are of each length.
        Returns a list of (length, count) tuples(len_freq): first the lengths
        that occur, most common first, then a (length, 0) entry for every
        length from 1 up to the longest word that does not occur at all.
        Returns an empty list if there are no words in the list.
        """
        if not words:
            # nothing to count, and max() below would fail on no lengths
            return []

        words_length = [len(word) for word in words]
        len_freq = Counter(words_length).most_common()

        # build the lookup once instead of once per candidate length
        seen_lengths = set(words_length)
        longest = max(words_length)
        # pad the gaps so that the table and graphs show every length
        len_freq.extend(
            (length, 0) for length in range(1, longest) if length not in seen_lengths
        )

        return len_freq
    
    
    def print_length_table(words):
        """
        Prints a blank line, the table title and then one row per
        (length, count) pair from length_freq, in ascending order.
        """
        rows = sorted(length_freq(words))

        print()
        print(TABLE_TITLE)
        for length, count in rows:
            print(FREQ_TABLE_TEMPLATE.format(length, count))
    
    
    def print_length_graph_hori(words):
        """
        Prints a blank line, the graph title and then one row per
        (length, percent) pair from get_percentage. Each row ends in a bar
        made of one "=" per percentage point.
        """
        print()
        print(GRAPH_TITLE)
        for length, percent in get_percentage(words):
            bar = "=" * percent
            print(GRAPH_LINE_TEMPLATE.format(length, percent, bar))
    
    
    def get_percentage(words):
        """
        Turns the (length, count) pairs from length_freq into
        (length, percentage) pairs, where percentage is the count's share of
        all words, truncated to a whole number.
        Returns the pairs as a sorted list.
        """
        pairs = length_freq(words)
        total = len(words)
        return sorted(
            (length, int(count / total * 100)) for length, count in pairs
        )
    
    def print_length_graph_vert(words):
        """
        Prints the percentages from get_percentage as a vertical bar chart:
        one output row per percentage point, counting down from the value
        given by to_next_10 to 1, then a row of column numbers and a
        "word length" caption.
        """
        relative_freq = get_percentage(words)
        bars = [entry[1] for entry in relative_freq]
        top_row = to_next_10(bars)

        print("\n% frequency")
        for level in range(top_row, 0, -1):
            # single-digit labels get one extra leading space
            label_template = "  {}  " if level < 10 else " {}  "
            cells = "".join(
                " ** " if int(bar) >= level else " " * 4 for bar in bars
            )
            print(label_template.format(level) + cells)

        # column numbers run 1..n and are zero-padded below 10
        axis = "".join(
            (" 0{} " if column < 10 else " {} ").format(column)
            for column in range(1, len(relative_freq) + 1)
        )
        print(" " * 5 + axis, end="")
        print("\n" + " " * (len(relative_freq) * 4 - 7) + "word length")
    
    
    def to_next_10(bars):
        """
        Takes a list(bars) of whole-number percentages.
        Returns int(next_10): the smallest multiple of 10 that is strictly
        greater than the largest value in bars, so a value that is already a
        multiple of 10 still moves up to the next one (50 -> 60).
        An empty list gives 10, the same as a list holding only zeros.
        The list passed in is not changed.
        """
        # max() replaces the copy-and-sort; default=0 covers the empty list
        highest = max(bars, default=0)
        # floor to the current multiple of ten, then step up to the next one
        return (highest // 10 + 1) * 10
    
    
    def print_results(words):
        """
        Prints the word summary for the list words (number of words, average
        length, longest length, highest frequency) and then the length table,
        the horizontal graph and the vertical graph.
        """
        # all three figures are worked out before anything is printed
        average_length = avg_word_length(words)
        longest = max_word_length(words)
        highest_count = max_frequency(words)

        summary_lines = [
            "",
            "Word summary (all words):",
            " Number of words = {}".format(len(words)),
            " Avg word length = {:.2f}".format(average_length),
            " Max word length = {}".format(longest),
            " Max frequency = {}".format(highest_count),
        ]
        print("\n".join(summary_lines))

        for printer in (print_length_table,
                        print_length_graph_hori,
                        print_length_graph_vert):
            printer(words)
    
    
    def main():
        """ Asks for a file, reports it as loaded and prints its word results """
        filename = get_filename()
        print(" {} loaded ok.".format(filename))
        print_results(get_words_from_file(filename))
    
    
    # Start the program only when this file is run as a script, so that
    # importing it (for example from a test) does not stop at the filename
    # prompt.
    if __name__ == "__main__":
        main()
    
    我现在需要更改代码以强制执行以下规则:

  • 我只能导入
    re
    os
    这两个库,不能使用任何其他库
  • 代码现在必须使用模式
    “[a-z]+[-'][a-z]+|[a-z]+[']?|[a-z]+”

  • 如果不允许使用“collections”模块,您可以自己重新实现 Counter 类中您用到的那一部分(也就是 __init__() 方法和 most_common() 方法)

    我不明白正则表达式应该用来做什么

    编辑:好的,这是一个 collections.Counter 的极简(brain-dead)实现

    class MyCounter(object):
        """
        A small stand-in for the parts of collections.Counter used here:
        construction from an iterable, and most_common().
        """

        def __init__(self, iterable):
            """
            initialize a counter object with something iterable
            """
            self._data = dict()

            # set up a dictionary that counts how many of each item we have
            for item in iterable:
                self._data[item] = self._data.get(item, 0) + 1

        def most_common(self, n=None):
            """
            return the most common items from the object, along with their
            count, as a list of (item, count) tuples, highest count first.
            Items with equal counts stay in the order they were first seen.
            If n=None, return the whole list; otherwise return at most n
            entries (none at all when n is zero or negative).
            """
            # items() is only a view in Python 3, so sorted() is used to get a
            # real list. The sort key is the count alone, largest first;
            # sorted() is a builtin, so nothing has to be imported for it.
            list_of_counts = sorted(
                self._data.items(), key=lambda pair: pair[1], reverse=True
            )

            # return what is needed.
            if n is None:
                return list_of_counts

            # clamp so that a negative n cannot slice from the wrong end
            return list_of_counts[:max(n, 0)]
    
    ##############################################################################
    ## the code from here down is not part of the solution, it is proof that the
    ## solution works
    import unittest
    from collections import Counter
    
    class MyCounterTest(unittest.TestCase):
        """Runs the same checks on collections.Counter and on MyCounter."""

        SAMPLE = ['a', 'a', 'b', 'c']

        def test_single_most_common(self):
            """
            check when we have a single most-common value
            """
            # the library class goes first to illustrate the expected
            # behavior; our own class must then give the same item and count
            for counter_class in (Counter, MyCounter):
                top_entry = counter_class(self.SAMPLE).most_common(n=1)[0]
                self.assertEqual(top_entry[0], 'a')
                self.assertEqual(top_entry[1], 2)

        def test_with_none(self):
            """
            check that most_common() without n lists every distinct item
            """
            for counter_class in (Counter, MyCounter):
                listing = counter_class(self.SAMPLE).most_common()
                self.assertEqual(len(listing), 3)
    
    # run the test cases defined in this file only when it is executed directly
    if __name__ == '__main__':
        unittest.main()
    

    你在上面看到的就是我的代码。我不知道如何模拟库函数,所以才提出了这个问题。如果您没有注意到,代码就放在下面这个标题之下:
    “以下是我目前的代码:”
    阅读以选择表达式所示的单词
    class MyCounter(object):
        """
        A small stand-in for the parts of collections.Counter used here:
        construction from an iterable, and most_common().
        """

        def __init__(self, iterable):
            """
            initialize a counter object with something iterable
            """
            self._data = dict()

            # set up a dictionary that counts how many of each item we have
            for item in iterable:
                self._data[item] = self._data.get(item, 0) + 1

        def most_common(self, n=None):
            """
            return the most common items from the object, along with their
            count, as a list of (item, count) tuples, highest count first.
            Items with equal counts stay in the order they were first seen.
            If n=None, return the whole list; otherwise return at most n
            entries (none at all when n is zero or negative).
            """
            # items() is only a view in Python 3, so sorted() is used to get a
            # real list. The sort key is the count alone, largest first;
            # sorted() is a builtin, so nothing has to be imported for it.
            list_of_counts = sorted(
                self._data.items(), key=lambda pair: pair[1], reverse=True
            )

            # return what is needed.
            if n is None:
                return list_of_counts

            # clamp so that a negative n cannot slice from the wrong end
            return list_of_counts[:max(n, 0)]
    
    ##############################################################################
    ## the code from here down is not part of the solution, it is proof that the
    ## solution works
    import unittest
    from collections import Counter
    
    class MyCounterTest(unittest.TestCase):
        """Runs the same checks on collections.Counter and on MyCounter."""

        SAMPLE = ['a', 'a', 'b', 'c']

        def test_single_most_common(self):
            """
            check when we have a single most-common value
            """
            # the library class goes first to illustrate the expected
            # behavior; our own class must then give the same item and count
            for counter_class in (Counter, MyCounter):
                top_entry = counter_class(self.SAMPLE).most_common(n=1)[0]
                self.assertEqual(top_entry[0], 'a')
                self.assertEqual(top_entry[1], 2)

        def test_with_none(self):
            """
            check that most_common() without n lists every distinct item
            """
            for counter_class in (Counter, MyCounter):
                listing = counter_class(self.SAMPLE).most_common()
                self.assertEqual(len(listing), 3)
    
    # run the test cases defined in this file only when it is executed directly
    if __name__ == '__main__':
        unittest.main()