Python 关于哈夫曼树编码（调试）_Python_Huffman Code

Python 关于哈夫曼树编码（调试）

python

Python 关于哈夫曼树编码（调试）,python,huffman-code,Python,Huffman Code,coder.py import sys import operator class node: def __init__ (self, element, num, leftChild, rightChild): self.element = element #eg "a" self.num = num #eg 2 self.leftChild = leftChild self.rightChild = rightChild …

coder.py

import sys
import operator

class node:
    """One node of the Huffman tree.

    element    -- the symbol(s) covered by this subtree, e.g. "a"
    num        -- the occurrence count (weight) of those symbols, e.g. 2
    leftChild  -- left subtree, or None for a leaf
    rightChild -- right subtree, or None for a leaf
    """

    def __init__ (self, element, num, leftChild = None, rightChild = None):
        self.element = element
        self.num = num
        # Children default to None so a leaf can be built as node("a", 2).
        self.leftChild = leftChild
        self.rightChild = rightChild

    def __repr__ (self):
        # Children are left out on purpose: printing a root would otherwise
        # dump the whole tree.
        return f"node({self.element!r}, {self.num!r})"

def countChar (text):
    """Tally how often each character occurs in *text*, ignoring newlines.

    Returns a dict mapping character -> count whose entries are ordered by
    ascending count. sorted() is stable, so characters with equal counts
    keep the order in which they first appeared in *text*.
    """
    tally = {}
    for ch in text:
        if ch != "\n":
            tally[ch] = tally.get(ch, 0) + 1
    # Sorting the (char, count) pairs and rebuilding a dict follows
    # https://www.w3resource.com/python-exercises/dictionary/python-data-type-dictionary-exercise-1.php
    byCount = sorted (tally.items(), key = operator.itemgetter(1))
    return dict (byCount)

def readfile (argv):
    """Return the whole content of the text file at path *argv*.

    The file is opened in text mode ("r"). The with-statement closes it
    even when reading raises, which the explicit close() call did not.
    """
    with open (argv, "r") as source:
        return source.read()

def getBaseNode (charDict):
    """Turn each (symbol, count) entry of *charDict* into a childless node.

    The returned list follows the iteration order of *charDict*.
    """
    return [node (symbol, count, None, None) for symbol, count in charDict.items()]

def buildTree (trees):
    """Merge a list of nodes into one Huffman tree.

    The two lightest trees are repeatedly replaced by a parent whose weight
    is their sum, until a single tree remains.

    trees -- list of node objects (normally the leaves from getBaseNode)

    Returns a one-element list holding the root, so callers index it with
    [0]. An empty input returns an empty list. The caller's list is never
    modified.
    """
    # Work on a sorted copy so the two lightest trees are always in front.
    # sorted() is stable, so input that is already ordered by weight keeps
    # its order.
    forest = sorted (trees, key = lambda tree: tree.num)
    while len (forest) > 1:
        first, second = forest[0], forest[1]
        # The child with the smaller symbol text goes left; this ordering is
        # by text, not by weight.
        if first.element <= second.element:
            left, right = first, second
        else:
            left, right = second, first
        merged = node (left.element + right.element, left.num + right.num, left, right)
        forest = forest[2:]
        # Insert the parent before the first tree that is at least as heavy,
        # or at the end when it is the heaviest. The previous code swapped
        # two entries instead of inserting, which left the list unsorted and
        # made later merges pick the wrong pair.
        position = len (forest)
        for index, tree in enumerate (forest):
            if merged.num <= tree.num:
                position = index
                break
        forest.insert (position, merged)
    return forest

def treeSearchCoding (tree, target, code):
    """Return the bit string for *target* in the subtree rooted at *tree*.

    tree   -- node to start from
    target -- the symbol to look up
    code   -- bits accumulated on the way to *tree* ("" at the root)

    Each step down appends "0" for the left child and "1" for the right
    child. Returns "" when *target* is not in the subtree.

    When *tree* itself matches and *code* is still empty (a tree made of a
    single leaf) the result is "0" rather than "": callers treat "" as
    "not found", so a one-symbol input would otherwise get no code at all.
    """
    while target != tree.element:
        if target not in tree.element:
            return ""
        if target in tree.leftChild.element:
            tree, code = tree.leftChild, code + "0"
        else:
            tree, code = tree.rightChild, code + "1"
    return code or "0"

def fetchCode (huffmanTree):
    """Build the symbol -> bit-string table for *huffmanTree*.

    Only the characters chr(32) through chr(126) are looked up, in that
    order; a character whose lookup returns "" is left out of the table.
    """
    lookups = (
        (symbol, treeSearchCoding (huffmanTree, symbol, ""))
        for symbol in map (chr, range (32, 127))
    )
    return {symbol: bits for symbol, bits in lookups if bits != ""}
    
def printCodeTable (codeDict, charDict):
    """Write the code table and the average code length to "code.txt".

    codeDict -- symbol -> bit string
    charDict -- symbol -> occurrence count; must contain every key of
                codeDict (a missing key raises KeyError)

    One "symbol: code" line is written per entry of codeDict, with the
    space character shown as "space". The last line is the average number
    of bits per symbol, weighted by count and rounded to 3 decimals.
    """
    totalBits = sum (len (code) * charDict[symbol] for symbol, code in codeDict.items())
    symNum = sum (charDict[symbol] for symbol in codeDict)
    # An empty table has no symbols to average over; report 0.0 instead of
    # raising ZeroDivisionError.
    ave = round (totalBits / symNum, 3) if symNum else 0.0

    lines = []
    for symbol, code in codeDict.items():
        label = "space" if symbol == " " else symbol
        lines.append (label + ": " + code + "\n")
    lines.append ("Ave = " + str (ave) + " bits per symbol")

    # The with-statement closes the file even if a write fails.
    with open ("code.txt", "w") as file:
        file.writelines (lines)

def printEncodeMsg (text, codeDict):
    """Encode *text* with *codeDict* and write the bits to "encodemsg.txt".

    text     -- the message to encode
    codeDict -- symbol -> bit string; characters of *text* that are not keys
                of codeDict are skipped

    The bit string is written 80 characters per line. A newline follows
    every full 80-character row, including a full final row.
    """
    # join() builds the bit string in one pass instead of repeated +=.
    bits = "".join (codeDict[symbol] for symbol in text if symbol in codeDict)
    width = 80
    # The with-statement closes the file even if a write fails.
    with open ("encodemsg.txt", "w") as file:
        for start in range (0, len (bits), width):
            row = bits[start:start + width]
            file.write (row)
            if len (row) == width:
                file.write ("\n")

def main (argv):
    """Huffman-encode the file at path *argv*.

    Reads the file, counts its characters, builds the tree and the code
    table, then hands the table to printCodeTable and the encoded message
    to printEncodeMsg.
    """
    source = readfile (argv)
    counts = countChar (source)
    # buildTree returns a list; its first element is the root.
    root = buildTree (getBaseNode (counts))[0]
    table = fetchCode (root)
    printCodeTable (table, counts)
    printEncodeMsg (source, table)
    
    
if __name__ == "__main__":
    # Exit with a usage line (written to stderr, exit status 1) instead of
    # an IndexError traceback when the input path is missing. Extra
    # arguments are still ignored.
    if len (sys.argv) < 2:
        sys.exit ("usage: " + sys.argv[0] + " <input file>")
    main (sys.argv[1])

import sys import operator class node: def __init__ (self, element, num, leftChild, rightChild): self.element = element #eg "a" self.num = num #eg 2 self.leftChild = leftChild self.rightChild = rightChild def countChar (text): #counting num of occurance of each char and return a dict charDict = {} for i in text: if i == "\n": continue if charDict.get(i, None) == None: charDict[i] = 1 else: charDict[i] += 1 sortedCharDict = dict (sorted (charDict.items(), key = operator.itemgetter(1), reverse = False)) #method of sorting dictionary from https://www.w3resource.com/python-exercises/dictionary/python-data-type-dictionary-exercise-1.php return sortedCharDict def readfile (argv): #extract text from the input file file = open (argv, "r") text = file.read() file.close() return text def getBaseNode (charDict): trees = [] for key in charDict: trees.append(node(key, charDict[key], None, None)) return trees def buildTree (trees): if len(trees) == 1: return trees else: if trees[0].element …

py coder.py input.txt