C++ 处理huffman压缩/解压缩中的最后一个字节_C++_Data Structures_Huffman Code

C++ 处理huffman压缩/解压缩中的最后一个字节

c++ data-structures

C++ 处理huffman压缩/解压缩中的最后一个字节,c++,data-structures,huffman-code,C++,Data Structures,Huffman Code,我有一个程序，根据文本输入文件中读取的ASCII字符频率生成哈夫曼树。哈夫曼代码存储在256个元素的字符串数组中，如果未读取字符，则为空字符串。该程序还对输出文件进行编码和压缩，然后能够将压缩后的文件作为输入文件进行解压缩和解码总之，我的程序获取一个输入文件压缩并编码一个输出文件，关闭输出文件并打开编码作为输入文件，并获取一个新的输出文件，该文件应该具有与原始文本输入文件相同的解码消息 void encodeOutput(const string & fileName, const s

我有一个程序，根据文本输入文件中读取的ASCII字符频率生成哈夫曼树。哈夫曼代码存储在256个元素的字符串数组中，如果未读取字符，则为空字符串。该程序还对输出文件进行编码和压缩，然后能够将压缩后的文件作为输入文件进行解压缩和解码

总之，我的程序获取一个输入文件压缩并编码一个输出文件，关闭输出文件并打开编码作为输入文件，并获取一个新的输出文件，该文件应该具有与原始文本输入文件相同的解码消息

// Huffman-encodes every byte of the file `fileName` and writes the resulting
// bit stream, packed 8 bits per byte (first bit in the most significant
// position), to the file `fileName2`.
//
// Parameters:
//   fileName  - path of the input file, read in binary mode.
//   fileName2 - path of the output file, written in binary mode.
//   code      - code[b] is the Huffman code of byte value b, spelled as a
//               string of characters; '0' means a 0 bit and any other
//               character means a 1 bit. Not modified here.
//
// If the total number of code bits is not a multiple of 8, the final byte is
// padded with 0 bits on its low-order side. The number of padding bits is NOT
// stored in the output, so a decoder needs another way to know where the real
// data ends (for example the total symbol count).
//
// Calls die() if either file cannot be opened or if writing the output fails.
void encodeOutput(const string & fileName, const string & fileName2, string code[256]) {
    ifstream ifile(fileName, ios::binary);
    if (!ifile) {
        die("Can't read again"); // function that exits program if can't open
    }
    ofstream ofile(fileName2, ios::binary);
    if (!ofile) {
        die("Can't open encoding output file");
    }

    // Unsigned accumulator: shifting a signed char into its sign bit and
    // narrowing it back relies on implementation-defined conversions.
    unsigned char buffer = 0;
    unsigned bit_count = 0;

    const int end_of_file = ifstream::traits_type::eof();
    for (int read = ifile.get(); read != end_of_file; read = ifile.get()) {
        const string & bits = code[read];
        for (char bit : bits) {
            buffer = static_cast<unsigned char>((buffer << 1) | (bit != '0' ? 1 : 0));
            if (++bit_count == 8) {
                // put() writes the raw byte; no formatting is involved.
                ofile.put(static_cast<char>(buffer));
                buffer = 0;
                bit_count = 0;
            }
        }
    }

    // Flush a partially filled last byte, left-aligned and zero-padded.
    if (bit_count != 0) {
        ofile.put(static_cast<char>(buffer << (8 - bit_count)));
    }

    ifile.close();
    ofile.close();
    if (!ofile) {
        // A failed put() or close() would otherwise leave a truncated file unnoticed.
        die("Can't write encoding output file");
    }
}

void decodeOutput(const string & fileName2, const string & fileName3, string code[256], const unsigned long long freq[256]) {
    ifstream ifile;
    ifile.open(fileName2, ios::binary);
    if (!ifile)
    {
        die("Can't read again");
    }
    ofstream ofile;
    ofile.open(fileName3, ios::binary);
    if (!ofile) {
        die("Can't open encoding output file");
    }
    priority_queue < node > q;
    for (unsigned i = 0; i < 256; i++) {
        if (freq[i] == 0) {
            code[i] = "";
        }
    }

    for (unsigned i = 0; i < 256; i++)
        if (freq[i])
            q.push(node(unsigned(i), freq[i]));

    if (q.size() < 1) {
        die("no data");
    }

    while (q.size() > 1) {
        node *child0 = new node(q.top());
        q.pop();
        node *child1 = new node(q.top());
        q.pop();
        q.push(node(child0, child1));
    } // created the tree
    string answer = "";
    const node * temp = &q.top(); // root 
    for (int c; (c = ifile.get()) != EOF;) {
        for (unsigned p = 8; p--;) { //reading 8 bits at a time 
            if ((c >> p & 1) == '0') { // if bit is a 0
                temp = temp->child0; // go left
            }
            else { // if bit is a 1
                temp = temp->child1; // go right
            }
            if (temp->child0 == NULL && temp->child1 == NULL) // leaf node
            {
                answer += temp->value;
                temp = &q.top();
            }
        }
    }
  ofile << ans;
}

我目前这个程序的问题是：解码压缩文件时，输出的末尾会多出一个左右的字符，而原始输入文件中并没有这些字符。据我所知，这是最后一个字节中用于补齐的垃圾位造成的。通过研究，我发现一个可能的解决方案是使用伪EOF（pseudo-EOF）字符，在读到垃圾位之前停止解码，但我不确定如何在我当前处理编码和解码的函数中实现这一点，因此非常感谢所有的指导和帮助。

我的最终目标是能够使用这个程序也完全解码编码文件，而无需将垃圾位发送到输出文件

下面是我的两个函数 encodeOutput 和 decodeOutput，它们分别处理压缩和解压缩。

（对于encodeOutput函数，fileName是输入文件参数，fileName2是输出文件参数）

（对于decodeOutput函数，fileName2是输入文件参数，fileName3是输出文件参数）

code[256]是这两个函数的参数，保存原始输入文件中读取到的每个唯一字符的哈夫曼编码。例如，输入文件中读取到的字符“H”，在传递给函数时，其编码“111”可能存储在code[72]中。

freq[256]保存每个ascii字符的读取频率，如果它不在原始输入文件中，则保存0

// Huffman-encodes every byte of the file `fileName` and writes the resulting
// bit stream, packed 8 bits per byte (first bit in the most significant
// position), to the file `fileName2`.
//
// Parameters:
//   fileName  - path of the input file, read in binary mode.
//   fileName2 - path of the output file, written in binary mode.
//   code      - code[b] is the Huffman code of byte value b, spelled as a
//               string of characters; '0' means a 0 bit and any other
//               character means a 1 bit. Not modified here.
//
// If the total number of code bits is not a multiple of 8, the final byte is
// padded with 0 bits on its low-order side. The number of padding bits is NOT
// stored in the output, so a decoder needs another way to know where the real
// data ends (for example the total symbol count).
//
// Calls die() if either file cannot be opened or if writing the output fails.
void encodeOutput(const string & fileName, const string & fileName2, string code[256]) {
    ifstream ifile(fileName, ios::binary);
    if (!ifile) {
        die("Can't read again"); // function that exits program if can't open
    }
    ofstream ofile(fileName2, ios::binary);
    if (!ofile) {
        die("Can't open encoding output file");
    }

    // Unsigned accumulator: shifting a signed char into its sign bit and
    // narrowing it back relies on implementation-defined conversions.
    unsigned char buffer = 0;
    unsigned bit_count = 0;

    const int end_of_file = ifstream::traits_type::eof();
    for (int read = ifile.get(); read != end_of_file; read = ifile.get()) {
        const string & bits = code[read];
        for (char bit : bits) {
            buffer = static_cast<unsigned char>((buffer << 1) | (bit != '0' ? 1 : 0));
            if (++bit_count == 8) {
                // put() writes the raw byte; no formatting is involved.
                ofile.put(static_cast<char>(buffer));
                buffer = 0;
                bit_count = 0;
            }
        }
    }

    // Flush a partially filled last byte, left-aligned and zero-padded.
    if (bit_count != 0) {
        ofile.put(static_cast<char>(buffer << (8 - bit_count)));
    }

    ifile.close();
    ofile.close();
    if (!ofile) {
        // A failed put() or close() would otherwise leave a truncated file unnoticed.
        die("Can't write encoding output file");
    }
}

void decodeOutput(const string & fileName2, const string & fileName3, string code[256], const unsigned long long freq[256]) {
    ifstream ifile;
    ifile.open(fileName2, ios::binary);
    if (!ifile)
    {
        die("Can't read again");
    }
    ofstream ofile;
    ofile.open(fileName3, ios::binary);
    if (!ofile) {
        die("Can't open encoding output file");
    }
    priority_queue < node > q;
    for (unsigned i = 0; i < 256; i++) {
        if (freq[i] == 0) {
            code[i] = "";
        }
    }

    for (unsigned i = 0; i < 256; i++)
        if (freq[i])
            q.push(node(unsigned(i), freq[i]));

    if (q.size() < 1) {
        die("no data");
    }

    while (q.size() > 1) {
        node *child0 = new node(q.top());
        q.pop();
        node *child1 = new node(q.top());
        q.pop();
        q.push(node(child0, child1));
    } // created the tree
    string answer = "";
    const node * temp = &q.top(); // root 
    for (int c; (c = ifile.get()) != EOF;) {
        for (unsigned p = 8; p--;) { //reading 8 bits at a time 
            if ((c >> p & 1) == '0') { // if bit is a 0
                temp = temp->child0; // go left
            }
            else { // if bit is a 1
                temp = temp->child1; // go right
            }
            if (temp->child0 == NULL && temp->child1 == NULL) // leaf node
            {
                answer += temp->value;
                temp = &q.top();
            }
        }
    }
  ofile << ans;
}

void encodeOutput（常量字符串和文件名，常量字符串和文件名2，字符串代码[256]）{
ifstream ifile；//读取文件
open（文件名，ios:：binary）；
if（！ifile）//检查文件是否打开
{
die（“无法再次读取”）；//无法打开时退出程序的函数
}
气流；
open（fileName2，ios:：binary）；
如果（！ofile）{
die（“无法打开编码输出文件”）；
}
int-read；
read=ifile.get（）；//从文件中读取一个字符并将其存储在int中
字符缓冲区=0，位计数=0；
while（read！=-1）{//运行此循环，直到到达文件的末尾（-1）
for（无符号b=0；bchild1；//向右走
}
如果（temp->child0==NULL&&temp->child1==NULL）//叶节点
{
回答+=温度->值；
temp=&q.top（）；
}
}
}
将其更改为freq[257]和code[257]，并将freq[256]设置为1。你的EOF就是符号256，它只会在流的末尾出现一次。编码结束时，发送符号256；解码时一旦收到符号256，就停止。
将其更改为freq[257]和code[257]，并将freq[256]设置为1。你的EOF就是符号256，它只会在流的末尾出现一次。编码结束时，发送符号256；解码时一旦收到符号256，就停止。