C++ 在huffman压缩/解压缩中处理额外字节
我有一个程序,根据从文本输入文件中读取的ASCII字符频率生成哈夫曼树。哈夫曼代码存储在一个包含256个元素的字符串数组中,未出现的字符对应空字符串。该程序还会对输出文件进行编码和压缩,目前已具备部分解压缩和解码功能。 总之,我的程序读取一个输入文件,将其压缩并编码到一个输出文件;关闭该输出文件后,再把这个编码文件作为输入打开,并生成一个新的输出文件,其内容应当与原始文本输入文件的解码消息相同。 C++ 在huffman压缩/解压缩中处理额外字节,c++,data-structures,visual-studio-2017,huffman-code,C++,Data Structures,Visual Studio 2017,Huffman Code,我有一个程序,根据从文本输入文件中读取的ASCII字符频率生成哈夫曼树。哈夫曼代码存储在一个包含256个元素的字符串数组中,未出现的字符对应空字符串。该程序还会对输出文件进行编码和压缩,目前已具备部分解压缩和解码功能。 总之,我的程序读取一个输入文件,将其压缩并编码到一个输出文件;关闭该输出文件后,再把这个编码文件作为输入打开,并生成一个新的输出文件,其内容应当与原始文本输入文件的解码消息相同。 void encodeOutput(const string & fileName, const string &a
void encodeOutput(const string & fileName, const string & fileName2, string code[256]) {
ifstream ifile; //to read file
ifile.open(fileName, ios::binary);
if (!ifile)//to check if file is open or not
{
die("Can't read again"); // function that exits program if can't open
}
ofstream ofile;
ofile.open(fileName2, ios::binary);
if (!ofile) {
die("Can't open encoding output file");
}
int read;
read = ifile.get(); //read one char from file and store it in int
char buffer = 0, bit_count = 0;
while (read != -1) {//run this loop until reached to end of file(-1)
for (unsigned b = 0; b < code[read].size(); b++) { // loop through bits (code[read] outputs huffman code)
buffer <<= 1;
buffer |= code[read][b] != '0';
bit_count++;
if (bit_count == 8) {
ofile << buffer;
buffer = 0;
bit_count = 0;
}
}
read = ifile.get();
}
if (bit_count != 0)
ofile << (buffer << (8 - bit_count));
ifile.close();
ofile.close();
}
void decodeOutput(const string & fileName2, const string & fileName3, string code[256], const unsigned long long freq[256]) {
ifstream ifile;
ifile.open(fileName2, ios::binary);
if (!ifile)
{
die("Can't read again");
}
ofstream ofile;
ofile.open(fileName3, ios::binary);
if (!ofile) {
die("Can't open encoding output file");
}
priority_queue < node > q;
for (unsigned i = 0; i < 256; i++) {
if (freq[i] == 0) {
code[i] = "";
}
}
for (unsigned i = 0; i < 256; i++)
if (freq[i])
q.push(node(unsigned(i), freq[i]));
if (q.size() < 1) {
die("no data");
}
while (q.size() > 1) {
node *child0 = new node(q.top());
q.pop();
node *child1 = new node(q.top());
q.pop();
q.push(node(child0, child1));
} // created the tree
string answer = "";
const node * temp = &q.top(); // root
for (int c; (c = ifile.get()) != EOF;) {
for (unsigned p = 8; p--;) { //reading 8 bits at a time
if ((c >> p & 1) == '0') { // if bit is a 0
temp = temp->child0; // go left
}
else { // if bit is a 1
temp = temp->child1; // go right
}
if (temp->child0 == NULL && temp->child1 == NULL) // leaf node
{
answer += temp->value;
temp = &q.top();
}
}
}
ofile << ans;
}
我的问题是:在测试压缩时,我注意到压缩后的文件多出了3个字节;反过来,当我对编码文件进行解压缩和解码时,这3个额外的字节也被解码进了输出文件。根据原始输入文件中文本量的不同,我的其他测试也会输出这些额外的字节。
我查阅资料后得到了一些建议,例如把编码输出文件的前8个字节写成一个64位的unsigned long long,用来记录文件中的字节数,或者使用伪EOF(pseudo-EOF)标记;但我一直不确定该如何实现,也不确定就我已经编写或修改的代码而言,哪一种才是更明智的处理方式(如果其中有明智的方式的话)。
对此问题的任何指导或解决方案均表示感谢
(对于encodeOutput函数,fileName是输入文件参数,fileName2是输出文件参数)
(对于decodeOutput函数,fileName2是输入文件参数,fileName3是输出文件参数)
code[256]是这两个函数的参数,保存从原始输入文件中读取到的每个不同字符的哈夫曼代码。例如,输入文件中读到的字符“H”,在传递给函数时,其代码“111”可能存储在code[72]中
freq[256]保存每个ascii字符的读取频率,如果它不在原始输入文件中,则保存0
void encodeOutput(const string & fileName, const string & fileName2, string code[256]) {
ifstream ifile; //to read file
ifile.open(fileName, ios::binary);
if (!ifile)//to check if file is open or not
{
die("Can't read again"); // function that exits program if can't open
}
ofstream ofile;
ofile.open(fileName2, ios::binary);
if (!ofile) {
die("Can't open encoding output file");
}
int read;
read = ifile.get(); //read one char from file and store it in int
char buffer = 0, bit_count = 0;
while (read != -1) {//run this loop until reached to end of file(-1)
for (unsigned b = 0; b < code[read].size(); b++) { // loop through bits (code[read] outputs huffman code)
buffer <<= 1;
buffer |= code[read][b] != '0';
bit_count++;
if (bit_count == 8) {
ofile << buffer;
buffer = 0;
bit_count = 0;
}
}
read = ifile.get();
}
if (bit_count != 0)
ofile << (buffer << (8 - bit_count));
ifile.close();
ofile.close();
}
void decodeOutput(const string & fileName2, const string & fileName3, string code[256], const unsigned long long freq[256]) {
ifstream ifile;
ifile.open(fileName2, ios::binary);
if (!ifile)
{
die("Can't read again");
}
ofstream ofile;
ofile.open(fileName3, ios::binary);
if (!ofile) {
die("Can't open encoding output file");
}
priority_queue < node > q;
for (unsigned i = 0; i < 256; i++) {
if (freq[i] == 0) {
code[i] = "";
}
}
for (unsigned i = 0; i < 256; i++)
if (freq[i])
q.push(node(unsigned(i), freq[i]));
if (q.size() < 1) {
die("no data");
}
while (q.size() > 1) {
node *child0 = new node(q.top());
q.pop();
node *child1 = new node(q.top());
q.pop();
q.push(node(child0, child1));
} // created the tree
string answer = "";
const node * temp = &q.top(); // root
for (int c; (c = ifile.get()) != EOF;) {
for (unsigned p = 8; p--;) { //reading 8 bits at a time
if ((c >> p & 1) == '0') { // if bit is a 0
temp = temp->child0; // go left
}
else { // if bit is a 1
temp = temp->child1; // go right
}
if (temp->child0 == NULL && temp->child1 == NULL) // leaf node
{
answer += temp->value;
temp = &q.top();
}
}
}
ofile << ans;
}
void encodeOutput(常量字符串和文件名,常量字符串和文件名2,字符串代码[256]){
ifstream ifile;//读取文件
open(文件名,ios::binary);
if(!ifile)//检查文件是否打开
{
die(“无法再次读取”);//无法打开时退出程序的函数
}
气流;
open(fileName2,ios::binary);
如果(!ofile){
die(“无法打开编码输出文件”);
}
int-read;
read=ifile.get();//从文件中读取一个字符并将其存储在int中
字符缓冲区=0,位计数=0;
while(read!=-1){//运行此循环,直到到达文件的末尾(-1)
for(无符号b=0;bchild1;//向右走
}
如果(temp->child0==NULL&&temp->child1==NULL)//叶节点
{
回答+=温度->值;
temp=&q.top();
}
}
}
文件是因为规则,(缓冲区是因为规则,(缓冲区