C+中的LZ78编码问题+；我试图在C++中实现LZ78压缩算法，我希望我的程序能这样工作：< /P> 打开文件并将内容读入字符串_C++_Algorithm_Io_Compression

C+中的LZ78编码问题+；我试图在C++中实现LZ78压缩算法，我希望我的程序能这样工作：< /P> 打开文件并将内容读入字符串

c++ algorithm io compression

C+中的LZ78编码问题+；我试图在C++中实现LZ78压缩算法，我希望我的程序能这样工作：< /P> 打开文件并将内容读入字符串,c++,algorithm,io,compression,C++,Algorithm,Io,Compression,压缩字符串，输出包含字典中每个（键、字符）的二进制表示形式的字符串将字符串的每个8字符块转换为其二进制数将这些数字写入文件例如： string=“Kakashi。” 运行该算法会产生（0，K）（0，a）（0，k） (二,s) （0，h）（0，i） (0, .) （0，）所有对的二进制表示： 000000000 10010110000000000000110010000000000001101100000011001110011000000000000010100000011010

压缩字符串，输出包含字典中每个（键、字符）的二进制表示形式的字符串

将字符串的每个8字符块转换为其二进制数

将这些数字写入文件

例如：

string=“Kakashi。”

运行该算法会产生（0，K）（0，a）（0，k） (二,s) （0，h）（0，i） (0, .) （0，）

所有对的二进制表示： 000000000 10010110000000000000110010000000000001101100000011001110011000000000000010100000011010010000000000000101110000000000001010

在我的代码中，我一次读取8个字符以上的字符串，将其转换为一个数字，然后以二进制模式将其写入文件

但是，如果我要压缩的字符串长一点，当我解压缩它时，并不是每个单词都是正确的，有些字母丢失了，有些字母的顺序不正确。我相信我在将压缩文本写入文件时犯了很大的错误

这是我的全部代码：

#include <bits/stdc++.h>
#include <list>
#include <bitset>
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <queue>

using namespace std;

string encode_int(int in)
{
  return bitset<8>(in).to_string();
}

int decode_int(char out)
{
  return bitset<8>(out).to_ulong();
}

string encode_char(string in)
{
  return bitset<8>(in[0]).to_string();
}

char decode_char(string out)
{
  return (char)bitset<8>(out).to_ulong();
}

struct Dict
{
  string label; // dictionary entry string
  char output;  // first non-matching symbol
  int entry;    // longest matching dictionary entry

  Dict(string label, int entry, char output) // constructor
  {
    this->label = label;
    this->entry = entry;
    this->output = output;
  }
};

int find(string l, list<Dict> enc_list)
{ // change list to map

  list<Dict> temp = enc_list;
  int i = 1;

  while(!temp.empty())
  {
      if(!(l.compare(temp.front().label)))
      {
          return i;
      }
      temp.pop_front();
      i++;
  }
  return -1;
}

void write_file(string input, string output_filename)
{
  string one_byte;
  unsigned long bin_number;
  unsigned char chr;
  int i, len = input.length();

  FILE *fp;
  fp = fopen(output_filename.c_str(), "wb");

  if(fp == NULL)
  {
    printf("Unable to open output file!\n");
    return;
  }

  for (i=0; i<len; i+= 8)
  {
    one_byte = input.substr(i, 8);
    bin_number = strtol(one_byte.c_str(), NULL, 2);

    chr = bin_number;
    fprintf(fp, "%c", bin_number);
  }

  fclose(fp);
}

void LZ78_Compress(string txt, string output_filename)
{
  list <Dict> Dictionary;
  string Prefix = "", Char, compressed;

  int CodeWord, IndexForPrefix = 1, len, i;

  len = txt.length();


  for(i=0; i<len; i++){

     Char = string(1, txt[i]);

     IndexForPrefix = find((Prefix + Char), Dictionary);  // if it equals to -1, it means (Prefix + Char) is not in the dictionary
     if(IndexForPrefix != -1)
     {    
         Prefix = Prefix + Char; // if  Prefix + Char already exists, append Char
     }

     else
     {
        if(Prefix.empty())
        {
          CodeWord = 0;           // if Prefix is empty, a new letter was processed
          compressed += "00000000";
        }
        else
        {
          CodeWord = find(Prefix, Dictionary);     // search Prefix index
          compressed += encode_int(CodeWord);       // encode index
        }

        compressed += encode_char(Char);                                // encode char
        Dictionary.push_back(Dict((Prefix + Char), CodeWord, txt[i])); // add new entry to the dictionary
        Prefix.clear();      
     }
  } 

  write_file(compressed, output_filename);
}

void LZ78_Decompress(string input_filename, string output_filename)
{
  // Decompression Variables
  string dict = "";
  string decompressed_text;      // the the decomressed string
  string compressed_text;        // the compressed input
  string character;              // the character immediately after the current codeword
  string temp;                   

  unsigned char ch;
  unsigned int codeword, l = 0, i, len;           // the current dictionary entry being processed

  FILE *fp;
  fp = fopen(input_filename.c_str(), "rb");

  if(fp == NULL)
  {
    printf("Unable to open compressed file!\n");
    return;
  }


  while(fscanf(fp, "%c", &ch) == 1)
  {
    compressed_text += ch;
  }
  len = compressed_text.length();

  fclose(fp);

  ofstream outfile(output_filename.c_str(), ios::binary);

  int *idx = new int[len]; // used for storing the index of the i-th dictionary entry

  for (i=0;i<len;i+=2)
  {
    codeword = compressed_text[i];                      // longest matching dictionary entry
    character = compressed_text.substr(i + 1, 1);       // first non-matching symbol
    dict += character;                           
    idx[l] = codeword;
    l++; // idx size

    // let's say l = 0
    // then (idx[0], dict[0]) represents the first dictionary entry

    if(codeword == 0)
    {
        decompressed_text += character; // new letter, just append
    }

    else
    {      
       while(codeword > 0)  // go back in the dictionary string, adding each letter until you get one with codeword = 0
       {
        temp += dict[codeword-1];
        codeword = idx[codeword-1];
       }
       reverse(temp.begin(), temp.end()); // restore correct order
       decompressed_text += temp;         // append string and char
       decompressed_text += character;
       temp.clear();
    }
  }
  outfile << decompressed_text;
  outfile.close();
}


void Compress(string input_filename, string output_filename)
{
  ifstream in(input_filename.c_str());
  string line, txt;

  while(getline(in, line))
  {
    txt += line;
    txt += "\n";
  }
  in.close();

  LZ78_Compress(txt, output_filename);
}

int main()
{
  Compress("FullText.txt", "Compressed.out");
  LZ78_Decompress("Compressed.out", "Decompressed.out");
}

#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
使用名称空间std；
字符串编码_int（int-in）
{
将位集（in.）返回到_字符串（）；
}
int解码_int（字符输出）
{
将位集（out.）返回到_ulong（）；
}
字符串编码字符（字符串输入）
{
将位集（在[0]中）返回到_string（）；
}
字符解码\u字符（字符串输出）
{
返回（char）位集（out）.to_ulong（）；
}
结构指令
{
字符串标签；//字典条目字符串
char输出；//第一个不匹配的符号
int entry；//最长匹配字典项
Dict（字符串标签、int项、char输出）//构造函数
{
此->标签=标签；
此->条目=条目；
这个->输出=输出；
}
};
int find（字符串l，列表enc_列表）
{//将列表更改为映射
列表温度=附件列表；
int i=1；
而（！temp.empty（））
{
如果（！（l.比较（温度前（）.标签）））
{
返回i；
}
前面板温度（）；
i++；
}
返回-1；
}
无效写入文件（字符串输入、字符串输出文件名）
{
字符串一个字节；
无符号长bin_编号；
无符号字符chr；
int i，len=input.length（）；
文件*fp；
fp=fopen（output_filename.c_str（），“wb”）；
如果（fp==NULL）
{
printf（“无法打开输出文件！\n”）；
返回；
}
对于（i=0；i如何encode_int（）
和decode_int（））
知道要使用多少位吗？我只是在默认情况下设置为8位，这样我就可以将它们作为字符写入。但我刚刚意识到它将字典限制为255个条目。建议：首先进行建模-不要担心二进制表示的字符/字符串表示，只需使用您的实现环境中最简单的方法来处理数字。一旦建模实体（LZ：子字符串）的编码和解码工作正常，处理数字的紧凑表示。如何编码int（）
和解码int（）
知道要使用多少位吗？我只是在默认情况下设置为8位，这样我就可以将它们作为字符写入。但我刚刚意识到它将字典限制为255个条目。建议：首先进行建模-不要担心二进制表示的字符/字符串表示，只需使用您的实现环境中最简单的方法来处理数字。一旦对模型实体（LZ：子字符串）进行编码和解码，解决数字的紧凑表示问题。