Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/141.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ 如何编译C++;使用指定的编码?_C++_Gcc_Encoding_Utf 8_Interpreter - Fatal编程技术网

C++ 如何使用指定的编码编译C++?

C++ 如何编译C++;使用指定的编码?,c++,gcc,encoding,utf-8,interpreter,C++,Gcc,Encoding,Utf 8,Interpreter,作为一种爱好,我正在为自己的编程语言做一个解释器。 我的问题是Windows CMD中的非ascii字符显示不正确。 我正在读取的源文件保存为UTF-8。我想它是UTF-8,没有BOM。例如,当我的源文件显示: print "á" 在我的Mac上,我得到了预期的输出。字母a但是在我的电脑上我得到├í。我认为这是一个代码页问题,我使用的代码页上有字母á。然后我尝试了不同的字体。露西达·格兰德工作。但是在Python解释器中,字母以默认字体显示 我问了StackOverflow上的人,有人说我的程

作为一种爱好,我正在为自己的编程语言做一个解释器。 我的问题是Windows CMD中的非ascii字符显示不正确。 我正在读取的源文件保存为UTF-8。我想它是UTF-8,没有BOM。例如,当我的源文件显示:

print "á"
在我的Mac上,我得到了预期的输出,即字母
á
。但是在我的PC上,我得到的是
├í
。我认为这是一个代码页问题,而我使用的代码页中包含字母
á
。然后我尝试了不同的字体,Lucida Grande 可以正常显示。但是在Python解释器中,字母
á
以默认字体就能正确显示

我问了StackOverflow上的人,有人说我的程序本身可能是用错误的编码编译的。所以我的问题是:如何指定或更改C++编译器在编译我的文件时使用的编码?我使用的编译器是TDM-GCC,也试过MinGW,并且遇到了同样的问题

谢谢你的帮助

---编辑---

下面是我的整个源文件。您可以这样编译它:

c++ myfile.cc -o myprogram -std=c++11
每当我运行“myprogram.exe somefile.mylang”时,其中somefile.mylang会显示:

print "Hello á"
我在windows CMD上获得以下输出:

"Hello á"
我不知道Python、Lua、Ruby等是如何做到使用默认控制台字体并输出正确字符的

#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

/* Global Variables */
/* Reserved words of the language. Not every entry is a keyword a program can
   write: some are tag names the interpreter uses internally. The lexer below
   refers to entries by index: [0] "print", [1] "string", [2] "sc" (the
   statement terminator emitted for ';' and newline), [7] "num" and
   [8] "expr". */
string keywords[9] = { "print", "string", "sc", "variable", "eq", "undefined", "nl", "num", "expr" };
/* Token stream: lex() appends to this vector and exec_program() passes it to
   parse(). A vector is used because the number of tokens is only known at
   runtime. */
vector<string> tokens;

/* Intended symbol table; likewise a vector because its size is only known at
   runtime. NOTE(review): no code visible in this file reads or writes it. */
vector<string> variables;

/* Function Declarations */
/* Prototypes for the interpreter's functions, listed in one place as an
   overview. Each prototype repeats the exact signature of the definition
   further down; a prototype with a different parameter list would silently
   declare a separate, never-defined overload instead. */
void exec_program(std::string filename);
std::string load_program(std::string filename);
void lex(std::string prog);
void parse(std::vector<std::string> tokens);

std::string evalExpression(std::string expr);
void doPRINT(std::string toPrint);
/* No definition of the next two is visible in this file, so their parameter
   lists are left as originally declared.
   NOTE(review): goGETVAR is presumably a typo for doGETVAR -- confirm before
   renaming. */
void doASSIGN();
void goGETVAR();

/* Definitions */
/* Prefixes for diagnostic messages, one per error category, defined once here
   so that every message of a category starts with the same text. Of these,
   only IO_ERROR is used by the code visible in this file (in load_program). */
#define IO_ERROR "[IO ERROR] "
#define SYNTAX_ERROR "[SYNTAX ERROR] "
#define ASSIGN_ERROR "[ASSIGN ERROR] "

/*
 * Reads the program source file `filename` and returns its contents as a
 * single string in which every line ends with exactly one '\n'.
 *
 * The bytes of each line are copied unchanged, so UTF-8 text passes through
 * as-is. Two things are removed because the lexer would treat them as
 * unknown input: a UTF-8 byte order mark at the very start of the file, and
 * the '\r' left at the end of a line when a CRLF file is read on a platform
 * that does not translate line endings.
 *
 * If the file cannot be opened, an IO_ERROR message is printed and the
 * process exits with a failure status. A failed open does not tell us whether
 * the file is missing or merely unreadable (e.g. because of permissions).
 */
std::string load_program(std::string filename) {
    std::ifstream rdfile(filename);
    if (!rdfile) {
        std::cout << IO_ERROR << "Unable to open the file \"" << filename << "\"." << std::endl;
        /* Exit with a non-zero status so shells and scripts can see the failure. */
        std::exit(EXIT_FAILURE);
    }

    std::string filedata;
    bool first_line = true;
    for (std::string line; std::getline(rdfile, line); ) {
        if (first_line) {
            first_line = false;
            /* Drop a leading UTF-8 byte order mark (EF BB BF) if present. */
            if (line.compare(0, 3, "\xEF\xBB\xBF") == 0) {
                line.erase(0, 3);
            }
        }
        /* Drop the carriage return of a CRLF line ending. */
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        filedata += line;
        filedata += '\n';
    }

    /* The stream closes itself when it goes out of scope. */
    return filedata;
}

/*
 * Splits the program text `prog` into tokens and appends them to the global
 * `tokens` vector. The input is consumed one byte at a time.
 *
 * Tokens produced:
 *   print              the print keyword (keywords[0])
 *   string:"<text>"    a double-quoted string literal, quotes included
 *   num:<digits>       a number that is not part of an expression
 *   expr:[<text>]      digits and + - * / collected since the last statement end
 *   sc                 statement terminator, emitted for ';' and for newline
 *
 * Inside a string literal every byte is copied verbatim, so multi-byte UTF-8
 * sequences, digits, operators, spaces and ';' all stay part of the string.
 * Consecutive terminators collapse into a single "sc", and no "sc" is emitted
 * before the first real token.
 *
 * Outside a string, a character that starts none of the forms above stays in
 * the pending buffer; nothing is matched again unless the buffer becomes
 * exactly "print".
 */
void lex(std::string prog) {
    std::string toks;        /* characters read but not yet recognised */
    std::string n;           /* digits of the number currently being read */
    std::string expr;        /* digits and operators seen since the last statement end */
    std::string s;           /* string literal being read, starting with its opening quote */
    bool in_string = false;
    bool isexpr = false;     /* true once an operator has been seen in this statement */

    /* Emits the pending number or expression token, then one terminator. */
    auto end_statement = [&]() {
        if (!expr.empty() && isexpr) {
            tokens.push_back(keywords[8] + ":[" + expr + "]");
        } else if (!n.empty() && !isexpr) {
            tokens.push_back(keywords[7] + ":" + expr);
        }
        /* Check for an empty vector first: back() on it is undefined. */
        if (!tokens.empty() && tokens.back() != "sc") {
            tokens.push_back(keywords[2]);
        }
        n.clear();
        expr.clear();
        isexpr = false;
    };

    for (std::string::size_type i = 0; i < prog.size(); ++i) {
        toks += prog[i];

        if (in_string) {
            /* toks always holds exactly one byte here. */
            if (toks == "\"") {
                in_string = false;
                tokens.push_back(keywords[1] + ":" + s + "\"");
                s.clear();
            } else {
                s += toks;
            }
            toks.clear();
            continue;
        }

        const bool is_digit = toks.size() == 1 && toks[0] >= '0' && toks[0] <= '9';

        if (toks == " ") {
            toks.clear();
            n.clear();
        } else if (toks == ";" || toks == "\n") {
            toks.clear();
            end_statement();
        } else if (is_digit) {
            n += toks;
            expr += toks;
            toks.clear();
        } else if (toks == "+" || toks == "-" || toks == "*" || toks == "/") {
            expr += toks;
            isexpr = true;
            toks.clear();
            n.clear();
        } else if (toks == keywords[0]) {
            tokens.push_back(keywords[0]);
            toks.clear();
        } else if (toks == "\"") {
            /* Keep the opening quote in s; doPRINT strips both quotes later. */
            in_string = true;
            s = toks;
            toks.clear();
        }
    }
}

/*
 * Evaluates an integer arithmetic expression and returns the result as a
 * decimal string.
 *
 * `expr` is the text of an expression token with its leading "expr:[" already
 * removed, e.g. "1+2*3]". Evaluation stops at the first ']' or at the end of
 * the string, whichever comes first.
 *
 * Rules:
 *   - Operators are + - * / on ints, applied strictly left to right with no
 *     precedence: "2+3*4]" is (2+3)*4 = 20.
 *   - Characters other than digits, operators and ']' (parentheses, spaces)
 *     are ignored.
 *   - An operator with a missing operand is ignored: with several operators in
 *     a row the last one wins, and a leading or trailing operator has no
 *     effect. Unary signs are therefore not supported.
 *   - A lone operand evaluates to itself; an expression with no operand at all
 *     evaluates to "0".
 *
 * Throws std::domain_error on division by zero. std::stoi throws
 * std::out_of_range for an operand that does not fit in an int.
 */
std::string evalExpression(std::string expr) {
    bool have_result = false;   /* becomes true once the first operand is read */
    int result = 0;
    char pending_op = '\0';     /* operator to apply to the next operand */
    std::string digits;         /* digits of the operand being read */

    /* Folds the operand collected in `digits`, if any, into `result`. */
    auto apply_operand = [&]() {
        if (digits.empty()) {
            return;
        }
        const int value = std::stoi(digits);
        digits.clear();

        if (!have_result) {
            result = value;
            have_result = true;
            return;
        }
        switch (pending_op) {
            case '+': result += value; break;
            case '-': result -= value; break;
            case '*': result *= value; break;
            case '/':
                if (value == 0) {
                    throw std::domain_error("division by zero");
                }
                result /= value;
                break;
            default:
                break;
        }
    };

    for (const char c : expr) {
        if (c >= '0' && c <= '9') {
            digits += c;
        } else if (c == '+' || c == '-' || c == '*' || c == '/') {
            apply_operand();
            pending_op = c;
        } else if (c == ']') {
            break;
        }
    }
    /* Handles the last operand, whether or not a ']' terminated the input. */
    apply_operand();

    return std::to_string(result);
}

/*
 * Prints the value carried by one token, followed by a newline.
 *
 *   string:"<text>"  -> <text> (tag and surrounding quotes removed)
 *   num:<digits>     -> <digits>
 *   expr:[<text>]    -> the result of evalExpression on "<text>]"
 *   anything else    -> the token unchanged
 */
void doPRINT(std::string toPrint) {
  std::string text;

  if (toPrint.compare(0, 6, "string") == 0) {
    /* Skip "string:", then drop the first and last character (the quotes). */
    const std::string quoted = toPrint.substr(7);
    text = quoted.substr(1, quoted.size() - 2);
  } else if (toPrint.compare(0, 3, "num") == 0) {
    text = toPrint.substr(4);
  } else if (toPrint.compare(0, 4, "expr") == 0) {
    /* Skip "expr:["; the closing ']' is left for evalExpression. */
    text = evalExpression(toPrint.substr(6));
  } else {
    text = toPrint;
  }

  std::cout << text << std::endl;
}

/*
 * Executes a token stream.
 *
 * The only statement form recognised is three consecutive tokens: "print", a
 * token starting with "string", "num" or "expr", and "sc". Each such
 * statement is executed by passing its middle token to doPRINT.
 *
 * Anything else (an unknown token, or a statement cut short by the end of
 * the stream) is reported as a syntax error and parsing stops, so the loop
 * always terminates and never reads past the end of `tokens`.
 */
void parse(std::vector<std::string> tokens) {
    std::vector<std::string>::size_type i = 0;

    while (i < tokens.size()) {
        bool is_print_statement = false;

        /* A statement needs tokens i, i+1 and i+2 to exist. */
        if (i + 2 < tokens.size() && tokens[i] == "print" && tokens[i + 2] == "sc") {
            const std::string& argument = tokens[i + 1];
            is_print_statement = argument.compare(0, 6, "string") == 0 ||
                                 argument.compare(0, 3, "num") == 0 ||
                                 argument.compare(0, 4, "expr") == 0;
        }

        if (!is_print_statement) {
            std::cout << "[SYNTAX ERROR] Unexpected token \"" << tokens[i] << "\"." << std::endl;
            return;
        }

        doPRINT(tokens[i + 1]);
        i += 3;
    }
}

/* Runs one program file: load its text, tokenise it into the global `tokens`
   vector, then execute those tokens. */
void exec_program(std::string filename) {
    const std::string source = load_program(filename);
    lex(source);
    parse(tokens);
}

/*
 * Entry point. Expects the path of the program to run as the first argument.
 *
 * Returns 0 when the program file was executed, and EXIT_FAILURE when no
 * file name was given or when execution ended with an exception (for example
 * std::stoi rejecting a number that does not fit in an int).
 */
int main(int argc, char* argv[]) {
    /* Test argc rather than argv[1]: argv[1] is only readable when argc >= 1. */
    if (argc < 2) {
        std::cout << "Usage: reedoo <filename> [args]" << std::endl;
        return EXIT_FAILURE;
    }

    try {
        exec_program(argv[1]);
    } catch (const std::exception& error) {
        /* Report the failure instead of letting the runtime abort the process. */
        std::cout << "[RUNTIME ERROR] " << error.what() << std::endl;
        return EXIT_FAILURE;
    }
    return 0;
}
#包括
#包括
#包括
#包括
#包括
#包括
使用名称空间std;
/*全局变量*/
/*并非所有这些都是可以在程序中使用的实际“关键字”。
它们之所以被称为关键字,是因为它们是保留的,或者是因为
指定为语法中的关键字,或因为它们是由
内部使用的口译员*/
字符串关键字[9]={“print”、“string”、“sc”、“variable”、“eq”、“undefined”、“nl”、“num”、“expr”};
/*我们将令牌存储在向量中,我们可以使用数组,但可以指定数组
运行时的大小在技术上是不可能的,解决方法是一件痛苦的事情*/
向量标记;
/*我们的“符号表”也是一个向量,因为我们只能确定
符号表在运行时应该很大,所以我们使用一个向量来表示
更容易*/
向量变量;
/*函数声明*/
/*我们在这里声明了所有函数,因为这样可以很容易地看到有多少个函数
我们拥有的功能使我们更容易发现效率低下的地方,也使
代码看起来更好*/
void exec_程序();
字符串加载_程序();
字符串lex();
void parse();
void doPRINT();
void doASSIGN();
void goGETVAR();
/*定义*/
/*这些是我们的常数,它们在程序开始时被定义为常数,所以
如果代码执行中出现任何错误,我们可以始终显示
正确的错误类型*/
#定义IO_错误“[IO错误]”
#定义语法错误“[语法错误]”
#定义分配错误“[分配错误]”
/*我们通过读取文件将程序加载到解释器中*/
字符串加载程序(字符串文件名){
字符串文件数据;
ifstream文件(文件名);
/*我们检查是否可以打开该文件。这不能说明是否可以使用
该文件存在,因为权限也可能阻止我们打开该文件*/
如果(!rdfile){

cout
这不是关于如何编译myprogram.exe的问题,而是myprogram.exe如何处理somefile.mylang的问题

作为语言开发人员,您有责任说“mylang脚本中程序的源文件应该是utf-8”,或者在源文件中提供一个识别代码页标记。 您还应该说“mylang语言中的字符串被编码为UTF foo”(因为这会影响诸如“hello”.charAt(3)或其他等效方法之类的操作)


然后,编译器/解释器(myprogram.exe)的职责是用适当的编码打开源代码(somefile.mylang),并将其转换为UTF-foo的内部表示。

我不确定我是否理解了这个问题。无论如何,这不是C++的问题,而是必须把字符编码告知Windows控制台。或者可以帮助。将输出重定向到文件,并用十六进制编辑器打开。如果它是预期的字节序列,那就是您的终端的问题;如果不是,请显示代码。@Ali 我添加了源代码。@Plasmah 我添加了代码。好的,让我们试着缩小问题的范围,并检查我是否正确理解了它。请将名为data.txt的文件放在适当的目录中,并在此data.txt文件中放置一个
á
。此程序是否打印
á
在Windows上?