Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/shell/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Shell 在unicode文本中转换unicode实体_Shell_Batch File_Cmd_Cygwin - Fatal编程技术网

Shell 在unicode文本中转换unicode实体

Shell 在unicode文本中转换unicode实体,shell,batch-file,cmd,cygwin,Shell,Batch File,Cmd,Cygwin,我有一个包含unicode实体的文本文件。是否有办法通过cmd/batch或cygwin将所有这些实体转换为文本中的等效实体。我似乎找不到太多关于这个的信息 i、 ejournal\u0027s将成为journal的使用.bat扩展名保存此文件,例如decodeStrings.bat: 0</* : @echo off cscript /nologo /E:jscript "%~f0" %* exit /b %errorlevel% */0; var jsEscap

我有一个包含unicode实体的文本文件。是否有办法通过cmd/batch或cygwin将所有这些实体转换为文本中的等效实体。我似乎找不到太多关于这个的信息


例如,
journal\u0027s
将变为
journal's

将下面的脚本以 .bat 扩展名保存,例如 decodeStrings.bat:

0</* :
@echo off

    rem Batch half of a batch/JScript polyglot. Everything from the slash-star on
    rem the first line up to the star-slash on the exit line is a JScript block
    rem comment, so the script engine skips these batch commands.
    rem Re-run this same file through the JScript engine, forwarding all arguments.
    cscript /nologo /E:jscript "%~f0" %*

rem Hand the exit code of cscript back to the caller. The trailing 0; closes the
rem JScript expression that was opened by the leading 0 and less-than sign.
exit /b %errorlevel% */0;


    // Maps the character that follows a backslash in a single-character escape
    // sequence to the control character that the sequence stands for.
    var jsEscapes = {
      'n': '\n',
      'r': '\r',
      't': '\t',
      'f': '\f',
      // Spelled \x0B on purpose: classic JScript (the engine cscript runs) does not
      // recognise '\v' and evaluates it to the letter "v", not a vertical tab.
      'v': '\x0B',
      'b': '\b'
    };


    //string evaluation
    //http://stackoverflow.com/questions/24294265/how-to-re-enable-special-character-sequneces-in-javascript

    // Replacement callback for String.replace: converts one matched escape
    // sequence into the character it denotes.
    //   _     - the full matched text (unused)
    //   hex0  - the four hex digits of a \uXXXX escape, when that form matched
    //   hex1  - the two hex digits of a \xXX escape, when that form matched
    //   octal - the digits of an octal escape, when that form matched
    //   other - the single character after the backslash in every other case
    // Returns the decoded character. A single-character escape that has no entry
    // in jsEscapes decodes to the character itself.
    function decodeJsEscape(_, hex0, hex1, octal, other) {
      var hexDigits = hex0 || hex1;
      var digits = hexDigits || octal;
      if (digits) {
        // Hex digits take precedence; otherwise the digits are octal.
        return String.fromCharCode(parseInt(digits, hexDigits ? 16 : 8));
      }
      var mapped = jsEscapes[other];
      return mapped ? mapped : other;
    }

    // Expands every backslash escape sequence found in s and returns the result.
    function decodeJsString(s) {
      // One alternative per escape form: \uXXXX (group 1), \xXX (group 2),
      // an octal escape of one to three digits (group 3), or any other single
      // character after the backslash (group 4).
      var escapePattern = /\\(?:u([0-9A-Fa-f]{4})|x([0-9A-Fa-f]{2})|([0-3][0-7]{0,2}|[4-7][0-7]?)|(.))/g;
      var decoded = s.replace(escapePattern, decodeJsEscape);
      return decoded;
    }

    // Decode each command-line argument and echo the result on its own line.
    var ARGS = WScript.Arguments;

    var argIndex = 0;
    while (argIndex < ARGS.Length) {
        var decodedArg = decodeJsString(ARGS(argIndex));
        WScript.Echo(decodedArg);
        argIndex++;
    }
输出:

journal's
journal's
下面是一个直接处理文件的脚本(可以传入任意数量的文件路径):


评论:这是一个 JSON 文件吗?——不是,只是一个普通的文本文件。
journal's
journal's
0</* :
@echo off

    rem Batch half of a batch/JScript polyglot. Everything from the slash-star on
    rem the first line up to the star-slash on the exit line is a JScript block
    rem comment, so the script engine skips these batch commands.
    rem Re-run this same file through the JScript engine, forwarding all arguments.
    cscript /nologo /E:jscript "%~f0" %*

rem Hand the exit code of cscript back to the caller. The trailing 0; closes the
rem JScript expression that was opened by the leading 0 and less-than sign.
exit /b %errorlevel% */0;

    // Command-line handling: at least one argument is required, and a first
    // argument of -h or -help (in any letter case) prints usage and exits.
    var ARGS = WScript.Arguments;

    if (ARGS.Length < 1) {
        WScript.Echo("Wrong arguments");
        WScript.Quit(1);
    }

    var firstArg = ARGS.Item(0).toLowerCase();
    var wantsHelp = (firstArg == "-help" || firstArg == "-h");
    if (wantsHelp) {
        WScript.Echo("Evaluates unicode/special sequences in file");
        WScript.Echo(WScript.ScriptName + " path_to_file [path_to_file]");
        WScript.Quit(0);
    }


    // Maps the character that follows a backslash in a single-character escape
    // sequence to the control character that the sequence stands for.
    var jsEscapes = {
      'n': '\n',
      'r': '\r',
      't': '\t',
      'f': '\f',
      // Spelled \x0B on purpose: classic JScript (the engine cscript runs) does not
      // recognise '\v' and evaluates it to the letter "v", not a vertical tab.
      'v': '\x0B',
      'b': '\b'
    };


    //string evaluation
    //http://stackoverflow.com/questions/24294265/how-to-re-enable-special-character-sequneces-in-javascript

    // Replacement callback for String.replace: converts one matched escape
    // sequence into the character it denotes.
    //   _     - the full matched text (unused)
    //   hex0  - the four hex digits of a \uXXXX escape, when that form matched
    //   hex1  - the two hex digits of a \xXX escape, when that form matched
    //   octal - the digits of an octal escape, when that form matched
    //   other - the single character after the backslash in every other case
    // Returns the decoded character. A single-character escape that has no entry
    // in jsEscapes decodes to the character itself.
    function decodeJsEscape(_, hex0, hex1, octal, other) {
      var hexDigits = hex0 || hex1;
      var digits = hexDigits || octal;
      if (digits) {
        // Hex digits take precedence; otherwise the digits are octal.
        return String.fromCharCode(parseInt(digits, hexDigits ? 16 : 8));
      }
      var mapped = jsEscapes[other];
      return mapped ? mapped : other;
    }

    // Expands every backslash escape sequence found in s and returns the result.
    function decodeJsString(s) {
      // One alternative per escape form: \uXXXX (group 1), \xXX (group 2),
      // an octal escape of one to three digits (group 3), or any other single
      // character after the backslash (group 4).
      var escapePattern = /\\(?:u([0-9A-Fa-f]{4})|x([0-9A-Fa-f]{2})|([0-3][0-7]{0,2}|[4-7][0-7]?)|(.))/g;
      var decoded = s.replace(escapePattern, decodeJsEscape);
      return decoded;
    }




  // Reads the whole of `file` through an ADODB.Stream opened as "iso-8859-1" text
  // and returns it as a string.
  // Any character that comes back with a code above 255 is looked up in
  // `adjustment` and replaced by the character whose code is 128 plus its index
  // in that table (a character missing from the table has index -1 and therefore
  // becomes code 127).
  //   file - path of the file to read
  function getContent(file) {
        // :: http://www.dostips.com/forum/viewtopic.php?f=3&t=3855&start=15&p=28898  ::
        var adTypeText = 2;
        var adReadAll = -1;

        // Characters expected above code 255; the position of each one in this
        // string is its offset from 128.
        var adjustment = "\u20AC\u0081\u201A\u0192\u201E\u2026\u2020\u2021" +
                         "\u02C6\u2030\u0160\u2039\u0152\u008D\u017D\u008F" +
                         "\u0090\u2018\u2019\u201C\u201D\u2022\u2013\u2014" +
                         "\u02DC\u2122\u0161\u203A\u0153\u009D\u017E\u0178" ;

        var ado = WScript.CreateObject("ADODB.Stream");
        ado.Type = adTypeText;
        ado.CharSet = "iso-8859-1";  // code page with minimum adjustments for input
        ado.Open();

        var text;
        try {
            ado.LoadFromFile(file);
            // Read everything the stream holds instead of asking the file system
            // for a byte count and assuming it equals the character count.
            text = ado.ReadText(adReadAll);
        } finally {
            // Release the stream even when loading or reading fails.
            ado.Close();
        }

        var chars = text.split('');
        // Bound the loop by the characters actually read, so chars[indx] is
        // always defined.
        for (var indx = 0; indx < chars.length; indx++) {
            if ( chars[indx].charCodeAt(0) > 255 ) {
               chars[indx] = String.fromCharCode(128 + adjustment.indexOf(chars[indx]));
            }
        }
        return chars.join("");
   }

   // Writes `content` to `file` as "iso-8859-1" text through an ADODB.Stream.
   //   file    - path of the file to write
   //   content - the text to store
   function writeContent(file,content) {
        var TEXT_STREAM = 2;         // adTypeText
        var SAVE_OVERWRITE = 2;      // adSaveCreateOverWrite

        var stream = WScript.CreateObject("ADODB.Stream");
        stream.Type = TEXT_STREAM;
        stream.CharSet = "iso-8859-1";  // right code page for output (no adjustments)
        stream.Open();

        stream.WriteText(content);
        stream.SaveToFile(file, SAVE_OVERWRITE);
        stream.Close();
   }

    // Process every file named on the command line in place: read it, expand
    // its escape sequences, and write the result back to the same path.
    for (var i = 0; i < ARGS.Length; i++) {
        var path = ARGS.Item(i);
        WScript.Echo("Processing: " + path);
        writeContent(path, decodeJsString(getContent(path)));
    }