解析包含unicode字符的JSON数据

解析包含unicode字符的JSON数据,json,utf-8,axios,Json,Utf 8,Axios,我用UTF8编码了一个json文件,其中包含存储在AWS S3存储中的unicode字符。为了能够从React项目下载该文件,我创建了AWS网关API和Lambda函数。在该项目中,我使用Axios库下载了该文件,并且似乎下载正确(我使用console.log检查了内容)。但是,当我使用JSON.parse解析文件时,会出现“字符串文字中的控制字符错误”错误。使用JSON验证器检查内容返回肯定值,API网关的http响应头正确设置为“content-type:application/JSON”和

我有一个以UTF-8编码的JSON文件,其中包含Unicode字符,存放在AWS S3存储中。为了能够在React项目中下载该文件,我创建了AWS API Gateway接口和Lambda函数。在项目中,我使用Axios库下载该文件,看起来下载是正确的(我用console.log检查过内容)。但是,当我用JSON.parse解析该文件时,会报“字符串字面量中存在错误的控制字符”(bad control character in string literal)错误。用JSON验证器检查内容,结果是有效的;API Gateway的HTTP响应头也已正确设置为“content-type: application/json”和“charset: utf-8”。只有当我从文件中删除所有非ASCII的Unicode字符后,JSON.parse才能正常工作。而且在删除包含Unicode字符的部分之后,我甚至不需要调用JSON.parse,就可以在脚本中直接把响应数据当作对象来访问。以下是我的JSON文件的内容:

{
  "en": [
    {
      "Question": "Question 1",
      "Choice": [ "Strongly  Agree", "Agree", "Neither Agree Nor Disagree", "Disagree", "Strongly Disagree" ]
    },
    {
      "Question": "Question 2",
      "Choice": [ "Strongly  Agree", "Agree", "Neither Agree Nor Disagree", "Disagree", "Strongly Disagree" ]
    },
    {
      "Question": "Question 3",
      "Choice": [ "Strongly  Agree", "Agree", "Neither Agree Nor Disagree", "Disagree", "Strongly Disagree" ]
    },
    {
      "Question": "Question 4",
      "Choice": [ "Strongly  Agree", "Agree", "Neither Agree Nor Disagree", "Disagree", "Strongly Disagree" ]
    }
  ],
  "fr": [
    {
      "Question": "Question 1",
      "Choice": [ "Tout À Fait d'Accord", "d'Accord", "Ni En Désaccord Ni d'Accord", "Pas d'Accord", "Pas Du Tout d'Accord"]
    },
    {
      "Question": "Question 2",
      "Choice": [ "Tout À Fait d'Accord", "d'Accord", "Ni En Désaccord Ni d'Accord", "Pas d'Accord", "Pas Du Tout d'Accord"]
    },
    {
      "Question": "Question 3",
      "Choice": [ "Tout À Fait d'Accord", "d'Accord", "Ni En Désaccord Ni d'Accord", "Pas d'Accord", "Pas Du Tout d'Accord"]
    },
    {
      "Question": "Question 4",
      "Choice": [ "Tout À Fait d'Accord", "d'Accord", "Ni En Désaccord Ni d'Accord", "Pas d'Accord", "Pas Du Tout d'Accord"]
    }
  ],
  "pt": [
    {
      "Question": "Questão 1",
      "Choice": [ "Concordo Plenamente", "Aceita", "Não Concordo Nem Discordo", "Discordar", "Discordo Fortemente" ]
    },
    {
      "Question": "Questão 2",
      "Choice": [ "Concordo Plenamente", "Aceita", "Não Concordo Nem Discordo", "Discordar", "Discordo Fortemente" ]
    },
    {
      "Question": "Questão 3",
      "Choice": [ "Concordo Plenamente", "Aceita", "Não Concordo Nem Discordo", "Discordar", "Discordo Fortemente" ]
    },
    {
      "Question": "Questão 4",
      "Choice": [ "Concordo Plenamente", "Aceita", "Não Concordo Nem Discordo", "Discordar", "Discordo Fortemente" ]
    }
  ],
  "my": [
    {
      "Question": "မေးခွန်း ၁",
      "Choice": [ "အပြည့်အ၀ထောက်ခံတယ်", "ထောက်ခံတယ်", "ထောက်ခံတယ်လည်းမဟုတ်ဘူး မထောက်ခံတယ်လည်းမဟုတ်ဘူး", "မထောက်ခံဘူး", "အပြည့်အ၀မထောက်ခံဘူး" ]
    },
    {
      "Question": "မေးခွန်း ၂",
      "Choice": [ "Strongly  Disagree", "Somewhat Disagree", "Agree", "Somewhat Agree", "Strongly Agree" ]
    },
    {
      "Question": "မေးခွန်း ၃",
      "Choice": [ "Strongly  Disagree", "Somewhat Disagree", "Agree", "Somewhat Agree", "Strongly Agree" ]
    },
    {
      "Question": "မေးခွန်း ၄",
      "Choice": [ "Strongly  Disagree", "Somewhat Disagree", "Agree", "Somewhat Agree", "Strongly Agree" ]
    }
  ]
}
编辑 这是负责下载和解析该文件的代码:

    // Description of the GET request; every connection detail is read
    // from environment variables.
    const request = {
        host: process.env.AWS_HOST,
        method: 'GET',
        url: process.env.AWS_URL,
        path: process.env.AWS_PATH
    };

    // Key pair passed to aws4.sign together with the request.
    const credentials = {
        secretAccessKey: process.env.AWS_SECRET_KEY,
        accessKeyId: process.env.AWS_ACCESS_KEY
    };

    const signedRequest = aws4.sign(request, credentials);

    // Logs the downloaded payload, then attempts to parse it.
    const handleResponse = ({ data }) => {
        console.log(data);

        // This is the call that throws in the scenario described above.
        JSON.parse(data); // Error!
    };

    // Logs any failure from the request or from handleResponse.
    const handleFailure = (error) => {
        console.log("error", error);
    };

    axios(signedRequest)
        .then(handleResponse)
        .catch(handleFailure);
编辑
我更正了标题以反映我所问的问题

我刚刚发现,问题出在我的Lambda函数上。该函数从S3存储桶读取文件后,按ASCII编码把数据转换成字符串,再作为响应返回。把编码改为utf-8之后,问题就解决了。感谢@Tomalak抽出时间帮忙。

/**
 * Builds the HTTP-style response object and passes it to `callback`.
 *
 * `callback` is not defined in this block; it comes from the enclosing scope
 * (presumably the Lambda handler's callback — confirm against the handler).
 * It is always called with `null` as its first argument, so a failure is
 * reported through the '400' status code and the body, not as an error.
 *
 * @param {?Error} err - Failure from reading the file, or a falsy value on success.
 * @param {{Body: Object}} res - Read result; `res.Body` is only accessed when
 *   `err` is falsy and is assumed to be a Buffer (verify against the caller).
 */
const done = (err, res) => callback(null, {
    statusCode: err ? '400' : '200',
    // Decode the bytes as UTF-8; an ASCII decode corrupts every non-ASCII
    // character and makes the client-side JSON.parse fail.
    body: err ? err.message : res.Body.toString('utf-8'),
    headers: {
        // The charset is a parameter of Content-Type, not a header of its own.
        'Content-Type': 'application/json; charset=utf-8',
        // Header name must not contain a stray quote character.
        'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
    },
});
const done = (err, res) => callback(null, {
    statusCode: err ? '400' : '200',
    body: err ? err.message : res.Body.toString('utf-8'), // <<-- 这一行
    // ……
});

您使用什么编辑器来编辑文件?它是否设置为“UTF-8无BOM”?我之所以这样问,是因为您提到了“UTF8字符”。并不存在所谓的UTF-8字符,只有Unicode字符。UTF-8只是把Unicode字符表示为文件中字节的多种方式之一——它是一种编码。并不是每个Unicode文件都会自动以UTF-8编码。

你说得对!我把编码和字符集混淆了,感谢你指出这一点。回答你的问题:我使用的是Atom编辑器,它似乎没有UTF-8“带BOM”或“不带BOM”的选项。

好的,那么您需要贴出加载该文件的代码。请把它精简成一个能重现错误的最小测试程序(比如五行左右)。另外,作为测试,请直接从磁盘加载该文件,看看是否会出现同样的错误?

我已经把代码添加到帖子中了。我在本地导入该JSON对象时没有任何问题。

感谢您发布解决方案和说明!