How to parse a JSON file with MapReduce?

I am new to the JSON format. I am trying to learn how to parse a JSON file and extract data from it using the MapReduce programming model. Is there any JSON parser that can read records spanning multiple lines? Below is the maximum set of elements that can appear in my JSON file:

{ 
    "type": "",
    "format": "",
    "version": "",
    "id": "",
    "start": "",
    "cp": "",
message:{ "proto": "","protoVer": "","cliIP": "","reqPort": "","reqHost": "","reqMethod": "","reqPath": "","reqQuery": "","reqCT": "","reqLen": "","sslVer": "","status": "","redirURL":  "","respCT": "","respLen": "","bytes": "","UA": "","fwdHost":},

reqHdr:{"accEnc": "","accLang": "","auth": "","cacheCtl": "","conn": "","contMD5": "","cookie": "","DNT": "","expect": "","ifMatch": "","ifMod": "","ifNone": "","ifRange": "","ifUnmod": "","range": "","referer": "","te": "","upgrade": "","via": "","xFrwdFor": "","xReqWith": ""},

"respHdr": {"accRange": "","allowOrigin": "","age": "","allow": "","cacheCtl": "","conn": "","contEnc": "","contLang": "","contMD5": "","contDisp": "","contRange": "","date": "","eTag": "","expires": "","lastMod": "","link": "","p3p": "","retry": "","server": "","trailer": "","transEnc": "","vary": "","via": "","warning": "","wwwAuth": "","xPwrdBy": "","setCookie": ""},

"netPerf": {"downloadTime": "","originName": "","originIP": "","originInitIP": "","originRetry": "","lastMileRTT": "","midMileLatency": "","netOriginLatency": "","lastMileBW": "","cacheStatus": "","firstByte": "","lastByte": "","asnum": "","network": "","netType": "","edgeIP": ""},

"geo": {"country": "","region": "","city": ""},

"waf" : {"logVer" : "1.0","ipRules" : "","appRules" : "","warn" : "","deny" : ""},

"content": {"custom_name": "custom_value"},

}
These are sample values from my JSON file:

    `{"type":"cloud_monitor","format":"default","version":"1.0","id":"71101cb85441995d11a43bb","start":"1413585245.921","cp":"254623","message":{"proto":"http","protoVer":"1.1","status":"403","cliIP":"23.79.231.14","reqPort":"80","reqHost":"ksd.metareactor.com","reqMethod":"GET","reqPath":"%2findex.php","reqQuery":"path%3d57%26product_id%3d49%26route%3d%255Cwinnt%255Cwin.ini%2500.","respCT":"text/html","respLen":"286","bytes":"286","UA":"mozilla-saturn","fwdHost":"origin-demo2-akamaized.scoe-sil.net"}`,
"reqHdr":{"accEnc":"gzip,%20deflate","cookie":"PHPSESSID%3dkkqoodvfe0rt9l7lbvqghk6e15%3bcurrency%3dUSD%3blanguage%3den"},"netPerf":{"downloadTime":"8","lastMileRTT":"20","cacheStatus":"0","firstByte":"1","lastByte":"1","asnum":"12222","edgeIP":"184.28.16.109"},"geo":{"country":"US","region":"CA","city":"SANFRANCISCO","lat":"37.7795","long":"-122.4195"},"network":{"edgeIP":"184.28.16.109","asnum":"12222","network":"","networkType":""},"waf":{"ver":"2.0","policy":"qik1_12418","ruleSet":"KRS%201.0","mode":"scr","rsr":"1","dor":"0","oft":"0","riskGroups":":INBOUND-ANOMALY","riskTuples":":-3000002","riskScores":":-1000","pAction":"","pRate":"","warnRules":"3000002","warnSlrs":"ARGS%3aroute","warnData":"d2lubnQvd2luLmluaQ%3d%3d","warnTags":"AKAMAI%2fWEB_ATTACK%2fFILE_INJECTION","denyRules":"INBOUND-ANOMALY","denyData":"U2NvcmU6IDEwMDAsIERFTlkgdGhyZXNob2xkOiAyNSwgQWxlcnQgUnVsZXM6IDMwMDAwMDIsIERlbnkgUnVsZTogLCBMYXN0IE1hdGNoZWQgTWVzc2FnZTogTG9jYWwgU3lzdGVtIEZpbGUgQWNjZXNzIEF0dGVtcHQ%3d"}}
I have a Java JSON parser, but I can only use it to read a single line. How can I recognize multi-line records in the JSON file and use them in my MapReduce code to extract the data?

My JSON parser class:

// "value" is the Text a single map() call receives; with the default
// TextInputFormat this is one line, so the split("\n") below rarely
// yields more than one element.
String[] tuple = value.toString().split("\n");
try {
    for (int i = 0; i < tuple.length; i++) {
        JSONObject jsonobj = new JSONObject(tuple[i]);
        type    = jsonobj.getString("type");
        format  = jsonobj.getString("format");
        version = jsonobj.getString("version");
        id      = jsonobj.getString("id");
        start   = jsonobj.getString("start");
        cp      = jsonobj.getString("cp");
        // "message" is a nested object, so casting it to String fails;
        // fetch it as a JSONObject and convert explicitly if needed.
        message = jsonobj.getJSONObject("message").toString();
    }
} catch (JSONException e) {
    e.printStackTrace();
}
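
For reference, here is a minimal sketch of how such a snippet could sit inside a complete Hadoop Mapper, assuming the log has already been flattened to one JSON object per line (so the default TextInputFormat hands map() exactly one record at a time). The class name JsonLogMapper and the choice to count records per status code are illustrative, not part of the original question:

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.json.JSONException;
import org.json.JSONObject;

// Hypothetical mapper: counts records per "status" code, assuming one JSON object per input line.
public class JsonLogMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);
    private final Text outKey = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        try {
            JSONObject jsonobj = new JSONObject(value.toString());
            // Nested objects must be fetched as JSONObject, not cast to String.
            JSONObject message = jsonobj.getJSONObject("message");
            outKey.set(message.optString("status", "unknown"));
            context.write(outKey, ONE);
        } catch (JSONException e) {
            // Skip malformed records instead of failing the whole job.
        }
    }
}

The reducer and job setup would then follow the usual word-count pattern, summing the counts emitted per status code.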
Typically MapReduce works best with flat JSON objects, one per line.

Every line of the file has the same JSON format. So is multi-line parsing impossible in MapReduce?

Also, how large are the files, and how many are there? Why do you think you need MapReduce/Hadoop?

Not impossible at all; I never said that. I only said you would be better off preprocessing the files to flatten the data first. 100 MB is still small for Hadoop. My point is that my six-year-old laptop with 4 GB of RAM could probably parse this data as quickly as MapReduce. Did you know that Spark and Pig have native JSON parsers, and that Hive tables can be built on JSON data?
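
Following up on the preprocessing suggestion in the comments, below is a small stand-alone sketch (not part of the original thread) that rewrites a stream of concatenated, possibly multi-line JSON objects as one compact object per line, using org.json's JSONTokener; the input and output file names are placeholders:

import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

import org.json.JSONObject;
import org.json.JSONTokener;

// Hypothetical preprocessor: reads concatenated JSON objects (each possibly
// spanning several lines) and writes each one back out on a single line,
// so the result can be fed to MapReduce with the default TextInputFormat.
public class FlattenJsonRecords {
    public static void main(String[] args) throws IOException {
        try (FileReader in = new FileReader("multiline-records.json");                       // placeholder input
             BufferedWriter out = new BufferedWriter(new FileWriter("flat-records.json"))) { // placeholder output
            JSONTokener tokener = new JSONTokener(in);
            // skipTo('{') returns 0 when no further object start is found.
            while (tokener.skipTo('{') == '{') {
                JSONObject record = new JSONObject(tokener); // consumes one object, however many lines it spans
                out.write(record.toString());                // toString() emits a compact single-line form
                out.newLine();
            }
        }
    }
}

The flattened output can then be copied into HDFS and processed with a per-line mapper like the one sketched above.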