使用Java将大型JSON文件拆分为较小的JSON文件
我有一个JSON格式的大数据集,为了便于使用,我想在保持结构的同时将其拆分为多个JSON文件(标签:java、json、file、split)。例如:
{
"users": [
{
"userId": 1,
"firstName": "Krish",
"lastName": "Lee",
"phoneNumber": "123456",
"emailAddress": "krish.lee@learningcontainer.com"
},
{
"userId": 2,
"firstName": "racks",
"lastName": "jacson",
"phoneNumber": "123456",
"emailAddress": "racks.jacson@learningcontainer.com"
},
{
"userId": 3,
"firstName": "denial",
"lastName": "roast",
"phoneNumber": "33333333",
"emailAddress": "denial.roast@learningcontainer.com"
},
{
"userId": 4,
"firstName": "devid",
"lastName": "neo",
"phoneNumber": "2222222",
"emailAddress": "devid.neo@learningcontainer.com"
},
{
"userId": 5,
"firstName": "jone",
"lastName": "mac",
"phoneNumber": "111111111",
"emailAddress": "jone.mac@learningcontainer.com"
}
]
}
我希望拆分之后,每个userId对应的用户记录都单独保存到一个不同的文件中。
到目前为止,我尝试过把这些数据放进一个Map再拆分Map,也尝试过把它转换成数组再拆分数组,但都不太成功:生成的文件里虽然包含userId,却已经不再是合法的JSON格式。
关于如何在Java中实现这一点,有什么建议吗?
预期结果(每个文件的内容形如):
{"users": [
{
"userId": 1,
"firstName": "Krish",
"lastName": "Lee",
"phoneNumber": "123456",
"emailAddress": "krish.lee@learningcontainer.com"
}
]
}
处理大文件时,最好使用面向流/事件的解析方式,Gson和Jackson都支持这种方式。下面用一个小型JSON解析器举例说明:
导入java.io.File;
导入java.io.FileWriter;
导入java.io.IOException;
导入java.io.UncheckedIOException;
导入java.io.Writer;
公共类SplitMyJson{
私有静态最终字符串jsonToSplit=“{\”用户\:[\n”+
“{\n”+
“用户ID”:1\n+
“\'firstName\':\'Krish\',\n”+
“\”姓氏\“:\”李\“,\n”+
“\”电话号码\“:\”123456\“,\n”+
“\”emailAddress\“:\”krish。lee@learningcontainer.com\“\n”+
},\n+
“{\n”+
“用户ID”:2\n+
“\'firstName\':\'racks\',\n”+
“\'lastName\':\'jacson\',\n”+
“\”电话号码\“:\”123456\“,\n”+
“\”电子邮件地址\“:\”机架。jacson@learningcontainer.com\“\n”+
},\n+
“{\n”+
“用户ID”:3\n+
“\“firstName\”:\“拒绝”,\n”+
“\“lastName\:\”烤\“,\n”+
“\”电话号码\“:\”33333333\,\n”+
“\”电子邮件地址\“:\”拒绝。roast@learningcontainer.com\“\n”+
},\n+
“{\n”+
“用户ID”:4\n+
“firstName\:\“devid\”,\n+
“lastName\:\“neo\,\n”+
“\“电话号码\:\“222222\”,\n”+
“\”电子邮件地址\“:\”设备。neo@learningcontainer.com\“\n”+
},\n+
“{\n”+
“用户ID”:5\n+
“\'firstName\':\'jone\',\n”+
“\”lastName\“:\”mac\“,\n”+
“\”电话号码\“:\”111111111\“,\n”+
“\”emailAddress\“:\”jone。mac@learningcontainer.com\“\n”+
“}\n”+
“]\n”+
"}";
公共静态void main(字符串[]args){
final JsonParser=new JsonParser();
setListener(新的拆分器(新文件(“/home/gudkov/mytest”));
parser.parse(jsontospilt);//如果您读取一个文件,则在循环中逐部分调用parse()多次,直到EOF
parser.eoj();//然后调用.eoj()
}
静态类拆分器扩展JsonParserListenerAdaptor{
私有最终JsonGenerator JsonGenerator=新JsonGenerator();
私有最终AppendableWriter AppendableWriter=新AppendableWriter();
私有最终文件输出文件夹;
私有信息的深度;
私有int用户索引;
拆分器(最终文件输出文件夹){
this.outputFolder=outputFolder;
如果(!outputFolder.exists()){
outputFolder.mkdirs();
}
setOutput(appendableWriter);
}
私有布尔值userJustStarted(){
返回objectDepth==2;
}
私有布尔userJustEnded(){
返回objectDepth==1;
}
私有布尔notInUser(){
返回深度<2;
}
@凌驾
公共布尔值OnObject已启动(){
objectDepth++;
if(notInUser())返回true;
if(userJustStarted()){
试一试{
set(新文件编写器(新文件(outputFolder,“user-”+userIndex+“.json”)));
}捕获(IOE异常){
抛出新的未选中异常(e);
}
userIndex++;
}
jsonggenerator.startObject();
返回true;
}
@凌驾
公共布尔onObjectEnded(){
if(notInUser()){
对象深度--;
返回true;
}
对象深度--;
jsongGenerator.endObject();
如果(userjustend()){//用户对象已结束
试一试{
jsonggenerator.eoj();
appendableWriter.output().close();
}捕获(IOE异常){
抛出新的未选中异常(e);
}
}
返回true;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
/**
 * Example of splitting one JSON document into several files with an event-driven parser.
 *
 * <p>The sample document is fed to a {@code JsonParser}; the nested {@link Splitter} listener
 * re-emits every object found at nesting depth two (in the sample: each element of the
 * {@code "users"} array) into its own {@code user-<index>.json} file.
 *
 * <p>NOTE(review): {@code JsonParser}, {@code JsonParserListenerAdaptor}, {@code JsonGenerator},
 * {@code AppendableWriter} and {@code JsonNumber} are not imported in this snippet; they come from
 * the JSON parser library used by the example — confirm the package before compiling.
 */
public class SplitMyJson {
    /** Output folder used when no folder is passed on the command line. */
    private static final String DEFAULT_OUTPUT_FOLDER = "/home/gudkov/mytest";

    /** Sample input: a root object whose "users" array holds five user objects. */
    private static final String jsonToSplit = "{\"users\": [\n" +
            " {\n" +
            " \"userId\": 1,\n" +
            " \"firstName\": \"Krish\",\n" +
            " \"lastName\": \"Lee\",\n" +
            " \"phoneNumber\": \"123456\",\n" +
            " \"emailAddress\": \"krish.lee@learningcontainer.com\"\n" +
            " },\n" +
            " {\n" +
            " \"userId\": 2,\n" +
            " \"firstName\": \"racks\",\n" +
            " \"lastName\": \"jacson\",\n" +
            " \"phoneNumber\": \"123456\",\n" +
            " \"emailAddress\": \"racks.jacson@learningcontainer.com\"\n" +
            " },\n" +
            " {\n" +
            " \"userId\": 3,\n" +
            " \"firstName\": \"denial\",\n" +
            " \"lastName\": \"roast\",\n" +
            " \"phoneNumber\": \"33333333\",\n" +
            " \"emailAddress\": \"denial.roast@learningcontainer.com\"\n" +
            " },\n" +
            " {\n" +
            " \"userId\": 4,\n" +
            " \"firstName\": \"devid\",\n" +
            " \"lastName\": \"neo\",\n" +
            " \"phoneNumber\": \"222222222\",\n" +
            " \"emailAddress\": \"devid.neo@learningcontainer.com\"\n" +
            " },\n" +
            " {\n" +
            " \"userId\": 5,\n" +
            " \"firstName\": \"jone\",\n" +
            " \"lastName\": \"mac\",\n" +
            " \"phoneNumber\": \"111111111\",\n" +
            " \"emailAddress\": \"jone.mac@learningcontainer.com\"\n" +
            " }\n" +
            " ]\n" +
            "}";

    /**
     * Parses the sample document and writes one file per user.
     *
     * @param args optional; {@code args[0]} overrides the output folder, otherwise
     *             {@link #DEFAULT_OUTPUT_FOLDER} is used
     */
    public static void main(String[] args) {
        final String folderName = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_FOLDER;
        final JsonParser parser = new JsonParser();
        parser.setListener(new Splitter(new File(folderName)));
        parser.parse(jsonToSplit); // if you read a file, call parse() several times part by part in a loop until EOF
        parser.eoj(); // and then call .eoj()
    }

    /**
     * Parser listener that forwards the events of each depth-two object to a {@code JsonGenerator}
     * whose output is redirected to a fresh file for every such object.
     *
     * <p>Every callback returns {@code true}; presumably this tells the parser to keep going —
     * confirm against the listener contract of the parser library.
     */
    static class Splitter extends JsonParserListenerAdaptor {
        private final JsonGenerator jsonGenerator = new JsonGenerator();
        private final AppendableWriter<Writer> appendableWriter = new AppendableWriter<>();
        private final File outputFolder;
        /** Number of currently open objects; the root object is depth 1, a user object is depth 2. */
        private int objectDepth;
        /** Zero-based counter used to name the next output file. */
        private int userIndex;

        /**
         * @param outputFolder folder receiving the {@code user-<index>.json} files; created if missing
         * @throws UncheckedIOException if the folder does not exist and cannot be created
         */
        Splitter(final File outputFolder) {
            this.outputFolder = outputFolder;
            // mkdirs() also returns false when the folder already exists, hence the second check.
            if (!outputFolder.mkdirs() && !outputFolder.isDirectory()) {
                throw new UncheckedIOException(
                        new IOException("Cannot create output folder: " + outputFolder));
            }
            jsonGenerator.setOutput(appendableWriter);
        }

        /** True right after the opening brace of a depth-two (user) object has been counted. */
        private boolean userJustStarted() {
            return objectDepth == 2;
        }

        /** True right after the closing brace of a depth-two (user) object has been counted. */
        private boolean userJustEnded() {
            return objectDepth == 1;
        }

        /** True while the parser is outside of any user object, i.e. nothing must be emitted. */
        private boolean notInUser() {
            return objectDepth < 2;
        }

        /**
         * Opens a new output file when a user object begins and emits the object start.
         *
         * @throws UncheckedIOException if the output file cannot be opened
         */
        @Override
        public boolean onObjectStarted() {
            objectDepth++;
            if (notInUser()) {
                return true;
            }
            if (userJustStarted()) {
                final File target = new File(outputFolder, "user-" + userIndex + ".json");
                try {
                    // JSON is exchanged as UTF-8; FileWriter would use the platform default charset.
                    appendableWriter.set(Files.newBufferedWriter(target.toPath(), StandardCharsets.UTF_8));
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
                userIndex++;
            }
            jsonGenerator.startObject();
            return true;
        }

        /**
         * Emits the object end and, when the user object itself just ended, finishes the generated
         * document and closes the current output file.
         *
         * @throws UncheckedIOException if finishing or closing the output fails
         */
        @Override
        public boolean onObjectEnded() {
            if (notInUser()) {
                objectDepth--;
                return true;
            }
            objectDepth--;
            jsonGenerator.endObject();
            if (userJustEnded()) { // user object ended
                try {
                    try {
                        jsonGenerator.eoj();
                    } finally {
                        // Close even if eoj() fails so the file handle is never leaked.
                        appendableWriter.output().close();
                    }
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            return true;
        }

        /** Forwards an array start that occurs inside a user object. */
        @Override
        public boolean onArrayStarted() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.startArray();
            return true;
        }

        /** Forwards an array end that occurs inside a user object. */
        @Override
        public boolean onArrayEnded() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.endArray();
            return true;
        }

        /** Forwards a member name that occurs inside a user object. */
        @Override
        public boolean onObjectMember(final CharSequence name) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.objectMember(name);
            return true;
        }

        /** Forwards a string value that occurs inside a user object. */
        @Override
        public boolean onStringValue(final CharSequence data) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.stringValue(data, true);
            return true;
        }

        /** Forwards a number value that occurs inside a user object. */
        @Override
        public boolean onNumberValue(final JsonNumber number) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.numberValue(number);
            return true;
        }

        /** Forwards a {@code true} literal that occurs inside a user object. */
        @Override
        public boolean onTrueValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.trueValue();
            return true;
        }

        /** Forwards a {@code false} literal that occurs inside a user object. */
        @Override
        public boolean onFalseValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.falseValue();
            return true;
        }

        /** Forwards a {@code null} literal that occurs inside a user object. */
        @Override
        public boolean onNullValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.nullValue();
            return true;
        }
    }
}