Python 将数据从JSON文件提取到CSV
我有一个结构非常复杂的大JSON文件 您可以在这里查看: 它包含700多万行,我只想提取“text”字段 我已经编写了一个python代码,在整个文件中增加了“text”键或字段的所有值,它只提取了12个值!当我在Visualstudio上打开JSON文件时,我有超过19000个值 您可以在此处看到代码:Python 将数据从JSON文件提取到CSV,python,json,csv,machine-learning,deep-learning,Python,Json,Csv,Machine Learning,Deep Learning,我有一个结构非常复杂的大JSON文件 您可以在这里查看: 它包含700多万行,我只想提取“text”字段 我已经编写了一个python代码,在整个文件中增加了“text”键或字段的所有值,它只提取了12个值!当我在Visualstudio上打开JSON文件时,我有超过19000个值 您可以在此处看到代码: import json import csv with open("/Users/zahraa-maher/rasa-init-demo/venv/Tickie/external_d
import json
import csv
with open("/Users/zahraa-maher/rasa-init-demo/venv/Tickie/external_data/frames2.json") as file:
data = json.load(file)
fname = "outputText8.csv"
with open(fname, "w") as file:
csv_file = csv.writer(file,lineterminator='\n')
csv_file.writerow(["text"])
for item in data[i]["turns"]:
csv_file.writerow([item['text']])
请看一看JSON文件,因为它非常大,结构复杂,所以我不会将它粘贴到这里查看,因为它不可理解
这也是son文件的一部分:
[
{
"user_id": "U22HTHYNP",
"turns": [
{
"text": "I'd like to book a trip to Atlantis from Caprica on Saturday, August 13, 2016 for 8 adults. I have a tight budget of 1700.",
"labels": {
"acts": [
{
"args": [
{
"val": "book",
"key": "intent"
}
],
"name": "inform"
},
{
"args": [
{
"val": "Atlantis",
"key": "dst_city"
},
{
"val": "Caprica",
"key": "or_city"
},
{
"val": "Saturday, August 13, 2016",
"key": "str_date"
},
{
"val": "8",
"key": "n_adults"
},
{
"val": "1700",
"key": "budget"
}
],
"name": "inform"
}
],
"acts_without_refs": [
{
"args": [
{
"val": "book",
"key": "intent"
}
],
"name": "inform"
},
{
"args": [
{
"val": "Atlantis",
"key": "dst_city"
},
{
"val": "Caprica",
"key": "or_city"
},
{
"val": "Saturday, August 13, 2016",
"key": "str_date"
},
{
"val": "8",
"key": "n_adults"
},
{
"val": "1700",
"key": "budget"
}
],
"name": "inform"
}
],
"active_frame": 1,
"frames": [
{
"info": {
"intent": [
{
"val": "book",
"negated": false
}
],
"budget": [
{
"val": "1700.0",
"negated": false
}
],
"dst_city": [
{
"val": "Atlantis",
"negated": false
}
],
"or_city": [
{
"val": "Caprica",
"negated": false
}
],
"str_date": [
{
"val": "august 13",
"negated": false
}
],
"n_adults": [
{
"val": "8",
"negated": false
}
]
},
"frame_id": 1,
"requests": [],
"frame_parent_id": null,
"binary_questions": [],
"compare_requests": []
}
]
},
"author": "user",
"timestamp": 1471272019730.0
},
{
"db": {
"result": [
[
{
"trip": {
"returning": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 10,
"year": 2016,
"day": 24,
"min": 51,
"month": 8
},
"departure": {
"hour": 10,
"year": 2016,
"day": 24,
"min": 0,
"month": 8
}
},
"seat": "ECONOMY",
"leaving": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 0,
"year": 2016,
"day": 16,
"min": 51,
"month": 8
},
"departure": {
"hour": 0,
"year": 2016,
"day": 16,
"min": 0,
"month": 8
}
},
"or_city": "Porto Alegre",
"duration_days": 9
},
"price": 2118.81,
"hotel": {
"gst_rating": 7.15,
"vicinity": [],
"name": "Scarlet Palms Resort",
"country": "Brazil",
"amenities": [
"FREE_BREAKFAST",
"FREE_PARKING",
"FREE_WIFI"
],
"dst_city": "Goiania",
"category": "3.5 star hotel"
}
},
{
"trip": {
"returning": {
"duration": {
"hours": 2,
"min": 37
},
"arrival": {
"hour": 12,
"year": 2016,
"day": 10,
"min": 37,
"month": 8
},
"departure": {
"hour": 10,
"year": 2016,
"day": 10,
"min": 0,
"month": 8
}
},
"seat": "ECONOMY",
"leaving": {
"duration": {
"hours": 2,
"min": 37
},
"arrival": {
"hour": 0,
"year": 2016,
"day": 4,
"min": 37,
"month": 8
},
"departure": {
"hour": 22,
"year": 2016,
"day": 3,
"min": 0,
"month": 8
}
},
"or_city": "Porto Alegre",
"duration_days": 7
},
"price": 2369.83,
"hotel": {
"gst_rating": 0,
"vicinity": [],
"name": "Sunway Hostel",
"country": "Argentina",
"amenities": [
"FREE_BREAKFAST",
"FREE_WIFI"
],
"dst_city": "Rosario",
"category": "2.0 star hotel"
}
},
{
"trip": {
"returning": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 10,
"year": 2016,
"day": 24,
"min": 51,
"month": 8
},
"departure": {
"hour": 10,
"year": 2016,
"day": 24,
"min": 0,
"month": 8
}
},
"seat": "BUSINESS",
"leaving": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 0,
"year": 2016,
"day": 16,
"min": 51,
"month": 8
},
"departure": {
"hour": 0,
"year": 2016,
"day": 16,
"min": 0,
"month": 8
}
},
"or_city": "Porto Alegre",
"duration_days": 9
},
"price": 2375.72,
"hotel": {
"gst_rating": 7.15,
"vicinity": [],
"name": "Scarlet Palms Resort",
"country": "Brazil",
"amenities": [
"FREE_BREAKFAST",
"FREE_PARKING",
"FREE_WIFI"
],
"dst_city": "Goiania",
"category": "3.5 star hotel"
}
},
{
"trip": {
"returning": {
"duration": {
"hours": 1,
"min": 30
},
"arrival": {
"hour": 11,
"year": 2016,
"day": 1,
"min": 30,
"month": 9
},
"departure": {
"hour": 10,
"year": 2016,
"day": 1,
"min": 0,
"month": 9
}
},
"seat": "BUSINESS",
"leaving": {
"duration": {
"hours": 1,
"min": 30
},
"arrival": {
"hour": 18,
"year": 2016,
"day": 19,
"min": 30,
"month": 8
},
"departure": {
"hour": 17,
"year": 2016,
"day": 19,
"min": 0,
"month": 8
}
},
"or_city": "Porto Alegre",
"duration_days": 13
},
"price": 2492.95,
"hotel": {
"gst_rating": 0,
"vicinity": [],
"name": "Hotel Mundo",
"country": "Brazil",
"amenities": [
"FREE_BREAKFAST",
"FREE_WIFI",
"FREE_PARKING"
],
"dst_city": "Manaus",
"category": "2.5 star hotel"
}
},
{
"trip": {
"returning": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 10,
"year": 2016,
"day": 31,
"min": 51,
"month": 8
},
"departure": {
"hour": 10,
"year": 2016,
"day": 31,
"min": 0,
"month": 8
}
},
"seat": "ECONOMY",
"leaving": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 19,
"year": 2016,
"day": 27,
"min": 51,
"month": 8
},
"departure": {
"hour": 19,
"year": 2016,
"day": 27,
"min": 0,
"month": 8
}
},
"or_city": "Porto Alegre",
"duration_days": 4
},
"price": 2538.0,
"hotel": {
"gst_rating": 8.22,
"vicinity": [],
"name": "The Glee",
"country": "Brazil",
"amenities": [
"FREE_BREAKFAST",
"FREE_WIFI"
],
"dst_city": "Recife",
"category": "4.0 star hotel"
}
}
],
[],
[],
[],
[],
[],
[]
],
"search": [
{
"ORIGIN_CITY": "Porto Alegre",
"PRICE_MIN": "2000",
"NUM_ADULTS": "2",
"timestamp": 1471271949.995,
"PRICE_MAX": "3000",
"ARE_DATES_FLEXIBLE": "true",
"NUM_CHILDREN": "5",
"START_TIME": "1470110400000",
"MAX_DURATION": 2592000000.0,
"DESTINATION_CITY": "Brazil",
"RESULT_LIMIT": "10",
"END_TIME": "1472616000000"
},
{
"ORIGIN_CITY": "Atlantis",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272148.124,
"PRICE_MAX": "1700",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "NaN",
"END_TIME": "NaN"
},
{
"ORIGIN_CITY": "Caprica",
"PRICE_MAX": "1700",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272189.07,
"DESTINATION_CITY": "Atlantis",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "1470715200000",
"END_TIME": "1472011200000"
},
{
"ORIGIN_CITY": "Caprica",
"PRICE_MAX": "1700",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272205.436,
"DESTINATION_CITY": "Atlantis",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "1470715200000",
"END_TIME": "1472011200000"
},
{
"ORIGIN_CITY": "Caprica",
"PRICE_MIN": "1700",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272278.72,
"DESTINATION_CITY": "Atlantis",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "1470715200000",
"END_TIME": "1472011200000"
},
{
"ORIGIN_CITY": "Caprica",
"PRICE_MIN": "1700",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272454.542,
"DESTINATION_CITY": "Atlantis",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "1471060800000",
"END_TIME": "1472011200000"
},
{
"ORIGIN_CITY": "Caprica",
"PRICE_MIN": "1700",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272466.008,
"DESTINATION_CITY": "Atlantis",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "1471060800000",
"END_TIME": "1472011200000"
}
]
},
如何修改它以将所有“文本”值从JSON文件提取到CSV文件?尝试:
import json
import csv
with open("/Users/zahraa-maher/rasa-init-demo/venv/Tickie/external_data/frames2.json") as file:
data = json.load(file)
fname = "outputText8.csv"
with open(fname, "w") as file:
csv_file = csv.writer(file,lineterminator='\n')
csv_file.writerow(["text"])
for keys,values in data.items():
现在由您决定要保存哪些字段,如果您使用调试器,您可以查看值和键,这是一个使用
pandas
的潜在解决方案:
import pandas as pd
#importing data
dj = pd.read_json("frames2.json")
dtext = dj[["user_id","turns"]]
#Saving text records in a list
list_ = []
for record in dtext["turns"].values:
for r in record:
list_.append(r["text"])
#Exporting the csv
out = pd.Series(list_,name="text")
out.to_csv("text.csv")
它给出以下输出。
我在哪里定义和增加,我在你的代码中看不到它实际上,我没有定义它,我把它放在这里是因为当我在数据[item][“turns”]中为item编写时,它显示了一个错误。我是python的初学者,只想用它进行提取。您能指导我如何对其进行建模以使其正常工作并提取所需的所有数据吗?如果数据是Json类型,您应该以不同的方式迭代数据中的键和值。items()您能在回答中用代码编写它并告诉我如何做吗?正如我告诉你们的,我是一个初学者,首先不能理解,我在
数据[I]['turns]
中是什么?其次,json比这段代码所能处理的更复杂。例如,“text”的第二个实例是数据[0]['turns'][1]['db']。您需要一种通用的方法来遍历这个怪物并找到所有的“文本”属性。非常感谢,我在for循环之后添加的代码是:csv_文件.writerow([item['text']])
,它返回以下错误:在9 csv_文件=csv.writer(文件,lineterminator='\n')10 csv_文件.writerow([“text”])-->11对于键,data.items()中的值:12 csv_file.writerow([item['text']])AttributeError:'list'对象没有属性'items'让我们说data={'text':'hi'}因此,键将等于文本,值将等于HIT非常感谢:它为我显示了此错误:NameError:name'dtext'未定义如何解决?抱歉,我忘记了代码中的一行。请再次检查我的答案。我已对其进行了编辑。再次感谢您,它还显示以下错误:NameError:name'list_'未定义,因此您知道如何解决它?请重试。我已经编辑了它的作品!非常感谢你的帮助,我真的很感激