Python csv列到JSON

Python csv列到JSON,python,json,csv,Python,Json,Csv,我有以下几点 id, o1, o2, o3 'jess', 1.0, 4, 0.3 'jill', 0, 5, 0.123 'jamie', -3, 0.2, 1.0 并且希望它在一个嵌套的json中,每个列都作为一个json,并在标题名上键入: myjson = { "o1": {"jess": 1.0, "jill": 0, "jamie": -3}, "o2": {"jess": 4, "jill": 5, "jamie": 0.2}, "o3":

我有以下几点

id, o1, o2, o3
'jess', 1.0, 4, 0.3
'jill', 0, 5, 0.123
'jamie', -3, 0.2, 1.0
并且希望把它转换成一个嵌套的 JSON:每一列都是一个 JSON 对象,并以列标题名作为键:

# Desired result: one dict per value column, each keyed by the row id.
myjson = {
    "o1": {"jess": 1.0, "jill": 0, "jamie": -3},
    "o2": {"jess": 4, "jill": 5, "jamie": 0.2},
    "o3": {"jess": 0.3, "jill": 0.123, "jamie": 1.0},
}
不确定最好的(最 Pythonic 的)方法是什么。这是我的第一次尝试:

import csv

# Build {column_name: {row_id: value}} from a CSV whose first row holds the
# column names and whose first column holds the row ids.
with open(myfile, "r") as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    # Consume the header once, up front, so it is never treated as a data row.
    # An empty file yields an empty header and therefore an empty result.
    header = next(reader, [])
    # The first column only supplies the inner keys, so it gets no outer entry.
    value_columns = header[1:]
    myjson = {key: {} for key in value_columns}
    for line in reader:
        if not line:
            # csv.reader yields [] for blank lines; there is no id to key on.
            continue
        row_id = line[0]
        # zip stops at the shorter sequence, so a short row cannot raise
        # IndexError the way indexing with header[i + 1] did.
        for key, value in zip(value_columns, line[1:]):
            # Values are stored exactly as csv.reader returns them (strings).
            myjson[key][row_id] = value
我想有更好的方法来做到这一点

Edit:应该在前面就说明的,但我不想使用 pandas 之类的库。这需要非常快,并且包依赖要尽可能少

这可能是“作弊”,但这对我来说一直有效。如果没有-没有一点代码无法修复的。但是我使用了
Pandas
模块。它确实满足了我的很多数据需求。我将csv读入一个数据帧,然后将数据帧放入JSON(或任何其他格式)

它超级简单,易于定制。您可能需要处理更多的变量。以下是文档:, 这本书读起来总是很好的:

这可能是“作弊”,但这对我来说一直都很有效。如果没有-没有一点代码无法修复的。但是我使用了
Pandas
模块。它确实满足了我的很多数据需求。我将csv读入一个数据帧,然后将数据帧放入JSON(或任何其他格式)

它超级简单,易于定制。您可能需要处理更多的变量。以下是文档:,
这总是一个很好的答案:

我肯定认为下面的答案太长了,但是如果你仍然需要答案,这是可行的。 我已根据您的数据创建了test.csv

我不知道你为什么不想使用 pandas,但无论如何

import csv
import itertools
from itertools import *
import json


def read_with_header():
    """Read the whole CSV file and report its size and header.

    Returns:
        A ``(row_count, header)`` tuple where ``row_count`` is the number of
        rows the csv reader produced (header row included) and ``header`` is
        the first of those rows.

    Raises:
        IndexError: if the reader produced no rows at all.
    """
    with open ('/Users/bhargavsaidama/test.csv', 'rb') as f:
        all_rows = list(csv.reader(f, delimiter = ',', quotechar = '|'))
    total = len(all_rows)
    first_row = all_rows[0]
    return total, first_row


def reading_ignore_header():
    """Read the data rows of the CSV and pair each row's id with its values.

    The header row is obtained from ``read_with_header()`` and skipped here.
    For every remaining non-empty row, the first field becomes the key of one
    single-entry dict per remaining field, in column order.

    Returns:
        A ``(result, header)`` tuple: ``result`` is the flat list of
        ``{first_field: value}`` dicts and ``header`` is the header row as
        returned by ``read_with_header()``.
    """
    # Only the header is needed; the row count no longer decides how many
    # pairs are taken from each row.
    _, header = read_with_header()

    with open('/Users/bhargavsaidama/test.csv', 'rb') as f:
        next(f)  # skip the header line
        reader = csv.reader(f, delimiter = ',' , quotechar = '|')
        result = []

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to pair.
                continue
            key = row[0]
            # Previously the pairs came from combinations(row, 2) truncated to
            # row_count - 1, which produced exactly the (id, value) pairs only
            # when the number of data rows equalled the number of value
            # columns. Pairing the id with every other field does not depend
            # on how many rows the file has.
            result.extend({key: value} for value in row[1:])
        return result, header

# The following function is taken from here
# http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks

def chunks(l, n):
    """Generate consecutive slices of ``l``, each at most ``n`` items long.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    size = len(l)
    starts = range(0, size, n)
    for begin in starts:
        end = begin + n
        yield l[begin:end]


def main():

    result, header = reading_ignore_header()
    final_values = list(chunks(result,3)) # here 3 reflects (row_count-1)
    header = header[1:]        # seems like u wanna ignore Id 
    data_str = json.dumps(dict(zip(header, final_values)))
    data_json = json.loads(data_str)
    print data_str, data_json
    return data_str, data_json


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
希望它能帮助你,如果你能优化它,那就去做吧。我还将学习:)


谢谢

我肯定认为下面的答案太长了,但是如果你仍然需要答案,这就行了。 我已根据您的数据创建了test.csv

我不知道你为什么不想使用 pandas,但无论如何

import csv
import itertools
from itertools import *
import json


def read_with_header():
    """Read the whole CSV file and report its size and header.

    Returns:
        A ``(row_count, header)`` tuple where ``row_count`` is the number of
        rows the csv reader produced (header row included) and ``header`` is
        the first of those rows.

    Raises:
        IndexError: if the reader produced no rows at all.
    """
    with open ('/Users/bhargavsaidama/test.csv', 'rb') as f:
        all_rows = list(csv.reader(f, delimiter = ',', quotechar = '|'))
    total = len(all_rows)
    first_row = all_rows[0]
    return total, first_row


def reading_ignore_header():
    """Read the data rows of the CSV and pair each row's id with its values.

    The header row is obtained from ``read_with_header()`` and skipped here.
    For every remaining non-empty row, the first field becomes the key of one
    single-entry dict per remaining field, in column order.

    Returns:
        A ``(result, header)`` tuple: ``result`` is the flat list of
        ``{first_field: value}`` dicts and ``header`` is the header row as
        returned by ``read_with_header()``.
    """
    # Only the header is needed; the row count no longer decides how many
    # pairs are taken from each row.
    _, header = read_with_header()

    with open('/Users/bhargavsaidama/test.csv', 'rb') as f:
        next(f)  # skip the header line
        reader = csv.reader(f, delimiter = ',' , quotechar = '|')
        result = []

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to pair.
                continue
            key = row[0]
            # Previously the pairs came from combinations(row, 2) truncated to
            # row_count - 1, which produced exactly the (id, value) pairs only
            # when the number of data rows equalled the number of value
            # columns. Pairing the id with every other field does not depend
            # on how many rows the file has.
            result.extend({key: value} for value in row[1:])
        return result, header

# The following function is taken from here
# http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks

def chunks(l, n):
    """Generate consecutive slices of ``l``, each at most ``n`` items long.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    size = len(l)
    starts = range(0, size, n)
    for begin in starts:
        end = begin + n
        yield l[begin:end]


def main():

    result, header = reading_ignore_header()
    final_values = list(chunks(result,3)) # here 3 reflects (row_count-1)
    header = header[1:]        # seems like u wanna ignore Id 
    data_str = json.dumps(dict(zip(header, final_values)))
    data_json = json.loads(data_str)
    print data_str, data_json
    return data_str, data_json


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
希望它能帮助你,如果你能优化它,那就去做吧。我还将学习:)


谢谢

这里有一个更简单的解决方案,需要安装 pyexcel 和 pyexcel-text:

>>> import pyexcel as p
>>> sheet=p.get_sheet(file_name='test.csv')
>>> sheet
test.csv:
+---------+-----+-----+-------+
| id      | o1  | o2  | o3    |
+---------+-----+-----+-------+
| 'jess'  | 1.0 | 4   | 0.3   |
+---------+-----+-----+-------+
| 'jill'  | 0   | 5   | 0.123 |
+---------+-----+-----+-------+
| 'jamie' | 3   | 0.2 | 1.0   |
+---------+-----+-----+-------+
>>> sheet.transpose()
>>> sheet.name_columns_by_row(0)
>>> sheet.name_rows_by_column(0)
>>> sheet
test.csv:
+----+--------+--------+---------+
|    | 'jess' | 'jill' | 'jamie' |
+====+========+========+=========+
| o1 | 1.0    | 0      | 3       |
+----+--------+--------+---------+
| o2 | 4      | 5      | 0.2     |
+----+--------+--------+---------+
| o3 | 0.3    | 0.123  | 1.0     |
+----+--------+--------+---------+
>>> sheet.get_json(write_title=False) # pip install pyexcel-text
'{"o1": {"\'jamie\'": 3, "\'jess\'": 1.0, "\'jill\'": 0}, "o2": {"\'jamie\'": "0.2", "\'jess\'": 4, "\'jill\'": 5}, "o3": {"\'jamie\'": 1.0, "\'jess\'": "0.3", "\'jill\'": "0.123"}}'

这是一个更简单的解决方案,需要安装 pyexcel 和 pyexcel-text:

>>> import pyexcel as p
>>> sheet=p.get_sheet(file_name='test.csv')
>>> sheet
test.csv:
+---------+-----+-----+-------+
| id      | o1  | o2  | o3    |
+---------+-----+-----+-------+
| 'jess'  | 1.0 | 4   | 0.3   |
+---------+-----+-----+-------+
| 'jill'  | 0   | 5   | 0.123 |
+---------+-----+-----+-------+
| 'jamie' | 3   | 0.2 | 1.0   |
+---------+-----+-----+-------+
>>> sheet.transpose()
>>> sheet.name_columns_by_row(0)
>>> sheet.name_rows_by_column(0)
>>> sheet
test.csv:
+----+--------+--------+---------+
|    | 'jess' | 'jill' | 'jamie' |
+====+========+========+=========+
| o1 | 1.0    | 0      | 3       |
+----+--------+--------+---------+
| o2 | 4      | 5      | 0.2     |
+----+--------+--------+---------+
| o3 | 0.3    | 0.123  | 1.0     |
+----+--------+--------+---------+
>>> sheet.get_json(write_title=False) # pip install pyexcel-text
'{"o1": {"\'jamie\'": 3, "\'jess\'": 1.0, "\'jill\'": 0}, "o2": {"\'jamie\'": "0.2", "\'jess\'": 4, "\'jill\'": 5}, "o3": {"\'jamie\'": 1.0, "\'jess\'": "0.3", "\'jill\'": "0.123"}}'

对不起,我不应该指定熊猫。我试图不依赖额外的包,也不想花时间构建一个数据帧而放弃它。这是最简单的方法@萨尔,明白了。然而,对于您关于“构建数据帧以放弃它”的观点,创建数据帧需要一分钟的时间。不要为放弃你的data-baby感到难过。设计规范实际上说“没有熊猫”。)对不起,我不应该指定熊猫。我试图不依赖额外的包,也不想花时间构建一个数据帧而放弃它。这是最简单的方法@萨尔,明白了。然而,对于您关于“构建数据帧以放弃它”的观点,创建数据帧需要一分钟的时间。不要为放弃你的data-baby感到难过。设计规范实际上说“没有熊猫”。)希望能有帮助。@Bhargav抱歉,这不是答案。答案是每一行都记下,并在列名上键入一个dict。我试图获取每一列,并在行的第一个条目上键入dict。希望能有帮助。@Bhargav抱歉,这不是答案。答案是每一行都记下,并在列名上键入一个dict。我试图获取每一列,并在行的第一个条目上键入dict。