自动创建表并从json文件插入数据_Json_Postgresql_Plpgsql

自动创建表并从json文件插入数据

json postgresql

自动创建表并从json文件插入数据,json,postgresql,plpgsql,Json,Postgresql,Plpgsql,我有将近50个json文件。我想基于这些文件创建Postgres数据库。每个文件包含一个表的数据。文件不是很大（最多几千条记录）。来自customers.json的示例数据（实际上还有更多字段，我已经简化了）：我尝试编写一个函数，创建一个表并将所有数据插入其中。我的尝试基于使用EXECUTE的动态查询： CREATE OR REPLACE FUNCTION import_json(table_name text, data json) RETURNS VOID AS $$ DECLARE

我有将近50个json文件。我想基于这些文件创建Postgres数据库。每个文件包含一个表的数据。文件不是很大（最多几千条记录）。来自

customers.json

的示例数据（实际上还有更多字段，我已经简化了）：

我尝试编写一个函数，创建一个表并将所有数据插入其中。我的尝试基于使用EXECUTE的动态查询：

-- Creates a table with one text column per key of the first object in the
-- JSON array `data`. Column names are the lower-cased JSON keys.
--
-- Parameters:
--   table_name  name of the table to create. It is interpolated verbatim so
--               that schema-qualified names keep working; pass only trusted
--               identifiers here.
--   data        JSON array of objects; only element 0 is inspected.
--
-- Raises whatever CREATE TABLE raises (for example, if the table exists).
CREATE OR REPLACE FUNCTION import_json(table_name text, data json)
RETURNS VOID AS $$
DECLARE
    column_defs text[] := '{}';
    colname text;
BEGIN
    -- Collect the definitions in key order. %I quotes a name only when
    -- required, so keys containing spaces, punctuation or reserved words
    -- produce valid DDL instead of a syntax error or injected SQL.
    FOR colname IN SELECT json_object_keys(data->0)
    LOOP
        column_defs := column_defs || format('%I text', lower(colname));
    END LOOP;

    EXECUTE format(
        'CREATE TABLE %s (%s)',
        table_name,
        array_to_string(column_defs, ', ')
    );
END $$ LANGUAGE plpgsql;

我的函数使用预期的列名创建了一个表，但所有列都是文本类型。问题是我不知道如何为各列定义合适的类型。

json文件格式良好，包含整数、数字、日期、时间戳和文本值。我希望得到这样一张表：

CREATE TABLE customers (
  id integer, 
  fullname text, 
  address text, 
  turnover numeric, 
  date_of_registration date, 
  last_modified_at timestamp);

主要问题：如何识别生成表中的列类型

此外，是否有一种简单的方法将Pascal命名法转换为下划线命名法（"DateOfRegistration" -> "date_of_registration"）？

您可以通过检查值来确定列的类型。下面的函数根据一对（键、值）生成列的定义。它使用正则表达式匹配来判断类型，还会将列名转换为下划线命名法（使用 regexp_replace() 函数）。当然，如果值为NULL，函数将无法正确判断类型，因此您必须确认第一条json记录中的所有值都不是NULL。

-- Builds one column definition ("name type") from a JSON key and the JSON
-- text of a sample value.
--
-- Parameters:
--   ckey  JSON key in Pascal case; an underscore is inserted before capital
--         letters and the result is lower-cased (DateOfRegistration ->
--         date_of_registration). The name is identifier-quoted when needed.
--   cval  JSON text of the value; strings are expected to still carry their
--         surrounding double quotes.
--
-- Returns the definition with the type inferred from cval:
--   integer / bigint / numeric  for whole numbers, by magnitude
--   numeric                     for decimals and exponent notation
--   date, timestamp             for quoted YYYY-MM-DD [HH:MI:SS] strings
--   text                        for everything else (including null)
create or replace function format_column(ckey text, cval text)
returns text language sql immutable as $$
    select format('%I %s',
        lower(regexp_replace(ckey, '(.)([A-Z])', '\1_\2', 'g')),
        case
            when cval ~ '^[+-]?\d+$' then
                -- Pick the narrowest type that can hold the sample value so
                -- that the later INSERT does not fail with "out of range".
                -- The cast is only reached after the digit check above.
                case
                    when cval::numeric between -2147483648 and 2147483647
                        then 'integer'
                    when cval::numeric between -9223372036854775808 and 9223372036854775807
                        then 'bigint'
                    else 'numeric'
                end
            -- Decimals, optionally with an exponent, or whole numbers with an
            -- exponent (1e5), which JSON allows.
            when cval ~ '^[+-]?(\d*\.\d+([eE][+-]?\d+)?|\d+[eE][+-]?\d+)$' then 'numeric'
            when cval ~ '^"\d\d\d\d-\d\d-\d\d"$' then 'date'
            when cval ~ '^"\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d"$' then 'timestamp'
            else 'text'
        end
    )
$$;

-- Demo: derive a column definition for each sample (key, JSON value) pair.
select format_column(sample.key, sample.value)
from (
    values
        ('Id', '55948'),
        ('FullName', '"Full name #1"'),
        ('Turnover', '120400.5'),
        ('DateOfRegistration', '"2014-02-13"')
) as sample(key, value);

       format_column       
---------------------------
 id integer
 full_name text
 turnover numeric
 date_of_registration date
(4 rows)

在主函数中，不需要变量或循环。使用 format() 函数格式化带参数的字符串，并使用 string_agg() 将文本聚合成列表。由于同时需要键和值，请使用

json_each()

而不是

json_object_keys()

。在第二个查询中，您可以使用 row_number() 来确保将聚合的值列表划分为连续记录

-- Creates a table from a JSON array of objects and loads every object as a row.
--
-- Parameters:
--   table_name  name of the table to create and fill. It is interpolated
--               verbatim so that schema-qualified names keep working; pass
--               only trusted identifiers here.
--   jdata       JSON array of objects. The column definitions come from
--               format_column() applied to each key/value of element 0.
--               Values are inserted positionally, so every object must list
--               its keys in the same order as element 0.
--
-- An empty array creates the table and inserts nothing.
create or replace function import_table(table_name text, jdata json)
returns void language plpgsql as $$
declare
    column_defs text;
    value_rows text;
begin
    select string_agg(format_column(key::text, value::text), ', ')
    into column_defs
    from json_each(jdata->0);

    execute format('create table %s (%s)', table_name, column_defs);

    -- Build one "(v1,v2,...)" tuple per array element. json_each_text returns
    -- string values already unescaped and JSON null as SQL NULL, which %L
    -- renders as an unquoted NULL. Aggregating per record in a lateral
    -- subquery keeps the values in key order without a GROUP BY. The ||
    -- concatenation yields NULL for an object without keys, and the outer
    -- string_agg skips NULLs, so such objects contribute no tuple.
    select string_agg(tuples.tuple, ',')
    into value_rows
    from json_array_elements(jdata) as records(line)
    cross join lateral (
        select '(' || string_agg(format('%L', fields.value), ',') || ')' as tuple
        from json_each_text(records.line) as fields
    ) as tuples;

    -- Nothing to insert for an empty array; "insert ... values" with no
    -- tuples would be a syntax error.
    if value_rows is not null then
        execute format('insert into %s values %s', table_name, value_rows);
    end if;
end $$;

测试：

您是只想使用plpgsql，还是也可以接受外部语言？另外，您是否有表的架构来定义各列应有的类型，还是打算从字段名推断类型（例如字段名以"Date"开头就设为日期，字段名为id就设为整数）？半相关提示：在动态SQL中使用 format() 和 %I，而不是使用 || 进行串联。就我个人而言，无论如何，我会用Python或其他语言来完成这项工作。如果您愿意使用Python，那么这可以做到，不会有太多麻烦。关于您的主要问题的提示：要了解目标表的类型。
-- Creates a table from a JSON array of objects and loads every object as a row.
--
-- Parameters:
--   table_name  name of the table to create and fill. It is interpolated
--               verbatim so that schema-qualified names keep working; pass
--               only trusted identifiers here.
--   jdata       JSON array of objects. The column definitions come from
--               format_column() applied to each key/value of element 0.
--               Values are inserted positionally, so every object must list
--               its keys in the same order as element 0.
--
-- An empty array creates the table and inserts nothing.
create or replace function import_table(table_name text, jdata json)
returns void language plpgsql as $$
declare
    column_defs text;
    value_rows text;
begin
    select string_agg(format_column(key::text, value::text), ', ')
    into column_defs
    from json_each(jdata->0);

    execute format('create table %s (%s)', table_name, column_defs);

    -- Build one "(v1,v2,...)" tuple per array element. json_each_text returns
    -- string values already unescaped and JSON null as SQL NULL, which %L
    -- renders as an unquoted NULL. Aggregating per record in a lateral
    -- subquery keeps the values in key order without a GROUP BY. The ||
    -- concatenation yields NULL for an object without keys, and the outer
    -- string_agg skips NULLs, so such objects contribute no tuple.
    select string_agg(tuples.tuple, ',')
    into value_rows
    from json_array_elements(jdata) as records(line)
    cross join lateral (
        select '(' || string_agg(format('%L', fields.value), ',') || ')' as tuple
        from json_each_text(records.line) as fields
    ) as tuples;

    -- Nothing to insert for an empty array; "insert ... values" with no
    -- tuples would be a syntax error.
    if value_rows is not null then
        execute format('insert into %s values %s', table_name, value_rows);
    end if;
end $$;

-- Smoke test: create and fill "customers" from two sample records.
select import_table(
    'customers',
    '[{ "Id": 55948,
        "FullName": "Full name #1",
        "Address": "Address #1",
        "Turnover": 120400.5,
        "DateOfRegistration": "2014-02-13",
        "LastModifiedAt": "2015-11-03 12:04:44" },
    {   "Id": 55949,
        "FullName": "Full name %2",
        "Address": "Address #2",
        "Turnover": 120000.0,
        "DateOfRegistration": "2012-12-01",
        "LastModifiedAt": "2015-11-04 17:14:21" }]'::json
);

\d customers
                    Table "public.customers"
        Column        |            Type             | Modifiers 
----------------------+-----------------------------+-----------
 id                   | integer                     | 
 full_name            | text                        | 
 address              | text                        | 
 turnover             | numeric                     | 
 date_of_registration | date                        | 
 last_modified_at     | timestamp without time zone |

select * from customers;

  id   |  full_name   |  address   | turnover | date_of_registration |  last_modified_at   
-------+--------------+------------+----------+----------------------+---------------------
 55948 | Full name #1 | Address #1 | 120400.5 | 2014-02-13           | 2015-11-03 12:04:44
 55949 | Full name %2 | Address #2 | 120000.0 | 2012-12-01           | 2015-11-04 17:14:21
(2 rows)