Json XML解析-Erlang

Json XML解析-Erlang,json,xml,erlang,Json,Xml,Erlang,我想将XML字符串解析为erlang列表,然后再解析为JSON 输入示例: <?xml version="1.0" encoding="UTF-8"?> <!--some message here--> <start> <data> <number id="333">test message</number> <data>current date</data> &l

我想将XML字符串解析为erlang列表,然后再解析为JSON

输入示例:

<?xml version="1.0" encoding="UTF-8"?>
<!--some message here-->
<start>
   <data>
      <number id="333">test message</number>
      <data>current date</data>
   </data>
   <mass>
      <client>35</client>
      <address>lattitude</address>
      <code>3454343</code>
      <foo tipo="casa">Some text message 2</foo>
      <product>TEST</product>
   </mass>
</start>
输出应为:

{
  "start": {
    "data": {
      "number": {
        "@id": "333",
        "#text": "test message"
      },
      "data": "current date"
    },
    "mass": {
      "client": "35",
      "address": "lattitude",
      "code": "3454343",
      "foo": {
        "@tipo": "casa",
        "#text": "Some text message 2"
      },
      "product": "TEST"
    }
  }
}
我正在尝试使用erlsom:simple_form(Xml)

并获得:

{ok,{"start",[],
     [{"data",[],
       [{"number",[{"id","333"}],["test message"]},
        {"data",[],["current date"]}]},
      {"mass",[],
       [{"client",[],["35"]},
        {"address",[],["lattitude"]},
        {"code",[],["3454343"]},
        {"foo",[{"tipo","casa"}],["Some text message 2"]},
        {"product",[],["TEST"]}]}]},
    []}
现在我想删除这些空属性。有什么简单的方法可以做到这一点吗? 提前谢谢

更新:使用另一个问题(3517914)中的解决方案后,代码可以运行了

但是

{"start",
 [{"data",
   [{"number","test message"},{"data","current date"}]},
  {"mass",
   [{"client","35"},
    {"address","lattitude"},
    {"code","3454343"},
    {"foo","Some text message 2"},
    {"product","TEST"}]}]}

结果中缺少 [{"id","333"}] 和 [{"tipo","casa"}] 这两个属性列表 :(

simple_form 的输出采用固定的格式:
{Node, Attributes, Children}
,因此您可以编写一个简单的转换函数,将现有的结构转换为嵌套的 proplist。这样,您既可以直接使用该 proplist,也可以将其转换为 JSON 字符串

%% @doc Converts the `{Node, Attributes, Children}' tuples produced by
%% `erlsom:simple_form/1' into nested proplists shaped like the target JSON:
%% attributes become `{"@" ++ Name, Value}' pairs and text nodes that sit
%% next to attributes or other children become `{"#text", Text}' pairs.
%%
%% All keys and values are Erlang strings (lists of character codes). An
%% encoder that renders lists as JSON arrays will print such values as arrays
%% of integers, so convert them to binaries before encoding if plain JSON
%% strings are required.
-module(transform).

-export([test/0, parse/1]).

-type simple_form() ::
    {string(), [{string(), string()}], [simple_form() | string()]}.
-type struct() :: [{string(), string() | struct()}].

%% @doc Runs parse/1 on the built-in sample document.
-spec test() -> struct().
test() -> parse(data()).

%% @doc Converts one simple-form element into a one-element proplist
%% `[{Node, Value}]'.
%%
%% An element without attributes whose only child is a text node collapses
%% to `[{Node, Text}]'. Every other element maps to `[{Node, Members}]',
%% where Members lists the converted attributes followed by the converted
%% children, in document order.
-spec parse(simple_form()) -> struct().
parse({Node, [], [Text]}) when is_list(Text) ->
    [{Node, Text}];
parse({Node, Attributes, Children}) ->
    [{Node, attributes_to_struct(Attributes) ++ children_to_struct(Children)}].

%% Converts every child in order. Child elements are parsed recursively and
%% text nodes become "#text" members wherever they appear, so mixed content
%% (text interleaved with elements) is accepted instead of crashing.
children_to_struct(Children) ->
    lists:flatmap(fun child_to_struct/1, Children).

child_to_struct(Element) when is_tuple(Element) ->
    parse(Element);
child_to_struct(Text) when is_list(Text) ->
    [{"#text", Text}].

%% Prefixes each attribute name with "@". lists:map/2 is used rather than a
%% filtering comprehension so that a malformed attribute still crashes.
attributes_to_struct(Attributes) ->
    lists:map(fun({Name, Value}) -> {"@" ++ Name, Value} end, Attributes).

%% Sample input: the simple-form term for the example XML document.
data() ->
    {"start",[],
     [{"data",[],
       [{"number",[{"id","333"}],["test message"]},
        {"data",[],["current date"]}]},
      {"mass",[],
       [{"client",[],["35"]},
        {"address",[],["lattitude"]},
        {"code",[],["3454343"]},
        {"foo",[{"tipo","casa"}],["Some text message 2"]},
        {"product",[],["TEST"]}]}]}.
使用 mochijson2 在 shell 中运行它:

Eshell V7.3  (abort with ^G)
1> c(transform).
{ok,transform}
2> T = transform:test().
[{"start",
  [{"data",
    [{"number",[{"@id","333"},{"#text","test message"}]},
     {"data","current date"}]},
   {"mass",
    [{"client","35"},
     {"address","lattitude"},
     {"code","3454343"},
     {"foo",[{"@tipo","casa"},{"#text","Some text message 2"}]},
     {"product","TEST"}]}]}]
3> 
4> iolist_to_binary(mochijson2:encode(T)).
<<"{\"start\":{\"data\":{\"number\":{\"@id\":[51,51,51],\"#text\":[116,101,115,116,32,109,101,115,115,97,103,101]},\"data\":{\"#text"...>>
Eshell V7.3  (abort with ^G)
1> c(transform).
{ok,transform}
2> T = transform:test().
[{"start",
  [{"data",
    [{"number",[{"@id","333"},{"#text","test message"}]},
     {"data","current date"}]},
   {"mass",
    [{"client","35"},
     {"address","lattitude"},
     {"code","3454343"},
     {"foo",[{"@tipo","casa"},{"#text","Some text message 2"}]},
     {"product","TEST"}]}]}]
3> 
4> iolist_to_binary(mochijson2:encode(T)).

simple_form 的输出采用固定的格式:
{Node, Attributes, Children}
,因此您可以编写一个简单的转换函数,将现有的结构转换为嵌套的 proplist。这样,您既可以直接使用该 proplist,也可以将其转换为 JSON 字符串

%% @doc Converts the `{Node, Attributes, Children}' tuples produced by
%% `erlsom:simple_form/1' into nested proplists shaped like the target JSON:
%% attributes become `{"@" ++ Name, Value}' pairs and text nodes that sit
%% next to attributes or other children become `{"#text", Text}' pairs.
%%
%% All keys and values are Erlang strings (lists of character codes). An
%% encoder that renders lists as JSON arrays will print such values as arrays
%% of integers, so convert them to binaries before encoding if plain JSON
%% strings are required.
-module(transform).

-export([test/0, parse/1]).

-type simple_form() ::
    {string(), [{string(), string()}], [simple_form() | string()]}.
-type struct() :: [{string(), string() | struct()}].

%% @doc Runs parse/1 on the built-in sample document.
-spec test() -> struct().
test() -> parse(data()).

%% @doc Converts one simple-form element into a one-element proplist
%% `[{Node, Value}]'.
%%
%% An element without attributes whose only child is a text node collapses
%% to `[{Node, Text}]'. Every other element maps to `[{Node, Members}]',
%% where Members lists the converted attributes followed by the converted
%% children, in document order.
-spec parse(simple_form()) -> struct().
parse({Node, [], [Text]}) when is_list(Text) ->
    [{Node, Text}];
parse({Node, Attributes, Children}) ->
    [{Node, attributes_to_struct(Attributes) ++ children_to_struct(Children)}].

%% Converts every child in order. Child elements are parsed recursively and
%% text nodes become "#text" members wherever they appear, so mixed content
%% (text interleaved with elements) is accepted instead of crashing.
children_to_struct(Children) ->
    lists:flatmap(fun child_to_struct/1, Children).

child_to_struct(Element) when is_tuple(Element) ->
    parse(Element);
child_to_struct(Text) when is_list(Text) ->
    [{"#text", Text}].

%% Prefixes each attribute name with "@". lists:map/2 is used rather than a
%% filtering comprehension so that a malformed attribute still crashes.
attributes_to_struct(Attributes) ->
    lists:map(fun({Name, Value}) -> {"@" ++ Name, Value} end, Attributes).

%% Sample input: the simple-form term for the example XML document.
data() ->
    {"start",[],
     [{"data",[],
       [{"number",[{"id","333"}],["test message"]},
        {"data",[],["current date"]}]},
      {"mass",[],
       [{"client",[],["35"]},
        {"address",[],["lattitude"]},
        {"code",[],["3454343"]},
        {"foo",[{"tipo","casa"}],["Some text message 2"]},
        {"product",[],["TEST"]}]}]}.
使用 mochijson2 在 shell 中运行它:

Eshell V7.3  (abort with ^G)
1> c(transform).
{ok,transform}
2> T = transform:test().
[{"start",
  [{"data",
    [{"number",[{"@id","333"},{"#text","test message"}]},
     {"data","current date"}]},
   {"mass",
    [{"client","35"},
     {"address","lattitude"},
     {"code","3454343"},
     {"foo",[{"@tipo","casa"},{"#text","Some text message 2"}]},
     {"product","TEST"}]}]}]
3> 
4> iolist_to_binary(mochijson2:encode(T)).
<<"{\"start\":{\"data\":{\"number\":{\"@id\":[51,51,51],\"#text\":[116,101,115,116,32,109,101,115,115,97,103,101]},\"data\":{\"#text"...>>
Eshell V7.3  (abort with ^G)
1> c(transform).
{ok,transform}
2> T = transform:test().
[{"start",
  [{"data",
    [{"number",[{"@id","333"},{"#text","test message"}]},
     {"data","current date"}]},
   {"mass",
    [{"client","35"},
     {"address","lattitude"},
     {"code","3454343"},
     {"foo",[{"@tipo","casa"},{"#text","Some text message 2"}]},
     {"product","TEST"}]}]}]
3> 
4> iolist_to_binary(mochijson2:encode(T)).
我建议使用 jiffy 处理 JSON、使用 exml 处理 XML。jiffy 和 exml 都带有本机代码,这意味着它们速度非常快

克隆并编译它们。
在编译它们之前,您应该先安装 g++ 和 libexpat-dev

例如:

%% Converts an XML document (binary) to JSON text: exml parses the XML into
%% #xmlel{} records and jiffy encodes the converted term.
-module(test).
-export([convert/1]).
-include("exml/include/exml.hrl"). %% In my test

%% convert(XML) -> result of jiffy:encode/1
%% Accepts only binaries. Crashes with badmatch unless exml:parse/1
%% returns {ok, Element}.
convert(XML) when is_binary(XML) ->
    {ok, Root} = exml:parse(XML),
    Object = {[convert2(Root)]},
    jiffy:encode(Object).

%% Maps one element to a {Name, Value} member.
%% An element with no attributes and a single character-data child collapses
%% to {Name, Text}; any other element becomes {Name, {Members}}, where
%% Members holds the converted attributes followed by the converted children.
convert2(#xmlel{name = Tag, attrs = [], children = [{xmlcdata, Text}]}) ->
    {Tag, Text};
convert2(#xmlel{name = Tag, attrs = Attributes, children = Nodes}) ->
    Members = convert_attrs(Attributes) ++ convert_children(Nodes),
    {Tag, {Members}}.

%% Converts every attribute, keeping the original order.
convert_attrs(Attributes) ->
    lists:map(fun convert_attr/1, Attributes).

%% Prefixes the attribute name (a binary) with "@".
convert_attr({Key, Value}) ->
    {<<"@", Key/binary>>, Value}.

%% Converts every child node, keeping the original order.
convert_children(Nodes) ->
    lists:map(fun convert_child/1, Nodes).

%% Character data becomes a "#text" member; child elements recurse
%% through convert2/1.
convert_child(#xmlel{} = Element) ->
    convert2(Element);
convert_child({xmlcdata, Text}) ->
    {<<"#text">>, Text}.
我建议使用 jiffy 处理 JSON、使用 exml 处理 XML。jiffy 和 exml 都带有本机代码,这意味着它们速度非常快

克隆并编译它们。
在编译它们之前,您应该先安装 g++ 和 libexpat-dev

例如:

%% Converts an XML document (binary) to JSON text: exml parses the XML into
%% #xmlel{} records and jiffy encodes the converted term.
-module(test).
-export([convert/1]).
-include("exml/include/exml.hrl"). %% In my test

%% convert(XML) -> result of jiffy:encode/1
%% Accepts only binaries. Crashes with badmatch unless exml:parse/1
%% returns {ok, Element}.
convert(XML) when is_binary(XML) ->
    {ok, Root} = exml:parse(XML),
    Object = {[convert2(Root)]},
    jiffy:encode(Object).

%% Maps one element to a {Name, Value} member.
%% An element with no attributes and a single character-data child collapses
%% to {Name, Text}; any other element becomes {Name, {Members}}, where
%% Members holds the converted attributes followed by the converted children.
convert2(#xmlel{name = Tag, attrs = [], children = [{xmlcdata, Text}]}) ->
    {Tag, Text};
convert2(#xmlel{name = Tag, attrs = Attributes, children = Nodes}) ->
    Members = convert_attrs(Attributes) ++ convert_children(Nodes),
    {Tag, {Members}}.

%% Converts every attribute, keeping the original order.
convert_attrs(Attributes) ->
    lists:map(fun convert_attr/1, Attributes).

%% Prefixes the attribute name (a binary) with "@".
convert_attr({Key, Value}) ->
    {<<"@", Key/binary>>, Value}.

%% Converts every child node, keeping the original order.
convert_children(Nodes) ->
    lists:map(fun convert_child/1, Nodes).

%% Character data becomes a "#text" member; child elements recurse
%% through convert2/1.
convert_child(#xmlel{} = Element) ->
    convert2(Element);
convert_child({xmlcdata, Text}) ->
    {<<"#text">>, Text}.

你检查过这个问题吗?这似乎接近你想要的。 是的,检查过。不幸的是,这个解决方案不适用于我。 那么请(作为你问题的更新)贴出当您尝试解决方案 3517914 时会发生什么情况。 @marco.m 完成。谢谢。 您是否尝试修改该解析器,或编写自己的解析器,以将元组列表转换为您想要的格式? 您检查过这个问题吗?这似乎与您要找的很接近。 是的,检查过。不幸的是,该解决方案不适用于我。 那么请(作为您问题的更新)贴出当您尝试解决方案 3517914 时会发生什么情况。 @marco.m 完成。谢谢。 您是否尝试修改该解析器,或编写自己的解析器,以将元组列表转换为您想要的格式? 非常感谢。工作非常完美。我将在学习 erlang 时仔细检查您的代码。 非常感谢。工作非常完美。我将在学习 erlang 时仔细检查您的代码。