Javascript合并标记对象数组,同时保持标记在文本中的位置

Javascript合并标记对象数组,同时保持标记在文本中的位置,javascript,Javascript,给定这样一个句子 "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua." 我已经构建了这个句子的json表示,并将其拆分为标记,对于每个标记,我按如下方式设置其第一个和最后一个字符的偏移量: { "tokens": [{ …

给定这样一个句子:

"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
我已经构建了一个句子的json表示,并将其拆分为标记,对于每个标记,我根据以下方法设置第一个和最后一个字符的偏移量:

{
    "tokens": [{
            "word": "Lorem",
            "characterOffsetBegin": 0,
            "characterOffsetEnd": 4
        },
        {
            "word": "ipsum",
            "characterOffsetBegin": 6,
            "characterOffsetEnd": 10
        },
        {
            "word": "dolor",
            "characterOffsetBegin": 12,
            "characterOffsetEnd": 16
        },
        {
            "word": "sit",
            "characterOffsetBegin": 18,
            "characterOffsetEnd": 20
        },
        {
            "word": "amet",
            "characterOffsetBegin": 22,
            "characterOffsetEnd": 25
        },
        {
            "word": "consectetur",
            "characterOffsetBegin": 28,
            "characterOffsetEnd": 38
        },
        {
            "word": "adipiscing",
            "characterOffsetBegin": 40,
            "characterOffsetEnd": 49
        },
        {
            "word": "elit",
            "characterOffsetBegin": 51,
            "characterOffsetEnd": 54
        },
        {
            "word": "sed",
            "characterOffsetBegin": 57,
            "characterOffsetEnd": 59
        },
        {
            "word": "do",
            "characterOffsetBegin": 61,
            "characterOffsetEnd": 62
        },
        {
            "word": "eiusmod",
            "characterOffsetBegin": 64,
            "characterOffsetEnd": 70
        },
        {
            "word": "tempor",
            "characterOffsetBegin": 72,
            "characterOffsetEnd": 77
        },
        {
            "word": "incididunt",
            "characterOffsetBegin": 79,
            "characterOffsetEnd": 88
        },
        {
            "word": "ut",
            "characterOffsetBegin": 90,
            "characterOffsetEnd": 91
        },
        {
            "word": "labore",
            "characterOffsetBegin": 93,
            "characterOffsetEnd": 98
        },
        {
            "word": "et",
            "characterOffsetBegin": 100,
            "characterOffsetEnd": 101
        },
        {
            "word": "dolore",
            "characterOffsetBegin": 103,
            "characterOffsetEnd": 108
        },
        {
            "word": "magna",
            "characterOffsetBegin": 110,
            "characterOffsetEnd": 114
        },
        {
            "word": "aliqua",
            "characterOffsetBegin": 116,
            "characterOffsetEnd": 121
        }
    ]
}
现在,我有另一个标记数组,它来自一个非常接近的句子(例如含有拼写错误)。在文本的相同位置(即 `characterOffsetBegin` 和 `characterOffsetEnd` 相同)上,它的标记可能略有不同(例如由于拼写错误),并且每个标记可能带有额外的属性(例如 `label`):

{
    "tokens": [{
            "word": "'orem",
            "characterOffsetBegin": 0,
            "characterOffsetEnd": 4,
            "label": "0"
        },
        {
            "word": "ipsum",
            "characterOffsetBegin": 6,
            "characterOffsetEnd": 10,
            "label": "0"
        },
        {
            "word": "dolors",
            "characterOffsetBegin": 12,
            "characterOffsetEnd": 16,
            "label": "X"
        },
        {
            "word": "st",
            "characterOffsetBegin": 18,
            "characterOffsetEnd": 20,
            "label": "Z"
        },
        {
            "word": "amet",
            "characterOffsetBegin": 22,
            "characterOffsetEnd": 25,
            "label": "0"
        },
        {
            "word": "consectetur",
            "characterOffsetBegin": 28,
            "characterOffsetEnd": 38,
            "label": "0"
        },
        {
            "word": "adipiscing",
            "characterOffsetBegin": 40,
            "characterOffsetEnd": 49,
            "label": "0"
        },
        {
            "word": "elt",
            "characterOffsetBegin": 51,
            "characterOffsetEnd": 54,
            "label": "Y"
        },
        {
            "word": "sd",
            "characterOffsetBegin": 57,
            "characterOffsetEnd": 59
        },
        {
            "word": "do",
            "characterOffsetBegin": 61,
            "characterOffsetEnd": 62
        },
        {
            "word": "eiusmod",
            "characterOffsetBegin": 64,
            "characterOffsetEnd": 70,
            "label": "E"
        },
        {
            "word": "tempor",
            "characterOffsetBegin": 72,
            "characterOffsetEnd": 77
        },
        {
            "word": "inciddunt",
            "characterOffsetBegin": 79,
            "characterOffsetEnd": 88,
            "label": "K"
        },
        {
            "word": "ut",
            "characterOffsetBegin": 90,
            "characterOffsetEnd": 91,
            "label": "O"
        },
        {
            "word": "labore",
            "characterOffsetBegin": 93,
            "characterOffsetEnd": 98,
            "label": "O"
        },
        {
            "word": "et",
            "characterOffsetBegin": 100,
            "characterOffsetEnd": 101,
            "label": "O"
        },
        {
            "word": "dolore",
            "characterOffsetBegin": 103,
            "characterOffsetEnd": 108,
            "label": "O"
        },
        {
            "word": "magna",
            "characterOffsetBegin": 110,
            "characterOffsetEnd": 114,
            "label": "O"
        },
        {
            "word": "aliqua",
            "characterOffsetBegin": 116,
            "characterOffsetEnd": 121,
            "label": "K"
        }
    ]
}
我必须把后一个数组的属性映射到前一个数组上,使前一个数组中的每个标记继承后一个数组中相同位置标记的标签。例如,对于 `"characterOffsetBegin": 18`、`"characterOffsetEnd": 20` 这个位置,前一个数组中有:

{
  "word": "sit",
  "characterOffsetBegin": 18,
  "characterOffsetEnd": 20
}

因此,生成的标记必须是:

 {
          "word": "sit",
          "characterOffsetBegin": 18,
          "characterOffsetEnd": 20,
          "label": "Z"
 }

因为前者会继承后者的新属性。由于 `word` 属性可能不同,我们只能依靠 `characterOffsetBegin` 和 `characterOffsetEnd` 属性来匹配某个位置上的标记。

您可以使用 `map` 和 `find` 来获取标签:

 // Build a labelled copy of tokens1: each token keeps its own fields and
 // takes the `label` of the tokens2 entry with the same character span
 // (same characterOffsetBegin and characterOffsetEnd). `word` is not used
 // for matching.
 const tokens = tokens1.map((token) => {
   const match = tokens2.find(
     (candidate) =>
       candidate.characterOffsetBegin === token.characterOffsetBegin &&
       candidate.characterOffsetEnd === token.characterOffsetEnd,
   );

   // `find` returns undefined when no entry shares this span; guard the
   // lookup so an unmatched token gets `label: undefined` instead of
   // throwing a TypeError on `.label`.
   return { ...token, label: match ? match.label : undefined };
 });
      
运行下面的代码段

// Reference tokens of the original sentence. Offsets are inclusive indices
// of the first and last character of each word in the sentence text.
const tokens1 = [
  { word: "Lorem", characterOffsetBegin: 0, characterOffsetEnd: 4 },
  { word: "ipsum", characterOffsetBegin: 6, characterOffsetEnd: 10 },
  { word: "dolor", characterOffsetBegin: 12, characterOffsetEnd: 16 },
  { word: "sit", characterOffsetBegin: 18, characterOffsetEnd: 20 },
  { word: "amet", characterOffsetBegin: 22, characterOffsetEnd: 25 },
  { word: "consectetur", characterOffsetBegin: 28, characterOffsetEnd: 38 },
  { word: "adipiscing", characterOffsetBegin: 40, characterOffsetEnd: 49 },
  { word: "elit", characterOffsetBegin: 51, characterOffsetEnd: 54 },
  { word: "sed", characterOffsetBegin: 57, characterOffsetEnd: 59 },
  { word: "do", characterOffsetBegin: 61, characterOffsetEnd: 62 },
  { word: "eiusmod", characterOffsetBegin: 64, characterOffsetEnd: 70 },
  { word: "tempor", characterOffsetBegin: 72, characterOffsetEnd: 77 },
  { word: "incididunt", characterOffsetBegin: 79, characterOffsetEnd: 88 },
  { word: "ut", characterOffsetBegin: 90, characterOffsetEnd: 91 },
  { word: "labore", characterOffsetBegin: 93, characterOffsetEnd: 98 },
  { word: "et", characterOffsetBegin: 100, characterOffsetEnd: 101 },
  { word: "dolore", characterOffsetBegin: 103, characterOffsetEnd: 108 },
  { word: "magna", characterOffsetBegin: 110, characterOffsetEnd: 114 },
  { word: "aliqua", characterOffsetBegin: 116, characterOffsetEnd: 121 },
];

// Tokens of the near-identical sentence (with typos). They occupy the same
// character spans as tokens1; some of them carry an extra `label`.
const tokens2 = [
  { word: "'orem", characterOffsetBegin: 0, characterOffsetEnd: 4, label: "0" },
  { word: "ipsum", characterOffsetBegin: 6, characterOffsetEnd: 10, label: "0" },
  { word: "dolors", characterOffsetBegin: 12, characterOffsetEnd: 16, label: "X" },
  { word: "st", characterOffsetBegin: 18, characterOffsetEnd: 20, label: "Z" },
  { word: "amet", characterOffsetBegin: 22, characterOffsetEnd: 25, label: "0" },
  { word: "consectetur", characterOffsetBegin: 28, characterOffsetEnd: 38, label: "0" },
  { word: "adipiscing", characterOffsetBegin: 40, characterOffsetEnd: 49, label: "0" },
  { word: "elt", characterOffsetBegin: 51, characterOffsetEnd: 54, label: "Y" },
  { word: "sd", characterOffsetBegin: 57, characterOffsetEnd: 59 },
  { word: "do", characterOffsetBegin: 61, characterOffsetEnd: 62 },
  { word: "eiusmod", characterOffsetBegin: 64, characterOffsetEnd: 70, label: "E" },
  { word: "tempor", characterOffsetBegin: 72, characterOffsetEnd: 77 },
  { word: "inciddunt", characterOffsetBegin: 79, characterOffsetEnd: 88, label: "K" },
  { word: "ut", characterOffsetBegin: 90, characterOffsetEnd: 91, label: "O" },
  { word: "labore", characterOffsetBegin: 93, characterOffsetEnd: 98, label: "O" },
  { word: "et", characterOffsetBegin: 100, characterOffsetEnd: 101, label: "O" },
  { word: "dolore", characterOffsetBegin: 103, characterOffsetEnd: 108, label: "O" },
  { word: "magna", characterOffsetBegin: 110, characterOffsetEnd: 114, label: "O" },
  { word: "aliqua", characterOffsetBegin: 116, characterOffsetEnd: 121, label: "K" },
];

// Build a labelled copy of tokens1: each token keeps its own fields (including
// its correctly spelled `word`) and takes the `label` of the tokens2 entry
// with the same character span. `word` is not used for matching.
const tokens = tokens1.map((token) => {
  const match = tokens2.find(
    (candidate) =>
      candidate.characterOffsetBegin === token.characterOffsetBegin &&
      candidate.characterOffsetEnd === token.characterOffsetEnd,
  );

  // `find` returns undefined when no entry shares this span; guard the lookup
  // so an unmatched token gets `label: undefined` instead of throwing.
  return { ...token, label: match ? match.label : undefined };
});
      
// Merge each token with the entry of `extras` that covers the same character
// span (same characterOffsetBegin and characterOffsetEnd). On a match the
// result holds the extra's properties overlaid with the token's own, so the
// token's values win for shared keys such as `word`. Tokens without a match
// are returned as-is.
const merge = tokens.map((token) => {
  const match = extras.find(
    (extra) =>
      extra.characterOffsetBegin === token.characterOffsetBegin &&
      extra.characterOffsetEnd === token.characterOffsetEnd,
  );

  // Spread into a fresh object rather than Object.assign(match, token):
  // assigning onto `match` would overwrite the entry stored in `extras`.
  return match ? { ...match, ...token } : token;
});