Javascript 如何让NodeJS转换流为每个输入块发出多条记录?

Javascript 如何让NodeJS转换流为每个输入块发出多条记录?,javascript,node.js,stream,transform-stream,Javascript,Node.js,Stream,Transform Stream,我正在编写一个转换流,它接受传入的XML流,并将其中的一个子集作为JS对象发出 <item> <name>some name</name> <files> <file path='path/to/file'/> <file path='path/to//other/file'/> </files> </item> 及 然后,它将通过管道传输到CSV编写器中 代码看起来像

我正在编写一个转换流,它接受传入的XML流,并将其中的一个子集作为JS对象发出

<item>
  <name>some name</name>
  <files>
    <file path='path/to/file'/>
    <file path='path/to//other/file'/>
  </files>
</item>

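需要把它转换成两条记录:`{ name: 'some name', file: 'path/to/file' }` 和 `{ name: 'some name', file: 'path/to//other/file' }`,因为之后这些记录会通过管道传输到 CSV 写入器中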

代码看起来像

import fs from 'fs'
import expat from 'node-expat'
import { Transform } from 'stream'

/**
 * Reads the XML file at `filePath`, feeds it through a node-expat parser and
 * returns an object-mode Transform stream that receives `{ name, file }`
 * records built from each <item> element.
 *
 * For every closed <item>, one record is written per collected <file path>,
 * or a single record with `file: ''` when the item has no <file> children.
 *
 * @param {string} filePath - Path handed to `fs.createReadStream`.
 * @returns {Transform} The object-mode stream the records are written to.
 */
const xParser = (filePath) => {
  const stream = fs.createReadStream(filePath)
  const parser = new expat.Parser('UTF-8')
  const tranny = Transform({ objectMode: true })

  // Pass-through: every object written to the stream is pushed out unchanged.
  tranny._transform = function transform(data, encoding, done) {
    this.push(data)
    done()
  }

  // Parser state shared by the handlers below.
  let current // the <item> being collected: { name, files }
  let inItem = false // true between <item> and </item>
  let tag // name of the most recently opened element inside the item
  let attributes // attributes of that element

  // startElement handler: opens a fresh item on <item>, and collects the
  // `path` attribute of every <file> seen while inside an item.
  const startTag = (name, attrs) => {
    if (name === 'item') {
      inItem = true
      current = {
        name: '',
        files: []
      }
      return
    }
    // Elements outside of an <item> are ignored.
    if (!inItem) return
    tag = name
    attributes = attrs
    if (tag === 'file') {
      current.files.push(attributes.path)
    }
  }

  // Writes one { name, file } record for the current item into the stream.
  const write = (file) => {
    tranny.write({
      name: current.name,
      file
    })
  }

  // endElement handler: on </item>, emits the records for the finished item
  // (several tranny.write calls when the item has several files) and clears
  // the per-element state. Other closing tags are ignored.
  const endTag = (name) => {
    if (name === 'item') {
      // console.log('end item', JSON.stringify(current))
      inItem = false
      if (current.files.length === 0) {
        // No files: still emit the item once, with an empty file path.
        write('')
      } else {
        current.files.forEach(file => {
          write(file)
        })
      }
      tag = undefined
      attributes = undefined
    }
  }

  // text handler: appends text to the item name while the last opened element
  // is <name>. `tag` is only changed by startTag and by </item>, so it is
  // still 'name' for text that follows </name> until the next opening tag.
  // NOTE(review): trim() runs after every append, so if the parser delivers
  // one text node in several pieces, whitespace at the piece boundaries is lost.
  const handleText = (text) => {
    if (!inItem) return
    if (tag === 'name') {
      current.name = current.name.concat(text).trim()
    }
  }

  // end handler: logs and ends the output stream once the parser is finished.
  const handleDone = () => {
    console.log('done')
    tranny.end()
  }

  parser
  .on('startElement', startTag)
  .on('endElement', endTag)
  .on('text', handleText)
  .on('end', handleDone)

  // The file is piped into the parser; the returned stream is fed only by the
  // handlers above.
  stream.pipe(parser)
  return tranny
}

module.exports = xParser
// endElement handler: when an <item> closes, hand the whole collected item to
// the stream in a single write and reset the per-element parser state.
const endTag = (name) => {
  // Closing tags other than </item> need no action.
  if (name !== 'item') return

  inItem = false
  tranny.write(current)
  tag = undefined
  attributes = undefined
}
当 XML 只是下面这样(每个 `<item>` 只有一个 `<file>`)时:

<item>
  <name>some name</name>
  <files>
    <file path='path/to/file'/>
  </files>
</item>

这可以正常工作;但当它遇到包含多个 `<file>` 元素的 `<item>` 时就会停止,并且在第二次调用 `tranny.write` 时,`end` 事件会立即触发

如何让转换流为每个输入块发出多条记录?

好的,修复了它

解决方案不是多次调用 `tranny.write`,而是在 `tranny._transform` 函数中执行数据转换(这本应是显而易见的)

就像这样:

// Fans one collected item out into flat { name, file } records: one per file
// path, or a single record with an empty path when the item has no files.
tranny._transform = function transform(data, encoding, done) {
  const hasFiles = data.files.length > 0
  const paths = hasFiles ? data.files : ['']

  paths.forEach((file) => {
    this.push({ name: data.name, file })
  })

  // Signal that this input item has been fully processed.
  done()
}
endTag
处理程序现在看起来像

import fs from 'fs'
import expat from 'node-expat'
import { Transform } from 'stream'

/**
 * Expands one parsed <item> into the flat records sent downstream.
 *
 * @param {{ name: string, files: string[] }} item - Item collected by the parser handlers.
 * @returns {{ name: string, file: string }[]} One record per file path, or a
 *   single record with an empty `file` when the item has no files.
 */
const toRecords = ({ name, files }) => (
  files.length > 0
    ? files.map((file) => ({ name, file }))
    : [{ name, file: '' }]
)

/**
 * Reads the XML file at `filePath`, feeds it through a node-expat parser and
 * returns an object-mode Transform stream emitting one `{ name, file }` record
 * per <file path> found inside each <item> (or one record with `file: ''`
 * for an item without files).
 *
 * Each finished item is written to the stream exactly once; the one-to-many
 * expansion happens inside `_transform`, which may push any number of records
 * per input chunk.
 *
 * Errors from the file stream or the parser are forwarded to the returned
 * stream, so callers can handle them with a single 'error' listener.
 *
 * @param {string} filePath - Path handed to `fs.createReadStream`.
 * @returns {Transform} Object-mode stream of `{ name, file }` records.
 */
const xParser = (filePath) => {
  const stream = fs.createReadStream(filePath)
  const parser = new expat.Parser('UTF-8')
  const tranny = new Transform({ objectMode: true })

  // One input item becomes zero-or-more pushes; done() is called once per item.
  tranny._transform = function transform(data, encoding, done) {
    toRecords(data).forEach((record) => this.push(record))
    done()
  }

  // Parser state shared by the handlers below.
  let current // the <item> being collected: { name, files }
  let inItem = false // true between <item> and </item>
  let tag // element whose text is currently being read, if any

  // startElement handler: opens a fresh item on <item> and collects the
  // `path` attribute of every <file> seen while inside an item.
  const startTag = (name, attrs) => {
    if (name === 'item') {
      inItem = true
      current = { name: '', files: [] }
      return
    }
    if (!inItem) return
    tag = name
    if (tag === 'file') {
      current.files.push(attrs.path)
    }
  }

  // endElement handler: any closing tag ends the "current element", so text
  // that follows </name> is no longer appended to the name. On </item> the
  // collected item is written to the stream as a single chunk.
  const endTag = (name) => {
    tag = undefined
    if (name !== 'item') return
    inItem = false
    // Trim once, after all text chunks have arrived: the parser may deliver a
    // single text node in several pieces, and trimming each piece would eat
    // whitespace that sits on a piece boundary.
    current.name = current.name.trim()
    // NOTE: the return value of write() is ignored, so no backpressure is
    // applied to the parser.
    tranny.write(current)
  }

  // text handler: accumulates the raw text of <name>.
  const handleText = (text) => {
    if (!inItem) return
    if (tag === 'name') {
      current.name += text
    }
  }

  // end handler: logs and ends the output stream once the parser is finished.
  const handleDone = () => {
    console.log('done')
    tranny.end()
  }

  // Surface read and parse failures on the stream the caller actually holds.
  const fail = (err) => {
    tranny.destroy(err instanceof Error ? err : new Error(String(err)))
  }

  parser
    .on('startElement', startTag)
    .on('endElement', endTag)
    .on('text', handleText)
    .on('end', handleDone)
    .on('error', fail)

  stream.on('error', fail)
  stream.pipe(parser)
  return tranny
}

module.exports = xParser
// endElement handler after the fix: a closed <item> is written to the stream
// once, as a whole; splitting it into records is left to the transform step.
const endTag = (name) => {
  const closesItem = name === 'item'
  if (!closesItem) return

  inItem = false
  tranny.write(current)

  // Forget the element that was being read inside the finished item.
  tag = undefined
  attributes = undefined
}
这意味着我也可以删除
write
函数

现在它可以正常工作了,我用下面的代码进行了验证:

import xParser from './xParser'
import concat from 'concat-stream'

// Call the parser under the name it is imported as (`xParser`); the previous
// `xmlParser` identifier was never defined and threw a ReferenceError.
// concat-stream gathers everything the stream emits and passes it to the
// callback once the stream has ended.
xParser('data/mybigdatafile.xml')
.pipe(concat((data) => {
  console.log(JSON.stringify(data));
}))
这为我提供了完整的数据集