Javascript 如何让NodeJS转换流为每个输入块发出多条记录?
我正在编写一个转换流(Transform stream),它接受传入的 XML 流,并将其中的一个子集作为 JS 对象发出。(标签:javascript、node.js、stream、transform-stream)例如,对于下面这样的 XML:
<item>
<name>some name</name>
<files>
<file path='path/to/file'/>
<file path='path/to//other/file'/>
</files>
</item>
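应当转换为两条记录:`{ name: 'some name', file: 'path/to/file' }` 及 `{ name: 'some name', file: 'path/to//other/file' }`。
然后,这些记录将通过管道传输到 CSV 写入器中。
代码如下: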
import fs from 'fs'
import expat from 'node-expat'
import { Transform } from 'stream'
/**
 * Build an object-mode stream of `{ name, file }` records from an XML file.
 *
 * A node-expat parser is fed from a read stream on `filePath`. Whenever an
 * `<item>` element closes, one record per collected `<file path>` attribute
 * (or a single record with an empty `file` when there are none) is written
 * to a pass-through Transform, which is what the caller gets back.
 *
 * @param {string} filePath - Path of the XML file to read.
 * @returns {Transform} Object-mode stream carrying the records.
 */
const xParser = (filePath) => {
  const stream = fs.createReadStream(filePath)
  const parser = new expat.Parser('UTF-8')

  // Pass-through transform: every object written comes out unchanged.
  const tranny = Transform({ objectMode: true })
  tranny._transform = function transform(data, encoding, done) {
    this.push(data)
    done()
  }

  // Parser state shared by the handlers below.
  let current
  let inItem = false
  let tag
  let attributes

  // Opening tag: start a fresh item, or track the element inside one.
  const startTag = (name, attrs) => {
    if (name !== 'item') {
      if (inItem) {
        tag = name
        attributes = attrs
        if (name === 'file') {
          current.files.push(attrs.path)
        }
      }
      return
    }
    inItem = true
    current = { name: '', files: [] }
  }

  // Emit one record for the current item and the given file path.
  const write = (file) => {
    const record = { name: current.name, file }
    tranny.write(record)
  }

  // Closing tag: only the end of an <item> triggers output.
  const endTag = (name) => {
    if (name !== 'item') return
    inItem = false
    // An item without files still produces one record, with an empty path.
    const files = current.files.length === 0 ? [''] : current.files
    for (const file of files) {
      write(file)
    }
    tag = undefined
    attributes = undefined
  }

  // Text is appended to the item name while the last opened tag is <name>.
  const handleText = (text) => {
    if (inItem && tag === 'name') {
      current.name = `${current.name}${text}`.trim()
    }
  }

  // The parser is finished: close the output stream.
  const handleDone = () => {
    console.log('done')
    tranny.end()
  }

  parser.on('startElement', startTag)
  parser.on('endElement', endTag)
  parser.on('text', handleText)
  parser.on('end', handleDone)

  stream.pipe(parser)
  return tranny
}
module.exports = xParser
// Closing-tag handler: when an <item> ends, hand the collected item to the
// transform stream in a single write and clear the per-item parser state.
// Closing tags of any other element are ignored.
const endTag = (name) => {
  if (name !== 'item') return
  inItem = false
  tranny.write(current)
  tag = undefined
  attributes = undefined
}
当xml只是
<item>
<name>some name</name>
<files>
<file path='path/to/file'/>
</files>
</item>
某个名字
这可以正常工作;但当它遇到一个包含多个 <file> 元素的项时,它就会停止,并且在第二次写入时,end 事件会立即触发
如何让转换流为每个输入块发出多条记录?
回答:好的,我已经解决了这个问题。
解决方案是:不要多次调用 `tranny.write`,而是在 `tranny._transform` 函数中执行数据转换(这本应是显而易见的),
如下所示:
// Fan one parsed item out into flat records: one `{ name, file }` per entry
// in `data.files`, or a single record with an empty `file` when the item has
// no files. `done()` is called after all records have been pushed.
tranny._transform = function transform(data, encoding, done) {
  const files = data.files.length > 0 ? data.files : ['']
  files.forEach((file) => {
    this.push({ name: data.name, file })
  })
  done()
}
而 `endTag` 处理程序现在如下所示:
import fs from 'fs'
import expat from 'node-expat'
import { Transform } from 'stream'
/**
 * Streams `<item>` elements out of an XML file as flat `{ name, file }` records.
 *
 * The file is piped into a node-expat parser. Every completed `<item>` is
 * written once to an object-mode Transform, whose `_transform` fans it out
 * into one record per `<file path="…">` child, or a single record with an
 * empty `file` when the item has no files.
 *
 * @param {string} filePath - Path of the XML file to read.
 * @returns {Transform} Object-mode stream of `{ name, file }` records. It is
 *   destroyed with the error if the read stream or the parser emits 'error'.
 */
const xParser = (filePath) => {
  const stream = fs.createReadStream(filePath)
  const parser = new expat.Parser('UTF-8')
  const tranny = new Transform({ objectMode: true })

  // One input chunk (a parsed item) may produce several output records, so
  // the fan-out happens here through repeated push() calls instead of calling
  // tranny.write() once per file from the parser callbacks.
  tranny._transform = function transform(data, encoding, done) {
    const files = data.files.length > 0 ? data.files : ['']
    files.forEach((file) => {
      this.push({ name: data.name, file })
    })
    done()
  }

  // Parser state shared by the handlers below.
  let current
  let inItem = false
  let tag

  // Opening tag: start a fresh item, or track the element inside one.
  const startTag = (name, attrs) => {
    if (name === 'item') {
      inItem = true
      current = { name: '', files: [] }
      return
    }
    if (!inItem) return
    tag = name
    if (tag === 'file') {
      current.files.push(attrs.path)
    }
  }

  // Closing tag: the end of an <item> hands the whole item to the transform.
  const endTag = (name) => {
    if (name !== 'item') {
      // Text that follows a closing tag must not be credited to that tag.
      tag = undefined
      return
    }
    inItem = false
    // Trim once per item: the text handler may receive one text node in
    // several pieces, and trimming each piece would drop the whitespace at
    // the seams (e.g. "some " + "name" would become "somename").
    current.name = current.name.trim()
    tranny.write(current)
    tag = undefined
  }

  // Accumulate the raw text of <name>; trimming happens in endTag.
  const handleText = (text) => {
    if (inItem && tag === 'name') {
      current.name += text
    }
  }

  // The parser is finished: close the output stream.
  const handleDone = () => {
    console.log('done')
    tranny.end()
  }

  // pipe() does not forward errors, so surface read and parse failures on
  // the stream the caller actually holds, and stop reading the file.
  // NOTE(review): the parser's 'error' payload may not be an Error instance,
  // so it is wrapped when needed — confirm against node-expat.
  const fail = (err) => {
    stream.destroy()
    tranny.destroy(err instanceof Error ? err : new Error(String(err)))
  }

  stream.on('error', fail)
  parser
    .on('startElement', startTag)
    .on('endElement', endTag)
    .on('text', handleText)
    .on('end', handleDone)
    .on('error', fail)

  stream.pipe(parser)
  return tranny
}
module.exports = xParser
// Closing-tag handler after the fix: when an <item> ends, the whole item is
// written to the transform stream exactly once (the per-file fan-out lives
// in tranny._transform) and the per-item parser state is cleared.
const endTag = (name) => {
  if (name !== 'item') return
  inItem = false
  tranny.write(current)
  tag = undefined
  attributes = undefined
}
这意味着我也可以删除 `write` 函数。
现在它可以正常工作了,我用下面的代码进行了验证:
import xParser from './xParser'
import concat from 'concat-stream'
// Smoke test: pipe the parser's output into concat-stream and print what it
// collected as JSON.
// The module is imported as `xParser`, so that is the name that has to be
// called here (calling `xmlParser` throws a ReferenceError).
xParser('data/mybigdatafile.xml')
  .pipe(concat((data) => {
    console.log(JSON.stringify(data));
  }))
这为我提供了完整的数据集