Javascript 为tensorflow中的张量层解包位时性能较慢
我正在处理通过与星际争霸2客户端的WebSocket连接获得的数据,以从正在进行的游戏中获取图像数据。在某些情况下,可以将图像数据设置为每像素1位的格式。发生这种情况时,我需要“解包”响应中每个字节的位(1字节=>8位)。这是在下面的代码中完成的:Javascript 为tensorflow中的张量层解包位时性能较慢,javascript,bitmap,bit-manipulation,tensorflow.js,Javascript,Bitmap,Bit Manipulation,Tensorflow.js,我正在处理通过与星际争霸2客户端的WebSocket连接获得的数据,以从正在进行的游戏中获取图像数据。在某些情况下,可以将图像数据设置为每像素1位的格式。发生这种情况时,我需要“解包”响应中每个字节的位(1字节=>8位)。这是在下面的代码中完成的: function unpackbits(uint8data) { const results = new Uint8Array(8 * uint8data.length) let byte let offset for (let i
/**
 * Expand every byte of the input into eight 0/1 entries, most significant
 * bit first.
 *
 * @param {Uint8Array} uint8data - Packed input, one bit per logical element.
 * @returns {Uint8Array} Array of length `8 * uint8data.length` holding 0 or 1.
 */
function unpackbits(uint8data) {
  const byteCount = uint8data.length
  const bits = new Uint8Array(byteCount * 8)
  // Kept unrolled: each output slot is the corresponding bit shifted down to
  // position 0 and masked.
  for (let index = 0, out = 0; index < byteCount; index++, out += 8) {
    const value = uint8data[index]
    bits[out] = (value >> 7) & 1
    bits[out + 1] = (value >> 6) & 1
    bits[out + 2] = (value >> 5) & 1
    bits[out + 3] = (value >> 4) & 1
    bits[out + 4] = (value >> 3) & 1
    bits[out + 5] = (value >> 2) & 1
    bits[out + 6] = (value >> 1) & 1
    bits[out + 7] = value & 1
  }
  return bits
}
在我的一个测试中,这段代码运行1900次,执行时间为0.0737秒
这很慢
相比之下,python中的等效功能需要0.0209秒才能运行1900次。python代码如下所示:
def unpack_layer(plane):
    """Return a correctly shaped numpy array given the feature layer bytes.

    Reads ``plane.size``, ``plane.data`` and ``plane.bits_per_pixel``. The raw
    bytes are viewed with the dtype looked up in ``Feature.dtypes`` for the
    plane's bit depth; 1-bit planes are additionally expanded with
    ``np.unpackbits`` so that each bit becomes one array element.

    NOTE(review): the block structure below was reconstructed from a copy whose
    indentation had been stripped; the padding check is placed inside the 1-bit
    branch because its comment talks about padding bits -- confirm against the
    upstream source.

    Returns:
        A 2-D array of shape ``(size.y, size.x)``.
    """
    size = point.Point.build(plane.size)  # {x, y }
    data = np.frombuffer(plane.data, dtype=Feature.dtypes[plane.bits_per_pixel])
    if plane.bits_per_pixel == 1:
        data = np.unpackbits(data)
        if data.shape[0] != size.x * size.y:
            # This could happen if the correct length isn't a multiple of 8, leading
            # to some padding bits at the end of the string which are incorrectly
            # interpreted as data.
            data = data[:size.x * size.y]
    return data.reshape(size.y, size.x)
简而言之,javascript版本的时间大约是python版本的4倍
我将查看numpy unpackbits文档,因为它似乎比我自己的方法更有效-
然而,我想知道是否有人对我如何更好地优化自己的unpackbits函数或更好地让TensorFlow为我做这件事有任何想法?看起来TensorFlow.js没有按位AND函数,所以怀疑在TensorFlow.js中做这项工作需要一些编码练习 不过,一个建议是创建一个大小为8的256个Uint8Array数组,并用8字节翻译的完整列表预先填充它。这大大减少了可能重复值在0-255范围内的字节流的重复计算。例如,预计算数组中的第一个条目表示字节0的解包,因此是一个大小为8并填充了0的Uint8Array,下一个条目是另一个大小为8并填充了00000001的Uint8Array,以此类推,一直到表示字节255并填充了所有1的Uint8Array,大小为8 然后,在解包时,只需使用类型化数组方法将预计算的解包表示复制到
结果Uint8Array
希望这有帮助
EDIT创建了许多解包算法的变体,以测试内联计算与内存查找的性能,并对使用Chrome的结果感到惊讶。V8编译器的一些优化是非直观的
版本的差异
- unpackbits[FAST]:来自原始问题,作为比较其他变体的基准
- unpackbits1[FAST]:在原始版本的基础上做了如下修改……
- 在每个整数后指定“| 0”
- 使用一元自增运算(“++”),而不是在结果数组的偏移量索引上加增量
- 用实际值替换位掩码的计算。(即,看起来tensorflow.js没有按位AND函数,而不是
1,因此怀疑在tensorflow.js中进行工作需要一些编码技巧
不过,有一个建议是创建一个大小为8的256个Uint8Array数组,并用8字节翻译的完整列表预先填充它。这大大减少了可能重复值在0-255范围内的字节流的重复计算。例如,预计算数组中的第一个条目表示字节0,因此是一个大小为8且填充了0的Uint8Array,下一个条目是另一个大小为8且填充了00000001的Uint8Array,以此类推,一直到表示字节255的条目是一个大小为8且填充了所有1的Uint8Array
然后,在解包时,只需使用类型化数组方法将预计算的解包表示复制到结果Uint8Array
希望这有帮助
EDIT创建了许多解包算法的变体,以测试内联计算与内存查找的性能,并对使用Chrome的结果感到惊讶。V8编译器的一些优化不直观
版本的差异
- unpackbits[FAST]:来自原始问题,作为比较其他变体的基准
- unpackbits1[FAST]:在原始版本的基础上做了如下修改……
- 在每个整数后指定“| 0”
- 使用一元自增运算(“++”),而不是在结果数组的偏移量索引上加增量
- 用实际值替换位掩码的计算。(即,此响应不是
1,而是@Jon Trent答案下注释链的延续
编辑:包括整形部分的TensorFlow比较
我正在分析两种解包方法的性能:unpackbits1a 和 unpackbits(原始版本)。我还分析了将数据重新整形为 N×M 网格的不同方法,其中 N 可能与 M 相同。我得到的结果如下:
/**
 * Variant of `unpackbits` that writes through a running output cursor and
 * uses literal bit masks. Produces eight 0/1 entries per input byte, most
 * significant bit first.
 *
 * @param {Uint8Array} uint8data - Packed input bytes.
 * @returns {Uint8Array} Array of length `8 * uint8data.length` holding 0 or 1.
 */
function unpackbits1a(uint8data) {
  const total = uint8data.length
  const bits = new Uint8Array(8 * total)
  let cursor = 0
  for (let index = 0; index < total; index++) {
    const value = uint8data[index]
    bits[cursor++] = (value & 0x80) >> 7
    bits[cursor++] = (value & 0x40) >> 6
    bits[cursor++] = (value & 0x20) >> 5
    bits[cursor++] = (value & 0x10) >> 4
    bits[cursor++] = (value & 0x08) >> 3
    bits[cursor++] = (value & 0x04) >> 2
    bits[cursor++] = (value & 0x02) >> 1
    bits[cursor++] = value & 0x01
  }
  return bits
}
/**
 * Expand every byte of the input into eight 0/1 entries, most significant
 * bit first.
 *
 * @param {Uint8Array} uint8data - Packed input, one bit per logical element.
 * @returns {Uint8Array} Array of length `8 * uint8data.length` holding 0 or 1.
 */
function unpackbits(uint8data) {
  const byteCount = uint8data.length
  const bits = new Uint8Array(byteCount * 8)
  // Kept unrolled: each output slot is the corresponding bit shifted down to
  // position 0 and masked.
  for (let index = 0, out = 0; index < byteCount; index++, out += 8) {
    const value = uint8data[index]
    bits[out] = (value >> 7) & 1
    bits[out + 1] = (value >> 6) & 1
    bits[out + 2] = (value >> 5) & 1
    bits[out + 3] = (value >> 4) & 1
    bits[out + 4] = (value >> 3) & 1
    bits[out + 5] = (value >> 2) & 1
    bits[out + 6] = (value >> 1) & 1
    bits[out + 7] = value & 1
  }
  return bits
}
/**
 * Unpack a bit-packed byte array and arrange the bits into a 2-D grid of
 * plain arrays, copying element by element.
 *
 * Fix: the grid is now filled from the unpacked bit array by position
 * (`data[row * width + col]`). The previous code used the packed byte's
 * VALUE as the index into the bit array, which produced unrelated bits or
 * `undefined`.
 *
 * @param {Uint8Array} uint8data - Packed input bytes.
 * @param {number[]} [shape=[1, 1]] - `[rows, columns]` of the resulting grid.
 * @returns {number[][]} `rows` arrays of `columns` entries; entries past the
 *   end of the unpacked data are `undefined`.
 */
function unpackbitsToShape1(uint8data, shape = [1, 1]) {
  const data = unpackbits(uint8data)
  const rows = shape[0] | 0
  const width = shape[1] | 0
  const result = new Array(rows)
  for (let i = 0; i < rows; i++) {
    const rowStart = i * width
    const row = new Array(width)
    for (let j = 0; j < width; j++) {
      row[j] = data[rowStart + j]
    }
    result[i] = row
  }
  return result
}
/**
 * Unpack a bit-packed byte array and cut the resulting bit array into rows
 * with `slice`, so every row is its own typed-array copy.
 *
 * @param {Uint8Array} uint8data - Packed input bytes.
 * @param {number[]} [shape=[1, 1]] - `[rows, columns]` of the resulting grid.
 * @returns {Uint8Array[]} `rows` typed arrays, each sliced to `columns` bits.
 */
function unpackbitsToShape2(uint8data, shape = [1, 1]) {
  const bits = unpackbits(uint8data)
  const rowCount = shape[0] | 0
  const rowWidth = shape[1] | 0
  const rows = new Array(rowCount)
  for (let r = 0, start = 0; r < rowCount; r++, start += rowWidth) {
    rows[r] = bits.slice(start, start + rowWidth)
  }
  return rows
}
/**
 * Build a 2-D grid of bits by unpacking only the bytes that each row spans,
 * instead of unpacking the whole buffer first.
 *
 * Fix: rows whose width is not a multiple of 8 bits (for example 84) start in
 * the middle of a byte. The previous odd/even bookkeeping computed a "small"
 * bit width that was always 0, so such rows came out truncated and shifted.
 * Each row is now located by its absolute bit offset.
 *
 * @param {Uint8Array} uint8data - Packed input bytes.
 * @param {number[]} [shape=[1, 1]] - `[rows, columns]` of the resulting grid.
 * @returns {Uint8Array[]} `rows` typed arrays of exactly `columns` entries;
 *   bits beyond the end of the input are left as 0.
 */
function unpackbitsToShape3(uint8data, shape = [1, 1]) {
  const rows = shape[0] | 0
  const cols = shape[1] | 0
  const result = new Array(rows)
  for (let r = 0; r < rows; r++) {
    const firstBit = r * cols
    const lastBit = firstBit + cols
    // Byte range that contains every bit of this row (end is exclusive).
    const firstByte = firstBit >>> 3
    const lastByte = (lastBit + 7) >>> 3
    const bits = unpackbits(uint8data.subarray(firstByte, lastByte))
    // Bits of the first byte that still belong to the previous row.
    const skip = firstBit & 7
    const row = new Uint8Array(cols)
    row.set(bits.subarray(skip, skip + cols))
    result[r] = row
  }
  return result
}
var tf = require('@tensorflow/tfjs')
tf = require('@tensorflow/tfjs-node')
/**
 * Unpack a bit-packed byte array and hand the flat bit array to TensorFlow.js
 * to build an `int32` tensor of the requested shape.
 *
 * @param {Uint8Array} uint8data - Packed input bytes.
 * @param {number[]} shape - Shape passed straight through to `tf.tensor`.
 * @returns {*} Whatever `tf.tensor` returns for the unpacked bits.
 */
function unpackBitsToShapeTensorflow(uint8data, shape) {
  const bits = unpackbits(uint8data)
  return tf.tensor(bits, shape, 'int32')
}
// Random packed inputs for the benchmarks: 512, 882 and 1250 bytes hold
// exactly 64*64, 84*84 and 100*100 bits respectively.
var test64by64 = Uint8Array.from({ length: 512 }, () => Math.floor(256 * Math.random()))
var test84by84 = Uint8Array.from({ length: 882 }, () => Math.floor(256 * Math.random()))
var test100by100 = Uint8Array.from({ length: 1250 }, () => Math.floor(256 * Math.random()))
/**
 * Soft assertion for the benchmark script: reports a failed check on stderr
 * via `console.error` but never throws, so the remaining timings still run.
 *
 * @param {*} condition - Truthy when the check passed.
 * @param {string} errMsg - Message logged when the check failed.
 */
function assert(condition, errMsg) {
  if (condition) return
  console.error(errMsg)
}
// Benchmark driver: for each grid size, time every unpacking variant once
// with console.time/timeEnd and sanity-check the shape of the 2-D results.
// The last result is kept in `foo`, as in the hand-unrolled version.
var foo
{
  // [banner, packed input, side length of the square grid]
  const runs = [
    ['********* 64 x 64 *********\n\n', test64by64, 64],
    ['\n\n********* 84 x 84 *********\n\n', test84by84, 84],
    ['\n\n********* 100 x 100 *********\n\n', test100by100, 100]
  ]
  // [timer label, start message, function, finish message]
  const flatVariants = [
    ['u1a', 'Starting unpackbits1a.', unpackbits1a, 'Finished unpackbits1a.'],
    ['u-orig', 'Starting "unpackbits"', unpackbits, 'Finished unpackbits.']
  ]
  // [timer label, function name used in the messages, function]
  const shapedVariants = [
    ['u1', 'unpackbitsToShape1', unpackbitsToShape1],
    ['u2', 'unpackbitsToShape2', unpackbitsToShape2],
    ['u3', 'unpackbitsToShape3', unpackbitsToShape3]
  ]
  for (const [banner, packed, side] of runs) {
    console.log(banner)
    for (const [label, startMsg, unpack, doneMsg] of flatVariants) {
      console.log(startMsg)
      console.time(label)
      foo = unpack(packed)
      console.timeEnd(label)
      console.log(doneMsg)
    }
    for (const [label, fnName, unpack] of shapedVariants) {
      console.log(`Starting "${fnName}"`)
      console.time(label)
      foo = unpack(packed, [side, side])
      console.timeEnd(label)
      assert(
        foo.length === side && foo[0].length === side,
        `foo.length === ${side} && foo[0].length === ${side}`
      )
      console.log(`Finished ${fnName}.`)
    }
    console.log('\nStarting "unpackBitsToShapeTensorflow"')
    console.time('u-tensor')
    foo = unpackBitsToShapeTensorflow(packed, [side, side])
    console.timeEnd('u-tensor')
    console.log('Finished unpackBitsToShapeTensorflow.')
  }
}
此回复是@Jon Trent回复下评论链的延续
编辑:包括整形部分的TensorFlow比较
我正在分析两种解包方法的性能:unpackbits1a 和 unpackbits(原始版本)。我还分析了将数据重新整形为 N×M 网格的不同方法,其中 N 可能与 M 相同。我得到的结果如下:
/**
 * Variant of `unpackbits` that writes through a running output cursor and
 * uses literal bit masks. Produces eight 0/1 entries per input byte, most
 * significant bit first.
 *
 * @param {Uint8Array} uint8data - Packed input bytes.
 * @returns {Uint8Array} Array of length `8 * uint8data.length` holding 0 or 1.
 */
function unpackbits1a(uint8data) {
  const total = uint8data.length
  const bits = new Uint8Array(8 * total)
  let cursor = 0
  for (let index = 0; index < total; index++) {
    const value = uint8data[index]
    bits[cursor++] = (value & 0x80) >> 7
    bits[cursor++] = (value & 0x40) >> 6
    bits[cursor++] = (value & 0x20) >> 5
    bits[cursor++] = (value & 0x10) >> 4
    bits[cursor++] = (value & 0x08) >> 3
    bits[cursor++] = (value & 0x04) >> 2
    bits[cursor++] = (value & 0x02) >> 1
    bits[cursor++] = value & 0x01
  }
  return bits
}
/**
 * Expand every byte of the input into eight 0/1 entries, most significant
 * bit first.
 *
 * @param {Uint8Array} uint8data - Packed input, one bit per logical element.
 * @returns {Uint8Array} Array of length `8 * uint8data.length` holding 0 or 1.
 */
function unpackbits(uint8data) {
  const byteCount = uint8data.length
  const bits = new Uint8Array(byteCount * 8)
  // Kept unrolled: each output slot is the corresponding bit shifted down to
  // position 0 and masked.
  for (let index = 0, out = 0; index < byteCount; index++, out += 8) {
    const value = uint8data[index]
    bits[out] = (value >> 7) & 1
    bits[out + 1] = (value >> 6) & 1
    bits[out + 2] = (value >> 5) & 1
    bits[out + 3] = (value >> 4) & 1
    bits[out + 4] = (value >> 3) & 1
    bits[out + 5] = (value >> 2) & 1
    bits[out + 6] = (value >> 1) & 1
    bits[out + 7] = value & 1
  }
  return bits
}
/**
 * Unpack a bit-packed byte array and arrange the bits into a 2-D grid of
 * plain arrays, copying element by element.
 *
 * Fix: the grid is now filled from the unpacked bit array by position
 * (`data[row * width + col]`). The previous code used the packed byte's
 * VALUE as the index into the bit array, which produced unrelated bits or
 * `undefined`.
 *
 * @param {Uint8Array} uint8data - Packed input bytes.
 * @param {number[]} [shape=[1, 1]] - `[rows, columns]` of the resulting grid.
 * @returns {number[][]} `rows` arrays of `columns` entries; entries past the
 *   end of the unpacked data are `undefined`.
 */
function unpackbitsToShape1(uint8data, shape = [1, 1]) {
  const data = unpackbits(uint8data)
  const rows = shape[0] | 0
  const width = shape[1] | 0
  const result = new Array(rows)
  for (let i = 0; i < rows; i++) {
    const rowStart = i * width
    const row = new Array(width)
    for (let j = 0; j < width; j++) {
      row[j] = data[rowStart + j]
    }
    result[i] = row
  }
  return result
}
/**
 * Unpack a bit-packed byte array and cut the resulting bit array into rows
 * with `slice`, so every row is its own typed-array copy.
 *
 * @param {Uint8Array} uint8data - Packed input bytes.
 * @param {number[]} [shape=[1, 1]] - `[rows, columns]` of the resulting grid.
 * @returns {Uint8Array[]} `rows` typed arrays, each sliced to `columns` bits.
 */
function unpackbitsToShape2(uint8data, shape = [1, 1]) {
  const bits = unpackbits(uint8data)
  const rowCount = shape[0] | 0
  const rowWidth = shape[1] | 0
  const rows = new Array(rowCount)
  for (let r = 0, start = 0; r < rowCount; r++, start += rowWidth) {
    rows[r] = bits.slice(start, start + rowWidth)
  }
  return rows
}
/**
 * Build a 2-D grid of bits by unpacking only the bytes that each row spans,
 * instead of unpacking the whole buffer first.
 *
 * Fix: rows whose width is not a multiple of 8 bits (for example 84) start in
 * the middle of a byte. The previous odd/even bookkeeping computed a "small"
 * bit width that was always 0, so such rows came out truncated and shifted.
 * Each row is now located by its absolute bit offset.
 *
 * @param {Uint8Array} uint8data - Packed input bytes.
 * @param {number[]} [shape=[1, 1]] - `[rows, columns]` of the resulting grid.
 * @returns {Uint8Array[]} `rows` typed arrays of exactly `columns` entries;
 *   bits beyond the end of the input are left as 0.
 */
function unpackbitsToShape3(uint8data, shape = [1, 1]) {
  const rows = shape[0] | 0
  const cols = shape[1] | 0
  const result = new Array(rows)
  for (let r = 0; r < rows; r++) {
    const firstBit = r * cols
    const lastBit = firstBit + cols
    // Byte range that contains every bit of this row (end is exclusive).
    const firstByte = firstBit >>> 3
    const lastByte = (lastBit + 7) >>> 3
    const bits = unpackbits(uint8data.subarray(firstByte, lastByte))
    // Bits of the first byte that still belong to the previous row.
    const skip = firstBit & 7
    const row = new Uint8Array(cols)
    row.set(bits.subarray(skip, skip + cols))
    result[r] = row
  }
  return result
}
var tf = require('@tensorflow/tfjs')
tf = require('@tensorflow/tfjs-node')
/**
 * Unpack a bit-packed byte array and hand the flat bit array to TensorFlow.js
 * to build an `int32` tensor of the requested shape.
 *
 * @param {Uint8Array} uint8data - Packed input bytes.
 * @param {number[]} shape - Shape passed straight through to `tf.tensor`.
 * @returns {*} Whatever `tf.tensor` returns for the unpacked bits.
 */
function unpackBitsToShapeTensorflow(uint8data, shape) {
  const bits = unpackbits(uint8data)
  return tf.tensor(bits, shape, 'int32')
}
// Random packed inputs for the benchmarks: 512, 882 and 1250 bytes hold
// exactly 64*64, 84*84 and 100*100 bits respectively.
var test64by64 = Uint8Array.from({ length: 512 }, () => Math.floor(256 * Math.random()))
var test84by84 = Uint8Array.from({ length: 882 }, () => Math.floor(256 * Math.random()))
var test100by100 = Uint8Array.from({ length: 1250 }, () => Math.floor(256 * Math.random()))
/**
 * Soft assertion for the benchmark script: reports a failed check on stderr
 * via `console.error` but never throws, so the remaining timings still run.
 *
 * @param {*} condition - Truthy when the check passed.
 * @param {string} errMsg - Message logged when the check failed.
 */
function assert(condition, errMsg) {
  if (condition) return
  console.error(errMsg)
}
// Benchmark driver: for each grid size, time every unpacking variant once
// with console.time/timeEnd and sanity-check the shape of the 2-D results.
// The last result is kept in `foo`, as in the hand-unrolled version.
var foo
{
  // [banner, packed input, side length of the square grid]
  const runs = [
    ['********* 64 x 64 *********\n\n', test64by64, 64],
    ['\n\n********* 84 x 84 *********\n\n', test84by84, 84],
    ['\n\n********* 100 x 100 *********\n\n', test100by100, 100]
  ]
  // [timer label, start message, function, finish message]
  const flatVariants = [
    ['u1a', 'Starting unpackbits1a.', unpackbits1a, 'Finished unpackbits1a.'],
    ['u-orig', 'Starting "unpackbits"', unpackbits, 'Finished unpackbits.']
  ]
  // [timer label, function name used in the messages, function]
  const shapedVariants = [
    ['u1', 'unpackbitsToShape1', unpackbitsToShape1],
    ['u2', 'unpackbitsToShape2', unpackbitsToShape2],
    ['u3', 'unpackbitsToShape3', unpackbitsToShape3]
  ]
  for (const [banner, packed, side] of runs) {
    console.log(banner)
    for (const [label, startMsg, unpack, doneMsg] of flatVariants) {
      console.log(startMsg)
      console.time(label)
      foo = unpack(packed)
      console.timeEnd(label)
      console.log(doneMsg)
    }
    for (const [label, fnName, unpack] of shapedVariants) {
      console.log(`Starting "${fnName}"`)
      console.time(label)
      foo = unpack(packed, [side, side])
      console.timeEnd(label)
      assert(
        foo.length === side && foo[0].length === side,
        `foo.length === ${side} && foo[0].length === ${side}`
      )
      console.log(`Finished ${fnName}.`)
    }
    console.log('\nStarting "unpackBitsToShapeTensorflow"')
    console.time('u-tensor')
    foo = unpackBitsToShapeTensorflow(packed, [side, side])
    console.timeEnd('u-tensor')
    console.log('Finished unpackBitsToShapeTensorflow.')
  }
}
不确定这是否有帮助,但当我被tensorflow中的位运算符所困扰时,我感到很不舒服,因为根据最初的问题,需要将字节流转换为位流。简单使用整数除法和模也可以做到这一点
简言之,通过示例的算法是这样的。给定字节流[92]
- 对每个字节同时做除以16的整数除法和对16取模,得到2个值,即分别为[5]和[12]
- 将这些结果交织成张量[5,12]
- 对其中每个值做除以4的整数除法和对4取模,得到[1,3]和[1,0]
- 将这些结果交织成张量[1,1,3,0]
- 再做除以2的整数除法和对2取模,得到[0,0,1,0]和[1,1,1,0]
- 将这些结果交织成张量[0,1,0,1,1,1,0,0],即92的8个二进制位
********* 64 x 64 *********
Starting unpackbits1a.
u1a: 0.513ms
Finished unpackbits1a.
Starting "unpackbits"
u-orig: 0.189ms
Finished unpackbits.
Starting "unpackbitsToShape1"
u1: 0.434ms
Finished unpackbitsToShape1.
Starting "unpackbitsToShape2"
u2: 0.365ms
Finished unpackbitsToShape2.
Starting "unpackbitsToShape3"
u3: 0.590ms
Finished unpackbitsToShape3.
Starting "unpackBitsToShapeTensorflow"
u-tensor: 0.508ms
Finished unpackBitsToShapeTensorflow.
********* 84 x 84 *********
Starting unpackbits1a.
u1a: 0.222ms
Finished unpackbits1a.
Starting "unpackbits"
u-orig: 0.425ms
Finished unpackbits.
Starting "unpackbitsToShape1"
u1: 0.622ms
Finished unpackbitsToShape1.
Starting "unpackbitsToShape2"
u2: 0.303ms
Finished unpackbitsToShape2.
Starting "unpackbitsToShape3"
u3: 0.388ms
Finished unpackbitsToShape3.
Starting "unpackBitsToShapeTensorflow"
u-tensor: 0.175ms
Finished unpackBitsToShapeTensorflow.
********* 100 x 100 *********
Starting unpackbits1a.
u1a: 1.502ms
Finished unpackbits1a.
Starting "unpackbits"
u-orig: 0.018ms
Finished unpackbits.
Starting "unpackbitsToShape1"
u1: 1.631ms
Finished unpackbitsToShape1.
Starting "unpackbitsToShape2"
u2: 0.072ms
Finished unpackbitsToShape2.
Starting "unpackbitsToShape3"
u3: 0.159ms
Finished unpackbitsToShape3.
Starting "unpackBitsToShapeTensorflow"
u-tensor: 0.052ms
Finished unpackBitsToShapeTensorflow.