Tensorflow 无法通过计算和应用梯度来训练模型
我试着训练一个简单的顺序模型。我想将拟合算法分解为收集梯度阶段和应用梯度阶段,以便用于强化学习。这个想法来自于一个倒立摆(cart-pole)的例子。无论如何,即使问题(在
func()
中定义)比较容易通过拟合方法训练的模型来解决,我也不能收到好的结果。我能做到的最好的事情是:
我一定错过了什么。谁能告诉我怎么了
在这里您可以找到与净输出可视化
import {layers, sequential, Sequential} from "@tensorflow/tfjs-layers";
import {
tensor2d,
Tensor,
losses,
variableGrads,
tidy,
train,
NamedTensorMap,
stack,
mean,
concat
} from "@tensorflow/tfjs";
import {ActivationIdentifier} from "@tensorflow/tfjs-layers/src/keras_format/activation_config";
import {NamedTensor} from "@tensorflow/tfjs-core/dist/tensor_types";
import {InitializerIdentifier} from "@tensorflow/tfjs-layers/src/initializers";
import {addHeatmap} from "./vis/heatmap";
/**
 * Target function the network should learn:
 *   y = x1*x2*0.9 + (1-x1)*(1-x2)*0.9
 * a smooth XNOR-like surface over [0, 1]^2.
 *
 * @param x - the two numeric inputs (only x[0] and x[1] are used)
 * @returns a [1, 1] tensor holding the scalar target value
 */
const func = (...x: number[]) => {
  const y1 = x[0] * x[1] * 0.9 + (1 - x[0]) * (1 - x[1]) * 0.9;
  return tensor2d([y1], [1, 1])
}
// Hidden-layer activation; the output layer uses sigmoid (set below).
const activation: ActivationIdentifier = "tanh"
// null kernelInitializer — presumably to fall back to the library default; TODO confirm.
const kernelInitializer: InitializerIdentifier = null
// Feed-forward net: 2 inputs -> 2 (linear) -> 16 (tanh) -> 1 (sigmoid).
const model: Sequential = sequential();
const inputLayer = layers.dense({
units: 2,
inputShape: [2],
kernelInitializer,
});
const hiddenLayer1 = layers.dense({
units: 16,
activation: activation,
//kernelInitializer,
useBias: true
});
const outputLayer = layers.dense({
units: 1,
activation: "sigmoid",
kernelInitializer,
useBias: true
});
// Grid resolution used when sampling the target and net outputs.
const dim = 10; // error sampling density
// Assemble the layers in order and create the optimizer used by epoch().
model.add(inputLayer);
model.add(hiddenLayer1);
model.add(outputLayer);
const optimizer = train.adam(0.1);
/**
 * Draws one random (x1, x2) sample, computes the mean-squared-error loss
 * between func(x1, x2) and the model's prediction, and returns the
 * gradients of that loss with respect to every trainable variable.
 */
const calculateGradient = () => {
  return tidy(() => {
    const lossFn = () => tidy(() => {
      const a = Math.random();
      const b = Math.random();
      const labels = func(a, b)
      const input = tensor2d([a, b], [1, 2])
      const prediction = model.predict(input) as Tensor;
      return losses.meanSquaredError(labels, prediction).asScalar();
    });
    const { grads } = variableGrads(lossFn);
    return grads;
  })
}
const createBatch = (n: number) => {
return tidy(() => {
const gradientsArrays = {}
for (let i = 0; i < n; i++) {
const gradient = calculateGradient();
Object.keys(gradient).forEach((entry) => {
gradientsArrays[entry] ? gradientsArrays[entry].push(gradient[entry]) : gradientsArrays[entry] = [gradient[entry]]
})
}
const gradientsMeans = {}
Object.keys(gradientsArrays).forEach(key => {
gradientsMeans[key] = mean(stack(gradientsArrays[key], 0))
})
return gradientsMeans;
})
}
/** Runs `iterations` training steps, each applying one 16-sample averaged gradient batch. */
const epoch = (iterations: number) => {
  for (let step = 0; step < iterations; step++) {
    const gradients = createBatch(16);
    optimizer.applyGradients(gradients)
  }
}
/**
 * Samples the target function on a dim x dim grid over [0, 1)^2.
 * @returns one {x, y, value} point per grid cell
 */
const calculateDesiredOutputs = () => {
  const samples = [];
  const step = 1 / dim;
  for (let y = 0; y < 1; y += step) {
    for (let x = 0; x < 1; x += step) {
      samples.push({ x, y, value: func(x, y).dataSync()[0] });
    }
  }
  return samples;
}
const calculateNetOutputs = () => {
const netOutputs = [];
for (let y = 0; y < 1; y += 1 / dim) {
for (let x = 0; x < 1; x += 1 / dim) {
const value = (<any>model.predict(tensor2d([x, y], [1, 2]))).dataSync()[0];
netOutputs.push({x, y, value});
}
}
return netOutputs
}
/**
 * Root of the summed squared differences between two sampled surfaces,
 * normalized by the number of grid cells (dim * dim).
 * Assumes a and b are sampled on the same grid, in the same order.
 */
const calculateError = (a: { value: number }[], b: { value: number }[]) => {
  let sumSquares = 0;
  a.forEach((point, i) => {
    const diff = point.value - b[i].value;
    sumSquares += diff * diff;
  });
  return Math.sqrt(sumSquares) / (dim * dim);
}
/**
 * Training driver: renders the target heatmap once, then alternates
 * 20-iteration training epochs with net-output heatmap / error updates,
 * for 256 rounds.
 */
const run = async () => {
  const desiredOutputs = calculateDesiredOutputs();
  const desiredOutputsHeatmap = addHeatmap({ dim });
  desiredOutputsHeatmap.update(desiredOutputs)
  const netOutputHeatmap = addHeatmap({ dim });
  for (let i = 0; i < 256; i++) {
    epoch(20);
    const netOutputs = calculateNetOutputs();
    console.log("epoch: ", i)
    console.log(calculateError(desiredOutputs, netOutputs))
    netOutputHeatmap.update(netOutputs);
    // Yield for 100 ms so the page can repaint the heatmap between epochs.
    await new Promise((r) => setTimeout(() => r(), 100));
  }
}
run();
import {layers, sequential, Sequential} from "@tensorflow/tfjs-layers";
import {
tensor2d,
Tensor,
losses,
variableGrads,
tidy,
train,
NamedTensorMap,
stack,
mean,
concat
} from "@tensorflow/tfjs";
import {ActivationIdentifier} from "@tensorflow/tfjs-layers/src/keras_format/activation_config";
import {NamedTensor} from "@tensorflow/tfjs-core/dist/tensor_types";
import {InitializerIdentifier} from "@tensorflow/tfjs-layers/src/initializers";
import {addHeatmap} from "./vis/heatmap";
const func = (...x) => {
const y1 = x[0] * x[1] * 0.9 + (1 - x[0]) * (1 - x[1]) * 0.9;
return tensor2d([y1], [1, 1])
}
const activation: ActivationIdentifier = "tanh"
const kernelInitializer: InitializerIdentifier = null
const model: Sequential = sequential();
const inputLayer = layers.dense({
units: 2,
inputShape: [2],
kernelInitializer,
});
const hiddenLayer1 = layers.dense({
units: 16,
activation: activation,
//kernelInitializer,
useBias: true
});
const outputLayer = layers.dense({
units: 1,
activation: "sigmoid",
kernelInitializer,
useBias: true
});
const dim = 10; // error sampling density
model.add(inputLayer);
model.add(hiddenLayer1);
model.add(outputLayer);
const optimizer = train.adam(0.1);
const calculateGradient = () => {
return tidy(() => {
const vGrads = variableGrads(() => tidy(() => {
const x1 = Math.random();
const x2 = Math.random();
const labels = func(x1, x2)
const input = tensor2d([x1, x2], [1, 2])
return losses.meanSquaredError(
labels,
model.predict(input) as Tensor
).asScalar();
}));
return vGrads.grads;
})
}
const createBatch = (n: number) => {
return tidy(() => {
const gradientsArrays = {}
for (let i = 0; i < n; i++) {
const gradient = calculateGradient();
Object.keys(gradient).forEach((entry) => {
gradientsArrays[entry] ? gradientsArrays[entry].push(gradient[entry]) : gradientsArrays[entry] = [gradient[entry]]
})
}
const gradientsMeans = {}
Object.keys(gradientsArrays).forEach(key => {
gradientsMeans[key] = mean(stack(gradientsArrays[key], 0))
})
return gradientsMeans;
})
}
const epoch = (iterations: number) => {
for (let i = 0; i < iterations; i++) {
let batch = createBatch(16);
optimizer.applyGradients(batch)
}
}
const calculateDesiredOutputs = () => {
const desiredOutputs = [];
for (let y = 0; y < 1; y += 1 / dim) {
for (let x = 0; x < 1; x += 1 / dim) {
desiredOutputs.push({x, y, value: func(x, y).dataSync()[0]});
}
}
return desiredOutputs;
}
const calculateNetOutputs = () => {
const netOutputs = [];
for (let y = 0; y < 1; y += 1 / dim) {
for (let x = 0; x < 1; x += 1 / dim) {
const value = (<any>model.predict(tensor2d([x, y], [1, 2]))).dataSync()[0];
netOutputs.push({x, y, value});
}
}
return netOutputs
}
const calculateError = (a: { value: number }[], b: { value: number }[]) => {
let error = 0;
for (let i = 0; i < a.length; i++) {
let e = a[i].value - b[i].value;
error += e * e
}
return Math.sqrt(error) / (dim * dim);
}
const run = async () => {
const desiredOutputs = calculateDesiredOutputs();
const desiredOutputsHeatmap = addHeatmap({dim});
desiredOutputsHeatmap.update(desiredOutputs)
const netOutputHeatmap = addHeatmap({dim});
let i = 0;
while (i < 256) {
epoch(20);
let netOutputs = calculateNetOutputs();
console.log("epoch: ", i)
console.log(calculateError(desiredOutputs, netOutputs))
netOutputHeatmap.update(netOutputs);
await new Promise((r) => setTimeout(() => r(), 100));
i++;
}
}
run();