TensorFlow:更改超参数并添加新的观测向量后,如何恢复 ML-Agents 训练
我正在使用 Unity 的 ML-Agents 训练一个智能体。当我更改堆叠向量(stacked vectors)的数量、观测向量的数量以及超参数之后,就无法从上一次训练中恢复训练,因为 TensorFlow 报错说 lhs 与 rhs 的形状不一致。我希望能够修改智能体脚本和配置文件,并使用这些新参数恢复训练,以免丢失智能体之前取得的进展……因为目前我只能重新开始一次新的训练,或者不去更改观测向量的数量等。该如何做到这一点?多谢各位。编辑:下面是一个我想测试的示例,以及我在 RollerBall ML-Agents 教程中遇到的错误。参见此处。目标:我想看看观测向量的选择对智能体训练的影响。标题:TensorFlow:更改超参数并添加新的观测向量后,如何恢复 ML-Agents 训练;标签:tensorflow, unity3d, resume, Tensorflow, Unity3d, Resume。我正在使用 Unity 的 ML-Agents 训练一个智能体。当我更改堆叠向量(stacked vectors)的数量、观测向量的数量以及超参数之后,就无法从上一次训练中恢复训练,因为 TensorFlow 报错说 lhs 与 rhs 的形状不一致。我希望能够修改智能体脚本和配置文件,并使用这些新参数恢复训练,以免丢失智能体之前取得的进展……因为目前我只能重新开始一次新的训练,或者不去更改观测向量的数量等。该如何做到这一点?多谢各位。编辑:下面是一个我想测试的示例,以及我在 RollerBall ML-Agents 教程中遇到的错误。参见此处。目标:我想看看观测向量的选择对智能体训练的影响。我使
public Transform Target;
public override void OnEpisodeBegin()
{
if (this.transform.localPosition.y < 0)
{
// If the Agent fell, zero its momentum
this.rBody.angularVelocity = Vector3.zero;
this.rBody.velocity = Vector3.zero;
this.transform.localPosition = new Vector3(0, 0.5f, 0);
}
// Move the target to a new spot
Target.localPosition = new Vector3(Random.value * 8 - 4,
0.5f,
Random.value * 8 - 4);
}
public override void CollectObservations(VectorSensor sensor)
{
// Target and Agent positions
sensor.AddObservation(Target.localPosition);
sensor.AddObservation(this.transform.localPosition);
// Agent velocity
sensor.AddObservation(rBody.velocity.x);
sensor.AddObservation(rBody.velocity.z);
}
public float speed = 10;
public override void OnActionReceived(float[] vectorAction)
{
// Actions, size = 2
Vector3 controlSignal = Vector3.zero;
controlSignal.x = vectorAction[0];
controlSignal.z = vectorAction[1];
rBody.AddForce(controlSignal * speed);
// Rewards
float distanceToTarget = Vector3.Distance(this.transform.localPosition, Target.localPosition);
// Reached target
if (distanceToTarget < 1.42f)
{
SetReward(1.0f);
EndEpisode();
}
// Fell off platform
if (this.transform.localPosition.y < 0)
{
EndEpisode();
}
}
public override void Heuristic(float[] actionsOut)
{
actionsOut[0] = Input.GetAxis("Horizontal");
actionsOut[1] = Input.GetAxis("Vertical");
}
}
public Transform Target;
public override void OnEpisodeBegin()
{
if (this.transform.localPosition.y < 0)
{
// If the Agent fell, zero its momentum
this.rBody.angularVelocity = Vector3.zero;
this.rBody.velocity = Vector3.zero;
this.transform.localPosition = new Vector3(0, 0.5f, 0);
}
// Move the target to a new spot
Target.localPosition = new Vector3(Random.value * 8 - 4,
0.5f,
Random.value * 8 - 4);
}
public override void CollectObservations(VectorSensor sensor)
{
// Target and Agent positions
sensor.AddObservation(Target.localPosition);
sensor.AddObservation(this.transform.localPosition);
// Agent velocity
//sensor.AddObservation(rBody.velocity.x);
//sensor.AddObservation(rBody.velocity.z);
}
public float speed = 10;
public override void OnActionReceived(float[] vectorAction)
{
// Actions, size = 2
Vector3 controlSignal = Vector3.zero;
controlSignal.x = vectorAction[0];
controlSignal.z = vectorAction[1];
rBody.AddForce(controlSignal * speed);
// Rewards
float distanceToTarget = Vector3.Distance(this.transform.localPosition, Target.localPosition);
// Reached target
if (distanceToTarget < 1.42f)
{
SetReward(1.0f);
EndEpisode();
}
// Fell off platform
if (this.transform.localPosition.y < 0)
{
EndEpisode();
}
}
public override void Heuristic(float[] actionsOut)
{
actionsOut[0] = Input.GetAxis("Horizontal");
actionsOut[1] = Input.GetAxis("Vertical");
}
}
/// <summary>
/// Transform of the target object. Its local position is re-randomized in
/// OnEpisodeBegin, observed in CollectObservations, and used for the distance
/// check in OnActionReceived.
/// </summary>
public Transform Target;
/// <summary>
/// Prepares the next episode: puts a fallen agent back on its start position
/// with no residual motion, then relocates the target.
/// </summary>
public override void OnEpisodeBegin()
{
    // A negative local Y is treated as "the agent fell"; only then is it reset.
    bool agentFell = transform.localPosition.y < 0;
    if (agentFell)
    {
        rBody.angularVelocity = Vector3.zero;
        rBody.velocity = Vector3.zero;
        transform.localPosition = new Vector3(0, 0.5f, 0);
    }

    // X is drawn before Z so the random values are consumed in the same order
    // as before; Y stays fixed at 0.5.
    float targetX = Random.value * 8 - 4;
    float targetZ = Random.value * 8 - 4;
    Target.localPosition = new Vector3(targetX, 0.5f, targetZ);
}
/// <summary>
/// Feeds the sensor with the target's local position, the agent's local
/// position, and the X and Z components of the agent's velocity, in that order.
/// </summary>
/// <param name="sensor">Sensor that receives the observations.</param>
public override void CollectObservations(VectorSensor sensor)
{
    // Positions: target first, then the agent itself.
    Vector3 targetPosition = Target.localPosition;
    sensor.AddObservation(targetPosition);

    Vector3 agentPosition = transform.localPosition;
    sensor.AddObservation(agentPosition);

    // Velocity: only the X and Z components are observed.
    float velocityX = rBody.velocity.x;
    sensor.AddObservation(velocityX);

    float velocityZ = rBody.velocity.z;
    sensor.AddObservation(velocityZ);
}
/// <summary>
/// Multiplier applied to the control signal before it is passed to
/// rBody.AddForce in OnActionReceived.
/// </summary>
public float speed = 10;
/// <summary>
/// Applies the received action as a force on the agent's rigidbody, then ends
/// the episode when the agent either reaches the target (reward 1.0) or falls
/// below the platform (no reward set here).
/// </summary>
/// <param name="vectorAction">
/// Action values; element 0 is used as the X force component and element 1 as
/// the Z force component.
/// </param>
public override void OnActionReceived(float[] vectorAction)
{
    // Actions, size = 2
    Vector3 controlSignal = Vector3.zero;
    controlSignal.x = vectorAction[0];
    controlSignal.z = vectorAction[1];
    rBody.AddForce(controlSignal * speed);

    // Rewards
    float distanceToTarget = Vector3.Distance(this.transform.localPosition, Target.localPosition);

    // The two terminal conditions are mutually exclusive: once the episode has
    // been ended for reaching the target, the fall check must not run against
    // post-EndEpisode state or end the episode a second time in the same step.
    if (distanceToTarget < 1.42f)
    {
        // Reached target
        SetReward(1.0f);
        EndEpisode();
    }
    else if (this.transform.localPosition.y < 0)
    {
        // Fell off platform
        EndEpisode();
    }
}
/// <summary>
/// Fills the action buffer from the "Horizontal" and "Vertical" input axes.
/// </summary>
/// <param name="actionsOut">
/// Output buffer; element 0 receives the horizontal axis value and element 1
/// the vertical axis value.
/// </param>
public override void Heuristic(float[] actionsOut)
{
    float horizontalAxis = Input.GetAxis("Horizontal");
    actionsOut[0] = horizontalAxis;

    float verticalAxis = Input.GetAxis("Vertical");
    actionsOut[1] = verticalAxis;
}
}
/// <summary>
/// Transform of the target object. Its local position is re-randomized in
/// OnEpisodeBegin, observed in CollectObservations, and used for the distance
/// check in OnActionReceived.
/// </summary>
public Transform Target;
/// <summary>
/// Prepares the next episode: puts a fallen agent back on its start position
/// with no residual motion, then relocates the target.
/// </summary>
public override void OnEpisodeBegin()
{
    // A negative local Y is treated as "the agent fell"; only then is it reset.
    bool agentFell = transform.localPosition.y < 0;
    if (agentFell)
    {
        rBody.angularVelocity = Vector3.zero;
        rBody.velocity = Vector3.zero;
        transform.localPosition = new Vector3(0, 0.5f, 0);
    }

    // X is drawn before Z so the random values are consumed in the same order
    // as before; Y stays fixed at 0.5.
    float targetX = Random.value * 8 - 4;
    float targetZ = Random.value * 8 - 4;
    Target.localPosition = new Vector3(targetX, 0.5f, targetZ);
}
/// <summary>
/// Feeds the sensor with the target's local position followed by the agent's
/// local position. This variant deliberately observes positions only.
/// </summary>
/// <param name="sensor">Sensor that receives the observations.</param>
public override void CollectObservations(VectorSensor sensor)
{
    // Positions: target first, then the agent itself.
    Vector3 targetPosition = Target.localPosition;
    sensor.AddObservation(targetPosition);

    Vector3 agentPosition = transform.localPosition;
    sensor.AddObservation(agentPosition);

    // Agent velocity (rBody.velocity.x / rBody.velocity.z) is intentionally
    // NOT observed in this variant, so it emits two fewer values than the
    // version that includes velocity.
    // NOTE(review): the configured vector observation size presumably has to
    // match this smaller count - confirm in the Behavior Parameters.
}
/// <summary>
/// Multiplier applied to the control signal before it is passed to
/// rBody.AddForce in OnActionReceived.
/// </summary>
public float speed = 10;
/// <summary>
/// Applies the received action as a force on the agent's rigidbody, then ends
/// the episode when the agent either reaches the target (reward 1.0) or falls
/// below the platform (no reward set here).
/// </summary>
/// <param name="vectorAction">
/// Action values; element 0 is used as the X force component and element 1 as
/// the Z force component.
/// </param>
public override void OnActionReceived(float[] vectorAction)
{
    // Actions, size = 2
    Vector3 controlSignal = Vector3.zero;
    controlSignal.x = vectorAction[0];
    controlSignal.z = vectorAction[1];
    rBody.AddForce(controlSignal * speed);

    // Rewards
    float distanceToTarget = Vector3.Distance(this.transform.localPosition, Target.localPosition);

    // The two terminal conditions are mutually exclusive: once the episode has
    // been ended for reaching the target, the fall check must not run against
    // post-EndEpisode state or end the episode a second time in the same step.
    if (distanceToTarget < 1.42f)
    {
        // Reached target
        SetReward(1.0f);
        EndEpisode();
    }
    else if (this.transform.localPosition.y < 0)
    {
        // Fell off platform
        EndEpisode();
    }
}
/// <summary>
/// Fills the action buffer from the "Horizontal" and "Vertical" input axes.
/// </summary>
/// <param name="actionsOut">
/// Output buffer; element 0 receives the horizontal axis value and element 1
/// the vertical axis value.
/// </param>
public override void Heuristic(float[] actionsOut)
{
    float horizontalAxis = Input.GetAxis("Horizontal");
    actionsOut[0] = horizontalAxis;

    float verticalAxis = Input.GetAxis("Vertical");
    actionsOut[1] = verticalAxis;
}
}