编译 C 程序时出现错误:"error: expected identifier or '(' before 'float'"
我试图理解(并将其“翻译”成 Python)一个 C 程序,它模拟了推车-摆杆(cart-pole)系统的强化学习。我尝试做的一个修改是将
pole.h
头文件移动到pole.c
中。但是,当我尝试使用 gcc pole3.c -lm
进行编译时(其中pole3.c
是我命名的已编辑程序),我得到以下两个错误:
pole3.c:46:35: error: expected identifier or ‘(’ before ‘float’
#define random ((float) rand() / (float)((1 << 31) - 1))
^
pole3.c:46:42: error: expected ‘)’ before ‘rand’
#define random ((float) rand() / (float)((1 << 31) - 1))
目前,我已退回一步,只把
pole.h
的内容粘贴到原始的 pole.c
中,没有保留之前的任何其他修改。这样得到的程序(如下所示)可以编译:
/* Parameters for reinforcement algorithm. */

/* Two-argument minimum / maximum.  Every argument is parenthesised so
   that an operator inside an argument cannot rebind against the macro's
   own comparison.  The selected argument is still evaluated twice, so do
   not pass expressions with side effects. */
#define min(x, y) (((x) <= (y)) ? (x) : (y))
#define max(x, y) (((x) >= (y)) ? (x) : (y))
/* Logistic function of the action weight s, with s clamped to [-50, 50]
   before exponentiation. */
#define prob_push_right(s) (1.0 / (1.0 + exp(-max(-50.0, min((s), 50.0)))))
/* rand() divided by 2^31 - 1.  The shift uses an unsigned operand because
   1 << 31 overflows a 32-bit signed int, which is undefined behaviour. */
#define random ((float) rand() / (float)((1U << 31) - 1))
#define N_BOXES 162 /* Number of disjoint boxes of state space. */
#define ALPHA 1000 /* Learning rate for action weights, w. */
#define BETA 0.5 /* Learning rate for critic weights, v. */
#define GAMMA 0.95 /* Discount factor for critic. */
#define LAMBDAw 0.9 /* Decay rate for w eligibility trace. */
#define LAMBDAv 0.8 /* Decay rate for v eligibility trace. */
#define MAX_FAILURES 300 /* Termination criterion. */
#define MAX_STEPS 100000 /* Steps without failure that count as success. */
#define RUNS 1 /* Number of times main() calls run_trial(). */
/*** Parameters for simulation ***/
#define GRAVITY 9.8
#define MASSCART 1.0
#define MASSPOLE 0.1
#define TOTAL_MASS (MASSPOLE + MASSCART)
#define LENGTH 0.5 /* actually half the pole's length */
#define POLEMASS_LENGTH (MASSPOLE * LENGTH)
#define FORCE_MAG 10.0
#define TAU 0.02 /* seconds between state updates */
#define FOURTHIRDS 1.3333333333333
#include <math.h>
// #include "pole.h"
typedef float vector[N_BOXES];
/*----------------------------------------------------------------------
   main:  Prompts for an integer seed, seeds rand() with it and calls
   run_trial() RUNS times.
   Returns 0 on completion, 1 if no integer seed could be read.
----------------------------------------------------------------------*/
int main(void)
{
    int i, seed;

    printf("enter seed:");
    /* A failed conversion would leave seed uninitialised, so stop here
       instead of seeding the generator with an indeterminate value. */
    if (scanf("%d", &seed) != 1) {
        printf("invalid seed\n");
        return 1;
    }
    srand(seed);
    for (i = 0; i < RUNS; i++)
        run_trial(i);
    return 0;
}
/*----------------------------------------------------------------------
   run_trial:  Runs one complete learning run on the simulated cart-pole.
   Starting from zeroed weights and the state (0 0 0 0), it repeats the
   action-learn loop until either MAX_STEPS steps pass without a failure
   or MAX_FAILURES failures have occurred.  One line is printed per
   failure, followed by a summary line.

   run:  run number, used only in the summary line.
   Returns 0 (callers in this file ignore the value).
----------------------------------------------------------------------*/
int run_trial(int run)
{
    float x,            /* cart position, meters */
          x_dot,        /* cart velocity */
          theta,        /* pole angle, radians */
          theta_dot;    /* pole angular velocity */
    vector w,           /* vector of action weights */
           v,           /* vector of critic weights */
           e,           /* vector of action weight eligibilities */
           xbar;        /* vector of critic weight eligibilities */
    float p, oldp, rhat, r;
    int tsteps = 0, box, i, y, steps = 0, failures = 0, failed;

    /*--- Initialize action and heuristic critic weights and traces. ---*/
    for (i = 0; i < N_BOXES; i++)
        w[i] = v[i] = xbar[i] = e[i] = 0.0;

    /*--- Starting state is (0 0 0 0) ---*/
    x = x_dot = theta = theta_dot = 0.0;

    /*--- Find box in state space containing start state ---*/
    box = get_box(x, x_dot, theta, theta_dot);

    /*--- Iterate through the action-learn loop.  The loop test itself
          increments steps, so inside the body steps already counts the
          step being simulated. ---*/
    while (steps++ < MAX_STEPS && failures < MAX_FAILURES)
    {
        /*--- Choose action randomly, biased by current weight. ---*/
        y = (random < prob_push_right(w[box]));
        tsteps++; /* total number of steps */

        /*--- Update traces. ---*/
        e[box] += (1.0 - LAMBDAw) * (y - 0.5);
        xbar[box] += (1.0 - LAMBDAv);

        /*--- Remember prediction of failure for current state ---*/
        oldp = v[box];

        /*--- Apply action to the simulated cart-pole ---*/
        cart_pole(y, &x, &x_dot, &theta, &theta_dot);

        /*--- Get box of state space containing the resulting state. ---*/
        box = get_box(x, x_dot, theta, theta_dot);

        if (box < 0)
        {
            /*--- Failure occurred. ---*/
            failed = 1;
            failures++;
            printf("Trial %d was %d steps.\n", failures, steps);
            steps = 0;

            /*--- Reset state to (0 0 0 0). Find the box. ---*/
            x = x_dot = theta = theta_dot = 0.0;
            box = get_box(x, x_dot, theta, theta_dot);

            /*--- Reinforcement upon failure is -1. Prediction of failure is 0. ---*/
            r = -1.0;
            p = 0.;
        }
        else
        {
            /*--- Not a failure. ---*/
            failed = 0;

            /*--- Reinforcement is 0. Prediction of failure given by v weight. ---*/
            r = 0;
            p = v[box];
        }

        /*--- Heuristic reinforcement is: current reinforcement
              + gamma * new failure prediction - previous failure prediction ---*/
        rhat = r + GAMMA * p - oldp;

        for (i = 0; i < N_BOXES; i++)
        {
            /*--- Update all weights.  (A former `v[i] = v[i]` guard on
                  v[i] < -1.0 had no effect and has been dropped.) ---*/
            w[i] += ALPHA * rhat * e[i];
            v[i] += BETA * rhat * xbar[i];

            if (failed)
            {
                /*--- If failure, zero all traces. ---*/
                e[i] = 0.;
                xbar[i] = 0.;
            }
            else
            {
                /*--- Otherwise, update (decay) the traces. ---*/
                e[i] *= LAMBDAw;
                xbar[i] *= LAMBDAv;
            }
        }
    }

    /* NOTE: when the loop ends because steps reached MAX_STEPS, the failing
       loop test has incremented steps once more, so MAX_STEPS + 1 is
       printed below. */
    if (failures == MAX_FAILURES)
        printf("run:%d Pole not balanced. Stopping after %d trials and %d steps.",run,failures,tsteps);
    else
        printf("Run:%d Pole balanced successfully for at least %d steps after %d trials and %d steps\n", run,steps,failures,tsteps);
    return 0;
}
/*----------------------------------------------------------------------
   cart_pole:  Takes an action (0 or 1) and the current values of the
   four state variables and updates their values by estimating the state
   TAU seconds later.

   action:     > 0 applies +FORCE_MAG, otherwise -FORCE_MAG.
   x, x_dot, theta, theta_dot:  in/out pointers to the state variables;
               all four are overwritten with the new state.
   Returns 0.  The return type stays int so that the earlier call in this
   file, made without a declaration in scope, remains compatible.
----------------------------------------------------------------------*/
int cart_pole(int action, float *x, float *x_dot, float *theta, float *theta_dot)
{
    float xacc, thetaacc, force, costheta, sintheta, temp;

    force = (action > 0) ? FORCE_MAG : -FORCE_MAG;
    costheta = cos(*theta);
    sintheta = sin(*theta);

    temp = (force + POLEMASS_LENGTH * *theta_dot * *theta_dot * sintheta)
           / TOTAL_MASS;

    thetaacc = (GRAVITY * sintheta - costheta * temp)
               / (LENGTH * (FOURTHIRDS - MASSPOLE * costheta * costheta
                                         / TOTAL_MASS));

    xacc = temp - POLEMASS_LENGTH * thetaacc * costheta / TOTAL_MASS;

    /*** Update the four state variables, using Euler's method.  Positions
         are advanced with the velocities from before this step. ***/
    *x += TAU * *x_dot;
    *x_dot += TAU * xacc;
    *theta += TAU * *theta_dot;
    *theta_dot += TAU * thetaacc;
    return 0;
}
/*----------------------------------------------------------------------
   get_box:  Given the current state, returns a box index from 0 to 161
   designating the region of the state space encompassing the current
   state.  Returns -1 if a failure state is encountered (x outside
   [-2.4, 2.4] or theta outside [-twelve_degrees, twelve_degrees]).

   The return type is now spelled out; implicit int is not valid since
   C99.  The parameter list stays in old-style form on purpose: run_trial()
   calls this function with no declaration in scope, so its float
   arguments arrive promoted to double.  An old-style float parameter
   list accepts that; a float prototype would not.
----------------------------------------------------------------------*/
#define one_degree 0.0174532 /* 2pi/360 */
#define six_degrees 0.1047192
#define twelve_degrees 0.2094384
#define fifty_degrees 0.87266
int get_box(x, x_dot, theta, theta_dot)
float x, x_dot, theta, theta_dot;
{
    int box = 0;

    if (x < -2.4 ||
        x > 2.4 ||
        theta < -twelve_degrees ||
        theta > twelve_degrees)
        return -1; /* to signal failure */

    /* Cart position: three bins, weight 1. */
    if (x < -0.8) box = 0;
    else if (x < 0.8) box = 1;
    else box = 2;

    /* Cart velocity: three bins, weight 3 (lowest bin adds nothing). */
    if (x_dot < -0.5) ;
    else if (x_dot < 0.5) box += 3;
    else box += 6;

    /* Pole angle: six bins, weight 9. */
    if (theta < -six_degrees) ;
    else if (theta < -one_degree) box += 9;
    else if (theta < 0) box += 18;
    else if (theta < one_degree) box += 27;
    else if (theta < six_degrees) box += 36;
    else box += 45;

    /* Pole angular velocity: three bins, weight 54. */
    if (theta_dot < -fifty_degrees) ;
    else if (theta_dot < fifty_degrees) box += 54;
    else box += 108;

    return box;
}
增强算法的参数*/
#定义最小值(x,y)((x=y)?x:y)
#定义问题向右推(1.0/(1.0+exp(-max(-50.0,min(s,50.0)щщ)))
#定义随机((float)rand()/(float)((1 2.4||
θ<-十二度||
θ>十二度)返回(-1);/*至信号故障*/
如果(x<-0.8)框=0;
如果(x<0.8)框=1,则为else;
else框=2;
如果(x_点<-0.5);
如果(x_点<0.5)框+=3,则为else;
else框+=6;
如果(θ<-六度);
如果(θ<-一度)框+=9;
如果(θ<0)框+=18,则为else;
如果(θ<一度)框+=27;
如果(θ<六度)框+=36;
else框+=45;
如果(θu点<-50度);
如果(θ点<50度)框+=54;
else框+=108;
返回(框);
}
Kurt,它可以编译,但会产生大量警告。(使用
gcc -Wall -Wextra -pedantic -std=gnu11 -Ofast -lm -o pole pole.c 进行编译,看看有多少警告。)您面临的问题之一是使用了古老的 K&R 风格函数定义,例如:
cart_pole(action, x, x_dot, theta, theta_dot)
int action;
float *x, *x_dot, *theta, *theta_dot;
而不是
void cart_pole (int action, float *x, float *x_dot,
float *theta, float *theta_dot)
如前所述,下面的宏会发生整数溢出(1 << 31 超出了 32 位有符号 int 的表示范围,应改为 1U << 31):
#define random ((float) rand() / (float)((1 << 31) - 1))
将这些部分放在一起,显式声明 int main(void)
,并从main
返回一个值,您可以在没有警告的情况下编译以下内容:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
/* Two-argument minimum / maximum.  Every argument is parenthesised so
   that an operator inside an argument cannot rebind against the macro's
   own comparison.  The selected argument is still evaluated twice, so do
   not pass expressions with side effects. */
#define min(x, y) (((x) <= (y)) ? (x) : (y))
#define max(x, y) (((x) >= (y)) ? (x) : (y))
/* Logistic function of the action weight s, with s clamped to [-50, 50]
   before exponentiation. */
#define prob_push_right(s) (1.0 / (1.0 + exp(-max(-50.0, min((s), 50.0)))))
/* rand() scaled into [0, 1].  Dividing by RAND_MAX rather than a
   hard-coded 2^31 - 1 keeps the range correct on implementations whose
   RAND_MAX is smaller. */
#define random ((float) rand() / (float) RAND_MAX)
#define N_BOXES 162 /* Number of disjoint boxes of state space. */
#define ALPHA 1000 /* Learning rate for action weights, w. */
#define BETA 0.5 /* Learning rate for critic weights, v. */
#define GAMMA 0.95 /* Discount factor for critic. */
#define LAMBDAw 0.9 /* Decay rate for w eligibility trace. */
#define LAMBDAv 0.8 /* Decay rate for v eligibility trace. */
#define MAX_FAILURES 100 /* Termination criterion. */
#define MAX_STEPS 100000 /* Steps without failure that count as success. */
/* One float per box of the state space. */
typedef float vector[N_BOXES];
void cart_pole (int action, float *x, float *x_dot,
float *theta, float *theta_dot);
int get_box (float x, float x_dot, float theta, float theta_dot);
/*----------------------------------------------------------------------
   main:  Reads an integer seed from standard input, then runs the
   action-learn loop on the simulated cart-pole until either MAX_STEPS
   steps pass without a failure or MAX_FAILURES failures have occurred.
   One line is printed per failure, followed by a summary line.
   Returns 0 on completion, EXIT_FAILURE if no integer seed could be read.
----------------------------------------------------------------------*/
int main (void)
{
    float x,            /* cart position, meters */
          x_dot,        /* cart velocity */
          theta,        /* pole angle, radians */
          theta_dot;    /* pole angular velocity */
    vector w,           /* vector of action weights */
           v,           /* vector of critic weights */
           e,           /* vector of action weight eligibilities */
           xbar;        /* vector of critic weight eligibilities */
    float p, oldp, rhat, r;
    int box, i, y, steps = 0, failures = 0, failed;
    int seed;

    printf ("Seed? ");
    /* A failed conversion would leave the seed uninitialised, so stop
       here instead of seeding rand() with an indeterminate value. */
    if (scanf ("%d", &seed) != 1) {
        fprintf (stderr, "error: could not read an integer seed\n");
        return EXIT_FAILURE;
    }
    srand ((unsigned) seed);

    /*--- Initialize action and heuristic critic weights and traces. ---*/
    for (i = 0; i < N_BOXES; i++)
        w[i] = v[i] = xbar[i] = e[i] = 0.0;

    /*--- Starting state is (0 0 0 0) ---*/
    x = x_dot = theta = theta_dot = 0.0;

    /*--- Find box in state space containing start state ---*/
    box = get_box (x, x_dot, theta, theta_dot);

    /*--- Iterate through the action-learn loop.  The loop test itself
          increments steps, so inside the body steps already counts the
          step being simulated. ---*/
    while (steps++ < MAX_STEPS && failures < MAX_FAILURES) {
        /*--- Choose action randomly, biased by current weight. ---*/
        y = (random < prob_push_right (w[box]));

        /*--- Update traces. ---*/
        e[box] += (1.0 - LAMBDAw) * (y - 0.5);
        xbar[box] += (1.0 - LAMBDAv);

        /*--- Remember prediction of failure for current state ---*/
        oldp = v[box];

        /*--- Apply action to the simulated cart-pole ---*/
        cart_pole (y, &x, &x_dot, &theta, &theta_dot);

        /*--- Get box of state space containing the resulting state. ---*/
        box = get_box (x, x_dot, theta, theta_dot);

        if (box < 0) {
            /*--- Failure occurred. ---*/
            failed = 1;
            failures++;
            printf ("Trial %d was %d steps.\n", failures, steps);
            steps = 0;

            /*--- Reset state to (0 0 0 0). Find the box. ---*/
            x = x_dot = theta = theta_dot = 0.0;
            box = get_box (x, x_dot, theta, theta_dot);

            /*--- Reinforcement upon failure is -1. Prediction of failure is 0. ---*/
            r = -1.0;
            p = 0.;
        } else {
            /*--- Not a failure. ---*/
            failed = 0;

            /*--- Reinforcement is 0. Prediction of failure given by v weight. ---*/
            r = 0;
            p = v[box];
        }

        /*--- Heuristic reinforcement is: current reinforcement
              + gamma * new failure prediction - previous failure prediction ---*/
        rhat = r + GAMMA * p - oldp;

        for (i = 0; i < N_BOXES; i++) {
            /*--- Update all weights.  (A former `v[i] = v[i]` guard on
                  v[i] < -1.0 had no effect and has been dropped.) ---*/
            w[i] += ALPHA * rhat * e[i];
            v[i] += BETA * rhat * xbar[i];

            if (failed) {
                /*--- If failure, zero all traces. ---*/
                e[i] = 0.;
                xbar[i] = 0.;
            } else {
                /*--- Otherwise, update (decay) the traces. ---*/
                e[i] *= LAMBDAw;
                xbar[i] *= LAMBDAv;
            }
        }
    }

    /* NOTE: when the loop ends because steps reached MAX_STEPS, the failing
       loop test has incremented steps once more, so MAX_STEPS + 1 is
       printed below. */
    if (failures == MAX_FAILURES)
        printf ("Pole not balanced. Stopping after %d failures.", failures);
    else
        printf ("Pole balanced successfully for at least %d steps\n", steps);

    return 0;
}
/*----------------------------------------------------------------------
cart_pole: Takes an action (0 or 1) and the current values of the
four state variables and updates their values by estimating the state
TAU seconds later.
----------------------------------------------------------------------*/
/*** Parameters for simulation ***/
#define GRAVITY 9.8
#define MASSCART 1.0
#define MASSPOLE 0.1
#define TOTAL_MASS (MASSPOLE + MASSCART)
#define LENGTH 0.5 /* actually half the pole's length */
#define POLEMASS_LENGTH (MASSPOLE * LENGTH)
#define FORCE_MAG 10.0
#define TAU 0.02 /* seconds between state updates */
#define FOURTHIRDS 1.3333333333333
void cart_pole (int action, float *x, float *x_dot,
float *theta, float *theta_dot)
{
float xacc, thetaacc, force, costheta, sintheta, temp;
force = (action > 0) ? FORCE_MAG : -FORCE_MAG;
costheta = cos (*theta);
sintheta = sin (*theta);
temp = (force + POLEMASS_LENGTH * *theta_dot * *theta_dot * sintheta)
/ TOTAL_MASS;
thetaacc = (GRAVITY * sintheta - costheta * temp)
/ (LENGTH * (FOURTHIRDS - MASSPOLE * costheta * costheta
/ TOTAL_MASS));
xacc = temp - POLEMASS_LENGTH * thetaacc * costheta / TOTAL_MASS;
/*** Update the four state variables, using Euler's method. ***/
*x += TAU * *x_dot;
*x_dot += TAU * xacc;
*theta += TAU * *theta_dot;
*theta_dot += TAU * thetaacc;
}
/*----------------------------------------------------------------------
   get_box:  Maps the four state variables to the index of the box of
   state space that contains them.  The index is a mixed-radix number
   built from 3 position bins, 3 velocity bins, 6 angle bins and
   3 angular-velocity bins, giving values 0 through 161.
   Returns -1 when x is outside [-2.4, 2.4] or theta is outside
   [-twelve_degrees, twelve_degrees], which signals failure.
----------------------------------------------------------------------*/
#define one_degree 0.0174532 /* 2pi/360 */
#define six_degrees 0.1047192
#define twelve_degrees 0.2094384
#define fifty_degrees 0.87266

int get_box (float x, float x_dot, float theta, float theta_dot)
{
    int pos_bin, vel_bin, angle_bin, spin_bin;

    /* Failure checks come first. */
    if (x < -2.4 || x > 2.4)
        return -1;
    if (theta < -twelve_degrees || theta > twelve_degrees)
        return -1;

    pos_bin = (x < -0.8) ? 0 : (x < 0.8) ? 1 : 2;

    vel_bin = (x_dot < -0.5) ? 0 : (x_dot < 0.5) ? 1 : 2;

    if (theta < -six_degrees)
        angle_bin = 0;
    else if (theta < -one_degree)
        angle_bin = 1;
    else if (theta < 0)
        angle_bin = 2;
    else if (theta < one_degree)
        angle_bin = 3;
    else if (theta < six_degrees)
        angle_bin = 4;
    else
        angle_bin = 5;

    spin_bin = (theta_dot < -fifty_degrees) ? 0
             : (theta_dot < fifty_degrees) ? 1 : 2;

    /* Weights 1, 3, 9 and 54 match the increments of the bin tests. */
    return pos_bin + 3 * vel_bin + 9 * angle_bin + 54 * spin_bin;
}
#包括
#包括
#包括
#包括
#定义最小值(x,y)((x=y)?x:y)
#定义问题向右推(1.0/(1.0+exp(-max(-50.0,min(s,50.0)щщ)))
#定义随机((浮点)rand()/(浮点)((1U 2.4 | |θ<-十二度| |θ>十二度)
返回(-1);/*至信号故障*/
如果(x<-0.8)
box=0;
否则如果(x<0.8)
box=1;
其他的
box=2;
如果(x_点<-0.5);
否则如果(x_点<0.5)
box+=3;
其他的
box+=6;
如果(θ<-六度);
否则如果(θ<-一度)
box+=9;
else if(θ<0)
box+=18;
否则如果(θ<一度)
box+=27;
否则如果(θ<六度)
box+=36;
其他的
box+=45;
如果(θu点<-50度);
否则如果(θ点<五十度)
box+=54;
其他的
box+=108;
返回(框);
}
运行该程序会得到与已公布结果一致的输出。我知道这并不能解决你所有的问题,但希望它能帮助你继续前进。1
#define random ((float) rand() / (float)((1U << 31) - 1))
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
/* Two-argument minimum / maximum.  Every argument is parenthesised so
   that an operator inside an argument cannot rebind against the macro's
   own comparison.  The selected argument is still evaluated twice, so do
   not pass expressions with side effects. */
#define min(x, y) (((x) <= (y)) ? (x) : (y))
#define max(x, y) (((x) >= (y)) ? (x) : (y))
/* Logistic function of the action weight s, with s clamped to [-50, 50]
   before exponentiation. */
#define prob_push_right(s) (1.0 / (1.0 + exp(-max(-50.0, min((s), 50.0)))))
/* rand() scaled into [0, 1].  Dividing by RAND_MAX rather than a
   hard-coded 2^31 - 1 keeps the range correct on implementations whose
   RAND_MAX is smaller. */
#define random ((float) rand() / (float) RAND_MAX)
#define N_BOXES 162 /* Number of disjoint boxes of state space. */
#define ALPHA 1000 /* Learning rate for action weights, w. */
#define BETA 0.5 /* Learning rate for critic weights, v. */
#define GAMMA 0.95 /* Discount factor for critic. */
#define LAMBDAw 0.9 /* Decay rate for w eligibility trace. */
#define LAMBDAv 0.8 /* Decay rate for v eligibility trace. */
#define MAX_FAILURES 100 /* Termination criterion. */
#define MAX_STEPS 100000 /* Steps without failure that count as success. */
/* One float per box of the state space. */
typedef float vector[N_BOXES];
void cart_pole (int action, float *x, float *x_dot,
float *theta, float *theta_dot);
int get_box (float x, float x_dot, float theta, float theta_dot);
/*----------------------------------------------------------------------
   main:  Reads an integer seed from standard input, then runs the
   action-learn loop on the simulated cart-pole until either MAX_STEPS
   steps pass without a failure or MAX_FAILURES failures have occurred.
   One line is printed per failure, followed by a summary line.
   Returns 0 on completion, EXIT_FAILURE if no integer seed could be read.
----------------------------------------------------------------------*/
int main (void)
{
    float x,            /* cart position, meters */
          x_dot,        /* cart velocity */
          theta,        /* pole angle, radians */
          theta_dot;    /* pole angular velocity */
    vector w,           /* vector of action weights */
           v,           /* vector of critic weights */
           e,           /* vector of action weight eligibilities */
           xbar;        /* vector of critic weight eligibilities */
    float p, oldp, rhat, r;
    int box, i, y, steps = 0, failures = 0, failed;
    int seed;

    printf ("Seed? ");
    /* A failed conversion would leave the seed uninitialised, so stop
       here instead of seeding rand() with an indeterminate value. */
    if (scanf ("%d", &seed) != 1) {
        fprintf (stderr, "error: could not read an integer seed\n");
        return EXIT_FAILURE;
    }
    srand ((unsigned) seed);

    /*--- Initialize action and heuristic critic weights and traces. ---*/
    for (i = 0; i < N_BOXES; i++)
        w[i] = v[i] = xbar[i] = e[i] = 0.0;

    /*--- Starting state is (0 0 0 0) ---*/
    x = x_dot = theta = theta_dot = 0.0;

    /*--- Find box in state space containing start state ---*/
    box = get_box (x, x_dot, theta, theta_dot);

    /*--- Iterate through the action-learn loop.  The loop test itself
          increments steps, so inside the body steps already counts the
          step being simulated. ---*/
    while (steps++ < MAX_STEPS && failures < MAX_FAILURES) {
        /*--- Choose action randomly, biased by current weight. ---*/
        y = (random < prob_push_right (w[box]));

        /*--- Update traces. ---*/
        e[box] += (1.0 - LAMBDAw) * (y - 0.5);
        xbar[box] += (1.0 - LAMBDAv);

        /*--- Remember prediction of failure for current state ---*/
        oldp = v[box];

        /*--- Apply action to the simulated cart-pole ---*/
        cart_pole (y, &x, &x_dot, &theta, &theta_dot);

        /*--- Get box of state space containing the resulting state. ---*/
        box = get_box (x, x_dot, theta, theta_dot);

        if (box < 0) {
            /*--- Failure occurred. ---*/
            failed = 1;
            failures++;
            printf ("Trial %d was %d steps.\n", failures, steps);
            steps = 0;

            /*--- Reset state to (0 0 0 0). Find the box. ---*/
            x = x_dot = theta = theta_dot = 0.0;
            box = get_box (x, x_dot, theta, theta_dot);

            /*--- Reinforcement upon failure is -1. Prediction of failure is 0. ---*/
            r = -1.0;
            p = 0.;
        } else {
            /*--- Not a failure. ---*/
            failed = 0;

            /*--- Reinforcement is 0. Prediction of failure given by v weight. ---*/
            r = 0;
            p = v[box];
        }

        /*--- Heuristic reinforcement is: current reinforcement
              + gamma * new failure prediction - previous failure prediction ---*/
        rhat = r + GAMMA * p - oldp;

        for (i = 0; i < N_BOXES; i++) {
            /*--- Update all weights.  (A former `v[i] = v[i]` guard on
                  v[i] < -1.0 had no effect and has been dropped.) ---*/
            w[i] += ALPHA * rhat * e[i];
            v[i] += BETA * rhat * xbar[i];

            if (failed) {
                /*--- If failure, zero all traces. ---*/
                e[i] = 0.;
                xbar[i] = 0.;
            } else {
                /*--- Otherwise, update (decay) the traces. ---*/
                e[i] *= LAMBDAw;
                xbar[i] *= LAMBDAv;
            }
        }
    }

    /* NOTE: when the loop ends because steps reached MAX_STEPS, the failing
       loop test has incremented steps once more, so MAX_STEPS + 1 is
       printed below. */
    if (failures == MAX_FAILURES)
        printf ("Pole not balanced. Stopping after %d failures.", failures);
    else
        printf ("Pole balanced successfully for at least %d steps\n", steps);

    return 0;
}
/*----------------------------------------------------------------------
cart_pole: Takes an action (0 or 1) and the current values of the
four state variables and updates their values by estimating the state
TAU seconds later.
----------------------------------------------------------------------*/
/*** Parameters for simulation ***/
#define GRAVITY 9.8
#define MASSCART 1.0
#define MASSPOLE 0.1
#define TOTAL_MASS (MASSPOLE + MASSCART)
#define LENGTH 0.5 /* actually half the pole's length */
#define POLEMASS_LENGTH (MASSPOLE * LENGTH)
#define FORCE_MAG 10.0
#define TAU 0.02 /* seconds between state updates */
#define FOURTHIRDS 1.3333333333333
void cart_pole (int action, float *x, float *x_dot,
float *theta, float *theta_dot)
{
float xacc, thetaacc, force, costheta, sintheta, temp;
force = (action > 0) ? FORCE_MAG : -FORCE_MAG;
costheta = cos (*theta);
sintheta = sin (*theta);
temp = (force + POLEMASS_LENGTH * *theta_dot * *theta_dot * sintheta)
/ TOTAL_MASS;
thetaacc = (GRAVITY * sintheta - costheta * temp)
/ (LENGTH * (FOURTHIRDS - MASSPOLE * costheta * costheta
/ TOTAL_MASS));
xacc = temp - POLEMASS_LENGTH * thetaacc * costheta / TOTAL_MASS;
/*** Update the four state variables, using Euler's method. ***/
*x += TAU * *x_dot;
*x_dot += TAU * xacc;
*theta += TAU * *theta_dot;
*theta_dot += TAU * thetaacc;
}
/*----------------------------------------------------------------------
   get_box:  Maps the four state variables to the index of the box of
   state space that contains them.  The index is a mixed-radix number
   built from 3 position bins, 3 velocity bins, 6 angle bins and
   3 angular-velocity bins, giving values 0 through 161.
   Returns -1 when x is outside [-2.4, 2.4] or theta is outside
   [-twelve_degrees, twelve_degrees], which signals failure.
----------------------------------------------------------------------*/
#define one_degree 0.0174532 /* 2pi/360 */
#define six_degrees 0.1047192
#define twelve_degrees 0.2094384
#define fifty_degrees 0.87266

int get_box (float x, float x_dot, float theta, float theta_dot)
{
    int pos_bin, vel_bin, angle_bin, spin_bin;

    /* Failure checks come first. */
    if (x < -2.4 || x > 2.4)
        return -1;
    if (theta < -twelve_degrees || theta > twelve_degrees)
        return -1;

    pos_bin = (x < -0.8) ? 0 : (x < 0.8) ? 1 : 2;

    vel_bin = (x_dot < -0.5) ? 0 : (x_dot < 0.5) ? 1 : 2;

    if (theta < -six_degrees)
        angle_bin = 0;
    else if (theta < -one_degree)
        angle_bin = 1;
    else if (theta < 0)
        angle_bin = 2;
    else if (theta < one_degree)
        angle_bin = 3;
    else if (theta < six_degrees)
        angle_bin = 4;
    else
        angle_bin = 5;

    spin_bin = (theta_dot < -fifty_degrees) ? 0
             : (theta_dot < fifty_degrees) ? 1 : 2;

    /* Weights 1, 3, 9 and 54 match the increments of the bin tests. */
    return pos_bin + 3 * vel_bin + 9 * angle_bin + 54 * spin_bin;
}