编译C程序时出现“error: expected identifier or '(' before 'float'”（错误：在“float”之前应为标识符或“(”）_C

编译C程序时出现“error: expected identifier or '(' before 'float'”（错误：在“float”之前应为标识符或“(”）

编译C程序时出现“error: expected identifier or '(' before 'float'”（错误：在“float”之前应为标识符或“(”）,c,C,我正在尝试理解（并“翻译”成Python）一个C程序，它用强化学习来模拟控制小车-倒立摆（cart-pole）系统。我尝试做的一个修改是将pole.h头文件的内容移入pole.c中。但是，当我使用 gcc pole3.c -lm 进行编译时（其中pole3.c是我给修改后的程序起的名字），我得到以下两个错误： pole3.c:46:35: error: expected identifier or ‘(’ before ‘float’ #define random ((float) rand()

我正在尝试理解（并“翻译”成Python）一个C程序，它用强化学习来模拟控制小车-倒立摆（cart-pole）系统。

我尝试做的一个修改是将

pole.h

头文件移动到

pole.c

中。但是，当我尝试使用

gcc pole3.c -lm

进行编译时（其中

pole3.c

是我命名的已编辑程序），我得到以下两个错误：

pole3.c:46:35: error: expected identifier or ‘(’ before ‘float’
 #define random                  ((float) rand() / (float)((1 << 31) - 1))
                                   ^
pole3.c:46:42: error: expected ‘)’ before ‘rand’
 #define random                  ((float) rand() / (float)((1 << 31) - 1))

目前，我已返回一个步骤，仅将粘贴的

pole.h

复制到原始

pole.c

中，而不保留以前的任何修改。生成的程序（如下所示）可编译

/* Standard headers.  They are included before any macro below because a
   C library's <stdlib.h> may declare its own function named random();
   defining the `random` macro ahead of that header would rewrite the
   declaration and break compilation (NOTE(review): glibc is one such
   library -- confirm for other targets). */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
/* #include "pole.h" -- its contents are inlined below. */

/* Parameters for reinforcement algorithm.  */

/* min/max: every argument is parenthesized so that operands containing
   low-precedence operators compare correctly.  Arguments are evaluated
   more than once, so pass only side-effect-free expressions. */
#define min(x, y)               (((x) <= (y)) ? (x) : (y))
#define max(x, y)               (((x) >= (y)) ? (x) : (y))

/* Logistic function of s, with s clamped to [-50, 50] before exp(). */
#define prob_push_right(s)      (1.0 / (1.0 + exp(-max(-50.0, min((s), 50.0)))))

/* Pseudo-random float in [0, 1].  Scaling by RAND_MAX avoids the signed
   overflow of (1 << 31) and stays correct where RAND_MAX is not 2^31 - 1. */
#define random                  ((float) rand() / (float) RAND_MAX)

#define N_BOXES         162         /* Number of disjoint boxes of state space. */
#define ALPHA       1000        /* Learning rate for action weights, w. */
#define BETA        0.5         /* Learning rate for critic weights, v. */
#define GAMMA       0.95        /* Discount factor for critic. */
#define LAMBDAw     0.9         /* Decay rate for w eligibility trace. */
#define LAMBDAv     0.8         /* Decay rate for v eligibility trace. */

#define MAX_FAILURES     300         /* Termination criterion. */
#define MAX_STEPS        100000

#define RUNS 1                      /* Number of trials started by main. */


/*** Parameters for simulation ***/

#define GRAVITY 9.8
#define MASSCART 1.0
#define MASSPOLE 0.1
#define TOTAL_MASS (MASSPOLE + MASSCART)
#define LENGTH 0.5        /* actually half the pole's length */
#define POLEMASS_LENGTH (MASSPOLE * LENGTH)
#define FORCE_MAG 10.0
#define TAU 0.02          /* seconds between state updates */
#define FOURTHIRDS 1.3333333333333


/* One float per box of the discretized state space. */
typedef float vector[N_BOXES];

/* Program entry point: prompts for an integer seed on standard input,
   seeds rand() with it, and starts RUNS trials numbered 0 .. RUNS-1.
   Returns 0 on completion, or 1 when no integer seed could be read. */
int main(void)
{
  /* run_trial is defined further down in old-style form; declare it
     here so the call does not depend on an implicit declaration. */
  int run_trial();
  int i, seed;

  printf("enter seed:");
  if (scanf("%d", &seed) != 1)
    {
      /* Without this check srand() would be fed an uninitialized value. */
      printf("error: seed must be an integer\n");
      return 1;
    }
  srand((unsigned) seed);

  for (i = 0; i < RUNS; i++)
    run_trial(i);

  return 0;
}

/*----------------------------------------------------------------------
   run_trial:  Runs one learning run, labelled `run` in the summary line.
 Starting from state (0 0 0 0) it repeatedly picks a push direction,
 advances the simulation with cart_pole, and updates the action weights
 w and critic weights v.  A line is printed for every failure.  The run
 ends once a balancing attempt exceeds MAX_STEPS steps or MAX_FAILURES
 failures have occurred, and a summary is printed.  Always returns 0.

 The definition stays in old-style form because main calls it without a
 prototype in scope.
----------------------------------------------------------------------*/
int run_trial(run)
int run;
{
  /* Helpers defined later in this file in old-style form; declared here
     so the calls below do not rely on implicit declarations. */
  int get_box(), cart_pole();

  float x,          /* cart position, meters */
        x_dot,      /* cart velocity */
        theta,      /* pole angle, radians */
        theta_dot;  /* pole angular velocity */
  vector w,         /* vector of action weights */
         v,         /* vector of critic weights */
         e,         /* vector of action weight eligibilities */
         xbar;      /* vector of critic weight eligibilities */
  float p, oldp, rhat, r;
  int tsteps = 0, box, i, y, steps = 0, failures = 0, failed;

  /*--- Initialize action and heuristic critic weights and traces. ---*/
  for (i = 0; i < N_BOXES; i++)
    w[i] = v[i] = xbar[i] = e[i] = 0.0;

  /*--- Starting state is (0 0 0 0) ---*/
  x = x_dot = theta = theta_dot = 0.0;

  /*--- Find box in state space containing start state ---*/
  box = get_box(x, x_dot, theta, theta_dot);

  /*--- Iterate through the action-learn loop. ---*/
  while (steps++ < MAX_STEPS && failures < MAX_FAILURES)
    {
      /*--- Choose action randomly, biased by current weight. ---*/
      y = (random < prob_push_right(w[box]));

      tsteps++;  /* total number of steps across all attempts */

      /*--- Update traces. ---*/
      e[box] += (1.0 - LAMBDAw) * (y - 0.5);
      xbar[box] += (1.0 - LAMBDAv);

      /*--- Remember prediction of failure for current state ---*/
      oldp = v[box];

      /*--- Apply action to the simulated cart-pole ---*/
      cart_pole(y, &x, &x_dot, &theta, &theta_dot);

      /*--- Get box of state space containing the resulting state. ---*/
      box = get_box(x, x_dot, theta, theta_dot);

      if (box < 0)
        {
          /*--- Failure occurred. ---*/
          failed = 1;
          failures++;
          printf("Trial %d was %d steps.\n", failures, steps);
          steps = 0;

          /*--- Reset state to (0 0 0 0).  Find the box. ---*/
          x = x_dot = theta = theta_dot = 0.0;
          box = get_box(x, x_dot, theta, theta_dot);

          /*--- Reinforcement upon failure is -1. Prediction of failure is 0. ---*/
          r = -1.0;
          p = 0.;
        }
      else
        {
          /*--- Not a failure. ---*/
          failed = 0;

          /*--- Reinforcement is 0. Prediction of failure given by v weight. ---*/
          r = 0;
          p = v[box];
        }

      /*--- Heuristic reinforcement is:   current reinforcement
          + gamma * new failure prediction - previous failure prediction ---*/
      rhat = r + GAMMA * p - oldp;

      for (i = 0; i < N_BOXES; i++)
        {
          /*--- Update all weights. ---*/
          w[i] += ALPHA * rhat * e[i];
          v[i] += BETA * rhat * xbar[i];
          /* NOTE(review): a guard `if (v[i] < -1.0) v[i] = v[i];` stood
             here; it assigned v[i] to itself and had no effect, so it is
             omitted.  If a lower clamp at -1.0 was intended it was never
             active -- confirm before adding one. */

          if (failed)
            {
              /*--- If failure, zero all traces. ---*/
              e[i] = 0.;
              xbar[i] = 0.;
            }
          else
            {
              /*--- Otherwise, update (decay) the traces. ---*/
              e[i] *= LAMBDAw;
              xbar[i] *= LAMBDAv;
            }
        }
    }

  if (failures == MAX_FAILURES)
    printf("run:%d Pole not balanced. Stopping after %d trials and %d steps.",run,failures,tsteps);
  else
    printf("Run:%d Pole balanced successfully for at least %d steps after %d trials and %d steps\n", run,steps,failures,tsteps);

  return 0;
}


/*----------------------------------------------------------------------
   cart_pole:  Takes an action (push right when action > 0, push left
 otherwise) and pointers to the four state variables, and overwrites
 them with an estimate of the state TAU seconds later.  Always
 returns 0.

 The definition stays in old-style form with an `int` result because
 callers earlier in this file use it without a prototype in scope; the
 return type is spelled out since implicit int is not valid C99 or later.
----------------------------------------------------------------------*/

int cart_pole(action, x, x_dot, theta, theta_dot)
int action;
float *x, *x_dot, *theta, *theta_dot;
{
    float xacc,thetaacc,force,costheta,sintheta,temp;

    force = (action>0)? FORCE_MAG : -FORCE_MAG;
    costheta = cos(*theta);
    sintheta = sin(*theta);

    /* Term shared by both acceleration formulas below. */
    temp = (force + POLEMASS_LENGTH * *theta_dot * *theta_dot * sintheta)
                 / TOTAL_MASS;

    thetaacc = (GRAVITY * sintheta - costheta* temp)
           / (LENGTH * (FOURTHIRDS - MASSPOLE * costheta * costheta
                                              / TOTAL_MASS));

    xacc  = temp - POLEMASS_LENGTH * thetaacc* costheta / TOTAL_MASS;

/*** Update the four state variables, using Euler's method. ***/

    /* Positions are advanced with the velocities from before this step,
       so each position is updated ahead of its velocity. */
    *x  += TAU * *x_dot;
    *x_dot += TAU * xacc;
    *theta += TAU * *theta_dot;
    *theta_dot += TAU * thetaacc;

    return 0;
}

/*----------------------------------------------------------------------
   get_box:  Maps the state (x, x_dot, theta, theta_dot) to the index of
  the box of state space containing it.  Returns a value from 0 to 161
  (3 x bins * 3 x_dot bins * 6 theta bins * 3 theta_dot bins = 162
  boxes), or -1 if x lies outside +/-2.4 or theta lies outside
  +/-twelve_degrees (a failure state).

  The definition stays in old-style form because callers earlier in this
  file invoke it without a prototype in scope, so the float arguments
  arrive promoted to double.  The `int` return type is spelled out since
  implicit int is not valid C99 or later.
----------------------------------------------------------------------*/

#define one_degree 0.0174532    /* 2pi/360 */
#define six_degrees 0.1047192
#define twelve_degrees 0.2094384
#define fifty_degrees 0.87266

int get_box(x,x_dot,theta,theta_dot)
float x,x_dot,theta,theta_dot;
{
  int box=0;

  if (x < -2.4 ||
      x > 2.4  ||
      theta < -twelve_degrees ||
      theta > twelve_degrees)          return(-1); /* to signal failure */

  /* x selects 0..2; each later variable adds a multiple of the number
     of boxes spanned by the variables before it (3, 9, 54). */
  if (x < -0.8)                        box = 0;
  else if (x < 0.8)                    box = 1;
  else                                 box = 2;

  if (x_dot < -0.5)                    ;           /* lowest bin adds 0 */
  else if (x_dot < 0.5)                box += 3;
  else                                 box += 6;

  if (theta < -six_degrees)            ;           /* lowest bin adds 0 */
  else if (theta < -one_degree)        box += 9;
  else if (theta < 0)                  box += 18;
  else if (theta < one_degree)         box += 27;
  else if (theta < six_degrees)        box += 36;
  else                                 box += 45;

  if (theta_dot < -fifty_degrees)      ;           /* lowest bin adds 0 */
  else if (theta_dot < fifty_degrees)  box += 54;
  else                                 box += 108;

  return(box);
}

增强算法的参数*/ #定义最小值（x，y）（（x=y）？x:y） #定义问题向右推（1.0/（1.0+exp（-max（-50.0，min（s，50.0）щщ））） #定义随机（（float）rand（）/（float）（（1 2.4|| θ<-十二度|| θ>十二度）返回（-1）；/*至信号故障*/ 如果（x<-0.8）框=0；如果（x<0.8）框=1，则为else； else框=2；如果（x_点<-0.5）；如果（x_点<0.5）框+=3，则为else； else框+=6；如果（θ<-六度）；如果（θ<-一度）框+=9；如果（θ<0）框+=18，则为else；如果（θ<一度）框+=27；如果（θ<六度）框+=36； else框+=45；如果（θu点<-50度）；如果（θ点<50度）框+=54； else框+=108；返回（框）； }

Kurt，这个程序可以编译，但会产生大量警告。（请使用

gcc -Wall -Wextra -pedantic -std=gnu11 -Ofast -o pole pole.c -lm 进行编译，看看有多少条警告。）您面临的问题之一是古老的（K&R）风格的函数定义，例如：
cart_pole(action, x, x_dot, theta, theta_dot)
int action;
float *x, *x_dot, *theta, *theta_dot;

而不是
void cart_pole (int action, float *x, float *x_dot, 
                float *theta, float *theta_dot)

如前所述，您将遇到整数溢出：
#define random                  ((float) rand() / (float)((1 << 31) - 1))

把这些修改合在一起，将main显式声明为 int main(void)，并让main返回一个值，下面的代码就可以在没有任何警告的情况下编译：
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>

/* min/max: every argument is parenthesized so that operands containing
   low-precedence operators compare correctly.  Arguments are evaluated
   more than once, so pass only side-effect-free expressions. */
#define min(x, y)               (((x) <= (y)) ? (x) : (y))
#define max(x, y)               (((x) >= (y)) ? (x) : (y))

/* Logistic function of s, with s clamped to [-50, 50] before exp(). */
#define prob_push_right(s)      (1.0 / (1.0 + exp(-max(-50.0, min((s), 50.0)))))

/* Pseudo-random float in [0, 1].  Scaling by RAND_MAX keeps the range
   correct on C libraries whose RAND_MAX is not 2^31 - 1.  This macro
   must stay below the #include lines: <stdlib.h> may declare a function
   called random(). */
#define random                  ((float) rand() / (float) RAND_MAX)

#define N_BOXES     162     /* Number of disjoint boxes of state space. */
#define ALPHA       1000    /* Learning rate for action weights, w. */
#define BETA        0.5     /* Learning rate for critic weights, v. */
#define GAMMA       0.95    /* Discount factor for critic. */
#define LAMBDAw     0.9     /* Decay rate for w eligibility trace. */
#define LAMBDAv     0.8     /* Decay rate for v eligibility trace. */

#define MAX_FAILURES     100    /* Termination criterion. */
#define MAX_STEPS        100000

/* One float per box of the discretized state space. */
typedef float vector[N_BOXES];

/* Advances the four state variables by one simulation step. */
void cart_pole (int action, float *x, float *x_dot,
                float *theta, float *theta_dot);
/* Returns the box index for a state, or -1 for a failure state. */
int get_box (float x, float x_dot, float theta, float theta_dot);

/*
 * Program entry point.  Prompts for an integer seed, seeds rand() with
 * it, then runs the action-learn loop: pick a push direction, advance
 * the simulation with cart_pole, and update the action weights w and
 * critic weights v.  One line is printed per failure.  The loop ends
 * once a balancing attempt exceeds MAX_STEPS steps or MAX_FAILURES
 * failures have occurred.
 *
 * Returns 0 after the loop finishes, or 1 when no integer seed could be
 * read from standard input.
 */
int main (void)
{
    float x,                  /* cart position, meters */
    x_dot,                    /* cart velocity */
    theta,                    /* pole angle, radians */
    theta_dot;                /* pole angular velocity */
    vector w,                 /* vector of action weights */
    v,                        /* vector of critic weights */
    e,                        /* vector of action weight eligibilities */
    xbar;                     /* vector of critic weight eligibilities */
    float p, oldp, rhat, r;
    int box, i, y, steps = 0, failures = 0, failed;
    int seed;

    printf ("Seed? ");
    if (scanf ("%d", &seed) != 1) {
        /* Without this check srand() would be fed an uninitialized value. */
        fprintf (stderr, "error: seed must be an integer\n");
        return 1;
    }
    srand ((unsigned) seed);

/*--- Initialize action and heuristic critic weights and traces. ---*/
    for (i = 0; i < N_BOXES; i++)
        w[i] = v[i] = xbar[i] = e[i] = 0.0;

/*--- Starting state is (0 0 0 0) ---*/
    x = x_dot = theta = theta_dot = 0.0;

/*--- Find box in state space containing start state ---*/
    box = get_box (x, x_dot, theta, theta_dot);

/*--- Iterate through the action-learn loop. ---*/
    while (steps++ < MAX_STEPS && failures < MAX_FAILURES) {
    /*--- Choose action randomly, biased by current weight. ---*/
        y = (random < prob_push_right (w[box]));

    /*--- Update traces. ---*/
        e[box] += (1.0 - LAMBDAw) * (y - 0.5);
        xbar[box] += (1.0 - LAMBDAv);

    /*--- Remember prediction of failure for current state ---*/
        oldp = v[box];

    /*--- Apply action to the simulated cart-pole ---*/
        cart_pole (y, &x, &x_dot, &theta, &theta_dot);

    /*--- Get box of state space containing the resulting state. ---*/
        box = get_box (x, x_dot, theta, theta_dot);

        if (box < 0) {
        /*--- Failure occurred. ---*/
            failed = 1;
            failures++;
            printf ("Trial %d was %d steps.\n", failures, steps);
            steps = 0;

        /*--- Reset state to (0 0 0 0).  Find the box. ---*/
            x = x_dot = theta = theta_dot = 0.0;
            box = get_box (x, x_dot, theta, theta_dot);

        /*--- Reinforcement upon failure is -1. Prediction of failure is 0. ---*/
            r = -1.0;
            p = 0.;
        } else {
        /*--- Not a failure. ---*/
            failed = 0;

        /*--- Reinforcement is 0. Prediction of failure given by v weight. ---*/
            r = 0;
            p = v[box];
        }

    /*--- Heuristic reinforcement is:   current reinforcement
            + gamma * new failure prediction - previous failure prediction ---*/
        rhat = r + GAMMA * p - oldp;

        for (i = 0; i < N_BOXES; i++) {
        /*--- Update all weights. ---*/
            w[i] += ALPHA * rhat * e[i];
            v[i] += BETA * rhat * xbar[i];
            /* NOTE(review): a guard `if (v[i] < -1.0) v[i] = v[i];` stood
               here; it assigned v[i] to itself and had no effect, so it
               is omitted.  If a lower clamp at -1.0 was intended it was
               never active -- confirm before adding one. */

            if (failed) {
            /*--- If failure, zero all traces. ---*/
                e[i] = 0.;
                xbar[i] = 0.;
            } else {
            /*--- Otherwise, update (decay) the traces. ---*/
                e[i] *= LAMBDAw;
                xbar[i] *= LAMBDAv;
            }
        }

    }
    if (failures == MAX_FAILURES)
        printf ("Pole not balanced. Stopping after %d failures.", failures);
    else
        printf ("Pole balanced successfully for at least %d steps\n", steps);

    return 0;
}

/*----------------------------------------------------------------------
cart_pole:  Takes an action (0 or 1) and the current values of the
four state variables and updates their values by estimating the state
TAU seconds later.
----------------------------------------------------------------------*/

/*** Parameters for simulation ***/

#define GRAVITY 9.8
#define MASSCART 1.0
#define MASSPOLE 0.1
#define TOTAL_MASS (MASSPOLE + MASSCART)
#define LENGTH 0.5              /* actually half the pole's length */
#define POLEMASS_LENGTH (MASSPOLE * LENGTH)
#define FORCE_MAG 10.0
#define TAU 0.02                /* seconds between state updates */
#define FOURTHIRDS 1.3333333333333

void cart_pole (int action, float *x, float *x_dot,
                float *theta, float *theta_dot)
{
    float xacc, thetaacc, force, costheta, sintheta, temp;

    force = (action > 0) ? FORCE_MAG : -FORCE_MAG;
    costheta = cos (*theta);
    sintheta = sin (*theta);

    temp = (force + POLEMASS_LENGTH * *theta_dot * *theta_dot * sintheta)
        / TOTAL_MASS;

    thetaacc = (GRAVITY * sintheta - costheta * temp)
        / (LENGTH * (FOURTHIRDS - MASSPOLE * costheta * costheta
                    / TOTAL_MASS));

    xacc = temp - POLEMASS_LENGTH * thetaacc * costheta / TOTAL_MASS;

/*** Update the four state variables, using Euler's method. ***/

    *x += TAU * *x_dot;
    *x_dot += TAU * xacc;
    *theta += TAU * *theta_dot;
    *theta_dot += TAU * thetaacc;
}

/*----------------------------------------------------------------------
get_box:  Maps the state (x, x_dot, theta, theta_dot) to the index of
the box of state space containing it.  The index runs from 0 to 161
(3 x bins * 3 x_dot bins * 6 theta bins * 3 theta_dot bins = 162 boxes).
Returns -1 if x lies outside +/-2.4 or theta lies outside
+/-twelve_degrees (a failure state).
----------------------------------------------------------------------*/

#define one_degree 0.0174532    /* 2pi/360 */
#define six_degrees 0.1047192
#define twelve_degrees 0.2094384
#define fifty_degrees 0.87266

int get_box (float x, float x_dot, float theta, float theta_dot)
{
    int x_bin, x_dot_bin, theta_bin, theta_dot_bin;

    /* Failure region: cart off the track or pole past twelve degrees. */
    if (x < -2.4 || x > 2.4)
        return -1;
    if (theta < -twelve_degrees || theta > twelve_degrees)
        return -1;

    /* Each bin number counts how many ascending thresholds the value is
       NOT below.  Spelling it as !(v < t) keeps a value that compares
       false against everything in the top bin. */
    x_bin = !(x < -0.8) + !(x < 0.8);

    x_dot_bin = !(x_dot < -0.5) + !(x_dot < 0.5);

    theta_bin = !(theta < -six_degrees) + !(theta < -one_degree)
        + !(theta < 0) + !(theta < one_degree) + !(theta < six_degrees);

    theta_dot_bin = !(theta_dot < -fifty_degrees)
        + !(theta_dot < fifty_degrees);

    /* Mixed-radix combination: strides 1, 3, 9 and 54. */
    return x_bin + 3 * x_dot_bin + 9 * theta_bin + 54 * theta_dot_bin;
}

#包括
#包括
#包括
#包括
#定义最小值（x，y）（（x=y）？x:y）
#定义问题向右推（1.0/（1.0+exp（-max（-50.0，min（s，50.0）щщ）））
#定义随机（（浮点）rand（）/（浮点）（（1U 2.4 | |θ<-十二度| |θ>十二度）
返回（-1）；/*至信号故障*/
如果（x<-0.8）
box=0；
否则如果（x<0.8）
box=1；
其他的
box=2；
如果（x_点<-0.5）；
否则如果（x_点<0.5）
box+=3；
其他的
box+=6；
如果（θ<-六度）；
否则如果（θ<-一度）
box+=9；
else if（θ<0）
box+=18；
否则如果（θ<一度）
box+=27；
否则如果（θ<六度）
box+=36；
其他的
box+=45；
如果（θu点<-50度）；
否则如果（θ点<五十度）
box+=54；
其他的
box+=108；
返回（框）；
}

运行该程序会得到与已公布结果一致的输出。我知道这并不能解决你所有的问题，但希望它能帮助你继续前进。
1
#define random                  ((float) rand() / (float)((1U << 31) - 1))

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>

/* min/max: every argument is parenthesized so that operands containing
   low-precedence operators compare correctly.  Arguments are evaluated
   more than once, so pass only side-effect-free expressions. */
#define min(x, y)               (((x) <= (y)) ? (x) : (y))
#define max(x, y)               (((x) >= (y)) ? (x) : (y))

/* Logistic function of s, with s clamped to [-50, 50] before exp(). */
#define prob_push_right(s)      (1.0 / (1.0 + exp(-max(-50.0, min((s), 50.0)))))

/* Pseudo-random float in [0, 1].  Scaling by RAND_MAX keeps the range
   correct on C libraries whose RAND_MAX is not 2^31 - 1.  This macro
   must stay below the #include lines: <stdlib.h> may declare a function
   called random(). */
#define random                  ((float) rand() / (float) RAND_MAX)

#define N_BOXES     162     /* Number of disjoint boxes of state space. */
#define ALPHA       1000    /* Learning rate for action weights, w. */
#define BETA        0.5     /* Learning rate for critic weights, v. */
#define GAMMA       0.95    /* Discount factor for critic. */
#define LAMBDAw     0.9     /* Decay rate for w eligibility trace. */
#define LAMBDAv     0.8     /* Decay rate for v eligibility trace. */

#define MAX_FAILURES     100    /* Termination criterion. */
#define MAX_STEPS        100000

/* One float per box of the discretized state space. */
typedef float vector[N_BOXES];

/* Advances the four state variables by one simulation step. */
void cart_pole (int action, float *x, float *x_dot,
                float *theta, float *theta_dot);
/* Returns the box index for a state, or -1 for a failure state. */
int get_box (float x, float x_dot, float theta, float theta_dot);

/*
 * Program entry point.  Prompts for an integer seed, seeds rand() with
 * it, then runs the action-learn loop: pick a push direction, advance
 * the simulation with cart_pole, and update the action weights w and
 * critic weights v.  One line is printed per failure.  The loop ends
 * once a balancing attempt exceeds MAX_STEPS steps or MAX_FAILURES
 * failures have occurred.
 *
 * Returns 0 after the loop finishes, or 1 when no integer seed could be
 * read from standard input.
 */
int main (void)
{
    float x,                  /* cart position, meters */
    x_dot,                    /* cart velocity */
    theta,                    /* pole angle, radians */
    theta_dot;                /* pole angular velocity */
    vector w,                 /* vector of action weights */
    v,                        /* vector of critic weights */
    e,                        /* vector of action weight eligibilities */
    xbar;                     /* vector of critic weight eligibilities */
    float p, oldp, rhat, r;
    int box, i, y, steps = 0, failures = 0, failed;
    int seed;

    printf ("Seed? ");
    if (scanf ("%d", &seed) != 1) {
        /* Without this check srand() would be fed an uninitialized value. */
        fprintf (stderr, "error: seed must be an integer\n");
        return 1;
    }
    srand ((unsigned) seed);

/*--- Initialize action and heuristic critic weights and traces. ---*/
    for (i = 0; i < N_BOXES; i++)
        w[i] = v[i] = xbar[i] = e[i] = 0.0;

/*--- Starting state is (0 0 0 0) ---*/
    x = x_dot = theta = theta_dot = 0.0;

/*--- Find box in state space containing start state ---*/
    box = get_box (x, x_dot, theta, theta_dot);

/*--- Iterate through the action-learn loop. ---*/
    while (steps++ < MAX_STEPS && failures < MAX_FAILURES) {
    /*--- Choose action randomly, biased by current weight. ---*/
        y = (random < prob_push_right (w[box]));

    /*--- Update traces. ---*/
        e[box] += (1.0 - LAMBDAw) * (y - 0.5);
        xbar[box] += (1.0 - LAMBDAv);

    /*--- Remember prediction of failure for current state ---*/
        oldp = v[box];

    /*--- Apply action to the simulated cart-pole ---*/
        cart_pole (y, &x, &x_dot, &theta, &theta_dot);

    /*--- Get box of state space containing the resulting state. ---*/
        box = get_box (x, x_dot, theta, theta_dot);

        if (box < 0) {
        /*--- Failure occurred. ---*/
            failed = 1;
            failures++;
            printf ("Trial %d was %d steps.\n", failures, steps);
            steps = 0;

        /*--- Reset state to (0 0 0 0).  Find the box. ---*/
            x = x_dot = theta = theta_dot = 0.0;
            box = get_box (x, x_dot, theta, theta_dot);

        /*--- Reinforcement upon failure is -1. Prediction of failure is 0. ---*/
            r = -1.0;
            p = 0.;
        } else {
        /*--- Not a failure. ---*/
            failed = 0;

        /*--- Reinforcement is 0. Prediction of failure given by v weight. ---*/
            r = 0;
            p = v[box];
        }

    /*--- Heuristic reinforcement is:   current reinforcement
            + gamma * new failure prediction - previous failure prediction ---*/
        rhat = r + GAMMA * p - oldp;

        for (i = 0; i < N_BOXES; i++) {
        /*--- Update all weights. ---*/
            w[i] += ALPHA * rhat * e[i];
            v[i] += BETA * rhat * xbar[i];
            /* NOTE(review): a guard `if (v[i] < -1.0) v[i] = v[i];` stood
               here; it assigned v[i] to itself and had no effect, so it
               is omitted.  If a lower clamp at -1.0 was intended it was
               never active -- confirm before adding one. */

            if (failed) {
            /*--- If failure, zero all traces. ---*/
                e[i] = 0.;
                xbar[i] = 0.;
            } else {
            /*--- Otherwise, update (decay) the traces. ---*/
                e[i] *= LAMBDAw;
                xbar[i] *= LAMBDAv;
            }
        }

    }
    if (failures == MAX_FAILURES)
        printf ("Pole not balanced. Stopping after %d failures.", failures);
    else
        printf ("Pole balanced successfully for at least %d steps\n", steps);

    return 0;
}

/*----------------------------------------------------------------------
cart_pole:  Takes an action (0 or 1) and the current values of the
four state variables and updates their values by estimating the state
TAU seconds later.
----------------------------------------------------------------------*/

/*** Parameters for simulation ***/

#define GRAVITY 9.8
#define MASSCART 1.0
#define MASSPOLE 0.1
#define TOTAL_MASS (MASSPOLE + MASSCART)
#define LENGTH 0.5              /* actually half the pole's length */
#define POLEMASS_LENGTH (MASSPOLE * LENGTH)
#define FORCE_MAG 10.0
#define TAU 0.02                /* seconds between state updates */
#define FOURTHIRDS 1.3333333333333

void cart_pole (int action, float *x, float *x_dot,
                float *theta, float *theta_dot)
{
    float xacc, thetaacc, force, costheta, sintheta, temp;

    force = (action > 0) ? FORCE_MAG : -FORCE_MAG;
    costheta = cos (*theta);
    sintheta = sin (*theta);

    temp = (force + POLEMASS_LENGTH * *theta_dot * *theta_dot * sintheta)
        / TOTAL_MASS;

    thetaacc = (GRAVITY * sintheta - costheta * temp)
        / (LENGTH * (FOURTHIRDS - MASSPOLE * costheta * costheta
                    / TOTAL_MASS));

    xacc = temp - POLEMASS_LENGTH * thetaacc * costheta / TOTAL_MASS;

/*** Update the four state variables, using Euler's method. ***/

    *x += TAU * *x_dot;
    *x_dot += TAU * xacc;
    *theta += TAU * *theta_dot;
    *theta_dot += TAU * thetaacc;
}

/*----------------------------------------------------------------------
get_box:  Maps the state (x, x_dot, theta, theta_dot) to the index of
the box of state space containing it.  The index runs from 0 to 161
(3 x bins * 3 x_dot bins * 6 theta bins * 3 theta_dot bins = 162 boxes).
Returns -1 if x lies outside +/-2.4 or theta lies outside
+/-twelve_degrees (a failure state).
----------------------------------------------------------------------*/

#define one_degree 0.0174532    /* 2pi/360 */
#define six_degrees 0.1047192
#define twelve_degrees 0.2094384
#define fifty_degrees 0.87266

int get_box (float x, float x_dot, float theta, float theta_dot)
{
    int x_bin, x_dot_bin, theta_bin, theta_dot_bin;

    /* Failure region: cart off the track or pole past twelve degrees. */
    if (x < -2.4 || x > 2.4)
        return -1;
    if (theta < -twelve_degrees || theta > twelve_degrees)
        return -1;

    /* Each bin number counts how many ascending thresholds the value is
       NOT below.  Spelling it as !(v < t) keeps a value that compares
       false against everything in the top bin. */
    x_bin = !(x < -0.8) + !(x < 0.8);

    x_dot_bin = !(x_dot < -0.5) + !(x_dot < 0.5);

    theta_bin = !(theta < -six_degrees) + !(theta < -one_degree)
        + !(theta < 0) + !(theta < one_degree) + !(theta < six_degrees);

    theta_dot_bin = !(theta_dot < -fifty_degrees)
        + !(theta_dot < fifty_degrees);

    /* Mixed-radix combination: strides 1, 3, 9 and 54. */
    return x_bin + 3 * x_dot_bin + 9 * theta_bin + 54 * theta_dot_bin;
}