防止GCC优化循环写入内存映射地址_C_For Loop_Gcc_Volatile_68000

防止GCC优化循环写入内存映射地址

c for-loop gcc

防止GCC优化循环写入内存映射地址,c,for-loop,gcc,volatile,68000,C,For Loop,Gcc,Volatile,68000,我有一个地址指向这样一个控制端口（我正在开发一个Sega/Megadrive游戏）：以及我要设置的初始值数组： const u8 initial_vdp_vals[24] = { 0x20, 0x74, 0x30, ..etc }; 使用循环： typedef struct { u16 upper; u8 reg; u8 val; } bitset; typedef union { bitset b; u32 as_u32; } u_bitset; s

我有一个地址指向这样一个控制端口（我正在开发一个Sega/Megadrive游戏）：

以及我要设置的初始值数组：

const u8 initial_vdp_vals[24] = {
  0x20,
  0x74,
  0x30,
  ..etc
};

使用循环：

typedef struct {
  u16 upper;
  u8 reg;
  u8 val;
} bitset;

typedef union {
  bitset b;
  u32 as_u32;
} u_bitset;

static inline void init_vdp() {
  u_bitset cmd = {{0x00008000}};
  for(int i = 0; i < 24; i++) {
     cmd.b.val = initial_vdp_vals[i];
     *vdp_ctrl = cmd.as_u32;
     cmd.b.reg += 1;
  }
}

这是非常好和简洁。因此，我的另一个问题可能是（作为一个完全的C新手）：有没有更好的方法来解决我的C解决方案，让我更接近上面的程序集？老实说，我甚至不确定我的代码是否正确，因为我只是想先解决这个循环优化问题，因为我知道这将是一个持续不断的问题

生成我的问题的可运行示例：

volatile unsigned long * vdp_ctrl = (unsigned long *) 0x00C00004;

const unsigned char initial_vdp_vals[24] = {
  0x20,
  0x74,
  0x30,
  0x40,
  0x05,
  0x70,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x08,
  0x81,
  0x34,
  0x00,
  0x00,
  0x01,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00
};

typedef struct {
  unsigned int upper;
  unsigned char reg;
  unsigned char val;
} bitset;

typedef union {
  bitset b;
  unsigned long as_u32;
} u_bitset;

static inline void init_vdp() {
  u_bitset cmd = {{0x00008000}};
  for(int i = 0; i < 24; i++) {
     cmd.b.val = initial_vdp_vals[i];
     *vdp_ctrl = cmd.as_u32;
  }
}

void init() {
  init_vdp();
  for(;;) {
  }
}

注意：数组的大小决定了它是否得到优化。当我只生成两个元素时，它没有优化。

在这种代码中，您应该使用精确大小的整数。我强烈建议包装结构和工会以及

#include <stdint.h>
#define vdp_ctrl  ((volatile uint32_t *) 0x00C00004)

const unsigned char initial_vdp_vals[24] = {
  0x20,
  0x74,
  0x30,
  0x40,
  0x05,
  0x70,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x08,
  0x81,
  0x34,
  0x00,
  0x00,
  0x01,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00
};

typedef struct {
  uint16_t upper;
  uint8_t reg;
  uint8_t val;
} __attribute__((packed)) bitset;

typedef union {
  bitset b;
  uint32_t as_u32;
} __attribute__((packed)) u_bitset ;

static inline void init_vdp() {
  u_bitset cmd = {.b.upper = 0x00008000};
  for(int i = 0; i < 24; i++) 
  {
     cmd.b.val = initial_vdp_vals[i];
     *vdp_ctrl = cmd.as_u32;
  }
}

void init() {
  init_vdp();
  for(;;) {
  }
}

#包括
#定义vdp\U ctrl（（易失性uint32\U t*）0x00C00004）
const unsigned char initial_vdp_vals[24]={
0x20，
0x74，
0x30，
0x40，
0x05，
0x70，
0x00，
0x00，
0x00，
0x00，
0x00，
0x08，
0x81，
0x34，
0x00，
0x00，
0x01，
0x00，
0x00，
0x00，
0x00，
0x00，
0x00，
0x00
};
类型定义结构{
uint16_t上部；
uint8_t reg；
uint8_t val；
}__属性__（（压缩））位集；
typedef联合{
位集b；
uint32\u t as\u u32；
}__属性__（（压缩））u_位集；
静态内联void init_vdp（）{
u_位集cmd={.b.upper=0x00008000}；
对于（int i=0；i<24；i++）
{
cmd.b.val=初始值[i]；
*vdp_ctrl=cmd.as_u32；
}
}
void init（）{
init_vdp（）；
对于（；；）{
}
}

它会生成您需要的代码

在我看来，最好是用宏来代替真实的对象。这可能不会对这段琐碎的代码产生任何影响，但如果代码变得更复杂，就会产生影响。

显示并指定您正在使用的编译器版本和所有编译器开关，以便其他人可以复制该行为。我同意这会很有用。我将尝试创建一个工作示例。我添加了一个工作示例，可以复制我的结果。在您的平台上，

unsigned long

有多大？它比无符号整数大吗？你检查过了吗？我用m68k编译器得到sizeof（int）==sizeof（long）==4，除非我通过

-mshort

…不要使用packed。这是完全不必要的，并且告诉编译器对象不需要对齐，这会使它使用更大/更糟糕的代码来访问，并破坏易失性访问的原子性。当我们压缩数据时，这是必要的，因此请不要使用这样强教师风格的注释。这个评论是100%错误的。uC数据通常必须打包。除非实际布局中的字段未对齐，否则不必打包。在实际需要的地方使用packet，而不是在任何地方都使用packet，这不仅会生成更糟糕（更大/更慢）的代码，而且实际上会破坏一些东西。如果packet确实改变了代码，那么就需要它。顺便说一句，原子性、一致性和易失性等不受包装的影响。这是一个神话。不，它不会！如果声明该类型的对象，它将不再对齐，因此必须生成更多代码才能访问它。此外，如果随后获取成员的地址并将其传递给指向成员类型的指针的函数，则它们将出现故障/故障，因为它们正确地期望对齐的对象，但接收到指向未对齐对象的指针。

volatile unsigned long * vdp_ctrl = (unsigned long *) 0x00C00004;

const unsigned char initial_vdp_vals[24] = {
  0x20,
  0x74,
  0x30,
  0x40,
  0x05,
  0x70,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x08,
  0x81,
  0x34,
  0x00,
  0x00,
  0x01,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00
};

typedef struct {
  unsigned int upper;
  unsigned char reg;
  unsigned char val;
} bitset;

typedef union {
  bitset b;
  unsigned long as_u32;
} u_bitset;

static inline void init_vdp() {
  u_bitset cmd = {{0x00008000}};
  for(int i = 0; i < 24; i++) {
     cmd.b.val = initial_vdp_vals[i];
     *vdp_ctrl = cmd.as_u32;
  }
}

void init() {
  init_vdp();
  for(;;) {
  }
}

#NO_APP
    .file   "test.c"
    .text
    .align  2
    .globl  init
    .type   init, @function
init:
    link.w %fp,#0
    move.l vdp_ctrl,%a0
    moveq #24,%d0
    move.l #32768,%d1
.L2:
    move.l %d1,(%a0)
    subq.l #1,%d0
    jne .L2
.L3:
    jra .L3
    .size   init, .-init
    .globl  initial_vdp_vals
    .section    .rodata
    .type   initial_vdp_vals, @object
    .size   initial_vdp_vals, 24
initial_vdp_vals:
    .byte   32
    .byte   116
    .byte   48
    .byte   64
    .byte   5
    .byte   112
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .byte   8
    .byte   -127
    .byte   52
    .byte   0
    .byte   0
    .byte   1
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .globl  vdp_ctrl
    .data
    .align  2
    .type   vdp_ctrl, @object
    .size   vdp_ctrl, 4
vdp_ctrl:
    .long   12582916
    .ident  "GCC: (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0"
    .section    .note.GNU-stack,"",@progbits

gcc version 7.4.0 (Ubuntu 7.4.0-1ubuntu1~18.04.1)

#include <stdint.h>
#define vdp_ctrl  ((volatile uint32_t *) 0x00C00004)

const unsigned char initial_vdp_vals[24] = {
  0x20,
  0x74,
  0x30,
  0x40,
  0x05,
  0x70,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x08,
  0x81,
  0x34,
  0x00,
  0x00,
  0x01,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00
};

typedef struct {
  uint16_t upper;
  uint8_t reg;
  uint8_t val;
} __attribute__((packed)) bitset;

typedef union {
  bitset b;
  uint32_t as_u32;
} __attribute__((packed)) u_bitset ;

static inline void init_vdp() {
  u_bitset cmd = {.b.upper = 0x00008000};
  for(int i = 0; i < 24; i++) 
  {
     cmd.b.val = initial_vdp_vals[i];
     *vdp_ctrl = cmd.as_u32;
  }
}

void init() {
  init_vdp();
  for(;;) {
  }
}