C++ 什么是严格的别名规则？_C++_C_Undefined Behavior_Strict Aliasing_Type Punning

C++ 什么是严格的别名规则？

c++ c

C++ 什么是严格的别名规则？,c++,c,undefined-behavior,strict-aliasing,type-punning,C++,C,Undefined Behavior,Strict Aliasing,Type Punning,当询问时，人们有时会参考严格的别名规则。他们在说什么？严格的别名不允许不同的指针类型指向相同的数据应该有助于您全面了解该问题。严格别名不允许对同一数据使用不同的指针类型应该有助于您全面了解这个问题。通过指针强制转换（与使用并集相反）是打破严格别名的一个主要例子。通过指针强制转换（与使用并集相反）是打破严格别名的一个主要例子。我找到的最好解释是Mike Acton。它关注的是PS3的开发，但基本上只是GCC 从文章中：严格别名是由C（或C++）编译器做出的一种假设，即对不同类型对象的指针进

在询问 C 语言中常见的未定义行为时，人们有时会提到严格别名规则（strict aliasing rule）。

他们在说什么？

严格的别名不允许不同的指针类型指向相同的数据

应该有助于您全面了解该问题。

严格别名不允许对同一数据使用不同的指针类型

应该有助于您全面了解这个问题。

通过指针强制转换进行类型双关（而不是使用联合体 union）是违反严格别名规则的一个主要例子。

我找到的最好解释是 Mike Acton 的文章《Understanding Strict Aliasing》。它略微偏重 PS3 开发，但所讲内容基本上只与 GCC 有关。

从文章中：

严格别名是由C（或C++）编译器做出的一种假设，即对不同类型对象的指针进行解引用时，不会引用同一内存位置（即相互别名）

因此，基本上，如果你有一个

int*

指向某个包含

int

的内存，然后你将一个

float*

指向该内存，并将其用作

float

，你就违反了规则。如果您的代码不遵守这一点，那么编译器的优化器很可能会破坏您的代码

这个规则的例外是一个

char*

，它可以指向任何类型。

我找到的最好的解释是 Mike Acton 的文章《Understanding Strict Aliasing》。它略微偏重 PS3 开发，但所讲内容基本上只与 GCC 有关。

从文章中：

严格别名是由C（或C++）编译器做出的一种假设，即对不同类型对象的指针进行解引用时，不会引用同一内存位置（即相互别名）

因此，基本上，如果你有一个

int*

指向某个包含

int

的内存，然后你将一个

float*

指向该内存，并将其用作

float

，你就违反了规则。如果您的代码不遵守这一点，那么编译器的优化器很可能会破坏您的代码

该规则的例外是允许指向任何类型的

char*

。

遇到严格别名问题的典型情况是将结构（如设备/网络消息）覆盖到系统字长的缓冲区上（如指向

uint32_t

s或

uint16_t

s的指针）。当您将结构覆盖到这样的缓冲区上，或者通过指针转换将缓冲区覆盖到这样的结构上时，很容易违反严格的别名规则

因此，在这种设置中，如果我想向某个对象发送消息，我必须有两个不兼容的指针指向同一块内存。然后我可能会天真地编写如下代码：

typedef struct Msg
{
    unsigned int a;
    unsigned int b;
} Msg;

void SendWord(uint32_t);

int main(void)
{
    // Get a 32-bit buffer from the system
    uint32_t* buff = malloc(sizeof(Msg));
    
    // Alias that buffer through message
    Msg* msg = (Msg*)(buff);
    
    // Send a bunch of messages    
    for (int i = 0; i < 10; ++i)
    {
        msg->a = i;
        msg->b = i+1;
        SendWord(buff[0]);
        SendWord(buff[1]);   
    }
}

并重写前面的循环以利用这个方便的函数

for (int i = 0; i < 10; ++i)
{
    msg->a = i;
    msg->b = i+1;
    SendMessage(buff, 2);
}

您可以在编译器中禁用严格别名（在 gcc 中使用 -fno-strict-aliasing）

您可以使用

char*

作为别名，而不是使用系统字长的类型。规则允许对

char*

进行例外处理（包括

signed char

和

unsigned char

）。通常假定

char*

别名为其他类型。然而，这不会以另一种方式起作用：没有假设您的结构会为字符缓冲区别名

初学者要当心

将两种类型相互叠加时，这只是其中一个潜在的雷区。您还应该了解字节序（endianness）、字对齐（word alignment），以及如何通过正确地打包结构体（packing structs）来处理对齐问题

脚注 1：C 2011 标准 6.5 节第 7 段允许左值访问的类型有：

与对象的有效类型兼容的类型
与对象的有效类型兼容的类型的限定版本
与对象的有效类型相对应的有符号或无符号类型
一种类型，它是与对象的有效类型的限定版本相对应的有符号或无符号类型
在其成员中包含上述类型之一的聚合或联合类型（递归地包括子聚合或包含的联合的成员），或
字符类型

uint32_t

uint16_t

typedef struct Msg
{
    unsigned int a;
    unsigned int b;
} Msg;

void SendWord(uint32_t);

int main(void)
{
    // Get a 32-bit buffer from the system
    uint32_t* buff = malloc(sizeof(Msg));
    
    // Alias that buffer through message
    Msg* msg = (Msg*)(buff);
    
    // Send a bunch of messages    
    for (int i = 0; i < 10; ++i)
    {
        msg->a = i;
        msg->b = i+1;
        SendWord(buff[0]);
        SendWord(buff[1]);   
    }
}

for (int i = 0; i < 10; ++i)
{
    msg->a = i;
    msg->b = i+1;
    SendMessage(buff, 2);
}

您可以在编译器中禁用严格别名（在 gcc 中使用 -fno-strict-aliasing）
您可以使用
```
char*
```
作为别名，而不是使用系统字长的类型。规则允许对
```
char*
```
进行例外处理（包括
```
signed char
```
和
```
unsigned char
```
）。通常假定
```
char*
```
别名为其他类型。然而，这不会以另一种方式起作用：没有假设您的结构会为字符缓冲区别名

初学者要当心

与对象的有效类型兼容的类型
与对象的有效类型兼容的类型的限定版本
与对象的有效类型相对应的有符号或无符号类型

一种类型，它是与对象的有效类型的限定版本相对应的有符号或无符号类型

#include <stdio.h>

void check(short *h,long *k)
{
    *h=5;
    *k=6;
    if (*h == 5)
        printf("strict aliasing problem\n");
}

int main(void)
{
    long      k[1];
    check((short *)k,k);
    return 0;
}

movw    $5, (%rdi)
movq    $6, (%rsi)
movl    $.LC0, %edi
jmp puts

int x;
int test(double *p)
{
  x=5;
  *p = 1.0;
  return x;
}

void test(void)
{
  struct S {int x;} s;
  s.x = 1;
}

int test(int *ip, double *dp)
{
  *ip = 1;
  *dp = 1.23;
  return *ip;
}
int test2(void)
{
  union U { int i; double d; } u;
  return test(&u.i, &u.d);
}

 void inc_int(int *p) { *p = 3; }
 int test(void)
 {
   int *p;
   struct S { int x; } s;
   s.x = 1;
   p = &s.x;
   inc_int(p);
   return s.x;
 }

 void inc_int(int *p) { *p = 3; }
 int test(void)
 {
   int *p;
   struct S { int x; } s;
   p = &s.x;
   s.x = 1;  //  !!*!!
   *p += 1;
   return s.x;
 }

// Combines two ints in place: *b first absorbs *a, then *a absorbs the
// updated *b. Each statement re-reads its operands through the pointers, so
// the result is also well defined when a and b point to the same int.
void merge_two_ints(int *a, int *b) {
  *b = *b + *a;
  *a = *a + *b;
}

void merge_two_numbers(int *a, long *b) {...}

void merge_two_ints(int * restrict a, int * restrict b) {...}

int x = 10;
int *ip = &x;

std::cout << *ip << "\n";
*ip = 12;
std::cout << x << "\n";

int foo( float *f, int *i ) { 
    *i = 1;               
    *f = 0.f;            

   return *i;
}

int main() {
    int x = 0;

    std::cout << x << "\n";   // Expect 0
    x = foo(reinterpret_cast<float*>(&x), &x);
    std::cout << x << "\n";   // Expect 0?
}

0
1

foo(float*, int*): # @foo(float*, int*)
mov dword ptr [rsi], 1  
mov dword ptr [rdi], 0
mov eax, 1                       
ret

int x = 1;
int *p = &x;   
printf("%d\n", *p); // *p gives us an lvalue expression of type int which is compatible with int

int x = 1;
const int *p = &x;
printf("%d\n", *p); // *p gives us an lvalue expression of type const int which is compatible with int

int x = 1;
unsigned int *p = (unsigned int*)&x;
printf("%u\n", *p ); // *p gives us an lvalue expression of type unsigned int which corresponds to 
                     // the effective type of the object

int x = 1;
const unsigned int *p = (const unsigned int*)&x;
printf("%u\n", *p ); // *p gives us an lvalue expression of type const unsigned int, which is an unsigned type
                     // that corresponds to a qualified version of the effective type of the object

struct foo {
  int x;
};

void foobar( struct foo *fp, int *ip );  // struct foo is an aggregate that includes int among its members, so it
                                         // can alias with *ip

foo f;
foobar( &f, &f.x );

int x = 65;
char *p = (char *)&x;
printf("%c\n", *p );  // *p gives us an lvalue expression of type char which is a character type.
                      // The results are not portable due to endianness issues.

void *p = malloc( sizeof(int) ); // We have allocated storage but not started the lifetime of an object
int *ip = new (p) int{0};        // Placement new changes the dynamic type of the object to int
std::cout << *ip << "\n";        // *ip gives us a glvalue expression of type int which matches the dynamic type 
                                  // of the allocated object

int x = 1;
const int *cip = &x;
std::cout << *cip << "\n";  // *cip gives us a glvalue expression of type const int which is a cv-qualified 
                            // version of the dynamic type of x

// Both si and ui are signed or unsigned types corresponding to each other's dynamic types.
// We can see from this godbolt(https://godbolt.org/g/KowGXB) that the optimizer assumes aliasing.
signed int foo( signed int &si, unsigned int &ui ) {
  si = 1;
  ui = 2;

  return si;
}

signed int foo( const signed int &si1, int &si2); // Hard to show this one assumes aliasing

struct foo {
 int x;
};

// Compiler Explorer example(https://godbolt.org/g/z2wJTC) shows aliasing assumption
int foobar( foo &fp, int &ip ) {
 fp.x = 1;
 ip = 2;

 return fp.x;
}

foo f; 
foobar( f, f.x );

struct foo { int x ; };

struct bar : public foo {};

int foobar( foo &f, bar &b ) {
  f.x = 1;
  b.x = 2;

  return f.x;
}

int foo( std::byte &b, uint32_t &ui ) {
  b = static_cast<std::byte>('a');
  ui = 0xFFFFFFFF;                   

  return std::to_integer<int>( b );  // b gives us a glvalue expression of type std::byte which can alias
                                     // an object of type uint32_t
}

int x =  1 ;

// In C
float *fp = (float*)&x ;  // Not a valid aliasing

// In C++
float *fp = reinterpret_cast<float*>(&x) ;  // Not a valid aliasing

printf( "%f\n", *fp ) ;

// Type punning through a union: store into one member, then read another.
union u1
{
  int n;
  float f;
} ;

union u1 u;
u.f = 1.0f;   // f becomes the active member

printf( "%d\n", u.n );  // UB in C++: n is not the active member

static_assert( sizeof( double ) == sizeof( int64_t ) );  // C++17 does not require a message

void func1( double d ) {
  std::int64_t n;
  std::memcpy(&n, &d, sizeof d); 
  //...

std::cout << bit_cast<float>(0x447a0000) << "\n" ; //assuming sizeof(float) == sizeof(unsigned int)

struct uint_chars {
 unsigned char arr[sizeof( unsigned int )] = {} ;  // Assume sizeof( unsigned int ) == 4
};

// Walks the byte buffer p in unsigned int-sized chunks, converts each chunk to
// an unsigned int without violating strict aliasing (memcpy into a
// uint_chars, then bit_cast), and accumulates foo() of every chunk.
//
// p   - start of the byte buffer
// len - number of bytes to process; assumed to be a multiple of 4
//
// Returns the sum of foo(chunk) over all chunks (0 when len is 0).
int bar( unsigned char *p, size_t len ) {
 int result = 0;

 for( size_t index = 0; index < len; index += sizeof(unsigned int) ) {
   uint_chars f;
   std::memcpy( f.arr, &p[index], sizeof(unsigned int));
   // Deliberately not named `result`: a same-named local here would shadow
   // the accumulator above and the function would always return 0.
   unsigned int value = bit_cast<unsigned int>(f);

   result += foo( value );
 }

 return result ;
}

int a = 1;
short j;
float f = 1.f; // Originally not initialized but tis-kernel caught 
               // it was being accessed w/ an indeterminate value below

printf("%i\n", j = *(reinterpret_cast<short*>(&a)));
printf("%i\n", j = *(reinterpret_cast<int*>(&f)));

int *p;

p=&a;
printf("%i\n", j = *(reinterpret_cast<short*>(p)));

int *x = new int[2];               // 8 bytes: [0,7].
int *u = (int*)((char*)x + 6);     // regardless of alignment of x this will not be an aligned address
*u = 1;                            // Access to range [6-9]
printf( "%d\n", *u );              // Access to range [6-9]

int a = 1;
short j;
float f = 1.0 ;

printf("%i\n", j = *((short*)&a));
printf("%i\n", j = *((int*)&f));

int *p; 

p=&a;
printf("%i\n", j = *((short*)p));

./bin/tis-kernel -sa example1.c 
...
example1.c:9:[sa] warning: The pointer (short *)(& a) has type short *. It violates strict aliasing
              rules by accessing a cell with effective type int.
...

example1.c:10:[sa] warning: The pointer (int *)(& f) has type int *. It violates strict aliasing rules by
              accessing a cell with effective type float.
              Callstack: main
...

example1.c:15:[sa] warning: The pointer (short *)p has type short *. It violates strict aliasing rules by
              accessing a cell with effective type int.