C++ 什么是严格的别名规则?

C++ 什么是严格的别名规则?,c++,c,undefined-behavior,strict-aliasing,type-punning,C++,C,Undefined Behavior,Strict Aliasing,Type Punning,当询问时,人们有时会参考严格的别名规则。 他们在说什么?严格的别名不允许不同的指针类型指向相同的数据 应该有助于您全面了解该问题。严格别名不允许对同一数据使用不同的指针类型 应该有助于您全面了解这个问题。通过指针强制转换(与使用并集相反)是打破严格别名的一个主要例子。通过指针强制转换(与使用并集相反)是打破严格别名的一个主要例子。我找到的最好解释是Mike Acton。它关注的是PS3的开发,但基本上只是GCC 从文章中: 严格别名是由C(或C++)编译器做出的一种假设,即对不同类型对象的指针进

在询问 C 语言中常见的未定义行为时,人们有时会提到严格别名规则。

他们在说什么?

严格的别名不允许不同的指针类型指向相同的数据


应该有助于您全面了解该问题。

严格别名不允许对同一数据使用不同的指针类型


应该有助于您全面了解这个问题。

通过指针强制转换(而不是使用联合体 union)来访问数据,是违反严格别名规则的一个主要例子。

通过指针强制转换(而不是使用联合体 union)来访问数据,是违反严格别名规则的一个主要例子。

我找到的最好解释是 Mike Acton 的文章《Understanding Strict Aliasing》。它略微侧重于 PS3 的开发,但基本上只是针对 GCC。

从文章中:

严格别名是由C(或C++)编译器做出的一种假设,即对不同类型对象的指针进行解引用时,不会引用同一内存位置(即相互别名)

因此,基本上,如果你有一个
int*
指向某个包含
int
的内存,然后你将一个
float*
指向该内存,并将其用作
float
,你就违反了规则。如果您的代码不遵守这一点,那么编译器的优化器很可能会破坏您的代码


这个规则的例外是一个
char*
,它可以指向任何类型。

我找到的最好的解释是 Mike Acton 的文章《Understanding Strict Aliasing》。它略微侧重于 PS3 的开发,但基本上只是针对 GCC。

从文章中:

严格别名是由C(或C++)编译器做出的一种假设,即对不同类型对象的指针进行解引用时,不会引用同一内存位置(即相互别名)

因此,基本上,如果你有一个
int*
指向某个包含
int
的内存,然后你将一个
float*
指向该内存,并将其用作
float
,你就违反了规则。如果您的代码不遵守这一点,那么编译器的优化器很可能会破坏您的代码


该规则的例外是允许指向任何类型的
char*

遇到严格别名问题的典型情况是将结构(如设备/网络消息)覆盖到系统字长的缓冲区上(如指向
uint32_t
s或
uint16_t
s的指针)。当您将结构覆盖到这样的缓冲区上,或者通过指针转换将缓冲区覆盖到这样的结构上时,很容易违反严格的别名规则

因此,在这种设置中,如果我想向某个对象发送消息,我必须有两个不兼容的指针指向同一块内存。然后我可能会天真地编写如下代码:

// Two-word message that is overlaid onto the raw word buffer in main().
typedef struct Msg
{
    unsigned int a;
    unsigned int b;
} Msg;

// Declared only; the definition is not part of this snippet.
void SendWord(uint32_t);

// Fills a Msg through one pointer and reads the same storage back through a
// uint32_t pointer: the naive aliasing pattern this example illustrates.
// NOTE(review): the malloc result is neither checked nor freed in this snippet.
int main(void)
{
    // Get a 32-bit buffer from the system
    uint32_t* buff = malloc(sizeof(Msg));
    
    // Alias that buffer through message
    Msg* msg = (Msg*)(buff);
    
    // Send a bunch of messages    
    for (int i = 0; i < 10; ++i)
    {
        // Writes go through msg (type Msg*) ...
        msg->a = i;
        msg->b = i+1;
        // ... while reads go through buff (type uint32_t*), which points at the same bytes.
        SendWord(buff[0]);
        SendWord(buff[1]);   
    }
}
并重写前面的循环以利用这个方便的函数

// Same message loop as before, except that each iteration makes a single
// SendMessage call on buff instead of two SendWord calls.
// NOTE(review): SendMessage is not defined in this snippet; the second argument (2)
// is presumably the number of words to send -- confirm against its declaration.
for (int i = 0; i < 10; ++i)
{
    msg->a = i;
    msg->b = i+1;
    SendMessage(buff, 2);
}
  • 您可以在编译器中禁用严格别名(在 gcc 中使用 -fno-strict-aliasing)

  • 您可以使用
    char*
    作为别名,而不是系统字长(word)的指针。规则允许对
    char*
    进行例外处理(包括
    signed char
    和
    unsigned char
    )。编译器总是假定
    char*
    可能是其他类型的别名。然而,反过来并不成立:编译器不会假设您的结构体是字符缓冲区的别名

  • 初学者要当心

    将两种类型相互叠加时,这只是潜在雷区之一。您还应该了解字节序(endianness)、字对齐(word alignment),以及如何通过正确地打包结构体(packing structs)来处理对齐问题

    脚注 1:C 2011 标准第 6.5 节第 7 段允许通过左值访问对象的类型有:

    • 与对象的有效类型兼容的类型
    • 与对象的有效类型兼容的类型的限定版本
    • 与对象的有效类型相对应的有符号或无符号类型
    • 一种类型,它是与对象的有效类型的限定版本相对应的有符号或无符号类型
    • 在其成员中包含上述类型之一的聚合或联合类型(递归地包括子聚合或包含的联合的成员),或
    • 字符类型

      • 遇到严格别名问题的典型情况是将结构(如设备/网络消息)覆盖到系统字长的缓冲区(如指向
        uint32_t
        s或
        uint16_t
        s的指针)上。当您将结构覆盖到这样的缓冲区上,或者通过指针转换将缓冲区覆盖到这样的结构上时,很容易违反严格的别名规则

        因此,在这种设置中,如果我想向某个对象发送消息,我必须有两个不兼容的指针指向同一块内存。然后我可能会天真地编写如下代码:

        // Two-word message that is overlaid onto the raw word buffer in main().
        typedef struct Msg
        {
            unsigned int a;
            unsigned int b;
        } Msg;
        
        // Declared only; the definition is not part of this snippet.
        void SendWord(uint32_t);
        
        // Fills a Msg through one pointer and reads the same storage back through a
        // uint32_t pointer: the naive aliasing pattern this example illustrates.
        // NOTE(review): the malloc result is neither checked nor freed in this snippet.
        int main(void)
        {
            // Get a 32-bit buffer from the system
            uint32_t* buff = malloc(sizeof(Msg));
            
            // Alias that buffer through message
            Msg* msg = (Msg*)(buff);
            
            // Send a bunch of messages    
            for (int i = 0; i < 10; ++i)
            {
                // Writes go through msg (type Msg*) ...
                msg->a = i;
                msg->b = i+1;
                // ... while reads go through buff (type uint32_t*), which points at the same bytes.
                SendWord(buff[0]);
                SendWord(buff[1]);   
            }
        }
        
        并重写前面的循环以利用这个方便的函数

        // Same message loop as before, except that each iteration makes a single
        // SendMessage call on buff instead of two SendWord calls.
        // NOTE(review): SendMessage is not defined in this snippet; the second argument (2)
        // is presumably the number of words to send -- confirm against its declaration.
        for (int i = 0; i < 10; ++i)
        {
            msg->a = i;
            msg->b = i+1;
            SendMessage(buff, 2);
        }
        
      • 您可以在编译器中禁用严格别名(在 gcc 中使用 -fno-strict-aliasing)

      • 您可以使用
        char*
        作为别名,而不是系统字长(word)的指针。规则允许对
        char*
        进行例外处理(包括
        signed char
        和
        unsigned char
        )。编译器总是假定
        char*
        可能是其他类型的别名。然而,反过来并不成立:编译器不会假设您的结构体是字符缓冲区的别名

      初学者要当心

      将两种类型相互叠加时,这只是潜在雷区之一。您还应该了解字节序(endianness)、字对齐(word alignment),以及如何通过正确地打包结构体(packing structs)来处理对齐问题

      脚注 1:C 2011 标准第 6.5 节第 7 段允许通过左值访问对象的类型有:

      • 与对象的有效类型兼容的类型
      • 与对象的有效类型兼容的类型的限定版本
      • 与对象的有效类型相对应的有符号或无符号类型
      • 一种类型,它是与对象的有效类型的限定版本相对应的有符号或无符号类型
        #include <stdio.h>
        
        // Stores 5 through h and 6 through k, then re-reads *h and prints a
        // message if it still compares equal to 5.
        void check(short *h,long *k)
        {
            *h=5;
            *k=6;
            if (*h == 5)
                printf("strict aliasing problem\n");
        }
        
        // Calls check() with both arguments derived from the same array, so the
        // short* and the long* refer to the same storage.
        int main(void)
        {
            long      k[1];
            check((short *)k,k);
            return 0;
        }
        
        movw    $5, (%rdi)
        movq    $6, (%rsi)
        movl    $.LC0, %edi
        jmp puts
        
        int x;
        // Writes the global x, then writes 1.0 through p (a double*), then
        // reads x back and returns it.
        int test(double *p)
        {
          x=5;
          *p = 1.0;
          return x;
        }
        
        void test(void)
        {
          struct S {int x;} s;
          s.x = 1;
        }
        
        // Writes 1 through ip, writes 1.23 through dp, then re-reads *ip and
        // returns it.
        int test(int *ip, double *dp)
        {
          *ip = 1;
          *dp = 1.23;
          return *ip;
        }
        // Calls test() with the addresses of two members of the same union
        // object, so both pointers refer to overlapping storage.
        int test2(void)
        {
          union U { int i; double d; } u;
          return test(&u.i, &u.d);
        }
        
         void inc_int(int *p) { *p = 3; }
         int test(void)
         {
           int *p;
           struct S { int x; } s;
           s.x = 1;
           p = &s.x;
           inc_int(p);
           return s.x;
         }
        
         void inc_int(int *p) { *p = 3; }
         int test(void)
         {
           int *p;
           struct S { int x; } s;
           p = &s.x;
           s.x = 1;  //  !!*!!
           *p += 1;
           return s.x;
         }
        
        // Adds *a into *b, then adds the updated *b back into *a.
        void merge_two_ints(int *a, int *b) {
          *b = *b + *a;
          *a = *a + *b;
        }
        
        void merge_two_numbers(int *a, long *b) {...}
        
        void merge_two_ints(int * restrict a, int * restrict b) {...}
        
        int x = 10;
        int *ip = &x;
        
        std::cout << *ip << "\n";
        *ip = 12;
        std::cout << x << "\n";
        
        // Stores 1 through i, stores 0.f through f, then re-reads *i and returns it.
        int foo( float *f, int *i ) { 
            *i = 1;               
            *f = 0.f;            
        
           return *i;
        }
        
        // Passes the address of the same int as both arguments: once cast to
        // float* and once as int*.
        int main() {
            int x = 0;
        
            std::cout << x << "\n";   // Expect 0
            x = foo(reinterpret_cast<float*>(&x), &x);
            std::cout << x << "\n";   // Expect 0?
        }
        
        0
        1
        
        foo(float*, int*): # @foo(float*, int*)
        mov dword ptr [rsi], 1  
        mov dword ptr [rdi], 0
        mov eax, 1                       
        ret
        
        int x = 1;
        int *p = &x;   
        printf("%d\n", *p); // *p gives us an lvalue expression of type int which is compatible with int
        
        int x = 1;
        const int *p = &x;
        printf("%d\n", *p); // *p gives us an lvalue expression of type const int which is compatible with int
        
        int x = 1;
        unsigned int *p = (unsigned int*)&x;
        printf("%u\n", *p ); // *p gives us an lvalue expression of type unsigned int which corresponds to 
                             // the effective type of the object
        
        int x = 1;
        const unsigned int *p = (const unsigned int*)&x;
        printf("%u\n", *p ); // *p gives us an lvalue expression of type const unsigned int which is an unsigned type 
                             // that corresponds to a qualified version of the effective type of the object
        
        struct foo {
          int x;
        };
        
        void foobar( struct foo *fp, int *ip );  // struct foo is an aggregate that includes int among its members so it
                                                 // can alias with *ip
        
        foo f;
        foobar( &f, &f.x );
        
        int x = 65;
        char *p = (char *)&x;
        printf("%c\n", *p );  // *p gives us an lvalue expression of type char which is a character type.
                              // The results are not portable due to endianness issues.
        
        void *p = malloc( sizeof(int) ); // We have allocated storage but not started the lifetime of an object
        int *ip = new (p) int{0};        // Placement new changes the dynamic type of the object to int
        std::cout << *ip << "\n";        // *ip gives us a glvalue expression of type int which matches the dynamic type 
                                          // of the allocated object
        
        int x = 1;
        const int *cip = &x;
        std::cout << *cip << "\n";  // *cip gives us a glvalue expression of type const int which is a cv-qualified 
                                    // version of the dynamic type of x
        
        // si and ui are the signed and unsigned types corresponding to each other's dynamic types.
        // We can see from this godbolt(https://godbolt.org/g/KowGXB) the optimizer assumes aliasing.
        signed int foo( signed int &si, unsigned int &ui ) {
          si = 1;
          ui = 2;
        
          return si;
        }
        
        signed int foo( const signed int &si1, int &si2); // Hard to show this one assumes aliasing
        
        struct foo {
         int x;
        };
        
        // Compiler Explorer example(https://godbolt.org/g/z2wJTC) shows aliasing assumption
        int foobar( foo &fp, int &ip ) {
         fp.x = 1;
         ip = 2;
        
         return fp.x;
        }
        
        foo f; 
        foobar( f, f.x ); 
        
        struct foo { int x ; };
        
        struct bar : public foo {};
        
        int foobar( foo &f, bar &b ) {
          f.x = 1;
          b.x = 2;
        
          return f.x;
        }
        
        int foo( std::byte &b, uint32_t &ui ) {
          b = static_cast<std::byte>('a');
          ui = 0xFFFFFFFF;                   
        
          return std::to_integer<int>( b );  // b gives us a glvalue expression of type std::byte which can alias
                                             // an object of type uint32_t
        }
        
        int x =  1 ;
        
        // In C
        float *fp = (float*)&x ;  // Not a valid aliasing
        
        // In C++
        float *fp = reinterpret_cast<float*>(&x) ;  // Not a valid aliasing
        
        printf( "%f\n", *fp ) ;
        
        // Type punning through a union: write the float member, then read the
        // int member that shares its storage.
        union u1
        {
          int n;
          float f;
        } ;
        
        union u1 u;
        u.f = 1.0f;
        
        // The format string originally ended with a typographic quote (”), which
        // left the string literal unterminated; it must be a plain ASCII ".
        printf( "%d\n", u.n );  // UB in C++: n is not the active member
        
        static_assert( sizeof( double ) == sizeof( int64_t ) );  // C++17 does not require a message
        
        // Copies the object representation of d into a 64-bit integer with
        // std::memcpy rather than reading d through a pointer of another type.
        // The static_assert above guarantees both objects have the same size.
        void func1( double d ) {
          std::int64_t n;
          std::memcpy(&n, &d, sizeof d); 
          //...
        }  // closing brace was missing, leaving the function definition unterminated
        
        std::cout << bit_cast<float>(0x447a0000) << "\n" ; //assuming sizeof(float) == sizeof(unsigned int)
        
        struct uint_chars {
         unsigned char arr[sizeof( unsigned int )] = {} ;  // Assume sizeof( unsigned int ) == 4
        };
        
        // Walks the byte buffer p in unsigned-int-sized steps, converts each chunk
        // to an unsigned int via memcpy + bit_cast, and accumulates foo() of each
        // chunk into the returned total.
        // Assume len is a multiple of 4 
        int bar( unsigned char *p, size_t len ) {
         int result = 0;
        
         for( size_t index = 0; index < len; index += sizeof(unsigned int) ) {
           uint_chars f;
           std::memcpy( f.arr, &p[index], sizeof(unsigned int));
           // Use a distinct name here: the original declared a second `result`
           // in this scope, which shadowed the accumulator, so the sum was
           // discarded on every iteration and the function always returned 0.
           unsigned int value = bit_cast<unsigned int>(f);
        
           result += foo( value );
         }
        
         return result ;
        }
        
        int a = 1;
        short j;
        float f = 1.f; // Originally not initialized but tis-kernel caught 
                       // it was being accessed w/ an indeterminate value below
        
        printf("%i\n", j = *(reinterpret_cast<short*>(&a)));
        printf("%i\n", j = *(reinterpret_cast<int*>(&f)));
        
        int *p;
        
        p=&a;
        printf("%i\n", j = *(reinterpret_cast<short*>(p)));
        
        int *x = new int[2];               // 8 bytes: [0,7].
        int *u = (int*)((char*)x + 6);     // regardless of alignment of x this will not be an aligned address
        *u = 1;                            // Access to range [6-9]
        printf( "%d\n", *u );              // Access to range [6-9]
        
        int a = 1;
        short j;
        float f = 1.0 ;
        
        printf("%i\n", j = *((short*)&a));
        printf("%i\n", j = *((int*)&f));
        
        int *p; 
        
        p=&a;
        printf("%i\n", j = *((short*)p));
        
        ./bin/tis-kernel -sa example1.c 
        ...
        example1.c:9:[sa] warning: The pointer (short *)(& a) has type short *. It violates strict aliasing
                      rules by accessing a cell with effective type int.
        ...
        
        example1.c:10:[sa] warning: The pointer (int *)(& f) has type int *. It violates strict aliasing rules by
                      accessing a cell with effective type float.
                      Callstack: main
        ...
        
        example1.c:15:[sa] warning: The pointer (short *)p has type short *. It violates strict aliasing rules by
                      accessing a cell with effective type int.