C++ 什么是严格的别名规则?
当询问时,人们有时会参考严格的别名规则。C++ 什么是严格的别名规则?,c++,c,undefined-behavior,strict-aliasing,type-punning,C++,C,Undefined Behavior,Strict Aliasing,Type Punning,当询问时,人们有时会参考严格的别名规则。 他们在说什么?严格的别名不允许不同的指针类型指向相同的数据 应该有助于您全面了解该问题。严格别名不允许对同一数据使用不同的指针类型 应该有助于您全面了解这个问题。通过指针强制转换(与使用并集相反)是打破严格别名的一个主要例子。通过指针强制转换(与使用并集相反)是打破严格别名的一个主要例子。我找到的最好解释是Mike Acton。它关注的是PS3的开发,但基本上只是GCC 从文章中: 严格别名是由C(或C++)编译器做出的一种假设,即对不同类型对象的指针进
他们在说什么?严格的别名不允许不同的指针类型指向相同的数据
应该有助于您全面了解该问题。严格别名不允许对同一数据使用不同的指针类型
应该有助于您全面了解这个问题。通过指针强制转换进行类型双关(而不是使用联合体)是打破严格别名的一个主要例子。通过指针强制转换进行类型双关(而不是使用联合体)是打破严格别名的一个主要例子。我找到的最好解释是Mike Acton的文章。它侧重于PS3的开发,但那基本上就是GCC 从文章中: 严格别名是由C(或C++)编译器做出的一种假设,即对不同类型对象的指针进行解引用时,不会引用同一内存位置(即相互别名) 因此,基本上,如果你有一个
int*
指向某个包含int
的内存,然后你将一个float*
指向该内存,并将其用作float
,你就违反了规则。如果您的代码不遵守这一点,那么编译器的优化器很可能会破坏您的代码
这个规则的例外是一个
char*
,它可以指向任何类型。我找到的最好的解释是Mike Acton。它关注的是PS3的开发,但基本上只是GCC
从文章中:
严格别名是由C(或C++)编译器做出的一种假设,即对不同类型对象的指针进行解引用时,不会引用同一内存位置(即相互别名)
因此,基本上,如果你有一个int*
指向某个包含int
的内存,然后你将一个float*
指向该内存,并将其用作float
,你就违反了规则。如果您的代码不遵守这一点,那么编译器的优化器很可能会破坏您的代码
该规则的例外是允许指向任何类型的
char*
。遇到严格别名问题的典型情况是将结构(如设备/网络消息)覆盖到系统字长的缓冲区上(如指向uint32_t
或uint16_t
的指针)。当您将结构覆盖到这样的缓冲区上,或者通过指针转换将缓冲区覆盖到这样的结构上时,很容易违反严格的别名规则
因此,在这种设置中,如果我想向某个对象发送消息,我必须有两个不兼容的指针指向同一块内存。然后我可能会天真地编写如下代码:
/* Message record made of two unsigned int fields, a and b. */
struct Msg
{
    unsigned a;
    unsigned b;
};
typedef struct Msg Msg;
void SendWord(uint32_t);
// Example of the aliasing hazard: one allocation is written through a Msg*
// and read back through a uint32_t*.
// NOTE(review): the malloc result is neither checked nor freed in this listing.
int main(void)
{
// Get a 32-bit buffer from the system
uint32_t* buff = malloc(sizeof(Msg));
// Alias that buffer through message
Msg* msg = (Msg*)(buff);
// Send a bunch of messages
for (int i = 0; i < 10; ++i)
{
// Stores go through msg (a Msg lvalue) ...
msg->a = i;
msg->b = i+1;
// ... while the loads go through buff (uint32_t lvalues). A compiler applying
// the strict aliasing rule may assume the stores above did not change buff[0]
// and buff[1].
SendWord(buff[0]);
SendWord(buff[1]);
}
}
并重写前面的循环以利用这个方便的函数
// Same message loop as before, but both words are handed off with one SendMessage call.
// NOTE(review): SendMessage is not defined in this excerpt; presumably it takes the
// buffer and a word count — confirm against the full listing.
for (int i = 0; i < 10; ++i)
{
msg->a = i;
msg->b = i+1;
SendMessage(buff, 2);
}
使用char*
作为别名,而不是系统字长的类型。规则允许对char*
进行例外处理(包括signed char
和unsigned char
)。通常假定char*
可以作为其他类型的别名。然而,反过来并不成立:编译器不会假设您的结构体可以作为字符缓冲区的别名
- 与对象的有效类型兼容的类型
- 与对象的有效类型兼容的类型的限定版本
- 与对象的有效类型相对应的有符号或无符号类型
- 一种类型,它是与对象的有效类型的限定版本相对应的有符号或无符号类型
- 在其成员中包含上述类型之一的聚合或联合类型(递归地包括子聚合或包含的联合的成员),或
- 字符类型
遇到严格别名问题的典型情况是将结构(如设备/网络消息)覆盖到系统字长的缓冲区(如指向
uint32_t
或uint16_t
的指针)上。当您将结构覆盖到这样的缓冲区上,或者通过指针转换将缓冲区覆盖到这样的结构上时,很容易违反严格的别名规则
因此,在这种设置中,如果我想向某个对象发送消息,我必须有两个不兼容的指针指向同一块内存。然后我可能会天真地编写如下代码:
/* Message record made of two unsigned int fields, a and b. */
struct Msg
{
    unsigned a;
    unsigned b;
};
typedef struct Msg Msg;
void SendWord(uint32_t);
// Example of the aliasing hazard: one allocation is written through a Msg*
// and read back through a uint32_t*.
// NOTE(review): the malloc result is neither checked nor freed in this listing.
int main(void)
{
// Get a 32-bit buffer from the system
uint32_t* buff = malloc(sizeof(Msg));
// Alias that buffer through message
Msg* msg = (Msg*)(buff);
// Send a bunch of messages
for (int i = 0; i < 10; ++i)
{
// Stores go through msg (a Msg lvalue) ...
msg->a = i;
msg->b = i+1;
// ... while the loads go through buff (uint32_t lvalues). A compiler applying
// the strict aliasing rule may assume the stores above did not change buff[0]
// and buff[1].
SendWord(buff[0]);
SendWord(buff[1]);
}
}
并重写前面的循环以利用这个方便的函数
// Same message loop as before, but both words are handed off with one SendMessage call.
// NOTE(review): SendMessage is not defined in this excerpt; presumably it takes the
// buffer and a word count — confirm against the full listing.
for (int i = 0; i < 10; ++i)
{
msg->a = i;
msg->b = i+1;
SendMessage(buff, 2);
}
使用char*
作为别名,而不是系统字长的类型。规则允许对char*
进行例外处理(包括signed char
和unsigned char
)。通常假定char*
可以作为其他类型的别名。然而,反过来并不成立:编译器不会假设您的结构体可以作为字符缓冲区的别名
- 与对象的有效类型兼容的类型
- 与对象的有效类型兼容的类型的限定版本
- 与对象的有效类型相对应的有符号或无符号类型
- 一种类型,它是与对象的有效类型的限定版本相对应的有符号或无符号类型
#include <stdio.h>

/*
 * Writes 5 through h, then 6 through k, and prints a message if *h still
 * reads back as 5. In main both pointers refer to the same storage, so the
 * second store overlaps the first; a compiler relying on strict aliasing may
 * assume a short* and a long* never alias and keep the 5.
 */
void check(short *h,long *k)
{
    *h=5;
    *k=6;
    if (*h == 5)
        printf("strict aliasing problem\n");
}

/* Passes the same array to check() as both a short* and a long*. */
int main(void)
{
    long k[1];
    check((short *)k,k);
    return 0;
}
movw $5, (%rdi) movq $6, (%rsi) movl $.LC0, %edi jmp puts
int x;

/*
 * Stores 5 in the global x, then stores 1.0 through p, and returns x as read
 * after both stores.
 */
int test(double *p)
{
    x = 5;
    *p = 1.0;
    return x;
}
void test(void) { struct S {int x;} s; s.x = 1; }
/*
 * Stores 1 through ip, then 1.23 through dp, and returns the value read back
 * through ip after both stores.
 */
int test(int *ip, double *dp)
{
    *ip = 1;
    *dp = 1.23;
    return *ip;
}

/* Calls test() with pointers to the two members of one union object. */
int test2(void)
{
    union U
    {
        int i;
        double d;
    } storage;

    return test(&storage.i, &storage.d);
}
/* Stores 3 in the int that p points to. */
void inc_int(int *p)
{
    *p = 3;
}

/*
 * Sets s.x to 1, passes the address of s.x to inc_int(), and returns s.x as
 * read afterwards.
 */
int test(void)
{
    struct S
    {
        int x;
    } s;
    int *member;

    s.x = 1;
    member = &s.x;
    inc_int(member);
    return s.x;
}
/* Stores 3 in the int that p points to. */
void inc_int(int *p)
{
    *p = 3;
}

/*
 * Takes the address of s.x first, then writes s.x directly, then increments
 * the same member through the pointer, and returns s.x.
 */
int test(void)
{
    int *p;
    struct S { int x; } s;

    p = &s.x;
    s.x = 1;   // !!*!!
    *p += 1;   /* updates s.x through the int* taken above */
    return s.x;
}
/*
 * Adds *a into *b, then adds the updated *b into *a. Each operand is re-read
 * from memory, so the result differs when a and b point to the same int.
 */
void merge_two_ints(int *a, int *b)
{
    *b = *b + *a;
    *a = *a + *b;
}
void merge_two_numbers(int *a, long *b) {...}
void merge_two_ints(int * restrict a, int * restrict b) {...}
// Prints x through the pointer ip, writes 12 through ip, then prints x directly.
int x = 10; int *ip = &x; std::cout << *ip << "\n"; *ip = 12; std::cout << x << "\n";
// Stores 1 through i, then 0.f through f, and returns the value read back
// through i after both stores.
int foo( float *f, int *i ) {
    *i = 1;
    *f = 0.f;

    return *i;
}

// Passes the address of the same int to foo() as both a float* and an int*.
int main() {
    int x = 0;

    std::cout << x << "\n";   // Expect 0
    x = foo(reinterpret_cast<float*>(&x), &x);
    std::cout << x << "\n";   // Expect 0?
}
0 1
foo(float*, int*): # @foo(float*, int*) mov dword ptr [rsi], 1 mov dword ptr [rdi], 0 mov eax, 1 ret
// *p is an lvalue expression of type int, which is compatible with the effective type (int) of x.
int x = 1; int *p = &x; printf("%d\n", *p);
// *p is an lvalue expression of type const int, a qualified version of a type compatible with int.
int x = 1; const int *p = &x; printf("%d\n", *p);
// *p is an lvalue expression of type unsigned int, the unsigned type that
// corresponds to the effective type (int) of the object.
int x = 1; unsigned int *p = (unsigned int*)&x; printf("%u\n", *p );
// *p is an lvalue expression of type const unsigned int, an unsigned type that
// corresponds to a qualified version of the effective type of the object.
int x = 1; const unsigned int *p = (const unsigned int*)&x; printf("%u\n", *p );
struct foo {
  int x;
};
void foobar( struct foo *fp, int *ip ); // struct foo is an aggregate that includes int among its members,
                                        // so it can alias with *ip
foo f;
foobar( &f, &f.x );
// *p is an lvalue expression of type char, which is a character type.
// The result is not portable because it depends on endianness.
int x = 65; char *p = (char *)&x; printf("%c\n", *p );
void *p = malloc( sizeof(int) ); // We have allocated storage but not started the lifetime of an object
int *ip = new (p) int{0};        // Placement new changes the dynamic type of the object to int
std::cout << *ip << "\n";        // *ip gives us a glvalue expression of type int which matches the dynamic type
                                 // of the allocated object
// *cip is a glvalue expression of type const int, a cv-qualified version of
// the dynamic type of x.
int x = 1; const int *cip = &x; std::cout << *cip << "\n";
// si and ui are the signed and unsigned types corresponding to each other's dynamic types.
// This godbolt example (https://godbolt.org/g/KowGXB) shows the optimizer assuming they may alias.
//
// Stores 1 through si, then 2 through ui, and returns si as read after both stores.
signed int foo( signed int &si, unsigned int &ui ) {
  si = 1;
  ui = 2;

  return si;
}
// Declaration only: si1 is a const-qualified int reference, si2 a plain int reference.
// It is hard to demonstrate that the compiler assumes aliasing in this case.
signed int foo( const signed int &si1, int &si2);
struct foo {
  int x;
};

// This Compiler Explorer example (https://godbolt.org/g/z2wJTC) shows the aliasing assumption.
//
// Stores 1 in fp.x, then 2 through ip, and returns fp.x as read after both
// stores; when ip refers to fp.x the result is 2.
//
// Usage from the example:
//   foo f;
//   foobar( f, f.x );
int foobar( foo &fp, int &ip ) {
  fp.x = 1;
  ip = 2;

  return fp.x;
}
struct foo {
  int x;
};

// bar adds nothing to foo; it exists so an object can be reached through both types.
struct bar : foo {
};

// Stores 1 in f.x, then 2 in b.x, and returns f.x as read after both stores.
int foobar( foo &f, bar &b ) {
  f.x = 1;
  b.x = 2;

  return f.x;
}
// Stores 'a' in b, then 0xFFFFFFFF in ui, and returns b converted to int as
// read after both stores.
int foo( std::byte &b, uint32_t &ui ) {
  b = static_cast<std::byte>('a');
  ui = 0xFFFFFFFF;

  // b gives us a glvalue expression of type std::byte, which can alias
  // an object of type uint32_t
  return std::to_integer<int>( b );
}
int x = 1 ;

// In C
float *fp = (float*)&x ;   // Not a valid aliasing

// In C++
float *fp = reinterpret_cast<float*>(&x) ;  // Not a valid aliasing

printf( "%f\n", *fp ) ;
union u1
{
  int n;
  float f;
} ;

union u1 u;
u.f = 1.0f;

printf( "%d\n", u.n );  // UB in C++ n is not the active member
// Compile-time check that double and int64_t have the same size.
// C++17 does not require a message argument for static_assert.
static_assert( sizeof( double ) == sizeof( int64_t ) );
void func1( double d ) { std::int64_t n; std::memcpy(&n, &d, sizeof d); //...
// Assumes sizeof(float) == sizeof(unsigned int).
// NOTE(review): bit_cast is not defined in this excerpt — confirm which implementation is meant.
std::cout << bit_cast<float>(0x447a0000) << "\n" ;
struct uint_chars { unsigned char arr[sizeof( unsigned int )] = {} ; // Assume sizeof( unsigned int ) == 4 }; // Assume len is a multiple of 4 int bar( unsigned char *p, size_t len ) { int result = 0; for( size_t index = 0; index < len; index += sizeof(unsigned int) ) { uint_chars f; std::memcpy( f.arr, &p[index], sizeof(unsigned int)); unsigned int result = bit_cast<unsigned int>(f); result += foo( result ); } return result ; }
int a = 1;
short j;
float f = 1.f; // Originally not initialized but tis-kernel caught
               // it was being accessed w/ an indeterminate value below

printf("%i\n", j = *(reinterpret_cast<short*>(&a)));
printf("%i\n", j = *(reinterpret_cast<int*>(&f)));
int *p; p=&a; printf("%i\n", j = *(reinterpret_cast<short*>(p)));
int *x = new int[2];               // 8 bytes: [0,7].
int *u = (int*)((char*)x + 6);     // regardless of alignment of x this will not be an aligned address
*u = 1;                            // Access to range [6-9]
printf( "%d\n", *u );              // Access to range [6-9]
// C version of the example: reads a through a short*, f through an int*, and
// a again through a short* obtained from p. The tis-kernel output that follows
// reports each of these three accesses as a strict aliasing violation.
int a = 1; short j; float f = 1.0 ; printf("%i\n", j = *((short*)&a)); printf("%i\n", j = *((int*)&f)); int *p; p=&a; printf("%i\n", j = *((short*)p));
./bin/tis-kernel -sa example1.c ... example1.c:9:[sa] warning: The pointer (short *)(& a) has type short *. It violates strict aliasing rules by accessing a cell with effective type int. ... example1.c:10:[sa] warning: The pointer (int *)(& f) has type int *. It violates strict aliasing rules by accessing a cell with effective type float. Callstack: main ... example1.c:15:[sa] warning: The pointer (short *)p has type short *. It violates strict aliasing rules by accessing a cell with effective type int.