X86:在将XGETBV的结果用于XSETBV之前,我是否应该对其进行屏蔽?

X86 应该或不应该';在将XGETBV的结果用于XSETBV之前,我是否会屏蔽它们?,x86,x86-64,avx,bare-metal,X86,X86 64,Avx,Bare Metal,我正在尝试执行一些UEFI应用程序 我发现此代码在VirtualBox上崩溃(testsuccess未打印,而teststart已打印): 通过检查,我发现EDX:EAX通过xgetbv指令设置为00000000:0000001f,并且xsetbv会导致#GP(中断向量13)看到该值时出现故障 奇怪的是,当我通过点击VirtualBox执行xgetbv指令时,它会将EDX:EAX设置为00000000:00000001,因此不会发生故障并打印测试成功 参考,我发现它说的是关于XGETBV的: 如

我正在尝试执行一些UEFI应用程序

我发现此代码在VirtualBox上崩溃(
testsuccess
未打印,而
teststart
已打印):

通过检查,我发现
EDX:EAX
通过
xgetbv
指令设置为
00000000:0000001f
,并且
xsetbv
会导致
#GP
(中断向量13)看到该值时出现故障

奇怪的是,当我在VirtualBox上以单步方式执行
xgetbv
指令时,它会将
EDX:EAX
设置为
00000000:00000001
,因此不会发生故障并打印
testsuccess

参考,我发现它说的是关于XGETBV的:

如果正在读取的XCR中实现的位数少于64位,则在未实现的位位置上返回给EDX:EAX的值是未定义的。

那么,关于XSETBV:

受保护模式异常
#GP(0)
如果当前权限级别不是0。
如果在ECX中指定了无效的XCR。
如果EDX:EAX中的值设置了ECX指定的XCR中保留的位。
如果试图清除XCR0的第0位。
如果试图将XCR0[2:1]设置为10b

这种情况看起来符合
EDX:EAX
中的值设置了保留位这一条件。由于XGETBV在未实现位上返回的值是未定义的,因此在将XGETBV的结果传递给XSETBV之前先屏蔽这些位似乎是合理的。用于屏蔽的掩码可以通过CPUID.(EAX=0DH, ECX=0)返回的EDX:EAX获得。在添加应用该掩码的代码之后,XSETBV在VirtualBox上运行良好

另一方面,英特尔手册也对XSETBV作了如下说明:

XCR中未定义或保留的位应设置为先前读取到的值

这看起来是说应该将保留位设置为通过XGETBV获得的值,也就是说我不应该应用掩码(masking)来强制这些位变为零

总而言之,在将XGETBV的结果传递给XSETBV之前,我是否应该用通过CPUID获得的有效位掩码来屏蔽XGETBV的结果?


我发现的是相关但不重复的问题:

    • 这个问题声称
      xgetbv
      之后的
      xsetbv
      有效,但我发现它在VirtualBox上不起作用
    • 这个问题是直接设置EAX和EDX,而不是使用XGETBV
    • 这个问题是关于VMWare的,但我的问题是关于VirtualBox的
    • 这个问题是这样的:“如果我不操作
      xcr0
      ,只读取它的值并将其写回,事情似乎运行得很好”,但在我的实验中,只写入读取的值会导致
      #GP
      错误

主机环境:

  • Windows 10 Home(x64)
  • Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2.59 GHz
  • RAM 16.0 GB
  • VirtualBox 6.1.18 r142142(Qt5.6.2)
来宾(VM)环境:

  • 操作系统:其他/未知(64位)
  • 基本内存:128 MB
  • 芯片组:PIIX3
  • 启用I/O APIC
  • EFI:已启用
  • 1个CPU
  • 加速:VT-x/AMD-V、嵌套分页、PAE/NX
  • 半虚拟化接口:默认值

测试的完整代码:

#include <stdint.h>

/* Console-output object passed as the first argument of OutputString;
 * entry() fills it in from table[8]. */
void* ConOut;
/* Text-output routine used by printChar(); entry() loads it from slot 1 of
 * the object ConOut points to.  Presumably the UEFI Simple Text Output
 * protocol's OutputString member -- TODO confirm against the UEFI spec. */
uint64_t (*OutputString)(void* protocol, void* string);

/*
 * Print a single character through OutputString.
 *
 * The character is placed in the first byte of a zero-filled 4-byte buffer,
 * so the bytes that follow it are all zero (presumably read by the callee as
 * one 16-bit character plus a 16-bit terminator -- TODO confirm).
 * A '\n' is preceded by a '\r'.
 */
void printChar(int c) {
    unsigned char cell[4] = { 0 };

    if (c == '\n') {
        printChar('\r');
    }
    cell[0] = (unsigned char)c;
    OutputString(ConOut, cell);
}

/*
 * Print a NUL-terminated string one character at a time via printChar().
 * Each byte is passed as an unsigned value so that bytes >= 0x80 are not
 * sign-extended.
 */
void printString(const char* str) {
    const char* cursor;

    for (cursor = str; *cursor != '\0'; ++cursor) {
        printChar((unsigned char)*cursor);
    }
}

/*
 * Print an unsigned integer in the given radix, zero-padded on the left to
 * at least minDigits digits.  Digits above 9 are printed as upper-case
 * letters.
 *
 * value     - number to print
 * radix     - numeric base, 2..16; any other value prints nothing (the digit
 *             table only has 16 entries, and 0 or 1 would divide by zero or
 *             never terminate)
 * minDigits - minimum number of digits; values larger than the scratch
 *             buffer can hold are clamped to its capacity
 */
void printInt(uint64_t value, int radix, int minDigits) {
    static const char digitChars[] = "0123456789ABCDEF";
    /* Digits are written right-to-left starting at vStr[120]; the rest of
     * the zero-initialised buffer provides the terminator. */
    enum { LAST_DIGIT_POS = 120, MAX_DIGITS = 120 };
    char vStr[128] = "";
    char* pStr = vStr + LAST_DIGIT_POS;
    int digits = 0;

    if (radix < 2 || radix > 16) {
        return;
    }
    /* A 64-bit value needs at most 64 digits (radix 2), so only the padding
     * request can overrun the buffer; cap it so pStr never moves below
     * vStr. */
    if (minDigits > MAX_DIGITS) {
        minDigits = MAX_DIGITS;
    }

    do {
        *(pStr--) = digitChars[value % (uint64_t)radix];
        value /= (uint64_t)radix;
        digits++;
    } while (value > 0 || digits < minDigits);

    /* pStr was post-decremented past the most significant digit. */
    printString(pStr + 1);
}

/*
 * Halt the processor for good: disable maskable interrupts with cli, then
 * execute hlt in an endless loop so that any wake-up simply halts again.
 * This function never returns.
 */
void stop(void) {
    __asm__ __volatile__("cli");
    for (;;) {
        __asm__ __volatile__("hlt");
    }
}

/*
 * Program entry point: enables OSXSAVE, turns on the AVX state bits in XCR0
 * and runs a 32-byte AVX copy as a smoke test, printing every intermediate
 * value on the console.
 *
 * unused - ignored
 * table  - table supplied by the caller; table[8] is taken as the console
 *          output object (presumably the EFI system table's ConOut field --
 *          TODO confirm against the UEFI spec)
 *
 * Never returns: every path ends in stop().
 */
void entry(void* unused, uint64_t* table) {
    uint32_t eax, ebx, ecx, edx, cs, cr0, xcr0_low, xcr0_high;
    uint32_t cpuid_max, eax_mask, edx_mask;
    unsigned char src_test[32], dst_test[32] = {0};
    int i;
    (void)unused;

    /* Set up the console globals used by printChar(). */
    ConOut = (void*)table[8];
    OutputString = (uint64_t (*)(void*, void*))((uint64_t*)ConOut)[1];

    /* CPUID leaf 0: EAX returns the highest supported basic leaf. */
    __asm__ __volatile__ (
        "xor %%eax, %%eax\n\t"
        "cpuid\n\t"
    : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
    printString("CPUID.00H: EAX=0x"); printInt(eax, 16, 8);
    printString(", EBX=0x"); printInt(ebx, 16, 8);
    printString(", ECX=0x"); printInt(ecx, 16, 8);
    printString(", EDX=0x"); printInt(edx, 16, 8);
    printChar('\n');
    if (eax < 1) {
        printString("CPUID.01H not supported!\n");
        stop();
    }
    cpuid_max = eax;

    /* CPUID leaf 1: ECX bit 26 reports XSAVE support. */
    __asm__ __volatile__ (
        "mov $1, %%eax\n\t"
        "cpuid\n\t"
    : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
    printString("CPUID.01H: EAX=0x"); printInt(eax, 16, 8);
    printString(", EBX=0x"); printInt(ebx, 16, 8);
    printString(", ECX=0x"); printInt(ecx, 16, 8);
    printString(", EDX=0x"); printInt(edx, 16, 8);
    printChar('\n');
    if (!((ecx >> 26) & 1)) {
        printString("xsave (ECX[26]) not supported!\n");
        stop();
    }

    /* CPUID leaf 0DH, sub-leaf 0: EDX:EAX is kept as the mask of XCR0 bits
     * that may be set.  Without that leaf, fall back to "all bits". */
    if (cpuid_max >= 0x0D) {
        __asm__ __volatile__ (
            "mov $0xd, %%eax\n\t"
            "xor %%ecx, %%ecx\n\t"
            "cpuid\n\t"
        : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
        printString("CPUID.0DH: EAX=0x"); printInt(eax, 16, 8);
        printString(", EBX=0x"); printInt(ebx, 16, 8);
        printString(", ECX=0x"); printInt(ecx, 16, 8);
        printString(", EDX=0x"); printInt(edx, 16, 8);
        printChar('\n');
        eax_mask = eax;
        edx_mask = edx;
    } else {
        printString("CPUID.0DH not supported\n");
        eax_mask = UINT32_C(0xffffffff);
        edx_mask = UINT32_C(0xffffffff);
    }

    /* Read CS (its low two bits give the CPL) and the low half of CR0.
     * The CS copy must land in a register: movzwl cannot store to memory,
     * so the constraint is "=r" rather than "=g". */
    __asm__ __volatile__ (
        "mov %%cs, %%ax\n\t"
        "movzwl %%ax, %0\n\t"
        "mov %%cr0, %%rax\n\t"
    : "=r"(cs), "=a"(cr0));
    printString("CPL check: CS=0x"); printInt(cs, 16, 4);
    printString(", CR0=0x"); printInt(cr0, 16, 8);
    printChar('\n');
    /* CR0 bit 0 is PE.  The parentheses matter: "!cr0 & 1" would test
     * (!cr0) & 1 and only fire when CR0 is entirely zero. */
    if (!(cr0 & 1)) {
        printString("not in protected mode!\n");
        stop();
    }
    if ((cs & 3) != 0) {
        printString("CPL is not zero!\n");
        stop();
    }

    /* Busy-wait so that a debugger breakpoint can be placed by hand; the
     * volatile counter keeps the loop from being optimised away. */
    printString("waiting for breakpoint set...\n");
    {
        volatile int j;
        for (j = 0; j < 1000000000; j++);
    }

    printString("turning on OSXSAVE\n");
    __asm__ __volatile__ (
        /* turn on OSXSAVE */
        "mov %%cr4, %%rax\n\t"
        "or $0x40000, %%rax\n\t"
        "mov %%rax, %%cr4\n\t"
    : : : "%eax");

    __asm__ __volatile__ (
        /* marker for setting breakpoint */
        "cmp $0xdeadbeef, %%eax\n\t"
        /* read XCR[0] */
        "xor %%eax, %%eax\n\t"
        "xor %%edx, %%edx\n\t"
        "xor %%ecx, %%ecx\n\t"
        "xgetbv\n\t"
    : "=a"(xcr0_low), "=d"(xcr0_high) : : "%ecx", "cc");
    printString("XCR[0] = ");
    printInt(xcr0_high, 16, 8); printChar(':');
    printInt(xcr0_low, 16, 8); printChar('\n');

    /* Request bits 1 and 2 of XCR0 (SSE and AVX state). */
    xcr0_low |= 6;

    /* Experiment switch: change to "#if 1" to keep only the XCR0 bits that
     * CPUID.0DH reported as supported before writing the value back. */
#if 0
    printString("applying mask\n");
    xcr0_low &= eax_mask;
    xcr0_high &= edx_mask;
#else
    (void)eax_mask; (void)edx_mask;
#endif

    printString("new XCR[0] will be: ");
    printInt(xcr0_high, 16, 8); printChar(':');
    printInt(xcr0_low, 16, 8); printChar('\n');

    printString("turning on AVX\n");
    __asm__ __volatile__ (
        /* marker for setting breakpoint */
        "cmp $0xdeadbeef, %%ecx\n\t"
        /* turn on AVX */
        "xor %%ecx, %%ecx\n\t"
        "xsetbv\n\t"
    : : "a"(xcr0_low), "d"(xcr0_high) : "%ecx", "cc");

    /* Fill the source with a recognisable pattern (truncated to 8 bits). */
    for (i = 0; i < 32; i++) src_test[i] = 123 * (i + 1);
    printString("testing AVX instruction\n");
    printString("src:\n");
    for (i = 0; i < 32; i++) {
        printInt(src_test[i], 16, 2);
        printChar((i + 1) % 16 == 0 ? '\n' : ' ');
    }
    printString("dest before:\n");
    for (i = 0; i < 32; i++) {
        printInt(dst_test[i], 16, 2);
        printChar((i + 1) % 16 == 0 ? '\n' : ' ');
    }
    /* Copy 32 bytes through ymm0.  The asm reads src_test and writes
     * dst_test behind the compiler's back, so it must declare a "memory"
     * clobber (otherwise the zero-initialised dst_test may be printed from
     * cached values) and that it overwrites the vector register. */
    __asm__ __volatile__ (
        "vmovups (%0), %%ymm0\n\t"
        "vmovups %%ymm0, (%1)\n\t"
    : : "r"(src_test), "r"(dst_test) : "xmm0", "memory");
    printString("dest after:\n");
    for (i = 0; i < 32; i++) {
        printInt(dst_test[i], 16, 2);
        printChar((i + 1) % 16 == 0 ? '\n' : ' ');
    }

    printString("test done.\n");
    stop();
}
直接在电脑上执行程序时的输出:

CPUID.00H: EAX=0x00000016, EBX=0x756E6547, ECX=0x6C65746E, EDX=0x49656E69
CPUID.01H: EAX=0x000906ED, EBX=0x00100800, ECX=0x77FAFBBF, EDX=0xBFEBFBFF
CPUID.0DH: EAX=0x0000001F, EBX=0x00000240, ECX=0x00000440, EDX=0x00000000
CPL check: CS=0x0038, CR0=0x80000013
waiting for breakpoint set...
turning on OSXSAVE
XCR[0] = 00000000:00000001
new XCR[0] will be: 00000000:00000007
turning on AVX
testing AVX instruction
src:
7B F6 71 EC 67 E2 5D D8 53 CE 49 C4 3F BA 35 B0
2B A6 21 9C 17 92 0D 88 03 7E F9 74 EF 6A E5 60
dest before:
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
dest after:
7B F6 71 EC 67 E2 5D D8 53 CE 49 C4 3F BA 35 B0
2B A6 21 9C 17 92 0D 88 03 7E F9 74 EF 6A E5 60
test done.

您的VirtualBox是否设置为向来宾透传AVX支持?如果不是,它可能会悄悄地阻止您通过XSETBV设置AVX启用位的尝试。我没有尝试手动解码您的CPUID结果,但我看到EAX=1时CPUID返回的ECX有所不同。(有些虚拟机默认采用这种方式,以便可以在不支持AVX的主机上恢复虚拟机映像。也就是说,它们将来宾限制在一组基线的CPUID功能标志内。对于某些指令,如
popcnt
,在CPUID中不提供广告支持不会停止机器insn的执行,但对于AVX来说确实如此。)@彼得考德斯:谢谢你的评论。来自VM的CPUID.01H ECX的最高数字是5,因此它表示支持AVX(位28)。我认为
xgetbv/xsetbv
在物理机器上总是安全的。但是
xgetbv
不是敏感指令(它不会导致VM退出,因此VBox不知道它何时被执行),因此它读取的是主机的XCR0值。然而,VBox似乎在来宾(guest)上禁用了MPX(使XCR0的第3位和第4位不可设置,如
CPUID.edx=0Dh,ecx=0.eax
所示)。简而言之,这可能是VBox的一个怪癖。我甚至不认为
xsetbv
本身实际触发了故障;相反,它会导致VM退出,然后由VBox注入该故障(因为我没有找到为来宾禁用MPX的方法,所以这可能是模拟出来的)
#include <stdint.h>

/* Console-output object passed as the first argument of OutputString;
 * entry() fills it in from table[8]. */
void* ConOut;
/* Text-output routine used by printChar(); entry() loads it from slot 1 of
 * the object ConOut points to.  Presumably the UEFI Simple Text Output
 * protocol's OutputString member -- TODO confirm against the UEFI spec. */
uint64_t (*OutputString)(void* protocol, void* string);

/*
 * Print a single character through OutputString.
 *
 * The character is placed in the first byte of a zero-filled 4-byte buffer,
 * so the bytes that follow it are all zero (presumably read by the callee as
 * one 16-bit character plus a 16-bit terminator -- TODO confirm).
 * A '\n' is preceded by a '\r'.
 */
void printChar(int c) {
    unsigned char cell[4] = { 0 };

    if (c == '\n') {
        printChar('\r');
    }
    cell[0] = (unsigned char)c;
    OutputString(ConOut, cell);
}

/*
 * Print a NUL-terminated string one character at a time via printChar().
 * Each byte is passed as an unsigned value so that bytes >= 0x80 are not
 * sign-extended.
 */
void printString(const char* str) {
    const char* cursor;

    for (cursor = str; *cursor != '\0'; ++cursor) {
        printChar((unsigned char)*cursor);
    }
}

/*
 * Print an unsigned integer in the given radix, zero-padded on the left to
 * at least minDigits digits.  Digits above 9 are printed as upper-case
 * letters.
 *
 * value     - number to print
 * radix     - numeric base, 2..16; any other value prints nothing (the digit
 *             table only has 16 entries, and 0 or 1 would divide by zero or
 *             never terminate)
 * minDigits - minimum number of digits; values larger than the scratch
 *             buffer can hold are clamped to its capacity
 */
void printInt(uint64_t value, int radix, int minDigits) {
    static const char digitChars[] = "0123456789ABCDEF";
    /* Digits are written right-to-left starting at vStr[120]; the rest of
     * the zero-initialised buffer provides the terminator. */
    enum { LAST_DIGIT_POS = 120, MAX_DIGITS = 120 };
    char vStr[128] = "";
    char* pStr = vStr + LAST_DIGIT_POS;
    int digits = 0;

    if (radix < 2 || radix > 16) {
        return;
    }
    /* A 64-bit value needs at most 64 digits (radix 2), so only the padding
     * request can overrun the buffer; cap it so pStr never moves below
     * vStr. */
    if (minDigits > MAX_DIGITS) {
        minDigits = MAX_DIGITS;
    }

    do {
        *(pStr--) = digitChars[value % (uint64_t)radix];
        value /= (uint64_t)radix;
        digits++;
    } while (value > 0 || digits < minDigits);

    /* pStr was post-decremented past the most significant digit. */
    printString(pStr + 1);
}

/*
 * Halt the processor for good: disable maskable interrupts with cli, then
 * execute hlt in an endless loop so that any wake-up simply halts again.
 * This function never returns.
 */
void stop(void) {
    __asm__ __volatile__("cli");
    for (;;) {
        __asm__ __volatile__("hlt");
    }
}

/*
 * Program entry point: enables OSXSAVE, turns on the AVX state bits in XCR0
 * and runs a 32-byte AVX copy as a smoke test, printing every intermediate
 * value on the console.
 *
 * unused - ignored
 * table  - table supplied by the caller; table[8] is taken as the console
 *          output object (presumably the EFI system table's ConOut field --
 *          TODO confirm against the UEFI spec)
 *
 * Never returns: every path ends in stop().
 */
void entry(void* unused, uint64_t* table) {
    uint32_t eax, ebx, ecx, edx, cs, cr0, xcr0_low, xcr0_high;
    uint32_t cpuid_max, eax_mask, edx_mask;
    unsigned char src_test[32], dst_test[32] = {0};
    int i;
    (void)unused;

    /* Set up the console globals used by printChar(). */
    ConOut = (void*)table[8];
    OutputString = (uint64_t (*)(void*, void*))((uint64_t*)ConOut)[1];

    /* CPUID leaf 0: EAX returns the highest supported basic leaf. */
    __asm__ __volatile__ (
        "xor %%eax, %%eax\n\t"
        "cpuid\n\t"
    : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
    printString("CPUID.00H: EAX=0x"); printInt(eax, 16, 8);
    printString(", EBX=0x"); printInt(ebx, 16, 8);
    printString(", ECX=0x"); printInt(ecx, 16, 8);
    printString(", EDX=0x"); printInt(edx, 16, 8);
    printChar('\n');
    if (eax < 1) {
        printString("CPUID.01H not supported!\n");
        stop();
    }
    cpuid_max = eax;

    /* CPUID leaf 1: ECX bit 26 reports XSAVE support. */
    __asm__ __volatile__ (
        "mov $1, %%eax\n\t"
        "cpuid\n\t"
    : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
    printString("CPUID.01H: EAX=0x"); printInt(eax, 16, 8);
    printString(", EBX=0x"); printInt(ebx, 16, 8);
    printString(", ECX=0x"); printInt(ecx, 16, 8);
    printString(", EDX=0x"); printInt(edx, 16, 8);
    printChar('\n');
    if (!((ecx >> 26) & 1)) {
        printString("xsave (ECX[26]) not supported!\n");
        stop();
    }

    /* CPUID leaf 0DH, sub-leaf 0: EDX:EAX is kept as the mask of XCR0 bits
     * that may be set.  Without that leaf, fall back to "all bits". */
    if (cpuid_max >= 0x0D) {
        __asm__ __volatile__ (
            "mov $0xd, %%eax\n\t"
            "xor %%ecx, %%ecx\n\t"
            "cpuid\n\t"
        : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
        printString("CPUID.0DH: EAX=0x"); printInt(eax, 16, 8);
        printString(", EBX=0x"); printInt(ebx, 16, 8);
        printString(", ECX=0x"); printInt(ecx, 16, 8);
        printString(", EDX=0x"); printInt(edx, 16, 8);
        printChar('\n');
        eax_mask = eax;
        edx_mask = edx;
    } else {
        printString("CPUID.0DH not supported\n");
        eax_mask = UINT32_C(0xffffffff);
        edx_mask = UINT32_C(0xffffffff);
    }

    /* Read CS (its low two bits give the CPL) and the low half of CR0.
     * The CS copy must land in a register: movzwl cannot store to memory,
     * so the constraint is "=r" rather than "=g". */
    __asm__ __volatile__ (
        "mov %%cs, %%ax\n\t"
        "movzwl %%ax, %0\n\t"
        "mov %%cr0, %%rax\n\t"
    : "=r"(cs), "=a"(cr0));
    printString("CPL check: CS=0x"); printInt(cs, 16, 4);
    printString(", CR0=0x"); printInt(cr0, 16, 8);
    printChar('\n');
    /* CR0 bit 0 is PE.  The parentheses matter: "!cr0 & 1" would test
     * (!cr0) & 1 and only fire when CR0 is entirely zero. */
    if (!(cr0 & 1)) {
        printString("not in protected mode!\n");
        stop();
    }
    if ((cs & 3) != 0) {
        printString("CPL is not zero!\n");
        stop();
    }

    /* Busy-wait so that a debugger breakpoint can be placed by hand; the
     * volatile counter keeps the loop from being optimised away. */
    printString("waiting for breakpoint set...\n");
    {
        volatile int j;
        for (j = 0; j < 1000000000; j++);
    }

    printString("turning on OSXSAVE\n");
    __asm__ __volatile__ (
        /* turn on OSXSAVE */
        "mov %%cr4, %%rax\n\t"
        "or $0x40000, %%rax\n\t"
        "mov %%rax, %%cr4\n\t"
    : : : "%eax");

    __asm__ __volatile__ (
        /* marker for setting breakpoint */
        "cmp $0xdeadbeef, %%eax\n\t"
        /* read XCR[0] */
        "xor %%eax, %%eax\n\t"
        "xor %%edx, %%edx\n\t"
        "xor %%ecx, %%ecx\n\t"
        "xgetbv\n\t"
    : "=a"(xcr0_low), "=d"(xcr0_high) : : "%ecx", "cc");
    printString("XCR[0] = ");
    printInt(xcr0_high, 16, 8); printChar(':');
    printInt(xcr0_low, 16, 8); printChar('\n');

    /* Request bits 1 and 2 of XCR0 (SSE and AVX state). */
    xcr0_low |= 6;

    /* Experiment switch: change to "#if 1" to keep only the XCR0 bits that
     * CPUID.0DH reported as supported before writing the value back. */
#if 0
    printString("applying mask\n");
    xcr0_low &= eax_mask;
    xcr0_high &= edx_mask;
#else
    (void)eax_mask; (void)edx_mask;
#endif

    printString("new XCR[0] will be: ");
    printInt(xcr0_high, 16, 8); printChar(':');
    printInt(xcr0_low, 16, 8); printChar('\n');

    printString("turning on AVX\n");
    __asm__ __volatile__ (
        /* marker for setting breakpoint */
        "cmp $0xdeadbeef, %%ecx\n\t"
        /* turn on AVX */
        "xor %%ecx, %%ecx\n\t"
        "xsetbv\n\t"
    : : "a"(xcr0_low), "d"(xcr0_high) : "%ecx", "cc");

    /* Fill the source with a recognisable pattern (truncated to 8 bits). */
    for (i = 0; i < 32; i++) src_test[i] = 123 * (i + 1);
    printString("testing AVX instruction\n");
    printString("src:\n");
    for (i = 0; i < 32; i++) {
        printInt(src_test[i], 16, 2);
        printChar((i + 1) % 16 == 0 ? '\n' : ' ');
    }
    printString("dest before:\n");
    for (i = 0; i < 32; i++) {
        printInt(dst_test[i], 16, 2);
        printChar((i + 1) % 16 == 0 ? '\n' : ' ');
    }
    /* Copy 32 bytes through ymm0.  The asm reads src_test and writes
     * dst_test behind the compiler's back, so it must declare a "memory"
     * clobber (otherwise the zero-initialised dst_test may be printed from
     * cached values) and that it overwrites the vector register. */
    __asm__ __volatile__ (
        "vmovups (%0), %%ymm0\n\t"
        "vmovups %%ymm0, (%1)\n\t"
    : : "r"(src_test), "r"(dst_test) : "xmm0", "memory");
    printString("dest after:\n");
    for (i = 0; i < 32; i++) {
        printInt(dst_test[i], 16, 2);
        printChar((i + 1) % 16 == 0 ? '\n' : ' ');
    }

    printString("test done.\n");
    stop();
}
CPUID.00H: EAX=0x00000016, EBX=0x756E6547, ECX=0x6C65746E, EDX=0x49656E69
CPUID.01H: EAX=0x000906ED, EBX=0x00010800, ECX=0x56DA220B, EDX=0x178BFBFF
CPUID.0DH: EAX=0x00000007, EBX=0x00000340, ECX=0x00000340, EDX=0x00000000
CPL check: CS=0x0038, CR0=0xC0010033
waiting for breakpoint set...
turning on OSXSAVE
XCR[0] = 00000000:0000001F
new XCR[0] will be: 00000000:0000001F
turning on AVX
CPUID.00H: EAX=0x00000016, EBX=0x756E6547, ECX=0x6C65746E, EDX=0x49656E69
CPUID.01H: EAX=0x000906ED, EBX=0x00100800, ECX=0x77FAFBBF, EDX=0xBFEBFBFF
CPUID.0DH: EAX=0x0000001F, EBX=0x00000240, ECX=0x00000440, EDX=0x00000000
CPL check: CS=0x0038, CR0=0x80000013
waiting for breakpoint set...
turning on OSXSAVE
XCR[0] = 00000000:00000001
new XCR[0] will be: 00000000:00000007
turning on AVX
testing AVX instruction
src:
7B F6 71 EC 67 E2 5D D8 53 CE 49 C4 3F BA 35 B0
2B A6 21 9C 17 92 0D 88 03 7E F9 74 EF 6A E5 60
dest before:
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
dest after:
7B F6 71 EC 67 E2 5D D8 53 CE 49 C4 3F BA 35 B0
2B A6 21 9C 17 92 0D 88 03 7E F9 74 EF 6A E5 60
test done.