Assembly APIC多核启动协议和ICR启动地址
我正在编写一个引导加载并尝试测试处理器间中断。我遇到了以下两个问题: 1,我可以找到启动APs的程序 2,当发出IPI时,我应该加载内存地址,告诉目标处理器从哪个内存地址开始Assembly APIC多核启动协议和ICR启动地址,assembly,x86,multiprocessing,cpu,apic,Assembly,X86,Multiprocessing,Cpu,Apic,我正在编写一个引导加载并尝试测试处理器间中断。我遇到了以下两个问题: 1,我可以找到启动APs的程序 2,当发出IPI时,我应该加载内存地址,告诉目标处理器从哪个内存地址开始 感谢您的回答,如果您能附上一个程序集示例的话。我从现在已经不存在的Stackoverflow文档项目中提取了这个。这本书最初是由玛格丽特·布鲁姆写的,我已经清理了她的代码。因为这不是我自己的,所以我把它标记为社区维基。您可能会发现一些有用的信息 此示例将唤醒每个应用程序处理器(AP),并使它们与引导处理器(BSP)一起显
感谢您的回答,如果您能附上一个程序集示例的话。我从现在已经不存在的Stackoverflow文档项目中提取了这个。这本书最初是由玛格丽特·布鲁姆写的,我已经清理了她的代码。因为这不是我自己的,所以我把它标记为社区维基。您可能会发现一些有用的信息
此示例将唤醒每个应用程序处理器(AP),并使它们与引导处理器(BSP)一起显示其LAPIC ID
; Assemble boot sector and insert it into a 1.44MiB floppy image
;
; nasm -f bin boot.asm -o boot.bin
; dd if=/dev/zero of=disk.img bs=512 count=2880
; dd if=boot.bin of=disk.img bs=512 conv=notrunc
BITS 16
; Bootloader starts at segment:offset 07c0h:0000h
section bootloader, vstart=0000h
jmp 7c0h:__START__
__START__:
mov ax, cs
mov ds, ax
mov es, ax
mov ss, ax
xor sp, sp
cld
;Clear screen
mov ax, 03h
int 10h
;Set limit of 4GiB and base 0 for FS and GS
call 7c0h:unrealmode
;Enable the APIC
call enable_lapic
;Move the payload to the expected address
mov si, payload_start_abs
mov cx, payload_end-payload + 1
mov di, 400h ;7c0h:400h = 8000h
rep movsb
;Wakeup the other APs
;INIT
call lapic_send_init
mov cx, WAIT_10_ms
call us_wait
;SIPI
call lapic_send_sipi
mov cx, WAIT_200_us
call us_wait
;SIPI
call lapic_send_sipi
;Jump to the payload
jmp 0000h:8000h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;CX = Wait (in ms) Max 65536 us (=0 on input)
us_wait:
mov dx, 80h ;POST Diagnose port, 1us per IO
xor si, si
rep outsb
ret
WAIT_10_ms EQU 10000
WAIT_200_us EQU 200
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
enable_lapic:
;Enable the APIC globally
;On P6 CPU once this flag is set to 0, it cannot be set back to 16
;Without an HARD RESET
mov ecx, IA32_APIC_BASE_MSR
rdmsr
or ah, 08h ;bit11: APIC GLOBAL Enable/Disable
wrmsr
;Mask off lower 12 bits to get the APIC base address
and ah, 0f0h
mov DWORD [APIC_BASE], eax
;Newer processors enables the APIC through the Spurious Interrupt Vector register
mov ecx, DWORD [fs: eax + APIC_REG_SIV]
or ch, 01h ;bit8: APIC SOFTWARE enable/disable
mov DWORD [fs: eax+APIC_REG_SIV], ecx
ret
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
lapic_send_sipi:
mov eax, DWORD [APIC_BASE]
;Destination field is set to 0 has we will use a shorthand
xor ebx, ebx
mov DWORD [fs: eax+APIC_REG_ICR_HIGH], ebx
;Vector: 08h (Will make the CPU execute instruction ad address 08000h)
;Delivery mode: Startup
;Destination mode: ignored (0)
;Level: ignored (1)
;Trigger mode: ignored (0)
;Shorthand: All excluding self (3)
mov ebx, 0c4608h
mov DWORD [fs: eax+APIC_REG_ICR_LOW], ebx ;Writing the low DWORD sent the IPI
ret
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
lapic_send_init:
mov eax, DWORD [APIC_BASE]
;Destination field is set to 0 has we will use a shorthand
xor ebx, ebx
mov DWORD [fs: eax+APIC_REG_ICR_HIGH], ebx
;Vector: 00h
;Delivery mode: Startup
;Destination mode: ignored (0)
;Level: ignored (1)
;Trigger mode: ignored (0)
;Shorthand: All excluding self (3)
mov ebx, 0c4500h
mov DWORD [fs: eax+APIC_REG_ICR_LOW], ebx ;Writing the low DWORD sent the IPI
ret
IA32_APIC_BASE_MSR EQU 1bh
APIC_REG_SIV EQU 0f0h
APIC_REG_ICR_LOW EQU 300h
APIC_REG_ICR_HIGH EQU 310h
APIC_REG_ID EQU 20h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
APIC_BASE dd 00h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
unrealmode:
lgdt [cs:GDT]
cli
mov eax, cr0
or ax, 01h
mov cr0, eax
mov bx, 08h
mov fs, bx
mov gs, bx
and ax, 0fffeh
mov cr0, eax
sti
;IMPORTAT: This call is FAR!
;So it can be called from everywhere
retf
GDT:
dw 0fh
dd GDT + 7c00h
dw 00h
dd 0000ffffh
dd 00cf9200h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
payload_start_abs:
; payload starts at segment:offset 0800h:0000h
section payload, vstart=0000h, align=1
payload:
;IMPORTANT NOTE: Here we are in a \"new\" CPU every state we set before is no
;more present here (except for the BSP, but we handler every processor with
;the same code).
jmp 800h: __RESTART__
__RESTART__:
mov ax, cs
mov ds, ax
xor sp, sp
cld
;IMPORTANT: We can't use the stack yet. Every CPU is pointing to the same stack!
;Get an unique id
mov ax, WORD [counter]
.try:
mov bx, ax
inc bx
lock cmpxchg WORD [counter], bx
jnz .try
mov cx, ax ;Save this unique id
;Stack segment = CS + unique id * 1000
shl ax, 12
mov bx, cs
add ax, bx
mov ss, ax
;Text buffer
push 0b800h
pop es
;Set unreal mode again
call 7c0h:unrealmode
;Use GS for old variables
mov ax, 7c0h
mov gs, ax
;Calculate text row
mov ax, cx
mov bx, 160d ;80 * 2
mul bx
mov di, ax
;Get LAPIC id
mov ebx, DWORD [gs:APIC_BASE]
mov edx, DWORD [fs:ebx + APIC_REG_ID]
shr edx, 24d
call itoa8
cli
hlt
;DL = Number
;DI = ptr to text buffer
itoa8:
mov bx, dx
shr bx, 0fh
mov al, BYTE [bx + digits]
mov ah, 09h
stosw
mov bx, dx
and bx, 0fh
mov al, BYTE [bx + digits]
mov ah, 09h
stosw
ret
digits db \"0123456789abcdef\"
counter dw 0
payload_end:
; Boot signature is at physical offset 01feh of
; the boot sector
section bootsig, start=01feh
dw 0aa55h
需要执行两个主要步骤: 1。唤醒AP
这是通过向所有AP输入INIT-SIPI-SIPI(ISS)序列来实现的 将发送ISS序列的BSP使用缩写All(不包括self)作为目标,从而以所有AP为目标 在接收到SIPI(启动处理器间中断)时被唤醒的所有CPU忽略,因此,如果第一个SIPI足以唤醒目标处理器,则忽略第二个SIPI。出于兼容性原因,英特尔建议使用该软件 SIPI包含一个向量,其含义与中断向量(也称为中断编号)类似,但实际上完全不同。
向量是一个8位的数字,值为V(在基数16中表示为vv),它使CPU开始在物理地址0vv000h执行指令。
我们将呼叫0vv000h作为唤醒地址(WA)。
WA在4KB(或页面)边界处强制执行 我们将使用08h作为V,WA在引导加载程序之后是08000h,400h字节 这将控制AP 2。初始化和区分AP
有必要在WA上有一个可执行代码。引导加载程序位于7c00h,因此我们需要在页面边界重新定位一些代码 写入有效负载时首先要记住的是,对共享资源的任何访问都必须受到保护或区分。
一个公共共享资源是堆栈,如果我们天真地初始化堆栈,每个AP最终都会使用相同的堆栈 第一步是使用不同的堆栈地址,从而区分堆栈。
我们通过为每个CPU分配一个基于零的唯一数字来实现这一点。这个数字,我们称之为索引,用于区分堆栈和CPU将写入其APIC ID的行
; Assemble boot sector and insert it into a 1.44MiB floppy image
;
; nasm -f bin boot.asm -o boot.bin
; dd if=/dev/zero of=disk.img bs=512 count=2880
; dd if=boot.bin of=disk.img bs=512 conv=notrunc
BITS 16
; Bootloader starts at segment:offset 07c0h:0000h
section bootloader, vstart=0000h
jmp 7c0h:__START__
__START__:
mov ax, cs
mov ds, ax
mov es, ax
mov ss, ax
xor sp, sp
cld
;Clear screen
mov ax, 03h
int 10h
;Set limit of 4GiB and base 0 for FS and GS
call 7c0h:unrealmode
;Enable the APIC
call enable_lapic
;Move the payload to the expected address
mov si, payload_start_abs
mov cx, payload_end-payload + 1
mov di, 400h ;7c0h:400h = 8000h
rep movsb
;Wakeup the other APs
;INIT
call lapic_send_init
mov cx, WAIT_10_ms
call us_wait
;SIPI
call lapic_send_sipi
mov cx, WAIT_200_us
call us_wait
;SIPI
call lapic_send_sipi
;Jump to the payload
jmp 0000h:8000h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;CX = Wait (in ms) Max 65536 us (=0 on input)
us_wait:
mov dx, 80h ;POST Diagnose port, 1us per IO
xor si, si
rep outsb
ret
WAIT_10_ms EQU 10000
WAIT_200_us EQU 200
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
enable_lapic:
;Enable the APIC globally
;On P6 CPU once this flag is set to 0, it cannot be set back to 16
;Without an HARD RESET
mov ecx, IA32_APIC_BASE_MSR
rdmsr
or ah, 08h ;bit11: APIC GLOBAL Enable/Disable
wrmsr
;Mask off lower 12 bits to get the APIC base address
and ah, 0f0h
mov DWORD [APIC_BASE], eax
;Newer processors enables the APIC through the Spurious Interrupt Vector register
mov ecx, DWORD [fs: eax + APIC_REG_SIV]
or ch, 01h ;bit8: APIC SOFTWARE enable/disable
mov DWORD [fs: eax+APIC_REG_SIV], ecx
ret
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
lapic_send_sipi:
mov eax, DWORD [APIC_BASE]
;Destination field is set to 0 has we will use a shorthand
xor ebx, ebx
mov DWORD [fs: eax+APIC_REG_ICR_HIGH], ebx
;Vector: 08h (Will make the CPU execute instruction ad address 08000h)
;Delivery mode: Startup
;Destination mode: ignored (0)
;Level: ignored (1)
;Trigger mode: ignored (0)
;Shorthand: All excluding self (3)
mov ebx, 0c4608h
mov DWORD [fs: eax+APIC_REG_ICR_LOW], ebx ;Writing the low DWORD sent the IPI
ret
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
lapic_send_init:
mov eax, DWORD [APIC_BASE]
;Destination field is set to 0 has we will use a shorthand
xor ebx, ebx
mov DWORD [fs: eax+APIC_REG_ICR_HIGH], ebx
;Vector: 00h
;Delivery mode: Startup
;Destination mode: ignored (0)
;Level: ignored (1)
;Trigger mode: ignored (0)
;Shorthand: All excluding self (3)
mov ebx, 0c4500h
mov DWORD [fs: eax+APIC_REG_ICR_LOW], ebx ;Writing the low DWORD sent the IPI
ret
IA32_APIC_BASE_MSR EQU 1bh
APIC_REG_SIV EQU 0f0h
APIC_REG_ICR_LOW EQU 300h
APIC_REG_ICR_HIGH EQU 310h
APIC_REG_ID EQU 20h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
APIC_BASE dd 00h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
unrealmode:
lgdt [cs:GDT]
cli
mov eax, cr0
or ax, 01h
mov cr0, eax
mov bx, 08h
mov fs, bx
mov gs, bx
and ax, 0fffeh
mov cr0, eax
sti
;IMPORTAT: This call is FAR!
;So it can be called from everywhere
retf
GDT:
dw 0fh
dd GDT + 7c00h
dw 00h
dd 0000ffffh
dd 00cf9200h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
payload_start_abs:
; payload starts at segment:offset 0800h:0000h
section payload, vstart=0000h, align=1
payload:
;IMPORTANT NOTE: Here we are in a \"new\" CPU every state we set before is no
;more present here (except for the BSP, but we handler every processor with
;the same code).
jmp 800h: __RESTART__
__RESTART__:
mov ax, cs
mov ds, ax
xor sp, sp
cld
;IMPORTANT: We can't use the stack yet. Every CPU is pointing to the same stack!
;Get an unique id
mov ax, WORD [counter]
.try:
mov bx, ax
inc bx
lock cmpxchg WORD [counter], bx
jnz .try
mov cx, ax ;Save this unique id
;Stack segment = CS + unique id * 1000
shl ax, 12
mov bx, cs
add ax, bx
mov ss, ax
;Text buffer
push 0b800h
pop es
;Set unreal mode again
call 7c0h:unrealmode
;Use GS for old variables
mov ax, 7c0h
mov gs, ax
;Calculate text row
mov ax, cx
mov bx, 160d ;80 * 2
mul bx
mov di, ax
;Get LAPIC id
mov ebx, DWORD [gs:APIC_BASE]
mov edx, DWORD [fs:ebx + APIC_REG_ID]
shr edx, 24d
call itoa8
cli
hlt
;DL = Number
;DI = ptr to text buffer
itoa8:
mov bx, dx
shr bx, 0fh
mov al, BYTE [bx + digits]
mov ah, 09h
stosw
mov bx, dx
and bx, 0fh
mov al, BYTE [bx + digits]
mov ah, 09h
stosw
ret
digits db \"0123456789abcdef\"
counter dw 0
payload_end:
; Boot signature is at physical offset 01feh of
; the boot sector
section bootsig, start=01feh
dw 0aa55h
每个CPU的堆栈地址为800h:(索引*1000h),给每个AP 64KiB的堆栈。每个CPU的行号是索引,因此文本缓冲区中的指针是80*2*index 要生成索引,使用
lock cmpxchg
以原子方式递增并返回一个字
期末笔记*对端口80h的写入用于产生1µs的延迟。
*
unrealmode
是一个较远的例程,因此也可以在唤醒后调用它。*BSP也跳到WA 屏幕截图 来自具有8个处理器的Bochs
到目前为止,您的代码是什么?非常感谢您的回答,这正是我需要的!!然而,我想从我的好奇心中多问一点。由于代码的地址通过IPIs中的向量字段传递,并以4KB对齐,如果我需要目标处理器以特定的32位或64位地址开始执行,该怎么办?我得到的信息是,我应该通过IDT来做这件事,所以8位向量是IDT编号??非常感谢你对之前的评论做了一点说明。在完成ISS协议后,我应该如何发送额外的IPI?比如说,如果我将交付模式设置为fixed,vector设置为10b(int 2),目标核心是否会执行IDT#2?
us#u wait
的等待时间可能比预期的要短得多,因为如果端口80h路由到实际的ISA总线,每次写入只需要1U左右。您可以通过将APIC_基更改为1M实模式地址空间中的某个位置来避免非实模式。至于等待循环,它依赖于每次写入1us的延迟,这对于<0x400的端口来说是典型的。10000us=10ms,1us延迟来自ISA总线定时。如果没有ISA总线路由端口80h写入,您将获得不同的延迟。代码已经在使用IA32_APIC_BASE MSR,因此它已经假设为P6。此外,INIT、SIPI、SIPI序列在486 CPU与外部APIC之间不起作用。