Math:stm32 arm 上的 sqrt 函数不起作用
我使用的是一个带有 FPU 的 stm32f4 芯片(cortex-m4),而 sqrt(9.7*9.7) 返回 94.17。我使用的是 arm-none-eabi-gcc 编译器,编译时没有任何错误。我的 makefile 很长,因为同一个文件用于 stm32f4 和 sam4 芯片,我甚至不知道该发布哪些相关部分。感谢您的帮助。编辑:makefile 中的一些标志配置。Math:stm32 arm 上的 sqrt 函数不起作用,math,arm,sqrt,fpu,cortex-m,Math,Arm,Sqrt,Fpu,Cortex M,我使用的是一个带有 FPU 的 stm32f4 芯片(cortex-m4),而 sqrt(9.7*9.7) 返回 94.17。我使用的是 arm-none-eabi-gcc 编译器,编译时没有任何错误。我的 makefile 很长,因为同一个文件用于 stm32f4 和 sam4 芯片,我甚至不知道该发布哪些相关部分。感谢您的帮助。编辑:makefile 中的一些标志配置: C_FLAGS = -mcpu=cortex-m4 -mthumb C_FLAGS += -std=gnu89 -g -ggdb3 -fverbose-asm
# Target core and instruction set: Cortex-M4, Thumb.
C_FLAGS = -mcpu=cortex-m4 -mthumb
# GNU C89 dialect, debug information, and annotated assembly output.
C_FLAGS += -std=gnu89 -g -ggdb3 -fverbose-asm
# Raise the size limit used when deciding whether to inline a single function.
C_FLAGS += --param max-inline-insns-single=500
# Treat unsuffixed floating-point constants as float instead of double.
C_FLAGS += -fsingle-precision-constant
# Single-precision FPU with the hard-float calling convention.
# NOTE(review): double arithmetic still goes through software helpers
# (the listings show __aeabi_f2d / __aeabi_d2f calls) -- confirm this is intended.
C_FLAGS += -mfpu=fpv4-sp-d16
C_FLAGS += -mfloat-abi=hard
C_WARNINGS = -Wall
# Warn when a float is implicitly promoted to double.
C_WARNINGS += -Wdouble-promotion
这是我的代码:
/* Per-axis values and their vector magnitude, all single precision. */
float x, y, z, mag;
/* Each raw ADC value is widened to int32_t, biased by 0x8000, then scaled and offset. */
x = ((float) (((int32_t) x_adc) + 0x8000)) * scale - offset;
y = ((float) (((int32_t) y_adc) + 0x8000)) * scale - offset;
z = ((float) (((int32_t) z_adc) + 0x8000)) * scale - offset;
/*
 * NOTE(review): sqrt() is the double-precision routine, so the float sum is
 * converted to double for the call and the result converted back to float.
 * Presumably sqrtf() was intended -- confirm.
 */
mag = sqrt(x*x + y*y + z*z);
/* The four float arguments are converted to double before being handed to DEBUG. */
DEBUG("XYZ = %.2f, %.2f, %.2f = %.2f\n", x, y, z, mag );
DEBUG 宏会把调试日志打印到 usb_cdc 接口和 LCD 屏幕上。此外,我之前一直覆盖了 -Wdouble-promotion 选项,现在在 DEBUG 那一行收到了双精度提升(double promotion)警告(该宏使用 __VA_ARGS__ 调用 snprintf)。我想这是不相关的。
我将优化级别更改为0(仍然使用sqrt获得相同的结果),这是.lss:
mag = sqrt(x*x + y*y + z*z);
8019a36: ed97 7a05 vldr s14, [r7, #20]
8019a3a: edd7 7a05 vldr s15, [r7, #20]
8019a3e: ee27 7a27 vmul.f32 s14, s14, s15
8019a42: edd7 6a04 vldr s13, [r7, #16]
8019a46: edd7 7a04 vldr s15, [r7, #16]
8019a4a: ee66 7aa7 vmul.f32 s15, s13, s15
8019a4e: ee37 7a27 vadd.f32 s14, s14, s15
8019a52: edd7 6a03 vldr s13, [r7, #12]
8019a56: edd7 7a03 vldr s15, [r7, #12]
8019a5a: ee66 7aa7 vmul.f32 s15, s13, s15
8019a5e: ee77 7a27 vadd.f32 s15, s14, s15
8019a62: ee17 0a90 vmov r0, s15
8019a66: f7e6 fcf3 bl 8000450 <__aeabi_f2d>
8019a6a: 4602 mov r2, r0
8019a6c: 460b mov r3, r1
8019a6e: ec43 2b10 vmov d0, r2, r3
8019a72: f007 fe9f bl 80217b4 <sqrt>
8019a76: ec53 2b10 vmov r2, r3, d0
8019a7a: 4610 mov r0, r2
8019a7c: 4619 mov r1, r3
8019a7e: f7e6 fffd bl 8000a7c <__aeabi_d2f>
8019a82: 4603 mov r3, r0
8019a84: 60bb str r3, [r7, #8]
if( 1 ) {
DEBUG("XYZ = %.2f, %.2f, %.2f = %.2f\n", x, y, z, mag );
8019a86: 6978 ldr r0, [r7, #20]
8019a88: f7e6 fce2 bl 8000450 <__aeabi_f2d>
8019a8c: 4682 mov sl, r0
8019a8e: 468b mov fp, r1
8019a90: 6938 ldr r0, [r7, #16]
8019a92: f7e6 fcdd bl 8000450 <__aeabi_f2d>
8019a96: 4680 mov r8, r0
8019a98: 4689 mov r9, r1
8019a9a: 68f8 ldr r0, [r7, #12]
8019a9c: f7e6 fcd8 bl 8000450 <__aeabi_f2d>
8019aa0: 4604 mov r4, r0
8019aa2: 460d mov r5, r1
8019aa4: 68b8 ldr r0, [r7, #8]
8019aa6: f7e6 fcd3 bl 8000450 <__aeabi_f2d>
8019aaa: 4602 mov r2, r0
8019aac: 460b mov r3, r1
8019aae: e9cd ab00 strd sl, fp, [sp]
8019ab2: e9cd 8902 strd r8, r9, [sp, #8]
8019ab6: e9cd 4504 strd r4, r5, [sp, #16]
8019aba: e9cd 2306 strd r2, r3, [sp, #24]
8019abe: f243 707c movw r0, #14204 ; 0x377c
8019ac2: f6c0 0002 movt r0, #2050 ; 0x802
8019ac6: f240 4176 movw r1, #1142 ; 0x476
8019aca: f643 0218 movw r2, #14360 ; 0x3818
8019ace: f6c0 0202 movt r2, #2050 ; 0x802
8019ad2: f643 430c movw r3, #15372 ; 0x3c0c
8019ad6: f6c0 0302 movt r3, #2050 ; 0x802
8019ada: f7ed fee9 bl 80078b0 <debug_log>
mag = sqrt(x*x + y*y + z*z);
8019a36: ed97 7a05 vldr s14, [r7, #20]
8019a3a: edd7 7a05 vldr s15, [r7, #20]
8019a3e: ee27 7a27 vmul.f32 s14, s14, s15
8019a42: edd7 6a04 vldr s13, [r7, #16]
8019a46: edd7 7a04 vldr s15, [r7, #16]
8019a4a: ee66 7aa7 vmul.f32 s15, s13, s15
8019a4e: ee37 7a27 vadd.f32 s14, s14, s15
8019a52: edd7 6a03 vldr s13, [r7, #12]
8019a56: edd7 7a03 vldr s15, [r7, #12]
8019a5a: ee66 7aa7 vmul.f32 s15, s13, s15
8019a5e: ee77 7a27 vadd.f32 s15, s14, s15
8019a62: ee17 0a90 vmov r0, s15
8019a66: f7e6 fcf3 bl 8000450 <__aeabi_f2d>
8019a6a: 4602 mov r2, r0
8019a6c: 460b mov r3, r1
8019a6e: ec43 2b10 vmov d0, r2, r3
8019a72: f007 fe9f bl 80217b4 <sqrt>
8019a76: ec53 2b10 vmov r2, r3, d0
8019a7a: 4610 mov r0, r2
8019a7c: 4619 mov r1, r3
8019a7e: f7e6 fffd bl 8000a7c <__aeabi_d2f>
8019a82: 4603 mov r3, r0
8019a84: 60bb str r3, [r7, #8]
if( 1 ) {
DEBUG("XYZ = %.2f, %.2f, %.2f = %.2f\n", x, y, z, mag );
8019a86: 6978 ldr r0, [r7, #20]
8019a88: f7e6 fce2 bl 8000450 <__aeabi_f2d>
8019a8c: 4682 mov sl, r0
8019a8e: 468b mov fp, r1
8019a90: 6938 ldr r0, [r7, #16]
8019a92: f7e6 fcdd bl 8000450 <__aeabi_f2d>
8019a96: 4680 mov r8, r0
8019a98: 4689 mov r9, r1
8019a9a: 68f8 ldr r0, [r7, #12]
8019a9c: f7e6 fcd8 bl 8000450 <__aeabi_f2d>
8019aa0: 4604 mov r4, r0
8019aa2: 460d mov r5, r1
8019aa4: 68b8 ldr r0, [r7, #8]
8019aa6: f7e6 fcd3 bl 8000450 <__aeabi_f2d>
8019aaa: 4602 mov r2, r0
8019aac: 460b mov r3, r1
8019aae: e9cd ab00 strd sl, fp, [sp]
8019ab2: e9cd 8902 strd r8, r9, [sp, #8]
8019ab6: e9cd 4504 strd r4, r5, [sp, #16]
8019aba: e9cd 2306 strd r2, r3, [sp, #24]
8019abe: f243 707c movw r0, #14204 ; 0x377c
8019ac2: f6c0 0002 movt r0, #2050 ; 0x802
8019ac6: f240 4176 movw r1, #1142 ; 0x476
8019aca: f643 0218 movw r2, #14360 ; 0x3818
8019ace: f6c0 0202 movt r2, #2050 ; 0x802
8019ad2: f643 430c movw r3, #15372 ; 0x3c0c
8019ad6: f6c0 0302 movt r3, #2050 ; 0x802
8019ada: f7ed fee9 bl 80078b0 <debug_log>
这是用-O2
x = ((float) (((int32_t) x_adc) + 0x8000)) * scale - offset;
8010c3e: eeb0 9a48 vmov.f32 s18, s16
8010c42: ee10 9aa9 vnmls.f32 s18, s1, s19
y = ((float) (((int32_t) y_adc) + 0x8000)) * scale - offset;
z = ((float) (((int32_t) z_adc) + 0x8000)) * scale - offset;
8010c46: eef8 1ac1 vcvt.f32.s32 s3, s2
mag = sqrt(x*x + y*y + z*z);
8010c4a: ee28 2aa8 vmul.f32 s4, s17, s17
y_adc = ((int16_t) (buffer[2] & 0xff)) + (((int16_t) (buffer[3] & 0xff))<<8);
z_adc = ((int16_t) (buffer[4] & 0xff)) + (((int16_t) (buffer[5] & 0xff))<<8);
x = ((float) (((int32_t) x_adc) + 0x8000)) * scale - offset;
y = ((float) (((int32_t) y_adc) + 0x8000)) * scale - offset;
z = ((float) (((int32_t) z_adc) + 0x8000)) * scale - offset;
8010c4e: ee11 8aa9 vnmls.f32 s16, s3, s19
mag = sqrt(x*x + y*y + z*z);
8010c52: ee09 2a09 vmla.f32 s4, s18, s18
8010c56: ee08 2a08 vmla.f32 s4, s16, s16
8010c5a: ee12 0a10 vmov r0, s4
8010c5e: f7ef fbf7 bl 8000450 <__aeabi_f2d>
8010c62: ec41 0b10 vmov d0, r0, r1
8010c66: f007 f8f9 bl 8017e5c <sqrt>
if( 1 ) {
DEBUG("XYZ = %.2f, %.2f, %.2f = %.2f\n", x, y, z, mag );
8010c6a: ee19 0a10 vmov r0, s18
z_adc = ((int16_t) (buffer[4] & 0xff)) + (((int16_t) (buffer[5] & 0xff))<<8);
x = ((float) (((int32_t) x_adc) + 0x8000)) * scale - offset;
y = ((float) (((int32_t) y_adc) + 0x8000)) * scale - offset;
z = ((float) (((int32_t) z_adc) + 0x8000)) * scale - offset;
mag = sqrt(x*x + y*y + z*z);
8010c6e: ec55 4b10 vmov r4, r5, d0
if( 1 ) {
DEBUG("XYZ = %.2f, %.2f, %.2f = %.2f\n", x, y, z, mag );
8010c72: f7ef fbed bl 8000450 <__aeabi_f2d>
8010c76: e9cd 0100 strd r0, r1, [sp]
8010c7a: ee18 0a90 vmov r0, s17
8010c7e: f7ef fbe7 bl 8000450 <__aeabi_f2d>
8010c82: e9cd 0102 strd r0, r1, [sp, #8]
8010c86: ee18 0a10 vmov r0, s16
8010c8a: f7ef fbe1 bl 8000450 <__aeabi_f2d>
8010c8e: e9cd 0104 strd r0, r1, [sp, #16]
z_adc = ((int16_t) (buffer[4] & 0xff)) + (((int16_t) (buffer[5] & 0xff))<<8);
x = ((float) (((int32_t) x_adc) + 0x8000)) * scale - offset;
y = ((float) (((int32_t) y_adc) + 0x8000)) * scale - offset;
z = ((float) (((int32_t) z_adc) + 0x8000)) * scale - offset;
mag = sqrt(x*x + y*y + z*z);
8010c92: 4629 mov r1, r5
8010c94: 4620 mov r0, r4
8010c96: f7ef fef1 bl 8000a7c <__aeabi_d2f>
if( 1 ) {
DEBUG("XYZ = %.2f, %.2f, %.2f = %.2f\n", x, y, z, mag );
8010c9a: f7ef fbd9 bl 8000450 <__aeabi_f2d>
8010c9e: 4a0e ldr r2, [pc, #56] ; (8010cd8 <LIS331DLH_ReadAc+0x134>)
8010ca0: 4b0e ldr r3, [pc, #56] ; (8010cdc <LIS331DLH_ReadAc+0x138>)
8010ca2: e9cd 0106 strd r0, r1, [sp, #24]
8010ca6: 4808 ldr r0, [pc, #32] ; (8010cc8 <LIS331DLH_ReadAc+0x124>)
8010ca8: f240 4176 movw r1, #1142 ; 0x476
8010cac: f7f4 fb76 bl 800539c <debug_log>
8010cb0: e78b b.n 8010bca <LIS331DLH_ReadAc+0x26>
x = ((float) (((int32_t) x_adc) + 0x8000)) * scale - offset;
8010c3e: eeb0 9a48 vmov.f32 s18, s16
8010c42: ee10 9aa9 vnmls.f32 s18, s1, s19
y = ((float) (((int32_t) y_adc) + 0x8000)) * scale - offset;
z = ((float) (((int32_t) z_adc) + 0x8000)) * scale - offset;
8010c46: eef8 1ac1 vcvt.f32.s32 s3, s2
mag = sqrt(x*x + y*y + z*z);
8010c4a: ee28 2aa8 vmul.f32 s4, s17, s17
y_adc = ((int16_t) (buffer[2] & 0xff)) + (((int16_t) (buffer[3] & 0xff))<<8);
这一切都取决于您如何使用它。您需要粘贴更多的信息/代码:一个简单的测试用例、该测试用例的反汇编,以及您是如何确定结果的,等等。
#include <math.h>
/*
 * Minimal test case: returns sqrt(9.7*9.7) using the double-precision sqrt().
 * Both operands are constants, so the compiler may evaluate the whole
 * expression at build time rather than calling sqrt() at run time.
 */
double fun ( void )
{
return(sqrt(9.7*9.7));
}
所以你需要重复一遍,看看你是否调用了数学函数,如果是的话,它们给sqrt的是什么,它们是否真的链接了正确的东西,然后我会简单地用十六进制打印结果。然后调试另一半,它是否取了正确的9.7F值,并且根据你的打印函数,它是否生成了正确的ascii
如何将(单精度)浮点转换为十六进制
@ dummy: returns to the caller immediately without modifying any register,
@ so whatever the caller placed in r0 comes back unchanged.
.thumb_func
.globl dummy
dummy:
bx lr
要验证编译器在进出过程中是否使用r0,请执行以下操作:
/*
 * Prototype for the assembly stub dummy(). Without it the call below is an
 * implicit declaration, and 9.7F would be promoted to double instead of being
 * passed as a single-precision value.
 */
unsigned int dummy ( float );

/*
 * Passes the single-precision constant 9.7F to dummy() and returns whatever
 * dummy() hands back, typed as unsigned int so the result can be inspected
 * as raw bits (e.g. printed in hex).
 */
unsigned int fun3 ( void )
{
    return(dummy(9.7F));
}
00000010 <fun3>:
10: b508 push {r3, lr}
12: 4803 ldr r0, [pc, #12] ; (20 <fun3+0x10>)
14: f7ff fffe bl 0 <dummy>
18: bc08 pop {r3}
1a: bc02 pop {r1}
1c: 4708 bx r1
1e: 46c0 nop ; (mov r8, r8)
20: 411b3333 tstmi fp, r3, lsr r3
/*
 * Prototype for the assembly stub dummy(). Without it the call below is an
 * implicit declaration, and 9.7F would be promoted to double instead of being
 * passed as a single-precision value.
 */
unsigned int dummy(float);

/*
 * Passes the single-precision constant 9.7F to dummy() and returns whatever
 * dummy() hands back, typed as unsigned int so the result can be inspected
 * as raw bits (e.g. printed in hex).
 */
unsigned int fun3(void)
{
    return(dummy(9.7F));
}
00000010 <fun3>:
10: b508 push {r3, lr}
12: 4803 ldr r0, [pc, #12] ; (20 <fun3+0x10>)
14: f7ff fffe bl 0 <dummy>
18: bc08 pop {r3}
1a: bc02 pop {r1}
1c: 4708 bx r1
1e: 46c0 nop ; (mov r8, r8)
20: 411b3333 tstmi fp, r3, lsr r3
只需原样返回 r0 中的位(不把它们当作浮点数去改动),这些位的含义由编译器或程序员约定。无论如何,位只是位,其含义取决于如何解释。您是否启用了编译器警告?您是否 #include <math.h>?请注意,您的代码在值和函数调用中隐式地使用了 double。Cortex-M4F 仅在硬件中支持 float,但在软件中应支持 double。您是否尝试过 sqrtf(9.7f*9.7f),即 float 变体?如果这是您的确切代码(请发布代码和反汇编),那么实际的平方根很可能在编译时就已算出,返回的就是该结果和/或您打印时传入的内容。您是用什么来打印它的?请确定打印部分是在运行时还是在编译时求值的。问题可能(很可能)与 sqrt() 或 sqrtf() 无关…您已经排除了调试输出中从位到浮点的转换问题了吗?将问题一分为二,继续分割,直到找到为止。首先,去掉格式化打印,以原始十六进制检查数学运算生成的值。如果问题不在格式化打印,那么就开始把数学运算分解成各个部分(如果您还没有这样做的话)。
#include <math.h>
/*
 * Single-precision variant of the test case: returns sqrtf(9.7F*9.7F).
 * Both operands are float constants, so the compiler may evaluate the
 * expression at build time rather than calling sqrtf() at run time.
 */
float fun ( void )
{
return(sqrtf(9.7F*9.7F));
}
/* Reference case: hands back the single-precision constant 9.7F unchanged. */
float fun2(void)
{
    const float reference = 9.7F;

    return reference;
}
00000000 <fun>:
0: 4800 ldr r0, [pc, #0] ; (4 <fun+0x4>)
2: 4770 bx lr
4: 411b3333 tstmi fp, r3, lsr r3
00000008 <fun2>:
8: 4800 ldr r0, [pc, #0] ; (c <fun2+0x4>)
a: 4770 bx lr
c: 411b3333 tstmi fp, r3, lsr r3
@ fun: loads the 32-bit literal stored at .LCPI0_0 into r0 and returns.
@ No call to sqrtf appears in this body.
fun:
ldr.n r0, .LCPI0_0
bx lr
.align 2
@ Literal pool entry returned by fun.
.LCPI0_0:
.long 1092301619 @ 0x411b3333
@ fun2: loads the 32-bit literal stored at .LCPI1_0 into r0 and returns.
fun2:
ldr.n r0, .LCPI1_0
bx lr
.align 2
@ Literal pool entry returned by fun2 (the constant 9.7F in the C source).
.LCPI1_0:
.long 1092301619 @ 0x411b3333
@ dummy: returns to the caller immediately without modifying any register,
@ so whatever the caller placed in r0 comes back unchanged.
.thumb_func
.globl dummy
dummy:
bx lr
/*
 * Prototype for the assembly stub dummy(). Without it the call below is an
 * implicit declaration, and 9.7F would be promoted to double instead of being
 * passed as a single-precision value.
 */
unsigned int dummy ( float );

/*
 * Passes the single-precision constant 9.7F to dummy() and returns whatever
 * dummy() hands back, typed as unsigned int so the result can be inspected
 * as raw bits (e.g. printed in hex).
 */
unsigned int fun3 ( void )
{
    return(dummy(9.7F));
}
00000010 <fun3>:
10: b508 push {r3, lr}
12: 4803 ldr r0, [pc, #12] ; (20 <fun3+0x10>)
14: f7ff fffe bl 0 <dummy>
18: bc08 pop {r3}
1a: bc02 pop {r1}
1c: 4708 bx r1
1e: 46c0 nop ; (mov r8, r8)
20: 411b3333 tstmi fp, r3, lsr r3