运行LLVM优化器后,32位内存访问被转换成了64位。有没有办法避免这种情况?

运行LLVM优化器将32位内存访问转换为64位。有没有办法避免这种情况?,llvm,llvm-clang,llvm-ir,Llvm,Llvm Clang,Llvm Ir,我正在生成的一段llvm ir代码上运行llvm优化器。运行优化器后,内存访问将从32位地址转换为64位地址。我希望避免这种情况,因为我用来运行软件验证的工具在64位指针方面存在一些问题 这是原始代码: target triple = "i386-unknown-linux-gnu" @Global_0 = local_unnamed_addr global i32 0 @Global_1 = local_unnamed_addr global i32 0 @Global_2 = local

我正在对自己生成的一段LLVM IR代码运行LLVM优化器。运行优化器后,内存访问的地址计算从32位变成了64位。我希望避免这种情况,因为我用来做软件验证的工具在处理64位指针时存在一些问题。

这是原始代码:

target triple = "i386-unknown-linux-gnu"


@Global_0 = local_unnamed_addr global i32 0
@Global_1 = local_unnamed_addr global i32 0
@Global_2 = local_unnamed_addr global i32 0
@Global_3 = local_unnamed_addr global i32 0
@mem = local_unnamed_addr global [128 x i8] zeroinitializer, align 1

; Input version of @main, written with one small step per basic block; most
; instructions carry a !Stack metadata string.
; Overall effect, as visible below:
;   1. local_1 = Global_2; Global_2 += 48; local_2 = local_1.
;   2. Loop: store local_0 as an i32 into @mem at byte offset
;      local_2 + (local_0 << 2), then local_0 += 1, while local_0 != 5.
;   3. Load an i32 from @mem at byte offset 16 + local_2; if it equals 4,
;      write local_1 back to Global_2, otherwise call @__VERIFIER_error.
;   4. Return 0.
; All getelementptr indices in this function are i32.
; NOTE(review): attribute group #0 is referenced here, but only #1 and #2 are
; defined in the visible listing.
define i32 @main() #0  { 

  ; Three i32 stack slots, each initialized to 0.
  func_2_entry: 
    %local_0 = alloca i32
    store i32 0, i32* %local_0
    %local_1 = alloca i32
    store i32 0, i32* %local_1
    %local_2 = alloca i32
    store i32 0, i32* %local_2
    br label %box_0

  ; box_0..box_1: local_1 = Global_2.
  box_0: 
    %s_0 = load i32, i32* @Global_2, !Stack !{ !"Stack((s_0, W32Int()))" } 
    br label %box_1

  box_1: 
    store i32 %s_0, i32* %local_1, !Stack !{ !"Stack()" } 
    br label %box_2

  ; box_2..box_5: Global_2 = Global_2 + 48.
  box_2: 
    %s_1 = load i32, i32* @Global_2, !Stack !{ !"Stack((s_1, W32Int()))" } 
    br label %box_3

  box_3: 
    %s_2 = add i32 0, 48, !Stack !{ !"Stack((s_2, W32Int()), (s_1, W32Int()))" } 
    br label %box_4

  box_4: 
    %s_3 = add i32 %s_1, %s_2, !Stack !{ !"Stack((s_3, W32Int()))" } 
    br label %box_5

  box_5: 
    store i32 %s_3, i32* @Global_2, !Stack !{ !"Stack()" } 
    br label %box_6

  ; box_6..box_7: local_2 = local_1.
  box_6: 
    %s_4 = load i32, i32* %local_1, !Stack !{ !"Stack((s_4, W32Int()))" } 
    br label %box_7

  box_7: 
    store i32 %s_4, i32* %local_2, !Stack !{ !"Stack()" } 
    br label %loop_8

  ; Loop header; the back edge comes from cond.branch_22.
  loop_8: 
    br label %box_9

  ; box_9..box_13: %s_9 = local_2 + (local_0 << 2), the byte offset into @mem.
  box_9: 
    %s_5 = load i32, i32* %local_2, !Stack !{ !"Stack((s_5, W32Int()))" } 
    br label %box_10

  box_10: 
    %s_6 = load i32, i32* %local_0, !Stack !{ !"Stack((s_6, W32Int()), (s_5, W32Int()))" } 
    br label %box_11

  box_11: 
    %s_7 = add i32 0, 2, !Stack !{ !"Stack((s_7, W32Int()), (s_6, W32Int()), (s_5, W32Int()))" } 
    br label %box_12

  box_12: 
    %s_8 = shl i32 %s_6, %s_7, !Stack !{ !"Stack((s_8, W32Int()), (s_5, W32Int()))" } 
    br label %box_13

  box_13: 
    %s_9 = add i32 %s_5, %s_8, !Stack !{ !"Stack((s_9, W32Int()))" } 
    br label %box_14

  ; box_14..box_15: store the current value of local_0 as an i32 at @mem + %s_9.
  box_14: 
    %s_10 = load i32, i32* %local_0, !Stack !{ !"Stack((s_10, W32Int()), (s_9, W32Int()))" } 
    br label %box_15

  ; The getelementptr below uses i32 indices; the i8* result is bitcast to i32*
  ; so that a full 32-bit value is written.
  box_15: 
    %temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9, !Stack !{ !"Stack()" } 
    %temp_1 = bitcast i8* %temp_0 to i32*, !Stack !{ !"Stack()" }   
    store i32 %s_10, i32* %temp_1, !Stack !{ !"Stack()" } 
    br label %box_16

  ; box_16..box_19: local_0 = local_0 + 1.
  box_16: 
    %s_11 = load i32, i32* %local_0, !Stack !{ !"Stack((s_11, W32Int()))" } 
    br label %box_17

  box_17: 
    %s_12 = add i32 0, 1, !Stack !{ !"Stack((s_12, W32Int()), (s_11, W32Int()))" } 
    br label %box_18

  box_18: 
    %s_13 = add i32 %s_11, %s_12, !Stack !{ !"Stack((s_13, W32Int()))" } 
    br label %box_19

  box_19: 
    store i32 %s_13, i32* %local_0, !Stack !{ !"Stack()" } 
    br label %box_20

  ; box_20..cond.branch_22: continue looping while the incremented counter != 5.
  box_20: 
    %s_14 = add i32 0, 5, !Stack !{ !"Stack((s_14, W32Int()), (s_13, W32Int()))" } 
    br label %box_21

  ; The i1 comparison result is widened to i32 and re-tested against 0 in the
  ; next block.
  box_21: 
    %s_15 = icmp ne i32 %s_13, %s_14, !Stack !{ !"Stack()" } 
    %s_16 = zext i1 %s_15 to i32
    br label %cond.branch_22

  cond.branch_22: 
    %temp_2 = icmp ne i32 %s_16, 0, !Stack !{ !"Stack()" } 
    br i1 %temp_2, label %loop_8, label %loop_8.end

  loop_8.end: 
    br label %box_23

  ; box_23..box_24: load an i32 from @mem at byte offset 16 + local_2.
  box_23: 
    %s_17 = load i32, i32* %local_2, !Stack !{ !"Stack((s_17, W32Int()))" } 
    br label %box_24

  box_24: 
    %temp_5 = add i32 16, %s_17, !Stack !{ !"Stack((s_18, W32Int()))" } 
    %temp_3 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %temp_5, !Stack !{ !"Stack((s_18, W32Int()))" } 
    %temp_4 = bitcast i8* %temp_3 to i32*, !Stack !{ !"Stack((s_18, W32Int()))" } 
    %s_18 = load i32, i32* %temp_4, !Stack !{ !"Stack((s_18, W32Int()))" } 

    br label %box_25

  ; box_25..if_27: branch on whether the loaded value equals 4.
  box_25: 
    %s_19 = add i32 0, 4, !Stack !{ !"Stack((s_19, W32Int()), (s_18, W32Int()))" } 
    br label %box_26

  box_26: 
    %s_20 = icmp eq i32 %s_18, %s_19, !Stack !{ !"Stack()" } 
    %s_21 = zext i1 %s_20 to i32
    br label %if_27

  if_27: 
    %temp_6 = icmp ne i32 %s_21, 0, !Stack !{ !"Stack()" } 
    br i1 %temp_6, label %box_28, label %box_32

  ; Taken when the loaded value equals 4: write local_1 back to Global_2.
  box_28: 
    %s_22 = load i32, i32* %local_1, !Stack !{ !"Stack((s_22, W32Int()))" } 
    br label %box_29

  box_29: 
    store i32 %s_22, i32* @Global_2, !Stack !{ !"Stack()" } 
    br label %box_30

  ; %s_23 is computed here but not used anywhere in this function.
  box_30: 
    %s_23 = add i32 0, 0, !Stack !{ !"Stack((s_23, W32Int()))" } 
    br label %box_31

  box_31: 
    br label %if_27_cond.end

  ; Taken when the loaded value differs from 4: report an error.
  box_32: 
    call void (...) @__VERIFIER_error() #2
    br label %if_27_cond.end

  if_27_cond.end: 
    br label %box_33

  ; The return value is the constant 0 (add i32 0, 0).
  box_33: 
    %s_24 = add i32 0, 0, !Stack !{ !"Stack((s_24, W32Int()))" } 
    br label %func_2_exit

  func_2_exit: 
    ret i32 %s_24

} 

declare void @abort(i32 ) 

declare void @__VERIFIER_error(...) #1

attributes #1 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { noreturn }
例如,这段代码中与指针相关的索引使用的是i32:

%temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9
我运行:

opt -always-inline -O2 -S output.ll > output-optimized.ll
使用此版本的opt:

$ opt --version
LLVM (http://llvm.org/):
  LLVM version 6.0.1
  Optimized build.
  Default target: x86_64-unknown-linux-gnu
  Host CPU: haswell
最终结果是:


; ModuleID = 'output.ll'
source_filename = "output.ll"
target triple = "i386-unknown-linux-gnu"

@Global_0 = local_unnamed_addr global i32 0
@Global_1 = local_unnamed_addr global i32 0
@Global_2 = local_unnamed_addr global i32 0
@Global_3 = local_unnamed_addr global i32 0
@mem = local_unnamed_addr global [128 x i8] zeroinitializer, align 1

; @main in this listing is a single basic block: five i32 stores of the
; constants 0..4 into @mem at byte offsets %s_0 + 0, 4, 8, 12 and 16, followed
; by a store of %s_0 back to @Global_2 and `ret i32 0`.
; Each i32 offset is sign-extended to i64 (%0..%4) before it is used as a
; getelementptr index, so every getelementptr here has i64 indices.
define i32 @main() local_unnamed_addr {
box_28:
  %s_0 = load i32, i32* @Global_2, align 4, !Stack !0
  %0 = sext i32 %s_0 to i64
  %temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %0, !Stack !1
  %temp_1 = bitcast i8* %temp_0 to i32*, !Stack !1
  store i32 0, i32* %temp_1, align 4, !Stack !1
  %s_9.1 = add i32 %s_0, 4, !Stack !2
  %1 = sext i32 %s_9.1 to i64
  %temp_0.1 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %1, !Stack !1
  %temp_1.1 = bitcast i8* %temp_0.1 to i32*, !Stack !1
  store i32 1, i32* %temp_1.1, align 4, !Stack !1
  %s_9.2 = add i32 %s_0, 8, !Stack !2
  %2 = sext i32 %s_9.2 to i64
  %temp_0.2 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %2, !Stack !1
  %temp_1.2 = bitcast i8* %temp_0.2 to i32*, !Stack !1
  store i32 2, i32* %temp_1.2, align 4, !Stack !1
  %s_9.3 = add i32 %s_0, 12, !Stack !2
  %3 = sext i32 %s_9.3 to i64
  %temp_0.3 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %3, !Stack !1
  %temp_1.3 = bitcast i8* %temp_0.3 to i32*, !Stack !1
  store i32 3, i32* %temp_1.3, align 4, !Stack !1
  %s_9.4 = add i32 %s_0, 16
  %4 = sext i32 %s_9.4 to i64
  %temp_0.4 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %4
  %temp_1.4 = bitcast i8* %temp_0.4 to i32*
  store i32 4, i32* %temp_1.4, align 4, !Stack !1
  store i32 %s_0, i32* @Global_2, align 4, !Stack !1
  ret i32 0
}

!0 = !{!"Stack((s_0, W32Int()))"}
!1 = !{!"Stack()"}
!2 = !{!"Stack((s_9, W32Int()))"}

如您所见,现在对mem的访问使用的是i64索引:


 %temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i64 0, i64 %0

我需要的是:输出中所有这些getelementptr指令都使用i32索引,而不是i64索引。有什么想法吗?

正如@arrowd在问题评论中指出的,在模块中添加显式的target datalayout(尤其是其中的p指针规格)解决了这个问题。LLVM语言参考对p项的说明如下:

p[n]:<size>:<abi>:<pref>:<idx>
        This specifies the size of a pointer and its <abi> and <pref>erred 
        alignments for address space n. The fourth parameter <idx> is a size of 
        index that used for address calculation. If not specified, the default index 
        size is equal to the pointer size. All sizes are in bits. The address space, 
        n, is optional, and if not specified, denotes the default address space 0. 
        The value of n must be in the range [1,2^23).

运行opt命令:

opt -always-inline -O2 -S output.ll > output-optimized.ll
产生预期的结果:

; ModuleID = 'output.ll'
source_filename = "output.ll"
target datalayout = "e-p:32:32-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "i386-unknown-linux-gnu"

@Global_0 = local_unnamed_addr global i32 0
@Global_1 = local_unnamed_addr global i32 0
@Global_2 = local_unnamed_addr global i32 0
@Global_3 = local_unnamed_addr global i32 0
@mem = local_unnamed_addr global [128 x i8] zeroinitializer, align 1

; @main in this listing is a single basic block: five i32 stores of the
; constants 0..4 into @mem at byte offsets %s_0 + 0, 4, 8, 12 and 16, followed
; by a store of %s_0 back to @Global_2 and `ret i32 0`.
; Every getelementptr here takes its i32 offset directly as an i32 index; there
; are no sext-to-i64 instructions.
define i32 @main() local_unnamed_addr {
box_28:
  %s_0 = load i32, i32* @Global_2, align 4, !Stack !0
  %temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_0, !Stack !1
  %temp_1 = bitcast i8* %temp_0 to i32*, !Stack !1
  store i32 0, i32* %temp_1, align 4, !Stack !1
  %s_9.1 = add i32 %s_0, 4, !Stack !2
  %temp_0.1 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.1, !Stack !1
  %temp_1.1 = bitcast i8* %temp_0.1 to i32*, !Stack !1
  store i32 1, i32* %temp_1.1, align 4, !Stack !1
  %s_9.2 = add i32 %s_0, 8, !Stack !2
  %temp_0.2 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.2, !Stack !1
  %temp_1.2 = bitcast i8* %temp_0.2 to i32*, !Stack !1
  store i32 2, i32* %temp_1.2, align 4, !Stack !1
  %s_9.3 = add i32 %s_0, 12, !Stack !2
  %temp_0.3 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.3, !Stack !1
  %temp_1.3 = bitcast i8* %temp_0.3 to i32*, !Stack !1
  store i32 3, i32* %temp_1.3, align 4, !Stack !1
  %s_9.4 = add i32 %s_0, 16
  %temp_0.4 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.4
  %temp_1.4 = bitcast i8* %temp_0.4 to i32*
  store i32 4, i32* %temp_1.4, align 4, !Stack !1
  store i32 %s_0, i32* @Global_2, align 4, !Stack !1
  ret i32 0
}

尝试添加显式的target datalayout字符串,并调整其中的值。
@arrowd,opt没有“-target”参数,只有-march。我试过指定-march=i386,但没有效果。
我指的是IR模块里的target datalayout字符串。这种优化是由“Combine redundant instructions”(即instcombine)pass造成的,无法显式禁用它。
谢谢,@arrowd,这样就解决了。关键是p选项:我添加了p:32:32,整行是:target datalayout = "e-p:32:32-m:e-i64:64-f80:128-n8:16:32:64-S128",一切正常。
请注意,您不必显式列出所有数据布局项。例如,这是我在自己的玩具后端中使用的数据布局:
E-p:32:32-S32
; ModuleID = 'output.ll'
source_filename = "output.ll"
target datalayout = "e-p:32:32-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "i386-unknown-linux-gnu"

@Global_0 = local_unnamed_addr global i32 0
@Global_1 = local_unnamed_addr global i32 0
@Global_2 = local_unnamed_addr global i32 0
@Global_3 = local_unnamed_addr global i32 0
@mem = local_unnamed_addr global [128 x i8] zeroinitializer, align 1

; @main in this listing is a single basic block: five i32 stores of the
; constants 0..4 into @mem at byte offsets %s_0 + 0, 4, 8, 12 and 16, followed
; by a store of %s_0 back to @Global_2 and `ret i32 0`.
; Every getelementptr here takes its i32 offset directly as an i32 index; there
; are no sext-to-i64 instructions.
define i32 @main() local_unnamed_addr {
box_28:
  %s_0 = load i32, i32* @Global_2, align 4, !Stack !0
  %temp_0 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_0, !Stack !1
  %temp_1 = bitcast i8* %temp_0 to i32*, !Stack !1
  store i32 0, i32* %temp_1, align 4, !Stack !1
  %s_9.1 = add i32 %s_0, 4, !Stack !2
  %temp_0.1 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.1, !Stack !1
  %temp_1.1 = bitcast i8* %temp_0.1 to i32*, !Stack !1
  store i32 1, i32* %temp_1.1, align 4, !Stack !1
  %s_9.2 = add i32 %s_0, 8, !Stack !2
  %temp_0.2 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.2, !Stack !1
  %temp_1.2 = bitcast i8* %temp_0.2 to i32*, !Stack !1
  store i32 2, i32* %temp_1.2, align 4, !Stack !1
  %s_9.3 = add i32 %s_0, 12, !Stack !2
  %temp_0.3 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.3, !Stack !1
  %temp_1.3 = bitcast i8* %temp_0.3 to i32*, !Stack !1
  store i32 3, i32* %temp_1.3, align 4, !Stack !1
  %s_9.4 = add i32 %s_0, 16
  %temp_0.4 = getelementptr inbounds [128 x i8], [128 x i8]* @mem, i32 0, i32 %s_9.4
  %temp_1.4 = bitcast i8* %temp_0.4 to i32*
  store i32 4, i32* %temp_1.4, align 4, !Stack !1
  store i32 %s_0, i32* @Global_2, align 4, !Stack !1
  ret i32 0
}