在汇编程序中调用C函数_C_Arrays_Assembly

在汇编程序中调用C函数

c arrays assembly

在汇编程序中调用C函数,c,arrays,assembly,C,Arrays,Assembly,我正在尝试编写一个汇编程序，调用c中的函数，该函数将用预定义字符替换字符串中的某些字符，因为字符数组中的当前字符符合某些限定条件我的c文件： #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> //display *((char *) $edi) // These functions will be implemented in assemb

我正在尝试编写一个汇编程序，它调用 C 中的函数：当字符数组中的当前字符满足某个限定条件时，就用预定义的字符替换字符串中的该字符。

我的c文件：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

//display *((char *) $edi)
// strrepl is declared here and implemented in assembly, not in this C file.
//
//   str        - the string to process; main passes a strdup'd copy and prints
//                it after the call, so it is presumably modified in place
//   c          - the replacement character, passed as an int
//   isinsubset - predicate that is called with a character (isvowel above has
//                this shape; main passes isdigit)
//
// main prints the return value as the number of characters replaced.
//

int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;


/* Returns 1 when c is one of the letters a, e, i, o, u in either lower or
 * upper case, and 0 for every other value. */
int isvowel (int c) {

   switch (c) {
      case 'a': case 'e': case 'i': case 'o': case 'u':
      case 'A': case 'E': case 'I': case 'O': case 'U':
         return 1 ;
      default:
         return 0 ;
   }
}

/* Test driver: makes a writable copy of a sample string, asks strrepl to
 * replace every character accepted by isdigit with '#', then prints the
 * resulting string and the count returned by strrepl.
 * Returns 0 on success, EXIT_FAILURE if the copy cannot be allocated. */
int main(){
    char *str1;
    int r;
// I ran my code through a debugger again, and it seems that when displaying 
// the character stored in ecx is listed as "A" (correct) right before the call
// to "add ecx, 1" at which point ecx somehow resets to 0 when it should be "B"

    str1 = strdup("ABC 123 779 Hello World") ;
    if (str1 == NULL) {
        /* strdup failed; passing NULL on to strrepl/printf would be invalid */
        perror("strdup") ;
        return EXIT_FAILURE ;
    }
    r = strrepl(str1, '#', &isdigit) ;
    printf("str1 = \"%s\"\n", str1) ;
    printf("%d characters were replaced\n", r) ;
    free(str1) ;
    return 0;
}

通过 gdb 运行该程序并在 ;BREAK 处设置断点后，当我单步执行到 call 指令时出现段错误，错误信息如下：

Program received signal SIGSEGV, Segmentation fault.
0x0081320f in isdigit () from /lib/libc.so.6

isdigit 是标准 C 库的一部分，我已经在 C 文件中包含了相应的头文件，所以我不明白这个错误是怎么回事。

编辑：我已经编辑了我的第一个循环并包含了第二个循环，它应该用“#”替换任何数字，但是它似乎替换了整个数组

; Loop fragment from the question's strrepl (the function entry code is not
; part of this listing). Going by the inline comments, ecx is the pointer to
; the current character ("array") and edx is the isinsubset function pointer.
; esi is the value that secondLoop stores into the string, so it presumably
; holds the replacement character.
firstLoop:

    xor eax, eax

    ; NOTE(review): this is a 32-bit load, so the end-of-string test below
    ; only succeeds when the four bytes starting at [ecx] are all zero.
    mov edi, [ecx]
    cmp edi, 0
    jz  end

    mov edi, ecx        ; save array


    movzx   eax, byte [ecx]     ;load single byte into eax  
    ; NOTE(review): ebp is overwritten here; if the enclosing function uses
    ; ebp as its frame pointer, that value is lost from this point on.
    mov ebp, edx        ; save function pointer
    push    eax         ; parameter for (*isinsubset)           
    call    edx         ; execute (*isinsubset)

    ;cmp    eax, 0
    ;jne    end

    mov ecx, edi        ; restore array
    cmp eax, 0
    jne secondLoop  ; nonzero result: go replace this character
    ; zero result: keep the character and advance to the next one
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; "pop off" the parameter
    mov ebx, eax        ; store return value
    add ecx, 1
    jmp firstLoop

; Reached when the call returned a nonzero value for the current character.
secondLoop:
    ; NOTE(review): this is a 32-bit store; it writes all four bytes of esi
    ; starting at [ecx], not just a single character.
    mov [ecx], esi
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; remove the pushed parameter from the stack
    mov ebx, eax        ; ebx = value returned by the call
    add ecx, 1          ; advance to the next character
    jmp     firstLoop

使用 gdb 调试时，代码到达 secondLoop 时一切都是正确的。ecx 指向的字符显示为“1”，这是从 .c 文件传入的字符串中的第一个数字；esi 也如预期显示为“#”。然而，在执行 mov [ecx], esi 之后，情况似乎就不对了。此时 ecx 指向的字符确实如预期显示为“#”，但当我把 ecx 加 1 以到达数组中的下一个字符时，它显示为“\000”。在被替换为“#”的那个“1”之后的每个字符都显示为“\000”。在加入 secondLoop 尝试用“#”替换字符之前，我只让 firstLoop 自己循环，看看它能否在不崩溃的情况下遍历整个数组。结果是可以的，而且每次递增之后 ecx 指向的都是正确的字符。我不明白为什么执行 mov [ecx], esi 会把字符串的其余部分置为空。

在您的 firstLoop 中，您使用以下指令从字符串加载字符：

mov eax, [ecx]

它一次加载 4 个字节，而不是单个字节。因此，传递给 isdigit() 的 int 很可能远远超出了它能处理的范围（它可能只是做简单的查表）。

可以使用以下“英特尔asm”语法加载单个字节：

movzx eax, byte ptr [ecx]

还有几件事：

它还将产生这样的效果，即它可能无法正确地检测字符串的结尾，因为空终止符后面可能没有其他三个零字节
我不知道为什么要在处理字符串中的第一个字符之前递增 ecx
您发布的汇编代码似乎并没有在字符串上实际循环

我在您的代码中添加了一些注释：-

  ; Entry code, first loop iteration and epilogue of strrepl as posted, with
  ; review comments. The loads from [ebp + 8], [ebp + 12] and [ebp + 16] read
  ; the three stack arguments into ecx, esi and edx; going by the C prototype
  ; these are presumably str, c and isinsubset, in that order.
  ;
  ; this is OK: setting up the stack frame and saving important register
  ; on Win32, the registers that need saving are: esi, edi and ebx
  ; the rest can be used without needing to preserve them
  push    ebp
  mov ebp, esp
  push    esi
  push    ebx

  xor eax, eax
  mov ecx, [ebp + 8]

  ; you said that this checked [ecx] for zero, but I think you've just written
  ; that wrong, this checks the value of ecx for zero, the [reg] form usually indicates
  ; the value at the address defined by reg
  ; so this is effectively doing a null pointer check (which is good)
  jecxz   end

  mov esi, [ebp + 12]
  mov edx, [ebp + 16]
  xor bl, bl

firstLoop:
  add bl, 1
  ; you increment ecx before loading the first character, this means
  ; that the function ignores the first character of the string
  ; and will therefore produce an incorrect result if the string
  ; starts with a character that needs replacing
  add ecx, 1
  ; characters are 8 bit, not 32 bit (mentioned in comments elsewhere)
  mov eax, [ecx]  
  cmp eax, 0
  jz  end
  push    eax
  ; possibly segfaults due to character out of range
  ; also, as mentioned elsewhere, the function you call here must conform to the 
  ; the standard calling convention of the system (e.g, preserve esi, edi and ebx for
  ; Win32 systems), so eax, ecx and edx can change, so next time you call
  ; [edx] it might be referencing random memory
  ; either save edx on the stack (push before pushing parameters, pop after add esp)
  ; or just load edx with [ebp+16] here instead of at the start
  call    edx

  add esp, 4
  ; this overwrites all of ebx, including the bl value incremented at the
  ; top of firstLoop
  mov ebx, eax

  ; more functionality required here!
  ; as written there is no jump back to firstLoop, so execution falls
  ; through into end after a single pass



end:
  ; restore important values, etc
  pop ebx
  pop esi
  mov esp, ebp
  pop ebp
  ; the result of the function should be in eax, but that's not set up properly yet
  ret

对内部循环的评论：-

; Inner loop of strrepl as posted in the question, annotated with review
; comments. Going by the inline comments, ecx is the pointer to the current
; character ("array") and edx is the function pointer; esi is the value that
; secondLoop stores into the string.
firstLoop:

    xor eax, eax

    ; you're loading a 32 bit value and checking for zero,
    ; strings are terminated with a null character, an 8 bit value,
    ; not a 32 bit value, so you're reading past the end of the string
    ; so this is unlikely to correctly test the end of string

    mov edi, [ecx]
    cmp edi, 0
    jz  end

    mov edi, ecx        ; save array


    movzx   eax, byte [ecx]     ;load single byte into eax  
    ; you need to keep ebp! its value must be saved (at the end, 
    ; you do a mov esp,ebp)
    mov ebp, edx        ; save function pointer
    push    eax         ; parameter for (*isinsubset)           
    call    edx         ; execute (*isinsubset)

    mov ecx, edi        ; restore array
    cmp eax, 0
    jne secondLoop  ; nonzero result: go replace this character
    ; zero result: keep the character and advance to the next one
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; "pop off" the parameter
    mov ebx, eax        ; store return value
    add ecx, 1
    jmp firstLoop

; Reached when the call returned a nonzero value for the current character.
secondLoop:
    ; again, your accessing the string using a 32 bit value, not an 8 bit value
    ; so you're replacing the matched character and the three next characters
    ; with the new value
    ; the upper 24 bits are probably zero so the loop will terminate on the
    ; next character
    ; also, the function seems to be returning a count of characters replaced,
    ; but you're not recording the fact that characters have been replaced
    mov [ecx], esi
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; remove the pushed parameter from the stack
    mov ebx, eax        ; ebx = value returned by the call
    add ecx, 1          ; advance to the next character
    jmp     firstLoop

您似乎在内存工作方式上遇到了问题，您正在混淆8位和32位内存访问

即使它能够汇编通过，也仍然无法工作：cl 是 ecx 的低位字节，因此您会丢失该字节原有的内容。您没有用到 edi，它可能是空闲可用的。cl 是 8 位寄存器，eax 是 32 位寄存器。您可以将“al”或“ah”移动到 cl 中；您可以将 eax 移动到 ecx……但是您不能将 eax 移动到 cl 中。我决定使用 ebx，因为我无论如何都要在完成后恢复寄存器，而且在代码执行到那一步之前，就已经发生了段错误。我编辑了我的问题以反映这一点：我是否漏看了某处跳转到 firstLoop 的指令（我本以为会有）？firstLoop 只是紧接着 strrepl 中最后一条 xor 指令之后顺序执行。然而，在修复了另一个问题（ecx 在执行 add bl, 1 时被置为 0）之后，我编辑了我的帖子，加入了堆栈跟踪。因此，为了不加载字符的全部 4 个字节，我是否可以简单地使用 mov eax, [ecx-3] 或类似的写法？至于只递增 ecx 而不循环，我只是想先让代码运行一次，然后再把它改成真正循环遍历整个字符串。我还发现有一个问题我之前没有处理，现在已经修复。但是，同样的错误仍然存在，我需要弄清楚如何不把全部 4 个字节传递给 isdigit()。进一步检查后，由于字符是按 4 个字节加载的，因此我需要确保在传递这个 4 字节值之前，将其前 24 位清零。然而，我只知道清除前 16、32 和 64 位的指令，不知道如何清除 24 位。我刚在自己的电脑上用 DevStudio 2010 的调试版本试了这段代码，isdigit 函数触发了断言，因为输入超出范围——gcc 是否有同样的检查？我不确定，但我与一位教授进行了简短的交谈，我的问题似乎如下：当你将字符传递给 isdigit()、isxdigit() 或 isvowel() 时，它是以 4 字节 int 的形式传递的。在将其压入堆栈之前，请确保这个 4 字节值的前 24 位已清零。如果调用 isdigit() 或 isxdigit() 导致段错误，请在 call 指令之前使用 gdb 检查堆栈上的全部 32 位。我现在正试图弄清楚，在将寄存器压入堆栈之前，可以用 xor、or 等指令的什么组合把寄存器的最后 24 位清零，但一直没有成功。@rajraj:

isdigit() 可能会在所有 isxxx() 函数共用的属性位表中进行查找，而不检查参数的范围。在我的 Linux 机器上确实如此。请参阅我的编辑：我修复了迭代问题，并添加了第二个循环，它实际上用提供的替换字符（在本例中为“#”）替换字符，但它似乎将第一个数字替换为“#”并将数组的其余部分设置为 null。@user2357446:我已经把您的新代码包含进来并添加了一些注释。您在注释中提到 32 位访问与 8 位访问的区别，您的意思是不应该写 mov edi, [ecx]，而应该写 movzx edi, byte [ecx] 吗？如果是这样，我该如何将其应用于 secondLoop？esi 不是作为字符数组存储的。它是作为 int 传递的，所以当我尝试使用“byte esi”时，它显然会报错。有什么建议吗？@user2357446:该参数之所以定义为 int，只是因为这是最初传递该值的最佳方式。在汇编程序中，内存如何组织由您决定，因此在 C 中存储为 int 的字符不需要按 int 读取，您只需

; Inner loop of strrepl as posted in the question, annotated with review
; comments. Going by the inline comments, ecx is the pointer to the current
; character ("array") and edx is the function pointer; esi is the value that
; secondLoop stores into the string.
firstLoop:

    xor eax, eax

    ; you're loading a 32 bit value and checking for zero,
    ; strings are terminated with a null character, an 8 bit value,
    ; not a 32 bit value, so you're reading past the end of the string
    ; so this is unlikely to correctly test the end of string

    mov edi, [ecx]
    cmp edi, 0
    jz  end

    mov edi, ecx        ; save array


    movzx   eax, byte [ecx]     ;load single byte into eax  
    ; you need to keep ebp! its value must be saved (at the end, 
    ; you do a mov esp,ebp)
    mov ebp, edx        ; save function pointer
    push    eax         ; parameter for (*isinsubset)           
    call    edx         ; execute (*isinsubset)

    mov ecx, edi        ; restore array
    cmp eax, 0
    jne secondLoop  ; nonzero result: go replace this character
    ; zero result: keep the character and advance to the next one
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; "pop off" the parameter
    mov ebx, eax        ; store return value
    add ecx, 1
    jmp firstLoop

; Reached when the call returned a nonzero value for the current character.
secondLoop:
    ; again, your accessing the string using a 32 bit value, not an 8 bit value
    ; so you're replacing the matched character and the three next characters
    ; with the new value
    ; the upper 24 bits are probably zero so the loop will terminate on the
    ; next character
    ; also, the function seems to be returning a count of characters replaced,
    ; but you're not recording the fact that characters have been replaced
    mov [ecx], esi
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; remove the pushed parameter from the stack
    mov ebx, eax        ; ebx = value returned by the call
    add ecx, 1          ; advance to the next character
    jmp     firstLoop