Assembly:在 MASM32 中删除字符串里的重复单词

Assembly 删除字符串MASM32中的重复单词,assembly,x86,text-processing,masm32,Assembly,X86,Text Processing,Masm32,我需要找到所有重复N次的单词并从字符串中删除它们 我不明白如何在MASM32中遍历字符串和比较单词。有什么建议吗?看来我已经做好了。下面是一些代码。如果您对如何改进代码有一些建议,请随时告诉我 .386 .model flat, stdcall option casemap :none include masm32.inc include kernel32.inc includelib masm32.lib includelib kernel32.lib .data temp db 10 ms

我需要找出字符串中所有恰好重复 N 次的单词,并把它们从字符串中删除。


我不明白如何在MASM32中遍历字符串和比较单词。有什么建议吗?

看来我已经把它做出来了,下面是代码。如果您对如何改进这段代码有任何建议,请随时告诉我。

.386
.model flat, stdcall
option casemap :none
include masm32.inc
include kernel32.inc
includelib masm32.lib
includelib kernel32.lib

.data
; NOTE: declaration order and sizes are kept exactly as they were, so the
; memory layout of the section is unchanged.

temp                    db 0Ah                  ; one LF byte; not referenced anywhere in this file

; Console messages: LF, CR, text, LF, CR, terminating NUL
msg1                    db 0Ah, 0Dh, "Input string: ", 0Ah, 0Dh, 0
msg2                    db 0Ah, 0Dh, "How many times should word be repeated? ", 0Ah, 0Dh, 0
msg_true                db 0Ah, 0Dh, "TRUE", 0Ah, 0Dh, 0
msg_false               db 0Ah, 0Dh, "FALSE", 0Ah, 0Dh, 0

; Flags tested by main's outer loop
move_to_next_word       db 1                    ; main clears it after a deletion, then sets it back to 1
infinite_loop           db 1                    ; outer loop condition; never written by the code
garbage                 dd ?                    ; target of "pop garbage" in main (value is discarded)

input_string            db 128 dup(?)           ; text read by StdIn and edited in place

input_string_len        dd ?                    ; length of input_string, refreshed on every outer pass
remaining_string_len    dd ?                    ; length from the current word to the end of the string

word_buffer             db 128 dup(?)           ; not referenced anywhere in this file

repeat_num              db 10 dup(?)            ; text of the repeat count; byte 0 is compared with the match counter

current_word_ptr        dd ?                    ; start address of the word main is currently counting


; ---- Prototypes (needed because main INVOKEs these before they are defined) ----
find_space      PROTO   inp:PTR BYTE, len:DWORD                             ; locate the next space
str_len         PROTO   inp:PTR BYTE, delimiter:BYTE                        ; count bytes up to delimiter
str_to_int      PROTO   inp:PTR BYTE                                        ; ASCII digit(s) -> number, in place
str_cmp         PROTO   str1:PTR BYTE, str2:PTR BYTE, delimiter:BYTE        ; ebx = 1 if words match
delete_word     PROTO   word_start:DWORD, word_end:DWORD, word_len:DWORD    ; move the tail over a word
; ---------------------------------------------------------------------------------

.code
; ---------------------------------------------------------------
; main - read a line and a repeat count, then remove every word that
;        occurs exactly that many times in the line.
;
; For each word (the "current word", starting at esi):
;   1. walk the whole string word by word and count the matches in dl,
;      pushing a (start, delimiter) pointer pair for every match;
;   2. if dl equals repeat_num[0], pop the pairs and delete each match,
;      last occurrence first so the pointers still on the stack stay valid;
;      otherwise just discard the pairs;
;   3. print the string and move on.
;
; The loop ends when find_space reports no bytes left after the current
; word's delimiter, so the final word is counted as a match for earlier
; words but is never itself taken as the current word.
;
; Stack discipline per outer iteration: one slot for the current word's
; delimiter, plus two slots per match. Everything is popped again before
; the next iteration.
; ---------------------------------------------------------------
main proc
    invoke  StdOut, offset msg1
    invoke  StdIn,  offset input_string, lengthof input_string
    invoke  StdOut, offset msg2
    invoke  StdIn,  offset repeat_num, lengthof repeat_num

    ; str_to_int rewrites repeat_num in place; byte 0 is the count used below
    invoke  str_to_int, offset repeat_num

    mov     esi, offset input_string        ; esi = start of the current word

    .while infinite_loop == 1               ; only left through "jmp terminate"

        ; Deletions shorten the string, so re-measure it on every pass.
        pushad
        invoke  str_len, offset input_string, 0
        mov     input_string_len, ecx
        popad

        invoke  str_len, esi, 0
        mov     remaining_string_len, ecx

        ; esi = current word, edi = its delimiter, ecx = bytes left after it
        invoke  find_space, esi, remaining_string_len
        mov     current_word_ptr, esi

        .if ecx == 0
            jmp     terminate
        .endif

        push    edi                         ; delimiter of the current word (popped at the loop bottom)

        xor     edx, edx                    ; dl = number of matches found so far

        ; Scan from one byte before the string; the inc at the loop top
        ; brings edi to the first word and later steps over each delimiter.
        mov     edi, offset input_string
        dec     edi
        mov     ecx, input_string_len

        .while ecx > 0
            inc     edi
            invoke  find_space, edi, ecx    ; esi = word, edi = delimiter, ecx = bytes left
            invoke  str_cmp, current_word_ptr, esi, " "

            .if ebx == 1
                inc     dl

                push    esi                 ; word's start
                push    edi                 ; word's end

                pushad
                invoke  StdOut, offset msg_true
                popad

            .elseif ebx == 0
                pushad
                invoke  StdOut, offset msg_false
                popad
            .endif
        .endw

        .if dl == repeat_num
            mov     move_to_next_word, 0

            ; Pairs come off the stack last-occurrence-first. Every occurrence,
            ; including the first one, is removed together with its delimiter:
            ; the tail length is measured from the byte after the delimiter.
            .while dl > 0
                pop     edi                 ; delimiter of this occurrence
                pop     esi                 ; start of this occurrence
                inc     edi
                invoke  str_len, edi, 0     ; ecx = length of the text after the delimiter
                dec     edi

                invoke  delete_word, esi, edi, ecx
                dec     dl
            .endw
            ; esi now addresses the first occurrence's old position, which
            ; holds the word that followed it.

        .else
            ; Count did not match: drop the recorded pairs.
            .while dl > 0
                pop     edi
                pop     esi
                dec     dl
            .endw
        .endif

        pushad
        invoke  StdOut, offset input_string
        popad

        .if move_to_next_word == 1
            ; Nothing was deleted: continue right after the current word's delimiter.
            pop     esi
            inc     esi
        .else
            ; Words were deleted: esi already addresses the next word, so the
            ; saved delimiter pointer is stale and is thrown away.
            mov     move_to_next_word, 1
            pop     garbage
        .endif
    .endw

terminate:

    invoke  ExitProcess, 0
main endp


; FUNCTIONS------------------------------------------------------
; find_space - look for the first space in the len bytes starting at inp.
; In:   inp - address to start scanning at
;       len - maximum number of bytes to examine
; Out:  esi = inp
;       edi = address of the space; if none was found, address of the last
;             byte examined (inp - 1 when len is 0)
;       ebx = edi - esi (the word length when a space was found)
;       ecx = number of bytes of len that were not examined
;       al  = " "
; The direction flag is cleared. No registers are saved.
find_space proc inp:ptr byte, len:dword
    mov     esi, inp
    mov     ecx, len
    mov     edi, esi                    ; edi walks the string, esi keeps the start
    mov     al, " "
    cld
    jecxz   fs_done                     ; nothing to examine

fs_next:
    dec     ecx                         ; one more byte consumed
    inc     edi                         ; edi is always one past the byte being tested
    cmp     byte ptr [edi - 1], al
    je      fs_done
    test    ecx, ecx
    jnz     fs_next

fs_done:
    lea     edi, [edi - 1]              ; step back onto the last byte examined
    mov     ebx, edi
    sub     ebx, esi
    ret
find_space endp

; str_len - count the bytes from inp up to, but not including, the first
;           occurrence of delimiter.
; In:   inp       - address of the text
;       delimiter - byte that ends the text (main passes 0, str_cmp passes " ")
; Out:  ecx = number of bytes before the delimiter
; Preserves eax and edi; leaves the direction flag clear.
;
; A NUL byte always ends the scan as well. Without that, asking for the
; length of the last word of a string with delimiter " " would keep reading
; past the terminator into whatever memory follows.
str_len proc inp:ptr byte, delimiter:byte
    push    edi
    push    eax

    mov     edi, inp
    xor     ecx, ecx                    ; ecx = bytes counted so far
    cld                                 ; string instructions run after this call expect DF = 0

len_next:
    mov     al, byte ptr [edi + ecx]
    cmp     al, delimiter
    je      len_done
    test    al, al                      ; end of string reached before the delimiter
    jz      len_done
    inc     ecx
    jmp     len_next

len_done:
    pop     eax
    pop     edi
    ret
str_len endp

; str_to_int - convert the leading decimal digits of a string to a number.
; In:   inp - address of the text
; Out:  byte [inp] = the value (low 8 bits); the other bytes of the buffer
;       are left as they were. Nothing is returned in registers: all of
;       them are restored by popad.
;
; Parsing stops at the first byte that is not "0".."9" (which includes the
; terminating NUL), so an empty or non-numeric string yields 0. Values above
; 255 are truncated because the result is stored in a single byte.
str_to_int proc inp:ptr byte
    pushad

    mov     esi, inp
    xor     eax, eax                    ; eax = value accumulated so far
    xor     ecx, ecx                    ; ecx = index of the character being read

sti_next:
    movzx   edx, byte ptr [esi + ecx]
    sub     edx, "0"
    cmp     edx, 9
    ja      sti_store                   ; unsigned compare also rejects bytes below "0"
    imul    eax, eax, 10
    add     eax, edx
    inc     ecx
    jmp     sti_next

sti_store:
    mov     byte ptr [esi], al

    popad
    ret
str_to_int endp

; str_cmp - compare two words that each end at delimiter.
; In:   str1, str2 - addresses of the two words
;       delimiter  - byte that ends a word (handed to str_len unchanged)
; Out:  ebx = 1 if both words have the same length and the same bytes
;       ebx = 0 otherwise
; Preserves eax, ecx, esi and edi.
;
; The first length is kept in a stack local rather than in a register:
; INVOKE may use eax to widen the byte-sized delimiter argument, so a value
; held there would not reliably survive the second call.
str_cmp proc str1:ptr byte, str2:ptr byte, delimiter:byte
    local   first_len:dword

    push    eax
    push    ecx
    push    edi
    push    esi

    invoke  str_len, str1, delimiter    ; ecx = length of str1
    mov     first_len, ecx
    invoke  str_len, str2, delimiter    ; ecx = length of str2

    xor     ebx, ebx                    ; assume "not equal"
    cmp     first_len, ecx
    jne     cmp_done                    ; different lengths can never match

    mov     esi, str1
    mov     edi, str2
    cld                                 ; compare forwards regardless of the caller's DF
    repe    cmpsb                       ; with ecx = 0 nothing runs and ZF is still set from the cmp above
    jne     cmp_done
    mov     ebx, 1

cmp_done:
    pop     esi
    pop     edi
    pop     ecx
    pop     eax
    ret
str_cmp endp

; delete_word - remove a word by moving the text that follows it down over it.
; In:   word_start - address of the first byte to overwrite
;       word_end   - address of the byte just before the text to keep
;                    (copying starts at word_end + 1)
;       word_len   - length of the text to keep; one extra byte is copied,
;                    which is the NUL terminator when main passes the length
;                    of the remaining string
; Out:  nothing; all general registers are preserved.
delete_word proc word_start:dword, word_end:dword, word_len:dword
        pushad
        mov     edi, word_start
        mov     esi, word_end
        inc     esi                     ; source = first byte after the deleted span
        mov     ecx, word_len
        inc     ecx                     ; include the terminator
        cld                             ; destination is below the source, so copy forwards
    rep movsb
        popad
        ret
delete_word endp
; ---------------------------------------------------------------

end main

与任何其他就地(in-place)过滤一样,读指针和写指针都从字符串的开头开始。由于处理的是可变长度的对象(单词 = 由非单词字符分隔的连续字节),复制和比较它们会更复杂一些。除此之外,你还需要记住当前单词和上一个(或者往前第 n 个?)单词的结尾位置,这样才能把写指针回退,从而实际上从字符串尾部删除内容。或者采用类似的做法。你可能需要一个保存单词结束指针的数组(环形缓冲区),以便一边复制一边比较。

@Peter 你说的"指针",是指 ESI/EDI 寄存器吗?

不,我指的是保存地址的 32 位值,就像 C 变量 `char *lastword` 的值那样。指针值可以存放在任何地方,包括内存中。只有当它位于寄存器中时才能对它解引用,但在 32 位寻址模式下任何寄存器都可以使用,例如 `cmp al, [ecx]` 是有效的。TL;DR:我说的是指针值本身,而不是你用来存放它的位置。

请把答案作为答案发布(如果你愿意按照适用于 SO 帖子的 CC BY-SA 版权许可共享代码,请把代码内联贴出,而不是给出外部链接)。不要通过编辑把问题变成一个不再是问题的帖子。如果你希望别人对代码进行代码评审以获得改进建议,请将其发布在