Assembly 汇编-打印任意长度的整数_Assembly_X86 64_Stdio

Assembly 汇编-打印任意长度的整数

assembly

Assembly 汇编-打印任意长度的整数,assembly,x86-64,stdio,Assembly,X86 64,Stdio,我创建了一个汇编程序，它实现了biginteger来计算大的斐波那契数。该程序工作正常，使用gdb我可以验证数字是否正确存储在内存中当我想要打印数字时，问题就来了。通常我只使用printf，但它不支持大于64位的数字。这意味着我需要自己实现转换。我知道我需要计算余数mod 10，但我不知道如何计算这么大的数字代码 section .text global _start _start: mov eax, 192 xor ebx, ebx mov ecx, 2

我创建了一个汇编程序，它实现了biginteger来计算大的斐波那契数。该程序工作正常，使用

gdb

我可以验证数字是否正确存储在内存中

当我想要打印数字时，问题就来了。通常我只使用

printf

，但它不支持大于64位的数字。这意味着我需要自己实现转换。我知道我需要计算余数

mod 10

，但我不知道如何计算这么大的数字

代码

section .text
global _start

_start: ; Program entry: map one anonymous region that holds both bigints,
        ; seed them with Fib(1) = Fib(2) = 1, then enter the fib loop.
        ; The native x86-64 `syscall` ABI is used (number in rax, arguments in
        ; rdi, rsi, rdx, r10, r8, r9) so the returned pointer is a full 64-bit
        ; value instead of being squeezed through the 32-bit `int 0x80` ABI.
        mov eax, 9 ; __NR_mmap (x86-64)
        xor edi, edi ; addr = NULL: the kernel picks the address
        mov esi, 2048 ; length: two 1024-byte (128-qword) bigints
        mov edx, 0x3 ; PROT_READ | PROT_WRITE
        mov r10d, 0x22 ; MAP_PRIVATE | MAP_ANONYMOUS
        mov r8, -1 ; fd = -1, as required for an anonymous mapping
        xor r9d, r9d ; offset = 0
        syscall ; allocate memory; rax = address or -errno, rcx/r11 clobbered
        cmp rax, -4095 ; results in [-4095, -1] are error codes
        jae .mmap_failed
        lea r8, [rax] ; a
        lea r9, [rax+1024] ; b
        mov qword [r8], 0x1 ; anonymous mappings start zero-filled, so only
        mov qword [r9], 0x1 ; the lowest qword of each bigint needs a value
        mov rbx, 0x2 ; fib num counter
        jmp fib

.mmap_failed:
        mov eax, 60 ; __NR_exit (x86-64)
        mov edi, 1 ; non-zero status: the allocation failed
        syscall

fib:    inc rbx ; advance the Fibonacci index
        mov edx, 128 ; qwords per bigint; the 32-bit write zero-extends into rdx
        xor esi, esi ; qword index = 0; xor also leaves CF = 0 for the first adc
.limb:  mov rcx, [r8+8*rsi] ; rcx = a[i], kept so it can become the new b[i]
        mov rax, [r9+8*rsi] ; rax = b[i]
        adc rax, rcx ; rax = a[i] + b[i] + carry from the previous qword
        mov [r9+8*rsi], rcx ; b[i] = old a[i]
        mov [r8+8*rsi], rax ; a[i] = sum
        inc rsi ; inc/dec do not modify CF, so the carry
        dec rdx ; survives until the next adc
        jnz .limb ; repeat for the whole bigint
        call print
        cmp rbx, 100 ; keep going until the counter reaches 100
        jl fib
        jmp exit

print:  ; what to do here?
        ; Unimplemented stub: the fib loop calls this once per Fibonacci number
        ; (r8 then points at the newest value, 128 qwords stored least
        ; significant first), but it returns immediately without any output.
        ret    

exit:   ; Terminate with status 0 through the native x86-64 `syscall` ABI,
        ; so the program does not depend on 32-bit `int 0x80` emulation.
        mov eax, 60 ; __NR_exit (x86-64)
        xor edi, edi ; status = 0
        syscall

有一个有趣的经典算法叫做double-dabble（很容易找到很多参考文献），它只使用shift和add将二进制无符号整数转换为二进制编码十进制（BCD）。BCD很容易打印为ASCII十进制

对于任意长度的整数，double-dabble的效率并不是特别高，但它很容易实现

下面是多字double-dabble的C代码。它假设无符号整数是32位，在我的机器上可以正常工作。可以把它写得更具可移植性，但我没有费这个功夫，因为你反正打算把它翻译成汇编

在汇编语言中，多字移位和加法可以更小、更简单、更快，因为您可以直接访问进位

#include <stdio.h>

// Machine word used for every multi-word value in this file.
// The shifts by 31 and the eight-nibble loops assume it is exactly 32 bits wide.
typedef unsigned int word;

// Write the packed BCD number in x (size words, least-significant word
// first) to stdout, most-significant digit first, followed by a newline.
// Every word yields exactly eight characters, so leading zeros are printed.
void print(word *x, int size) {
  int w = size;
  while (w-- > 0) {
    word bits = x[w];
    for (int nibble = 7; nibble >= 0; --nibble) {
      unsigned digit = (bits >> (4 * nibble)) & 0xf;
      putchar('0' + digit);
    }
  }
  putchar('\n');
}

// Shift the size-word number x (least-significant word first) left by one
// bit in place. Bit 0 of x[0] becomes 0 and the bit shifted out of the top
// word is discarded. Does nothing when size <= 0, so an empty number is
// never written to. Assumes word is 32 bits wide.
void shift_left(word *x, int size) {
  word carry_in = 0;
  for (int i = 0; i < size; ++i) {
    word carry_out = x[i] >> 31;  // top bit moves into the next word
    x[i] = (x[i] << 1) | carry_in;
    carry_in = carry_out;
  }
}

// Add the small constant c to the size-word number x (least-significant
// word first) in place, propagating the carry into the higher words.
// A carry out of the top word is discarded. Does nothing when size <= 0.
void add(word *x, int size, word c) {
  if (size <= 0) {
    return;  // empty number: there is no x[0] to update
  }
  word x0 = x[0] + c;
  word carry = x0 < x[0];  // unsigned wrap-around means the sum overflowed
  x[0] = x0;
  for (int i = 1; carry && i < size; ++i) {
    word xi = x[i] + carry;
    carry = xi < x[i];
    x[i] = xi;  // store the carried sum back into the number
  }
}

// Double-dabble adjustment step: walk every 4-bit digit of the size-word
// BCD number (low word first, high nibble first within a word) and add 3 to
// each digit that is 5 or more. The word is re-read for every digit because
// add() may have modified it.
void double_dable_increment(word *bcd, int size) {
  for (int w = 0; w < size; ++w) {
    word *cell = bcd + w;
    int words_left = size - w;
    for (int nibble = 8; nibble-- > 0;) {
      int shift = 4 * nibble;
      word digit = (*cell >> shift) & 0xfu;
      if (digit > 4) {
        add(cell, words_left, 3u << shift);
      }
    }
  }
}

// Convert the bin_size-word binary number in the low words of x into packed
// BCD in the bcd_size words that follow it. One round is run per binary bit:
// adjust the BCD digits, then shift the whole array left so the next binary
// bit moves into the BCD part.
void convert(word *x, int bin_size, int bcd_size) {
  word *bcd = x + bin_size;
  int total_words = bin_size + bcd_size;
  int rounds = 32 * bin_size;
  while (rounds-- > 0) {
    double_dable_increment(bcd, bcd_size);
    shift_left(x, total_words);
  }
}

// Warning: Not portable. For simplicity, assumes 32-bit words.
// BCD bits needed per binary bit: a decimal digit occupies 4 bits and one
// binary bit is worth log10(2) decimal digits, so 4 * log10(2) ~= 1.20412.
#define BCD_BITS_PER_BINARY_BIT (1.20411998267)
// Number of 32-bit words needed for N bits: N is truncated to int first,
// then rounded up to a whole number of words.
#define WORDS_FROM_BITS(N) (((int)(N) + 31) / 32)
// Number of BCD words needed for the decimal form of an N-word binary value.
#define BCD_WORDS_FROM_BINARY_WORDS(N) \
  WORDS_FROM_BITS(BCD_BITS_PER_BINARY_BIT * 32 * (N))

// Test problem uses 4 words.
#define SIZE 4
// BCD words for the test value; evaluates to 5 when SIZE is 4.
#define BCD_SIZE BCD_WORDS_FROM_BINARY_WORDS(SIZE)

int main(void) {
  // Binary rep of 123456789012345678901234567890123456789, least-significant
  // word first. The remaining words start at zero and receive the BCD digits.
  word x[10] = {
      [0] = 0xae398115,
      [1] = 0xaadfa328u,
      [2] = 0x6015fec5u,
      [3] = 0x5ce0e9a5u,
  };
  word *bcd = x + SIZE;
  convert(x, SIZE, BCD_SIZE);
  print(bcd, BCD_SIZE);
  return 0;
}

提取数字是小学数学，你到底在哪里被卡住了？例如，从1开始，每一步乘以10，直到你得到一个大于你的数字的幂，然后依次减去各个幂，得到相应的各位数字。除非你想这样做。在64位代码中请使用 syscall 而不是 int 0x80，特别是对于使用指针的系统调用。你不需要每次都在缓冲区之间交换数据。相反，在内层循环之外交换指针即可。（而且，

mov rcx, [r8+8*rsi]

很愚蠢。你已经将它加载到

rax

，但随后用

adc

将其覆盖了。你应该在

adc

之前完成

mov rcx, rax

）。顺便说一句，如果你使用

add rbx, 1

，这将清除CF，这样你就可以删除

clc

指令。我编写了一个扩展精度的Fibonacci程序来打印Fib（10^9）的前1000位数字，它针对代码大小做了大量优化。它在4GHz的Skylake上运行大约一分钟。为了简化除以10的幂的运算（用于打印，并且只保留前导的1009位数字），我将数据存储在32位块中，以10^9为基数，而不是以2^32为基数。这意味着在

add

之后手动执行比较，但它使除法更便宜，打印更简单。它们不会溢出，因为您使用的是相同的任意精度算法。有一个8位整数的示例。顺便说一句，如果所需的输出格式是ASCII而不是BCD，那么您可能可以将其转换为每字节4位，在数字之间手动传播进位，而不是让硬件在字节/字内为您执行。Wikipedia有一个C实现，我认为它是这样工作的。@PeterCordes当然。谢谢但是他把连续的斐波那契结果作为一个单词数组来计算，并且想要一个程序来打印这些结果。你说的“但是”是关于哪一部分？如果举个例子，看看它是如何工作的，一步一个8位，这对于展示它是如何工作的非常有用。如果扩展精度字数组：生成字节数组与半字节数组不会改变处理扩展精度位移位的qword元素输入数组的方式。使用SSE2将BCD解包为十进制非常便宜，但如果它为比较节省了一些掩蔽（或允许

pcmpgtb

查找需要执行加法的元素），那么每字节存放4位（而不是压缩BCD）可以使扩展精度的double-dabble更便宜。

$ ./a.out
0123456789012345678901234567890123456789