Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/330.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/c/68.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 如何加速Levenshtein距离计算_Python_C_Performance_Optimization_Levenshtein Distance - Fatal编程技术网

Python 如何加速Levenshtein距离计算

Python 如何加速Levenshtein距离计算,python,c,performance,optimization,levenshtein-distance,Python,C,Performance,Optimization,Levenshtein Distance,我正在尝试运行一个模拟来测试随机变量之间的平均值 二进制字符串 我的程序是用python编写的,但我正在使用它。相关的函数会占用大部分时间来计算两个字符串之间的Levenshtein距离,如下所示 lev_edit_distance(size_t len1, const lev_byte *string1, size_t len2, const lev_byte *string2, int xcost) { size_t

我正在尝试运行一个模拟,以测试随机二进制字符串之间的平均 Levenshtein 距离。

我的程序是用 Python 编写的,但我使用了一个 C 扩展。其中最相关、也占用大部分运行时间的函数用于计算两个字符串之间的 Levenshtein 距离,如下所示

/*
 * Compute the Levenshtein edit distance between two byte strings.
 *
 * len1, string1: length and contents of the first string
 * len2, string2: length and contents of the second string
 * xcost:         if nonzero, a substitution costs 2 (i.e. it is equivalent
 *                to one deletion plus one insertion); if zero, it costs 1
 *
 * Returns the edit distance, or (size_t)(-1) if the work row cannot be
 * allocated.  The strings are not required to be NUL-terminated; only the
 * first len1 / len2 bytes are read.
 *
 * The explicit size_t return type matters: without it the function would be
 * implicitly `int`, which is invalid since C99 and would truncate the
 * (size_t)(-1) error sentinel.
 */
size_t
lev_edit_distance(size_t len1, const lev_byte *string1,
                  size_t len2, const lev_byte *string2,
                  int xcost)
{
  size_t i;
  size_t *row;  /* we only need to keep one row of costs */
  size_t *end;
  size_t half;

  /* strip common prefix */
  while (len1 > 0 && len2 > 0 && *string1 == *string2) {
    len1--;
    len2--;
    string1++;
    string2++;
  }

  /* strip common suffix */
  while (len1 > 0 && len2 > 0 && string1[len1-1] == string2[len2-1]) {
    len1--;
    len2--;
  }

  /* catch trivial cases */
  if (len1 == 0)
    return len2;
  if (len2 == 0)
    return len1;

  /* make the inner cycle (i.e. string2) the longer one */
  if (len1 > len2) {
    size_t nx = len1;
    const lev_byte *sx = string1;
    len1 = len2;
    len2 = nx;
    string1 = string2;
    string2 = sx;
  }
  /* check len1 == 1 separately: the answer only depends on whether the
   * single byte occurs anywhere in string2 */
  if (len1 == 1) {
    if (xcost)
      return len2 + 1 - 2*(memchr(string2, *string1, len2) != NULL);
    else
      return len2 - (memchr(string2, *string1, len2) != NULL);
  }
  /* from here on len1/len2 are matrix dimensions (string length + 1) */
  len1++;
  len2++;
  half = len1 >> 1;

  /* initialize first row; refuse sizes whose byte count would wrap around */
  if (len2 > (size_t)(-1) / sizeof *row)
    return (size_t)(-1);
  row = malloc(len2 * sizeof *row);
  if (!row)
    return (size_t)(-1);
  end = row + len2 - 1;
  /* with xcost == 0 the last `half` cells belong to a skipped corner
   * triangle and are written before they are ever read */
  for (i = 0; i < len2 - (xcost ? 0 : half); i++)
    row[i] = i;

  /* go through the matrix and compute the costs.  yes, this is an extremely
   * obfuscated version, but also extremely memory-conservative and relatively
   * fast.  Every pass over `row` overwrites row i-1 with row i in place, so
   * the iterations of the outer loops depend on each other and must run in
   * order. */
  if (xcost) {
    for (i = 1; i < len1; i++) {
      size_t *p = row + 1;
      const lev_byte char1 = string1[i - 1];
      const lev_byte *char2p = string2;
      size_t D = i;   /* one more than the diagonal (upper-left) cell */
      size_t x = i;   /* cell to the left, i.e. the value just written */
      while (p <= end) {
        if (char1 == *(char2p++))
          x = --D;
        else
          x++;
        D = *p;
        D++;
        if (x > D)
          x = D;
        *(p++) = x;
      }
    }
  }
  else {
    /* in this case we don't have to scan two corner triangles (of size len1/2)
     * in the matrix because no best path can go through them. note this
     * breaks when len1 == len2 == 2 so the memchr() special case above is
     * necessary */
    row[0] = len1 - half - 1;
    for (i = 1; i < len1; i++) {
      size_t *p;
      const lev_byte char1 = string1[i - 1];
      const lev_byte *char2p;
      size_t D, x;
      /* skip the upper triangle */
      if (i >= len1 - half) {
        size_t offset = i - (len1 - half);
        size_t c3;

        char2p = string2 + offset;
        p = row + offset;
        c3 = *(p++) + (char1 != *(char2p++));
        x = *p;
        x++;
        D = x;
        if (x > c3)
          x = c3;
        *(p++) = x;
      }
      else {
        p = row + 1;
        char2p = string2;
        D = x = i;
      }
      /* skip the lower triangle */
      if (i <= half + 1)
        end = row + len2 + i - half - 2;
      /* main */
      while (p <= end) {
        size_t c3 = --D + (char1 != *(char2p++));
        x++;
        if (x > c3)
          x = c3;
        D = *p;
        D++;
        if (x > D)
          x = D;
        *(p++) = x;
      }
      /* lower triangle sentinel */
      if (i <= half) {
        size_t c3 = --D + (char1 != *char2p);
        x++;
        if (x > c3)
          x = c3;
        *p = x;
      }
    }
  }

  /* the bottom-right cell of the matrix holds the distance */
  i = *end;
  free(row);
  return i;
}

您可以从本网站学习一些OpenMP概念和指令开始:

您需要一个兼容OpenMP的编译器。下面是一个可以使用的编译器列表。编译代码时,您需要使用
-fopenmp
选项

我只在代码中添加了编译器指令
#pragma omp parallel for
,告诉编译器以下代码块可以并行运行。通过将while循环更改为for循环,或者通过在整个函数中应用OpenMP模式,您可以看到性能的额外提高。通过在这些块之前使用函数
omp_set_num_threads()
,调整用于执行for循环的线程数,可以调整性能。一个好的数字是8,因为您将在8核处理器上运行

/*
 * Compute the Levenshtein edit distance between two byte strings
 * (OpenMP-annotated variant).
 *
 * len1, string1: length and contents of the first string
 * len2, string2: length and contents of the second string
 * xcost:         if nonzero, a substitution costs 2 (one deletion plus one
 *                insertion); if zero, it costs 1
 *
 * Returns the edit distance, or (size_t)(-1) if the work row cannot be
 * allocated.
 *
 * Parallelisation notes:
 *  - Only the first-row initialisation carries an OpenMP pragma, because its
 *    iterations are independent of each other.
 *  - The two matrix loops must NOT be marked `omp parallel for`: iteration i
 *    overwrites the single shared `row` buffer with row i of the cost matrix
 *    and reads the values iteration i-1 left there (and, when xcost == 0,
 *    also updates the shared `end` pointer).  Running those iterations
 *    concurrently is a data race and yields wrong distances.
 *  - The routine does not call omp_set_num_threads(): that setting is
 *    process-wide, so a library function changing it on every call would
 *    override the caller's choice.  Set it once in the caller, or via the
 *    OMP_NUM_THREADS environment variable.
 *  - For a simulation over many independent string pairs, the useful place
 *    to parallelise is the caller's loop over pairs, not this function.
 */
size_t
lev_edit_distance(size_t len1, const lev_byte *string1,
              size_t len2, const lev_byte *string2,
              int xcost)
{
  size_t i;
  size_t *row;  /* we only need to keep one row of costs */
  size_t *end;
  size_t half;

  /* strip common prefix */
  while (len1 > 0 && len2 > 0 && *string1 == *string2) {
    len1--;
    len2--;
    string1++;
    string2++;
  }

  /* strip common suffix */
  while (len1 > 0 && len2 > 0 && string1[len1-1] == string2[len2-1]) {
    len1--;
    len2--;
  }

  /* catch trivial cases */
  if (len1 == 0)
    return len2;
  if (len2 == 0)
    return len1;

  /* make the inner cycle (i.e. string2) the longer one */
  if (len1 > len2) {
    size_t nx = len1;
    const lev_byte *sx = string1;
    len1 = len2;
    len2 = nx;
    string1 = string2;
    string2 = sx;
  }
  /* check len1 == 1 separately: the answer only depends on whether the
   * single byte occurs anywhere in string2 */
  if (len1 == 1) {
    if (xcost)
      return len2 + 1 - 2*(memchr(string2, *string1, len2) != NULL);
    else
      return len2 - (memchr(string2, *string1, len2) != NULL);
  }
  /* from here on len1/len2 are matrix dimensions (string length + 1) */
  len1++;
  len2++;
  half = len1 >> 1;

  /* initialize first row; refuse sizes whose byte count would wrap around */
  if (len2 > (size_t)(-1) / sizeof *row)
    return (size_t)(-1);
  row = malloc(len2 * sizeof *row);
  if (!row)
    return (size_t)(-1);
  end = row + len2 - 1;

  /* independent iterations: safe to share among threads */
  #pragma omp parallel for
  for (i = 0; i < len2 - (xcost ? 0 : half); i++)
    row[i] = i;

  /* go through the matrix and compute the costs.  yes, this is an extremely
   * obfuscated version, but also extremely memory-conservative and relatively
   * fast.  These loops are intentionally sequential, see the header comment. */
  if (xcost) {
    for (i = 1; i < len1; i++) {
      size_t *p = row + 1;
      const lev_byte char1 = string1[i - 1];
      const lev_byte *char2p = string2;
      size_t D = i;   /* one more than the diagonal (upper-left) cell */
      size_t x = i;   /* cell to the left, i.e. the value just written */
      while (p <= end) {
        if (char1 == *(char2p++))
          x = --D;
        else
          x++;
        D = *p;
        D++;
        if (x > D)
          x = D;
        *(p++) = x;
      }
    }
  }
  else {
    /* in this case we don't have to scan two corner triangles (of size len1/2)
     * in the matrix because no best path can go through them. note this
     * breaks when len1 == len2 == 2 so the memchr() special case above is
     * necessary */
    row[0] = len1 - half - 1;
    for (i = 1; i < len1; i++) {
      size_t *p;
      const lev_byte char1 = string1[i - 1];
      const lev_byte *char2p;
      size_t D, x;
      /* skip the upper triangle */
      if (i >= len1 - half) {
        size_t offset = i - (len1 - half);
        size_t c3;

        char2p = string2 + offset;
        p = row + offset;
        c3 = *(p++) + (char1 != *(char2p++));
        x = *p;
        x++;
        D = x;
        if (x > c3)
          x = c3;
        *(p++) = x;
      }
      else {
        p = row + 1;
        char2p = string2;
        D = x = i;
      }
      /* skip the lower triangle */
      if (i <= half + 1)
        end = row + len2 + i - half - 2;
      /* main */
      while (p <= end) {
        size_t c3 = --D + (char1 != *(char2p++));
        x++;
        if (x > c3)
          x = c3;
        D = *p;
        D++;
        if (x > D)
          x = D;
        *(p++) = x;
      }
      /* lower triangle sentinel */
      if (i <= half) {
        size_t c3 = --D + (char1 != *char2p);
        x++;
        if (x > c3)
          x = c3;
        *p = x;
      }
    }
  }

  /* the bottom-right cell of the matrix holds the distance */
  i = *end;
  free(row);
  return i;
}
lev_edit_距离(大小为len1,常量lev_字节*string1,
大小为len2,常量级别字节*string2,
int(xcost)
{
尺寸i;
我们只需要保留一行成本*/
尺寸*端部;
大小为一半;
//设置OpenMP框架将用于并行for循环的线程数
omp_设置_数量_线程(8);
/*带公共前缀*/
而(len1>0&&len2>0&&string1==*string2){
len1--;
len2--;
string1++;
string2++;
}
/*带公共后缀*/
而(len1>0&&len2>0&&string1[len1-1]==string2[len2-1]){
len1--;
len2--;
}
/*抓鸡毛蒜皮的案子*/
if(len1==0)
返回len2;
如果(len2==0)
返回len1;
/*使内部循环(即string2)更长*/
如果(len1>len2){
尺寸_tnx=len1;
常量lev_字节*sx=string1;
len1=len2;
len2=nx;
string1=string2;
string2=sx;
}
/*分别检查len1==1*/
if(len1==1){
如果(xcost)
返回len2+1-2*(memchr(string2,*string1,len2)!=NULL);
其他的
返回len2-(memchr(string2,*string1,len2)!=NULL);
}
len1++;
len2++;
一半=len1>>1;
/*初始化第一行*/
row=(size_t*)malloc(len2*sizeof(size_t));
如果(!行)
返回值(大小为-1);
结束=行+列2-1;
#pragma-omp并行
对于(i=0;i=len1-half){
尺寸\u t偏移=i-(len1-一半);
尺寸为c3;
char2p=string2+偏移量;
p=行+偏移量;
c3=*(p++++)(char1!=*(char2p++);
x=*p;
x++;
D=x;
如果(x>c3)
x=c3;
*(p++)=x;
}
否则{
p=行+1;
char2p=string2;
D=x=i;
}
/*跳过下面的三角形*/
如果(i D)
x=D;
*(p++)=x;
}
/*下三角哨兵*/
如果(i)c3
x=c3;
*p=x;
}
}
}
i=*结束;
自由(行);
返回i;
}
您还可以对for循环中正在操作的变量执行操作,以便提供简单的并行计算,如求和、乘法等

/*
 * Sketch of the simulation driver: for each exponent i in [0, 16) draw 1000
 * pairs of random bit strings of length 2^i, sum their distances and print
 * the mean distance per character.
 *
 * The outer loop is shared among OpenMP threads.  Everything a single
 * iteration mutates (sum, j, str1, str2) is declared inside the loop body so
 * that each thread works on its own copy; declaring them at function scope
 * would make them shared between threads and turn every update into a data
 * race.
 *
 * NOTE: the two `str = bin(...)` lines are Python pseudo-code carried over
 * from the question and still have to be replaced by real C that fills the
 * buffers; `distance` likewise stands for the C distance routine.
 */
int main()
{
    #pragma omp parallel for
    for (int i = 0; i < 16; i++)
    {
        char str1[30]; // Change size to fit your specifications
        char str2[30];
        int sum = 0;   // per-iteration accumulator, private to the running thread

        for (int j = 0; j < 1000; j++)
        {
            // Calls will have to be changed
            // I don't know much Python so I'll leave that to the experts 
            str1 = bin(random.getrandbits(2**i))[2:].zfill(2**i)
            str2 = bin(random.getrandbits(2**i))[2:].zfill(2**i)
            sum += distance(str1,str2);
        }
        // Normalise by 1000 samples times the string length 2^i (1 << i).
        // Floating-point division keeps the fractional mean, and the shift
        // avoids the zero divisor that 2*i would give for i == 0.
        printf("%d %f\n", i, (double)sum / (1000.0 * (double)(1 << i)));
    }
    return 0;
}
intmain()
{
int i=0,
j=0,
总和=0;
char str1[30];//更改大小以符合您的规格
char-str2[30];
#pragma-omp并行
对于(i=0;i我会做什么:

1) 非常小的优化:一次性分配一个足够大的
row
,以避免内存管理开销。或者您可以尝试
realloc()
,或者您可以在静态变量中跟踪
row
的大小(并使
row
也是静态的)。然而,这样做节省的时间很少,尽管实现成本也很低

2) 您正在尝试计算平均值。把求平均值的计算也放到 C 中完成。这应该可以节省一些函数调用的开销。同样,收益很小,但代价也很低

3) 既然你对实际的计算过程不感兴趣,只对结果感兴趣,那么,假设你有三台电脑,每台电脑都是四核机器。然后在每台电脑上运行四个程序实例,每个实例的循环次数减少为原来的十二分之一。这样你会在十二分之一的时间内得到十二个结果:把这些结果取平均,就大功告成了

选项#3除循环外,不需要任何修改,您可能希望将其作为命令行参数,以便可以在不同数量的计算机上部署程序。实际上,您可能希望同时输出结果及其“权重”,以最大限度地减少将结果相加时出错的机会

# Inner simulation loop (Python 2 syntax): draw N pairs of random bit strings
# and accumulate their pairwise distance into `sum`.
# N, i, sum and distance are defined outside this fragment.
for j in xrange(N):
    # bin() yields a '0b...' string; [2:] drops that prefix and zfill() pads
    # with leading zeros so each string is 2**i characters long.
    str1 = bin(random.getrandbits(2**i))[2:].zfill(2**i)
    str2 = bin(random.getrandbits(2**i))[2:].zfill(2**i)
    # presumably the Levenshtein distance from the C extension; confirm
    sum += distance(str1,str2)
# Print the sample count, the exponent, and the summed distance divided by
# N * 2**i (sample count times string length).
print N,i,sum/(N*2**i)
但是如果你对一般的Levenshtein统计量感兴趣,我不太确定只使用0和1符号进行计算是否适合你的目的
# Inner simulation loop (Python 2 syntax): draw N pairs of random bit strings
# and accumulate their pairwise distance into `sum`.
# N, i, sum and distance are defined outside this fragment.
for j in xrange(N):
    # bin() yields a '0b...' string; [2:] drops that prefix and zfill() pads
    # with leading zeros so each string is 2**i characters long.
    str1 = bin(random.getrandbits(2**i))[2:].zfill(2**i)
    str2 = bin(random.getrandbits(2**i))[2:].zfill(2**i)
    # presumably the Levenshtein distance from the C extension; confirm
    sum += distance(str1,str2)
# Print the sample count, the exponent, and the summed distance divided by
# N * 2**i (sample count times string length).
print N,i,sum/(N*2**i)
from threading import Lock, Thread

# Running total of all distances computed by the worker threads.
# (The name shadows the built-in sum(); it is kept so existing references to
# this module-level variable keep working.)
sum = 0
# Serialises updates of the shared total across worker threads.
sum_lock = Lock()


def calc_distance(offset):
    """Add the distance of the string pair stored at randoms[offset] to the total.

    `randoms` and `distance` are expected to be defined elsewhere: `randoms`
    as an indexable collection of string pairs, `distance` as the distance
    function.
    """
    global sum  # without this, `sum += ...` raises UnboundLocalError
    # Compute outside the lock so only the cheap addition is serialised.
    d = distance(randoms[offset][0], randoms[offset][1])  # use whatever addressing scheme is best
    with sum_lock:
        sum += d


# Start one worker per pair index, then wait for all of them to finish.
# NOTE(review): for CPU-bound work, threads only run in parallel if
# distance() releases the GIL; otherwise consider multiprocessing.
threads = []
for i in range(8):
    # args must be a tuple: (i,) and not (i), which is just the integer i.
    t = Thread(target=calc_distance, args=(i,))
    t.start()
    threads.append(t)
for t in threads:
    t.join()