使用MPI的拼写检查程序_Mpi_Spell Checking

使用MPI的拼写检查程序

mpi

使用MPI的拼写检查程序,mpi,spell-checking,Mpi,Spell Checking,所以，我的任务是编写一个拼写检查程序，然后使用openMPI将其并行化。我的方法是将文本文件中的单词加载到名为dict[]的数组中，并将其用作字典。接下来，我从用户那里获取输入，然后应该遍历字典数组，检查当前单词是否在阈值百分比内，如果在阈值百分比内，则打印出来。但我只需要打印出一定数量的单词。我的问题是，我的建议[]数组似乎没有按我需要的方式填充，它有很多空白点，然而，我想，至少，我写它的方式是，当一个单词在阈值内时填充它。所以在没有更多的单词被添加之前，它不应该有任何空格。我想它快完成了，但

所以，我的任务是编写一个拼写检查程序，然后使用openMPI将其并行化。我的方法是将文本文件中的单词加载到名为dict[]的数组中，并将其用作字典。接下来，我从用户那里获取输入，然后应该遍历字典数组，检查当前单词是否在阈值百分比内，如果在阈值百分比内，则打印出来。但我只需要打印出一定数量的单词。我的问题是，我的建议[]数组似乎没有按我需要的方式填充，它有很多空白点，然而，我想，至少，我写它的方式是，当一个单词在阈值内时填充它。所以在没有更多的单词被添加之前，它不应该有任何空格。我想它快完成了，但我似乎无法理解这一部分。感谢您的帮助

#include <stdio.h>
#include <mpi.h>
#include <string.h>
#include <stdlib.h>
#define SIZE 30
#define max(x,y) (((x) > (y)) ? (x) : (y))
char *dict[50000];
char *suggestions[50000];
char enterWord[50];
char *myWord;
int wordsToPrint = 20;
int threshold = 40;
int i;
int words_added = 0;


   int levenshtein(const char *word1, int len1, const char *word2, int len2){
      int matrix[len1 + 1][len2 + 1];
      int a;
      for(a=0; a<= len1; a++){
         matrix[a][0] = a;
      }
      for(a=0;a<=len2;a++){
         matrix[0][a] = a;
      }

      for(a = 1; a <= len1; a++){
         int j;
         char c1;

         c1 = word1[a-1];
         for(j = 1; j <= len2; j++){
            char c2;

            c2 = word2[j-1];
            if(c1 == c2){
               matrix[a][j] = matrix[a-1][j-1];
            }
            else{
               int delete, insert, substitute, minimum;

               delete = matrix[a-1][j] +1;
               insert = matrix[a][j-1] +1;
               substitute = matrix[a-1][j-1] +1;
               minimum = delete;

               if(insert < minimum){
                  minimum = insert;
               }
               if(substitute < minimum){
                  minimum = substitute;
               }
               matrix[a][j] = minimum;
            }//else
         }//for
      }//for
      return matrix[len1][len2];
   }//levenshtein

   void prompt(){
      printf("Enter word to search for: \n");
      scanf("%s", &enterWord);
   }


   int p0_compute_output(int num_processes, char *word1){
      int totalNumber = 0;
      int k = 0;
      int chunk = 50000 / num_processes;
      for(i = 0; i < chunk; i++){
         int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
         int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
         if(thresholdPercentage < threshold){
            suggestions[totalNumber] = dict[i];
            totalNumber = totalNumber + 1;
         }
      }//for
      return totalNumber;
   }//p0_compute_output

   void p0_receive_output(int next_addition){
      int num_to_add;
      MPI_Comm comm;
      MPI_Status status;
         MPI_Recv(&num_to_add,1,MPI_INT,MPI_ANY_SOURCE, MPI_ANY_TAG,MPI_COMM_WORLD, MPI_STATUS_IGNORE);
         printf("--%d\n", num_to_add);
         suggestions[next_addition] = dict[num_to_add];
         next_addition = next_addition + 1;
   }

   void compute_output(int num_processes, int me, char *word1){
      int chunk = 0;
      int last_chunk = 0;
      MPI_Comm comm;
      if(50000 % num_processes == 0){
         chunk = 50000 / num_processes;
         last_chunk = chunk;
         int start = me * chunk;
         int end = me * chunk + chunk;
         for(i = start; i < end;i++){
            int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
            int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
            if(thresholdPercentage < threshold){
               int number_to_send = i;
               MPI_Send(&number_to_send, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
            }
         }
      }
      else{
         chunk = 50000 / num_processes;
         last_chunk = 50000 - ((num_processes - 1) * chunk);
         if(me != num_processes){
            int start = me * chunk;
            int end = me * chunk + chunk;
            for(i = start; i < end; i++){
               int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
               int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
               if(thresholdPercentage < threshold){
                  int number_to_send = i;
                  MPI_Send(&number_to_send, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
               }//if
            }//for
         }//if me != num_processes
         else{
            int start = me * chunk;
            int end = 50000 - start;
            for(i = start; i < end; i++){
               int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
               int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
               if(thresholdPercentage < threshold){
                  int number_to_send = i;
                  MPI_Send(&number_to_send, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
               }
            }
         }//me == num_processes
      }//BIG else
      return;
   }//COMPUTE OUTPUT

   void set_data(){
      prompt();
      MPI_Bcast(&enterWord,20 ,MPI_CHAR, 0, MPI_COMM_WORLD);
   }//p0_send_inpui


//--------------------------MAIN-----------------------------//
main(int argc, char **argv){
   int ierr, num_procs, my_id, loop;
   FILE *myFile;
   loop = 0;

   for(i=0;i<50000;i++){
      suggestions[i] = calloc(SIZE, sizeof(char));
   }

   ierr = MPI_Init(NULL, NULL);
   ierr = MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
   ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
   printf("Check in from %d of %d processors\n", my_id, num_procs);

   set_data();
   myWord = enterWord;

   myFile = fopen("words", "r");
   if(myFile != NULL){
      for(i=0;i<50000;i++){
         dict[i] = calloc(SIZE, sizeof(char));
         fscanf(myFile, "%s", dict[i]);
      }//for
      fclose(myFile);
   }//read word list into dictionary
   else printf("File not found");

   if(my_id == 0){
      words_added = p0_compute_output(num_procs, enterWord);
      printf("words added so far: %d\n", words_added);
      p0_receive_output(words_added);
      printf("Threshold: %d\nWords To print: %d\n%s\n", threshold, wordsToPrint, myWord);
      ierr = MPI_Finalize();
   }
   else{
      printf("my word %s*\n", enterWord);
      compute_output(num_procs, my_id, enterWord);
     // printf("Process %d terminating...\n", my_id);
      ierr = MPI_Finalize();
   }

   for(i=0;i<wordsToPrint;i++){
      printf("*%s\n", suggestions[i]);
   }//print suggestions

   return (0);
}//END MAIN

#包括
#包括
#包括
#包括
#定义尺寸30
#定义最大值（x，y）（（（x）>（y））？（x）：（y））
char*dict[50000]；
char*建议[50000]；
字符输入[50]；
char*myWord；
int-wordsToPrint=20；
int阈值=40；
int i；
int words_added=0；
int levenshtein（常量字符*word1，int len1，常量字符*word2，int len2）{
int矩阵[len1+1][len2+1]；
INTA；
对于（a=0；a，以下是我看到的关于您所做工作的一些问题：

prompt（）只能由秩0调用
字典文件应仅由秩0读取，然后将数组广播到其他秩

或者，让秩1在秩0等待输入时读取文件，然后广播输入和字典

您使计算输出步骤过于复杂。您可以将p0\U计算输出和计算输出合并到一个例程中。

将索引数组存储到每个列的dict中
此数组在每个列中的大小不同，因此最简单的方法是从每个列中发送一个表示数组大小的整数，然后使用此大小发送数组。（接收列必须知道预期的数据量）.您也可以使用MPI_Gatherv的尺寸，但我想这比您现在想要的要多
一旦你有一个排名为0的索引数组，然后用它来填充建议

将MPI_Finalize调用保存到返回调用之前
对于最后的printf调用，应该只有秩0打印该值。我怀疑这是导致大部分“不正确”结果的原因。正如您所说，所有秩都是打印建议，但它只填充秩0。因此其他所有秩都将打印空白条目

尝试其中一些更改，尤其是最后一个更改，看看是否有帮助。
代码太多了。请提供一个与原始代码相同的问题的最小工作示例（MWE）。