如何在ANSI C中对字符串的结构数组使用快速排序_C_Arrays_Sorting_Struct_Quicksort

如何在ANSI C中对字符串的结构数组使用快速排序

c arrays sorting struct

如何在ANSI C中对字符串的结构数组使用快速排序,c,arrays,sorting,struct,quicksort,C,Arrays,Sorting,Struct,Quicksort,我有一个300万行的字符串结构。我正在尝试对文件进行如下排序： aaaaa aaaab aaacc 等等我想做泡泡运动。我用10行代码试了试，结果成功了，但当我试了整个300万行文件时，花了30多分钟，仍在处理中。我决定试试快速排序。然而，我遇到了一个问题，它说：应为“const char**”，但参数类型为“struct lines*” 我怎样才能解决这个问题？以下是我正在做的： #include<stdio.h> #include<string.h> #inclu

我有一个结构体数组，里面存放了大约300万行字符串。我想把文件中的各行按如下方式排序：

aaaaa

aaaab

aaacc

等等

我本来想用冒泡排序。我先用10行数据试了试，结果成功了；但当我用完整的300万行文件测试时，跑了30多分钟仍未结束。于是我决定试试快速排序。然而，我遇到了一个报错，内容是：

应为“const char**”，但参数类型为“struct lines*”

我怎样才能解决这个问题？以下是我正在做的：

#include<stdio.h>
#include<string.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>
#include <ctype.h>

/*
 * Exchange the two string pointers that arg1 and arg2 refer to.
 * Only the pointers change places; the characters they point at are
 * neither copied nor modified.
 */
void swap_str_ptrs(char const **arg1, char const **arg2)
{
    char const *first = *arg1;
    char const *second = *arg2;

    *arg1 = second;
    *arg2 = first;
}

/*
 * Sort an array of C-string pointers into ascending strcmp() order, in place.
 *
 * args: array of `len` pointers to NUL-terminated strings. Only the pointers
 *       are rearranged; the strings themselves are never copied or modified.
 * len:  number of elements in args (0 and 1 are accepted and are no-ops).
 *
 * Implementation: randomized quicksort with a three-way partition.
 * Elements equal to the pivot are gathered in the middle and never revisited,
 * so inputs with many identical lines do not degrade to quadratic time.
 * Only the smaller side is handled recursively; the larger side is processed
 * by the enclosing loop, which keeps the recursion depth at O(log len).
 */
void quicksort_strs(char const *args[], unsigned int len)
{
    while (len > 1)
    {
        /* The pivot is held by value, so the swaps below cannot disturb it. */
        char const *pivot = args[(unsigned int)rand() % len];
        unsigned int lt = 0;    /* args[0 .. lt)    compare less than pivot    */
        unsigned int i = 0;     /* args[lt .. i)    compare equal to pivot     */
        unsigned int gt = len;  /* args[gt .. len)  compare greater than pivot */
        unsigned int right_len;

        while (i < gt)
        {
            int cmp = strcmp(args[i], pivot);
            char const *tmp;

            if (cmp < 0)
            {
                /* grow the "less" region; the displaced element equals the pivot */
                tmp = args[lt];
                args[lt++] = args[i];
                args[i++] = tmp;
            }
            else if (cmp > 0)
            {
                /* grow the "greater" region; args[i] is now unclassified, so
                   i is deliberately not advanced */
                tmp = args[--gt];
                args[gt] = args[i];
                args[i] = tmp;
            }
            else
            {
                ++i;
            }
        }

        /* The pivot itself is in the middle region, so both sides are
           strictly shorter than len and the loop always makes progress. */
        right_len = len - gt;
        if (lt < right_len)
        {
            quicksort_strs(args, lt);
            args += gt;
            len = right_len;
        }
        else
        {
            quicksort_strs(args + gt, right_len);
            len = lt;
        }
    }
}

/*
 * Question code (kept as posted): reads "hs_alt_HuRef_chr2.fa" line by line
 * into a static array of fixed-size records, then attempts to sort the
 * records with quicksort_strs().
 *
 * NOTE(review): `void main()` is not one of the standard signatures of main
 * (`int main(void)` / `int main(int, char **)`).
 */
void main()
{
    // NOTE(review): the fopen() result is passed to fgets() below without a NULL check.
    FILE *dnaFile=fopen("hs_alt_HuRef_chr2.fa", "r"); //file im reading
    typedef struct lines
    {
        char lines[100]; //size of each line
    } lines;
    int i = 0;

    char buf[256];
    static lines myDNA[3354419]; //creates the 3m spots for all lines
    // NOTE(review): i is never checked against the 3354419-element capacity of myDNA.
    while (fgets (buf, sizeof(buf), dnaFile))
    {
        // The first line read (i == 0) is not stored, so myDNA[0] keeps its
        // zero-initialized (empty) contents.
        // NOTE(review): buf can hold up to 255 characters but the destination
        // is 100 bytes; strcpy() does not bound the copy.
        if (i > 0)
            strcpy(myDNA[i].lines, buf); //inserting each line into the struct array

        i++;
    }

    // this is the bubblesort approach, works, but it takes too long
    /**int a;
    int total;
    char temp[150];
    char report[100][150];

    for(a=0; a<3354419; a++)
    {
        for(total=a+1; total<=3354419; total++)
        {
            if(strcmp(myDNA[a].lines,myDNA[total].lines)>0)
            {
                strcpy(temp,myDNA[a].lines);
                strcpy(myDNA[a].lines,myDNA[total].lines);
                strcpy(myDNA[total].lines,temp);
            }
        }
    }*/

    // NOTE(review): quicksort_strs() takes `char const *args[]` (an array of
    // string pointers), but myDNA is an array of `lines` structs. This
    // mismatch is the "expected const char **" diagnostic the question is about.
    quicksort_strs(myDNA, 3354419); //attempt at quicksort, which crashes

    // NOTE(review): dnaFile is never closed.
}

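#include<stdio.h>
#include<string.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>
#include <ctype.h>
void swap_str_ptrs(char const **arg1, char const **arg2)
{
    const char *tmp = *arg1;
    *arg1 = *arg2;
    *arg2 = tmp;
}
void quicksort_strs(char const *args[], unsigned int len)
{
    unsigned int i, pvt=0;
    if (len /* ... excerpt truncated here on the original page; the full code appears above ... */

我稍微修改了问题中的代码。根据我的测试，下面的代码似乎可以按要求运行（如问题所述）：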
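#include <stdio.h>  // printf(), fprintf(), fclose(), feof(), fgets(), fopen()
#include <string.h> // memset(), strcmp(), strdup()
#include <stdlib.h> // malloc(), qsort(), free()
#include <errno.h>  // errno, ENOMEM, EIO
#define MAX_FILE_LINES 3354419
#define MAX_LINE_SIZE  (255+1)
int compare_function(const void *a, const void *b)
{
    return(strcmp(*(const char **)a, *(const char **)b));
}
int main(int argC, char *argV[])
{
    int rCode=0;
    char *filePath="hs_alt_HuRef_chr2.fa";
    FILE *dnaFile=NULL;
    char **myDNA=NULL;
    int myDNAcnt=0;
    int index;
    /** Allow user to specify the file path on the command-line. **/
    if(argC > 1)
        filePath=argV[1];
    /** Allocate an array (to hold the 3 million lines). **/
    errno=0;
    myDNA=malloc(MAX_FILE_LINES * sizeof(*myDNA));
    if(NULL == myDNA)
    {
        rCode=errno?errno:ENOMEM;
        fprintf(stderr, "malloc() failed. errno:%d\n", errno);
        goto CLEANUP;
    }
    memset(myDNA, 0, MAX_FILE_LINES * sizeof(*myDNA));
    /** Open the file. **/
    errno=0;
    dnaFile=fopen(filePath, "r");
    if(NULL == dnaFile)
    {
        rCode=errno;
        fprintf(stderr, "fopen() failed to open \"%s\". errno:%d\n", filePath, errno);
        goto CLEANUP;
    }
    /** Read the file into the array, allocating dynamic memory for each line. **/
    for(myDNAcnt=0; myDNAcnt /* ... excerpt truncated here on the original page; the full code appears below ... */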
请使用标准库，标准的qsort肯定能正常工作。请给我们看看你使用它的代码。

将最多256个字符（sizeof(buf)）复制到只有100字节的字符数组（sizeof(lines)）中，可能导致未定义行为。此外，除非你有理由使用结构体，否则我建议将myDNA声明为二维字符数组：char myDNA[3354419][100];。如果文件中的行长度超过100（或200？）个字符，那么使用char myDNA[3354419][256]是合理的，这样每行有256个字节可用。这意味着qsort调用中的元素长度也必须做相同的更新。另外，这正是预处理器指令的一个很好的用例：#define LINE_LENGTH 256，这样以后要更改长度时，只需改动一处。因为strcmp比较到字符串终止符'\0'就会停止，所以一行之后是否还有未使用的字节并不重要。不过，既然跳过第一次写入只是为了丢弃文件的第一行，你完全可以先单独调用一次fgets读掉这一行（约200个字符），然后再开始把数据读入数组。否则，你是从myDNA[1]开始写入的，而qsort却会把myDNA[0]也当作待排序数据的一部分。

在C语言中使用goto，这不是一种不好的做法吗？

@delive，在C语言中，goto语句是处理错误情况的最佳方式，在状态机中也非常常见。也许告诉你goto是不良做法的人误导了你；至少在这种情况下是这样。

我现在很困惑
#include<stdio.h>
#include<string.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>
#include <ctype.h>

/*
 * qsort()-style comparator that treats each element itself as a
 * NUL-terminated character buffer (not as a pointer to one) and orders
 * the two elements with strcmp().
 */
int compare_function(const void *a,const void *b) {
    const char *lhs = a;
    const char *rhs = b;

    return strcmp(lhs, rhs);
}

/*
 * Question code, second attempt (kept as posted): reads
 * "hs_alt_HuRef_chr2.fa" into a static array of 100-byte records, sorts the
 * first 1000 records with qsort() and compare_function(), and prints them.
 *
 * NOTE(review): `void main()` is not one of the standard signatures of main.
 */
void main()
{
    // NOTE(review): the fopen() result is passed to fgets() below without a NULL check.
    FILE *dnaFile=fopen("hs_alt_HuRef_chr2.fa", "r"); //file with 3 million lines
    typedef struct lines
    {
        char lines[100];
    } lines;
    int i = 0;

    char buf[256];
    static lines myDNA[3354419]; // array holding the 3 million lines
    // NOTE(review): i is never checked against the 3354419-element capacity of myDNA.
    while (fgets (buf, sizeof(buf), dnaFile))
    {
        // The first line read (i == 0) is not stored, so myDNA[0] stays an
        // empty, zero-initialized record that is still part of the sorted range.
        // NOTE(review): buf can hold up to 255 characters but the destination
        // is 100 bytes; strcpy() does not bound the copy.
        if (i > 0)
            strcpy(myDNA[i].lines, buf); //putting each line into array

        i++;
    }

    // Sorts only elements 0..999, with a hard-coded element size of 100 bytes.
    // NOTE(review): presumably 100 is meant to equal sizeof(lines) and 1000
    // the number of lines actually read; neither is derived from the data.
    qsort(myDNA, 1000, 100, compare_function); //qsort works for first 1k lines, after, messed up

    int a;
    for (a = 0; a < 1000; a++){
    printf("%s", myDNA[a].lines); //printing lines
    }

    // NOTE(review): dnaFile is never closed.
}

#include <stdio.h>  // printf(), fprintf(), fclose(), feof(), fgets(), fopen()
#include <string.h> // memset(), strcmp(), strdup()
#include <stdlib.h> // malloc(), qsort(), free()
#include <errno.h>  // errno, ENOMEM, EIO

#define MAX_FILE_LINES 3354419
#define MAX_LINE_SIZE  (255+1)

/*
 * qsort() comparator for an array whose elements are string pointers:
 * a and b each point at one element (itself a pointer to a NUL-terminated
 * string), and the pointed-to strings are ordered with strcmp().
 */
int compare_function(const void *a, const void *b)
   {
   const char *const *lhs = a;
   const char *const *rhs = b;

   return strcmp(*lhs, *rhs);
   }

/*
 * Read a text file line by line, sort the lines, and print them.
 *
 * The file is "hs_alt_HuRef_chr2.fa" unless a path is given as the first
 * command-line argument. At most MAX_FILE_LINES lines are read; each is
 * read through a MAX_LINE_SIZE-byte buffer, has its trailing newline
 * removed, and is stored in its own heap allocation. The array of line
 * pointers is sorted with qsort() and compare_function(), then printed to
 * stdout as "<index>: <line>".
 *
 * Returns 0 on success, or a non-zero errno-style code if allocation,
 * opening the file, or reading it fails. All resources are released on
 * every path through the CLEANUP label.
 */
int main(int argC, char *argV[])
   {
   int    rCode    = 0;
   char  *filePath = "hs_alt_HuRef_chr2.fa";
   FILE  *dnaFile  = NULL;
   char **myDNA    = NULL;
   int    myDNAcnt = 0;   /* number of lines successfully stored so far */
   int    index;

   /** Allow user to specify the file path on the command-line. **/
   if(argC > 1)
      filePath=argV[1];

   /** Allocate an array (to hold the 3 million lines). **/
   errno=0;
   myDNA=malloc(MAX_FILE_LINES * sizeof(*myDNA));
   if(NULL == myDNA)
      {
      rCode=errno?errno:ENOMEM;
      fprintf(stderr, "malloc() failed. errno:%d\n", errno);
      goto CLEANUP;
      }
   memset(myDNA, 0, MAX_FILE_LINES * sizeof(*myDNA));

   /** Open the file. **/
   errno=0;
   dnaFile=fopen(filePath, "r");
   if(NULL == dnaFile)
      {
      /* ISO C does not require fopen() to set errno, so fall back to EIO
         rather than reporting success with a zero exit code. */
      rCode=errno?errno:EIO;
      fprintf(stderr, "fopen() failed to open \"%s\". errno:%d\n", filePath, errno);
      goto CLEANUP;
      }

   /** Read the file into the array, allocating dynamic memory for each line. **/
   for(myDNAcnt=0; myDNAcnt < MAX_FILE_LINES; ++myDNAcnt)
      {
      char buf[MAX_LINE_SIZE];
      char *cp;
      size_t lineSize;

      if(NULL == fgets(buf, sizeof(buf), dnaFile))
         {
         /* NULL at end-of-file is the normal way out of the loop. */
         if(feof(dnaFile))
            break;

         rCode=EIO;
         fprintf(stderr, "fgets() failed.\n");
         goto CLEANUP;
         }

      /* Strip the newline that fgets() keeps, if the line fit in buf. */
      cp=strchr(buf, '\n');
      if(cp)
         *cp='\0';

      /* Copy the line by hand: strdup() is POSIX rather than ISO C (before
         C23) and is left undeclared by strict -std=c99/-std=c11 builds. */
      lineSize=strlen(buf) + 1;
      errno=0;
      myDNA[myDNAcnt] = malloc(lineSize);
      if(NULL == myDNA[myDNAcnt])
         {
         rCode=errno?errno:ENOMEM;
         fprintf(stderr, "malloc() failed. errno:%d\n", errno);
         goto CLEANUP;
         }
      memcpy(myDNA[myDNAcnt], buf, lineSize);
      }

   /** Sort the array. **/
   qsort(myDNA, myDNAcnt, sizeof(*myDNA), compare_function);

   /** Print the resulting sorted array. **/
   for(index=0; index < myDNAcnt; index++)
      {
      printf("%8d: %s\n",index,  myDNA[index]); //printing lines
      }

CLEANUP:

   /** Close the file. **/
   if(dnaFile)
      fclose(dnaFile);

   /** Free the array. **/
   if(myDNA)
      {
      /* Only the first myDNAcnt slots hold allocated lines. */
      for(index=0; index < myDNAcnt; index++)
         {
         free(myDNA[index]);
         }

      free(myDNA);
      }

   return(rCode);
   }