C 降低单词搜索程序的时间复杂度_C_Time Complexity

C 降低单词搜索程序的时间复杂度
c time-complexity
C 降低单词搜索程序的时间复杂度,c,time-complexity,C,Time Complexity,我在试着查找课文中最常用的单词。在我的程序中，判断系统（PC^2）将输入一些单词、分隔符和一些文本。单词和文本之间用“----”隔开（我需要搜索程序中最常用的单词）输入输出不是数字、字母表和“_”的字符应视为空格每行的最长长度为1024 输出顺序应为strcmp的顺序然而，尽管程序在我测试时运行得很好。我从PC^2收到超过时间限制（TLE）。因此，我想问，如何提高程序的时间复杂度？在显示最频繁的单词及其频率之前，我甚至得到了TLE #include <stdio.h>
我在试着查找课文中最常用的单词。在我的程序中，判断系统（PC^2）将输入一些单词、分隔符和一些文本。单词和文本之间用“----”隔开（我需要搜索程序中最常用的单词）
输入输出
不是数字、字母表和“_”的字符应视为空格
每行的最长长度为1024
输出顺序应为strcmp的
```
顺序
```

然而，尽管程序在我测试时运行得很好。我从PC^2收到超过时间限制（TLE）。因此，我想问，如何提高程序的时间复杂度？在显示最频繁的单词及其频率之前，我甚至得到了TLE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char tmp[2048];
char **dicWord;
int *dicWcount;
int dic_assume_num = 1000, dic_actual_num = 0;

int main()
{
    char divider[6] = "-----";
    ssize_t bytes_read;
    size_t nbytes = 2047;
    char *my_string;

    // first, get the input of words
    dicWord = malloc( dic_assume_num * sizeof( *dicWord ));
    my_string = malloc((nbytes + 1) * sizeof(*my_string));

    while ((bytes_read = getline (&my_string, &nbytes, stdin)) != -1) {
        my_string[strcspn(my_string, "\n")] = '\0';

        if (strncmp(my_string, divider, 5) == 0) {
            dicWcount = calloc(dic_actual_num+1,  sizeof(*dicWcount));
            break;
        }

        else {
            dicWord[dic_actual_num++] = strdup(my_string);
            if (dic_actual_num >= dic_assume_num) {
                dic_assume_num *= 2;
                dicWord = realloc( dicWord, dic_assume_num * sizeof( *dicWord ));
            }
        }
    }

    // second, get the input of text
    int cnt;
    for (;;) {
        scanf("%*[^a-zA-Z0-9_]");
        cnt = scanf("%2047[a-zA-Z0-9_]", tmp);

        if (cnt != 1) {
            break;
        }


        for ( size_t i = 0; i < dic_actual_num; ++i) {

            if (strcmp(dicWord[i], tmp) == 0) {
                dicWcount[i]++;
                break;
            }
        }

    }

    for ( size_t i = 0; i < dic_actual_num; ++i)
        free(dicWord[i]);

    free(dicWord);
    free(dicWcount);
    free(my_string);

    return 0;
}

#包括
#包括
#包括
char-tmp[2048]；
字符**双字；
整数*数字计数；
int dic_假设数量=1000，dic_实际数量=0；
int main（）
{
字符分隔符[6]=“----”；
ssize_t字节_u u读取；
大小字节=2047；
char*my_字符串；
//首先，获取单词的输入
dicWord=malloc（dic_假设数量*sizeof（*dicWord））；
my_字符串=malloc（（n字节+1）*sizeof（*my_字符串））；
while（（bytes\u read=getline（&my\u string，&nbytes，stdin））！=-1）{
my_string[strcspn（my_string，“\n”）]='\0'；
if（strncmp（my_字符串，除法器，5）==0）{
dicWcount=calloc（dic_实际_num+1，sizeof（*dicWcount））；
打破
}
否则{
dicWord[dic_-actual_-num++]=strdup（我的字符串）；
如果（dic_实际数>=dic_假设数）{
dic_假设_num*=2；
dicWord=realloc（dicWord，dic假设数量*sizeof（*dicWord））；
}
}
}
//第二，获取文本的输入
int-cnt；
对于（；；）{
scanf（“%*[^a-zA-Z0-9]”；
cnt=scanf（“%2047[a-zA-Z0-9_]”，tmp）；
如果（cnt！=1）{
打破
}
对于（大小i=0；i
想法很简单：

迭代文本/文件：O（n）
将当前单词添加到已看到单词的树/哈希表/映射中

或者增加此单词的计数器：n*log（n）
迭代文件后：获取计数器最高的单词：O（1）

最影响所需时间的是用于存储单词的数据结构。一个平衡的树（以最高计数器为根的单词）应该是好的
我认为不可能再减少所需的时间。
以下代码：
干净地编译
速度相当快
执行所需的操作，但对输出进行排序除外
现在建议的守则是：
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct myData
{
    char   word[1026];
    size_t count;
};


struct myData dicWord[ 1000 ];  // may need to increase the 1000


int main( void )
{
    size_t dic_actual_num = 0;

    // first, get the input of words
    while ( fgets(dicWord[ dic_actual_num ].word, 1026, stdin) )
    {
        dicWord[ dic_actual_num ].word[ strcspn( dicWord[ dic_actual_num ].word, "\n") ] = '\0';

        if (strncmp( dicWord[ dic_actual_num ].word, "-----", 5 ) == 0)
        {
             break;
        }

        dic_actual_num++;
    }

    // second, get the input of text and check for highest count
    char tmp[1026];  // +2 for newline and NUL byte
    size_t maxCount = 0;

    while( 1 == scanf( "%*[^a-zA-Z0-9_] %1024[a-zA-Z0-9_]", tmp ) )
    {
        for ( size_t i = 0; i < dic_actual_num; ++i)
        {
            if (strcmp(dicWord[i].word, tmp) == 0)
            {
                dicWord[i].count++;
                if( dicWord[i].count > maxCount )
                {
                    maxCount = dicWord[i].count;
                }
                break;
            }
        }
    }

    // << need to add logic for `strcmp()` sorting


    // display answer
    for( size_t i = 0; i<dic_actual_num; i++ )
    {
        if( maxCount == dicWord[i].count )
        {
            printf( "%s %lu\n", dicWord[i].word, maxCount );
        }
    }

    return 0;
}

#包括
#包括
#包括
结构myData
{
字符字[1026]；
大小/数量；
};
struct myData dicWord[1000]；//可能需要增加1000
内部主（空）
{
大小dic实际数量=0；
//首先，获取单词的输入
while（fgets（dicWord[dic_-actual_-num].word，1026，stdin））
{
dicWord[dic\U actual\U num].word[strcspn（dicWord[dic\U actual\U num].word，“\n”）]='\0'；
if（strncmp（dicWord[dic_实际值].word，“----”，5）=0）
{
打破
}
dic_实际_num++；
}
//第二，获取文本输入并检查最高计数
字符tmp[1026]；//+2表示换行符和NUL字节
大小\u t最大计数=0；
而（1==scanf（“%*[^a-zA-Z0-9\]%1024[a-zA-Z0-9\]”，tmp））
{
对于（大小i=0；imaxCount）
{
maxCount=dicWord[i]。计数；
}
打破
}
}
}
//“可能不是无限循环。相反，我的程序可能会运行超过预期的时间。”--您应该确保这一点，例如通过调试。如果您的程序有错误的行为，问题可能属于此处，但请确保创建一个（包括输入、预期和实际输出）。另一方面，如果程序确实正常工作，您可能需要查看codereview.stackexchange。您说您已经问过这个问题。您链接到的问题似乎不太相似。但是，已通过引用重复的问题来回答。请详细说明这三个问题之间的区别问题是什么？这个问题中的什么问题没有被其他两个回答。如果这是一个新问题（看起来），然后我建议删除对不相关问题的令人困惑的引用，或者更准确地引用你想将此问题与之对比的另一个问题的一部分。@Felix该程序在我测试时效果很好。但它在评判系统上失败（PC^2）@Yunnosch谢谢你的建议。我已经修改了它。发布的代码不包含任何“排序”算法。发布的代码不包含任何结果值的显示或所选单词。除了scanf
，还有其他或更好的方法来清除无效字符吗？更好的方法，而不是使用scanf（）
是编写一个小函数来输入每个单词，这样一个函数将使用getchar\u unlocked（）
它将累加每个单词，然后返回指向该单词的指针。这样一个函数将比调用第一个（ch=getchar\u unlocked）要快得多！=EOF在while循环中会一直为真，直到输入结束？@linpoxien，好吧，I di
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char tmp[2048];
char **dicWord;
int *dicWcount;
int dic_assume_num = 1000, dic_actual_num = 0;

int main()
{
    char divider[6] = "-----";
    ssize_t bytes_read;
    size_t nbytes = 2047;
    char *my_string;

    // first, get the input of words
    dicWord = malloc( dic_assume_num * sizeof( *dicWord ));
    my_string = malloc((nbytes + 1) * sizeof(*my_string));

    while ((bytes_read = getline (&my_string, &nbytes, stdin)) != -1) {
        my_string[strcspn(my_string, "\n")] = '\0';

        if (strncmp(my_string, divider, 5) == 0) {
            dicWcount = calloc(dic_actual_num+1,  sizeof(*dicWcount));
            break;
        }

        else {
            dicWord[dic_actual_num++] = strdup(my_string);
            if (dic_actual_num >= dic_assume_num) {
                dic_assume_num *= 2;
                dicWord = realloc( dicWord, dic_assume_num * sizeof( *dicWord ));
            }
        }
    }

    // second, get the input of text
    int cnt;
    for (;;) {
        scanf("%*[^a-zA-Z0-9_]");
        cnt = scanf("%2047[a-zA-Z0-9_]", tmp);

        if (cnt != 1) {
            break;
        }


        for ( size_t i = 0; i < dic_actual_num; ++i) {

            if (strcmp(dicWord[i], tmp) == 0) {
                dicWcount[i]++;
                break;
            }
        }

    }

    for ( size_t i = 0; i < dic_actual_num; ++i)
        free(dicWord[i]);

    free(dicWord);
    free(dicWcount);
    free(my_string);

    return 0;
}

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct myData
{
    char   word[1026];
    size_t count;
};


struct myData dicWord[ 1000 ];  // may need to increase the 1000


int main( void )
{
    size_t dic_actual_num = 0;

    // first, get the input of words
    while ( fgets(dicWord[ dic_actual_num ].word, 1026, stdin) )
    {
        dicWord[ dic_actual_num ].word[ strcspn( dicWord[ dic_actual_num ].word, "\n") ] = '\0';

        if (strncmp( dicWord[ dic_actual_num ].word, "-----", 5 ) == 0)
        {
             break;
        }

        dic_actual_num++;
    }

    // second, get the input of text and check for highest count
    char tmp[1026];  // +2 for newline and NUL byte
    size_t maxCount = 0;

    while( 1 == scanf( "%*[^a-zA-Z0-9_] %1024[a-zA-Z0-9_]", tmp ) )
    {
        for ( size_t i = 0; i < dic_actual_num; ++i)
        {
            if (strcmp(dicWord[i].word, tmp) == 0)
            {
                dicWord[i].count++;
                if( dicWord[i].count > maxCount )
                {
                    maxCount = dicWord[i].count;
                }
                break;
            }
        }
    }

    // << need to add logic for `strcmp()` sorting


    // display answer
    for( size_t i = 0; i<dic_actual_num; i++ )
    {
        if( maxCount == dicWord[i].count )
        {
            printf( "%s %lu\n", dicWord[i].word, maxCount );
        }
    }

    return 0;
}

#include <ctype.h>
#include <stdio.h>

// prototype:
char *getWord( void );


char *getWord()
{
    static char newWord[1026];

    //memset( newWord, '\0', sizeof( newWord ) );

    size_t charCount = 0;
    int  ch;

    // skip leading junk
    while( (ch = getchar_unlocked) != EOF 
         || '\n' == ch 
         || ispunct( ch ) 
         || isspace( ch ) );

    // append desired characters
    while( charCount < 1024
        && (ch = getchar_unlocked) != EOF 
        && ( '_' == ch || isdigit( ch ) || isalpha( ch ) ) )
    {
        newWord[ charCount ] = ch;
        charCount++;
    }

    // terminate the string
    newWord[ charCount ] = '\0';

    return newWord;
}