Frama-C字谜函数行为验证_C_Static Analysis_Anagram_Frama C

Frama-C字谜函数行为验证

Frama-C字谜函数行为验证,c,static-analysis,anagram,frama-c,C,Static Analysis,Anagram,Frama C,我编写了一个C函数，用于检查两个给定字符串（C样式）是否为字谜。我试图用Frama-C验证它，但它无法验证函数的最终行为（其他规范是有效的）。第一个是超时（即使WP中的超时值非常高），第二个是未知的代码如下： #include <string.h> //@ ghost char alphabet[26] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q

我编写了一个C函数，用于检查两个给定字符串（C样式）是否为字谜。我试图用Frama-C验证它，但它无法验证函数的最终行为（其他规范是有效的）。第一个是超时（即使WP中的超时值非常高），第二个是未知的

代码如下：

    #include <string.h>
//@ ghost char alphabet[26] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};

/*@
    // Takes a character and return it to lowercase if it's uppercase
    axiomatic ToLower
    {
        logic char to_lower(char c);

        axiom lowercase:
            \forall char c; 97 <= c <= 122 ==> to_lower(c) == c;

        axiom uppercase:
            \forall char c; 65 <= c <= 90 ==> to_lower(c) == to_lower((char) (c+32));
    }
*/
/*@
    // Count the occurences of character 'c' into 'string' that is long 'n' characters
    axiomatic CountChar
    {
        logic integer count_char(char* string, integer n, char c);

        axiom count_zero:
            \forall char* string, integer n, char c; n <= 0 ==>
            count_char(string, n, c) == 0;

        axiom count_hit:
            \forall char* string, integer n, char c; n >= 0 && to_lower(string[n]) == c ==>
            count_char(string, n+1, c) == count_char(string, n, c) + 1;

        axiom count_miss:
            \forall char* string, integer n, char c; n >= 0 && to_lower(string[n]) != c ==>
            count_char(string, n+1, c) == count_char(string, n, c);
    }
*/

/*@
    predicate are_anagrams{L}(char* s1, char* s2) = ( \forall integer i; 0 <= i < 26 ==> 
    count_char(s1, strlen(s1), alphabet[i]) == count_char(s2, strlen(s2), alphabet[i]) );
*/

/*@
    requires valid_string(a);
    requires valid_string(b);

    // Requires that strings 'a' and 'b' are composed only by alphabet's letters and that are long equally.
    requires \forall integer k; 0 <= k < strlen(a) ==> 65 <= a[k] <= 90 || 97 <= a[k] <= 122;
    requires \forall integer k; 0 <= k < strlen(b) ==> 65 <= b[k] <= 90 || 97 <= b[k] <= 122;
    requires strlen(a) == strlen(b);

    ensures 0 <= \result <= 1;
    assigns \nothing;

    behavior anagrams:
    assumes are_anagrams(a, b);
    ensures \result == 1;
    behavior not_anagrams:
    assumes !are_anagrams(a, b);
    ensures \result == 0;
    complete behaviors anagrams, not_anagrams;
    disjoint behaviors anagrams, not_anagrams;
*/
int check_anagram(const char a[], const char b[])
{
   // Create two arrays and initialize them to zero
   int first[26];
   int second[26];
   int c;
   /*@
    loop assigns first[0..(c-1)];
    loop assigns second[0..(c-1)];
    loop assigns c; 
    loop invariant 0 <= c <= 26;
    loop invariant \forall integer k; 0 <= k < c ==> second[k] == first[k];
    loop invariant \forall integer k; 0 <= k < c ==> first[k] == 0 && second[k] == 0;
    loop invariant \valid(first+(0..25)) && \valid(second+(0..25));
    loop variant 26-c;
   */
   for(c = 0; c < 26; c++)
   {
      first[c] = 0;
      second[c] = 0;
   }

   char tmp = 'a';
   c = 0;

   // Now increment the array position related to position of character occured in the alphabet, subtracting ASCII decimal value of character from the character.
   /*@
    loop assigns first[0..25];
    loop assigns tmp;
    loop assigns c;
    loop invariant 97 <= tmp <= 122;
    loop invariant \valid(first+(0..25));
    loop invariant strlen(\at(a, Pre)) == strlen(\at(a, Here));
    loop invariant 0 <= c <= strlen(a);
    loop variant strlen(a)-c;
   */
   while (a[c] != '\0')
   {
      // This is a little trick to lowercase if the char is uppercase.
      tmp = (a[c] > 64 && a[c] < 91) ? a[c]+32 : a[c];
      first[tmp-97]++;
      c++;
   }


   c = 0;
   // Doing the same thing on second string.
   /*@
    loop assigns second[0..25];
    loop assigns tmp;
    loop assigns c;
    loop invariant 97 <= tmp <= 122;
    loop invariant \valid(second+(0..25));
    loop invariant strlen(\at(b, Pre)) == strlen(\at(b, Here));
    loop invariant 0 <= c <= strlen(b);
    loop variant strlen(b)-c;
   */
   while (b[c] != '\0')
   {
      tmp = (b[c] > 64 && b[c] < 91) ? b[c]+32 : b[c];
      second[tmp-'a']++;
      c++;
   }

   // And now compare the arrays containing the number of occurences to determine if strings are anagrams or not.
   /*@
    loop invariant strlen(\at(a, Pre)) == strlen(\at(a, Here));
    loop invariant strlen(\at(b, Pre)) == strlen(\at(b, Here));
    loop invariant 0 <= c <= 26;
    loop assigns c;
    loop variant 26-c;
   */
   for (c = 0; c < 26; c++)
   {
      if (first[c] != second[c])
         return 0;
   }

   return 1;
}

#包括
//@鬼字符字母[26]={'a'，'b'，'c'，'d'，'e'，'f'，'g'，'h'，'i'，'j'，'k'，'l'，'m'，'n'，'o'，'p'，'q'，'r'，'s'，'t'，'u'，'v'，'w'，'x'，'y'，'z'}；
/*@
//接受一个字符并将其返回为小写（如果是大写）
公理托洛尔
{
逻辑字符到较低的字符（字符c）；
axiom小写字母：
\所有字符c；97
count_char（string，n+1，c）=count_char（string，n，c）+1；
axiom计数未命中：
\对于所有字符*字符串，整数n，字符c；n>=0&&to_更低（字符串[n]）！=c==>
count_char（string，n+1，c）=count_char（string，n，c）；
}
*/
/*@
谓词是字符{L}（char*s1，char*s2）=（\对于所有整数i；0
计数字符（s1，strlen（s1），字母[i]）==计数字符（s2，strlen（s2），字母[i]）；
*/
/*@
需要有效的_字符串（a）；
需要有效的_字符串（b）；
//要求字符串“a”和“b”仅由字母组成，且长度相等。
需要\forall integer k；0 65我不是静态分析方面的专家，但我怀疑一些静态分析引擎可能会遇到一些问题，例如（a[c]>64&&a[c]<91）
，a[c]+32
，首先[tmp-97]
以及您在这里使用的其他ASCII特定代码
记住，C不需要ASCII字符集；据我们所知，您可以尝试在EBCDIC是字符集的情况下运行此程序，在这种情况下，我预计可能会出现缓冲区溢出，具体取决于输入
您应该使用查找表（或某种字典）将每个字符转换为整数索引，并使用toupper
和tolower
等函数转换无符号字符
值（请注意无符号字符
的重要性）可移植。
乍一看，您的规范似乎是正确的（但这又是一个非常复杂的规范。我从未编写过如此复杂的ACSL，可能会遗漏一些东西）
然而，函数check_anagram
中的注释显然不足以解释为什么该函数应该尊重契约。特别是，考虑while循环。为了深入了解函数的工作原理，每个循环的不变量应该表示在任何迭代中，数组分别first
和second
包含到目前为止访问的第一个和第二个字符串的字符计数
这就是为什么在每个循环的末尾，这些数组包含整个字符串的字符计数
表达这些不变量将真正显示函数是如何工作的。如果没有他们，就没有希望达成合同得以实施的结论。
我以前曾试图删除CAP处理代码/特定代码。“不努力。”我明白了。请注意，向我们展示充满神奇数字的代码大致相当于邀请我们进入一个黑暗的房子，里面有不合适位置的肮脏的电灯开关。为了提高可读性，我在源代码中添加了一些注释。谢谢。评论不需要解释发生了什么。它们对于解释为什么会发生某些事情更有用。实际上，在实践中似乎有效的是（d）选择一致的实现定义参数，并针对该目标进行分析。这就是Frama-C所做的，它选择了ASCII和（除非您使用命令行选项来更改此选项）32位2的补码int
。我不知道Frama，但是循环不变量0@TripeHound对于每次迭代，不变量必须为真，包括因为条件变为假而退出循环的迭代。对于For循环For（c=0；c<26；c++）…
，不变量0≤ C≤ 26
是正确的。循环退出时保证的属性为“不变且非条件”，在本例中为“0”≤ C≤ 26，而不是c<26”，从中我们可以机械地推断，当循环退出时，c保持26（但这是为了得出函数工作的结论，应该推断出循环做了什么的最小值）。@TripeHoundloop首先赋值[0..（c-1）]
也是正确的：在任何迭代开始时，循环只分配了数组first
的元素0到c-1：@PascalCuoq谢谢。。。这（几乎）是有道理的。根据您的建议，我尝试向while循环添加一个新的规范。新规范是所有整数i的循环不变量；正如Pascal所说，您至少需要在第二个循环中的second
中这样做。此外，第三个循环还需要至少另一个不变量，即first
和second
具有类似的元素，最多为c-1
。