Google bigquery 如何从函数内部调用函数?

Google bigquery 如何从函数内部调用函数?,google-bigquery,Google Bigquery,我试图定义一个递归函数(查找两个字符串之间的Levenshtein距离)。我定义了一个数据集,方法是在函数名前加上它的名称 作为一种可能的修复方法,我尝试从递归调用中删除了SQUAD_V11前缀 CREATE OR REPLACE FUNCTION SQUAD_V11.levenshteinDistance(s STRING,t STRING) RETURNS INT64 AS ( /* function levenshteinDistance (s, t) { */ /* if (!s

我试图定义一个递归函数(用于计算两个字符串之间的 Levenshtein 距离)。我通过在函数名前加上数据集名称(SQUAD_V11)来指定函数所属的数据集。

作为一种可能的修复方法,我尝试从递归调用中删除了SQUAD_V11前缀

CREATE OR REPLACE FUNCTION SQUAD_V11.levenshteinDistance(s STRING,t STRING)
RETURNS INT64
AS
(
/* Attempted SQL port of the recursive JavaScript sketch kept in the comments: function levenshteinDistance (s, t) { */
    /* JS: if (!s.length) return t.length   (s is empty, so the result is the length of t) */
    CASE
      WHEN (LENGTH(s) = 0) THEN LENGTH(t)
    /* JS: if (!t.length) return s.length;   (t is empty, so the result is the length of s) */
      WHEN (LENGTH(t) = 0) THEN LENGTH(s)
      /* JS: return Math.min(   NOTE(review): "distances" is never introduced as an alias for the UNNEST below; presumably "AS distances" is missing, confirm */
      ELSE
        (SELECT MIN(distances) FROM UNNEST(
      /* JS:    levenshteinDistance(s.substr(1), t) + 1,   NOTE(review): JS substr(1) drops the first character, but BigQuery SUBSTR positions look 1-based, so SUBSTR(s,1) may keep the whole string; confirm */
      [SQUAD_V11.levenshteinDistance(SUBSTR(s,1),t) + 1,
      /* JS:    levenshteinDistance(t.substr(1), s) + 1,   (same call with the two arguments swapped) */
      SQUAD_V11.levenshteinDistance(SUBSTR(t,1),s) + 1,
      /* JS:    levenshteinDistance(s.substr(1), t.substr(1)) + (s[0] !== t[0] ? 1 : 0)   NOTE(review): the SQL below adds a comparison result directly to an INT64 and reads position 0; presumably needs IF(..., 1, 0) and position 1, confirm */
      SQUAD_V11.levenshteinDistance(SUBSTR(s,1),SUBSTR(t,1)) + (SUBSTR(s,0,1) != SUBSTR(t,0,1))]))
      /* JS: ) + 1;   (end of the Math.min(...) call in the sketch) */
     END
/* }   (end of the JavaScript sketch) */
);

我需要能够递归调用我的函数。我应该能够在函数内部调用函数;从逻辑上讲,这不应该需要与在 SELECT 语句中调用它不同的语法。然而,当我尝试这样做时,我得到了“Error: Function not found: levenshteinDistance at [15:8]”。

不幸的是,BigQuery 不支持递归的 UDF 调用!但您可以在一个 JS UDF(注意:不是 SQL UDF)中实现整个递归逻辑,然后从 SQL 中调用它。

下面是此类实施的示例

#standardSQL
CREATE OR REPLACE FUNCTION SQUAD_V11.levenshteinDistance(s STRING, t STRING)
RETURNS INT64
LANGUAGE js AS """
  /**
   * Copy the own enumerable properties of every argument after the first
   * onto the first argument. Later sources overwrite earlier ones.
   *
   * @param dst Object the target that receives the properties (mutated in place).
   * @return Object the same dst reference that was passed in.
   */
  var _extend = function(dst) {
    // Walk the trailing arguments directly; index 0 is dst itself.
    for (var argIdx = 1; argIdx < arguments.length; argIdx += 1) {
      var source = arguments[argIdx];
      for (var key in source) {
        // for...in also visits inherited keys, so skip anything not owned.
        if (!source.hasOwnProperty(key)) {
          continue;
        }
        dst[key] = source[key];
      }
    }
    return dst;
  };
  var Levenshtein = {
    /**
     * Compute the Levenshtein (edit) distance between two strings: the
     * minimum number of single-character insertions, deletions and
     * substitutions that turn one into the other. Characters are compared
     * with charAt(), i.e. per UTF-16 code unit.
     *
     * @param str1 String the first string.
     * @param str2 String the second string.
     * @return Integer the levenshtein distance (0 and above).
     */
    get: function(str1, str2) {
      // Trivial cases: identical inputs, or one side empty.
      if (str1 === str2) {
        return 0;
      }
      var rowCount = str1.length;
      var colCount = str2.length;
      if (rowCount === 0) {
        return colCount;
      }
      if (colCount === 0) {
        return rowCount;
      }

      // One rolling row of the DP table: dist[c] is the distance between the
      // processed prefix of str1 and the first c characters of str2.
      var dist = new Array(colCount + 1);
      for (var k = 0; k < dist.length; k += 1) {
        dist[k] = k;
      }

      for (var r = 0; r < rowCount; r += 1) {
        var rowChar = str1.charAt(r);
        // diagonal holds the previous row's value one column to the left.
        var diagonal = dist[0];
        dist[0] = r + 1;
        for (var c = 1; c <= colCount; c += 1) {
          var above = dist[c];
          var substitution = diagonal + (rowChar === str2.charAt(c - 1) ? 0 : 1);
          var insertion = dist[c - 1] + 1;
          var deletion = above + 1;
          dist[c] = Math.min(substitution, insertion, deletion);
          // The old value of this cell becomes the next column's diagonal.
          diagonal = above;
        }
      }
      return dist[colCount];
    }
  };
  var the_s;
  try {
    the_s = decodeURI(s).toLowerCase();
  } catch (ex) {
    the_s = s.toLowerCase();
  }
  try {
    the_t = decodeURI(t).toLowerCase();
  } catch (ex) {
    the_t = t.toLowerCase();
  }
  return Levenshtein.get(the_s, the_t) 
""";   
结果将是

Row string1 string2 changes  
1   mikhail mike    4    
2   jon     john    1    

不幸的是,BigQuery 不支持递归的 UDF 调用!但您可以在一个 JS UDF(注意:不是 SQL UDF)中实现整个递归逻辑,然后从 SQL 中调用它。

下面是此类实施的示例

#standardSQL
CREATE OR REPLACE FUNCTION SQUAD_V11.levenshteinDistance(s STRING, t STRING)
RETURNS INT64
LANGUAGE js AS """
  /**
   * Copy the own enumerable properties of every argument after the first
   * onto the first argument. Later sources overwrite earlier ones.
   *
   * @param dst Object the target that receives the properties (mutated in place).
   * @return Object the same dst reference that was passed in.
   */
  var _extend = function(dst) {
    // Walk the trailing arguments directly; index 0 is dst itself.
    for (var argIdx = 1; argIdx < arguments.length; argIdx += 1) {
      var source = arguments[argIdx];
      for (var key in source) {
        // for...in also visits inherited keys, so skip anything not owned.
        if (!source.hasOwnProperty(key)) {
          continue;
        }
        dst[key] = source[key];
      }
    }
    return dst;
  };
  var Levenshtein = {
    /**
     * Compute the Levenshtein (edit) distance between two strings: the
     * minimum number of single-character insertions, deletions and
     * substitutions that turn one into the other. Characters are compared
     * with charAt(), i.e. per UTF-16 code unit.
     *
     * @param str1 String the first string.
     * @param str2 String the second string.
     * @return Integer the levenshtein distance (0 and above).
     */
    get: function(str1, str2) {
      // Trivial cases: identical inputs, or one side empty.
      if (str1 === str2) {
        return 0;
      }
      var rowCount = str1.length;
      var colCount = str2.length;
      if (rowCount === 0) {
        return colCount;
      }
      if (colCount === 0) {
        return rowCount;
      }

      // One rolling row of the DP table: dist[c] is the distance between the
      // processed prefix of str1 and the first c characters of str2.
      var dist = new Array(colCount + 1);
      for (var k = 0; k < dist.length; k += 1) {
        dist[k] = k;
      }

      for (var r = 0; r < rowCount; r += 1) {
        var rowChar = str1.charAt(r);
        // diagonal holds the previous row's value one column to the left.
        var diagonal = dist[0];
        dist[0] = r + 1;
        for (var c = 1; c <= colCount; c += 1) {
          var above = dist[c];
          var substitution = diagonal + (rowChar === str2.charAt(c - 1) ? 0 : 1);
          var insertion = dist[c - 1] + 1;
          var deletion = above + 1;
          dist[c] = Math.min(substitution, insertion, deletion);
          // The old value of this cell becomes the next column's diagonal.
          diagonal = above;
        }
      }
      return dist[colCount];
    }
  };
  var the_s;
  try {
    the_s = decodeURI(s).toLowerCase();
  } catch (ex) {
    the_s = s.toLowerCase();
  }
  try {
    the_t = decodeURI(t).toLowerCase();
  } catch (ex) {
    the_t = t.toLowerCase();
  }
  return Levenshtein.get(the_s, the_t) 
""";   
结果将是

Row string1 string2 changes  
1   mikhail mike    4    
2   jon     john    1