在UNIX中查找包含字符的所有单词

在UNIX中查找包含字符的所有单词,unix,shell,Unix,Shell,给定一个单词W,我想从/usr/dict/words中查找包含W中字母的所有单词。 例如,“bat”应该返回“bat”和“tab”(而不是“table”) 这里有一个解决方案,涉及对输入单词进行排序和匹配: word=$1 sortedWord=`echo $word | grep -o . | sort | tr -d '\n'` while read line do sortedLine=`echo $line | grep -o . | sort | tr -d '\n'`

给定一个单词W,我想从/usr/dict/words中查找恰好由W中的字母组成的所有单词(即W的所有字母重排)。 例如,“bat”应该返回“bat”和“tab”(而不是“table”)

这里有一个解决方案,涉及对输入单词进行排序和匹配:

# Print every line of /usr/dict/words whose letters, once sorted, equal the
# sorted letters of the first command-line argument (i.e. its anagrams).
word=$1
# Split into one character per line, sort, and join back into a single string.
sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')

# read -r keeps backslashes literal; quoting keeps glob characters in the data
# from being expanded against the current directory.
while read -r line; do
  sortedLine=$(printf '%s\n' "$line" | grep -o . | sort | tr -d '\n')
  if [[ "$sortedWord" == "$sortedLine" ]]; then
    printf '%s\n' "$line"
  fi
done < /usr/dict/words
word=$1
sortedWord=`echo $word | grep -o . | sort | tr -d '\n'`
while read line
do
    sortedLine=`echo $line | grep -o . | sort | tr -d '\n'`
    if [ "$sortedWord" == "$sortedLine" ]
    then
        echo $line
    fi
done < /usr/dict/words
有更好的办法吗?我更喜欢使用基本命令(而不是perl/awk等),但欢迎使用所有解决方案


为了澄清,我想找到原始单词的所有排列。不允许添加或删除字符。

这里是一个awk实现。它在“W”中找到带有这些字母的单词


这里有一个shell解决方案。最好的算法似乎是#4。它先过滤掉所有长度不正确的单词,然后使用一个简单的替换密码(a=1,b=2,…,A=27,…)对单词的字母求和。只有当和匹配时,才会真正执行原始的排序和比较。 在我的系统中,它可以在不到1/2秒内处理约235k个单词来查找“bat”。 我提供了我所有的解决方案,因此您可以看到不同的方法。

更新:代码中未显示,但我也尝试过把总和放入直方图方法的第一个箱子中,结果甚至比不放总和的直方图方法还要慢。我原以为它能起到短路的作用,但并没有。

更新2:我尝试了awk解决方案,它的运行时间大约是我最好的shell解决方案的1/3,即~0.126秒,而不是~0.490秒。perl解决方案运行~1.1s

#!/bin/bash

# Word whose anagrams are searched for (first command-line argument).
word=$1
# Dictionary file that the algorithms read line by line; the commented-out
# assignments are alternative locations.
#dict=words
dict=/usr/share/dict/words
#dict=/usr/dict/words

# Baseline algorithm: sort the letters of every dictionary line and compare
# them with the sorted letters of $word. Runs a pipeline per dictionary line.
# Globals: word, dict (read); sortedWord, sortedLine, line (written)
# Outputs: every matching dictionary line on stdout
alg1() {
  sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')

  # read -r keeps backslashes literal; the quoting stops glob characters in
  # the word or in a dictionary line from being expanded.
  while read -r line; do
    sortedLine=$(printf '%s\n' "$line" | grep -o . | sort | tr -d '\n')
    if [[ "$sortedWord" == "$sortedLine" ]]; then
      printf '%s\n' "$line"
    fi
  done < "$dict"
}

# Print the candidate word when it is an anagram of the reference.
# Arguments: $1 - reference word with its letters already sorted
#            $2 - candidate word (letters in natural order)
# Outputs:   $2 on stdout when its sorted letters equal $1
check_sorted_versus_not() {
  local word=$1
  local line
  # Quoted so a candidate containing glob characters is compared literally.
  line=$(printf '%s\n' "$2" | grep -o . | sort | tr -d '\n')
  if [[ "$word" == "$line" ]]; then
    printf '%s\n' "$2"
  fi
}

# Filter out all words of incorrect length first (grep with one "." per
# letter of $word), then sort-and-compare only the remaining lines.
# Globals: word, dict (read); sortedWord, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg2() {
  sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')
  # Every ASCII letter becomes ".", giving an anchored same-length pattern.
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sortedLine=$(printf '%s\n' "$line" | grep -o . | sort | tr -d '\n')
      if [[ "$sortedWord" == "$sortedLine" ]]; then
        printf '%s\n' "$line"
      fi
    done
}


# Create one global weight variable per ASCII letter:
# _a=1, _b=2, ... _z=26, _A=27, _B=28, ... _Z=52
# Globals: GEN_CHARS (set to 1); _a.._z and _A.._Z (written)
gen_chars() {
#  [ -n "$GEN_CHARS" ] && return
  GEN_CHARS=1
  local letters="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
  local idx
  for ((idx = 0; idx < ${#letters}; idx++)); do
    # printf -v assigns to a computed variable name without needing eval.
    printf -v "_${letters:idx:1}" '%d' "$((idx + 1))"
  done
}

# Sum the letter weights of a word, one character at a time, and leave the
# total in the global SUM (a variable is used instead of stdout so callers do
# not need a subprocess).
# Requires:  gen_chars (defines _a.._Z); an unset weight counts as 0.
# Arguments: $1 - word to sum
# Globals:   SUM (written); 0 when $1 is empty or contains a non-letter
sum_word() {
  SUM=0
  local idx ch name
  for ((idx = 0; idx < ${#1}; idx++)); do
    ch=${1:idx:1}
    # Only ASCII letters have a weight variable; anything else would form an
    # invalid variable name, so give up with SUM=0.
    case $ch in
      [abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]) ;;
      *)
        SUM=0
        return
        ;;
    esac
    name=_$ch
    # Indirect expansion reads the weight without eval or a fork.
    SUM=$((SUM + ${!name:-0}))
  done
}

# Sum the letter weights of a word (a=1 .. z=26, A=27 .. Z=52) and leave the
# total in the global SUM (a variable is used instead of stdout so callers do
# not need a subprocess). The weight is the letter's 1-based position in a
# lookup string, so no per-letter variables are required.
# Arguments: $1 - word to sum
# Globals:   SUM (written); 0 when $1 is empty or contains a non-letter
sum_word2() {
  SUM=0
  local letters="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
  local idx ch before
  for ((idx = 0; idx < ${#1}; idx++)); do
    ch=${1:idx:1}
    # Text before the first occurrence of ch; the whole table when ch is absent.
    before=${letters%%"$ch"*}
    if ((${#before} == ${#letters})); then
      SUM=0
      return
    fi
    SUM=$((SUM + ${#before} + 1))
  done
}

# Sum the letter weights of a word by letting sed build an arithmetic
# expression from it and then evaluating that expression. The total is left
# in the global SUM instead of being echoed from a subprocess.
# Requires:  gen_chars (defines _a.._Z)
# Arguments: $1 - word to sum
# Globals:   SUM (written); 0 when $1 is empty or contains a non-letter
sum_word3() {
  # The generated text is passed to eval, so refuse anything but ASCII letters.
  case $1 in
    *[!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]*)
      SUM=0
      return
      ;;
  esac
  local terms
  # "bat" becomes '$_b+$_a+$_t+'; the trailing 0 added below closes the sum.
  terms=$(printf '%s' "$1" | sed -E -e 's,.,$_&+,g')
  eval "SUM=\$(( ${terms}0 ))"
  #echo "SUM($1)=$SUM"
}

# Filter out all words of incorrect length, then compare letter-weight sums
# computed by sum_word: i.e. a=1, b=2, ... and "abbc" = 1+2+2+3 = 8.
# Only lines whose sum equals the word's sum are passed to
# check_sorted_versus_not for the sort-and-compare confirmation.
# Globals: word, dict (read); sortedWord, word_sum, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg3() {
  gen_chars
  sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')
  sum_word "$word"
  word_sum=$SUM
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sum_word "$line"
      line_sum=$SUM
      if [[ "$word_sum" == "$line_sum" ]]; then
        check_sorted_versus_not "$sortedWord" "$line"
      fi
    done
}

# Filter out all words of incorrect length, then compare letter-weight sums
# computed by sum_word2: i.e. a=1, b=2, ... and "abbc" = 1+2+2+3 = 8.
# Only lines whose sum equals the word's sum are passed to
# check_sorted_versus_not for the sort-and-compare confirmation.
# Globals: word, dict (read); sortedWord, word_sum, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg4() {
  sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')
  sum_word2 "$word"
  word_sum=$SUM
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sum_word2 "$line"
      line_sum=$SUM
      if [[ "$word_sum" == "$line_sum" ]]; then
        check_sorted_versus_not "$sortedWord" "$line"
      fi
    done
}

# Filter out all words of incorrect length, then compare letter-weight sums
# computed by sum_word3: i.e. a=1, b=2, ... and "abbc" = 1+2+2+3 = 8.
# Only lines whose sum equals the word's sum are passed to
# check_sorted_versus_not for the sort-and-compare confirmation.
# Globals: word, dict (read); sortedWord, word_sum, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg5() {
  gen_chars
  sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')
  sum_word3 "$word"
  word_sum=$SUM
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sum_word3 "$line"
      line_sum=$SUM
      if [[ "$word_sum" == "$line_sum" ]]; then
        check_sorted_versus_not "$sortedWord" "$line"
      fi
    done
}



# Build a letter-frequency histogram of a word in the global array SUM
# (a variable is used instead of stdout so callers do not need a subprocess).
# Bins 1-26 count a-z, bins 27-52 count A-Z, bin 0 is never incremented.
# Arguments: $1 - word to histogram
# Globals:   SUM (written); on the first non-letter, SUM[53] is set to -1 and
#            counting stops
sum_word4() {
  # 53 zeroed bins (indices 0..52).
  SUM=(0 0 0 0 0 0 0 0 0 0
       0 0 0 0 0 0 0 0 0 0
       0 0 0 0 0 0 0 0 0 0
       0 0 0 0 0 0 0 0 0 0
       0 0 0 0 0 0 0 0 0 0
       0 0 0)
  local letters="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
  local idx ch before bin
  for ((idx = 0; idx < ${#1}; idx++)); do
    ch=${1:idx:1}
    # Text before the first occurrence of ch; the whole table when ch is absent.
    before=${letters%%"$ch"*}
    if ((${#before} == ${#letters})); then
      SUM[53]=-1
      return
    fi
    bin=$((${#before} + 1))
    SUM[bin]=$((SUM[bin] + 1))
  done

 #echo ${SUM[*]}
}

# Check if two histograms are equal.
# Globals: _h1, SUM (read) - the two arrays being compared
# Returns: 0 when both arrays have the same size and the same value at every
#          index from 0 upward, 1 otherwise
hist_are_equal() {
  local idx
  # Array sizes differ?
  [[ ${#_h1[@]} -eq ${#SUM[@]} ]] || return 1

  # Quoted comparison: an empty or unset bin counts as a mismatch instead of
  # producing a test error that would be mistaken for "equal".
  for ((idx = 0; idx < ${#_h1[@]}; idx++)); do
    [[ "${_h1[idx]}" == "${SUM[idx]}" ]] || return 1
  done

  return 0
}

# Check if two histograms are equal (variant that reads the array size once).
# Globals: _h1, SUM (read) - the two arrays being compared
# Returns: 0 when both arrays have the same size and the same value at every
#          index from 0 upward, 1 otherwise
hist_are_equal2() {
  local idx
  local size=${#_h1[@]}
  # Array sizes differ?
  ((size == ${#SUM[@]})) || return 1

  # Quoted comparison: an empty or unset bin counts as a mismatch instead of
  # producing a test error that would be mistaken for "equal".
  for ((idx = 0; idx < size; idx++)); do
    [[ "${_h1[idx]}" == "${SUM[idx]}" ]] || return 1
  done

  return 0
}

# Filter out all words of incorrect length, then compare letter-frequency
# histograms produced by sum_word4, using hist_are_equal.
# Globals: word, dict (read); _h1, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg6() {
  sum_word4 "$word"
  # Copy the histogram element by element. A scalar assignment of ${SUM[*]}
  # would store one joined string, so the size check in the comparison would
  # never succeed and nothing would be printed.
  _h1=("${SUM[@]}")
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sum_word4 "$line"
      if hist_are_equal; then
        printf '%s\n' "$line"
      fi
    done
}

# Filter out all words of incorrect length, then compare letter-frequency
# histograms produced by sum_word4, using hist_are_equal2.
# Globals: word, dict (read); _h1, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg7() {
  sum_word4 "$word"
  # Copy the histogram element by element. A scalar assignment of ${SUM[*]}
  # would store one joined string, so the size check in the comparison would
  # never succeed and nothing would be printed.
  _h1=("${SUM[@]}")
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sum_word4 "$line"
      if hist_are_equal2; then
        printf '%s\n' "$line"
      fi
    done
}

# Announce and time one algorithm.
# Arguments: $1 - algorithm number; the function "alg<N>" is invoked
# Outputs:   the algorithm's name, its output, then the shell's timing report
run_test() {
  echo "alg$1"
  # The time keyword accepts an expanded command name directly, so no eval is
  # needed and nothing embedded in $1 gets executed as extra shell code.
  time "alg$1"
}

# Benchmark selection: only the uncommented lines run (and time) an algorithm;
# the argument is the N of the algN function to invoke.
#run_test 1
#run_test 2
#run_test 3
run_test 4
#run_test 5
run_test 6
#run_test 7
#/bin/bash
word=$1
#dict=单词
dict=/usr/share/dict/words
#dict=/usr/dict/words
alg1(){
sortedWord=`echo$word | grep-o.| sort | tr-d'\n'`
读行时
做
sortedLine=`echo$line | grep-o.| sort | tr-d'\n'`
如果[“$sortedWord”==“$sortedLine”]
然后
回音$线
fi
完成<$dict
}
检查\u排序\u与\u不(){
本地字=$1
本地行=`echo$2 | grep-o.| sort | tr-d'\n'`
如果[“$word”==“$line”]
然后
回声2美元
fi
}
#过滤掉所有长度不正确的单词
alg2(){
sortedWord=`echo$word | grep-o.| sort | tr-d'\n'`
grep_string=“^`echo-n$word | tr'a-zA-Z'.`\$”
grep“$grep_string”“$dict”|\
读行时
做
sortedLine=`echo$line | grep-o.| sort | tr-d'\n'`
如果[“$sortedWord”==“$sortedLine”]
然后
回音$线
fi
完成
}
#创建许多变量,如下所示:
#_a=1,_b=2_z=26,_A=27,_B=28_Z=52
gen_chars(){
#[-n“$GEN_CHARS”]&返回
GEN_CHARS=1
局部alpha=“abcdefghijklmnopqrstuvxyz”
本地大写字母=`echo-n$alpha | tr'a-z''a-z'`
本地两个=“$alpha$upperalpha”
for((i=0;i<${#两者};i++)
做
ACHAR=${both:i:1}
评估“$ACHAR=$((i+1))”
完成
}
#我认为在var中返回值比在子进程中回显值更快。
#尝试通过构建一个算术表达式,将单词一次求和一个字符
#然后计算这个表达式。
#需要:gen_chars
sum_word(){
总和=0
本地s=“”
#一次解析输入一个字符
对于((i=0;i<${1};i++)
做
亚喀尔=${1:i:1}
s=“$s\$\u$ACHAR+”
完成
总和=$($(eval echo-n${s}0)))
}
#我认为在var中返回值比在子进程中回显值更快。
#尝试使用case语句将单词一次求和一个字符。
sum_word2(){
总和=0
本地s=“”
#一次解析输入一个字符
对于((i=0;i<${1};i++)
做
亚喀尔=${1:i:1}
案件$ACHAR
a) 总和=$((总和+1));;
b) 总和=$((总和+2));;
c) 总和=$((总和+3));;
d) 总和=$((总和+4));;
e) 总和=$((总和+5));;
f) 总和=$((总和+6));;
g) 总和=$((总和+7));;
h) 总和=$((总和+8));;
i) 总和=$((总和+9));;
j) 总和=$((总和+10));;
k) 总和=$((总和+11));;
l) 总和=$((总和+12));;
m) 总和=$((总和+13));;
n) 总和=$((总和+14));;
o) 总和=$((总和+15));;
p) 总和=$((总和+16));;
q) 总和=$((总和+17));;
r) 总和=$((总和+18));;
s) 总和=$((总和+19));;
t) 总和=$((总和+20));;
u) 总和=$((总和+21));;
v) 总和=$((总和+22));;
w) 总和=$((总和+23));;
x) 总和=$((总和+24));;
y) 总和=$((总和+25));;
z) 总和=$((总和+26));;
A) 总和=$((总和+27));;
B) 总和=$((总和+28));;
C) 总和=$((总和+29));;
D) 总和=$((总和+30));;
E) 总和=$((总和+31));;
F) 总和=$((总和+32));;
G) 总和=$((总和+33));;
H) 总和=$((总和+34));;
一) 总和=$((总和+35));;
J) 总和=$((总和+36));;
K) 总和=$((总和+37));;
五十) 总和=$((总和+38));;
M) 总和=$((总和+39));;
N) 总和=$((总和+40));;
O) 总和=$((总和+41));;
P) 总和=$((总和+42));;
Q) 总和=$((总和+43));;
R) 总和=$((总和+44));;
S) 总和=$((总和+45));;
T) 总和=$((总和+46));;
U) 总和=$((总和+47));;
五) 总和=$((总和+48));;
W) 总和=$((总和+49));;
十) 总和=$((总和+50));;
Y) 总和=$((总和+51));;
Z) 总和=$((总和+52));;
*)总和=0;返回;;
以撒
完成
}
#我认为在var中返回值比在子进程中回显值更快。
#尝试通过使用sed构建一个算术表达式,然后计算单词的和
#表情。
#需要:gen_chars
sum_word3(){
总和=$($(eval echo-n`echo-n$1 | sed-E-ne's,,$,$,+,pg'`0))
#echo“总和($1)=$SUM”
}
#过滤掉所有长度不正确的单词
#对单词中的字符求和:即a=1,b=2。。。而“abbc”=1+2+2+3=8
alg3(){
吉努查斯
sortedWord=`echo$word | grep-o.| sort | tr-d'\n'`
sum_word$word
word_sum=$sum
grep_string=“^`echo-n$word | tr'a-zA-Z'.`\$”
grep“$grep_string”“$dict”|\
读行时
做
sum_word$行
第_行总和=$sum
如果[$word\u sum==$line\u sum]
然后
检查\u排序\u与\u非$sortedWord$行
fi
完成
}
#过滤掉所有长度不正确的单词
#对单词中的字符求和:即a=1,b=2。。。而“abbc”=1+2+2+3=8
#使用s
$ wc -l < /usr/share/dict/words
479829

$ time ./shell.sh look
found: kolo
found: look

real    0m1.361s
user    0m1.074s
sys     0m0.015s
# Print every dictionary line that is an anagram of the target word.
# Usage: ./shell.sh [word]      (word defaults to "table")
# Needs gawk: asort() is a gawk extension.
dict="/usr/share/dict/words"
awk -v w="${1:-table}" 'BEGIN{
  # Split the target into characters and sort them once.
  m=split(w,c,"")
  b=asort(c,chars)
}
length($0)==length(w){
  # Same length: sort this line and compare position by position.
  f=0
  n=split($0,t,"")
  e=asort(t,d)
  for(i=1;i<=e;i++) {
    if(d[i]!=chars[i]){
        f=1;break
    }
  }
  if(!f) print $0
}' "$dict"
$ time ./shell.sh #looking for table
ablet
batel
belat
blate
bleat
tabel
table

real    0m1.416s
user    0m1.343s
sys     0m0.014s

$ time ./shell.sh #looking for chairs
chairs
ischar
rachis

real    0m1.697s
user    0m1.660s
sys     0m0.014s

$ time perl perl.pl #using beamrider's Perl script
table
tabel
ablet
batel
blate
bleat
belat

real    0m2.680s
user    0m1.633s
sys     0m0.881s

$ time perl perl.pl # looking for chairs
chairs
ischar
rachis

real    0m14.044s
user    0m8.328s
sys     0m5.236s
#!/bin/bash

# Word whose anagrams are searched for (first command-line argument).
word=$1
# Dictionary file that the algorithms read line by line; the commented-out
# assignments are alternative locations.
#dict=words
dict=/usr/share/dict/words
#dict=/usr/dict/words

# Baseline algorithm: sort the letters of every dictionary line and compare
# them with the sorted letters of $word. Runs a pipeline per dictionary line.
# Globals: word, dict (read); sortedWord, sortedLine, line (written)
# Outputs: every matching dictionary line on stdout
alg1() {
  sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')

  # read -r keeps backslashes literal; the quoting stops glob characters in
  # the word or in a dictionary line from being expanded.
  while read -r line; do
    sortedLine=$(printf '%s\n' "$line" | grep -o . | sort | tr -d '\n')
    if [[ "$sortedWord" == "$sortedLine" ]]; then
      printf '%s\n' "$line"
    fi
  done < "$dict"
}

# Print the candidate word when it is an anagram of the reference.
# Arguments: $1 - reference word with its letters already sorted
#            $2 - candidate word (letters in natural order)
# Outputs:   $2 on stdout when its sorted letters equal $1
check_sorted_versus_not() {
  local word=$1
  local line
  # Quoted so a candidate containing glob characters is compared literally.
  line=$(printf '%s\n' "$2" | grep -o . | sort | tr -d '\n')
  if [[ "$word" == "$line" ]]; then
    printf '%s\n' "$2"
  fi
}

# Filter out all words of incorrect length first (grep with one "." per
# letter of $word), then sort-and-compare only the remaining lines.
# Globals: word, dict (read); sortedWord, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg2() {
  sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')
  # Every ASCII letter becomes ".", giving an anchored same-length pattern.
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sortedLine=$(printf '%s\n' "$line" | grep -o . | sort | tr -d '\n')
      if [[ "$sortedWord" == "$sortedLine" ]]; then
        printf '%s\n' "$line"
      fi
    done
}


# Create one global weight variable per ASCII letter:
# _a=1, _b=2, ... _z=26, _A=27, _B=28, ... _Z=52
# Globals: GEN_CHARS (set to 1); _a.._z and _A.._Z (written)
gen_chars() {
#  [ -n "$GEN_CHARS" ] && return
  GEN_CHARS=1
  local letters="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
  local idx
  for ((idx = 0; idx < ${#letters}; idx++)); do
    # printf -v assigns to a computed variable name without needing eval.
    printf -v "_${letters:idx:1}" '%d' "$((idx + 1))"
  done
}

# Sum the letter weights of a word, one character at a time, and leave the
# total in the global SUM (a variable is used instead of stdout so callers do
# not need a subprocess).
# Requires:  gen_chars (defines _a.._Z); an unset weight counts as 0.
# Arguments: $1 - word to sum
# Globals:   SUM (written); 0 when $1 is empty or contains a non-letter
sum_word() {
  SUM=0
  local idx ch name
  for ((idx = 0; idx < ${#1}; idx++)); do
    ch=${1:idx:1}
    # Only ASCII letters have a weight variable; anything else would form an
    # invalid variable name, so give up with SUM=0.
    case $ch in
      [abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]) ;;
      *)
        SUM=0
        return
        ;;
    esac
    name=_$ch
    # Indirect expansion reads the weight without eval or a fork.
    SUM=$((SUM + ${!name:-0}))
  done
}

# Sum the letter weights of a word (a=1 .. z=26, A=27 .. Z=52) and leave the
# total in the global SUM (a variable is used instead of stdout so callers do
# not need a subprocess). The weight is the letter's 1-based position in a
# lookup string, so no per-letter variables are required.
# Arguments: $1 - word to sum
# Globals:   SUM (written); 0 when $1 is empty or contains a non-letter
sum_word2() {
  SUM=0
  local letters="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
  local idx ch before
  for ((idx = 0; idx < ${#1}; idx++)); do
    ch=${1:idx:1}
    # Text before the first occurrence of ch; the whole table when ch is absent.
    before=${letters%%"$ch"*}
    if ((${#before} == ${#letters})); then
      SUM=0
      return
    fi
    SUM=$((SUM + ${#before} + 1))
  done
}

# Sum the letter weights of a word by letting sed build an arithmetic
# expression from it and then evaluating that expression. The total is left
# in the global SUM instead of being echoed from a subprocess.
# Requires:  gen_chars (defines _a.._Z)
# Arguments: $1 - word to sum
# Globals:   SUM (written); 0 when $1 is empty or contains a non-letter
sum_word3() {
  # The generated text is passed to eval, so refuse anything but ASCII letters.
  case $1 in
    *[!abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]*)
      SUM=0
      return
      ;;
  esac
  local terms
  # "bat" becomes '$_b+$_a+$_t+'; the trailing 0 added below closes the sum.
  terms=$(printf '%s' "$1" | sed -E -e 's,.,$_&+,g')
  eval "SUM=\$(( ${terms}0 ))"
  #echo "SUM($1)=$SUM"
}

# Filter out all words of incorrect length, then compare letter-weight sums
# computed by sum_word: i.e. a=1, b=2, ... and "abbc" = 1+2+2+3 = 8.
# Only lines whose sum equals the word's sum are passed to
# check_sorted_versus_not for the sort-and-compare confirmation.
# Globals: word, dict (read); sortedWord, word_sum, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg3() {
  gen_chars
  sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')
  sum_word "$word"
  word_sum=$SUM
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sum_word "$line"
      line_sum=$SUM
      if [[ "$word_sum" == "$line_sum" ]]; then
        check_sorted_versus_not "$sortedWord" "$line"
      fi
    done
}

# Filter out all words of incorrect length, then compare letter-weight sums
# computed by sum_word2: i.e. a=1, b=2, ... and "abbc" = 1+2+2+3 = 8.
# Only lines whose sum equals the word's sum are passed to
# check_sorted_versus_not for the sort-and-compare confirmation.
# Globals: word, dict (read); sortedWord, word_sum, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg4() {
  sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')
  sum_word2 "$word"
  word_sum=$SUM
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sum_word2 "$line"
      line_sum=$SUM
      if [[ "$word_sum" == "$line_sum" ]]; then
        check_sorted_versus_not "$sortedWord" "$line"
      fi
    done
}

# Filter out all words of incorrect length, then compare letter-weight sums
# computed by sum_word3: i.e. a=1, b=2, ... and "abbc" = 1+2+2+3 = 8.
# Only lines whose sum equals the word's sum are passed to
# check_sorted_versus_not for the sort-and-compare confirmation.
# Globals: word, dict (read); sortedWord, word_sum, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg5() {
  gen_chars
  sortedWord=$(printf '%s\n' "$word" | grep -o . | sort | tr -d '\n')
  sum_word3 "$word"
  word_sum=$SUM
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sum_word3 "$line"
      line_sum=$SUM
      if [[ "$word_sum" == "$line_sum" ]]; then
        check_sorted_versus_not "$sortedWord" "$line"
      fi
    done
}



# Build a letter-frequency histogram of a word in the global array SUM
# (a variable is used instead of stdout so callers do not need a subprocess).
# Bins 1-26 count a-z, bins 27-52 count A-Z, bin 0 is never incremented.
# Arguments: $1 - word to histogram
# Globals:   SUM (written); on the first non-letter, SUM[53] is set to -1 and
#            counting stops
sum_word4() {
  # 53 zeroed bins (indices 0..52).
  SUM=(0 0 0 0 0 0 0 0 0 0
       0 0 0 0 0 0 0 0 0 0
       0 0 0 0 0 0 0 0 0 0
       0 0 0 0 0 0 0 0 0 0
       0 0 0 0 0 0 0 0 0 0
       0 0 0)
  local letters="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
  local idx ch before bin
  for ((idx = 0; idx < ${#1}; idx++)); do
    ch=${1:idx:1}
    # Text before the first occurrence of ch; the whole table when ch is absent.
    before=${letters%%"$ch"*}
    if ((${#before} == ${#letters})); then
      SUM[53]=-1
      return
    fi
    bin=$((${#before} + 1))
    SUM[bin]=$((SUM[bin] + 1))
  done

 #echo ${SUM[*]}
}

# Check if two histograms are equal.
# Globals: _h1, SUM (read) - the two arrays being compared
# Returns: 0 when both arrays have the same size and the same value at every
#          index from 0 upward, 1 otherwise
hist_are_equal() {
  local idx
  # Array sizes differ?
  [[ ${#_h1[@]} -eq ${#SUM[@]} ]] || return 1

  # Quoted comparison: an empty or unset bin counts as a mismatch instead of
  # producing a test error that would be mistaken for "equal".
  for ((idx = 0; idx < ${#_h1[@]}; idx++)); do
    [[ "${_h1[idx]}" == "${SUM[idx]}" ]] || return 1
  done

  return 0
}

# Check if two histograms are equal (variant that reads the array size once).
# Globals: _h1, SUM (read) - the two arrays being compared
# Returns: 0 when both arrays have the same size and the same value at every
#          index from 0 upward, 1 otherwise
hist_are_equal2() {
  local idx
  local size=${#_h1[@]}
  # Array sizes differ?
  ((size == ${#SUM[@]})) || return 1

  # Quoted comparison: an empty or unset bin counts as a mismatch instead of
  # producing a test error that would be mistaken for "equal".
  for ((idx = 0; idx < size; idx++)); do
    [[ "${_h1[idx]}" == "${SUM[idx]}" ]] || return 1
  done

  return 0
}

# Filter out all words of incorrect length, then compare letter-frequency
# histograms produced by sum_word4, using hist_are_equal.
# Globals: word, dict (read); _h1, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg6() {
  sum_word4 "$word"
  # Copy the histogram element by element. A scalar assignment of ${SUM[*]}
  # would store one joined string, so the size check in the comparison would
  # never succeed and nothing would be printed.
  _h1=("${SUM[@]}")
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sum_word4 "$line"
      if hist_are_equal; then
        printf '%s\n' "$line"
      fi
    done
}

# Filter out all words of incorrect length, then compare letter-frequency
# histograms produced by sum_word4, using hist_are_equal2.
# Globals: word, dict (read); _h1, grep_string (written)
# Outputs: every matching dictionary line on stdout
alg7() {
  sum_word4 "$word"
  # Copy the histogram element by element. A scalar assignment of ${SUM[*]}
  # would store one joined string, so the size check in the comparison would
  # never succeed and nothing would be printed.
  _h1=("${SUM[@]}")
  grep_string="^$(printf '%s' "$word" | tr 'a-zA-Z' '.')\$"

  grep "$grep_string" "$dict" |
    while read -r line; do
      sum_word4 "$line"
      if hist_are_equal2; then
        printf '%s\n' "$line"
      fi
    done
}

# Announce and time one algorithm.
# Arguments: $1 - algorithm number; the function "alg<N>" is invoked
# Outputs:   the algorithm's name, its output, then the shell's timing report
run_test() {
  echo "alg$1"
  # The time keyword accepts an expanded command name directly, so no eval is
  # needed and nothing embedded in $1 gets executed as extra shell code.
  time "alg$1"
}

# Benchmark selection: only the uncommented lines run (and time) an algorithm;
# the argument is the N of the algN function to invoke.
#run_test 1
#run_test 2
#run_test 3
run_test 4
#run_test 5
run_test 6
#run_test 7
#!/usr/bin/perl
# Print every input line (from the remaining file arguments, or STDIN) whose
# characters, once sorted, equal the sorted characters of the first argument.
my $target = join '', sort split //, shift @ARGV;
while (my $line = <>) {
    chomp $line;
    my $key = join '', sort split //, $line;
    print "$line\n" if $key eq $target;
}
'^[letters_you_care_about]*$'
# Matches lines consisting only of characters taken from $W, in any order and
# any number (including none), so repeated letters and shorter words match too.
grep "^[$W]*$" /usr/dict/words
# Return the argument list with duplicate values removed; the first occurrence
# of each value is kept, in its original position.
sub dedupe {
    my %seen_entries;
    my @new_list = grep { !$seen_entries{$_}++ } @_;
    return @new_list;
}

# Return every ordering of the characters of $word, as a list of strings.
# Each character in turn is taken as the first letter and combined with the
# permutations of the remaining characters. Repeated letters yield repeated
# entries, and an empty string yields an empty list.
sub find_all_permutations {
    my ($word) = @_;
    my @permutations;

    if (length($word) == 1) {
        @permutations = ($word);
        return @permutations;
    }

    for my $i (0 .. length($word) - 1) {
        my $letter       = substr($word, $i, 1);
        my $rest_of_word = substr($word, 0, $i) . substr($word, $i + 1);
        push @permutations,
            map { $letter . $_ } find_all_permutations($rest_of_word);
    }

    return @permutations;
}

$words_file = '/usr/share/dict/words';
$word = 'table';

# Load the dictionary once into a hash. Looking each permutation up in memory
# replaces one grep process per permutation and keeps the permutation text out
# of a shell command line.
open(my $dict_fh, '<', $words_file) or die "Cannot open $words_file: $!\n";
my %is_word;
while (my $entry = <$dict_fh>) {
    chomp $entry;
    $is_word{$entry} = 1;
}
close($dict_fh);

# Print each distinct permutation of $word that is a dictionary entry.
@all_permutations = dedupe(find_all_permutations($word));
foreach $permutation (@all_permutations) {
    print $permutation . "\n" if $is_word{$permutation};
}
an -w "tab" -m 3