提高Python拼图的性能
为了获得面试机会,我被要求解决以下难题。我昨天收到通知说我没有被选中(因为无法让程序表现得足够好),我想知道是否有人知道如何帮助我解决这个问题,使它表现得更好。它是用Python编写的;尽管一两年前我上过两门Python课程,但与我曾经工作过的领域(嵌入式C语言18年!)相比,我还是个新手。有什么帮助或建议都可以让我从这次经历中学习。谜题挑战描述如下: 将“单词”视为大写字母A-Z的任意序列(不限于“字典单词”)。对于任何至少有两个不同字母的单词,都存在由相同字母组成但顺序不同的其他单词(例如,STATIONARILY/ANTIROYALIST,两者碰巧都是字典中的单词;就我们而言,“AAIILNORSTTY”也是由这组字母组成的一个“单词”)。然后,我们可以根据每个单词在由同一组字母组成、按字母顺序排列的所有单词列表中的位置,为每个单词分配一个数字。一种方法是生成整个单词列表并找到所需的单词,但如果单词很长,这样做会很慢。请编写一个程序:它将一个单词作为命令行参数,并将该单词的数字打印到标准输出。不要使用上述生成整个列表的方法。您的程序应该能够接受长度不超过20个字母(可能有一些字母重复)的任何单词,并且应该使用不超过1GB的内存,运行时间不超过500毫秒。我们检查的任何答案都将符合64位整数。 示例单词及其排名:
ABAB = 2
AAAB = 1
BAAA = 4
QUESTION = 24572
BOOKKEEPER = 10743
NONINTUITIVENESS = 8222334634
您的程序将根据其运行速度和代码编写的清晰程度进行判断。我们将运行您的程序以及阅读源代码,所以您可以做任何事情,使这个过程更容易将不胜感激 要运行此拼图:您可以在命令行输入中输入一个单词(它处于当前状态),或者如果您想从文件中读取上面提供的单词,您可以注释掉
raw_input
以接收一个单词,然后通过取消注释该代码来读取words.txt
文件
在节目的主要部分:
从命令行逐字输入-代码的当前状态-将接受命令行的字输入
getInputFromCommandLine()
--以这种方式运行:命令行:python athenaPuzzleIterDeep.py
如果要从要读取的单词文件words.txt
获取输入,请取消注释以下内容
words.txt
将与代码一起发送
--以这种方式运行:命令行:python athenaPuzzleIterDeep.py
--但是,您还必须将words.txt
文件与python程序放在同一目录中
wordList=loadWords()
wordNumberOrdering(wordList)
调查的性能增强最终不够好:迭代深化:
利用BFS(广度优先搜索)的时间优势和浅解优势,尝试迭代深化以获得DFS(深度优先搜索)的空间优势。因此,您可以尝试使用深度限制运行DFS:尝试树的深度=1,然后是2、3等等。所以,不要在每个树级别构建整个图,而是调用DFS以查看是否找到了解决方案。DFS将首先从树的子节点的左侧搜索,但最终将搜索每个节点,所以会花费太多时间而不会占用太多空间。但是,如果您使用BFS中的级别限制思想,只需逐级构建树,然后使用DFS进行搜索,这就是迭代深化的思想
迭代深化没有提供所需的性能改进。我还尝试包含优先级队列python导入,但无法在我的linux版本上正确安装它
Words.txt文件包含:
ABAB
AAAB
BAAA
QUESTION
ABCDEFGHIJKLMNOPQRSTUVWXYZ
BOOKKEEPER
BOOKKEEPERS
STATIONARILY
NONINTUITIVENESS
代码如下:
import itertools
import random
import string
import sys
import time
from collections import deque
from functools import update_wrapper
from math import factorial
# The DFS solution recurses deeply for larger words; raise the default limit.
sys.setrecursionlimit(5000)

# works for functions with hashable (immutable) arguments
# Example usage: permutations = memoize(itertools.permutations)
# All lowercase letters a-z; built from the stdlib instead of a hand-typed list.
ALPHABET_LETTERS = list(string.ascii_lowercase)
# Shared cache used by every function wrapped with memoize() below.
globalMemCache = {}
def memoize(f):
    """Return a wrapper around *f* that caches results keyed by argument tuple.

    Only hashable (immutable) arguments are supported.  The cache is the
    shared module-level globalMemCache, stored as a function attribute so
    its contents can be inspected.

    NOTE(review): when used to memoize itertools.permutations, the cached
    value is an *iterator*; a repeated call with the same word returns an
    already-exhausted iterator.  Callers should materialize the result --
    TODO confirm this is acceptable for the intended usage.
    """
    def memf(*x):
        # (Removed a dead `permutationsInCache = []` that was allocated on
        # every call and never used.)
        if x not in memf.cache:
            memf.cache[x] = f(*x)
        return memf.cache[x]
    # Initialize wrapper function's cache; shared across all memoized functions.
    memf.cache = globalMemCache
    return memf
def isValidWord(word):
    """Return True if *word* is at most 10 letters, all from a-z.

    Longer words are rejected with an explanatory message (this solution
    cannot handle them); returns False in that case or when any character
    falls outside the alphabet.
    """
    lenWord = len(word)
    if lenWord > 20:
        print("word > 20 letters is NOT acceptable as input")
        print(" ")
        return False
    elif lenWord >= 11:
        print("word >= 11 letters is NOT acceptable as input for this current iterative deepening solution")
        print("my iterative deepening solution takes too much time and space for words >= 11 letters")
        print(" ")
        return False
    # Early exit on the first bad letter (the original kept scanning the
    # whole word after a failure was already known).
    for letter in word:
        if letter not in ALPHABET_LETTERS:
            return False
    return True
# Memoized permutations generator.
# NOTE(review): memoize stores the itertools.permutations *iterator*; a
# repeated word would receive an already-exhausted iterator — TODO confirm.
permutationsMemoized = memoize(itertools.permutations)
# Input file for batch mode: one word per line.
WORDLIST_FILENAME = "words.txt"
def loadWords():
    """Read WORDLIST_FILENAME and return its lines as a list of
    stripped, lowercased words.

    Uses a context manager so the file is always closed (the original
    opened it unbuffered and never closed it).
    """
    print("Loading word list from file...")
    with open(WORDLIST_FILENAME, 'r') as inFile:
        wordList = [line.strip().lower() for line in inFile]
    print("  " + str(len(wordList)) + " words loaded.")
    return wordList
def remove_duplicates(l):
    """Return the unique items of *l* as a list (order is arbitrary)."""
    unique_items = set(l)
    return [item for item in unique_items]
def printPath(path):
    """Return the nodes of *path* joined as 'a->b->c'.

    Despite the name this only builds and returns the string; it does
    not print.  Uses str.join instead of the original quadratic
    concatenation loop.
    """
    return '->'.join(str(step) for step in path)
class Node(object):
    """A permutation entry: its string form plus its 0-based rank index."""
    def __init__(self, name, index):
        # Normalize the name to a string so str() comparisons are uniform.
        self.name = str(name)
        self.index = index
    def getIndex(self):
        """Return the node's rank index."""
        return self.index
    def getName(self):
        """Return the node's display name."""
        return self.name
    def __str__(self):
        return self.getName()
class Edge(object):
    """A directed edge from a source node to a destination node."""
    def __init__(self, src, dest):
        self.src = src
        self.dest = dest
    def getSource(self):
        """Return the edge's origin node."""
        return self.src
    def getDestination(self):
        """Return the edge's target node."""
        return self.dest
    def __str__(self):
        return '%s->%s' % (self.src, self.dest)
class Queue:
    """Simple FIFO queue.

    Backed by collections.deque so both push and pop are O(1); the
    original list-based version paid O(n) for every push via
    list.insert(0, ...).
    """
    def __init__(self):
        # Newest items sit at the left end; pop takes the oldest from the right.
        self.list = deque()
    def push(self, item):
        """Enqueue *item*."""
        self.list.appendleft(item)
    def pop(self):
        """Dequeue and return the oldest item."""
        return self.list.pop()
    def isEmpty(self):
        """Return True when the queue holds no items."""
        return len(self.list) == 0
def DFSShortest(graph, start, end, path = [], shortest = None, index = 1000):
    """Depth-first search from start to end; returns (path, index).

    index stays at the sentinel value 1000 until the end node is found,
    at which point it becomes the found node's .index (its rank).
    Nodes are compared by their str() form rather than identity.

    NOTE(review): 'path = []' is a mutable default argument; it is safe
    here only because the body rebinds 'path' instead of mutating it.
    """
    newGraph = graph
    path = path + [start]     # new list per call; the shared default [] is never mutated
    if str(start) == str(end):
        # Found the target: its stored index is the answer.
        index = start.index
        newPath = path
        return newPath,index
    else:
        anyChildren = graph.childrenOf(start)
        # childrenOf returns None (not []) for a node with no children.
        if (anyChildren != None) and (index == 1000):
            for node in graph.childrenOf(start):
                if node not in path: #avoid cycles
                    # Only explore if this prefix could still beat the best path found.
                    if (shortest == None) or ( (shortest != None) and (len(path) < len(shortest))) :
                        newPath,index = DFSShortest(newGraph,node,end,path)
                        if newPath != None:
                            shortest = newPath
                            if (index != 1000):
                                # Target located somewhere below: unwind immediately.
                                return shortest,index
        elif (anyChildren == None) and (index == 1000):
            # Leaf node: back up through the parent map and keep searching.
            # NOTE(review): this re-descends from the parent, relying on the
            # cycle check above to terminate — verify intent.
            newPath,index = DFSShortest(newGraph,graph.parents[start],end,path)
            if newPath != None:
                shortest = newPath
                if (index != 1000):
                    return shortest,index
    return shortest,index
def BFS(graph, start, end, q):
    """Breadth-first search from start to end over *graph*.

    q is the (initially empty) list used as the frontier queue.
    Returns the end node's .index when reached, else None.

    Fixes two defects in the original: graph.childrenOf() returns None
    (never []) for a leaf, so the old `!= []` guard let the code iterate
    over None and raise TypeError; and a printPath() result was computed
    and discarded on every expansion.
    """
    initPath = [start]
    q.append(initPath)
    while len(q) != 0:
        tmpPath = q.pop(0)
        lastNode = tmpPath[len(tmpPath) - 1]
        # Nodes are matched by their str() form, consistent with DFSShortest.
        if str(lastNode) == str(end):
            return lastNode.index
        children = graph.childrenOf(lastNode)
        if children is not None:
            for linkNode in children:
                if linkNode not in tmpPath:  # avoid cycles
                    q.append(tmpPath + [linkNode])
    return None
class Digraph(object):
    """A directed graph of nodes and edges.

    Contract note: childrenOf() returns None -- not an empty list --
    when a node has no outgoing edges; DFSShortest relies on this.
    Only the most recent parent of each node is remembered.
    """
    def __init__(self):
        self.nodes = set([])
        self.edges = {}    # node -> list of destination nodes
        self.parents = {}  # destination node -> last source node seen
    def addNode(self, node):
        """Register *node*; raises ValueError if it is already present."""
        if node in self.nodes:
            raise ValueError('Duplicate node')
        self.nodes.add(node)
        self.edges[node] = []
    def addEdge(self, edge):
        """Record a directed edge and remember the destination's parent."""
        src = edge.getSource()
        dest = edge.getDestination()
        self.edges[src].append(dest)
        self.parents[dest] = src
    def childrenOf(self, node):
        """Return the node's destination list, or None when it is empty."""
        children = self.edges[node]
        if children:
            return children
        return None
    def hasNode(self, node):
        """Return True if *node* was added to this graph."""
        return node in self.nodes
    def __str__(self):
        lines = []
        for src in self.edges:
            for dest in self.edges[src]:
                lines.append(str(src) + '->' + str(dest))
        return '\n'.join(lines)
class Graph(Digraph):
    """Behaves identically to Digraph; kept as a distinct name for callers."""
    def addEdge(self, edge):
        # Delegate straight to the directed implementation.
        super(Graph, self).addEdge(edge)
def addEdgesForTreesWith4Nodes(g, childNode, factorNum, i, lenList, wordNodes):
    """Attach up to two children of wordNodes[childNode + 1] into *g*,
    for the tree level that contains exactly four new nodes.  Children
    whose computed index falls past the end of the node list are skipped.
    """
    parent = wordNodes[childNode + 1]
    for offset in (1, 2):
        childIdx = i + factorNum + offset
        if childIdx < lenList:
            g.addEdge(Edge(parent, wordNodes[childIdx]))
def addEdgesForTreesWithMoreThan4Nodes(g,childNode,factorNum,i,lenList,wordNodes, numChildrenNodesThisLevel, numChildrenNodesPreviousLevel):
    # Attach up to two children of wordNodes[childNode + i] for a tree
    # level with more than four new nodes; out-of-range indices are skipped.
    if (i + factorNum + numChildrenNodesPreviousLevel) < lenList:
        g.addEdge(Edge(wordNodes[childNode + i],wordNodes[i + factorNum + numChildrenNodesPreviousLevel]))
    # NOTE(review): the guard below tests `numChildrenNodesThisLevel + 1`
    # but the body indexes with `numChildrenNodesPreviousLevel + 1` — the
    # mismatch looks unintentional and could allow an IndexError or skip
    # a valid edge; confirm which variable was meant.
    if (i + factorNum + numChildrenNodesThisLevel + 1) < lenList:
        g.addEdge(Edge(wordNodes[childNode + i],wordNodes[i + factorNum + numChildrenNodesPreviousLevel + 1]))
"""
Can try using iterative deepening to get the DFS space advantage with BFS's time and shallow
solution advantage. So can try running DFS with depth limits: try depth of tree = 1, then 2, 3,...etc
"""
"""
Also - you can avoid the log(n) overhead in DFS/BFS with a priority queue (had trouble downloaded and installing on my computer!)
"""
def iterativeDeepeningSolution(wordNodes, saveWord, saveWordTuple, lenList):
    """Search for saveWord's rank by iterative deepening.

    wordNodes holds a Node per distinct permutation, sorted; a Node's
    .index is its 0-based rank.  The tree over the node list is built
    one level at a time and DFSShortest is run after each level; the
    1-based rank is printed when the matching node is found.  Gives up
    with a message once a level would exceed 32 children.
    """
    #rather than building entire graph, at each tree level, call DFS to see if solution found
    #DFS will search going down left side of tree's child nodes first, but will eventually search
    #every node, so takes too much time while not taking much space. However, if you use the level
    #limitation idea from BFS, only building the tree level by level and then searching it with DFS,
    #that is the idea of iterative deepening.
    index = 0
    q = []                      # passed to DFSShortest as its 'path' argument
    shortest = None
    saveNodes = wordNodes
    i = 0                       # current tree level
    totalNodes = 1              # nodes placed in the graph so far (root counts)
    numChildrenNodesPreviousLevel = 0
    while i < lenList:
        index = 0
        if (i > 0):
            numChildrenNodesPreviousLevel = numChildrenNodesThisLevel
        numChildrenNodesThisLevel = 2**i #number of children nodes at level
        if (i > 0):
            totalNodes += numChildrenNodesThisLevel
        if (numChildrenNodesThisLevel > 1) and (numChildrenNodesThisLevel <= 32): #only search 32 children nodes or less (level 5 of tree, 2**5 = 32):
            #print "build graph - if previous level already searched - just add this level of children nodes"
            if (numChildrenNodesThisLevel == 2): #new graph since none built when it was just a root node
                g = Graph()
                for n in range(numChildrenNodesThisLevel + 1):
                    g.addNode(wordNodes[n])
            else: #use graph from last level of children added - don't rebuild graph
                n = numChildrenNodesThisLevel - 1
                while (n < lenList) and (n < (totalNodes)):
                    g.addNode(wordNodes[n])
                    n += 1
        elif (numChildrenNodesThisLevel > 32): #only search 32 children nodes or less (level 5 of tree, 2**5 = 32)
            # Bail out: the tree is now too wide for this approach.
            print "word graph just searched: " + str(saveWord)
            print "cannot go further searching in iterative deepening - tree will take too much space and time to search"
            print "Tree Level = " + str(i) + " num children at this level " + str(numChildrenNodesThisLevel) + " total nodes in graph " + str(totalNodes)
            print "Last Level Searched " + str(i - 1) + " num children at this level " + str(numChildrenNodesPreviousLevel) + " total nodes in graph " + str(totalNodes - numChildrenNodesThisLevel)
            print " "
            return
        if (numChildrenNodesThisLevel > 2):
            # Wire this level's children up to the previous level's nodes.
            childNode = 0
            while childNode < numChildrenNodesPreviousLevel:
                if (childNode > 0):
                    factorNum = childNode * 2
                else:
                    factorNum = childNode
                if (numChildrenNodesThisLevel == 4):
                    addEdgesForTreesWith4Nodes(g,childNode,factorNum,i,lenList,wordNodes)
                elif (numChildrenNodesThisLevel > 4): addEdgesForTreesWithMoreThan4Nodes(g,childNode,factorNum,i,lenList,wordNodes,numChildrenNodesThisLevel,numChildrenNodesPreviousLevel)
                childNode += 1
            startNode = wordNodes[0]
            endNode = Node(str(saveWordTuple),0)
            index = 1000        # sentinel: 1000 means "not found yet"
            path,index = DFSShortest(g, startNode, endNode, q, shortest, index)
            if (index != 1000): #made up error - not searching 1000 nodes or more at this time - soln found
                print saveWord + " = " + str(index + 1)
                print " "
                return
            i += 1
            wordNodes = saveNodes
        elif (numChildrenNodesThisLevel == 2): #so new graph just formed of 3 nodes (including root) - no edges on it yet
            g.addEdge(Edge(wordNodes[0],wordNodes[1]))
            g.addEdge(Edge(wordNodes[0],wordNodes[2]))
            startNode = wordNodes[0]
            endNode = Node(str(saveWordTuple),0)
            index = 1000        # sentinel: 1000 means "not found yet"
            path,index = DFSShortest(g, startNode, endNode, q, shortest, index)
            if (index != 1000): #made up error - not searching 1000 nodes or more at this time - soln found
                print saveWord + " = " + str(index + 1)
                print " "
                return
            i += 1
            wordNodes = saveNodes
        elif (numChildrenNodesThisLevel == 1):
            # Level 0: only the root exists; check it directly.
            startNode = wordNodes[0]
            oneNode = Node(str(saveWordTuple),0)
            if str(oneNode) == str(startNode):
                print saveWord + " = " + str(startNode.index + 1)
                print " "
                return
            else:
                i += 1
                wordNodes = saveNodes
def wordNumberOrdering(wordList):
    """Print the permutation rank for each word in *wordList*.

    Words longer than 10 letters are rejected with a message, mirroring
    the limits enforced by isValidWord().  The per-word pipeline is
    delegated to oneWordInputFromCommandLineAtATime, which the original
    duplicated line-for-line here (along with several unused locals).
    """
    for word in wordList:
        wordLen = len(word)
        if wordLen <= 10:
            oneWordInputFromCommandLineAtATime(word)
        elif wordLen > 20:
            print(word)
            print("word length too long (> 20 chars): " + str(wordLen))
            print(" ")
        else:  # 11..20 letters: too slow for the iterative-deepening search
            print(word)
            print("word length too long for this current solution to puzzle (>= 11 chars): " + str(wordLen))
            print(" ")
def oneWordInputFromCommandLineAtATime(word):
    """Rank a single word: build the alphabetically sorted list of its
    distinct letter permutations as Node objects, then run the
    iterative-deepening search, which prints the result.

    (Removed unused locals from the original: index, outputDict,
    noDupsStringList, and an append loop that merely copied the
    permutations iterator.)
    """
    saveWord = word
    saveWordTuple = tuple(saveWord)  # e.g. "ab" -> ('a', 'b'); matches Node names
    # Distinct permutations of the word's letters.
    noDupsList = remove_duplicates(permutationsMemoized(word))
    lenList = len(noDupsList)
    noDupsList.sort()
    # Node i carries index i: the word's 1-based rank minus one.
    wordNodes = [Node(str(name), i) for i, name in enumerate(noDupsList)]
    iterativeDeepeningSolution(wordNodes, saveWord, saveWordTuple, lenList)
def getInputFromCommandLine():
    """Interactive loop: prompt for words until the user enters ".".

    Each valid word (<= 10 letters, a-z only) is ranked and printed;
    invalid words produce an explanation and a re-prompt.

    Fixes a defect in the original: `guessWordLowCase + " is INVALID"`
    was a bare expression whose result was silently discarded — it is
    now actually printed.
    """
    takeInput = True
    while takeInput:
        guessWord = raw_input('Enter word, or a "." to indicate that you are finished: ').decode('utf-8')
        guessWordLowCase = guessWord.lower()
        print("word being considered " + guessWordLowCase)
        if guessWordLowCase == ".":
            takeInput = False
        elif not isValidWord(guessWordLowCase):
            print(guessWordLowCase + " is INVALID")
            print("Invalid word, please try again")
            print(" ")
        else:
            oneWordInputFromCommandLineAtATime(guessWordLowCase)
    print("Goodbye!")
if __name__ == '__main__':
    # Interactive mode: taking input word by word from the command line.
    getInputFromCommandLine()
    # Batch mode: uncomment the following two lines to read the input
    # words from words.txt (one word per line) instead.
    #wordList = loadWords()
    #wordNumberOrdering(wordList)
(注:原帖在此处重复粘贴了上面的Python源代码,但是是一份被机器翻译损坏的副本——关键字和标识符被译成了中文,并且在 loadWords() 函数中途被截断。完整可运行的代码请见上方以 `import random` 开头的清单。)
from math import factorial
from collections import Counter
def number_of_distinct_permutations(counts):
    """Return how many distinct arrangements a multiset has.

    counts: a Counter (or mapping) of item -> multiplicity.  This is the
    multinomial coefficient (sum of counts)! / product(count!).
    (The original iterated .items() and never used the keys.)
    """
    total = factorial(sum(counts.values()))
    for count in counts.values():
        total //= factorial(count)
    return total

def compute_index(word, index=0):
    """Return the 1-based rank of *word* among the alphabetically sorted
    distinct permutations of its letters.

    For each letter smaller than the first letter, every permutation
    beginning with it precedes *word*, so a whole multinomial block is
    added; the first letter itself contributes the rank of the tail,
    computed recursively.
    """
    if not word:
        return index + 1  # ranks are 1-based
    pending = Counter(word)
    head = word[0]
    for p in sorted(pending):
        if p < head:
            # All permutations starting with p come before word.
            index += number_of_distinct_permutations(pending - Counter(p))
        if p == head:
            index += compute_index(word[1:])
    return index
# Reference words and their expected ranks, taken from the puzzle statement.
test_data = {"ABAB": 2,
             "AAAB": 1,
             "BAAA": 4,
             "QUESTION": 24572,
             "BOOKKEEPER": 10743,
             "NONINTUITIVENESS": 8222334634}
# Print each word with its expected and computed rank for eyeball comparison.
print("word, reference, calculated")
for k,v in sorted(test_data.items()):
    print (k, v, compute_index(k))
word, reference, calculated
AAAB 1 1
ABAB 2 2
BAAA 4 4
BOOKKEEPER 10743 10743
NONINTUITIVENESS 8222334634 8222334634
QUESTION 24572 24572
from collections import Counter
from math import factorial
def number_of_distinct_permutations(counted):
    """Return the multinomial coefficient for the given multiplicities.

    counted: an iterable of per-item counts.  It is materialized first,
    because the original consumed it twice (once in sum(), once in the
    loop), which silently broke for one-shot iterators.
    """
    counts = list(counted)
    result = factorial(sum(counts))
    for each in counts:
        result //= factorial(each)
    return result

def anagram_number(iterable):
    """Return the 1-based rank of the sequence among the sorted distinct
    permutations of its items (items must be orderable, e.g. letters).

    Works right-to-left: each position adds the number of permutations
    of the already-seen suffix that would start with a smaller item.
    """
    elems = list(iterable)
    tally = Counter()
    index = 1
    while elems:
        current = elems.pop()
        tally[current] += 1
        for item in tally:
            if item < current:
                # Count suffix permutations beginning with the smaller item.
                tally[item] -= 1
                index += number_of_distinct_permutations(tally.values())
                tally[item] += 1  # restore the tally for the next candidate
    return index
#include <Windows.h>
#include <stdio.h>
ULONGLONG fact( ULONGLONG n )
{
    // Iterative n!; fact(0) == 1.  Fits in 64 bits for n <= 20, which
    // covers every word length this program accepts.
    ULONGLONG result = 1;
    ULONGLONG k;
    for ( k = 2; k <= n; k++ )
        result *= k;
    return result;
}
ULONGLONG multinom( INT mults[] ) // = (SUM(M))! / PROD(M!)
{
    // Multinomial coefficient over the 26 letter multiplicities:
    // the number of distinct words formed from this multiset.
    ULONGLONG total = 0;
    ULONGLONG result;
    INT i;

    for ( i = 0; i < 26; i++ )
        total += mults[i];

    result = fact(total);
    for ( i = 0; i < 26; i++ )
    {
        if ( mults[i] )            // fact(0) == 1, so zero entries are skipped
            result /= fact(mults[i]);
    }
    return result;
}
// uses a~z as alphabet; strings up to 20 chars.
INT main ( INT argc, LPSTR* argv )
{
    ULONGLONG index = 1;   // we'll add to this any earlier strings
    CHAR str[21];
    INT mults[26] = {0};   // multiplicities of 'a'..'z', initialized to zero
    CHAR *p;
    INT i;

    // Guard against missing or oversized input: the original performed
    // no checks and lstrcpy would overflow str[] on a long argument.
    if ( argc < 2 || lstrlen(argv[1]) > 20 )
    {
        printf("usage: rank <word of 1..20 lowercase letters>\n");
        return 1;
    }
    lstrcpy(str, argv[1]);

    for ( p = str; *p != 0; p++ )   // set multiplicities that are non-zero
    {
        mults[*p - 'a'] += 1;
    }

    for ( p = str; *p != 0; p++ )   // iterate through the characters of str
    {
        for ( i = 0; i < (*p - 'a'); i++ )   // check each character lexicographically before *p
        {
            if ( mults[i] )   // it's in the string; count (as earlier in the list) the strings that start with it
            {
                mults[i] -= 1;
                index += multinom(mults);
                mults[i] += 1;
            }
        }
        // At this point we've counted all the words that start with an earlier character.
        // Any remaining earlier words must match up to this point. So ...
        mults[*p - 'a'] -= 1;   // p will be incremented so, in effect, forget this character and move on
    }
    printf("index = %I64u\n", index);
    return 0;
}