Python 尝试在固定长度的多行上打印单个字符串并将成本降至最低

Python 尝试在固定长度的多行上打印单个字符串并将成本降至最低,python,algorithm,Python,Algorithm,首先是一些背景知识,我刚开始学习算法(我现在觉得我缺乏擅长的逻辑和推理能力),我一直在尝试将“这是一个示例文本”打印到不同的行中,每行最多7个字符,因此第一行将具有: this is (no spaces left in the end so cost 0) a [cost=6*6*6(The spaces left at the end of each line are cubed which will be the cost) ] sample [cost=1*1*1] text [c

首先是一些背景知识:我刚开始学习算法(我现在觉得自己缺乏做好这件事所需的逻辑和推理能力)。我一直在尝试把 “This is a sample text” 打印到多行中,每行最多 7 个字符,因此第一行将是:

this is  (no spaces left in the end so cost 0)
a  
[cost=6*6*6(The spaces left at the end of each line are cubed which will be the cost) ]
sample [cost=1*1*1]
text [cost= 3*3*3]

(Total cost = 0+216+1+27=244)
现在可以通过下面的排法进一步优化:

this [cost 3*3*3]
is a [cost 3*3*3]
sample [cost 1*1*1]
text [cost 3*3*3]

[Total cost = 27+27+1+27 = 82]
很明显,这里不能用贪心方法,而应该使用动态规划,但我的问题是,我无法找出可以被重用的子结构。我真的很想弄清楚该如何在 Python 中把成本条件与打印联系起来:我可以为每个单词编制索引,也可以得到每个单词的长度,但不知道打印时接下来该怎么做——目前的情况是整个字符串都打印在一行上(这是我到目前为止得到的结果)。 如果这是一个非常愚蠢的问题,我很抱歉,但我被卡住了,真的需要一些帮助。 谢谢


这就是我尝试实现代码的方式。虽然我试着在代码上运行了一些测试,但测试是由我的朋友编写的,我认为我没有得到正确的结果。任何帮助或建议都非常感谢。

print_test.py

 import os
 import sys
 from glob import glob

  #TODO -- replace this with your solution 
 from printing import print_neatly

 # Test driver: wrap every *.txt file in the current directory with
 # print_neatly() and log the cost it reports next to a cost recomputed
 # here from the returned lines.
 maxline = 80

 # The context manager closes (and flushes) the log even if one of the
 # input files makes print_neatly() raise.
 with open('output.log', 'w') as log:
     for source in glob('*.txt'):
         with open(source) as f:
             fulltext = f.read()

         words = fulltext.split()
         (cost, text) = print_neatly(words, maxline)

         # Double check the cost.  The final line is deliberately left out
         # of the recomputed total (text[0:-1]), so this only equals the
         # reported cost when print_neatly() also treats the last line as
         # free.
         truecost = 0
         for line in text[0:-1]:
             truecost += (maxline - len(line)) ** 3

         # Write the output and both costs.  log.write() is used instead
         # of the Python-2-only "print >>log" statement; the emitted text
         # is the same.
         log.write('----------------------\n')
         log.write('{0}\n'.format(source))
         log.write('----------------------\n')
         log.write('{0}\n'.format(text))
         log.write('----------------------\n')
         log.write('cost =  {0}\n'.format(cost))
         log.write('true cost =  {0}\n'.format(truecost))
         log.write('----------------------\n')

 # Echo the log to stdout.
 with open('output.log') as f:
     print(f.read())
printing.py

def print_neatly(wordlist, max):
    """Print ``wordlist`` wrapped to at most ``max`` characters per line.

    The layout is the one computed by :func:`print_line`.

    Args:
        wordlist: sequence of words (strings) in reading order.
        max: maximum number of characters allowed on one line.

    Returns:
        A ``(cost, lines)`` tuple: the total cost of the layout and the
        list of line strings that were printed.
    """
    (total_cost, dyn_print) = print_line(wordlist, len(wordlist), max)
    for dyn in dyn_print:
        # Parenthesised single-argument form works on Python 2 and 3.
        print(dyn)
    return total_cost, dyn_print


def cost(lines, max):
    """Return the sum over ``lines`` of ``(max - len(line)) ** 3``.

    Every line contributes, including the last one.
    """
    return sum((max - len(x)) ** 3 for x in lines)


def print_line(wordlist, count, max, results=None):
    """Find the minimum-cost split of ``wordlist`` into lines.

    Dynamic programming over prefixes: ``table[k]`` holds the best layout
    found for the first ``k`` words.  The layout for ``k`` words is built
    by trying every possible last line (the last word, the last two words
    joined by a space, ...) that still fits in ``max`` characters and
    appending it to the stored layout of the words before it.

    Args:
        wordlist: sequence of words (strings).
        count: ignored; kept so existing callers keep working.
        max: maximum number of characters allowed on one line.
        results: ignored; kept so existing callers keep working.

    Returns:
        ``(best_cost, best_lines)`` for the whole word list, where the cost
        is the one defined by :func:`cost` (the last line is charged too).
        An empty word list yields ``(0, [])``.

    Note:
        A word longer than ``max`` is not treated specially: the loop over
        candidate last lines does not run for a prefix ending in such a
        word, so that prefix keeps the one-word-per-line starting layout.
    """
    table = [([], 0)]
    for end in range(1, len(wordlist) + 1):
        # Start off assuming one word per line.  list() keeps later
        # ``layout + [line]`` concatenations valid for tuple input.
        best = list(wordlist[:end])
        best_cost = cost(best, max)
        start = end - 1
        line = wordlist[start]
        while len(line) <= max:
            attempt, attempt_cost = table[start]
            attempt = attempt + [line]
            attempt_cost += cost([line], max)
            if attempt_cost < best_cost:
                best = attempt
                best_cost = attempt_cost
            if start == 0:
                break
            # Pull one more word onto the candidate last line.
            start -= 1
            line = wordlist[start] + ' ' + line
        table.append((best, best_cost))

    best, best_cost = table[-1]
    return (best_cost, best)


#print_neatly(0,7)
def打印整齐(字表,最大):
#strings='这是一个示例字符串'
#拆分字符串并从中取出单词
#wordlist=strings.split()
(成本,动态打印)=打印行(字表,len(字表),最大值)
对于dyn in dyn_打印:
打印动态
退货成本,动态打印
def成本(行,最大值):
返回和([(行中x的最大长度(x))**3])
def print_行(字表、计数、最大值、结果={}):
结果=[([],0)]
对于范围(1,len(单词列表)+1)内的计数:
最佳=单词列表[:计数]
最佳成本=成本(最佳,最大)
mycount=count-1
line=wordlist[mycount]
而len(行)0:
mycount-=1
line=wordlist[mycount]+''+行
其他:
打破
结果+=[(最佳、最佳成本)]
#最佳印刷品
#打印最佳成本
退货(最佳成本,最佳)
#打印整齐(0,7)
需要测试的文本文件给了我这个输出,这里两个成本需要相同,我没有得到,有人能指出我哪里出了问题吗


成本=16036


真实成本=15911

一种方法是列出所有可能的备选方案,并选择成本最低的方案:

from functools import wraps

def cache(origfunc):
    ''' Decorator that memoizes origfunc on its positional arguments.

        The first call with a given argument tuple runs origfunc and stores
        the result; later calls with an equal tuple return the stored
        object.  Arguments must be hashable.  Keyword arguments are not
        accepted by the wrapper.

    '''
    memo = {}

    @wraps(origfunc)
    def lookup(*args):
        try:
            return memo[args]
        except KeyError:
            pass
        value = memo[args] = origfunc(*args)
        return value

    return lookup

@cache
def alternatives(t, m=7):
    ''' Enumerate every way of grouping consecutive words into lines.

        t is a tuple of word lengths and m the maximum line length.  Each
        grouping is a list holding the total length of every line (word
        lengths plus one separating space between words).  Groupings that
        would need a line longer than m are left out, so the result is
        empty when t is empty or some word cannot fit.

        >>> alternatives((4, 2, 1, 3), 7)
        [[4, 2, 1, 3], [4, 2, 5], [4, 4, 3], [7, 1, 3], [7, 5]]

    '''
    groupings = []
    width = -1  # cancels the separator counted in front of the first word
    for idx, word_len in enumerate(t):
        width += 1 + word_len
        if width > m:
            break
        rest = t[idx + 1:]
        if rest:
            # First line takes words 0..idx; recurse on what is left.
            groupings.extend([width] + sub for sub in alternatives(rest, m))
        else:
            # All words consumed: they form a single final line.
            groupings.append([width])
    return groupings

def cost(t, m=7):
    ''' Total penalty for a layout described by its line lengths.

        Each line of length x contributes (m - x) cubed; the contributions
        of all lines in t are added up.

            >>> cost((7, 1, 6, 4), m=7)  # 'this is', 'a', 'sample', 'text'
            244
            >>> cost((4, 4, 6, 4))       # 'this', 'is a', 'sample', 'text'
            82

    '''
    total = 0
    for line_length in t:
        total += (m - line_length) ** 3
    return total

def textwrap(s, m=7):
    ''' Given a string, return a list of strings with optimal line wrapping.

        The string is split on whitespace, every grouping produced by
        alternatives() is scored with cost() for the same maximum line
        length m, and the cheapest grouping is turned back into text.

        Returns an empty list when s contains no words.  Raises ValueError
        when a word is longer than m, because no grouping can hold it.

        >>> textwrap('This is a sample text', 7)
        ['This', 'is a', 'sample', 'text']
        >>> textwrap('')
        []

    '''
    words = s.split()
    if not words:
        return []
    t = tuple(map(len, words))
    longest = max(t)
    if longest > m:
        # alternatives() would return no grouping at all and min() would
        # fail with an unhelpful "empty sequence" message.
        raise ValueError(
            'word of length %d exceeds the maximum line length %d'
            % (longest, m))
    # Score with the caller's m; a bare key=cost would always use cost()'s
    # default of 7.
    lengths = min(alternatives(t, m), key=lambda lens: cost(lens, m))
    result = []
    worditer = iter(words)
    for length in lengths:
        line = []
        used = 0
        # Each word accounts for its length plus one separator, so the
        # counter first reaches `length` right after the line's last word.
        while used < length:
            word = next(worditer)
            line.append(word)
            used += len(word) + 1
        result.append(' '.join(line))
    return result


if __name__ == '__main__':
    # Run the doctests embedded in this module's docstrings and show the
    # summary.  The parenthesised form behaves the same on Python 2 and 3.
    import doctest
    print(doctest.testmod())
从functools导入包装
def缓存(origfunc):
d={}
@包裹(origfunc)
def包装(*args):
如果参数在d中:
返回d[args]
结果=origfunc(*args)
d[args]=结果
返回结果
返回包装器
@缓存
def替代品(t,m=7):
''给定单词长度的元组和最大行长度,
返回所有可能行分组的列表
显示每行的总长度。
>>>备选方案(4,2,1,3,7)
[[4, 2, 1, 3], [4, 2, 5], [4, 4, 3], [7, 1, 3], [7, 5]]
'''
如果不是t:
返回[]
alts=[]
s=0
对于枚举(t)中的i,x:
s+=x
如果s>m:
打破
tail=t[i+1:]
如果不是尾部:
备降附加([s])
打破
对于备选方案中的子盐(尾部,m):
附加高度([s]+附加高度)
s+=1
返回高度
def成本(t,m=7):
''评估给定线路长度的线路成本
>>>成本((7,1,6,4),m=7)#“这是”、“a”、“样本”、“文本”
244
>>>成本((4,4,6,4))#“这”、“是一个”、“示例”、“文本”
82
'''
返回和((m-x)**3代表t中的x)
def textwrap(s,m=7):
''给定一个字符串,结果为具有最佳换行的字符串列表
>>>打印文本包装('这是一个示例文本',7)
['This'、'isa'、'sample'、'text']
'''
words=s.split()
t=元组(映射(len,words))
长度=最小值(备选方案(t,m),关键值=成本)
结果=[]
worditer=iter(words)
对于长度:
行=[]
s=0
当s<长度时:
word=next(worditer)
行。追加(word)
s+=len(字)+1
result.append(“”.join(行))
返回结果
如果uuuu name uuuuuu='\uuuuuuu main\uuuuuuu':
进口医生测试
打印doctest.testmod()
通过限制备选方案搜索的数量(可能限制为每行上三个最长的备选方案),可以加快代码的速度。

如果有“最佳”方式将一个单词、两个单词等排列成行,则不会根据后面的行数而改变。如果单词足够小,可以与其他单词连成一行,则可以根据后面出现的单词进行更改。但是如果我们把这些词孤立起来,并试图把它们排成一行,那么同样的一组解决方案将永远是最优的。(可能会有相同的答案;例如,根据标准,7字符行上的“戴帽子的猫”有两种解决方案。两者都是“最好的”,而且永远都是——我们可以选择其中一种并坚持下去,而不牺牲正确性。)

  • “This”
    总是最好的,因为
    [“This”]
    。(注意,我并不是说一行字总是最好的!我要说的是,如果你有一个字,最好的排列方式就是一行字。)

  • “这是”
    可以安排为
    [“这”、“是”]
    [“这是”]
    。然而,后者是最好的。因此,从现在起,每当我们只考虑这两个词时,我们就可以完全忽略 [“这”、“是”]——它永远不会更优。

  • “这是一个”
    可以安排为
    [“这”、“是”、“a”]
    [“这是”、“a”]
    ,或
    [“这”、“是一个”]
    。(我们已经知道,
    [“这是”]
    优于
    [“这”,“是”]
    ——参见前面的要点!)结果是
    [“这”、“是一个”]
    是最好的。因此,我们可以从此忽略[“这是”,“a”]

  • “这是一个示例”

    def cost(lines, limit):
        # figures the cost of the current arrangement of words in lines.
        return sum([(limit-len(x)) ** 3 for x in lines])
    
    
    def lineify(words, limit):
        # Break `words` into lines of at most `limit` characters and return
        # the list of line strings with the lowest total cost().
        # table[k] stores (lines, cost) for the best layout found for the
        # first k words; table[0] is the empty layout.
        table = [([], 0)]

        for end in range(1, len(words) + 1):
            # baseline for this prefix: every word on its own line
            layout = words[:end]
            layout_cost = cost(layout, limit)

            start = end - 1
            tail = words[start]          # candidate last line, one word so far

            while len(tail) <= limit:
                # best layout of the words before `tail`, plus `tail` as the
                # final line
                prefix_lines, prefix_cost = table[start]
                candidate = prefix_lines + [tail]
                candidate_cost = prefix_cost + cost([tail], limit)
                if candidate_cost < layout_cost:
                    layout, layout_cost = candidate, candidate_cost

                # no earlier word left to pull onto the last line
                if start <= 0:
                    break
                start -= 1
                tail = words[start] + ' ' + tail

            # remember the winner for this prefix length
            table.append((layout, layout_cost))

        return table[len(words)][0]
    
    
    def wrap(phrase, limit):
        # Convenience entry point: accepts the text as a single string,
        # splits it on whitespace and hands the word list to lineify().
        return lineify(phrase.split(), limit)
    
    import functools
    
    def wrap(text, width):
        """
        Split ``text`` on whitespace, ask best_partition() for the grouping
        of the words that scores best under cost() at the given ``width``,
        and return each group joined by single spaces.

        >>> wrap('This is a sample text', 7)
        ['This', 'is a', 'sample', 'text']
        """
        words = tuple(text.split())
        line_cost = functools.partial(cost, width=width)
        partition = best_partition(words, line_cost)
        return [' '.join(line) for line in partition]
    
    def best_partition(words, cost):
        """Return the split of ``words`` into lines that ``cost`` rates lowest.

        ``words`` is sliced and passed back into this function, and ``cost``
        is called with a list of such slices.  The starting candidate is a
        single line holding all the words; it is replaced only by a
        strictly cheaper candidate.
        """
        winner = [words]
        split = len(words) - 1
        # Walk the split point from the right end down to 1.
        while split >= 1:
            candidate = [words[:split]]
            candidate.extend(best_partition(words[split:], cost))
            if cost(candidate) < cost(winner):
                winner = candidate
            split -= 1
        return winner
    
    def memoize(func):
        """Return a wrapper around ``func`` that remembers earlier results.

        Results are stored in a dict keyed by the tuple of positional
        arguments, so the arguments must be hashable.
        """
        seen = {}

        @functools.wraps(func)
        def cached_call(*args):
            if args not in seen:
                seen[args] = func(*args)
            return seen[args]

        return cached_call
    
    # Rebind the name to its memoized wrapper, so that calls made through
    # the name `best_partition` reuse results for argument tuples already seen.
    best_partition = memoize(best_partition)
    
    def linelen(words):
        """Length of the line obtained by joining ``words`` with one space each.

        An empty collection of words has length 0.
        """
        if not words:
            return 0
        letters = sum(len(word) for word in words)
        gaps = len(words) - 1
        return letters + gaps
    
    def cost(lines, width):
        """
        Score a layout given as a sequence of lines, each a sequence of words.

        - every line except the last adds `(width - w)**3`, where `w` is
          that line's length as computed by linelen()

        - a line wider than `width` makes the whole layout cost infinity,
          unless it consists of a single word, in which case it adds nothing

        >>> cost([['a'], ['b']], 1)
        0
        >>> cost([['a','b']], 1)
        inf
        >>> cost([['a'], ['b']], 3)
        8
        >>> cost([['a', 'b']], 2)
        inf
        """
        if not lines:
            return 0
        total = 0
        for position, words in enumerate(lines, 1):
            w = linelen(words)
            if w > width:
                if len(words) != 1:
                    # several words that do not fit together: reject layout
                    return float("inf")
                # a lone over-wide word is tolerated at no charge
                continue
            if position != len(lines):  # the final line is free
                total += (width - w)**3
        return total
    
    # Demo: wrap the sample paragraph below and print one wrapped line per
    # output row.  The width is taken from the first command-line argument
    # when one is given, otherwise 70.
    # NOTE(review): uses `sys.argv`, but no `import sys` is visible in this
    # snippet -- confirm it is imported before this point.
    print('\n'.join(wrap("""
        In olden times when wishing still helped one, there lived a king whose
        daughters were all beautiful, but the youngest was so beautiful that
        the sun itself, which has seen so much, was astonished whenever it
        shone in her face. Close by the king's castle lay a great dark forest,
        and under an old lime-tree in the forest was a well, and when the day
        was very warm, the king's child went out into the forest and sat down
        by the side of the cool fountain, and when she was bored she took a
        golden ball, and threw it up on high and caught it, and this ball was
        her favorite plaything.
        """, int(sys.argv[1]) if len(sys.argv) > 1 else 70)))
    
    In olden times when wishing still helped one, there lived a king whose daughters were all beautiful, but the youngest was so beautiful that the sun itself, which has seen so much, was astonished whenever it shone in her face. Close by the king's castle lay a great dark forest, and under an old lime-tree in the forest was a well, and when the day was very warm, the king's child went out into the forest and sat down by the side of the cool fountain, and when she was bored she took a golden ball, and threw it up on high and caught it, and this ball was her favorite plaything.