一旦超过某个宽度,就沿空格断开字符串(Python)
我正在基础图像上渲染文本。一个核心要求是:每当字符的总宽度超过基础图像的宽度时,字符串都要换行到下一行。我通过以下可以正常工作的代码段来实现这一点(标签:python、word-wrap):
# Character-by-character wrapping: a line is closed as soon as the summed
# width of its characters exceeds the width of the base image.
base_width, base_height = base_img.size
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size)

lines = []        # finished lines, in order
string = ""       # characters collected for the line in progress
line_width = 0    # summed width of the characters in `string`
line_count = 1

for c in text:
    line_width = line_width + font.getsize(c)[0]
    string = string + str(c)
    if line_width <= base_width:
        continue
    # The character that overflowed stays on the line that is being closed.
    lines.append(string)
    line_count = line_count + 1
    string, line_width = "", 0

# Keep whatever is left over after the last break.
if string:
    lines.append(string)
得到的 lines 是通过拆分原始字符串获得的子字符串列表。
现在我需要改进这个算法。
问题是它会在单词中间断行。例如,字符串 lorem ipsum 可能最终得到 lines = ['lorem ip', 'sum']。
相反,对我来说,理想的断行方式是更便于人阅读的 lines = ['lorem ', 'ipsum'],或 lines = ['lorem', ' ipsum']。
换言之,我想沿着空格断行,而不是在单词中间断行。有人能给我举个例子说明如何做到这一点吗?我似乎想不明白该怎么做。
(回答)有一个专门用于此的 Python 模块:
In [1]: import textwrap
In [2]: textwrap.wrap('x lorem ipsum', width=5)
Out[2]: ['x', 'lorem', 'ipsum']
编辑:
我误解了作者的意图。问题在于,行宽不是以字符数来定义的,而是以渲染后文本图像的宽度来定义的。我想出了一个取巧的办法(hack):实现一个带有正确宽度定义的自定义字符串类,并稍微修改 TextWrapper 类:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import textwrap
class MyTextWrapper(textwrap.TextWrapper):
    """TextWrapper variant that operates on StringWithWidth chunks.

    Chunk lengths are taken with len(), which StringWithWidth overrides, so
    lines are filled according to that class's width definition rather than
    by character count. `initial_indent`, `subsequent_indent` and
    `placeholder` are plain str and are still measured in characters.
    """

    def _split(self, text):
        """Split *text* into chunks and wrap every chunk in StringWithWidth."""
        assert isinstance(text, StringWithWidth)
        return [StringWithWidth(i) for i in super()._split(text._str)]

    def _wrap_chunks(self, chunks):
        # Copy of textwrap.TextWrapper._wrap_chunks; only the places where a
        # line is assembled (''.join and the placeholder fallback) differ.
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        Every returned line is a StringWithWidth.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                l = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + l <= width:
                    cur_line.append(chunks.pop())
                    cur_len += l

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    (not chunks or
                     self.drop_whitespace and
                     len(chunks) == 1 and
                     not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(StringWithWidth(
                        indent + ''.join(map(_as_str, cur_line))))
                else:
                    while cur_line:
                        if (cur_line[-1].strip() and
                                cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(StringWithWidth(
                                indent + ''.join(map(_as_str, cur_line))))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        if lines:
                            # Stored lines are StringWithWidth objects, so
                            # trim the underlying str instead of relying on
                            # an rstrip() method, then re-wrap the result so
                            # len() keeps using the custom width definition.
                            prev_line = StringWithWidth(
                                _as_str(lines[-1]).rstrip())
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        # Wrap the placeholder-only line as well, so every
                        # element of the result has the same type.
                        lines.append(StringWithWidth(
                            indent + self.placeholder.lstrip()))
                    break

        return lines
def _make_str_fwd(name):
    """Build a method that forwards to the str method called *name*.

    The generated method looks *name* up on ``self._str``, calls it with
    the given arguments and wraps the result in a StringWithWidth.
    """
    def func(self, *args, **kwargs):
        bound = getattr(self._str, name)
        result = bound(*args, **kwargs)
        return StringWithWidth(result)

    # Make the forwarder report the name of the method it stands in for.
    func.__name__ = name
    return func
def _as_str(val):
    """Return the plain str behind *val*.

    A StringWithWidth is unwrapped to its ``_str`` attribute; any other
    value is passed through and must already be a str.
    """
    raw = val._str if isinstance(val, StringWithWidth) else val
    assert isinstance(raw, str)
    return raw
class StringWithWidth:
    """String wrapper whose len() is a summed per-character width.

    The wrapped text lives in ``_str``. A handful of str methods are
    forwarded and return new StringWithWidth objects. Equality and hashing
    follow the wrapped text, so an instance compares equal to the plain
    str with the same content.
    """

    # Width of each supported character. len() raises KeyError for any
    # character that is missing from this table.
    char_width = {
        'x': 1,
        's': 2,
        ' ': 1
    }

    def __init__(self, s):
        self._str = s

    expandtabs = _make_str_fwd('expandtabs')
    translate = _make_str_fwd('translate')
    strip = _make_str_fwd('strip')
    # lstrip/rstrip are forwarded too, so code written against str (such as
    # textwrap's line trimming) keeps working on wrapped lines.
    lstrip = _make_str_fwd('lstrip')
    rstrip = _make_str_fwd('rstrip')
    __getitem__ = _make_str_fwd('__getitem__')

    def __eq__(self, rhs):
        """Compare by wrapped text; defer to the other operand for foreign types."""
        if isinstance(rhs, StringWithWidth):
            return self._str == rhs._str
        if isinstance(rhs, str):
            return self._str == rhs
        # Let Python fall back to its default handling instead of failing
        # an assertion when compared with an unrelated type.
        return NotImplemented

    def __hash__(self):
        # Kept consistent with __eq__: equal to a str means same hash as it.
        return hash(self._str)

    def __add__(self, rhs):
        """Concatenate with a str or another StringWithWidth."""
        return StringWithWidth(self._str + _as_str(rhs))

    def __len__(self):
        """Return the sum of ``char_width`` over the wrapped characters."""
        return sum(map(self.char_width.__getitem__, self._str))

    def __repr__(self):
        return repr(self._str)
def main():
    """Wrap a sample string to width 8 and print the resulting lines."""
    wrapper = MyTextWrapper(width=8)
    sample = StringWithWidth('x ss s')
    wrapped = wrapper.wrap(sample)
    print(wrapped)


if __name__ == '__main__':
    main()
#/usr/bin/env蟒蛇3
#-*-编码:utf-8-*-
导入文本包装
类MyTextWrapper(textwrap.TextWrapper):
def_分割(自身,文本):
断言isinstance(文本,StringWithWidth)
返回[StringWithWidth(i)for i in super()。\u split(text.\u str)]
def_wrap_块(self,chunks):
#仅修改“”。在原始代码中加入
“”“\u wrap\u块(块:[string])->[string]
换行一系列文本块,并返回一个文本行列表
长度为'self.width'或更小。(如果'break\u long\u words'为假,
有些行可能比这个长。)块大致对应
对于单词及其之间的空格:每个块都是
不可分割(模'break_long_words'),但换行符可以
位于任意两个块之间。块不应具有内部
空白;也就是说,块不是全部空白就是一个“单词”。
将从文件的开头和结尾删除空白块
行,但除此之外,保留空白。
"""
行=[]
如果自宽为1:
缩进=自缩进
其他:
缩进=自初始缩进
如果len(缩进)+len(self.placeholder.lstrip())>self.width:
raise VALUERROR(“占位符对于最大宽度而言太大”)
#按相反顺序排列,以便有效地弹出项目
#从一堆夹头上。
chunks.reverse()
而区块:
#开始组成当前行的块列表。
#cur_len就是cur_行中所有块的长度。
cur_line=[]
cur_len=0
#找出哪一个静态字符串将作为此行的前缀。
如果行:
缩进=自缩进
其他:
缩进=自初始缩进
#此行的最大宽度。
宽度=自身宽度-长度(缩进)
#行中的第一个块是空白——删除它,除非
#是课文的开头(即还没有开始行)。
如果self.drop_空格和块[-1].strip()==''和行:
del块[-1]
而区块:
l=len(块[-1])
#至少可以将该块压缩到当前行。
如果当前长度+l宽度:
self.\u handle\u long\u word(块、cur\u line、cur\u len、宽度)
cur_len=总和(映射(len,cur_line))
#如果这一行的最后一个区块都是空白,请删除它。
如果self.drop_空格和cur_行以及cur_行[-1]。strip()='':
cur_len-=len(cur_线[-1])
德尔库鲁线[-1]
如果当前线路:
如果(self.max_line)为无或
len(线)+1<自最大线或
(不是块或块)
self.drop_空格和
len(块)==1和
不是chunk[0].strip())和cur_len
(另一个回答)尽管我认为递归可能更优雅,但这种简单的方法可能会有所帮助。请注意,字符宽度的固定值需要替换为相应的函数调用。
#!python3
# coding=utf-8
"""Line break demo.

Greedy word wrapping over a list of per-word widths. NOTE(review): this
listing was restored from a machine-translated copy; the printed labels
are a best-effort reconstruction and should be checked against the
original post.
"""
text = "lorem ipsum dolor sit amet blablah"
print("break", text)
words = text.split()
# Every character counts as 5 units wide here; replace the constant with a
# real per-character measurement when rendering with an actual font.
widths = [sum(5 for _ in w) for w in words]
# print(words)
# print(widths)
print(len(words), "words")
for wmax in [10, 25, 55, 80, 100, 120]:
    print("\nmax line width:", wmax)
    lines = []
    li = 0  # index of the first word on the line currently being built
    r = range(len(widths))  # 0..5
    for i in r:
        # Width of the current line if word i were added to it
        # (spaces between words are not counted).
        w = sum(widths[li:i + 1])
        if w >= wmax and i > 0:
            # Word i does not fit: close the line before it and start a
            # new line with word i.
            lines.append(" ".join(words[li:i]))
            li = i
            print("---")
            w = sum(widths[li:i + 1])
        print(" ", i, words[i], widths[i], w)
    # remainder
    lines.append(" ".join(words[li:]))
    print(lines)
输出:
10
text lorem ipsum dolor sit amet blablah
string lor
split ['lor']
lines ['lor']
string
text lorem ipsum dolor sit amet blablah
string em
split ['em', '']
lines ['lor', 'em']
string
text lorem ipsum dolor sit amet blablah
string ips
split ['ips']
lines ['lor', 'em', 'ips']
string
text lorem ipsum dolor sit amet blablah
string um
split ['um', '']
lines ['lor', 'em', 'ips', 'um']
string
text lorem ipsum dolor sit amet blablah
string dol
split ['dol']
lines ['lor', 'em', 'ips', 'um', 'dol']
string
text lorem ipsum dolor sit amet blablah
string or
split ['or', '']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or']
string
text lorem ipsum dolor sit amet blablah
string sit
split ['sit']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit']
string
text lorem ipsum dolor sit amet blablah
string am
split ['', 'am']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '']
string am
text lorem ipsum dolor sit amet blablah
string ame
split ['ame']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame']
string
text lorem ipsum dolor sit amet blablah
string t b
split ['t', 'b']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't']
string b
text lorem ipsum dolor sit amet blablah
string bla
split ['bla']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla']
string
text lorem ipsum dolor sit amet blablah
string bla
split ['bla']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla', 'bla
string
['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla', 'bla', 'h']
25
text lorem ipsum dolor sit amet blablah
string lorem
split ['lorem', '']
lines ['lorem']
string
text lorem ipsum dolor sit amet blablah
string ipsum
split ['ipsum', '']
lines ['lorem', 'ipsum']
string
text lorem ipsum dolor sit amet blablah
string dolor
split ['dolor', '']
lines ['lorem', 'ipsum', 'dolor']
string
text lorem ipsum dolor sit amet blablah
string sit am
split ['sit', 'am']
lines ['lorem', 'ipsum', 'dolor', 'sit']
string am
text lorem ipsum dolor sit amet blablah
string amet b
split ['amet', 'b']
lines ['lorem', 'ipsum', 'dolor', 'sit', 'amet']
string b
text lorem ipsum dolor sit amet blablah
string blabla
split ['blabla']
lines ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blabla']
string
['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blabla', 'h']
55
text lorem ipsum dolor sit amet blablah
string lorem ipsum
split ['lorem ipsum', '']
lines ['lorem ipsum']
string
text lorem ipsum dolor sit amet blablah
string dolor sit am
split ['dolor sit', 'am']
lines ['lorem ipsum', 'dolor sit']
string am
text lorem ipsum dolor sit amet blablah
string amet blablah
split ['amet', 'blablah']
lines ['lorem ipsum', 'dolor sit', 'amet']
string blablah
['lorem ipsum', 'dolor sit', 'amet', 'blablah']
80
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor
split ['lorem ipsum', 'dolor']
lines ['lorem ipsum']
string dolor
text lorem ipsum dolor sit amet blablah
string dolor sit amet bl
split ['dolor sit amet', 'bl']
lines ['lorem ipsum', 'dolor sit amet']
string bl
['lorem ipsum', 'dolor sit amet', 'blablah']
100
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor sit
split ['lorem ipsum dolor', 'sit']
lines ['lorem ipsum dolor']
string sit
['lorem ipsum dolor', 'sit amet blablah']
120
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor sit ame
split ['lorem ipsum dolor sit', 'ame']
lines ['lorem ipsum dolor sit']
string ame
['lorem ipsum dolor sit', 'amet blablah']
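break lorem ipsum dolor sit amet blablah
6 words
max line width: 10
  0 lorem 25 25
---
  1 ipsum 25 25
---
  2 dolor 25 25
---
  3 sit 15 15
---
  4 amet 20 20
---
  5 blablah 35 35
['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blablah']
max line width: 25
  0 lorem 25 25
---
  1 ipsum 25 25
---
  2 dolor 25 25
---
  3 sit 15 15
---
  4 amet 20 20
---
  5 blablah 35 35
['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blablah']
max line width: 55
  0 lorem 25 25
  1 ipsum 25 50
---
  2 dolor 25 25
  3 sit 15 40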
10
text lorem ipsum dolor sit amet blablah
string lor
split ['lor']
lines ['lor']
string
text lorem ipsum dolor sit amet blablah
string em
split ['em', '']
lines ['lor', 'em']
string
text lorem ipsum dolor sit amet blablah
string ips
split ['ips']
lines ['lor', 'em', 'ips']
string
text lorem ipsum dolor sit amet blablah
string um
split ['um', '']
lines ['lor', 'em', 'ips', 'um']
string
text lorem ipsum dolor sit amet blablah
string dol
split ['dol']
lines ['lor', 'em', 'ips', 'um', 'dol']
string
text lorem ipsum dolor sit amet blablah
string or
split ['or', '']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or']
string
text lorem ipsum dolor sit amet blablah
string sit
split ['sit']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit']
string
text lorem ipsum dolor sit amet blablah
string am
split ['', 'am']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '']
string am
text lorem ipsum dolor sit amet blablah
string ame
split ['ame']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame']
string
text lorem ipsum dolor sit amet blablah
string t b
split ['t', 'b']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't']
string b
text lorem ipsum dolor sit amet blablah
string bla
split ['bla']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla']
string
text lorem ipsum dolor sit amet blablah
string bla
split ['bla']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla', 'bla
string
['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla', 'bla', 'h']
25
text lorem ipsum dolor sit amet blablah
string lorem
split ['lorem', '']
lines ['lorem']
string
text lorem ipsum dolor sit amet blablah
string ipsum
split ['ipsum', '']
lines ['lorem', 'ipsum']
string
text lorem ipsum dolor sit amet blablah
string dolor
split ['dolor', '']
lines ['lorem', 'ipsum', 'dolor']
string
text lorem ipsum dolor sit amet blablah
string sit am
split ['sit', 'am']
lines ['lorem', 'ipsum', 'dolor', 'sit']
string am
text lorem ipsum dolor sit amet blablah
string amet b
split ['amet', 'b']
lines ['lorem', 'ipsum', 'dolor', 'sit', 'amet']
string b
text lorem ipsum dolor sit amet blablah
string blabla
split ['blabla']
lines ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blabla']
string
['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blabla', 'h']
55
text lorem ipsum dolor sit amet blablah
string lorem ipsum
split ['lorem ipsum', '']
lines ['lorem ipsum']
string
text lorem ipsum dolor sit amet blablah
string dolor sit am
split ['dolor sit', 'am']
lines ['lorem ipsum', 'dolor sit']
string am
text lorem ipsum dolor sit amet blablah
string amet blablah
split ['amet', 'blablah']
lines ['lorem ipsum', 'dolor sit', 'amet']
string blablah
['lorem ipsum', 'dolor sit', 'amet', 'blablah']
80
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor
split ['lorem ipsum', 'dolor']
lines ['lorem ipsum']
string dolor
text lorem ipsum dolor sit amet blablah
string dolor sit amet bl
split ['dolor sit amet', 'bl']
lines ['lorem ipsum', 'dolor sit amet']
string bl
['lorem ipsum', 'dolor sit amet', 'blablah']
100
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor sit
split ['lorem ipsum dolor', 'sit']
lines ['lorem ipsum dolor']
string sit
['lorem ipsum dolor', 'sit amet blablah']
120
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor sit ame
split ['lorem ipsum dolor sit', 'ame']
lines ['lorem ipsum dolor sit']
string ame
['lorem ipsum dolor sit', 'amet blablah']
def break_lines(img, text, font_size, font=None):
    """Split *text* at whitespace into lines narrower than *img*.

    Args:
        img: object whose ``size[0]`` is the available width.
        text: string to wrap; it is tokenised with ``str.split()``.
        font_size: size used to load the default DejaVu Sans font. Ignored
            when *font* is supplied.
        font: optional font object used for measuring. It needs either
            ``getbbox(text)`` or the legacy ``getsize(text)`` method.
            When omitted, DejaVuSans.ttf is loaded at *font_size*.

    Returns:
        A ``(lines, number_of_lines)`` tuple. Each word is followed by a
        single space, so every line ends with one trailing space, and that
        space is included in the measured width.
    """
    base_width = img.size[0]
    if font is None:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size)

    def measure(chunk):
        # Prefer getbbox(): getsize() is missing from newer font objects.
        # Fall back to getsize() for fonts that only offer the old method.
        getbbox = getattr(font, "getbbox", None)
        if getbbox is None:
            return font.getsize(chunk)[0]
        left, _top, right, _bottom = getbbox(chunk)
        return right - left

    lines = []
    line = ""
    width_of_line = 0
    # break string into multi-lines that fit base_width
    for token in text.split():
        token += ' '
        token_width = measure(token)
        # Close the current line only if it already holds something; this
        # avoids emitting an empty line when a single word is wider than
        # the image.
        if line and width_of_line + token_width >= base_width:
            lines.append(line)
            line = ""
            width_of_line = 0
        line += token
        width_of_line += token_width
    if line:
        lines.append(line)
    return lines, len(lines)
#!python3
#coding=utf-8
""" Line break demo 3 """
def charwidth(char):
    """Return the width of *char*; this demo uses 5 for every character."""
    width = 5
    return width
def stringwidth(string):
    """Return the total width of *string* as the sum of its character widths."""
    total = 0
    for char in string:
        total += charwidth(char)
    return total
text = "lorem ipsum dolor sit amet blablah"
limit = 60
words = text.split()

# Each entry of `lines` is the list of words placed on that line.
lines = []
for word in words:
    # Measure the line exactly as it will be joined, so the separating
    # space is counted with its real character width. Always start a new
    # line when there is none yet, which avoids an empty first line when
    # the first word alone reaches the limit.
    if lines and stringwidth(" ".join(lines[-1] + [word])) < limit:
        lines[-1].append(word)
    else:
        lines.append([word])

print([" ".join(line_words) for line_words in lines])
def text_width(text, font):
    """Return the horizontal extent of *text* when measured with *font*.

    Uses ``font.getbbox(text)`` (right edge minus left edge) when the font
    offers it, and falls back to ``font.getsize(text)[0]`` otherwise.
    NOTE(review): presumably *font* is a PIL ImageFont, where ``getsize``
    is no longer available in recent releases; confirm against the caller.
    """
    getbbox = getattr(font, "getbbox", None)
    if getbbox is None:
        # Legacy font objects only provide getsize().
        return font.getsize(text)[0]
    left, _top, right, _bottom = getbbox(text)
    return right - left
# Set max_lines to 0 for no limit
def wrap_text(text, font, max_width, max_lines=0):
    """Wrap *text* at whitespace so each line measures less than *max_width*.

    Args:
        text: string to wrap; it is tokenised with ``str.split()``.
        font: font object passed through to ``text_width``.
        max_width: width limit, in the units ``text_width`` returns.
        max_lines: maximum number of lines to keep. A falsy value (the
            default 0) means no limit. When lines are dropped, the last
            kept line loses its final character and gets "..." appended.

    Returns:
        The wrapped lines joined with newline characters.
    """
    lines = []
    for word in text.split():
        # Measure the candidate line exactly as it will be joined, so the
        # separating space is counted with its real rendered width.
        if lines and text_width(" ".join(lines[-1] + [word]), font) < max_width:
            lines[-1].append(word)
            continue
        # Brute-force: chunkify word until it fits. Each piece keeps at
        # least one character, so the loop always makes progress.
        while word:
            chunk = len(word)
            while chunk > 1 and text_width(word[:chunk], font) > max_width:
                chunk -= 1
            lines.append([word[:chunk]])
            word = word[chunk:]
    lines = [" ".join(line_words) for line_words in lines]
    if max_lines and len(lines) > max_lines:
        lines[max_lines - 1] = lines[max_lines - 1][:-1] + "..."
        # Truncate only when a limit is set; slicing with the default 0
        # would throw every line away.
        lines = lines[:max_lines]
    return "\n".join(lines)