Warning: file_get_contents(/data/phpspider/zhask/data//catemap/3/html/90.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
在使用python正则表达式模块的行中查找定位标记时出现问题 >数据 ') (' >>>next=re.findall(“”,数据) >>>下一个 (w/w/索引。php)网站的中文中文/w/索引。php(w/w/索引)网站的中文中文/索引。php(w/w/索引)网站的中文/w/索引。php(w/w/索引)网站的中文/索引。php(标题)的中文/索引。中文:标题:E0%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%0 0%0%0%0%6%的10%0%0%0%6%的10%的10%的10%的10%6%6%6%6%6%6%0%0%6%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%9F:%E0%A6%AC%E0%A6%A6%BF%E0%E0%A6%B7%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&;from=0&;hidelinks=1']_Python_Html_Beautifulsoup - Fatal编程技术网

在使用python正则表达式模块的行中查找定位标记时出现问题 >数据 ') (' >>>next=re.findall(“”,数据) >>>下一个 (w/w/索引。php)网站的中文中文/w/索引。php(w/w/索引)网站的中文中文/索引。php(w/w/索引)网站的中文/w/索引。php(w/w/索引)网站的中文/索引。php(标题)的中文/索引。中文:标题:E0%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%0 0%0%0%0%6%的10%0%0%0%6%的10%的10%的10%的10%6%6%6%6%6%6%0%0%6%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%9F:%E0%A6%AC%E0%A6%A6%BF%E0%E0%A6%B7%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&;from=0&;hidelinks=1']

在使用python正则表达式模块的行中查找定位标记时出现问题 >数据 ') (' >>>next=re.findall(“”,数据) >>>下一个 (w/w/索引。php)网站的中文中文/w/索引。php(w/w/索引)网站的中文中文/索引。php(w/w/索引)网站的中文/w/索引。php(w/w/索引)网站的中文/索引。php(标题)的中文/索引。中文:标题:E0%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%6%0 0%0%0%0%6%的10%0%0%0%6%的10%的10%的10%的10%6%6%6%6%6%6%0%0%6%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%9F:%E0%A6%AC%E0%A6%A6%BF%E0%E0%A6%B7%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&;from=0&;hidelinks=1'],python,html,beautifulsoup,Python,Html,Beautifulsoup,我试图找到第二个锚标记中的内容,但为什么我要从第一个标记中获取数据?您为什么要使用正则表达式解析HTML?为什么不?请阅读 您应该使用beautifulsou例如: >>> data '<a href="/w/index.php?title=%E0%A6%AC%E0%A6%BF%E0%A6%B6%E0%A7%87%E0%A6%B7:%E0%A6%B8%E0%A6%82%E0%A6%AF%E0%A7%8B%E0%A6%97%E0%A6%95%E0%A6%BE%E0


我想获取第二个锚标记中的内容,但为什么得到的却是第一个标记中的数据?

您为什么要使用正则表达式解析HTML?为什么不?请阅读

您应该使用
BeautifulSoup
例如:

>>> data
'<a href="/w/index.php?title=%E0%A6%AC%E0%A6%BF%E0%A6%B6%E0%A7%87%E0%A6%B7:%E0%A6%B8%E0%A6%82%E0%A6%AF%E0%A7%8B%E0%A6%97%E0%A6%95%E0%A6%BE%E0%A6%B0%E0%A7%80_%E0%A6%AA%E0%A7%83%E0%A6%B7%E0%A7%8D%E0%A6%A0%E0%A6%BE%E0%A6%B8%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%A6%9F:%E0%A6%AC%E0%A6%BF%E0%A6%B7%E0%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&amp;from=0&amp;hidelinks=1" title="বিশেষ:সংযোগকারী পৃষ্ঠাসমূহ/টেমপ্লেট:বিষয়শ্রেণীহীন">পূর্ববর্তী ৫০টি</a>) (<a href="/w/index.php?title=%E0%A6%AC%E0%A6%BF%E0%A6%B6%E0%A7%87%E0%A6%B7:%E0%A6%B8%E0%A6%82%E0%A6%AF%E0%A7%8B%E0%A6%97%E0%A6%95%E0%A6%BE%E0%A6%B0%E0%A7%80_%E0%A6%AA%E0%A7%83%E0%A6%B7%E0%A7%8D%E0%A6%A0%E0%A6%BE%E0%A6%B8%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%A6%9F:%E0%A6%AC%E0%A6%BF%E0%A6%B7%E0%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&amp;from=950505&amp;hidelinks=1&amp;back=776017" title="বিশেষ:সংযোগকারী পৃষ্ঠাসমূহ/টেমপ্লেট:বিষয়শ্রেণীহীন">পরবর্তী ৫০টি</a>'
>>> next = re.findall('<a href="(.*?)".*?>পরবর্তী ৫০টি</a>',data)
>>> next
['/w/index.php?title=%E0%A6%AC%E0%A6%BF%E0%A6%B6%E0%A7%87%E0%A6%B7:%E0%A6%B8%E0%A6%82%E0%A6%AF%E0%A7%8B%E0%A6%97%E0%A6%95%E0%A6%BE%E0%A6%B0%E0%A7%80_%E0%A6%AA%E0%A7%83%E0%A6%B7%E0%A7%8D%E0%A6%A0%E0%A6%BE%E0%A6%B8%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%A6%9F:%E0%A6%AC%E0%A6%BF%E0%A6%B7%E0%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&amp;from=0&amp;hidelinks=1']


不要发布截图,把它作为文本放在你的问题中。为什么你要使用正则表达式,而不是像beautifulsoup这样的专用库?首先,我不知道beautifulsoup。第二,数据变量是一个整页的分段。你的解决方案只给出带有特定索引的锚标记。我需要在页面使用上述模式。还有一件事,这个
amp;
&
转换如何?我使用的是您提供的原始字符串,因此我无法解释
amp;
和其他元素,因为我没有完整的源代码。我已经用你的正则表达式。你在这里做的是,找到所有的锚标记并检查它是否与文本
পরবর্তী ৫০টি
然后返回what's inside
href
。但这样复杂度变得更高了,您使用了一个循环,这是不需要的。列表推导式也算复杂?使用正则表达式解析HTML才是一种复杂性。
from bs4 import BeautifulSoup

# Sample HTML fragment holding two <a> elements separated by ") (".
a = '<a href="/w/index.php?title=%E0%A6%AC%E0%A6%BF%E0%A6%B6%E0%A7%87%E0%A6%B7:%E0%A6%B8%E0%A6%82%E0%A6%AF%E0%A7%8B%E0%A6%97%E0%A6%95%E0%A6%BE%E0%A6%B0%E0%A7%80_%E0%A6%AA%E0%A7%83%E0%A6%B7%E0%A7%8D%E0%A6%A0%E0%A6%BE%E0%A6%B8%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%A6%9F:%E0%A6%AC%E0%A6%BF%E0%A6%B7%E0%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&amp;from=0&amp;hidelinks=1" title="বিশেষ:সংযোগকারী পৃষ্ঠাসমূহ/টেমপ্লেট:বিষয়শ্রেণীহীন">পূর্ববর্তী ৫০টি</a>) (<a href="/w/index.php?title=%E0%A6%AC%E0%A6%BF%E0%A6%B6%E0%A7%87%E0%A6%B7:%E0%A6%B8%E0%A6%82%E0%A6%AF%E0%A7%8B%E0%A6%97%E0%A6%95%E0%A6%BE%E0%A6%B0%E0%A7%80_%E0%A6%AA%E0%A7%83%E0%A6%B7%E0%A7%8D%E0%A6%A0%E0%A6%BE%E0%A6%B8%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%A6%9F:%E0%A6%AC%E0%A6%BF%E0%A6%B7%E0%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&amp;from=950505&amp;hidelinks=1&amp;back=776017" title="বিশেষ:সংযোগকারী পৃষ্ঠাসমূহ/টেমপ্লেট:বিষয়শ্রেণীহীন">পরবর্তী ৫০টি</a>'

# Parse the fragment, collect every <a> tag it contains, and show the result.
soup = BeautifulSoup(a, "html.parser")
anchors = soup.find_all("a")
print(anchors)
from bs4 import BeautifulSoup

# Sample HTML fragment holding two <a> elements separated by ") (".
a = '<a href="/w/index.php?title=%E0%A6%AC%E0%A6%BF%E0%A6%B6%E0%A7%87%E0%A6%B7:%E0%A6%B8%E0%A6%82%E0%A6%AF%E0%A7%8B%E0%A6%97%E0%A6%95%E0%A6%BE%E0%A6%B0%E0%A7%80_%E0%A6%AA%E0%A7%83%E0%A6%B7%E0%A7%8D%E0%A6%A0%E0%A6%BE%E0%A6%B8%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%A6%9F:%E0%A6%AC%E0%A6%BF%E0%A6%B7%E0%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&amp;from=0&amp;hidelinks=1" title="বিশেষ:সংযোগকারী পৃষ্ঠাসমূহ/টেমপ্লেট:বিষয়শ্রেণীহীন">পূর্ববর্তী ৫০টি</a>) (<a href="/w/index.php?title=%E0%A6%AC%E0%A6%BF%E0%A6%B6%E0%A7%87%E0%A6%B7:%E0%A6%B8%E0%A6%82%E0%A6%AF%E0%A7%8B%E0%A6%97%E0%A6%95%E0%A6%BE%E0%A6%B0%E0%A7%80_%E0%A6%AA%E0%A7%83%E0%A6%B7%E0%A7%8D%E0%A6%A0%E0%A6%BE%E0%A6%B8%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%A6%9F:%E0%A6%AC%E0%A6%BF%E0%A6%B7%E0%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&amp;from=950505&amp;hidelinks=1&amp;back=776017" title="বিশেষ:সংযোগকারী পৃষ্ঠাসমূহ/টেমপ্লেট:বিষয়শ্রেণীহীন">পরবর্তী ৫০টি</a>'

# Select anchors by their visible text instead of by position in the markup.
target_text = "পরবর্তী ৫০টি"
soup = BeautifulSoup(a, "html.parser")

# Surrounding whitespace is stripped before comparing: in a full page the
# anchor text is often padded with spaces or newlines, and an exact
# comparison would then silently return an empty list.
print([
    anchor.get("href")
    for anchor in soup.find_all("a")
    if anchor.get_text().strip() == target_text
])

/w/index.php?title=%E0%A6%AC%E0%A6%BF%E0%A6%B6%E0%A7%87%E0%A6%B7:%E0%A6%B8%E0%A6%82%E0%A6%AF%E0%A7%8B%E0%A6%97%E0%A6%95%E0%A6%BE%E0%A6%B0%E0%A7%80_%E0%A6%AA%E0%A7%83%E0%A6%B7%E0%A7%8D%E0%A6%A0%E0%A6%BE%E0%A6%B8%E0%A6%AE%E0%A7%82%E0%A6%B9/%E0%A6%9F%E0%A7%87%E0%A6%AE%E0%A6%AA%E0%A7%8D%E0%A6%B2%E0%A7%87%E0%A6%9F:%E0%A6%AC%E0%A6%BF%E0%A6%B7%E0%A6%AF%E0%A6%BC%E0%A6%B6%E0%A7%8D%E0%A6%B0%E0%A7%87%E0%A6%A3%E0%A7%80%E0%A6%B9%E0%A7%80%E0%A6%A8&from=950505&hidelinks=1&back=776017