C# 如何从HTTP中提取属性
我正在尝试从HTTP捕获团队的日程安排。 但到目前为止,我所做的是:C# 如何从HTTP中提取属性,c#,http,C#,Http,我正在尝试从HTTP捕获团队的日程安排。 但到目前为止,我所做的是: 2018/1/23 Tuesday AM4:00 Swansea Ci Liverpool 2018/1/23 Tuesday AM4:00 Swansea City Liverpool 但我需要这样做: 2018/1/23 Tuesday AM4:00 Swansea Ci Liverpool 2018/1/23 Tuesday AM4:00 Swansea Ci
2018/1/23 Tuesday AM4:00 Swansea Ci Liverpool
2018/1/23 Tuesday AM4:00 Swansea City Liverpool
但我需要这样做:
2018/1/23 Tuesday AM4:00 Swansea Ci Liverpool
2018/1/23 Tuesday AM4:00 Swansea City Liverpool
这是因为我将字符串长度设置为10,所以它不会打印“ty”
那么我如何才能在这个代码结构中做到正确呢?谢谢
using System.IO;
using System.Net;
using System;
using System.Text;
using System.Text.RegularExpressions;
public class Crawler
{
static void Main()
{
string web = "http://www.goal.com/hk/fixtures/team/liverpool/663";
WebRequest request = WebRequest.Create(web);
request.Credentials = CredentialCache.DefaultCredentials;
HttpWebResponse response = null;
response = (HttpWebResponse)request.GetResponse();
Stream dataStream = response.GetResponseStream();
StreamReader reader = new StreamReader(dataStream);
string line0 = reader.ReadToEnd();
string path = @"C:\Users\UserName\Desktop\LiverpoolSchedule.txt";
using (FileStream fileStream = File.OpenWrite(path))
{
using (StreamWriter writer = new StreamWriter(fileStream))
{
string SS0 = @"2...年.?.?月.?.?日星期.?</th>\n<th colspan=";
foreach (Match match0 in Regex.Matches(line0, SS0))
{
writer.Write(match0.ToString().Remove(match0.ToString().IndexOf("</th>")));
writer.Write("\t");
string SS1 = @"<td class=.status.>\n.午.?.?:.?.?";
string line1 = line0.Substring(match0.Index);
Match match1 = Regex.Match(line1, SS1);
writer.Write(match1.ToString().Substring(match1.ToString().IndexOf("\n") + 1));
writer.Write("\t");
string SS2 = @".png. alt=.";
string line2 = line1.Substring(match1.Index);
Match match2 = Regex.Match(line2, SS2);
writer.Write(line2.Substring(match2.Index + 11, 10));
writer.Write("\t");
string SS3 = @".png. alt=.";
string line3 = line2.Substring(match2.Index + 30);
Match match3 = Regex.Match(line3, SS3);
writer.Write(line3.Substring(match3.Index + 11, 10));
writer.WriteLine();
}
writer.Close();
}
}
reader.Close();
dataStream.Close();
}
}
使用System.IO;
Net系统;
使用制度;
使用系统文本;
使用System.Text.RegularExpressions;
公共类爬虫
{
静态void Main()
{
字符串web=”http://www.goal.com/hk/fixtures/team/liverpool/663";
WebRequest=WebRequest.Create(web);
request.Credentials=CredentialCache.DefaultCredentials;
HttpWebResponse响应=null;
response=(HttpWebResponse)request.GetResponse();
Stream dataStream=response.GetResponseStream();
StreamReader=新的StreamReader(数据流);
字符串line0=reader.ReadToEnd();
字符串路径=@“C:\Users\UserName\Desktop\LiverpoolSchedule.txt”;
使用(FileStream FileStream=File.OpenWrite(path))
{
使用(StreamWriter=newstreamwriter(fileStream))
{
字符串SS0=@“2。。。年.?.?月.?.?日星期.?\n您可以尝试在正则表达式中使用+
,而不是?
想了解更多信息,请看这个。取一个更大的子字符串?比如writer.Write(line2.substring(match2.Index+11,20));
No…它将包括换行符,但我希望每一行的每一场比赛,也不是确切的球队名称。到目前为止您尝试了什么?我需要从[Swansea City]之类的地方获取属性,但不知道怎么做是的,但你试过什么?