C# 从字符串中提取信息时的字符串操作问题
在将文本文件转换为 CSV 之前对其进行清理时,我遇到了一个字符串处理方面的问题。我的文件看起来像这样:"道奇·麦克道奇"这条记录的布局是正确的,而"布巴"(Bubba)和"篮子"这两条记录就是出问题的例子。对于"篮子"这类记录,不知为何地址会被填进城市一栏;由于单元格因此错位,我把 CSV 转换为数据表时就会出问题。至于 Bubba 这类记录的问题,我知道该怎么处理。我知道这与我提取信息的方式有关,只是遇到这些行时不知道该如何修复。下面是我编写的代码,用于读取这个文本文件并将其写入 CSV。
C# 从字符串中提取信息时的字符串操作问题(标签:c#, string, information-extraction;C#, String, Information Extraction)
/// <summary>
/// One contact record as assembled by Main from the input text file; every field is raw text.
/// Main writes LastName, FirstName, StreetAddress, Suite, City, PostalCode, Telephone and Fax
/// (in that order) as one CSV row. Province and Alternate are declared here but are neither
/// assigned nor written by the Main shown in this file.
/// </summary>
public class PIADATA
{
// Text before the first comma on a line containing '^', with '^' removed and whitespace trimmed.
public string LastName { get; set; }
// Text between the first and second comma on that same '^' line, trimmed.
public string FirstName { get; set; }
// Taken from a line that starts with a digit.
public string StreetAddress { get; set; }
// Taken from a line containing the word "Unit" or "Suite".
public string Suite { get; set; }
// Taken from a line that ends in a postal code and contains "ON".
public string City { get; set; }
// Not assigned or written by Main in this file.
public string Province { get; set; }
// Text matched by Main's postal-code regex at the end of a line.
public string PostalCode { get; set; }
// Taken from a line containing "Phone".
public string Telephone { get; set; }
// Not assigned or written by Main in this file.
public string Alternate { get; set; }
// Taken from a line containing "Fax".
public string Fax { get; set; }
}
/// <summary>
/// Reads C:\TestData2.txt, groups its lines into contact records and writes one CSV row per
/// record to C:\Meh.csv. Columns, in order: LastName, FirstName, StreetAddress, Suite, City,
/// PostalCode, Telephone, Fax.
/// </summary>
/// <remarks>
/// A line containing '^' starts a new record ("^Last, First"). The lines that follow are
/// classified by content: a leading digit marks a street address, a trailing Canadian postal
/// code marks the city line, and the words "Phone", "Suite"/"Unit" and "Fax" mark those fields.
/// Lines that appear before the first '^' line are parsed but never written, because no record
/// has been started yet.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    StringBuilder sb = new StringBuilder();

    // Quote a value when it contains a comma, a double quote or a line break, so that it
    // cannot spill into the neighbouring CSV columns. Null is written as an empty field.
    System.Func<string, string> toCsvField = value =>
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }
        if (value.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
        {
            return value;
        }
        return "\"" + value.Replace("\"", "\"\"") + "\"";
    };

    // Append one record as a CSV row.
    System.Action<PIADATA> appendRow = record =>
    {
        sb.AppendLine(string.Join(",", new[]
        {
            toCsvField(record.LastName),
            toCsvField(record.FirstName),
            toCsvField(record.StreetAddress),
            toCsvField(record.Suite),
            toCsvField(record.City),
            toCsvField(record.PostalCode),
            toCsvField(record.Telephone),
            toCsvField(record.Fax)
        }));
    };

    #region Read File to a List
    List<string> NewData = new List<string>();
    using (StreamReader rd = new StreamReader(@"C:\TestData2.txt"))
    {
        string strFileContent;
        while ((strFileContent = rd.ReadLine()) != null)
        {
            if (strFileContent.Contains("Details »"))
            {
                // Cut the line 6 characters before "Details" (presumably a separator in the
                // export - TODO confirm). Clamp at 0 so a short prefix cannot throw.
                int cutAt = strFileContent.IndexOf("Details") - 6;
                NewData.Add(strFileContent.Substring(0, cutAt < 0 ? 0 : cutAt));
            }
            else
            {
                NewData.Add(strFileContent);
            }
        }
    }
    #endregion

    PIADATA pia = new PIADATA();
    bool hasRecord = false; // becomes true once the first '^' line has been seen

    foreach (string item in NewData)
    {
        if (item.Contains("^"))
        {
            // Flush the previous record, but only if there is one; otherwise the file would
            // start with an empty row.
            if (hasRecord)
            {
                appendRow(pia);
            }
            pia = new PIADATA();
            hasRecord = true;

            string[] nameParts = item.Split(',');
            pia.LastName = nameParts[0].Replace("^", "").Trim();
            // A name line without a comma has no first-name part.
            pia.FirstName = nameParts.Length > 1 ? nameParts[1].Trim() : string.Empty;
        }

        #region Name, Address
        bool isAddressLine = Regex.IsMatch(item, @"^\d");
        if (isAddressLine)
        {
            pia.StreetAddress = item.Replace(",", "");

            // Split "<street> Unit <n>" / "<street> Suite <n>" at the marker word itself.
            // Slicing by index (rather than Replace on the tail) cannot touch other parts of
            // the street text and cannot throw when nothing follows the marker.
            int unitAt = item.IndexOf("Unit", System.StringComparison.Ordinal);
            if (unitAt >= 0)
            {
                pia.Suite = item.Substring(unitAt + "Unit".Length).Trim();
                pia.StreetAddress = item.Substring(0, unitAt).Replace(",", "").Trim();
            }
            int suiteAt = item.IndexOf("Suite", System.StringComparison.Ordinal);
            if (suiteAt >= 0)
            {
                pia.Suite = item.Substring(suiteAt + "Suite".Length).Trim();
                pia.StreetAddress = item.Substring(0, suiteAt).Replace(",", "").Trim();
            }
        }
        #endregion

        #region City and Postal Code
        // Get the postal code (must be the last thing on the line).
        Match matchPostalCode = Regex.Match(item, @"[ABCEGHJKLMNPRSTVXY]{1}\d{1}[A-Z]{1}[ -] *\d{1}[A-Z]{1}\d{1}$", RegexOptions.IgnoreCase);
        if (matchPostalCode.Success)
        {
            pia.PostalCode = matchPostalCode.Value;

            // Get City: whatever precedes a trailing province token "ON", together with the
            // separators around it. Matching "ON" as a whole word at the end keeps city names
            // that contain the letters "ON" intact and leaves no dangling comma behind.
            // NOTE(review): if a single line holds street, city, province and postal code,
            // the street part still ends up in City - confirm against the real input layout.
            string beforePostal = item.Substring(0, matchPostalCode.Index);
            Match province = Regex.Match(beforePostal, @"[\s,]*\bON[\s,]*$");
            if (province.Success)
            {
                pia.City = beforePostal.Substring(0, province.Index).Trim();
            }
        }
        #endregion

        #region Phone, Suite Number, Fax
        if (item.Contains("Phone"))
        {
            // Drops the first 7 characters (the length of "Phone: "); guarded so that a
            // shorter line yields an empty value instead of throwing.
            pia.Telephone = item.Length >= 7 ? item.Remove(0, 7) : string.Empty;
        }
        // A stand-alone "Suite ..."/"Unit ..." line. Address lines are handled above and must
        // not be copied wholesale into Suite.
        if (!isAddressLine && (item.Contains("Suite") || item.Contains("Unit")))
        {
            pia.Suite = item.Replace("Suite", "").Replace("Unit", "").Trim();
        }
        if (item.Contains("Fax"))
        {
            pia.Fax = item;
        }
        #endregion
    }

    // The last record has no following '^' line to trigger its output.
    if (hasRecord)
    {
        appendRow(pia);
    }

    File.WriteAllText(@"C:\Meh.csv", sb.ToString());
}
"不过,电话和传真应该很难被放错列——它们都带有标签。" "@Henkholtman,标题是我后来加上去的,为的是在发布这个问题时能让人看出数据的样子,结果反而更让人困惑了。那只是一种直观的展示,这样当我说某条记录的地址占用了城市一栏时,你可以看得到。为了消除困惑再说明一下:我指的是文本文件中的"Phone:"。" "你是怎么把数据放到错误的列里去的?"
/// <summary>
/// One contact record as assembled by Main from the input text file; every field is raw text.
/// Main writes LastName, FirstName, StreetAddress, Suite, City, PostalCode, Telephone and Fax
/// (in that order) as one CSV row. Province and Alternate are declared here but are neither
/// assigned nor written by the Main shown in this file.
/// </summary>
public class PIADATA
{
// Text before the first comma on a line containing '^', with '^' removed and whitespace trimmed.
public string LastName { get; set; }
// Text between the first and second comma on that same '^' line, trimmed.
public string FirstName { get; set; }
// Taken from a line that starts with a digit.
public string StreetAddress { get; set; }
// Taken from a line containing the word "Unit" or "Suite".
public string Suite { get; set; }
// Taken from a line that ends in a postal code and contains "ON".
public string City { get; set; }
// Not assigned or written by Main in this file.
public string Province { get; set; }
// Text matched by Main's postal-code regex at the end of a line.
public string PostalCode { get; set; }
// Taken from a line containing "Phone".
public string Telephone { get; set; }
// Not assigned or written by Main in this file.
public string Alternate { get; set; }
// Taken from a line containing "Fax".
public string Fax { get; set; }
}
/// <summary>
/// Reads C:\TestData2.txt, groups its lines into contact records and writes one CSV row per
/// record to C:\Meh.csv. Columns, in order: LastName, FirstName, StreetAddress, Suite, City,
/// PostalCode, Telephone, Fax.
/// </summary>
/// <remarks>
/// A line containing '^' starts a new record ("^Last, First"). The lines that follow are
/// classified by content: a leading digit marks a street address, a trailing Canadian postal
/// code marks the city line, and the words "Phone", "Suite"/"Unit" and "Fax" mark those fields.
/// Lines that appear before the first '^' line are parsed but never written, because no record
/// has been started yet.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    StringBuilder sb = new StringBuilder();

    // Quote a value when it contains a comma, a double quote or a line break, so that it
    // cannot spill into the neighbouring CSV columns. Null is written as an empty field.
    System.Func<string, string> toCsvField = value =>
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }
        if (value.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
        {
            return value;
        }
        return "\"" + value.Replace("\"", "\"\"") + "\"";
    };

    // Append one record as a CSV row.
    System.Action<PIADATA> appendRow = record =>
    {
        sb.AppendLine(string.Join(",", new[]
        {
            toCsvField(record.LastName),
            toCsvField(record.FirstName),
            toCsvField(record.StreetAddress),
            toCsvField(record.Suite),
            toCsvField(record.City),
            toCsvField(record.PostalCode),
            toCsvField(record.Telephone),
            toCsvField(record.Fax)
        }));
    };

    #region Read File to a List
    List<string> NewData = new List<string>();
    using (StreamReader rd = new StreamReader(@"C:\TestData2.txt"))
    {
        string strFileContent;
        while ((strFileContent = rd.ReadLine()) != null)
        {
            if (strFileContent.Contains("Details »"))
            {
                // Cut the line 6 characters before "Details" (presumably a separator in the
                // export - TODO confirm). Clamp at 0 so a short prefix cannot throw.
                int cutAt = strFileContent.IndexOf("Details") - 6;
                NewData.Add(strFileContent.Substring(0, cutAt < 0 ? 0 : cutAt));
            }
            else
            {
                NewData.Add(strFileContent);
            }
        }
    }
    #endregion

    PIADATA pia = new PIADATA();
    bool hasRecord = false; // becomes true once the first '^' line has been seen

    foreach (string item in NewData)
    {
        if (item.Contains("^"))
        {
            // Flush the previous record, but only if there is one; otherwise the file would
            // start with an empty row.
            if (hasRecord)
            {
                appendRow(pia);
            }
            pia = new PIADATA();
            hasRecord = true;

            string[] nameParts = item.Split(',');
            pia.LastName = nameParts[0].Replace("^", "").Trim();
            // A name line without a comma has no first-name part.
            pia.FirstName = nameParts.Length > 1 ? nameParts[1].Trim() : string.Empty;
        }

        #region Name, Address
        bool isAddressLine = Regex.IsMatch(item, @"^\d");
        if (isAddressLine)
        {
            pia.StreetAddress = item.Replace(",", "");

            // Split "<street> Unit <n>" / "<street> Suite <n>" at the marker word itself.
            // Slicing by index (rather than Replace on the tail) cannot touch other parts of
            // the street text and cannot throw when nothing follows the marker.
            int unitAt = item.IndexOf("Unit", System.StringComparison.Ordinal);
            if (unitAt >= 0)
            {
                pia.Suite = item.Substring(unitAt + "Unit".Length).Trim();
                pia.StreetAddress = item.Substring(0, unitAt).Replace(",", "").Trim();
            }
            int suiteAt = item.IndexOf("Suite", System.StringComparison.Ordinal);
            if (suiteAt >= 0)
            {
                pia.Suite = item.Substring(suiteAt + "Suite".Length).Trim();
                pia.StreetAddress = item.Substring(0, suiteAt).Replace(",", "").Trim();
            }
        }
        #endregion

        #region City and Postal Code
        // Get the postal code (must be the last thing on the line).
        Match matchPostalCode = Regex.Match(item, @"[ABCEGHJKLMNPRSTVXY]{1}\d{1}[A-Z]{1}[ -] *\d{1}[A-Z]{1}\d{1}$", RegexOptions.IgnoreCase);
        if (matchPostalCode.Success)
        {
            pia.PostalCode = matchPostalCode.Value;

            // Get City: whatever precedes a trailing province token "ON", together with the
            // separators around it. Matching "ON" as a whole word at the end keeps city names
            // that contain the letters "ON" intact and leaves no dangling comma behind.
            // NOTE(review): if a single line holds street, city, province and postal code,
            // the street part still ends up in City - confirm against the real input layout.
            string beforePostal = item.Substring(0, matchPostalCode.Index);
            Match province = Regex.Match(beforePostal, @"[\s,]*\bON[\s,]*$");
            if (province.Success)
            {
                pia.City = beforePostal.Substring(0, province.Index).Trim();
            }
        }
        #endregion

        #region Phone, Suite Number, Fax
        if (item.Contains("Phone"))
        {
            // Drops the first 7 characters (the length of "Phone: "); guarded so that a
            // shorter line yields an empty value instead of throwing.
            pia.Telephone = item.Length >= 7 ? item.Remove(0, 7) : string.Empty;
        }
        // A stand-alone "Suite ..."/"Unit ..." line. Address lines are handled above and must
        // not be copied wholesale into Suite.
        if (!isAddressLine && (item.Contains("Suite") || item.Contains("Unit")))
        {
            pia.Suite = item.Replace("Suite", "").Replace("Unit", "").Trim();
        }
        if (item.Contains("Fax"))
        {
            pia.Fax = item;
        }
        #endregion
    }

    // The last record has no following '^' line to trigger its output.
    if (hasRecord)
    {
        appendRow(pia);
    }

    File.WriteAllText(@"C:\Meh.csv", sb.ToString());
}