Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/string/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 从字符串中提取信息时的字符串操作问题_C#_String_Information Extraction - Fatal编程技术网

C# 从字符串中提取信息时的字符串操作问题

C# 从字符串中提取信息时的字符串操作问题,c#,string,information-extraction,C#,String,Information Extraction,在将文本文件转换为CSV之前清理文本文件时,我遇到了一个操作字符串的问题。 我的文件看起来像这样 当你看到道奇·麦克道奇时,它的布局是正确的。布巴和篮子就是我遇到的问题的例子 篮子由于某种原因,当我遇到一个地址将在城市中填充的记录时,由于单元格的移动,当我将CSV转换为数据表时会出现问题。 关于Bubba记录类型的问题,我知道该怎么做 我知道这与我如何提取信息有关,只是当我遇到这些行时不知道如何修复它 下面是我编写的代码,用于获取此文本文件并将其写入CSV public class PIADAT

在将文本文件转换为CSV之前清理文本文件时,我遇到了一个操作字符串的问题。 我的文件看起来像这样

当你看到道奇·麦克道奇时,它的布局是正确的。布巴和篮子就是我遇到的问题的例子

篮子由于某种原因,当我遇到一个地址将在城市中填充的记录时,由于单元格的移动,当我将CSV转换为数据表时会出现问题。 关于Bubba记录类型的问题,我知道该怎么做

我知道这与我如何提取信息有关,只是当我遇到这些行时不知道如何修复它

下面是我编写的代码,用于获取此文本文件并将其写入CSV

public class PIADATA
{
    public string LastName { get; set; }
    public string FirstName { get; set; }
    public string StreetAddress { get; set; }
    public string Suite { get; set; }
    public string City { get; set; }
    public string Province { get; set; }
    public string PostalCode { get; set; }
    public string Telephone { get; set; }
    public string Alternate { get; set; }
    public string Fax { get; set; }
}

static void Main(string[] args)
    {
        PIADATA pia = new PIADATA();
        string strFileContent;

        List<string> NewData = new List<string>();

        List<string> lstFinishedData = new List<string>();

        string[] myDataString;

        StringBuilder sb = new StringBuilder();

        #region Read File to a List

        using (StreamReader rd = new StreamReader(@"C:\TestData2.txt"))
        {
            while ((strFileContent = rd.ReadLine()) != null)
            {
                if (strFileContent.Contains("Details  &raquo;"))
                {
                    int nStartOf = strFileContent.IndexOf("Details");
                    int nEndOf = strFileContent.LastIndexOf(";");
                    NewData.Add(strFileContent.Remove(nStartOf - 6));
                }
                else
                NewData.Add(strFileContent);
            }
        }

        #endregion

        foreach (string item in NewData)
        {
            if (item.Contains("^"))
            {
                if (pia != null)
                {
                    //if (pia.Suite == pia.City || pia.Suite == pia.PostalCode) pia.Suite = "";
                    sb.Append(pia.LastName + "," +
                        pia.FirstName + "," +
                        pia.StreetAddress + "," +
                        pia.Suite + "," +
                        pia.City + "," +
                        pia.PostalCode + "," +
                        pia.Telephone + "," +
                        pia.Fax);
                    sb.AppendLine();
                }

                pia.LastName = string.Empty;
                pia.FirstName = string.Empty;
                pia.StreetAddress = string.Empty;
                pia.Suite = string.Empty;
                pia.City = string.Empty;
                pia.PostalCode = string.Empty;
                pia.Telephone = string.Empty;
                pia.Fax = string.Empty;

                string[] strReplaceName = item.Split(',');
                pia.LastName = strReplaceName[0].Replace("^", "").Trim().ToString();
                pia.FirstName = strReplaceName[1].Trim().ToString();
            }

            #region Name, Address

            //if (item.Contains("^")) pia.FirstName = item;
            if (Regex.IsMatch(item, @"^\d"))
            {
                pia.StreetAddress = item.Replace(",", "");

                // Seperate Address if it contains Unit or Suite
                #region Seperate Address Line

                Match matchUnit = Regex.Match(item, @"[Unit]", RegexOptions.IgnoreCase);
                if (item.Contains("Unit") || item.Contains("Suite"))
                {
                    if (item.Contains("Unit"))
                    {
                        int nStart = item.IndexOf("Unit") + "Unit".Length;
                        pia.Suite = item.Substring(nStart);

                        if (Regex.IsMatch(item, @"^\d"))
                        {
                            //int a = item.Replace(item.Substring(nStart), string.Empty).Trim().Length;

                            string a = item;
                            int b = nStart;
                            if (a.Length - b == 0) pia.StreetAddress = item;
                            else
                                pia.StreetAddress = item.Replace(item.Substring(nStart), "").Trim();
                            //if(item.Replace(item.Substring(nStart), string.Empty).Trim() == "0") pia.StreetAddress = item;

                        }
                    }

                    if (item.Contains("Suite"))
                    {
                        int nStart = item.IndexOf("Suite") + "Suite".Length;
                        pia.Suite = item.Substring(nStart);

                        if (Regex.IsMatch(item, @"^\d")) pia.StreetAddress = item.Replace(item.Substring(nStart), "").Replace("Suite", "").Trim();
                    }
                }

                #endregion

            }

            #endregion

            #region City and Postal Code

            // Get the postal code
            Match matchPostalCode = Regex.Match(item, @"[ABCEGHJKLMNPRSTVXY]{1}\d{1}[A-Z]{1}[ -] *\d{1}[A-Z]{1}\d{1}$", RegexOptions.IgnoreCase);
            if (matchPostalCode.Success)
            {
                pia.PostalCode = matchPostalCode.ToString();

                // Get City
                if (item.Contains(matchPostalCode.ToString()) && item.Contains("ON"))
                {
                    string a = ", ON";
                    string b = "ON";
                    string c = " ON";
                    if (item.Contains(a)) pia.City = item.Replace(pia.PostalCode, "").Replace(a, "").Trim();
                    if (item.Contains(b)) pia.City = item.Replace(pia.PostalCode, "").Replace(b, "").Trim();
                    if (item.Contains(c)) pia.City = item.Replace(pia.PostalCode, "").Replace(c, "").Trim();
                }
            }
            #endregion

            #region Phone, Suite Number, Fax

            if (item.Contains("Phone")) pia.Telephone = item.Remove(0, 7);
            if (item.Contains("Suite") || item.Contains("Unit")) pia.Suite = item.Replace("Suite", "").Replace("Unit", "").Trim();

            if (item.Contains("Fax")) pia.Fax = item;

            #endregion

        }
        string g = sb.ToString();
        File.WriteAllText(@"C:\Meh.csv", g);
    }

tl;“但是,似乎很难不把电话和传真放在正确的栏目里——它们都被贴上了标签。@Henkholtman,我后来添加了标题,所以当我发布这个问题时,可以看到它看起来是什么样子,这更让人困惑。这是一种视觉表现,所以当我说地址如记录所示接管了这个城市时,你可以看到。我试着消除任何困惑,我是指手机:在文本文件中。你怎么把数据放错列了?
public class PIADATA
{
    public string LastName { get; set; }
    public string FirstName { get; set; }
    public string StreetAddress { get; set; }
    public string Suite { get; set; }
    public string City { get; set; }
    public string Province { get; set; }
    public string PostalCode { get; set; }
    public string Telephone { get; set; }
    public string Alternate { get; set; }
    public string Fax { get; set; }
}

static void Main(string[] args)
    {
        PIADATA pia = new PIADATA();
        string strFileContent;

        List<string> NewData = new List<string>();

        List<string> lstFinishedData = new List<string>();

        string[] myDataString;

        StringBuilder sb = new StringBuilder();

        #region Read File to a List

        using (StreamReader rd = new StreamReader(@"C:\TestData2.txt"))
        {
            while ((strFileContent = rd.ReadLine()) != null)
            {
                if (strFileContent.Contains("Details  &raquo;"))
                {
                    int nStartOf = strFileContent.IndexOf("Details");
                    int nEndOf = strFileContent.LastIndexOf(";");
                    NewData.Add(strFileContent.Remove(nStartOf - 6));
                }
                else
                NewData.Add(strFileContent);
            }
        }

        #endregion

        foreach (string item in NewData)
        {
            if (item.Contains("^"))
            {
                if (pia != null)
                {
                    //if (pia.Suite == pia.City || pia.Suite == pia.PostalCode) pia.Suite = "";
                    sb.Append(pia.LastName + "," +
                        pia.FirstName + "," +
                        pia.StreetAddress + "," +
                        pia.Suite + "," +
                        pia.City + "," +
                        pia.PostalCode + "," +
                        pia.Telephone + "," +
                        pia.Fax);
                    sb.AppendLine();
                }

                pia.LastName = string.Empty;
                pia.FirstName = string.Empty;
                pia.StreetAddress = string.Empty;
                pia.Suite = string.Empty;
                pia.City = string.Empty;
                pia.PostalCode = string.Empty;
                pia.Telephone = string.Empty;
                pia.Fax = string.Empty;

                string[] strReplaceName = item.Split(',');
                pia.LastName = strReplaceName[0].Replace("^", "").Trim().ToString();
                pia.FirstName = strReplaceName[1].Trim().ToString();
            }

            #region Name, Address

            //if (item.Contains("^")) pia.FirstName = item;
            if (Regex.IsMatch(item, @"^\d"))
            {
                pia.StreetAddress = item.Replace(",", "");

                // Seperate Address if it contains Unit or Suite
                #region Seperate Address Line

                Match matchUnit = Regex.Match(item, @"[Unit]", RegexOptions.IgnoreCase);
                if (item.Contains("Unit") || item.Contains("Suite"))
                {
                    if (item.Contains("Unit"))
                    {
                        int nStart = item.IndexOf("Unit") + "Unit".Length;
                        pia.Suite = item.Substring(nStart);

                        if (Regex.IsMatch(item, @"^\d"))
                        {
                            //int a = item.Replace(item.Substring(nStart), string.Empty).Trim().Length;

                            string a = item;
                            int b = nStart;
                            if (a.Length - b == 0) pia.StreetAddress = item;
                            else
                                pia.StreetAddress = item.Replace(item.Substring(nStart), "").Trim();
                            //if(item.Replace(item.Substring(nStart), string.Empty).Trim() == "0") pia.StreetAddress = item;

                        }
                    }

                    if (item.Contains("Suite"))
                    {
                        int nStart = item.IndexOf("Suite") + "Suite".Length;
                        pia.Suite = item.Substring(nStart);

                        if (Regex.IsMatch(item, @"^\d")) pia.StreetAddress = item.Replace(item.Substring(nStart), "").Replace("Suite", "").Trim();
                    }
                }

                #endregion

            }

            #endregion

            #region City and Postal Code

            // Get the postal code
            Match matchPostalCode = Regex.Match(item, @"[ABCEGHJKLMNPRSTVXY]{1}\d{1}[A-Z]{1}[ -] *\d{1}[A-Z]{1}\d{1}$", RegexOptions.IgnoreCase);
            if (matchPostalCode.Success)
            {
                pia.PostalCode = matchPostalCode.ToString();

                // Get City
                if (item.Contains(matchPostalCode.ToString()) && item.Contains("ON"))
                {
                    string a = ", ON";
                    string b = "ON";
                    string c = " ON";
                    if (item.Contains(a)) pia.City = item.Replace(pia.PostalCode, "").Replace(a, "").Trim();
                    if (item.Contains(b)) pia.City = item.Replace(pia.PostalCode, "").Replace(b, "").Trim();
                    if (item.Contains(c)) pia.City = item.Replace(pia.PostalCode, "").Replace(c, "").Trim();
                }
            }
            #endregion

            #region Phone, Suite Number, Fax

            if (item.Contains("Phone")) pia.Telephone = item.Remove(0, 7);
            if (item.Contains("Suite") || item.Contains("Unit")) pia.Suite = item.Replace("Suite", "").Replace("Unit", "").Trim();

            if (item.Contains("Fax")) pia.Fax = item;

            #endregion

        }
        string g = sb.ToString();
        File.WriteAllText(@"C:\Meh.csv", g);
    }