C# 多个正则表达式字符串模式(不同字段)

C# 多个正则表达式字符串模式(不同字段),c#,sql,asp.net,regex,ms-word,C#,Sql,Asp.net,Regex,Ms Word,我正试图用以下格式从word文档中提取文本,并将数据插入SQL数据库 Word文档 Name of House: Aasleagh Lodge Townland: Srahatloe Near: Killary Harbour, Leenane Status/Public Access: maintained, private fishing lodge Date Built: 1838-1850, burnt 1923, rebuilt 1928 源代码 var wordAp

我正试图用以下格式从word文档中提取文本,并将数据插入SQL数据库

Word文档

Name of House: Aasleagh Lodge
Townland: Srahatloe
Near: Killary Harbour, Leenane
Status/Public Access: maintained, private fishing lodge
Date Built: 1838-1850, burnt 1923, rebuilt 1928
源代码

        // Reads the whole Word document as text, extracts every
        // "Name of House" / "Townland" pair and inserts one row per house
        // into the Houses table.
        var wordApp = new Microsoft.Office.Interop.Word.Application();
        var wordDoc = wordApp.Documents.Open(@"C:\Users\mhoban\Documents\Book.docx");
        var txt = wordDoc.Content.Text;

        // One pattern captures both fields of a record, so the two values can
        // never end up in separate rows. Assumes the "Townland" line directly
        // follows the "Name of House" line, as in the sample document.
        var regex = new Regex(@"Name of House\: (?<name>.+?)[\r\n]+Townland\: (?<town>.+?)[\r\n]");

        // The connection and command are created once and disposed
        // deterministically instead of being re-created (and leaked) per match.
        using (var con = new SqlConnection(ConfigurationManager.ConnectionStrings["ConnectionString"].ToString()))
        using (var com = new SqlCommand("INSERT INTO Houses (Name, Townland) VALUES (@name, @town)", con))
        {
            // Size -1 maps to NVARCHAR(MAX), matching the column definitions.
            var nameParam = com.Parameters.Add("@name", SqlDbType.NVarChar, -1);
            var townParam = com.Parameters.Add("@town", SqlDbType.NVarChar, -1);

            con.Open();

            foreach (Match match in regex.Matches(txt))
            {
                // Each column is read from its own capture group; previously
                // both values were taken from the house-name group.
                nameParam.Value = match.Groups["name"].Value;
                townParam.Value = match.Groups["town"].Value;

                com.ExecuteNonQuery();
            }
        }
// Reads the Word document line by line and inserts one Houses row for every
// "Name of House" / "Townland" pair that has been collected.
var wordApp = new Microsoft.Office.Interop.Word.Application();
            var wordDoc = wordApp.Documents.Open(@"C:\Users\mhoban\Documents\Book.docx");
            var txt = wordDoc.Content.Text;

            // The connection and command are opened once and disposed
            // deterministically rather than being re-created for every row.
            using (var sr = new StringReader(txt))
            using (var con = new SqlConnection(ConfigurationManager.ConnectionStrings["ConnectionString"].ToString()))
            using (var com = new SqlCommand("INSERT INTO Houses (Name, Townland) VALUES (@name, @town)", con))
            {
                // Size -1 maps to NVARCHAR(MAX), matching the column definitions.
                var nameParam = com.Parameters.Add("@name", SqlDbType.NVarChar, -1);
                var townParam = com.Parameters.Add("@town", SqlDbType.NVarChar, -1);

                con.Open();

                string s;
                var nameValue = string.Empty;
                var townValue = string.Empty;
                while ((s = sr.ReadLine()) != null)
                {
                    // Take everything after the FIRST colon so values that
                    // themselves contain ':' are kept whole, and a line with
                    // no colon yields an empty value instead of throwing.
                    var colon = s.IndexOf(':');
                    var value = colon >= 0 ? s.Substring(colon + 1).Trim() : string.Empty;

                    if (s.StartsWith("Name of House"))
                    {
                        // Assign rather than append: a record without a
                        // Townland must not leak its name into the next one.
                        nameValue = value;
                    }
                    else if (s.StartsWith("Townland"))
                    {
                        townValue = value;
                    }

                    if (nameValue.Length > 0 && townValue.Length > 0)
                    {
                        // Plain strings are passed; a StringBuilder cannot be
                        // mapped to a SQL parameter value.
                        nameParam.Value = nameValue;
                        townParam.Value = townValue;

                        com.ExecuteNonQuery();

                        // Reset for the next record.
                        nameValue = string.Empty;
                        townValue = string.Empty;
                    }
                }
            }
这非常有效,唯一的问题是:我该如何编写代码来插入其他文本字段?例如这一行

var regex = new Regex(@"(Name of House\: )(.+?)[\r\n]");
在本例中插入房屋名称“Aasleagh Lodge”,但我如何写这行以插入城镇

我尝试用我需要的字段名替换正则表达式中的“Townland”,但最终得到的是单个记录,每个记录只包含一个不同的列值

是否有一种方法可以在插入数据的同时使用列表或其他方法,这样就不会发生这种情况

新源代码

        // Reads the whole Word document as text, extracts every
        // "Name of House" / "Townland" pair and inserts one row per house
        // into the Houses table.
        var wordApp = new Microsoft.Office.Interop.Word.Application();
        var wordDoc = wordApp.Documents.Open(@"C:\Users\mhoban\Documents\Book.docx");
        var txt = wordDoc.Content.Text;

        // One pattern captures both fields of a record, so the two values can
        // never end up in separate rows. Assumes the "Townland" line directly
        // follows the "Name of House" line, as in the sample document.
        var regex = new Regex(@"Name of House\: (?<name>.+?)[\r\n]+Townland\: (?<town>.+?)[\r\n]");

        // The connection and command are created once and disposed
        // deterministically instead of being re-created (and leaked) per match.
        using (var con = new SqlConnection(ConfigurationManager.ConnectionStrings["ConnectionString"].ToString()))
        using (var com = new SqlCommand("INSERT INTO Houses (Name, Townland) VALUES (@name, @town)", con))
        {
            // Size -1 maps to NVARCHAR(MAX), matching the column definitions.
            var nameParam = com.Parameters.Add("@name", SqlDbType.NVarChar, -1);
            var townParam = com.Parameters.Add("@town", SqlDbType.NVarChar, -1);

            con.Open();

            foreach (Match match in regex.Matches(txt))
            {
                // Each column is read from its own capture group; previously
                // both values were taken from the house-name group.
                nameParam.Value = match.Groups["name"].Value;
                townParam.Value = match.Groups["town"].Value;

                com.ExecuteNonQuery();
            }
        }
// Reads the Word document line by line and inserts one Houses row for every
// "Name of House" / "Townland" pair that has been collected.
var wordApp = new Microsoft.Office.Interop.Word.Application();
            var wordDoc = wordApp.Documents.Open(@"C:\Users\mhoban\Documents\Book.docx");
            var txt = wordDoc.Content.Text;

            // The connection and command are opened once and disposed
            // deterministically rather than being re-created for every row.
            using (var sr = new StringReader(txt))
            using (var con = new SqlConnection(ConfigurationManager.ConnectionStrings["ConnectionString"].ToString()))
            using (var com = new SqlCommand("INSERT INTO Houses (Name, Townland) VALUES (@name, @town)", con))
            {
                // Size -1 maps to NVARCHAR(MAX), matching the column definitions.
                var nameParam = com.Parameters.Add("@name", SqlDbType.NVarChar, -1);
                var townParam = com.Parameters.Add("@town", SqlDbType.NVarChar, -1);

                con.Open();

                string s;
                var nameValue = string.Empty;
                var townValue = string.Empty;
                while ((s = sr.ReadLine()) != null)
                {
                    // Take everything after the FIRST colon so values that
                    // themselves contain ':' are kept whole, and a line with
                    // no colon yields an empty value instead of throwing.
                    var colon = s.IndexOf(':');
                    var value = colon >= 0 ? s.Substring(colon + 1).Trim() : string.Empty;

                    if (s.StartsWith("Name of House"))
                    {
                        // Assign rather than append: a record without a
                        // Townland must not leak its name into the next one.
                        nameValue = value;
                    }
                    else if (s.StartsWith("Townland"))
                    {
                        townValue = value;
                    }

                    if (nameValue.Length > 0 && townValue.Length > 0)
                    {
                        // Plain strings are passed; a StringBuilder cannot be
                        // mapped to a SQL parameter value.
                        nameParam.Value = nameValue;
                        townParam.Value = townValue;

                        com.ExecuteNonQuery();

                        // Reset for the next record.
                        nameValue = string.Empty;
                        townValue = string.Empty;
                    }
                }
            }
数据库字段

[Id]          NCHAR (10)     NULL,
[Name]        NVARCHAR (MAX) NULL,
[Townland]    NVARCHAR (MAX) NULL,
[Near]        NVARCHAR (MAX) NULL,
[Status]      NVARCHAR (MAX) NULL,
[Built]       NVARCHAR (MAX) NULL,
[Description] NVARCHAR (MAX) NULL,
[Families]    NVARCHAR (MAX) NULL,
[Images]      IMAGE          NULL

您可以使用以下正则表达式:

(.*?\: )(.+?)[\r\n]

另外,您可以将正则表达式中的
[\r\n]
替换为
$
以便也能匹配最后一行,因为最后一行末尾的
\r 或 \n
是可选的

即:


是的,这是可能的。然而,仅仅将regex设置为泛型只能解决一半的问题,因为您还必须知道每个值映射到哪个数据库列

以下是我将采取的一般方法:

  • 具有定义文件中可能遇到的每个参数名称及其在数据库中对应列的内容。这可能只是代码中的一个字典,但更成熟的设计需要将其放入某种外部配置中

  • 对每一行使用一个简单的、基于 ':' 的 Split
    ,将所有键/值对放入字典(这里的正则表达式是多余的)

  • 基于上面前两个步骤中的数据构建insert语句


  • 这里有一个没有正则表达式的解决方案。你真的不需要它

    // Sample input: three house records, one "Key: value" pair per line.
    var txt = "Name of House: Aasleagh Lodge\r\nTownland: Srahatloe\r\nNear: Killary Harbour, Leenane\r\nStatus/Public Access: maintained, private fishing lodge\r\nDate Built: 1838-1850, burnt 1923, rebuilt 1928\r\nName of House: House of Lan\r\nTownland: Another town land\r\nNear: Killary Harbour, Leenane\r\nStatus/Public Access: maintained, private fishing lodge\r\nDate Built: 1838-1850, burnt 1923, rebuilt 1928\r\nName of House: New Lodge\r\nTownland: NewTownLand\r\nNear: Killary Harbour, Leenane\r\nStatus/Public Access: maintained, private fishing lodge\r\nDate Built: 1838-1850, burnt 1923, rebuilt 1928";
    using (var sr = new StringReader(txt))
    {
       string s;
       var nameOfHouse = string.Empty;
       var townland = string.Empty;
       while ((s = sr.ReadLine()) != null)
       {
          // Take everything after the FIRST colon so values that contain ':'
          // stay intact, and a line without a colon yields an empty value
          // instead of throwing IndexOutOfRangeException.
          var colon = s.IndexOf(':');
          var value = colon >= 0 ? s.Substring(colon + 1).Trim() : string.Empty;

          if (s.StartsWith("Name of House"))
          {
              // Assign rather than append so an incomplete record cannot
              // bleed into the next one.
              nameOfHouse = value;
          }
          else if (s.StartsWith("Townland"))
          {
               townland = value;
          }

          if (nameOfHouse.Length > 0 && townland.Length > 0)
          {
              // INSERT THE VALUES AND RESET THEM
              nameOfHouse = string.Empty;
              townland = string.Empty;
          }
       }
    }
    
    试试这个

    using System;
    using System.Collections.Generic;
    using System.Configuration;
    using System.Linq;
    using System.Text;
    using System.IO;
    using System.Data;
    using System.Data.SqlClient;

    namespace ConsoleApplication21
    {
        /// <summary>
        /// Parses a text file of "Key: value" lines into house records and
        /// inserts one row per record into the Houses table.
        /// </summary>
        class Program
        {
            const string FILENAME = @"c:\temp\test.txt";

            static void Main(string[] args)
            {
                List<TakenBMI> takenBMIs = new List<TakenBMI>();
                TakenBMI newTakenBMI = null;

                using (StreamReader reader = new StreamReader(FILENAME))
                {
                    string inputLine;
                    while ((inputLine = reader.ReadLine()) != null)
                    {
                        inputLine = inputLine.Trim();
                        if (inputLine.Length == 0)
                        {
                            continue;
                        }

                        // Split on the first colon only, so values that
                        // contain ':' are not truncated.
                        string[] inputArray = inputLine.Split(new char[] { ':' }, 2);
                        if (inputArray.Length < 2)
                        {
                            continue;
                        }

                        string key = inputArray[0].Trim();
                        string value = inputArray[1].Trim();

                        // "Name of House" starts a new record.
                        if (key == "Name of House")
                        {
                            newTakenBMI = new TakenBMI();
                            takenBMIs.Add(newTakenBMI);
                            newTakenBMI.Name_of_House = value;
                            continue;
                        }

                        // Ignore fields that appear before the first record
                        // header instead of dereferencing null.
                        if (newTakenBMI == null)
                        {
                            continue;
                        }

                        switch (key)
                        {
                            case "Townland":
                                newTakenBMI.Townland = value;
                                break;
                            case "Near":
                                newTakenBMI.Near = value;
                                break;
                            case "Status/Public Access":
                                newTakenBMI.Status_Public_Access = value;
                                break;
                            case "Date Built":
                                newTakenBMI.Date_Built = value;
                                break;
                        }
                    }
                }

                // Parameter markers must NOT be quoted; quoted markers would
                // insert the literal text '@name' instead of the value.
                string SQL = "INSERT INTO Houses (Name, Townland, Near, Status, Built) " +
                             "VALUES (@name, @town, @near, @status, @built)";

                using (SqlConnection con = new SqlConnection(ConfigurationManager.ConnectionStrings["ConnectionString"].ToString()))
                using (SqlCommand com = new SqlCommand(SQL, con))
                {
                    // Size -1 maps to NVARCHAR(MAX), matching the table columns.
                    com.Parameters.Add("@name", SqlDbType.NVarChar, -1);
                    com.Parameters.Add("@town", SqlDbType.NVarChar, -1);
                    com.Parameters.Add("@near", SqlDbType.NVarChar, -1);
                    com.Parameters.Add("@status", SqlDbType.NVarChar, -1);
                    com.Parameters.Add("@built", SqlDbType.NVarChar, -1);

                    con.Open();

                    foreach (TakenBMI takenBMI in takenBMIs)
                    {
                        // A null Value means "parameter not supplied"; send
                        // DBNull for fields the record did not contain.
                        com.Parameters["@name"].Value = (object)takenBMI.Name_of_House ?? DBNull.Value;
                        com.Parameters["@town"].Value = (object)takenBMI.Townland ?? DBNull.Value;
                        com.Parameters["@near"].Value = (object)takenBMI.Near ?? DBNull.Value;
                        com.Parameters["@status"].Value = (object)takenBMI.Status_Public_Access ?? DBNull.Value;
                        com.Parameters["@built"].Value = (object)takenBMI.Date_Built ?? DBNull.Value;
                        com.ExecuteNonQuery();
                    }
                }
            }
        }

        /// <summary>
        /// One house record; each property holds the text after the
        /// corresponding "Key:" label in the input file.
        /// </summary>
        public class TakenBMI
        {
            public string Name_of_House { get; set; }
            public string Townland { get; set; }
            public string Near { get; set; }
            public string Status_Public_Access { get; set; }
            public string Date_Built { get; set; }
        }

    }
    我认为用正则表达式无法实现。您需要使用 SingleLine 选项才能获取去掉换行符后的所有行。问题在于您的值在单词之间含有空格,因此需要以换行符作为终止符。不用正则表达式,我可以轻松地完成。
    @jdweng 你能给我举个例子吗?
    我在这一行遇到运行时错误:var nameValue = splts["Name of House"]; 它说字典里没有这个键。
    这很奇怪。您确定您发布的输入数据与 txt 变量中显示的一致吗?我发布了测试数据,请查看我的截图。请同时在问题中发布 txt 变量的截图。
    这来自一个包含 70000 多个单词的 Word 文档。我发布的 Word 文档示例仅针对一所房子,其他 570 所房子的格式相同……我不知道该怎么做。