C# 读取 Word 文档标题下的所有段落,并将其导出到 Excel(C# Windows 应用程序)
下面的代码会读取 Word 文档中每个标题下的段落,但只读到第一个回车符为止。回车符之后、仍属于同一标题的段落,我无法读取。有没有办法做到这一点?有人能告诉我这是否可行吗? C# 读取 Word 文档标题下的所有段落,并将其导出到 Excel(C# Windows 应用程序),c#,ms-word,office-interop,C#,Ms Word,Office Interop,下面的代码会读取 Word 文档中每个标题下的段落,但只读到第一个回车符为止。回车符之后、仍属于同一标题的段落,我无法读取。有没有办法做到这一点?有人能告诉我这是否可行吗? foreach (Microsoft.Office.Interop.Word.Paragraph paragraph in Doc.Paragraphs) { Microsoft.Office.Interop.Word.Style style = paragraph.get_Style() as Microsoft.Off
// Question snippet: walks every paragraph of Doc and, for heading paragraphs,
// stores the sentences of the paragraph that immediately follows the heading.
// Doc and paracount are declared outside this snippet.
foreach (Microsoft.Office.Interop.Word.Paragraph paragraph in Doc.Paragraphs)
{
// The `as` cast yields null if the returned object is not a Style; the
// NameLocal access on the next statement would then throw.
Microsoft.Office.Interop.Word.Style style =
paragraph.get_Style() as Microsoft.Office.Interop.Word.Style;
string styleName = style.NameLocal;
string text = paragraph.Range.Text.Trim();
// Split the paragraph's own text on '.'; each piece becomes a key prefix below.
string[] words = text.Split('.');
// Any style name containing "Heading" passes; the first test already covers
// the three more specific ones that follow it.
if (styleName.Contains("Heading")
|| styleName.Contains("Heading1")
|| styleName.Contains("Heading2")
|| styleName.Contains("Heading3"))
{
foreach (string word in words)
{
// Only the single paragraph directly after the heading is inspected here;
// later paragraphs under the same heading are never visited by this code,
// which is the behaviour the question is asking about.
if(paragraph.Next() !=null)
{
// Counter appended to the key; restarts at 1 for every heading fragment.
int j = 1;
string data = paragraph.Next().Range.Text.ToString().Trim();
// string h = paragraph.Next().Range.Tables.ToString().Trim();
// Split the following paragraph on '.' and store each piece separately.
string[] dataf = data.Split('.');
foreach (string dat in dataf)
{
// Key is the heading fragment followed by the running index.
// NOTE(review): if paracount is a dictionary, repeated heading fragments
// would produce duplicate keys - confirm against its declaration.
paracount.Add(word + j, dat);
j++;
}
}
}
}
其他样式的文本会怎样处理?比如,我希望读取标题后面样式为 Normal(正文)的文本。
我是把标题后面的文本当作段落来读取的。如果样式是 Normal,它就会退出循环;如果样式是标题,它会读取标题下的所有段落,直到遇到换行符或转义字符。
如何读取标题下 Normal 样式的文本?我知道这可以通过访问 XML 来完成,但我不确定如何通过互操作(Interop)来实现。
我认为这不可行,我自己也找不到任何继续下去的办法。
/// <summary>
/// Collects, for every "Heading 1" paragraph of a Word document, the text of the
/// paragraphs that follow it up to (but not including) the next "Heading 1".
/// </summary>
/// <param name="docname">Document name handed to <c>ReadMsWord</c>.</param>
/// <returns>
/// One tuple per "Heading 1" paragraph, in document order. Item1 is the heading
/// text with every character other than ASCII letters and whitespace removed and
/// then trimmed. Item2 is the concatenated text of the section with all whitespace
/// and all '\a' characters removed. The list is empty when the document has no
/// "Heading 1" paragraph, and holds whatever was gathered so far when reading fails.
/// </returns>
/// <remarks>
/// Failures are reported through a message box rather than thrown, matching the
/// original error-handling style of this snippet.
/// NOTE(review): ReadMsWord creates a Word application instance that is not shut
/// down here - confirm who owns its lifetime.
/// </remarks>
private List<Tuple<string, string>> GetPlainTextByHeaderFromWordDoc(string docname)
{
    const string headingStyleName = "Heading 1";
    List<Tuple<string, string>> docPlainTextWithHeaderList = new List<Tuple<string, string>>();
    try
    {
        // ReadMsWord as declared in this file takes only the document name.
        Document doc = ReadMsWord(docname);
        try
        {
            foreach (Paragraph paragraph in doc.Paragraphs)
            {
                Style style = paragraph.get_Style() as Style;
                if (style == null || style.NameLocal != headingStyleName)
                {
                    continue;
                }

                string headerText = paragraph.Range.Text.Trim();

                // Walk forward one paragraph at a time from the heading. The walk
                // stops at the end of the document or at the next heading, so each
                // paragraph is fetched from Word exactly once.
                System.Text.StringBuilder textBelowHeader = new System.Text.StringBuilder();
                Paragraph current = paragraph.Next();
                while (current != null)
                {
                    Style currentStyle = current.get_Style() as Style;
                    if (currentStyle != null && currentStyle.NameLocal == headingStyleName)
                    {
                        break;
                    }

                    textBelowHeader.Append(current.Range.Text);
                    current = current.Next();
                }

                string header = Regex.Replace(headerText, "[^a-zA-Z\\s]", string.Empty).Trim();
                string belowText = Regex.Replace(textBelowHeader.ToString(), @"\s+", string.Empty)
                    .Replace("\a", string.Empty);
                docPlainTextWithHeaderList.Add(new Tuple<string, string>(header, belowText));
            }
        }
        finally
        {
            // Close the document even when reading a paragraph throws.
            doc.Close(Type.Missing, Type.Missing, Type.Missing);
        }
    }
    catch (Exception ex)
    {
        // ToString() includes the exception type and message as well as the stack trace.
        MessageBox.Show(ex.ToString());
    }

    return docPlainTextWithHeaderList;
}
//This will read and return word document
private Document ReadMsWord(string docName)
{
Document docs = new Document();
try
{
// variable to store file path
string FilePath = @"C:\Kaustubh_Tupe\WordRepository/docName.docx";
// create word application
Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.Application();
// create object of missing value
object miss = System.Reflection.Missing.Value;
// create object of selected file path
object path = FilePath;
// set file path mode
object readOnly = false;
// open Destination
docs = word.Documents.Open(ref path, ref miss, ref readOnly,
ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss,
ref miss, ref miss, ref miss, ref miss, ref miss);
//select whole data from active window Destination
docs.ActiveWindow.Selection.WholeStory();
// handover the data to cllipboard
docs.ActiveWindow.Selection.Copy();
// clipboard create reference of idataobject interface which transfer the data
}
catch (Exception ex)
{
//MessageBox.Show(ex.ToString());
}
return docs;