C# 无法使用OpenxmlSDK从DOCX文件读取形状
我有一个要求,我必须解析一个DOCX文件并提取所有文本和图像。我正在使用OpenXMLSDK2.5来实现这一点。我能够解析图像和文本,但DOCX也有一组形状,我试图解析它们并将其转换为绘图图像,这给了我错误的结果 是我试图解析的示例docx文件 我引用了这个,并尝试了同样的方法,但没有运气 我用以下代码创建的DOCX没有任何已解析的图像C# 无法使用OpenxmlSDK从DOCX文件读取形状,c#,ms-word,openxml,openxml-sdk,C#,Ms Word,Openxml,Openxml Sdk,我有一个要求,我必须解析一个DOCX文件并提取所有文本和图像。我正在使用OpenXMLSDK2.5来实现这一点。我能够解析图像和文本,但DOCX也有一组形状,我试图解析它们并将其转换为绘图图像,这给了我错误的结果 是我试图解析的示例docx文件 我引用了这个,并尝试了同样的方法,但没有运气 我用以下代码创建的DOCX没有任何已解析的图像 using System.Collections.Generic; using System.Linq; using System.IO; using Syst
using System.Collections.Generic;
using System.Linq;
using System.IO;
using System.Drawing;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using DocumentFormat.OpenXml.Vml;
using DocumentFormat.OpenXml;
namespace ReadGroupShape
{
class Program
{
static List<Bitmap> images = new List<Bitmap>();
static void Main(string[] args)
{
MainDocumentPart mainPart = null;
Body content = null;
WordprocessingDocument newDoc = WordprocessingDocument.Create("NewDocx.docx", WordprocessingDocumentType.Document);
MainDocumentPart newMainPart = newDoc.AddMainDocumentPart();
newMainPart.Document = new Document();
Body newbody = newMainPart.Document.AppendChild(new Body());
byte[] docBytes = File.ReadAllBytes("SampleDoc.docx");
using (MemoryStream ms = new MemoryStream())
{
ms.Write(docBytes, 0, docBytes.Length);
using (WordprocessingDocument wpDoc = WordprocessingDocument.Open(ms, true))
{
mainPart = wpDoc.MainDocumentPart;
content = mainPart.Document.Body;
foreach (Paragraph par in content.Descendants<Paragraph>())
{
Paragraph npar = newbody.AppendChild(new Paragraph());
foreach (Run run in par.Descendants<Run>())
{
Run nrun = npar.AppendChild(new Run());
DocumentFormat.OpenXml.Drawing.Blip pic = run.Descendants<DocumentFormat.OpenXml.Drawing.Blip>().FirstOrDefault();
ImageData imageData = run.Descendants<ImageData>().FirstOrDefault();
if (pic == null && imageData == null)
{
nrun.InsertAfterSelf(run.CloneNode(true));
}
else
{
if (pic != null)
{
nrun.InsertAfterSelf(CreateImageFromBlip(wpDoc, run, newMainPart, pic));
}
else if (imageData != null)
{
nrun.InsertAfterSelf(CreateImageFromShape(wpDoc, run, newMainPart, imageData));
}
}
}
}
mainPart.Document.Save();
}
}
newMainPart.Document.Save();
newDoc.Close();
}
private static Run CreateImageFromShape(WordprocessingDocument sourceDoc, Run sourceRun, MainDocumentPart mainpart, ImageData imageData)
{
ImagePart p = sourceDoc.MainDocumentPart.GetPartById(imageData.RelationshipId) as ImagePart;
return CreateImageRun(sourceDoc, sourceRun, mainpart, p);
}
private static Run CreateImageFromBlip(WordprocessingDocument sourceDoc, Run sourceRun, MainDocumentPart mainpart, DocumentFormat.OpenXml.Drawing.Blip blip)
{
ImagePart newPart = mainpart.AddImagePart(ImagePartType.Png);
ImagePart p = sourceDoc.MainDocumentPart.GetPartById(blip.Embed.Value) as ImagePart;
Bitmap image = new Bitmap(p.GetStream());
using (Stream s = p.GetStream())
{
s.Position = 0;
newPart.FeedData(s);
}
string partId = mainpart.GetIdOfPart(newPart);
Drawing newImage = CreateImage(partId);
return new Run(newImage);
}
private static Run CreateImageRun(WordprocessingDocument sourceDoc, Run sourceRun, MainDocumentPart mainpart, ImagePart p)
{
ImagePart newPart = mainpart.AddImagePart(ImagePartType.Png);
using (Stream s = p.GetStream())
{
s.Position = 0;
newPart.FeedData(s);
}
string partId = mainpart.GetIdOfPart(newPart);
Drawing newImage = CreateImage(partId);
return new Run(newImage);
}
private static Drawing CreateImage(string relationshipId)
{
// Define the reference of the image.
return new Drawing(
new DocumentFormat.OpenXml.Drawing.Wordprocessing.Inline(
new DocumentFormat.OpenXml.Drawing.Wordprocessing.Extent() { Cx = 990000L, Cy = 792000L },
new DocumentFormat.OpenXml.Drawing.Wordprocessing.EffectExtent()
{
LeftEdge = 0L,
TopEdge = 0L,
RightEdge = 0L,
BottomEdge = 0L
},
new DocumentFormat.OpenXml.Drawing.Wordprocessing.DocProperties()
{
Id = (UInt32Value)1U,
Name = "Picture 1"
},
new DocumentFormat.OpenXml.Drawing.Wordprocessing.NonVisualGraphicFrameDrawingProperties(
new DocumentFormat.OpenXml.Drawing.GraphicFrameLocks() { NoChangeAspect = true }),
new DocumentFormat.OpenXml.Drawing.Graphic(
new DocumentFormat.OpenXml.Drawing.GraphicData(
new DocumentFormat.OpenXml.Drawing.Picture(
new DocumentFormat.OpenXml.Drawing.NonVisualPictureProperties(
new DocumentFormat.OpenXml.Drawing.NonVisualDrawingProperties()
{
Id = (UInt32Value)0U,
Name = "New Bitmap Image.jpg"
},
new DocumentFormat.OpenXml.Drawing.NonVisualPictureDrawingProperties()),
new DocumentFormat.OpenXml.Drawing.BlipFill(
new DocumentFormat.OpenXml.Drawing.Blip(
new DocumentFormat.OpenXml.Drawing.BlipExtensionList(
new DocumentFormat.OpenXml.Drawing.BlipExtension()
{
Uri =
"{28A0092B-C50C-407E-A947-70E740481C1C}"
})
)
{
Embed = relationshipId,
CompressionState =
DocumentFormat.OpenXml.Drawing.BlipCompressionValues.Print
},
new DocumentFormat.OpenXml.Drawing.Stretch(
new DocumentFormat.OpenXml.Drawing.FillRectangle())),
new DocumentFormat.OpenXml.Drawing.ShapeProperties(
new DocumentFormat.OpenXml.Drawing.Transform2D(
new DocumentFormat.OpenXml.Drawing.Offset() { X = 0L, Y = 0L },
new DocumentFormat.OpenXml.Drawing.Extents() { Cx = 990000L, Cy = 792000L }),
new DocumentFormat.OpenXml.Drawing.PresetGeometry(
new DocumentFormat.OpenXml.Drawing.AdjustValueList()
)
{ Preset = DocumentFormat.OpenXml.Drawing.ShapeTypeValues.Rectangle }))
)
{ Uri = "http://schemas.openxmlformats.org/drawingml/2006/picture" })
)
{
DistanceFromTop = (UInt32Value)0U,
DistanceFromBottom = (UInt32Value)0U,
DistanceFromLeft = (UInt32Value)0U,
DistanceFromRight = (UInt32Value)0U,
EditId = "50D07946"
});
}
}
}
使用System.Collections.Generic;
使用System.Linq;
使用System.IO;
使用系统图;
使用DocumentFormat.OpenXml.Packaging;
使用DocumentFormat.OpenXml.Wordprocessing;
使用DocumentFormat.OpenXml.Vml;
使用DocumentFormat.OpenXml;
命名空间ReadGroupShape
{
班级计划
{
静态列表图像=新列表();
静态void Main(字符串[]参数)
{
MainDocumentPart mainPart=null;
正文内容=空;
WordprocessingDocument newDoc=WordprocessingDocument.Create(“NewDocx.docx”,WordprocessingDocumentType.Document);
MainDocumentPart newMainPart=newDoc.AddMainDocumentPart();
newMainPart.Document=新文档();
Body newbody=newMainPart.Document.AppendChild(newbody());
byte[]docBytes=File.ReadAllBytes(“SampleDoc.docx”);
使用(MemoryStream ms=new MemoryStream())
{
ms.Write(docBytes,0,docBytes.Length);
使用(WordprocessingDocument wpDoc=WordprocessingDocument.Open(ms,true))
{
mainPart=wpDoc.MainDocumentPart;
content=mainPart.Document.Body;
前缀(内容中的段落)
{
段落npar=newbody.AppendChild(新段落());
Frach(运行在PAR。
{
Run nrun=npar.AppendChild(new Run());
DocumentFormat.OpenXml.Drawing.Blip pic=run.subjects().FirstOrDefault();
ImageData ImageData=run.subjects().FirstOrDefault();
if(pic==null&&imageData==null)
{
nrun.InsertAfterSelf(run.CloneNode(true));
}
其他的
{
如果(pic!=null)
{
nrun.InsertAfterSelf(CreateImageFromBlip(wpDoc,run,newMainPart,pic));
}
else if(imageData!=null)
{
nrun.InsertAfterSelf(CreateImageFromShape(wpDoc,run,newMainPart,imageData));
}
}
}
}
mainPart.Document.Save();
}
}
newMainPart.Document.Save();
newDoc.Close();
}
私有静态运行CreateImageFromShape(WordprocessingDocument sourceDoc、运行sourceRun、MainDocumentPart mainpart、ImageData ImageData)
{
ImagePart p=sourceDoc.MainDocumentPart.GetPartById(imageData.RelationshipId)作为ImagePart;
返回CreateImageRun(sourceDoc、sourceRun、mainpart、p);
}
私有静态运行CreateImageFromBlip(WordProcessingDocumentSourceDoc,运行sourceRun,MainDocumentPart mainpart,DocumentFormat.OpenXml.Drawing.Blip Blip)
{
ImagePart newPart=mainpart.AddImagePart(ImagePartType.Png);
ImagePart p=sourceDoc.MainDocumentPart.GetPartById(blip.Embed.Value)作为ImagePart;
位图图像=新位图(p.GetStream());
使用(streams=p.GetStream())
{
s、 位置=0;
新零件。FeedData(多个);
}
string partId=mainpart.GetIdOfPart(newPart);
绘图newImage=CreateImage(partId);
返回新运行(newImage);
}
私有静态运行CreateImageRun(WordprocessingDocument sourceDoc、运行sourceRun、MainDocumentPart mainpart、ImagePart p)
{
ImagePart newPart=mainpart.AddImagePart(ImagePartType.Png);
使用(streams=p.GetStream())
{
s、 位置=0;
新零件。FeedData(多个);
}
string partId=mainpart.GetIdOfPart(newPart);
绘图newImage=CreateImage(partId);
返回新运行(newImage);
}
私有静态图形CreateImage(字符串关系ID)
{
//定义图像的引用。
返回新图纸(
新建DocumentFormat.OpenXml.Drawing.Wordprocessing.Inline(
新的DocumentFormat.OpenXml.Drawing.Wordprocessing.Extent(){Cx=99000L,Cy=792000L},
新的DocumentFormat.OpenXml.Drawing.Wordprocessing.EffectExtent()文件
{
LeftEdge=0升,
TopEdge=0升,
RightEdge=0升,
底边=0升
},
新的DocumentFormat.OpenXml.Drawing.Wordprocessing.DocProperties()
{
Id=(UINT32值)1U,
Name=“图片1”
},
新DocumentFormat.OpenXml.Drawi