C# pdf内容流解析
我需要解析PDF方面的帮助。这个PDF是在Illustrator中创建的,共有4个图层,每个图层包含一个图形路径对象。我想做的是取出全部4个图形路径,并把它们绘制到另一个PDF文件中;该文件的宽度和高度与原PDF相同,并且我希望这些路径绘制在相同的位置。(标签:c#, pdf-generation, pdfsharp, pdf-parsing)这是我目前编写的代码:
/// <summary>
/// Creates one <see cref="PDFMask"/> per entry of the page's /Resources /Properties
/// dictionary and hands each mask the next slice of the page's content stream, where a
/// slice runs up to and including the next "EMC " marker.
/// </summary>
/// <param name="page">Page whose /Contents stream and /Resources /Properties are read.</param>
/// <returns>
/// The masks in the order of the property keys. The list is shorter than the number of
/// keys (possibly empty) when a dictionary lookup yields null or when the stream runs out
/// of "EMC " markers.
/// </returns>
public static List<PDFMask> GetMasksFromPage(PdfPage page)
{
    List<PDFMask> masks = new List<PDFMask>();

    PdfDictionary contents = page.Elements.GetDictionary("/Contents");
    PdfDictionary resources = page.Elements.GetDictionary("/Resources");
    if (contents == null || contents.Stream == null || resources == null)
    {
        return masks;
    }

    PdfDictionary properties = resources.Elements.GetDictionary("/Properties");
    if (properties == null)
    {
        return masks;
    }

    // Read the stream bytes and build the marker once instead of on every use.
    byte[] stream = contents.Stream.UnfilteredValue;
    byte[] endMarker = Encoding.ASCII.GetBytes("EMC ");
    if (stream == null)
    {
        return masks;
    }

    PdfName[] keys = properties.Elements.KeyNames;
    int dataStartPointer = 0;
    for (int i = 0; i < keys.Length; i++)
    {
        if (dataStartPointer >= stream.Length)
        {
            break;
        }

        // NOTE(review): Utils.Bytes.IndexOf is assumed to return a negative value when
        // the marker is not found - confirm against its implementation.
        int dataEndPointer = Utils.Bytes.IndexOf(stream, endMarker, dataStartPointer);
        if (dataEndPointer < 0)
        {
            break;
        }

        // The slice includes the marker itself.
        int dataCount = dataEndPointer + endMarker.Length - dataStartPointer;

        PdfDictionary mc = properties.Elements.GetDictionary(keys[i].Value);
        PDFMask mask = new PDFMask();
        mask.name = mc.Elements.GetString("/Title");
        mask.key = keys[i].Value;

        // Copy into the freshly allocated buffer; mask.data is still null at this point,
        // so using it as the destination (as the previous version did) always throws.
        byte[] data = new byte[dataCount];
        Array.Copy(stream, dataStartPointer, data, 0, dataCount);
        mask.parseData(data);
        masks.Add(mask);

        // Skip one extra byte after the marker (presumably the line break that follows
        // "EMC " - confirm against real streams).
        dataStartPointer += dataCount + 1;
    }

    return masks;
}
这段代码取得了4个图层图形的二进制数据。
下面是表示单个图层的PDFMask类:
/// <summary>
/// Plain data holder for one layer: its title, its property key and the raw
/// content-stream bytes that belong to it.
/// </summary>
public class PDFMask
{
    /// <summary>Layer title.</summary>
    public string name;

    /// <summary>Key of the layer's entry in the properties dictionary.</summary>
    public string key;

    /// <summary>Raw bytes last passed to <see cref="parseData"/>.</summary>
    public byte[] data;

    /// <summary>
    /// Stores the given bytes on this mask. No actual parsing is done yet.
    /// </summary>
    /// <param name="data">Content-stream bytes of this layer; kept by reference.</param>
    public void parseData(byte[] data)
    {
        // Open question from the author: how to turn these bytes into a graphics object.
        this.data = data;
    }
}
现在,数据源是这样的:
/Layer /MC0 BDC
0.75 0.68 0.67 0.902 k
/GS0 gs
q 1 0 0 1 396.4473 1835.6143 cm
0 0 m
76.497 -132.515 l
-17.184 -159.051 l
76.496 -185.607 l
-0.003 -318.119 l
-72.563 -252.047 l
-50.486 -349.178 l
-202.179 -349.182 l
-180.097 -252.046 l
-252.658 -318.116 l
-329.154 -185.603 l
-235.473 -159.048 l
-329.154 -132.511 l
-252.654 0.002 l
-180.094 -66.07 l
-202.175 31.087 l
-50.482 31.081 l
-72.563 -66.072 l
h
f
Q
EMC
我正在寻找一个解析器(最好是PDFsharp的解析器),
它可以把这些数据解析成图形对象,以便我在另一个PDF文档中使用。

好的,我最后的做法是根据自己的需要编写了一个解析器。我把代码贴在这里,相信有一天它会对别人有所帮助:
/// <summary>
/// One path operator read from a content-stream line.
/// </summary>
public struct GD
{
    // Operands by token position: for a 'c' line a, b, c, d, x, y are tokens 0..5;
    // for any other operator only x and y are set (tokens 0 and 1).
    public double x, y, a, b, c, d;

    // First character of the line's last token, stored as its ASCII code.
    public byte t;
}

/// <summary>
/// The six numbers of a "q ... cm" line, stored in token order (tokens 1..6).
/// </summary>
public struct Coordinate
{
    public double locX, locY, oriX, oriY, xAxis, yAxis;
}

/// <summary>
/// One layer: its name, its property key, its raw content-stream bytes and the path
/// operators / "q ... cm" numbers extracted from those bytes.
/// </summary>
public class PDFMask
{
    private string _name;

    /// <summary>Part of the assigned name starting at the first '{', if there was one.</summary>
    public string fun;

    /// <summary>
    /// Layer name. When the assigned value contains '{', the text before it becomes the
    /// name and the rest (brace included) is stored in <see cref="fun"/>; otherwise the
    /// value is stored unchanged and <see cref="fun"/> is left as it was.
    /// </summary>
    public string name
    {
        get
        {
            return _name;
        }
        set
        {
            // A null value is stored as-is instead of throwing.
            int brace = value == null ? -1 : value.IndexOf('{');
            if (brace >= 0)
            {
                _name = value.Substring(0, brace);
                fun = value.Substring(brace);
            }
            else
            {
                _name = value;
            }
        }
    }

    public string key;
    public byte[] data;
    public GD[] graphicsDirectives;
    public Coordinate coordinate;

    /// <summary>
    /// Stores <paramref name="data"/> and scans it line by line, collecting every line
    /// that ends in " m", " l" or " c" into <see cref="graphicsDirectives"/> and reading
    /// a line of the form "q ... cm" into <see cref="coordinate"/>.
    /// </summary>
    /// <param name="data">
    /// Content-stream bytes. Lines may end in LF, CR or CRLF; a last line without a
    /// terminator is processed too. Null is treated as empty input.
    /// </param>
    /// <exception cref="FormatException">An operand of a recognised line is not a number.</exception>
    public void parseData(byte[] data)
    {
        this.data = data;

        // A growable list: the number of path segments is not bounded by the format.
        var directives = new System.Collections.Generic.List<GD>();
        int length = data == null ? 0 : data.Length;
        int lineStart = 0;

        // i == length acts as a virtual terminator so the final line is not dropped.
        for (int i = 0; i <= length; i++)
        {
            if (i < length && data[i] != (byte)'\n' && data[i] != (byte)'\r')
            {
                continue;
            }

            // Lines are examined in place, so their length is not limited by a scratch buffer.
            parseLine(data, lineStart, i - lineStart, directives);
            lineStart = i + 1;
        }

        graphicsDirectives = directives.ToArray();
    }

    /// <summary>
    /// Reads the numbers of a "q a b c d e f cm" line held in the first
    /// <paramref name="count"/> bytes of <paramref name="bytes"/>.
    /// </summary>
    /// <param name="bytes">ASCII bytes of the line, starting at index 0.</param>
    /// <param name="count">Number of bytes that belong to the line.</param>
    /// <returns>Tokens 1..6 of the line, in order.</returns>
    public Coordinate parseDataWriteCoordinate(byte[] bytes, int count)
    {
        return readCoordinate(splitTokens(bytes, 0, count));
    }

    /// <summary>
    /// Reads one path operator line held in the first <paramref name="count"/> bytes of
    /// <paramref name="bytes"/>.
    /// </summary>
    /// <param name="bytes">ASCII bytes of the line, starting at index 0.</param>
    /// <param name="count">Number of bytes that belong to the line.</param>
    /// <returns>The operator character and its operands.</returns>
    public GD parseDataWriteGD(byte[] bytes, int count)
    {
        return readGD(splitTokens(bytes, 0, count));
    }

    // Classifies one line by its trailing operator and records what it describes.
    private void parseLine(byte[] bytes, int offset, int count, System.Collections.Generic.List<GD> directives)
    {
        if (count <= 2)
        {
            return;
        }

        byte last = bytes[offset + count - 1];
        byte beforeLast = bytes[offset + count - 2];
        if (beforeLast == ' ' && (last == 'c' || last == 'l' || last == 'm'))
        {
            directives.Add(readGD(splitTokens(bytes, offset, count)));
        }
        else if (count > 3 && bytes[offset] == 'q' && last == 'm' && beforeLast == 'c')
        {
            coordinate = readCoordinate(splitTokens(bytes, offset, count));
        }
    }

    // Decodes a byte range as ASCII and splits it on spaces, dropping empty tokens.
    private static string[] splitTokens(byte[] bytes, int offset, int count)
    {
        return Encoding.ASCII.GetString(bytes, offset, count)
            .Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    }

    // Maps tokens 1..6 (token 0 is the leading "q") onto the Coordinate fields.
    private static Coordinate readCoordinate(string[] values)
    {
        Coordinate result = new Coordinate();
        result.locX = parseNumber(values[1]);
        result.locY = parseNumber(values[2]);
        result.oriX = parseNumber(values[3]);
        result.oriY = parseNumber(values[4]);
        result.xAxis = parseNumber(values[5]);
        result.yAxis = parseNumber(values[6]);
        return result;
    }

    // Builds a GD from the operand tokens; the last token supplies the operator character.
    private static GD readGD(string[] values)
    {
        GD gd = new GD();
        gd.t = (byte)values[values.Length - 1][0];
        if (gd.t == 'c')
        {
            gd.a = parseNumber(values[0]);
            gd.b = parseNumber(values[1]);
            gd.c = parseNumber(values[2]);
            gd.d = parseNumber(values[3]);
            gd.x = parseNumber(values[4]);
            gd.y = parseNumber(values[5]);
        }
        else
        {
            gd.x = parseNumber(values[0]);
            gd.y = parseNumber(values[1]);
        }
        return gd;
    }

    // The stream always writes '.' as decimal separator, so parsing must not depend on
    // the current thread culture.
    private static double parseNumber(string text)
    {
        return double.Parse(
            text,
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture);
    }
}
公共结构GD{public double x,y,a,b,c,d;公共字节t;}
公共结构坐标{public double locX,locY,oriX,oriY,xAxis,yAxis;}
公共类PDFMask
{
私有字符串\u名称;
公共弦乐;
公共字符串名
{
收到
{
返回_name;
}
设置
{
if(value.Contains(“{”))
{
_name=value.Substring(0,value.IndexOf(“{”);
fun=value.Substring(value.IndexOf(“{”);
}
其他的
{
_名称=值;
}
}
}
公共字符串密钥;
公共字节[]数据;
公共GD[]图形指令;
公共协调;
公共数据(字节[]数据)
{
这个数据=数据;
graphicsDirectives=新GD[100];
int-gdCount=0;
字节[]缓冲区=新字节[100];
int bufferCount=0;
for(int i=0;i
public struct GD { public double x, y, a, b, c, d; public byte t; }
/// <summary>
/// Six numbers taken from a "q ... cm" line; PDFMask.parseDataWriteCoordinate fills
/// them from tokens 1..6 of that line, in the order the fields are declared here.
/// </summary>
public struct Coordinate
{
    public double locX;
    public double locY;
    public double oriX;
    public double oriY;
    public double xAxis;
    public double yAxis;
}
/// <summary>
/// One layer: its name, its property key, its raw content-stream bytes and the path
/// operators / "q ... cm" numbers extracted from those bytes.
/// </summary>
public class PDFMask
{
    private string _name;

    /// <summary>Part of the assigned name starting at the first '{', if there was one.</summary>
    public string fun;

    /// <summary>
    /// Layer name. When the assigned value contains '{', the text before it becomes the
    /// name and the rest (brace included) is stored in <see cref="fun"/>; otherwise the
    /// value is stored unchanged and <see cref="fun"/> is left as it was.
    /// </summary>
    public string name
    {
        get
        {
            return _name;
        }
        set
        {
            // A null value is stored as-is instead of throwing.
            int brace = value == null ? -1 : value.IndexOf('{');
            if (brace >= 0)
            {
                _name = value.Substring(0, brace);
                fun = value.Substring(brace);
            }
            else
            {
                _name = value;
            }
        }
    }

    public string key;
    public byte[] data;
    public GD[] graphicsDirectives;
    public Coordinate coordinate;

    /// <summary>
    /// Stores <paramref name="data"/> and scans it line by line, collecting every line
    /// that ends in " m", " l" or " c" into <see cref="graphicsDirectives"/> and reading
    /// a line of the form "q ... cm" into <see cref="coordinate"/>.
    /// </summary>
    /// <param name="data">
    /// Content-stream bytes. Lines may end in LF, CR or CRLF; a last line without a
    /// terminator is processed too. Null is treated as empty input.
    /// </param>
    /// <exception cref="FormatException">An operand of a recognised line is not a number.</exception>
    public void parseData(byte[] data)
    {
        this.data = data;

        // A growable list: the number of path segments is not bounded by the format.
        var directives = new System.Collections.Generic.List<GD>();
        int length = data == null ? 0 : data.Length;
        int lineStart = 0;

        // i == length acts as a virtual terminator so the final line is not dropped.
        for (int i = 0; i <= length; i++)
        {
            if (i < length && data[i] != (byte)'\n' && data[i] != (byte)'\r')
            {
                continue;
            }

            // Lines are examined in place, so their length is not limited by a scratch buffer.
            parseLine(data, lineStart, i - lineStart, directives);
            lineStart = i + 1;
        }

        graphicsDirectives = directives.ToArray();
    }

    /// <summary>
    /// Reads the numbers of a "q a b c d e f cm" line held in the first
    /// <paramref name="count"/> bytes of <paramref name="bytes"/>.
    /// </summary>
    /// <param name="bytes">ASCII bytes of the line, starting at index 0.</param>
    /// <param name="count">Number of bytes that belong to the line.</param>
    /// <returns>Tokens 1..6 of the line, in order.</returns>
    public Coordinate parseDataWriteCoordinate(byte[] bytes, int count)
    {
        return readCoordinate(splitTokens(bytes, 0, count));
    }

    /// <summary>
    /// Reads one path operator line held in the first <paramref name="count"/> bytes of
    /// <paramref name="bytes"/>.
    /// </summary>
    /// <param name="bytes">ASCII bytes of the line, starting at index 0.</param>
    /// <param name="count">Number of bytes that belong to the line.</param>
    /// <returns>The operator character and its operands.</returns>
    public GD parseDataWriteGD(byte[] bytes, int count)
    {
        return readGD(splitTokens(bytes, 0, count));
    }

    // Classifies one line by its trailing operator and records what it describes.
    private void parseLine(byte[] bytes, int offset, int count, System.Collections.Generic.List<GD> directives)
    {
        if (count <= 2)
        {
            return;
        }

        byte last = bytes[offset + count - 1];
        byte beforeLast = bytes[offset + count - 2];
        if (beforeLast == ' ' && (last == 'c' || last == 'l' || last == 'm'))
        {
            directives.Add(readGD(splitTokens(bytes, offset, count)));
        }
        else if (count > 3 && bytes[offset] == 'q' && last == 'm' && beforeLast == 'c')
        {
            coordinate = readCoordinate(splitTokens(bytes, offset, count));
        }
    }

    // Decodes a byte range as ASCII and splits it on spaces, dropping empty tokens.
    private static string[] splitTokens(byte[] bytes, int offset, int count)
    {
        return Encoding.ASCII.GetString(bytes, offset, count)
            .Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    }

    // Maps tokens 1..6 (token 0 is the leading "q") onto the Coordinate fields.
    private static Coordinate readCoordinate(string[] values)
    {
        Coordinate result = new Coordinate();
        result.locX = parseNumber(values[1]);
        result.locY = parseNumber(values[2]);
        result.oriX = parseNumber(values[3]);
        result.oriY = parseNumber(values[4]);
        result.xAxis = parseNumber(values[5]);
        result.yAxis = parseNumber(values[6]);
        return result;
    }

    // Builds a GD from the operand tokens; the last token supplies the operator character.
    private static GD readGD(string[] values)
    {
        GD gd = new GD();
        gd.t = (byte)values[values.Length - 1][0];
        if (gd.t == 'c')
        {
            gd.a = parseNumber(values[0]);
            gd.b = parseNumber(values[1]);
            gd.c = parseNumber(values[2]);
            gd.d = parseNumber(values[3]);
            gd.x = parseNumber(values[4]);
            gd.y = parseNumber(values[5]);
        }
        else
        {
            gd.x = parseNumber(values[0]);
            gd.y = parseNumber(values[1]);
        }
        return gd;
    }

    // The stream always writes '.' as decimal separator, so parsing must not depend on
    // the current thread culture.
    private static double parseNumber(string text)
    {
        return double.Parse(
            text,
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture);
    }
}