C# 如何使用 iText7 从 Azure Blob 存储中的 PDF 文件提取文本?
我正在使用itext7从pdf中提取文本。以下是我提取本地pdf文件文本的代码:C# 如何使用itext7从blob存储中存储的pdf中提取文本?,c#,azure,itext7,azure-blob-storage,C#,Azure,Itext7,Azure Blob Storage,我正在使用itext7从pdf中提取文本。以下是我提取本地pdf文件文本的代码: var pageText = new StringBuilder(); using(PdfDocument pdfDocument = new PdfDocument(new PdfReader("E:\\es.pdf"))) { var pageNumbers = pdfDocument.GetNumberOfPages(); for (i
// Collects the text of every page of the local PDF "E:\es.pdf" into pageText.
var pageText = new StringBuilder();
using (PdfDocument pdfDocument = new PdfDocument(new PdfReader("E:\\es.pdf"))) {
    var pageNumbers = pdfDocument.GetNumberOfPages();
    // Pages are visited by number, from 1 through the page count.
    for (int i = 1; i <= pageNumbers; i++) {
        // A fresh strategy and processor per page, so each iteration yields only that page's text.
        LocationTextExtractionStrategy strategy = new LocationTextExtractionStrategy();
        PdfCanvasProcessor parser = new PdfCanvasProcessor(strategy);
        // GetPage(i), not GetFirstPage(): otherwise page 1 is extracted on every
        // iteration and the remaining pages are never read.
        parser.ProcessPageContent(pdfDocument.GetPage(i));
        pageText.Append(strategy.GetResultantText());
    }
}
var pageText=new StringBuilder();
using(PdfDocument pdfDocument=new PdfDocument(new PdfReader("E:\\es.pdf"))){
var pageNumbers=pdfDocument.GetNumberOfPages();
for(int i=1;i…
如果您想从 Azure Blob 中读取 PDF,请参考以下代码:
// Opens "sample.pdf" from the "test" blob container, extracts the text of
// every page with iText, and writes the combined text to the console.
string storageAccountName = "andyprivate";
string accountKey = ""; // left empty in this sample

// Build the service client from the account endpoint and a shared-key credential.
Uri serviceUri = new Uri($"https://{storageAccountName}.blob.core.windows.net");
StorageSharedKeyCredential credential = new StorageSharedKeyCredential(storageAccountName, accountKey);
BlobServiceClient blobServiceClient = new BlobServiceClient(serviceUri, credential, new BlobClientOptions());

BlobContainerClient containerClient = blobServiceClient.GetBlobContainerClient("test");
BlobClient pdfBlob = containerClient.GetBlobClient("sample.pdf");

// The blob's content length is used below as the read buffer size.
BlobProperties properties = await pdfBlob.GetPropertiesAsync();

StringBuilder pageText = new StringBuilder();

// NOTE(review): the (int) cast narrows the content length — confirm blobs stay small enough for it.
using (var pdfStream = await pdfBlob.OpenReadAsync(position: 0, bufferSize: (int)properties.ContentLength))
using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfStream)))
{
    int pageCount = pdfDocument.GetNumberOfPages();

    for (int pageIndex = 1; pageIndex <= pageCount; pageIndex++)
    {
        // A new extraction strategy per page; the processor feeds the page content into it.
        LocationTextExtractionStrategy extraction = new LocationTextExtractionStrategy();
        new PdfCanvasProcessor(extraction).ProcessPageContent(pdfDocument.GetPage(pageIndex));

        // Page text followed by a line break, so pages are separated in the output.
        pageText.AppendLine(extraction.GetResultantText());
    }

    Console.WriteLine(pageText);
}
string storageAccountName="andyprivate";
string accountKey="";
var blobServiceClient=new BlobServiceClient(
new Uri($"https://{storageAccountName}.blob.core.windows.net"),
new StorageSharedKeyCredential(storageAccountName, accountKey),
new BlobClientOptions());
var containerClient=blobServiceClient.GetBlobContainerClient("test");
var blob=containerClient.GetBlobClient("sample.pdf");
BlobProperties properties=await blob.GetPropertiesAsync();
var pageText=new StringBuilder();
using(var stream=await blob.OpenReadAsync(position: 0, bufferSize: (int)properties.ContentLength))
using(PdfDocument pdfDocument=new PdfDocument(new PdfReader(stream))){
var pageNumbers=pdfDocument.GetNumberOfPages();
for(int i=1;…
根据我对您问题的理解,您可能需要先将字节/blob 转换为文件流,然后再转换为所需的格式。另外,如果您使用的是 .NET MVC,可以在此处(原链接缺失)找到一个很好的示例。
您还有其他疑问吗?如果没有,能否将其采纳为答案?