How to convert PDF to HTML programmatically in C#
If you are looking for a good solution for converting PDF files to HTML programmatically, try our PDF Focus .Net.

1. Convert a PDF file to HTML in C# and .NET:
/// <summary>
/// Converts a PDF file on disk into an HTML file placed next to it
/// (same name, ".htm" extension), with extracted images stored as PNG
/// files in a dedicated sub-folder.
/// </summary>
public static void PdfToHtmlAsFiles()
{
    string sourcePdf = @"..\..\..\..\..\simple text.pdf";
    string baseName = Path.GetFileNameWithoutExtension(sourcePdf);
    string targetHtml = Path.ChangeExtension(sourcePdf, ".htm");

    PdfFocus converter = new PdfFocus();

    // Images are rooted in the directory that also receives the HTML file.
    // This directory must already exist.
    converter.HtmlOptions.ImageFolder = Path.GetDirectoryName(sourcePdf);

    // The component creates this sub-folder itself to hold the images.
    // It is named after the PDF, i.e. "<pdf name>_images".
    converter.HtmlOptions.ImageSubFolder = baseName + "_images";

    converter.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Png;
    converter.HtmlOptions.Title = "Zoo page";

    converter.OpenPdf(sourcePdf);

    // Nothing to convert when the document reports no pages.
    if (converter.PageCount <= 0)
    {
        return;
    }

    // ToHtml(path) returns an integer status code; this sample does not inspect it.
    converter.ToHtml(targetHtml);
}
2. Convert PDF to HTML in memory using MemoryStream in C#:
/// <summary>
/// Converts a PDF supplied as a stream into an HTML string, keeping images
/// embedded inside the HTML so that no output files are written.
/// </summary>
/// <remarks>
/// A file is used only as a convenient source for the demonstration stream;
/// the stream passed to <c>OpenPdf</c> could come from anywhere.
/// </remarks>
public static void PdfToHtmlAsMemoryStream()
{
    // We are using files only for demonstration.
    // The whole conversion process will be done in memory.
    string pdfFile = @"d:\Zoo.pdf";
    PdfFocus f = new PdfFocus();

    // Open the PDF for reading only. The FileStream(path, FileMode) constructor
    // requests read/write access, which fails for read-only files or
    // locations where the process has no write permission.
    using (FileStream pdfStream = File.OpenRead(pdfFile))
    {
        f.OpenPdf(pdfStream);
        if (f.PageCount > 0)
        {
            // Let's force the component to store images inside the HTML document
            // using base-64 encoding.
            // Thus the component will not use the HDD.
            f.HtmlOptions.IncludeImageInHtml = true;
            f.HtmlOptions.InlineCSS = true;
            string html = f.ToHtml();
            // Here we have the HTML result as a string object.
        }
    }
}
3. Convert PDF to HTML in memory as byte[] using C#:
/// <summary>
/// Converts a PDF supplied as a byte array into an HTML string, keeping
/// images embedded inside the HTML so that no output files are written.
/// </summary>
/// <remarks>
/// The bytes are read from a file purely for demonstration; any
/// <c>byte[]</c> holding a PDF document can be passed to <c>OpenPdf</c>.
/// </remarks>
public static void PdfToHtmlAsByteArray()
{
    // The file is just a handy source of PDF bytes for this sample.
    string inputPath = @"d:\Zoo.pdf";
    byte[] pdfContent = File.ReadAllBytes(inputPath);

    PdfFocus converter = new PdfFocus();
    converter.OpenPdf(pdfContent);

    // Nothing to convert when the document reports no pages.
    if (converter.PageCount <= 0)
    {
        return;
    }

    // Keep images inside the HTML document itself (base-64 encoded),
    // so the component does not need to write to disk.
    converter.HtmlOptions.IncludeImageInHtml = true;
    converter.HtmlOptions.InlineCSS = true;

    // The conversion result is available here as a string.
    string html = converter.ToHtml();
}
|