#include <windows.h>
#include <shellapi.h>
#include <conio.h>

#define PDF_STATIC // Enable static binding. This define makes nothing when we link dynamically.
#include "../../../../include/C_CPP/dynapdf.h"
#include "pdf_text_extraction.h"

#if defined(WIN64) || defined(_WIN64)
   #ifdef _DLL
      #pragma comment(lib, "../../../../win64/dynapdfm.lib") // Multithreaded-DLL
   #else
      #pragma comment(lib, "../../../../win64/dynapdf.lib")  // Multithreaded
   #endif
#elif defined(_DLL)
   #pragma comment(lib, "../../../../win32/dynapdfm.lib") // Multithreaded-DLL
#else
   #pragma comment(lib, "../../../../win32/dynapdf.lib")  // Multithreaded
#endif

using namespace DynaPDF;

/*
   Note that the dynapdf.dll must be copied into the output directory or into a
   Windows search path (e.g. %WINDIR%\System32) before the application can be executed!
*/

// Error callback function: prints the error message, followed by a line break, to stdout.
// If the function name should not appear at the beginning of the error message then set
// the flag emNoFuncNames (pdfSetErrorMode(emNoFuncNames);).
// Data, ErrCode and ErrType are not evaluated. The function always returns 0.
SI32 PDF_CALL PDFError(const void* Data, SI32 ErrCode, const char* ErrMessage, SI32 ErrType)
{
   // Only the message text is of interest in this example.
   (void)Data;
   (void)ErrCode;
   (void)ErrType;

   printf("%s\n", ErrMessage);
   return 0;
}

// Parser callback: forwards BBox and Matrix to CTextExtraction::BeginTemplate() and
// returns its result. Data is expected to point to the CTextExtraction instance;
// Object and Handle are not used.
SI32 PDF_CALL parseBeginTemplate(const void* Data, const BYTE* Object, SI32 Handle, struct TPDFRect* BBox, struct TCTM* Matrix)
{
   (void)Object;
   (void)Handle;
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   return extractor->BeginTemplate(BBox, Matrix);
}

// Parser callback: forwards to CTextExtraction::EndTemplate().
// Data is expected to point to the CTextExtraction instance.
void PDF_CALL parseEndTemplate(const void* Data)
{
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   extractor->EndTemplate();
}

// Parser callback: forwards the matrix M to CTextExtraction::MulMatrix().
// Data is expected to point to the CTextExtraction instance; Object is not used.
void PDF_CALL parseMulMatrix(const void* Data, const BYTE* Object, struct TCTM* M)
{
   (void)Object;
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   extractor->MulMatrix(M);
}

// Parser callback: forwards to CTextExtraction::RestoreGState() and always returns 0.
// Data is expected to point to the CTextExtraction instance.
SI32 PDF_CALL parseRestoreGraphicState(const void* Data)
{
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   extractor->RestoreGState();
   return 0;
}

// Parser callback: forwards to CTextExtraction::SaveGState() and returns its result.
// Data is expected to point to the CTextExtraction instance.
SI32 PDF_CALL parseSaveGraphicState(const void* Data)
{
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   return extractor->SaveGState();
}

// Parser callback: forwards Value to CTextExtraction::SetCharSpacing().
// Data is expected to point to the CTextExtraction instance; Object is not used.
void PDF_CALL parseSetCharSpacing(const void* Data, const BYTE* Object, double Value)
{
   (void)Object;
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   extractor->SetCharSpacing(Value);
}

// Parser callback: converts the color with pdfConvColor() using esDeviceRGB as the
// destination color space and passes the result to CTextExtraction::SetFillColor().
// Data is expected to point to the CTextExtraction instance; Object is not used.
void PDF_CALL parseSetFillColor(const void* Data, const BYTE* Object, double* Color, UI32 NumComps, TExtColorSpace CS, void* IColorSpace)
{
   (void)Object;
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   extractor->SetFillColor(
      pdfConvColor(Color, NumComps, CS, IColorSpace, esDeviceRGB));
}

// Parser callback: forwards FontSize, Type and Font to CTextExtraction::SetFont().
// Data is expected to point to the CTextExtraction instance; Object, Embedded,
// FontName and Style are not used.
void PDF_CALL parseSetFont(const void* Data, const BYTE* Object, TFontType Type, LBOOL Embedded, const char* FontName, TFStyle Style, double FontSize, const void* Font)
{
   (void)Object;
   (void)Embedded;
   (void)FontName;
   (void)Style;
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   extractor->SetFont(FontSize, Type, Font);
}

// Parser callback: converts the color with pdfConvColor() using esDeviceRGB as the
// destination color space and passes the result to CTextExtraction::SetStrokeColor().
// Data is expected to point to the CTextExtraction instance; Object is not used.
void PDF_CALL parseSetStrokeColor(const void* Data, const BYTE* Object, double* Color, UI32 NumComps, TExtColorSpace CS, void* IColorSpace)
{
   (void)Object;
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   extractor->SetStrokeColor(
      pdfConvColor(Color, NumComps, CS, IColorSpace, esDeviceRGB));
}

// Parser callback: forwards Mode to CTextExtraction::SetTextDrawMode().
// Data is expected to point to the CTextExtraction instance; Object is not used.
void PDF_CALL parseSetTextDrawMode(const void* Data, const BYTE* Object, TDrawMode Mode)
{
   (void)Object;
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   extractor->SetTextDrawMode(Mode);
}

// Parser callback: forwards Value to CTextExtraction::SetTextScale().
// Data is expected to point to the CTextExtraction instance; Object is not used.
void PDF_CALL parseSetTextScale(const void* Data, const BYTE* Object, double Value)
{
   (void)Object;
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   extractor->SetTextScale(Value);
}

// Parser callback: forwards Value to CTextExtraction::SetWordSpacing().
// Data is expected to point to the CTextExtraction instance; Object is not used.
void PDF_CALL parseSetWordSpacing(const void* Data, const BYTE* Object, double Value)
{
   (void)Object;
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   extractor->SetWordSpacing(Value);
}

// Parser callback: hands the text records over to CTextExtraction::AddText() and
// returns its result. The Decoded flag is passed on as a bool (true if non-zero).
// Data is expected to point to the CTextExtraction instance.
SI32 PDF_CALL parseShowTextArrayW(const void* Data, const TTextRecordA* Source, struct TCTM* M, const struct TTextRecordW* Kerning, UI32 Count, double Width, LBOOL Decoded)
{
   CTextExtraction* const extractor = static_cast<CTextExtraction*>(const_cast<void*>(Data));
   const bool isDecoded = (Decoded != 0);
   return extractor->AddText(M, Source, Kerning, Count, Width, isDecoded);
}

int main(int argc, char* argv[])
{
   char filePath[MAX_PATH+1];
   UI32 timeStart = GetTickCount();
   PPDF* pdf = pdfNewPDF();
   pdfSetOnErrorProc(pdf, NULL, PDFError);
   pdfCreateNewPDF(pdf, NULL); // We do not produce a PDF file in this example!

   // External cmaps should always be loaded when extracting text from PDF files.
   // See the description of ParseContent() for further information.
   _fullpath(filePath, "../../../../Resource/CMap/", MAX_PATH);
   pdfSetCMapDir(pdf, filePath, (TLoadCMapFlags)(lcmRecursive | lcmDelayed));

   pdfSetImportFlags(pdf, ifImportAll | ifImportAsPage);
   if (pdfOpenImportFile(pdf, "../../../../dynapdf_help.pdf", ptOpen, NULL) < 0)
   {
      pdfDeletePDF(pdf);
      _getch();
      return 0;
   }
   pdfImportPDFFile(pdf, 1, 1.0, 1.0);
   pdfCloseImportFile(pdf);

   // We flatten markup annotations and form fields so that we can extract the text from these objects too.
   pdfFlattenAnnots(pdf, affMarkupAnnots);
   pdfFlattenForm(pdf);

   TPDFParseInterface stack;
   memset(&stack, 0, sizeof(stack));

   // More callback functions are not required to extract text
   stack.BeginTemplate       = parseBeginTemplate;
   stack.EndTemplate         = parseEndTemplate;
   stack.MulMatrix           = parseMulMatrix;
   stack.RestoreGraphicState = parseRestoreGraphicState;
   stack.SaveGraphicState    = parseSaveGraphicState;
   stack.SetCharSpacing      = parseSetCharSpacing;
   stack.SetFillColor        = parseSetFillColor;
   stack.SetFont             = parseSetFont;
   stack.SetStrokeColor      = parseSetStrokeColor;
   stack.SetTextDrawMode     = parseSetTextDrawMode;
   stack.SetTextScale        = parseSetTextScale;
   stack.SetWordSpacing      = parseSetWordSpacing;
   stack.ShowTextArrayW      = parseShowTextArrayW;

   // We write the output file into the current directory.
   GetCurrentDirectory(511, filePath);
   strcat(filePath, "\\out.txt");

   CTextExtraction textStack;
   // The output file must be opened before the parser is executed!
   if (!textStack.Open(filePath))
   {
      pdfDeletePDF(pdf);
      printf("Cannot open output file!\n");
      _getch();
      return -1;
   }
   SI32 i, count = pdfGetPageCount(pdf);
   for (i = 1; i <= count; i++)
   {
      pdfEditPage(pdf, i);
      // Initialize the graphics state to the default values
      textStack.Init();
      // We write a page identifier to the file so that we know from which page comes the text.
      textStack.WritePageIdentifier(i);
      pdfParseContent(pdf, &textStack, &stack, pfNone);
      pdfEndPage(pdf);
   }
   textStack.Close();
   pdfDeletePDF(pdf);
   timeStart = GetTickCount() - timeStart;
   printf("Processing time: %d ms\nText successfully extracted to:\n%s\n", timeStart, filePath);
   ShellExecute(0, "open", filePath, NULL, NULL, SW_SHOWMAXIMIZED);
   _getch();
   return 0;
}
