The PDF Parser & Modify SDK can be downloaded from the following web page:
https://www.verydoc.com/pdfparsersdk.html
You can use the following VC++ source code to extract text contents from a PDF file; the same source code also shows how to easily replace text contents in a PDF file.
#include <windows.h>
#include <assert.h>
#include <io.h>
#include <stdio.h>
#include <string.h>
#include <new>
#include <vector>
using namespace std;
/*
The "lpOptions" parameter supports the following options:
-f <int> : first page to convert
-l <int> : last page to convert
-r <int> : resolution for both X and Y, in DPI (default is 150)
-opw <string> : owner password (for encrypted files)
-upw <string> : user password (for encrypted files)
-html : output text information in HTML format instead of CSV format

The samples in this file additionally pass "-$ <license key>" in every options
string. A "-noimg" option appears only in a commented-out call in main();
NOTE(review): its meaning is not documented here - confirm against the SDK docs.
*/
// Function-pointer types for the parser exports of pdfparsersdk2.dll.
// LoadPDFLibrary() binds each one with GetProcAddress(); the export name is
// "VeryPDF_" followed by the name of the matching pointer variable below.
// One-shot entry points: PDF given as a file path or as an in-memory buffer.
typedef int (WINAPI *PDFParserSDKFunc)(char *lpPDFFile, char *lpOutFile, char *lpOptions);
typedef int (WINAPI *PDFParserSDKFromMemoryFunc)(LPBYTE lpPDFData, int nDataLen, char *lpOutFile, char *lpOptions);
// Handle-based entry points: the samples obtain a HANDLE from GetHandle, query
// it by index (0 .. GetCount-1) and release it with Free.
typedef HANDLE (WINAPI *PDFParserSDK_GetHandleFunc)(char *lpPDFFile, char *lpOptions);
typedef int (WINAPI *PDFParserSDK_ParseFunc)(HANDLE hPDFParserData, char *lpOptions);
typedef int (WINAPI *PDFParserSDK_GetCountFunc)(HANDLE hPDFParserData);
typedef int (WINAPI *PDFParserSDK_GetImageLengthFunc)(HANDLE hPDFParserData, int nIndex);
typedef int (WINAPI *PDFParserSDK_GetImageDataFunc)(HANDLE hPDFParserData, int nIndex, LPBYTE lpData, int nBufLen);
typedef int (WINAPI *PDFParserSDK_GetTextInfoLengthFunc)(HANDLE hPDFParserData, int nIndex);
typedef int (WINAPI *PDFParserSDK_GetTextInfoDataFunc)(HANDLE hPDFParserData, int nIndex, LPBYTE lpData, int nBufLen);
typedef int (WINAPI *PDFParserSDK_FreeFunc)(HANDLE hPDFParserData);
// Page-count queries: one takes a file path, the other an existing handle.
typedef int (WINAPI *PDFParserSDK_GetPageCountFunc)(char *lpPDFFile);
typedef int (WINAPI *PDFParserSDK_GetAllPagesCountFunc)(HANDLE hPDFParserData);
// Global function pointers, NULL until LoadPDFLibrary() assigns them. A pointer
// stays NULL if the DLL does not export the corresponding function.
PDFParserSDKFunc PDFParserSDK = NULL;
PDFParserSDKFromMemoryFunc PDFParserSDKFromMemory = NULL;
PDFParserSDK_GetHandleFunc PDFParserSDK_GetHandle = NULL;
PDFParserSDK_ParseFunc PDFParserSDK_Parse = NULL;
PDFParserSDK_GetCountFunc PDFParserSDK_GetCount = NULL;
PDFParserSDK_GetImageLengthFunc PDFParserSDK_GetImageLength = NULL;
PDFParserSDK_GetImageDataFunc PDFParserSDK_GetImageData = NULL;
PDFParserSDK_GetTextInfoLengthFunc PDFParserSDK_GetTextInfoLength = NULL;
PDFParserSDK_GetTextInfoDataFunc PDFParserSDK_GetTextInfoData = NULL;
PDFParserSDK_FreeFunc PDFParserSDK_Free = NULL;
PDFParserSDK_GetPageCountFunc PDFParserSDK_GetPageCount = NULL;
PDFParserSDK_GetAllPagesCountFunc PDFParserSDK_GetAllPagesCount = NULL;
// Function-pointer types for the PDF-modification exports. LoadPDFLibrary()
// binds them from the same DLL as the parser functions, using export names
// identical to the pointer variable names below.
// OpenFile takes an input and an output PDF path and yields the HANDLE that
// ModifyText and CloseFile operate on.
typedef HANDLE (WINAPI *VeryPDF_ModifyPDF_OpenFileFunc)(char *lpInPDFFile, char *lpOutPDFFile);
typedef BOOL (WINAPI *VeryPDF_ModifyPDF_CloseFileFunc)(HANDLE hPDF);
// ModifyText receives a page number, a rectangle (x, y, nWidth, nHeight) and
// the old/new text. NOTE(review): the sample treats the rectangle as 72 DPI
// units - confirm the coordinate origin against the SDK documentation.
typedef BOOL (WINAPI *VeryPDF_ModifyPDF_ModifyTextFunc)(HANDLE hPDF, int nPage, int x, int y, int nWidth, int nHeight, char *lpOldText, char *lpNewText);
// SetCode receives the Modify SDK license key.
typedef void (WINAPI *VeryPDF_ModifyPDF_SetCodeFunc)(char *lpLicenseKey);
// Global function pointers, NULL until LoadPDFLibrary() assigns them.
VeryPDF_ModifyPDF_OpenFileFunc VeryPDF_ModifyPDF_OpenFile = NULL;
VeryPDF_ModifyPDF_CloseFileFunc VeryPDF_ModifyPDF_CloseFile = NULL;
VeryPDF_ModifyPDF_ModifyTextFunc VeryPDF_ModifyPDF_ModifyText = NULL;
VeryPDF_ModifyPDF_SetCodeFunc VeryPDF_ModifyPDF_SetCode = NULL;
// License keys (placeholder values). They must remain string-literal macros:
// the parser key is concatenated with adjacent string literals to form the
// "-$ <key>" option, and the modify key is passed to VeryPDF_ModifyPDF_SetCode.
#define LICENSE_KEY_PDFPARSERSDK "XXXXXXXXXXXXXXXXXXX"
#define LICENSE_KEY_PDFMODIFYSDK "Your License Key for PDF Modify SDK"
/*
Builds "<directory of the running executable>\<in_name>" in out_path.

out_path : destination buffer. It must provide room for at least 256 bytes,
           which is the size this helper hands to GetModuleFileName; the
           finished path (including the terminating zero) never exceeds that.
in_name  : file name appended to the executable's directory. NULL is treated
           as an empty name.

If the module path cannot be obtained, was truncated, or the combined path
would not fit into 256 bytes, out_path is set to an empty string so callers
fail cleanly (e.g. LoadLibrary on an empty path) instead of reading or writing
out of bounds.

NOTE: like the rest of this sample, this passes a char buffer to the
TCHAR-mapped GetModuleFileName and therefore expects a non-UNICODE build.
*/
void GetModulePath(char *out_path, const char *in_name)
{
    const size_t kCapacity = 256;

    if(out_path == NULL)
        return;

    // GetModuleFileName returns 0 on failure and the buffer size on truncation;
    // in both cases the buffer content must not be trusted.
    const unsigned long nLen = GetModuleFileName(NULL, out_path, static_cast<unsigned long>(kCapacity));
    if(nLen == 0 || nLen >= kCapacity)
    {
        out_path[0] = 0;
        return;
    }

    // Keep everything up to and including the last backslash. Without any
    // backslash there is no directory part, so the name is used on its own.
    const char *lpSlash = strrchr(out_path, '\\');
    const size_t nDirLen = (lpSlash != NULL) ? static_cast<size_t>(lpSlash - out_path) + 1 : 0;
    out_path[nDirLen] = 0;

    if(in_name == NULL)
        return;

    // Bounded append: refuse to write past the capacity assumed above.
    const size_t nNameLen = strlen(in_name);
    if(nDirLen + nNameLen >= kCapacity)
    {
        out_path[0] = 0;
        return;
    }
    memcpy(out_path + nDirLen, in_name, nNameLen + 1);
}
int TestPDFParserSDKInMemory(char *lpInFile, char *lpOutFile)
{
LPBYTE lpData = NULL;
int nLength = 0;
FILE *file = fopen(lpInFile,"rb");
if(!file)
return -1001;
nLength = _filelength(fileno(file));
if(nLength <= 0)
{
fclose(file);
return -1002;
}
lpData = new BYTE[nLength];
if(lpData == NULL)
return -1003;
fread(lpData,1,nLength,file);
fclose(file);
int nRet = PDFParserSDKFromMemory(lpData, nLength, lpOutFile, "-$ "LICENSE_KEY_PDFPARSERSDK);
delete []lpData;
return nRet;
}
/*
Loads pdfparsersdk2.dll from the directory of the running executable and binds
all parser and modify function pointers declared at file scope.

Returns the module handle (release it with FreeLibrary), or NULL when the DLL
could not be loaded.

Individual exports are not validated here: GetProcAddress yields NULL for a
missing export, so the corresponding global pointer stays NULL and must be
checked before it is called.
*/
HMODULE LoadPDFLibrary()
{
    // Writable copy: GetModulePath is called with a non-const name here.
    char szDllName[] = "pdfparsersdk2.dll";
    char szLibPath[_MAX_PATH] = "";
    GetModulePath(szLibPath, szDllName);

    HMODULE dll_handle = LoadLibrary(szLibPath);
    if(dll_handle == NULL)
        return NULL;

    // Parser exports.
    PDFParserSDK = (PDFParserSDKFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK");
    PDFParserSDKFromMemory = (PDFParserSDKFromMemoryFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDKFromMemory");
    PDFParserSDK_GetHandle = (PDFParserSDK_GetHandleFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetHandle");
    PDFParserSDK_Parse = (PDFParserSDK_ParseFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_Parse");
    PDFParserSDK_GetCount = (PDFParserSDK_GetCountFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetCount");
    PDFParserSDK_GetImageLength = (PDFParserSDK_GetImageLengthFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetImageLength");
    PDFParserSDK_GetImageData = (PDFParserSDK_GetImageDataFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetImageData");
    PDFParserSDK_GetTextInfoLength = (PDFParserSDK_GetTextInfoLengthFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetTextInfoLength");
    PDFParserSDK_GetTextInfoData = (PDFParserSDK_GetTextInfoDataFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetTextInfoData");
    PDFParserSDK_Free = (PDFParserSDK_FreeFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_Free");
    PDFParserSDK_GetPageCount = (PDFParserSDK_GetPageCountFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetPageCount");
    PDFParserSDK_GetAllPagesCount = (PDFParserSDK_GetAllPagesCountFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetAllPagesCount");

    // Modify exports (same DLL).
    VeryPDF_ModifyPDF_OpenFile = (VeryPDF_ModifyPDF_OpenFileFunc)GetProcAddress(dll_handle, "VeryPDF_ModifyPDF_OpenFile");
    VeryPDF_ModifyPDF_CloseFile = (VeryPDF_ModifyPDF_CloseFileFunc)GetProcAddress(dll_handle, "VeryPDF_ModifyPDF_CloseFile");
    VeryPDF_ModifyPDF_ModifyText = (VeryPDF_ModifyPDF_ModifyTextFunc)GetProcAddress(dll_handle, "VeryPDF_ModifyPDF_ModifyText");
    VeryPDF_ModifyPDF_SetCode = (VeryPDF_ModifyPDF_SetCodeFunc)GetProcAddress(dll_handle, "VeryPDF_ModifyPDF_SetCode");

    return dll_handle;
}
int Test_PDFParserSDK_1(char *pdf_filename, char *out_filename)
{
int nRet = 0;
nRet = PDFParserSDK(pdf_filename, out_filename, "-html -$ "LICENSE_KEY_PDFPARSERSDK);
return nRet;
}
/*
Sample 2: parses the PDF from an in-memory buffer. Simply forwards both paths
to TestPDFParserSDKInMemory and passes its result back to the caller.
*/
int Test_PDFParserSDK_2(char *pdf_filename, char *out_filename)
{
    return TestPDFParserSDKInMemory(pdf_filename, out_filename);
}
int Test_PDFParserSDK_3(char *pdf_filename, char *out_filename)
{
int nRet = 0;
HANDLE hPDFSDK = PDFParserSDK_GetHandle(pdf_filename, "-$ "LICENSE_KEY_PDFPARSERSDK);
if(hPDFSDK == NULL)
return nRet;
int nCount = PDFParserSDK_GetCount(hPDFSDK);
for(int i = 0; i < nCount; i++)
{
int nImageDataLen = PDFParserSDK_GetImageLength(hPDFSDK, i);
int nTextInfoLen = PDFParserSDK_GetTextInfoLength(hPDFSDK, i);
vector<BYTE> vecImgData;
vector<BYTE> vecTxtData;
vecImgData.resize(nImageDataLen);
vecTxtData.resize(nTextInfoLen);
PDFParserSDK_GetImageData(hPDFSDK, i, vecImgData.begin(), vecImgData.size());
PDFParserSDK_GetTextInfoData(hPDFSDK, i, vecTxtData.begin(), vecTxtData.size());
}
PDFParserSDK_Free(hPDFSDK);
hPDFSDK = NULL;
return nRet;
}
int Test_PDFParserSDK_4(char *pdf_filename, char *out_filename, char *lpOptions)
{
int nRet = 0;
int nPageCount = PDFParserSDK_GetPageCount(pdf_filename);
for(int page = 1; page <= nPageCount; page++)
{
printf("Parser page %d ...\n", page);
char szOptions[300];
sprintf(szOptions, "-$ "LICENSE_KEY_PDFPARSERSDK" -f %d -l %d", page, page);
if(lpOptions)
{
strcat(szOptions, " ");
strcat(szOptions, lpOptions);
}
HANDLE hPDFSDK = PDFParserSDK_GetHandle(pdf_filename, szOptions);
if(hPDFSDK == NULL)
break;
int nCount = PDFParserSDK_GetCount(hPDFSDK);
for(int i = 0; i < nCount; i++)
{
int nImageDataLen = PDFParserSDK_GetImageLength(hPDFSDK, i);
int nTextInfoLen = PDFParserSDK_GetTextInfoLength(hPDFSDK, i);
vector<BYTE> vecImgData;
vector<BYTE> vecTxtData;
vecImgData.resize(nImageDataLen);
vecTxtData.resize(nTextInfoLen);
PDFParserSDK_GetImageData(hPDFSDK, i, vecImgData.begin(), vecImgData.size());
PDFParserSDK_GetTextInfoData(hPDFSDK, i, vecTxtData.begin(), vecTxtData.size());
}
PDFParserSDK_Free(hPDFSDK);
hPDFSDK = NULL;
}
return nRet;
}
int Test_PDFParserSDK_ModifyPDF()
{
char szPDFFile[_MAX_PATH];
GetModulePath(szPDFFile, "example1.pdf");
char szOutPDFFile[_MAX_PATH];
GetModulePath(szOutPDFFile, "_modified.pdf");
VeryPDF_ModifyPDF_SetCode(LICENSE_KEY_PDFMODIFYSDK);
HANDLE hPDF = VeryPDF_ModifyPDF_OpenFile(szPDFFile, szOutPDFFile);
if(hPDF == NULL)
return 0;
int nPage = 1;
//The default DPI for these values is 72DPI
int dpi = 72;
int x = 319*72/dpi;
int y = 56*72/dpi;
int nWidth = 37*72/dpi;
int nHeight = 7*72/dpi;
char *lpOldText = "September";
char *lpNewText = "VeryPDF";
BOOL bRet = VeryPDF_ModifyPDF_ModifyText(hPDF, nPage, x, y, nWidth, nHeight, lpOldText, lpNewText);
VeryPDF_ModifyPDF_CloseFile(hPDF);
return 1;
}
/*
Entry point of the sample: expects the input PDF and the output file on the
command line, loads the SDK library, runs the enabled samples and prints the
elapsed time.

The exit code is 0 for a usage error or when the library cannot be loaded;
otherwise it is the result of the last sample that ran
(Test_PDFParserSDK_ModifyPDF).
*/
int main(int argc, char* argv[])
{
    if(argc != 3)
    {
        // argv[0] may be missing when the program is started without arguments.
        printf("%s test.pdf out.png\n", (argc > 0 && argv[0] != NULL) ? argv[0] : "");
        return 0;
    }

    char *pdf_filename = argv[1];
    char *out_filename = argv[2];

    HMODULE hPDFLib = LoadPDFLibrary();
    if(hPDFLib == NULL)
    {
        fprintf(stderr, "Failed to load pdfparsersdk2.dll\n");
        return 0;
    }

    // GetTickCount() is an unsigned 32-bit millisecond counter that wraps
    // around; unsigned subtraction yields the correct elapsed time across a
    // wrap, whereas signed int arithmetic does not.
    const unsigned long nTime1 = GetTickCount();
    /////////////////////////////////
    int nRet = Test_PDFParserSDK_1(pdf_filename, out_filename);
    // Report this result here: nRet is reused for the next sample below.
    printf("Test_PDFParserSDK_1 returned %d\n", nRet);
    //nRet = Test_PDFParserSDK_2(pdf_filename, out_filename);
    //nRet = Test_PDFParserSDK_3(pdf_filename, out_filename);
    //nRet = Test_PDFParserSDK_4(pdf_filename, out_filename, NULL);
    //nRet = Test_PDFParserSDK_4(pdf_filename, out_filename, "-noimg");
    nRet = Test_PDFParserSDK_ModifyPDF();
    printf("Test_PDFParserSDK_ModifyPDF returned %d\n", nRet);
    /////////////////////////////////
    const unsigned long nTime2 = GetTickCount();
    const unsigned long nElapsedMs = nTime2 - nTime1;
    printf("Spend Time: %lums (%.2fs)\n", nElapsedMs, nElapsedMs/1000.0);

    FreeLibrary(hPDFLib);
    return nRet;
}