-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPDFRipper.cpp
118 lines (100 loc) · 3.02 KB
/
PDFRipper.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/***********************************************************************
* Copyright 2007-2010 Michael Drueing <[email protected]>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License or (at your option) version 3 or any later version
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
***********************************************************************/
#include <cstring>
#include <cstdio>
#include <cstdlib>
#include <locale>
#include "PDFRipper.h"
// Name string of this ripper, including its version number.
// NOTE(review): presumably reported by the surrounding ripper framework; where it is used is not visible in this file.
const char *PDFRipper::s_name = "PDF Ripper v1.0";
// Header signature table for this ripper. It has a single entry: the
// 7-character string "%PDF-1." together with its length (7). checkLocation()
// reads the byte directly after this signature as the PDF minor version digit.
// NOTE(review): HS and HS_END are macros defined outside this file; presumably
// HS builds one HeaderStruct entry and HS_END is the list terminator — confirm
// against the header that declares them.
const HeaderStruct PDFRipper::s_headers[] = {
HS("%PDF-1.", 7)
HS_END
};
/**
 * Returns true when the `len` bytes starting at `p` lie entirely in front of
 * `end` and are byte-identical to `needle`. Never reads at or beyond `end`.
 */
static bool pdfBytesMatch(const unsigned char *p, const unsigned char *end,
                          const char *needle, size_t len)
{
    if ((p > end) || (static_cast<size_t>(end - p) < len))
        return false;
    return std::memcmp(p, needle, len) == 0;
}

/**
 * Decides whether a PDF document starts at `pos` and, if so, how long it is.
 *
 * `pos` points at a "%PDF-1." signature. The byte after the signature is the
 * minor version digit; only '1'..'6' are accepted, and '6' downgrades the
 * hit to CRIT_WEAK because its real size may exceed the advertised one.
 *
 * Length detection:
 *  - If "/Linearized" starts within the first 1024 bytes, the decimal number
 *    following the next "/L " (searched within 1024 bytes of "/Linearized")
 *    is used as the file length. The value is taken as-is; it is not checked
 *    against the amount of data left in the buffer.
 *  - Otherwise the file runs up to and including the first "%%EOF" plus an
 *    optional trailing CR and/or LF.
 *
 * All reads are limited to the bytes in front of m_start + m_length, which is
 * treated as the one-past-the-end address of the searched buffer.
 *
 * @param pos    start of the candidate file (address of the signature)
 * @param found  receives startoffset, criterium, extension and length; the
 *               first three are written even when the function returns false
 * @return true if a length could be determined, false otherwise
 */
bool PDFRipper::checkLocation(unsigned char *pos, const HeaderStruct * /*header*/, FoundStruct *found)
{
    const unsigned char *const end = m_start + m_length;

    found->startoffset = pos;
    found->criterium = CRIT_STRONG;
    strcpy(found->extension, "pdf");

    // the complete "%PDF-1.x" prefix (8 bytes) must be inside the buffer
    if ((end - pos) < 8)
        return false;

    // get the version: "%PDF-1.x" ==> x
    const char version = static_cast<char>(pos[7]);
    if ((version < '1') || (version > '6'))
    {
        fprintf(stderr, "PDFRipper: Unsupported PDF version 1.%c. PLEASE REPORT THIS!\n", version);
        return false;
    }
    if (version == '6')
    {
        // PDF 1.6 seems to have additional data, i.e. data is bigger than "/L <numbytes>" suggests
        fprintf(stderr, "PDFRipper: Warning: PDF v1.6 only partially supported, file may be truncated\n");
        found->criterium = CRIT_WEAK;
    }

    // look for a "/Linearized" that starts within the first 1024 bytes; the
    // window is clipped to the buffer so that a short, non-linearized PDF near
    // the end of the buffer still reaches the "%%EOF" search below
    const unsigned char *const head_limit = ((end - pos) > 1024) ? pos + 1024 : end;
    unsigned char *linearized = NULL;
    for (unsigned char *scan = pos + 8; scan < head_limit; scan++)
    {
        if (pdfBytesMatch(scan, end, "/Linearized", 11))
        {
            linearized = scan;
            break;
        }
    }

    if (linearized == NULL)
    {
        // no "/Linearized": extract until the first %%EOF{\x0d{\x0a}}
        unsigned char *cursor = pos + 1;
        while (((end - cursor) >= 5) && !pdfBytesMatch(cursor, end, "%%EOF", 5))
            cursor++;
        if ((end - cursor) < 5)
            return false;   // ran out of data without seeing "%%EOF"

        cursor += 5; // skip the "%%EOF"

        // append trailing \r\n if available (and still inside the buffer)
        if ((cursor < end) && (*cursor == 0x0d))
            cursor++;
        if ((cursor < end) && (*cursor == 0x0a))
            cursor++;

        found->length = cursor - found->startoffset;
        return true;
    }

    // "/Linearized" found: the file length follows the "/L " key
    const unsigned char *const dict_limit = ((end - linearized) > 1024) ? linearized + 1024 : end;
    for (unsigned char *scan = linearized; scan < dict_limit; scan++)
    {
        if (!pdfBytesMatch(scan, end, "/L ", 3))
            continue;

        // copy the digits into a bounded, NUL-terminated buffer
        char digits[20] = { 0 };
        size_t count = 0;
        for (const unsigned char *d = scan + 3; (d < end) && (*d >= '0') && (*d <= '9'); d++)
        {
            if (count >= sizeof(digits) - 1)
                return false;   // more digits than any sane file length has
            digits[count++] = static_cast<char>(*d);
        }
        if (count == 0)
            continue;   // "/L " without a number: not the length entry, keep looking

        found->length = std::strtol(digits, NULL, 10);
        return true;
    }
    return false;
}