Skip to content

Commit

Permalink
Support pdf parsing where not every postscript path command takes a single line
Browse files Browse the repository at this point in the history
  • Loading branch information
hrobeers committed Jul 13, 2021
1 parent c57dda8 commit f45af2b
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 58 deletions.
2 changes: 1 addition & 1 deletion include/hrlib/io/pdfio.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ namespace hrlib { namespace pdf

std::istream& read_linear_dict(std::istream& stream, linear_dict& dict);
std::istream& read_next_binary(std::istream& stream, std::vector<char>& bin);
std::optional<path_cmd> parse_path_line(const std::string &line);
std::optional<path_cmd> parse_path_line(std::istream& stream);
}}

#endif //HRLIB_PDFIO_HPP
38 changes: 18 additions & 20 deletions src/foillogic/foilio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,16 +255,15 @@ Outline* foillogic::loadOutlinePdfStream(std::istream &stream, std::ostream */*e
{
std::string line;
boost::interprocess::basic_ivectorstream<std::vector<char>> bin_stream(bin);
while(getline_safe(bin_stream, line))
while(auto pc = pdf::parse_path_line(bin_stream))
{
if (auto pc = pdf::parse_path_line(line)) {
if (first.cmd!='m' && pc->cmd!='m')
// continue until first move
continue;
if (first.cmd!='m' && pc->cmd=='m')
// set first move
first=pc.value();
else if (first.cmd=='m' && pc->cmd=='m')
if (first.cmd!='m' && pc->cmd!='m')
// continue until first move
continue;
if (first.cmd!='m' && pc->cmd=='m')
// set first move
first=pc.value();
else if (first.cmd=='m' && pc->cmd=='m')
{
// break on a second move if path long enough
// otherwise reset move command
Expand All @@ -274,19 +273,18 @@ Outline* foillogic::loadOutlinePdfStream(std::istream &stream, std::ostream */*e
path_cmds.clear();
first=pc.value();
}
if (pc->cmd == 'l' && path_cmds.back().cmd == 'l') {
// merge line commands that extend each other
if (pc->vals[0][0] == path_cmds.back().vals[0][0]) {
path_cmds.back().vals[0][1] = pc->vals[0][1];
continue;
}
if (pc->vals[0][1] == path_cmds.back().vals[0][1]) {
path_cmds.back().vals[0][0] = pc->vals[0][0];
continue;
}
if (pc->cmd == 'l' && path_cmds.back().cmd == 'l') {
// merge line commands that extend each other
if (pc->vals[0][0] == path_cmds.back().vals[0][0]) {
path_cmds.back().vals[0][1] = pc->vals[0][1];
continue;
}
if (pc->vals[0][1] == path_cmds.back().vals[0][1]) {
path_cmds.back().vals[0][0] = pc->vals[0][0];
continue;
}
path_cmds.push_back(std::move(pc.value()));
}
path_cmds.push_back(std::move(pc.value()));
}
// stop reading file when a path is parsed
if (path_cmds.size()>2)
Expand Down
56 changes: 21 additions & 35 deletions src/hrlib/io/pdfio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,6 @@ namespace {
const std::string N("/N");

bool is_path_char(char c) { return std::any_of(path_chars.cbegin(), path_chars.cend(), [c](char f){ return c==f; }); }
bool is_path_line(const std::string &str)
{
return std::all_of(str.begin(), str.end(), [](char c){
if (utf8::is_float_char(c)) return true;
return is_path_char(c);
});
}

bool try_read_property(const std::string &line, const std::string &prop, bool &out) {
if(line.find(prop) != std::string::npos)
Expand Down Expand Up @@ -172,40 +165,33 @@ std::istream& hrlib::pdf::read_next_binary(std::istream &stream, std::vector<cha
}
}

std::optional<path_cmd> hrlib::pdf::parse_path_line(const std::string &line)
std::optional<path_cmd> hrlib::pdf::parse_path_line(std::istream& stream)
{
if (!is_path_line(line))
return std::optional<path_cmd>();

// Push coordinates & extract command
path_cmd retval;
size_t prev = 0;
size_t coord_idx = 0;
while (true)
{
auto pos = line.find_first_of(delimiters, prev);

std::string word = line.substr(prev, pos-prev);
if (hrlib::utf8::is_floats(word))
while (!stream.eof())
{
double v;
std::istringstream(line.substr(prev, pos-prev)) >> v;
if (coord_idx % 2 == 0)
retval.vals.push_back({v, NAN});
else
retval.vals.back()[1] = v;
++coord_idx;
std::string word;
stream >> word;
if (hrlib::utf8::is_floats(word)) {
double v;
std::istringstream(word) >> v;
if (coord_idx % 2 == 0)
retval.vals.push_back({v, NAN});
else
retval.vals.back()[1] = v;
++coord_idx;
}
else if (word.size()==1 && is_path_char(word[0]) && retval.vals.size()>0) {
retval.cmd = word[0];
return retval;
}
else {
retval.vals.clear();
coord_idx=0;
}
}
else if (word.size()==1 && is_path_char(word[0]) && retval.vals.size()>0)
{
retval.cmd = word[0];
return retval;
}

prev = pos+1;
if (pos==std::string::npos)
break;
}

return std::optional<path_cmd>();
}
4 changes: 2 additions & 2 deletions src/version_autogen.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#define MINOR_VERSION 2
#define REVISION 0

#define BUILD_NUMBER 627
#define COMMIT_HASH "ff8fce688f5d09b5160d9ebfb8a7dbc3cc23246a"
#define BUILD_NUMBER 628
#define COMMIT_HASH "c57dda812c04db7a0f8de5ef21695940fbd191ae"

#endif // VERSION_AUTOGEN_H

0 comments on commit f45af2b

Please sign in to comment.