forked from ytechie/wordpress-to-markdown
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconvert.js
164 lines (139 loc) · 4.46 KB
/
convert.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
var xml2js = require('xml2js');
var fs = require('fs');
var util = require('util');
var toMarkdown = require('./markdown');
var http = require('http');
// Entry point: kick off the conversion as soon as the script is loaded.
processExport();
/**
 * Reads `export.xml` from the current working directory, parses it with
 * xml2js and hands every item of the first RSS channel to `processPost`.
 *
 * Before any post is processed the `out` directory (markdown output) and the
 * `img` directory (target of the image downloads started by `processPost`)
 * are created. A directory that already exists is not treated as an error.
 *
 * Failures to read or parse the export are logged and abort the run instead
 * of continuing with undefined data.
 */
function processExport() {
  const parser = new xml2js.Parser();

  // Creates `dir` and then continues. Directory creation is best effort:
  // unexpected errors are reported, but the run still goes on.
  function ensureDirectory(dir, done) {
    fs.mkdir(dir, function (err) {
      if (err && err.code !== 'EEXIST') {
        console.log('Error creating directory ' + dir + ': ' + err);
      }
      done();
    });
  }

  fs.readFile('export.xml', function (err, data) {
    if (err) {
      console.log('Error: ' + err);
      return; // nothing to parse
    }

    parser.parseString(data, function (err, result) {
      if (err) {
        console.log('Error parsing xml: ' + err);
        return; // `result` is not usable
      }

      console.log('Parsed XML');

      // A channel without any <item> has no `item` property at all.
      const posts = result.rss.channel[0].item || [];

      ensureDirectory('out', function () {
        ensureDirectory('img', function () {
          for (const post of posts) {
            processPost(post);
          }
        });
      });
    });
  });
}
/**
 * Converts one item of the WordPress export into a markdown file with a
 * front-matter header and writes it to `out/<title>.md`.
 *
 * Steps:
 *  1. read title, publication date, HTML body and slug from the item,
 *  2. collect the category names (except "Uncategorized") as tags,
 *  3. start a download for every `src="..."` URL found in the body and point
 *     the body at the local `/img/<name>` copy instead,
 *  4. convert the HTML to markdown and normalise typographic characters and
 *     HTML entities,
 *  5. write header + markdown to disk.
 *
 * @param {Object} post - parsed export item. The fields are read the way the
 *   rest of this file reads them (`post['content:encoded'][0]`,
 *   `post.category[i]['_']`); presumably xml2js wraps every child element in
 *   an array - confirm against the parser options if they change.
 */
function processPost(post) {
  console.log('Processing Post');

  const postTitle = post.title;
  console.log('Post title: ' + postTitle);

  const postDate = new Date(post.pubDate);
  console.log('Post Date: ' + postDate);

  let postData = post['content:encoded'][0];
  console.log('Post length: ' + postData.length + ' bytes');

  const slug = post['wp:post_name'];
  console.log('Post slug: ' + slug);

  // The title becomes the file name, so characters that are illegal in file
  // names (a "/" would even address a sub-directory) have to be neutralised.
  const fname = `${toSafeFileName(postTitle)}.md`;

  // Merge categories and tags into tags
  const categories = (post.category || [])
    .map(function (category) { return category['_']; })
    .filter(function (name) { return name != "Uncategorized"; });

  // Find all images. The URLs are collected first because the body is
  // rewritten while they are processed.
  const srcPattern = /src="(.*?)"/gi;
  const urls = [];
  let match;
  while ((match = srcPattern.exec(postData)) !== null) {
    urls.push(match[1]);
  }

  for (const url of urls) {
    if (!url) {
      continue; // src="" - nothing to download, nothing to rewrite
    }
    const imageName = url.split('/').pop();
    downloadFile(url, 'img/' + imageName);
    // Make the image name local relative in the markdown. A replacer function
    // keeps "$" sequences in the image name from being interpreted.
    postData = postData.replace(url, function () { return `/img/${imageName}`; });
  }

  const markdown = cleanMarkdownCharacters(toMarkdown.toMarkdown(postData));

  let header = "";
  header += "---\n";
  header += "layout: post\n";
  header += "title: " + postTitle + "\n";
  header += "date: " + postDate.getFullYear() + '-' + getPaddedMonthNumber(postDate.getMonth() + 1) + '-' + getPaddedDayNumber(postDate.getDate()) + "\n";
  if (categories.length > 0) {
    header += "tags: " + JSON.stringify(categories) + '\n';
  }
  header += "---\n";
  header += "\n";

  fs.writeFile('out/' + fname, header + markdown, function (err) {
    if (err) {
      console.log('Error writing out/' + fname + ': ' + err);
    }
  });
}

// Replaces characters that cannot be part of a file name (path separators,
// Windows-reserved punctuation, control characters) with "-".
function toSafeFileName(name) {
  return String(name).replace(/[\/\\:*?"<>|\u0000-\u001F]/g, '-');
}

// Replaces typographic characters with plain ASCII equivalents and decodes the
// HTML entities &quot; &lt; &gt; &amp; that survive the markdown conversion.
function cleanMarkdownCharacters(markdown) {
  return markdown
    // smart single quotes and apostrophe
    .replace(/[\u2018\u2019\u201A]/g, "'")
    // smart double quotes
    .replace(/[\u201C\u201D\u201E]/g, "\"")
    // ellipsis
    .replace(/\u2026/g, "...")
    // dashes
    .replace(/[\u2013\u2014]/g, "-")
    // circumflex
    .replace(/\u02C6/g, "^")
    // single angle quotation marks
    .replace(/\u2039/g, "<")
    .replace(/\u203A/g, ">")
    // small tilde and non-breaking space
    .replace(/[\u02DC\u00A0]/g, " ")
    // HTML entities; &amp; goes last so that "&amp;lt;" ends up as the text
    // "&lt;" instead of being decoded twice
    .replace(/&quot;/g, "\"")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&amp;/g, "&");
}
/**
 * Downloads an image over plain HTTP and stores it at `path`.
 *
 * Only absolute `http://` URLs that contain an image extension (.jpg, .jpeg,
 * .png, .gif) are fetched; everything else (https, relative or
 * protocol-relative URLs, other file types) is logged and skipped, because
 * the `http` module used here can only request plain-HTTP URLs.
 *
 * The download is fire-and-forget: problems are logged, never thrown.
 *
 * @param {string} url  - address of the image as found in the post.
 * @param {string} path - local file the response body is written to.
 */
function downloadFile(url, path) {
  //console.log("Attempt downloading " + url + " to " + path + ' ' + url.indexOf("https:") );
  const isPlainHttp = /^http:\/\//i.test(url);
  const isImage = ['.jpg', '.jpeg', '.png', '.gif'].some(function (extension) {
    return url.indexOf(extension) >= 0;
  });

  if (!isPlainHttp || !isImage) {
    console.log ('passing on: ' + url + ' ' + url.indexOf('https:'));
    return;
  }

  const file = fs.createWriteStream(path);

  // Reports a failed request and releases the already opened output file.
  function abortDownload() {
    console.log('error downloading url: ' + url + ' to ' + path);
    file.destroy();
  }

  // The request is only started once the target file could be opened.
  file.on('open', function () {
    try {
      http.get(url, function (response) {
        console.log("Response code: " + response.statusCode);
        response.pipe(file);
      }).on('error', abortDownload);
    } catch (err) {
      // http.get throws synchronously for URLs it cannot parse.
      abortDownload();
    }
  });

  file.on('error', function (err) {
    console.log('error downloading url2: ' + url + ' to ' + path);
  });
}
/**
 * Left-pads a month number to two digits for use in the front-matter date.
 *
 * @param {number} month - month number, 1 = January.
 * @returns {string|number} "0" + month for values below 10, otherwise the
 *   value itself, unchanged.
 */
function getPaddedMonthNumber(month) {
  const needsLeadingZero = month < 10;
  return needsLeadingZero ? "0" + month : month;
}
/**
 * Left-pads a day of the month to two digits for use in the front-matter date.
 *
 * @param {number} day - day of the month.
 * @returns {string|number} "0" + day for values below 10, otherwise the
 *   value itself, unchanged.
 */
function getPaddedDayNumber(day) {
  const needsLeadingZero = day < 10;
  return needsLeadingZero ? "0" + day : day;
}