1 | var xml2js = require('xml2js');
|
2 | var fs = require('fs');
|
3 | var util = require('util');
|
4 | var async = require('async');
|
5 | var moment = require('moment');
|
6 | var _ = require('lodash');
|
7 | var cheerio = require('cheerio');
|
8 | var splitHtml = require('split-html');
|
9 | var path = require('path');
|
10 | var request = require('request');
|
11 | var util = require('util');
|
12 |
|
13 | module.exports = function(self, argv, callback) {
|
14 | var data;
|
15 | var parent;
|
16 | var req = self._apos.getTaskReq();
|
17 | return async.series({
|
18 | usage: function(callback) {
|
19 | if (argv._.length !== 3)
|
20 | {
|
21 | return callback('The first argument must be a Wordpress XML export filename. The second argument must be the slug of an existing blog page on your A2 site.');
|
22 | }
|
23 | return callback(null);
|
24 | },
|
25 | getParent: function(callback) {
|
26 | return self.indexes.getOne(req, { slug: argv._[2] }, {}, function(err, _parent) {
|
27 | if (err) {
|
28 | return callback(err);
|
29 | }
|
30 | if (!_parent) {
|
31 | return callback('No such parent blog page found');
|
32 | }
|
33 | parent = _parent;
|
34 | return callback(null);
|
35 | });
|
36 | },
|
37 | parse: function(callback) {
|
38 | return xml2js.parseString(fs.readFileSync(argv._[1], 'utf8'), function(err, result) {
|
39 | if (err) {
|
40 | return callback(err);
|
41 | }
|
42 | data = result;
|
43 | return callback(null);
|
44 | });
|
45 | },
|
46 | insert: function(callback) {
|
47 | var count = 0;
|
48 | var documents = data.rss.channel[0].item;
|
49 | posts = _.filter(documents, function(post) {
|
50 | return (post['wp:post_type'] && (post['wp:post_type'][0] === 'post'));
|
51 | });
|
52 |
|
53 |
|
54 | return async.eachLimit(posts, argv.parallel || 1, function(post, callback) {
|
55 | var html = post['content:encoded'][0];
|
56 | count++;
|
57 | console.log(post['title'][0] + ': ' + count + ' of ' + posts.length);
|
58 | var publishedAt = new Date(post.pubDate[0]);
|
59 | var items = [];
|
60 |
|
61 | return async.series({
|
62 | meta: function(callback) {
|
63 | return async.eachSeries(post['wp:postmeta'] || [], function(meta, callback) {
|
64 | var key = meta['wp:meta_key'] && meta['wp:meta_key'][0];
|
65 | var code = meta['wp:meta_value'] && meta['wp:meta_value'][0];
|
66 | if (key === 'embed') {
|
67 | var matches = code.match(/(http"|https:)?\/\/[^'"]+/);
|
68 | if (matches) {
|
69 | return self._apos.acceptVideo(req, { url: matches[0] }, function(err, video) {
|
70 | if (err) {
|
71 | console.error('WARNING: Apostrophe couldn\'t figure out what to do with this embedded item: ' + code);
|
72 | } else {
|
73 | items.push({
|
74 | type: (video.type === "video") ? 'video' : 'embed',
|
75 | video: matches[0],
|
76 | thumbnail: video.thumbnail
|
77 | });
|
78 | }
|
79 | return callback(null);
|
80 | });
|
81 | }
|
82 | }
|
83 | return setImmediate(callback);
|
84 | }, callback);
|
85 | },
|
86 | body: function(callback) {
|
87 |
|
88 |
|
89 |
|
90 |
|
91 |
|
92 |
|
93 |
|
94 |
|
95 | html = html.replace(/\[(portfolio_slideshow)(.*?)\]/g, function(everything, name, attributes) {
|
96 | return '<wps' + name + attributes + ' />';
|
97 | });
|
98 |
|
99 |
|
100 |
|
101 |
|
102 |
|
103 | var before = html;
|
104 | html = html.replace(/\[(\w+)(.*?)\](.*?)\[\/(\w+)\]/g, function(everything, name, attributes, body, closeName) {
|
105 | return '<wps' + name + attributes + '>' + body + '</wps' + closeName + '>';
|
106 | });
|
107 |
|
108 |
|
109 |
|
110 |
|
111 |
|
112 |
|
113 |
|
114 | var fragments = splitHtml(html, 'wpsportfolio_slideshow, wpsbutton, wpsyoutube, wpsvimeo, wpscaption, a', function($el) {
|
115 | if ($el[0].name === 'a') {
|
116 | return $el.find('img').length;
|
117 | } else {
|
118 | return true;
|
119 | }
|
120 | });
|
121 |
|
122 | var i = 0;
|
123 | return async.eachSeries(fragments, function(fragment, callback) {
|
124 | var isSpecial = i & 1;
|
125 | i++;
|
126 | if (!isSpecial) {
|
127 |
|
128 |
|
129 |
|
130 |
|
131 |
|
132 |
|
133 |
|
134 |
|
135 |
|
136 |
|
137 |
|
138 |
|
139 |
|
140 |
|
141 |
|
142 | if (!argv['no-autop']) {
|
143 | fragment = fragment.replace(/\r?\n\r?\n/g, '<br />\n<br />\n');
|
144 | }
|
145 |
|
146 | var item = {
|
147 | type: 'richText',
|
148 | content: fragment
|
149 | };
|
150 | self._apos.itemTypes.richText.sanitize(item);
|
151 | items.push(item);
|
152 | return setImmediate(callback);
|
153 | }
|
154 | var $ = cheerio.load('<div>' + fragment + '</div>');
|
155 |
|
156 | var $img = $('img');
|
157 | if ($img.length) {
|
158 |
|
159 | var src = $img.attr('src');
|
160 | var href = $('a').attr('href');
|
161 |
|
162 | var title = $('wpscaption').attr('caption');
|
163 | if (!title) {
|
164 |
|
165 |
|
166 | title = $('wpscaption').text().trim();
|
167 | }
|
168 | if (href && src) {
|
169 | if (path.extname(href) === path.extname(src)) {
|
170 |
|
171 |
|
172 | src = href;
|
173 | }
|
174 | }
|
175 | if (!src) {
|
176 | console.error('WARNING: missing image URL, ignoring image');
|
177 | return setImmediate(callback);
|
178 | }
|
179 |
|
180 |
|
181 | return request(src, { encoding: null }, function(err, response, body) {
|
182 | if (err || (response.status >= 300)) {
|
183 | console.error('WARNING: image ' + src + ' not accessible, ignoring');
|
184 | return setImmediate(callback);
|
185 | }
|
186 | var tmp = self._apos.uploadfs.getTempPath();
|
187 | var name = self._apos.generateId();
|
188 | tmp += '/' + name;
|
189 | fs.writeFileSync(tmp, body);
|
190 | name = path.basename(src);
|
191 | return self._apos.acceptFiles(req, { path: tmp, name: name }, function(err, infos) {
|
192 | if (err) {
|
193 | return callback(err);
|
194 | }
|
195 | if (!infos.length) {
|
196 | console.error('WARNING: image ' + src + ' downloaded by not accepted by Apostrophe');
|
197 | return callback(null);
|
198 | }
|
199 |
|
200 |
|
201 |
|
202 |
|
203 | var file = infos[0];
|
204 | var showTitles = false;
|
205 | var showDescriptions = false;
|
206 | if (title) {
|
207 | if (argv['caption-as-description']) {
|
208 | file.description = title;
|
209 | showDescriptions = true;
|
210 | } else {
|
211 | file.title = title;
|
212 | showTitle = true;
|
213 | }
|
214 | }
|
215 | return self._apos.files.update({
|
216 | _id: infos[0]._id
|
217 | }, {
|
218 | $set: {
|
219 | title: title || infos[0].title,
|
220 | description: title || ''
|
221 | }
|
222 | }, function(err) {
|
223 | items.push({
|
224 | type: 'slideshow',
|
225 | ids: [ infos[0]._id ],
|
226 | showTitles: showTitles,
|
227 | showDescriptions: showDescriptions
|
228 | });
|
229 | return callback(null);
|
230 | });
|
231 | });
|
232 | });
|
233 | } else if ($('wpsyoutube').length || $('wpsvimeo').length) {
|
234 |
|
235 | var url = $('wpsyoutube, wpsvimeo').text().trim();
|
236 | console.log(post.title[0] + ' has video');
|
237 | return self._apos.acceptVideo(req, { url: url }, function(err, video) {
|
238 | if (err) {
|
239 | console.error('WARNING: Apostrophe couldn\'t figure out what to do with this embedded item: ' + url);
|
240 | } else {
|
241 | items.push({
|
242 | type: 'video',
|
243 | video: url,
|
244 | thumbnail: video.thumbnail
|
245 | });
|
246 | }
|
247 | return callback(null);
|
248 | });
|
249 | } else if ($('wpsportfolio_slideshow').length) {
|
250 | var excluded = [];
|
251 | var exclude = $('wpsportfolio_slideshow').attr('exclude');
|
252 | if (exclude && exclude.length) {
|
253 | excluded = exclude.split(/\s*,\s*/);
|
254 | }
|
255 |
|
256 |
|
257 |
|
258 |
|
259 | var images = [];
|
260 | _.each(documents, function(slide) {
|
261 | if (_.contains(excluded, slide['wp:post_id'][0])) {
|
262 | console.log('excluding');
|
263 | return;
|
264 | }
|
265 | if (!slide['wp:post_parent']) {
|
266 | console.log('no parent');
|
267 | return;
|
268 | }
|
269 | if (!((slide['wp:post_type'][0] == 'attachment') && (slide['wp:post_parent'][0] == post['wp:post_id'][0]))) {
|
270 | return;
|
271 | }
|
272 | if (!slide['wp:attachment_url']) {
|
273 | console.log('NO ATTACHMENT URL');
|
274 | return;
|
275 | }
|
276 | images.push(slide['wp:attachment_url'][0]);
|
277 | });
|
278 | var candidates = [];
|
279 | return async.series({
|
280 | get: function(callback) {
|
281 | return async.eachSeries(images, function(image, callback) {
|
282 | return request(image, { encoding: null }, function(err, response, body) {
|
283 | if (err) {
|
284 | console.error(err);
|
285 | return setImmediate(callback);
|
286 | }
|
287 | var tmp = self._apos.uploadfs.getTempPath();
|
288 | var name = self._apos.generateId();
|
289 | tmp += '/' + name;
|
290 | fs.writeFileSync(tmp, body);
|
291 | name = path.basename(image);
|
292 | candidates.push({ path: tmp, name: name });
|
293 | return callback(null);
|
294 | }
|
295 | );
|
296 | }, callback);
|
297 | },
|
298 | accept: function(callback) {
|
299 | return self._apos.acceptFiles(req, candidates, function(err, infos) {
|
300 | if (err) {
|
301 | return callback(err);
|
302 | }
|
303 | items.push({
|
304 | type: 'slideshow',
|
305 | ids: _.pluck(infos, '_id')
|
306 | });
|
307 | return callback(null);
|
308 | });
|
309 | }
|
310 | }, callback);
|
311 | } else {
|
312 | return callback(new Error('Unexpected special, our parser should not have allowed that to happen: ' + fragment));
|
313 | }
|
314 | }, function(err) {
|
315 | if (err) {
|
316 | return callback(err);
|
317 | }
|
318 | var bodyArea = argv['body-area'] || 'body';
|
319 | var a2Post = {
|
320 | type: self.pieceName,
|
321 | title: post.title[0],
|
322 | publishedAt: publishedAt,
|
323 | publicationDate: moment(publishedAt).format('YYYY-MM-DD'),
|
324 | publicationTime: moment(publishedAt).format('HH:MM')
|
325 | };
|
326 | a2Post[bodyArea] = {
|
327 | type: 'area',
|
328 | items: items
|
329 | };
|
330 | if (post['wp:status'] && post['wp:status'][0] === 'publish') {
|
331 | a2Post.published = true;
|
332 | }
|
333 | return self.pieces.putOne(req,
|
334 | undefined,
|
335 | { parent: parent },
|
336 | a2Post,
|
337 | callback);
|
338 | }, callback);
|
339 | }
|
340 | }, callback);
|
341 | }, callback);
|
342 | }
|
343 | }, callback);
|
344 | };
|