1 |
|
2 |
|
3 | var xml2js = require('xml2js');
|
4 | var fs = require('fs');
|
5 | var util = require('util');
|
6 | var async = require('async');
|
7 | var moment = require('moment');
|
8 | var _ = require('lodash');
|
9 | var cheerio = require('cheerio');
|
10 | var splitHtml = require('split-html');
|
11 | var path = require('path');
|
12 | var request = require('request');
|
13 | var urls = require('url');
|
14 |
|
15 | module.exports = function(self, argv, callback) {
|
16 | var data;
|
17 | var parent;
|
18 | var req = self._apos.getTaskReq();
|
19 | var base;
|
20 |
|
21 | if (argv.base) {
|
22 | base = argv.base;
|
23 | }
|
24 |
|
// Make `url` absolute relative to the import's base URL (taken from
// argv.base, or filled in later from the export itself). While no base URL
// is known, the input is handed back untouched.
function resolve(url) {
  return base ? urls.resolve(base, url) : url;
}
|
31 |
|
32 | var creatorToCredit;
|
33 |
|
34 | return async.series({
|
35 | usage: function(callback) {
|
36 | if (argv._.length !== 3)
|
37 | {
|
38 | return callback('The first argument must be a Wordpress XML export filename. The second argument must be the slug of an existing blog page on your A2 site.');
|
39 | }
|
40 | return callback(null);
|
41 | },
|
42 | getCreatorToCredit: function(callback) {
|
43 | if (!argv['creator-to-credit']) {
|
44 | return setImmediate(callback);
|
45 | }
|
46 | var parse = require('csv-parse');
|
47 | var data = fs.readFileSync(argv['creator-to-credit'], 'utf8');
|
48 | return parse(data, {}, function(err, _info) {
|
49 | if (err) {
|
50 | return callback(err);
|
51 | }
|
52 | creatorToCredit = {};
|
53 | _.each(_info, function(row) {
|
54 | if (row.length >= 2) {
|
55 | creatorToCredit[row[0]] = row[1];
|
56 | }
|
57 | });
|
58 | return callback(null);
|
59 | });
|
60 | },
|
61 | getParent: function(callback) {
|
62 | return self.indexes.getOne(req, { slug: argv._[2] }, {}, function(err, _parent) {
|
63 | if (err) {
|
64 | return callback(err);
|
65 | }
|
66 | if (!_parent) {
|
67 | return callback('No such parent blog page found');
|
68 | }
|
69 | parent = _parent;
|
70 | return callback(null);
|
71 | });
|
72 | },
|
73 | parse: function(callback) {
|
74 | return xml2js.parseString(fs.readFileSync(argv._[1], 'utf8'), function(err, result) {
|
75 | if (err) {
|
76 | return callback(err);
|
77 | }
|
78 | data = result;
|
79 | return callback(null);
|
80 | });
|
81 | },
|
82 | insert: function(callback) {
|
83 | var count = 0;
|
84 | if ((!base) && (data.rss.channel[0]['wp:base_blog_url'])) {
|
85 | base = data.rss.channel[0]['wp:base_blog_url'][0];
|
86 | }
|
87 | var documents = data.rss.channel[0].item;
|
88 | var posts = _.filter(documents, function(post) {
|
89 | return (post['wp:post_type'] && (post['wp:post_type'][0] === 'post'));
|
90 | });
|
91 |
|
92 |
|
93 | return async.eachLimit(posts, argv.parallel || 1, function(post, callback) {
|
94 | var html = post['content:encoded'][0];
|
95 | count++;
|
96 |
|
97 | var publishedAt = new Date(post.pubDate[0]);
|
98 | var items = [];
|
99 |
|
100 | var categories = [];
|
101 | var tags = [];
|
102 |
|
103 | categories = _.map(post.category, function(category) {
|
104 | return category._;
|
105 | });
|
106 |
|
107 | tags = _.map(post.tag, function(tag) {
|
108 | return tag._;
|
109 | });
|
110 |
|
111 | if (argv['with-tag']) {
|
112 | if (!_.contains(tags, argv['with-tag'])) {
|
113 | return setImmediate(callback);
|
114 | }
|
115 | }
|
116 | if (argv['with-category']) {
|
117 | if (!_.contains(categories, argv['with-category'])) {
|
118 | return setImmediate(callback);
|
119 | }
|
120 | }
|
121 |
|
122 | if (argv['without-tags']) {
|
123 | var withoutTags = argv['without-tags'].split(/\s*,\s*/);
|
124 | if (_.intersection(tags, withoutTags).length) {
|
125 | return setImmediate(callback);
|
126 | }
|
127 | }
|
128 |
|
129 | if (argv['without-categories']) {
|
130 | var withoutCategories = argv['without-categories'].split(/\s*,\s*/);
|
131 | if (_.intersection(categories, withoutCategories).length) {
|
132 | return setImmediate(callback);
|
133 | }
|
134 | }
|
135 |
|
136 | if (argv['ignore-tags']) {
|
137 | tags = [];
|
138 | }
|
139 |
|
140 | if (argv['ignore-categories']) {
|
141 | categories = [];
|
142 | }
|
143 |
|
144 |
|
145 | tags = categories.concat(tags);
|
146 |
|
147 | var thumbnail;
|
148 |
|
149 | return async.series({
|
150 | meta: function(callback) {
|
151 | return async.eachSeries(post['wp:postmeta'] || [], function(meta, callback) {
|
152 | var key = meta['wp:meta_key'] && meta['wp:meta_key'][0];
|
153 | var code = meta['wp:meta_value'] && meta['wp:meta_value'][0];
|
154 | if (key === 'embed') {
|
155 | var matches = code.match(/(http"|https:)?\/\/[^'"]+/);
|
156 | if (matches) {
|
157 | return self._apos.acceptVideo(req, { url: matches[0] }, function(err, video) {
|
158 | if (err) {
|
159 | console.error('WARNING: Apostrophe couldn\'t figure out what to do with this embedded item: ' + code);
|
160 | } else {
|
161 | items.push({
|
162 | type: (video.type === "video") ? 'video' : 'embed',
|
163 | video: matches[0],
|
164 | thumbnail: video.thumbnail
|
165 | });
|
166 | }
|
167 | return callback(null);
|
168 | });
|
169 | }
|
170 | } else if (key === 'Image') {
|
171 | var src = resolve(code);
|
172 |
|
173 |
|
174 | return request(src, { encoding: null }, function(err, response, body) {
|
175 | if (err || (response.status >= 300)) {
|
176 | console.error('WARNING: image ' + src + ' not accessible, ignoring');
|
177 | return setImmediate(callback);
|
178 | }
|
179 | var tmp = self._apos.uploadfs.getTempPath();
|
180 | var name = self._apos.generateId();
|
181 | tmp += '/' + name;
|
182 | fs.writeFileSync(tmp, body);
|
183 | name = path.basename(src);
|
184 | return self._apos.acceptFiles(req, { path: tmp, name: name }, function(err, infos) {
|
185 | if (err || (!infos.length)) {
|
186 | console.error('WARNING: image ' + src + ' downloaded by not accepted by Apostrophe');
|
187 | return callback(null);
|
188 | }
|
189 |
|
190 | thumbnail = {
|
191 | type: 'area',
|
192 | items: [
|
193 | {
|
194 | type: 'slideshow',
|
195 | ids: [ infos[0]._id ]
|
196 | }
|
197 | ]
|
198 | };
|
199 | return callback(null);
|
200 | });
|
201 | });
|
202 | }
|
203 | return setImmediate(callback);
|
204 | }, callback);
|
205 | },
|
206 | body: function(callback) {
|
207 |
|
208 |
|
209 |
|
210 |
|
211 |
|
212 |
|
213 |
|
214 |
|
215 | html = html.replace(/\[(portfolio_slideshow)(.*?)\]/g, function(everything, name, attributes) {
|
216 | return '<wps' + name + attributes + ' />';
|
217 | });
|
218 |
|
219 |
|
220 |
|
221 |
|
222 |
|
223 | var before = html;
|
224 | html = html.replace(/\[(\w+)(.*?)\](.*?)\[\/(\w+)\]/g, function(everything, name, attributes, body, closeName) {
|
225 | return '<wps' + name + attributes + '>' + body + '</wps' + closeName + '>';
|
226 | });
|
227 |
|
228 |
|
229 |
|
230 |
|
231 |
|
232 |
|
233 |
|
234 | var fragments = splitHtml(html, 'wpsportfolio_slideshow, wpsbutton, wpsyoutube, wpsvimeo, wpscaption, a', function($el) {
|
235 | if ($el[0].name === 'a') {
|
236 | return $el.find('img').length;
|
237 | } else {
|
238 | return true;
|
239 | }
|
240 | });
|
241 |
|
242 | var i = 0;
|
243 | return async.eachSeries(fragments, function(fragment, callback) {
|
244 | var isSpecial = i & 1;
|
245 | i++;
|
246 | if (!isSpecial) {
|
247 |
|
248 |
|
249 |
|
250 |
|
251 |
|
252 |
|
253 |
|
254 |
|
255 |
|
256 |
|
257 |
|
258 |
|
259 |
|
260 |
|
261 |
|
262 | if (!argv['no-autop']) {
|
263 | fragment = fragment.replace(/\r?\n\r?\n/g, '<br />\n<br />\n');
|
264 | }
|
265 |
|
266 | var item = {
|
267 | type: 'richText',
|
268 | content: fragment
|
269 | };
|
270 | self._apos.itemTypes.richText.sanitize(item);
|
271 | items.push(item);
|
272 | return setImmediate(callback);
|
273 | }
|
274 | var $ = cheerio.load('<div>' + fragment + '</div>');
|
275 |
|
276 | var $img = $('img');
|
277 | if ($img.length) {
|
278 |
|
279 | var src = $img.attr('src');
|
280 | var href = $('a').attr('href');
|
281 |
|
282 | var title = $('wpscaption').attr('caption');
|
283 | if (!title) {
|
284 |
|
285 |
|
286 | title = $('wpscaption').text().trim();
|
287 | }
|
288 | if (href && src) {
|
289 | if (path.extname(href) === path.extname(src)) {
|
290 |
|
291 |
|
292 | src = href;
|
293 | }
|
294 | }
|
295 | if (!src) {
|
296 | console.error('WARNING: missing image URL, ignoring image');
|
297 | return setImmediate(callback);
|
298 | }
|
299 |
|
300 |
|
301 | return request(resolve(src), { encoding: null }, function(err, response, body) {
|
302 | if (err || (response.status >= 300)) {
|
303 | console.error('WARNING: image ' + src + ' not accessible, ignoring');
|
304 | return setImmediate(callback);
|
305 | }
|
306 | var tmp = self._apos.uploadfs.getTempPath();
|
307 | var name = self._apos.generateId();
|
308 | tmp += '/' + name;
|
309 | fs.writeFileSync(tmp, body);
|
310 | name = path.basename(src);
|
311 | return self._apos.acceptFiles(req, { path: tmp, name: name }, function(err, infos) {
|
312 | if (err || (!infos.length)) {
|
313 | console.error('WARNING: image ' + src + ' downloaded by not accepted by Apostrophe');
|
314 | return callback(null);
|
315 | }
|
316 |
|
317 |
|
318 |
|
319 |
|
320 | var file = infos[0];
|
321 | var showTitles = false;
|
322 | var showDescriptions = false;
|
323 | if (title) {
|
324 | if (argv['caption-as-description']) {
|
325 | file.description = title;
|
326 | showDescriptions = true;
|
327 | } else {
|
328 | file.title = title;
|
329 | showTitles = true;
|
330 | }
|
331 | }
|
332 | return self._apos.files.update({
|
333 | _id: infos[0]._id
|
334 | }, {
|
335 | $set: {
|
336 | title: title || infos[0].title,
|
337 | description: title || ''
|
338 | }
|
339 | }, function(err) {
|
340 | items.push({
|
341 | type: 'slideshow',
|
342 | ids: [ infos[0]._id ],
|
343 | showTitles: showTitles,
|
344 | showDescriptions: showDescriptions
|
345 | });
|
346 | return callback(null);
|
347 | });
|
348 | });
|
349 | });
|
350 | } else if ($('wpsyoutube').length || $('wpsvimeo').length) {
|
351 |
|
352 | var url = $('wpsyoutube, wpsvimeo').text().trim();
|
353 | return self._apos.acceptVideo(req, { url: url }, function(err, video) {
|
354 | if (err) {
|
355 | console.error('WARNING: Apostrophe couldn\'t figure out what to do with this embedded item: ' + url);
|
356 | } else {
|
357 | items.push({
|
358 | type: 'video',
|
359 | video: url,
|
360 | thumbnail: video.thumbnail
|
361 | });
|
362 | }
|
363 | return callback(null);
|
364 | });
|
365 | } else if ($('wpsportfolio_slideshow').length) {
|
366 | var excluded = [];
|
367 | var exclude = $('wpsportfolio_slideshow').attr('exclude');
|
368 | if (exclude && exclude.length) {
|
369 | excluded = exclude.split(/\s*,\s*/);
|
370 | }
|
371 |
|
372 |
|
373 |
|
374 |
|
375 | var images = [];
|
376 | _.each(documents, function(slide) {
|
377 | if (_.contains(excluded, slide['wp:post_id'][0])) {
|
378 | console.log('excluding');
|
379 | return;
|
380 | }
|
381 | if (!slide['wp:post_parent']) {
|
382 | console.log('no parent');
|
383 | return;
|
384 | }
|
385 | if (!((slide['wp:post_type'][0] == 'attachment') && (slide['wp:post_parent'][0] == post['wp:post_id'][0]))) {
|
386 | return;
|
387 | }
|
388 | if (!slide['wp:attachment_url']) {
|
389 | console.log('NO ATTACHMENT URL');
|
390 | return;
|
391 | }
|
392 | images.push(slide['wp:attachment_url'][0]);
|
393 | });
|
394 | var candidates = [];
|
395 | return async.series({
|
396 | get: function(callback) {
|
397 | return async.eachSeries(images, function(image, callback) {
|
398 | return request(resolve(image), { encoding: null }, function(err, response, body) {
|
399 | if (err) {
|
400 | console.error(err);
|
401 | return setImmediate(callback);
|
402 | }
|
403 | var tmp = self._apos.uploadfs.getTempPath();
|
404 | var name = self._apos.generateId();
|
405 | tmp += '/' + name;
|
406 | fs.writeFileSync(tmp, body);
|
407 | name = path.basename(image);
|
408 | candidates.push({ path: tmp, name: name });
|
409 | return callback(null);
|
410 | }
|
411 | );
|
412 | }, callback);
|
413 | },
|
414 | accept: function(callback) {
|
415 | return self._apos.acceptFiles(req, candidates, function(err, infos) {
|
416 | if (err || (!infos.length)) {
|
417 | console.error('WARNING: image ' + src + ' downloaded by not accepted by Apostrophe');
|
418 | return callback(null);
|
419 | }
|
420 | items.push({
|
421 | type: 'slideshow',
|
422 | ids: _.pluck(infos, '_id')
|
423 | });
|
424 | return callback(null);
|
425 | });
|
426 | }
|
427 | }, callback);
|
428 | } else {
|
429 | return callback(new Error('Unexpected special, our parser should not have allowed that to happen: ' + fragment));
|
430 | }
|
431 | }, function(err) {
|
432 | if (err) {
|
433 | return callback(err);
|
434 | }
|
435 | var bodyArea = argv['body-area'] || 'body';
|
436 | var credit;
|
437 | var creator = post['dc:creator'];
|
438 | if (creator && creator.length) {
|
439 | credit = creator[0];
|
440 | }
|
441 | if (creatorToCredit && _.has(creatorToCredit, credit)) {
|
442 | credit = creatorToCredit[credit];
|
443 | }
|
444 | var a2Post = {
|
445 | tags: tags,
|
446 | type: self.pieceName,
|
447 | title: post.title[0],
|
448 | publishedAt: publishedAt,
|
449 | publicationDate: moment(publishedAt).format('YYYY-MM-DD'),
|
450 | publicationTime: moment(publishedAt).format('HH:MM')
|
451 | };
|
452 | if (credit) {
|
453 | a2Post.credit = credit;
|
454 | }
|
455 | if (thumbnail) {
|
456 | a2Post.thumbnail = thumbnail;
|
457 | }
|
458 | a2Post[bodyArea] = {
|
459 | type: 'area',
|
460 | items: items
|
461 | };
|
462 | if (post['wp:status'] && post['wp:status'][0] === 'publish') {
|
463 | a2Post.published = true;
|
464 | }
|
465 | return self.pieces.putOne(req,
|
466 | undefined,
|
467 | { parent: parent },
|
468 | a2Post,
|
469 | callback);
|
470 | }, callback);
|
471 | }
|
472 | }, callback);
|
473 | }, callback);
|
474 | }
|
475 | }, callback);
|
476 | };
|