1 | var _ = require('lodash').mixin({
|
2 | isStream: require('isstream'),
|
3 | });
|
4 |
|
5 | var async = require('async-chainable');
|
6 | var entities = require('entities');
|
7 | var events = require('events');
|
8 | var fs = require('fs');
|
9 | var moment = require('moment');
|
10 | var sax = require('sax');
|
11 | var xml2js = require('xml2js');
|
12 |
|
/**
 * Lookup table of supported reference types
 * Each entry links three representations of the same type:
 *   - rlId:   the camelCase type ID used on parsed reference objects (`ref.type`)
 *   - enText: the type label used in the `name` attribute of EndNote's <ref-type> element
 *   - enId:   the numeric value written as the text of the <ref-type> element
 * @type {Array<{rlId: string, enText: string, enId: number}>}
 */
var types = [
	{rlId: 'aggregatedDatabase', enText: 'Aggregated Database', enId: 55},
	{rlId: 'ancientText', enText: 'Ancient Text', enId: 51},
	{rlId: 'artwork', enText: 'Artwork', enId: 2},
	{rlId: 'audiovisualMaterial', enText: 'Audiovisual Material', enId: 3},
	{rlId: 'bill', enText: 'Bill', enId: 4},
	{rlId: 'blog', enText: 'Blog', enId: 56},
	{rlId: 'book', enText: 'Book', enId: 6},
	{rlId: 'bookSection', enText: 'Book Section', enId: 5},
	{rlId: 'case', enText: 'Case', enId: 7},
	{rlId: 'catalog', enText: 'Catalog', enId: 8},
	{rlId: 'chartOrTable', enText: 'Chart or Table', enId: 38},
	{rlId: 'classicalWork', enText: 'Classical Work', enId: 49},
	{rlId: 'computerProgram', enText: 'Computer Program', enId: 9},
	{rlId: 'conferencePaper', enText: 'Conference Paper', enId: 47},
	{rlId: 'conferenceProceedings', enText: 'Conference Proceedings', enId: 10},
	{rlId: 'dataset', enText: 'Dataset', enId: 59},
	{rlId: 'dictionary', enText: 'Dictionary', enId: 52},
	{rlId: 'editedBook', enText: 'Edited Book', enId: 28},
	{rlId: 'electronicArticle', enText: 'Electronic Article', enId: 43},
	{rlId: 'electronicBook', enText: 'Electronic Book', enId: 44},
	{rlId: 'electronicBookSection', enText: 'Electronic Book Section', enId: 60},
	{rlId: 'encyclopedia', enText: 'Encyclopedia', enId: 53},
	{rlId: 'equation', enText: 'Equation', enId: 39},
	{rlId: 'figure', enText: 'Figure', enId: 37},
	{rlId: 'filmOrBroadcast', enText: 'Film or Broadcast', enId: 21},
	{rlId: 'generic', enText: 'Generic', enId: 13},
	{rlId: 'governmentDocument', enText: 'Government Document', enId: 46},
	{rlId: 'grant', enText: 'Grant', enId: 54},
	{rlId: 'hearing', enText: 'Hearing', enId: 14},
	{rlId: 'journalArticle', enText: 'Journal Article', enId: 17},
	// NOTE: this label previously carried a stray leading ", " which made it unmatchable when parsing
	{rlId: 'legalRuleOrRegulation', enText: 'Legal Rule or Regulation', enId: 50},
	{rlId: 'magazineArticle', enText: 'Magazine Article', enId: 19},
	{rlId: 'manuscript', enText: 'Manuscript', enId: 36},
	{rlId: 'map', enText: 'Map', enId: 20},
	{rlId: 'music', enText: 'Music', enId: 61},
	{rlId: 'newspaperArticle', enText: 'Newspaper Article', enId: 23},
	{rlId: 'onlineDatabase', enText: 'Online Database', enId: 45},
	{rlId: 'onlineMultimedia', enText: 'Online Multimedia', enId: 48},
	{rlId: 'pamphlet', enText: 'Pamphlet', enId: 24},
	{rlId: 'patent', enText: 'Patent', enId: 25},
	{rlId: 'personalCommunication', enText: 'Personal Communication', enId: 26},
	{rlId: 'report', enText: 'Report', enId: 27},
	{rlId: 'serial', enText: 'Serial', enId: 57},
	{rlId: 'standard', enText: 'Standard', enId: 58},
	{rlId: 'statute', enText: 'Statute', enId: 31},
	{rlId: 'thesis', enText: 'Thesis', enId: 32},
	{rlId: 'unpublished', enText: 'Unpublished Work', enId: 34},
	{rlId: 'web', enText: 'Web Page', enId: 12},
];
|
63 |
|
64 |
|
65 |
|
66 |
|
67 |
|
68 |
|
69 |
|
/**
 * Translate an EndNote type label into the matching internal type ID
 * Lookups are cached by _.memoize, keyed on the label
 * @param {string} enType The EndNote label, compared against the `enText` field of `types`
 * @return {string|boolean} The `rlId` of the matching entry, or boolean false if no entry matches
 */
var getTypeELtoRL = _.memoize(function(enType) {
	var match = _.find(types, {enText: enType});
	if (!match) return false;
	return match.rlId;
});
|
74 |
|
75 |
|
76 |
|
77 |
|
78 |
|
79 |
|
/**
 * Find the full `types` entry for an internal type ID
 * Lookups are cached by _.memoize, keyed on the ID
 * @param {string} rlId The internal type ID, compared against the `rlId` field of `types`
 * @return {Object|undefined} The matching `{rlId, enText, enId}` entry, or undefined if no entry matches
 */
var getTypeRLtoEL = _.memoize(function(rlId) {
	return _.find(types, {rlId: rlId});
});
|
84 |
|
85 |
|
86 |
|
87 |
|
88 |
|
89 |
|
/**
 * Escape a value so it can be embedded in XML text or a quoted attribute value
 * The five XML predefined entities are substituted and carriage returns are flattened to a space
 * @param {*} str The value to escape; non-strings are coerced with string concatenation
 * @return {string} The escaped string
 */
function _escape(str) {
	return ('' + str)
		// Ampersands must be handled first, otherwise the entities produced below would be escaped again
		.replace(/&/g, '&amp;')
		.replace(/\r/g, ' ')
		.replace(/</g, '&lt;')
		.replace(/>/g, '&gt;')
		.replace(/"/g, '&quot;')
		.replace(/'/g, '&apos;');
}
|
99 |
|
/**
 * Parse EndNote XML, emitting one event per <record> element found
 *
 * The input is scanned with a SAX parser; the markup of each <record> is re-assembled
 * into a small standalone XML document which is then converted to an object by xml2js
 * and simplified by _parseRef().
 *
 * Events emitted on the returned emitter:
 *   - 'ref' (ref)                 A parsed reference object
 *   - 'progress' (position, len)  Emitted on opening tags when the parser position is known; `len` may be undefined for streams
 *   - 'error' (err)               SAX errors (first one only), per-record XML conversion errors, or setup errors
 *   - 'end'                       The SAX parser finished without having reported a SAX error
 *
 * @param {Stream|string|Buffer} input The XML to parse
 * @return {EventEmitter} The emitter the above events are fired on
 * @throws {Error} If `input` is not a stream, string or buffer
 */
function parse(input) {
	var emitter = new events.EventEmitter();

	// Pick the SAX front-end matching the input type
	var parser;
	if (_.isStream(input)) {
		parser = sax.createStream(true, {});
	} else if (_.isString(input) || _.isBuffer(input)) {
		parser = sax.parser(true);
		// The plain parser takes handlers as `on<event>` properties - shim a chainable .on()
		// so both front-ends can be wired up identically below
		parser.on = function(event, cb) {
			parser['on' + event] = cb;
			return parser;
		};
	} else {
		throw new Error('Unknown input type for parse(): ' + (typeof input));
	}

	// Secondary parser used to convert the re-assembled XML of a single record into an object
	var recParser = new xml2js.Parser({
		async: false,
		normalizeKeywords: true,
		normalize: true,
	});

	var ref; // XML text of the record currently being assembled
	var inRef = false; // Whether we are currently inside a <record> element
	var hasErr = false; // Whether a SAX error has already been reported
	parser
		.on('error', function(e) {
			// Only report the first SAX error - ending the parser below may raise further ones
			if (hasErr) return;

			hasErr = true;
			parser.end();
			emitter.emit('error', e);
		})
		.on('opentag', function(node) {
			// Report progress: stream parsers expose their state via `_parser`, plain parsers directly
			if (parser._parser && parser._parser.position) {
				emitter.emit('progress', parser._parser.position, parser._parser.length || undefined);
			} else if (parser.position && input.length) {
				emitter.emit('progress', parser.position, input.length);
			}

			// A new record starts a fresh standalone document
			if (node.name === 'record') {
				ref = '<?xml version="1.0" encoding="UTF-8"?><xml><records>';
				inRef = true;
			}

			// Markup outside of a record is never used - don't accumulate it
			if (!inRef) return;

			ref += '<' + node.name;

			// Attributes are copied for every element except <style>, where they are dropped
			if (node.name !== 'style') {
				_.forEach(node.attributes, function(v, k) {
					ref += ' ' + k + '="' + entities.encodeXML(v) + '"';
				});
			}

			ref += '>';
		})
		.on('closetag', function(tag) {
			if (inRef && tag === 'record') {
				// Record complete - close the wrapper document and convert it
				ref += '</' + tag + '></records></xml>';
				recParser.parseString(ref, function(err, json) {
					// Without a result there is nothing to hand to _parseRef() - report the failure instead
					if (err) {
						emitter.emit('error', err);
						return;
					}
					emitter.emit('ref', _parseRef(json));
				});
				ref = null;
				inRef = false;
			} else if (inRef) {
				ref += '</' + tag + '>';
			}
		})
		.on('text', function(text) {
			if (inRef) ref += entities.encodeXML(text);
		})
		.on('cdata', function(data) {
			if (inRef) ref += '<![CDATA[' + data + ']]>';
		})
		.on('end', function() {
			if (!hasErr) emitter.emit('end');
		});

	async()
		// For streams with a `path` property, stat that path so progress events can include a total size
		.then(function(next) {
			if (_.isStream(input) && input.path) {
				fs.stat(input.path, function(err, stat) {
					if (err) return next(err);
					parser._parser.length = stat.size;
					next();
				});
			} else {
				next();
			}
		})
		// Feed the input into the SAX parser
		.then(function(next) {
			if (_.isStream(input)) {
				input.pipe(parser);
			} else if (_.isString(input) || _.isBuffer(input)) {
				try {
					parser.write(input).close();
				} catch (err) {
					// Anything thrown while writing is reported via the emitter rather than thrown
					emitter.emit('error', err);
				}
			}
			next();
		})
		.end(function(err) {
			if (err) emitter.emit('error', err);
		});

	return emitter;
}
|
226 |
|
/**
 * Convert the object produced for a single-record XML document into a flat reference object
 *
 * Most values may appear either directly as text or wrapped in a <style> child;
 * where both forms are handled the <style> text is preferred.
 *
 * @param {Object} json Parsed document; the record is read from `json.xml.records[0].record[0]`
 * @return {Object} The reference. Possible keys: recNumber, title, journal, address, researchNotes, type,
 *   authors, pages, volume, number, isbn, abstract, label, caption, notes, custom1-7, year, date, keywords, urls
 * @throws {Error} If the record's ref-type name has no entry in the type table
 */
function _parseRef(json) {
	var ref = {};
	var rawRef = json.xml.records[0].record[0];

	// Read the value at `path`, preferring the text of a nested <style> child when there is one
	var styledText = function(path) {
		return _.get(rawRef, path + '.style.0') || _.get(rawRef, path);
	};

	// Resolve one entry of a repeated element (author, keyword, url) to its text, or false if it has none
	var entryText = function(raw) {
		if (_.isString(raw)) return raw;
		if (_.has(raw, 'style.0')) return raw['style'][0];
		return false;
	};

	var isPresent = function(value) {
		return !!value;
	};

	// Simple top-level fields
	ref.recNumber = _.get(rawRef, 'rec-number.0');
	if (_.has(rawRef, 'titles.0.title.0')) ref.title = styledText('titles.0.title.0');
	if (_.has(rawRef, 'titles.0.secondary-title.0')) ref.journal = styledText('titles.0.secondary-title.0');
	if (_.has(rawRef, 'auth-address.0')) ref.address = styledText('auth-address.0');
	if (_.has(rawRef, 'research-notes.0')) ref.researchNotes = styledText('research-notes.0');

	// Reference type - translated from the EndNote label to the internal ID
	if (_.has(rawRef, 'ref-type.0.$.name')) {
		var rawType = _.get(rawRef, 'ref-type.0.$.name');
		var rlType = getTypeELtoRL(rawType);
		if (!rlType) throw new Error('Unknown EndNote type: ' + rawType);
		ref.type = rlType;
	}

	// Authors - entries without usable text are skipped (same policy as keywords and URLs)
	// rather than crashing on a missing <style> child
	if (_.has(rawRef, 'contributors.0.authors.0.author.0')) {
		ref.authors = _.get(rawRef, 'contributors.0.authors.0.author')
			.map(function(rawAuthor) {
				return entryText(rawAuthor);
			})
			.filter(isPresent);
	}

	// Fields whose key is the same in the record and in the output
	[
		'pages', 'volume', 'number', 'isbn', 'abstract', 'label', 'caption', 'notes',
		'custom1', 'custom2', 'custom3', 'custom4', 'custom5', 'custom6', 'custom7',
	].forEach(function(key) {
		if (_.has(rawRef, key + '.0')) ref[key] = styledText(key + '.0');
	});

	// Dates
	if (_.has(rawRef, 'dates.0.year.0')) ref.year = styledText('dates.0.year.0');
	if (_.has(rawRef, 'dates.0.pub-dates.0.date.0')) ref.date = styledText('dates.0.pub-dates.0.date.0');

	// Keywords
	if (_.has(rawRef, 'keywords.0.keyword')) {
		ref.keywords = rawRef.keywords[0].keyword
			.map(function(rawKeyword) {
				return entryText(rawKeyword);
			})
			.filter(isPresent);
	}

	// URLs - both URL groups are merged into a single list, in this order
	['related-urls', 'text-urls'].forEach(function(key) {
		if (!_.has(rawRef, 'urls.0.' + key + '.0.url')) return;
		if (!ref.urls) ref.urls = [];
		rawRef['urls'][0][key][0]['url'].forEach(function(rawURL) {
			if (_.isString(rawURL)) {
				ref.urls.push(rawURL);
			} else if (_.has(rawURL, 'style.0')) {
				ref.urls.push(rawURL['style'][0]);
			}
		});
	});

	return ref;
}
|
311 |
|
312 |
|
/**
 * Write references to a stream as an EndNote XML document
 *
 * @param {Object} options Settings; missing keys are filled in on this object via _.defaults
 * @param {Stream} options.stream Writable stream to write to (required)
 * @param {Object} [options.xmlOptions] XML settings
 * @param {string} [options.xmlOptions.file='EndNote.enl'] Database file name written into each record
 * @param {string} [options.defaultType='report'] Type ID used when a reference has no type or an unknown one
 * @param {Array} [options.fields=[]] Not read by the default encoder
 * @param {function} [options.encode] Called as (ref), returns the XML string for one record
 * @param {function} [options.escape] Called as (value), returns the XML-escaped string
 * @param {number} [options.recordOffset=0] Counter incremented per encoded record; used when a reference has no recNumber
 * @param {Array|Object|function} [options.content=[]] What to write: an array of references, a single reference,
 *   or a function called as (callback, batchNo). The callback takes (err, data): a non-empty array or a single
 *   object is written and the function is called again with the next batch number; anything else ends the output
 * @return {Stream} The value of `options.stream`
 */
function output(options) {
	var settings = _.defaults(options, {
		stream: null,
		xmlOptions: {
			file: 'EndNote.enl',
		},
		defaultType: 'report',
		fields: [],
		encode: function(ref) {
			settings.recordOffset++;

			var recNumber = ref.recNumber || settings.recordOffset;
			var dbFile = settings.escape(settings.xmlOptions.file);

			// Wrap an escaped value in <tag><style ...>...</style></tag>
			var styled = function(tag, value) {
				return '<' + tag + '><style face="normal" font="default" size="100%">' + settings.escape(value) + '</style></' + tag + '>';
			};

			var xml =
				'<database name="' + dbFile + '" path="c:\\' + dbFile + '">' + dbFile + '</database>' +
				'<source-app name="EndNote" version="16.0">EndNote</source-app>' +
				'<rec-number>' + settings.escape(recNumber) + '</rec-number>' +
				'<foreign-keys><key app="EN" db-id="s55prpsswfsepue0xz25pxai2p909xtzszzv">' + settings.escape(recNumber) + '</key></foreign-keys>';

			// Reference type - fall back to the default when missing or unknown
			var foundType = getTypeRLtoEL(ref.type || settings.defaultType);
			if (!foundType) {
				console.log('Unknown or unsuppoted reference type: ' + ref.type + '. Using default of "' + settings.defaultType + '" instead');
				foundType = getTypeRLtoEL(settings.defaultType);
			}

			xml += '<ref-type name="' + foundType.enText + '">' + settings.escape(foundType.enId) + '</ref-type>';

			// Element lists are joined explicitly - implicit array-to-string conversion would insert commas between them
			xml += '<contributors><authors>' +
				(ref.authors ? ref.authors.map(function(author) {
					return styled('author', author);
				}).join('') : '') +
				'</authors></contributors>';

			xml += '<titles>' +
				(ref.title ? styled('title', ref.title) : '') +
				(ref.journal ? styled('secondary-title', ref.journal) : '') +
				(ref.titleShort ? styled('short-title', ref.titleShort) : '') +
				(ref.journalAlt ? styled('alt-title', ref.journalAlt) : '') +
				'</titles>';

			if (ref.periodical) {
				xml += '<periodical>' + styled('full-title', ref.periodical) + '</periodical>';
			}

			// Simple fields: reference key => element name
			_.forEach({
				'abstract': 'abstract',
				'accessDate': 'access-date',
				'accession': 'accession-num',
				'address': 'auth-address',
				'caption': 'caption',
				'databaseProvider': 'remote-database-provider',
				'database': 'remote-database-name',
				'doi': 'electronic-resource-num',
				'isbn': 'isbn',
				'label': 'label',
				'language': 'language',
				'notes': 'notes',
				'number': 'number',
				'pages': 'pages',
				'researchNotes': 'research-notes',
				'section': 'section',
				'volume': 'volume',
				'workType': 'work-type',
				'custom1': 'custom1',
				'custom2': 'custom2',
				'custom3': 'custom3',
				'custom4': 'custom4',
				'custom5': 'custom5',
				'custom6': 'custom6',
				'custom7': 'custom7',
			}, function(enKey, rlKey) {
				if (ref[rlKey]) xml += styled(enKey, ref[rlKey]);
			});

			// Dates - Date objects are written as YYYY-MM-DD, anything else is written (escaped) as given
			if (ref.date || ref.year) {
				xml += '<dates>';
				if (ref.year) xml += styled('year', ref.year);
				if (ref.date) {
					var dateText = _.isDate(ref.date) ? moment(ref.date).format('YYYY-MM-DD') : ref.date;
					xml += '<pub-dates>' + styled('date', dateText) + '</pub-dates>';
				}
				xml += '</dates>';
			}

			if (ref.urls) {
				xml += '<urls><related-urls>' +
					ref.urls.map(function(url) {
						return styled('url', url);
					}).join('') +
					'</related-urls></urls>';
			}

			if (ref.keywords) {
				xml += '<keywords>' +
					ref.keywords.map(function(keyword) {
						return styled('keyword', keyword);
					}).join('') +
					'</keywords>';
			}

			return '<record>' + xml + '</record>';
		},
		// Reference the function directly so the default does not depend on how output() is invoked
		escape: _escape,
		recordOffset: 0,
		content: [],
	});

	async()
		// Sanity checks
		.then(function(next) {
			if (!settings.stream) return next('A writable \'stream\' option must be specified');
			next();
		})
		// Document header
		.then(function(next) {
			settings.stream.write('<?xml version="1.0" encoding="UTF-8"?><xml><records>');
			next();
		})
		// Records
		.then(function(next) {
			if (_.isFunction(settings.content)) {
				// Provider function - keep requesting batches until it yields nothing writable
				var batchNo = 0;
				var fetcher = function() {
					settings.content(function(err, data, isLast) {
						if (err) return next(err);
						if (_.isArray(data) && data.length > 0) {
							data.forEach(function(ref) {
								settings.stream.write(settings.encode(ref));
							});
							setTimeout(fetcher);
						} else if (!_.isArray(data) && _.isObject(data)) {
							settings.stream.write(settings.encode(data));
							setTimeout(fetcher);
						} else {
							next();
						}
					}, batchNo++);
				};
				fetcher();
			} else if (_.isArray(settings.content)) {
				settings.content.forEach(function(ref) {
					settings.stream.write(settings.encode(ref));
				});
				next();
			} else if (_.isObject(settings.content)) {
				settings.stream.write(settings.encode(settings.content));
				next();
			} else {
				// Nothing writable was supplied - still continue so the document is closed and the stream ended
				next();
			}
		})
		// Document footer
		.then(function(next) {
			settings.stream.write('</records></xml>');
			next();
		})
		.end(function(err) {
			// The stream may be missing - that is exactly what the first step reports
			if (settings.stream) settings.stream.end();
			if (err) throw (err instanceof Error ? err : new Error(err));
		});

	return settings.stream;
}
|
477 |
|
// Public interface of this module: the XML writer, the XML parser and the escaping helper
var api = {};
api.output = output;
api.parse = parse;
api._escape = _escape;

module.exports = api;
|