UNPKG

16.5 kBJavaScriptView Raw
1var _ = require('lodash').mixin({
2 isStream: require('isstream'),
3});
4
5var async = require('async-chainable');
6var entities = require('entities');
7var events = require('events');
8var fs = require('fs');
9var moment = require('moment');
10var sax = require('sax');
11var xml2js = require('xml2js');
12
/**
* Lookup table between RefLib reference types and EndNote reference types
* Each entry holds:
*   rlId   - the RefLib type ID (camelCase)
*   enText - the type name EndNote writes in the `name` attribute of `<ref-type>`
*   enId   - the numeric type ID EndNote writes as the text content of `<ref-type>`
* The order is alphabetical by rlId
*/
var types = [
	{rlId: 'aggregatedDatabase', enText: 'Aggregated Database', enId: 55},
	{rlId: 'ancientText', enText: 'Ancient Text', enId: 51},
	{rlId: 'artwork', enText: 'Artwork', enId: 2},
	{rlId: 'audiovisualMaterial', enText: 'Audiovisual Material', enId: 3},
	{rlId: 'bill', enText: 'Bill', enId: 4},
	{rlId: 'blog', enText: 'Blog', enId: 56},
	{rlId: 'book', enText: 'Book', enId: 6},
	{rlId: 'bookSection', enText: 'Book Section', enId: 5},
	{rlId: 'case', enText: 'Case', enId: 7},
	{rlId: 'catalog', enText: 'Catalog', enId: 8},
	{rlId: 'chartOrTable', enText: 'Chart or Table', enId: 38},
	{rlId: 'classicalWork', enText: 'Classical Work', enId: 49},
	{rlId: 'computerProgram', enText: 'Computer Program', enId: 9},
	{rlId: 'conferencePaper', enText: 'Conference Paper', enId: 47},
	{rlId: 'conferenceProceedings', enText: 'Conference Proceedings', enId: 10},
	{rlId: 'dataset', enText: 'Dataset', enId: 59},
	{rlId: 'dictionary', enText: 'Dictionary', enId: 52},
	{rlId: 'editedBook', enText: 'Edited Book', enId: 28},
	{rlId: 'electronicArticle', enText: 'Electronic Article', enId: 43},
	{rlId: 'electronicBook', enText: 'Electronic Book', enId: 44},
	{rlId: 'electronicBookSection', enText: 'Electronic Book Section', enId: 60},
	{rlId: 'encyclopedia', enText: 'Encyclopedia', enId: 53},
	{rlId: 'equation', enText: 'Equation', enId: 39},
	{rlId: 'figure', enText: 'Figure', enId: 37},
	{rlId: 'filmOrBroadcast', enText: 'Film or Broadcast', enId: 21},
	{rlId: 'generic', enText: 'Generic', enId: 13},
	{rlId: 'governmentDocument', enText: 'Government Document', enId: 46},
	{rlId: 'grant', enText: 'Grant', enId: 54},
	{rlId: 'hearing', enText: 'Hearing', enId: 14},
	{rlId: 'journalArticle', enText: 'Journal Article', enId: 17},
	{rlId: 'legalRuleOrRegulation', enText: 'Legal Rule or Regulation', enId: 50}, // Previously carried a stray leading ", " so it could never match EndNote's own name
	{rlId: 'magazineArticle', enText: 'Magazine Article', enId: 19},
	{rlId: 'manuscript', enText: 'Manuscript', enId: 36},
	{rlId: 'map', enText: 'Map', enId: 20},
	{rlId: 'music', enText: 'Music', enId: 61},
	{rlId: 'newspaperArticle', enText: 'Newspaper Article', enId: 23},
	{rlId: 'onlineDatabase', enText: 'Online Database', enId: 45},
	{rlId: 'onlineMultimedia', enText: 'Online Multimedia', enId: 48},
	{rlId: 'pamphlet', enText: 'Pamphlet', enId: 24},
	{rlId: 'patent', enText: 'Patent', enId: 25},
	{rlId: 'personalCommunication', enText: 'Personal Communication', enId: 26},
	{rlId: 'report', enText: 'Report', enId: 27},
	{rlId: 'serial', enText: 'Serial', enId: 57},
	{rlId: 'standard', enText: 'Standard', enId: 58},
	{rlId: 'statute', enText: 'Statute', enId: 31},
	{rlId: 'thesis', enText: 'Thesis', enId: 32},
	{rlId: 'unpublished', enText: 'Unpublished Work', enId: 34},
	{rlId: 'web', enText: 'Web Page', enId: 12},
];
63
/**
* Map an EndNote type name onto the matching RefLib type ID
* Results are cached per input via _.memoize()
* @param string enType The EndNote type name (matched against `enText` in the types table)
* @return string|boolean The RefLib type ID, or false when the name is not in the table
*/
var getTypeELtoRL = _.memoize(function(enType) {
	var match = _.find(types, {enText: enType});
	if (!match) return false;
	return match.rlId;
});
74
/**
* Look up the full type record for a RefLib type ID
* Results are cached per input via _.memoize()
* @param string rlId The RefLib type ID
* @return object|undefined The matching entry of the types table, or undefined when there is none
*/
var getTypeRLtoEL = _.memoize(function(rlId) {
	return _.find(types, {rlId: rlId});
});
84
85
/**
* Default string -> XML encoder
* Replaces the five XML special characters with their named entities and
* carriage returns with the `&#xD;` character reference
* @param string str The input to encode (non-strings are coerced with '' + str)
* @return string The encoded string
*/
function _escape(str) {
	return ('' + str)
		.replace(/&/g, '&amp;') // Must run first so the entities added below are not double-encoded
		.replace(/\r/g, '&#xD;')
		.replace(/</g, '&lt;')
		.replace(/>/g, '&gt;')
		.replace(/"/g, '&quot;')
		.replace(/'/g, '&apos;');
}
99
/**
* Parse EndNote XML, emitting each reference as soon as its <record> has been read
* Events fired on the returned emitter:
*   ref      (ref)               - one parsed reference (see _parseRef)
*   progress (position, length)  - parser position; length is only set when it could be determined
*   error    (err)               - a parse / read error; `end` is not fired afterwards
*   end      ()                  - the input was fully consumed without error
* @param stream|string|buffer input The XML to parse
* @return EventEmitter The emitter the above events are fired on
* @throws Error if input is not a stream, string or buffer
*/
function parse(input) {
	var emitter = new events.EventEmitter();

	// Setup parser {{{
	var parser;
	if (_.isStream(input)) {
		parser = sax.createStream(true, {});
	} else if (_.isString(input) || _.isBuffer(input)) {
		parser = sax.parser(true);
		parser.on = function(event, cb) { // Quick binder to simulate on() behaviour
			parser['on' + event] = cb;
			return parser;
		};
	} else {
		throw new Error('Unknown input type for parse(): ' + (typeof input));
	}
	// }}}
	// Setup record parser {{{
	var recParser = new xml2js.Parser({
		async: false, // We will handle our own async
		normalizeKeywords: true,
		normalize: true,
	});
	// }}}
	// Setup events {{{
	var ref; // XML text of the record currently being rebuilt
	var inRef = false; // Whether we are currently inside a <record>
	var hasErr = false;
	parser
		.on('error', function (e) {
			if (hasErr) {
				// Calling parser.end() here can cause infinite loop if Sax raises another error.
				return; // Already errored
			}

			hasErr = true;
			parser.end();
			emitter.emit('error', e);
		})
		.on('opentag', function(node) {
			// Fire `progress` emitter if we know enough to update that {{{
			if (parser._parser && parser._parser.position) {
				emitter.emit('progress', parser._parser.position, parser._parser.length || undefined);
			} else if (parser.position && input.length) {
				emitter.emit('progress', parser.position, input.length);
			}
			// }}}

			if (node.name == 'record') {
				ref = '<?xml version="1.0" encoding="UTF-8"?><xml><records>';
				inRef = true;
			}

			if (!inRef) return; // Tags outside a record are never used - don't accumulate them

			ref += '<' + node.name;

			if (node.name != 'style') // Dont bother saving these attribs
				_.forEach(node.attributes, function(v, k) {
					ref += ' ' + k + '="' + entities.encodeXML(v) + '"';
				});

			ref += '>';
		})
		.on('closetag', function(tag) {
			if (inRef && tag == 'record') {
				ref += '</' + tag + '></records></xml>';
				recParser.parseString(ref, function(err, json) {
					if (err) { // Report the failure rather than handing an undefined document to _parseRef()
						hasErr = true;
						emitter.emit('error', err);
						return;
					}
					emitter.emit('ref', _parseRef(json));
				});
				ref = null;
				inRef = false;
			} else if (inRef) {
				ref += '</' + tag + '>';
			}
		})
		.on('text', function(text) {
			if (inRef) ref += entities.encodeXML(text);
		})
		.on('cdata', function(data) {
			if (inRef) ref += '<![CDATA[' + data + ']]>';
		})
		.on('end', function() {
			if (!hasErr) emitter.emit('end');
		});
	// }}}
	// Feed into parser {{{
	// NOTE: We have to do this in an async thread otherwise we can't return the emitter as a function return
	async()
		// Try to populate the parser stream length from the file name stats if the stream looks like an accessible file {{{
		.then(function(next) {
			if (_.isStream(input) && input.path) {
				fs.stat(input.path, function(err, stat) {
					if (err) return next(err);
					parser._parser.length = stat.size;
					next();
				});
			} else {
				next();
			}
		})
		// }}}
		// Invoke the parser {{{
		.then(function(next) {
			if (_.isStream(input)) {
				input.pipe(parser);
			} else if (_.isString(input) || _.isBuffer(input)) {
				try {
					parser.write(input).close();
				} catch (err) {
					emitter.emit('error', err);
				}
			}
			next();
		})
		// }}}
		// End - Very basic error handling for this early in the loader order {{{
		.end(function(err) {
			if (err) emitter.emit('error', err);
			// Everything else handled by the SAX emitters
		});
		// }}}
	// }}}

	return emitter;
}
226
/**
* Convert one xml2js-parsed EndNote record into a RefLib reference object
* Only fields present in the record are set on the result
* @param object json The xml2js output of an `<xml><records><record>` document holding one record
* @return object The extracted reference
* @throws Error if the record names a reference type that is not in the types table
*/
function _parseRef(json) {
	var ref = {};

	var rawRef = json.xml.records[0].record[0];

	// A value is either wrapped in a <style> element or is the node itself - prefer the styled form
	var extract = function(path) {
		return _.get(rawRef, path + '.style.0') || _.get(rawRef, path);
	};

	// Same idea for the members of a repeated element; false marks a member with nothing usable
	var extractItem = function(rawItem) {
		if (_.isString(rawItem)) return rawItem;
		if (_.has(rawItem, 'style.0')) return rawItem['style'][0];
		return false;
	};

	// Complex extractions {{{
	ref.recNumber = _.get(rawRef, 'rec-number.0');
	if (_.has(rawRef, 'titles.0.title.0')) ref.title = extract('titles.0.title.0');
	if (_.has(rawRef, 'titles.0.secondary-title.0')) ref.journal = extract('titles.0.secondary-title.0');
	if (_.has(rawRef, 'auth-address.0')) ref.address = extract('auth-address.0');
	if (_.has(rawRef, 'research-notes.0')) ref.researchNotes = extract('research-notes.0');
	// }}}
	// Type {{{
	if (_.has(rawRef, 'ref-type.0.$.name')) {
		var rawType = _.get(rawRef, 'ref-type.0.$.name');
		var rlType = getTypeELtoRL(rawType);
		if (!rlType) throw new Error('Unknown EndNote type: ' + rawType);
		ref.type = rlType;
	}
	// }}}
	// Authors {{{
	if (_.has(rawRef, 'contributors.0.authors.0.author.0')) {
		ref.authors = _.get(rawRef, 'contributors.0.authors.0.author')
			.map(extractItem)
			.filter(function(author) {
				return author !== false; // Drop unreadable author nodes instead of throwing on them
			});
	}
	// }}}
	// Key to key extractions {{{
	// These fields use the same name in EndNote XML and in RefLib
	[
		'pages',
		'volume',
		'number',
		'isbn',
		'abstract',
		'label',
		'caption',
		'notes',
		'custom1',
		'custom2',
		'custom3',
		'custom4',
		'custom5',
		'custom6',
		'custom7',
	].forEach(function(key) {
		if (_.has(rawRef, key + '.0')) ref[key] = extract(key + '.0');
	});
	// }}}
	// Dates {{{
	if (_.has(rawRef, 'dates.0.year.0')) ref.year = extract('dates.0.year.0');
	if (_.has(rawRef, 'dates.0.pub-dates.0.date.0')) ref.date = extract('dates.0.pub-dates.0.date.0');
	// }}}
	// Keywords {{{
	if (_.has(rawRef, 'keywords.0.keyword')) {
		ref.keywords = rawRef.keywords[0].keyword
			.map(extractItem)
			.filter(function(keyword) {
				return !! keyword;
			});
	}
	// }}}
	// URLs {{{
	['related-urls', 'text-urls'].forEach(function(key) {
		if (_.has(rawRef, 'urls.0.' + key + '.0.url')) {
			if (!ref.urls) ref.urls = [];
			rawRef['urls'][0][key][0]['url'].forEach(function(rawURL) {
				if (_.isString(rawURL)) {
					ref.urls.push(rawURL);
				} else if (_.has(rawURL, 'style.0')) {
					ref.urls.push(rawURL['style'][0]);
				}
			});
		}
	});
	// }}}

	return ref;
}
311
312
/**
* Write one or more references to a stream as EndNote XML
* @param object options Settings, merged over the defaults below (note: the options object itself is populated with the defaults)
* @param stream options.stream Writable stream to write to (required)
* @param object [options.xmlOptions] XML level options; `file` is the database file name written into each record
* @param string [options.defaultType='report'] RefLib type used when a reference has no (or an unknown) type
* @param function [options.encode] Function(ref) returning the `<record>` XML for one reference
* @param function [options.escape] Function(str) used to XML-encode values
* @param number [options.recordOffset=0] Counter that the fallback record numbers continue from
* @param array|object|function [options.content] The reference(s) to write. A function is called repeatedly as
*        content(cb, batchNo) and must answer cb(err, data) with an array of refs, a single ref, or anything else to finish
* @return stream The stream passed as options.stream
*/
function output(options) {
	// Wrap a value in the <style> element EndNote puts around text values
	var styled = function(text) {
		return '<style face="normal" font="default" size="100%">' + settings.escape(text) + '</style>';
	};

	var settings = _.defaults(options, {
		stream: null,
		xmlOptions: {
			file: 'EndNote.enl',
		},
		defaultType: 'report', // Assume this reference type if we are not provided with one
		fields: [], // This is not supported for this driver
		encode: function(ref) {
			settings.recordOffset++;

			var recNumber = ref.recNumber || settings.recordOffset;
			// _.defaults() is shallow so a caller supplied xmlOptions may lack `file`
			var dbFile = settings.escape((settings.xmlOptions && settings.xmlOptions.file) || 'EndNote.enl');

			var xml =
				'<database name="' + dbFile + '" path="c:\\' + dbFile + '">' + dbFile + '</database>' +
				'<source-app name="EndNote" version="16.0">EndNote</source-app>' +
				'<rec-number>' + settings.escape(recNumber) + '</rec-number>' +
				'<foreign-keys><key app="EN" db-id="s55prpsswfsepue0xz25pxai2p909xtzszzv">' + settings.escape(recNumber) + '</key></foreign-keys>';

			var foundType = getTypeRLtoEL(ref.type || settings.defaultType);
			if (!foundType) {
				console.log('Unknown or unsuppoted reference type: ' + ref.type + '. Using default of "' + settings.defaultType + '" instead');
				foundType = getTypeRLtoEL(settings.defaultType);
			}

			xml += '<ref-type name="' + foundType.enText + '">' + settings.escape(foundType.enId) + '</ref-type>';

			// NOTE: Arrays must be join('')ed - implicit string conversion would put commas between the elements
			xml += '<contributors><authors>' +
				(ref.authors ? ref.authors.map(function(author) {
					return '<author>' + styled(author) + '</author>';
				}).join('') : '') +
				'</authors></contributors>';

			xml += '<titles>' +
				(ref.title ? '<title>' + styled(ref.title) + '</title>' : '') +
				(ref.journal ? '<secondary-title>' + styled(ref.journal) + '</secondary-title>' : '') +
				(ref.titleShort ? '<short-title>' + styled(ref.titleShort) + '</short-title>' : '') +
				(ref.journalAlt ? '<alt-title>' + styled(ref.journalAlt) + '</alt-title>' : '') +
				'</titles>';

			if (ref.periodical)
				xml += '<periodical><full-title>' + styled(ref.periodical) + '</full-title></periodical>';

			// RefLib key => EndNote element name
			_.forEach({
				'abstract': 'abstract',
				'accessDate': 'access-date',
				'accession': 'accession-num',
				'address': 'auth-address',
				'caption': 'caption',
				'databaseProvider': 'remote-database-provider',
				'database': 'remote-database-name',
				'doi': 'electronic-resource-num',
				'isbn': 'isbn',
				'label': 'label',
				'language': 'language',
				'notes': 'notes',
				'number': 'number',
				'pages': 'pages',
				'researchNotes': 'research-notes',
				'section': 'section',
				'volume': 'volume',
				'workType': 'work-type',
				'custom1': 'custom1',
				'custom2': 'custom2',
				'custom3': 'custom3',
				'custom4': 'custom4',
				'custom5': 'custom5',
				'custom6': 'custom6',
				'custom7': 'custom7',
			}, function(enKey, rlKey) {
				if (ref[rlKey])
					xml += '<' + enKey + '>' + styled(ref[rlKey]) + '</' + enKey + '>';
			});

			if (ref.year || ref.date) {
				xml += '<dates>';
				if (ref.year) xml += '<year>' + styled(ref.year) + '</year>';
				if (ref.date) {
					// Date objects are always written as YYYY-MM-DD, anything else is written as given
					var dateText = _.isDate(ref.date) ? moment(ref.date).format('YYYY-MM-DD') : ref.date;
					xml += '<pub-dates><date>' + styled(dateText) + '</date></pub-dates>';
				}
				xml += '</dates>';
			}

			if (ref.urls)
				xml += '<urls><related-urls>' +
					ref.urls.map(function(url) { return '<url>' + styled(url) + '</url>'; }).join('') +
					'</related-urls></urls>';

			if (ref.keywords)
				xml += '<keywords>' +
					ref.keywords.map(function(keyword) { return '<keyword>' + styled(keyword) + '</keyword>'; }).join('') +
					'</keywords>';

			return '<record>' + xml + '</record>';
		},
		escape: _escape, // Reference the function directly - `this` is not the module when output() is called detached
		recordOffset: 0,
		content: [],
	});

	async()
		// Sanity checks {{{
		.then(function(next) {
			if (!settings.stream) return next('A writable \'stream\' option must be specified');
			next();
		})
		// }}}

		// Header {{{
		.then(function(next) {
			settings.stream.write('<?xml version="1.0" encoding="UTF-8"?><xml><records>');
			next();
		})
		// }}}

		// References {{{
		.then(function(next) {
			if (_.isFunction(settings.content)) { // Callback
				var batchNo = 0;
				var fetcher = function() {
					settings.content(function(err, data, isLast) {
						if (err) return next(err);
						if (_.isArray(data) && data.length > 0) { // Callback provided array
							data.forEach(function(ref) {
								settings.stream.write(settings.encode(ref));
							});
							setTimeout(fetcher);
						} else if(!_.isArray(data) && _.isObject(data)) { // Callback provided single ref
							settings.stream.write(settings.encode(data));
							setTimeout(fetcher);
						} else { // End of stream
							next();
						}
					}, batchNo++);
				};
				fetcher();
			} else if (_.isArray(settings.content)) { // Array of refs
				settings.content.forEach(function(ref) {
					settings.stream.write(settings.encode(ref));
				});
				next();
			} else if (_.isObject(settings.content)) { // Single ref
				settings.stream.write(settings.encode(settings.content));
				next();
			} else { // Nothing usable to write - still finish the document rather than stalling
				next();
			}
		})
		// }}}

		// Footer {{{
		.then(function(next) {
			settings.stream.write('</records></xml>');
			next();
		})
		// }}}

		.end(function(err) {
			if (settings.stream) settings.stream.end(); // May be missing if the sanity check failed
			if (err) throw (err instanceof Error ? err : new Error(err));
		});

	return settings.stream;
}
477
// Public interface of this driver: the XML writer, the XML reader and the default string encoder
var driver = {};
driver.output = output;
driver.parse = parse;
driver._escape = _escape;
module.exports = driver;