UNPKG

13.4 kBJavaScriptView Raw
1var fs = require('fs');
2
3var iconv = null;
4try {
5 iconv = require('iconv-lite');
6} catch (err) {
7 // If module iconv-lite is not present then use only utf-8 encoding.
8 iconv = null;
9}
10
11var bufferWrapper = require('./bufferwrapper');
12var MarcRecord = require('./record').MarcRecord;
13
14var field = require('./field');
15var MarcVariableField = field.MarcVariableField;
16var MarcControlField = field.MarcControlField;
17var MarcDataField = field.MarcDataField;
18var MarcSubfield = field.MarcSubfield;
19
/*
 * The constructor of MARC ISO2709 reader.
 *
 * May be called with or without `new`.
 *
 * Recognized options (all optional):
 *   format             - MARC format variation (MARC21, UNIMARC); stored
 *                        upper-cased, defaults to 'UNIMARC'.
 *   encoding           - input data encoding. Kept only when it is not
 *                        'utf-8', iconv-lite is loaded and iconv-lite knows
 *                        the encoding; otherwise null (data is decoded as
 *                        utf-8). This is the same rule openFile() applies.
 *   permissive         - ignore minor errors, defaults to false.
 *   controlFieldRegexp - template of the control field tag,
 *                        defaults to /^00[1-9]$/.
 */
function MarcIsoReader(options) {
  if (!(this instanceof MarcIsoReader)) {
    return new MarcIsoReader(options);
  }

  // File with records in ISO2709 format.
  this.recordsFile = null;
  // Buffer for the record (maximum size of the ISO2709 record is 99999 bytes).
  this.recordBuffer = bufferWrapper.allocUnsafe(100000);
  // Flag is true when next() can be performed.
  this.readyToRead = false;
  // Position in file.
  this.position = null;

  options = options || {};

  // Without this check an encoding given here would reach iconv.decode()
  // even when iconv-lite is missing (iconv === null) or does not know it.
  var encoding = null;
  if (options.encoding && options.encoding !== 'utf-8'
    && iconv && iconv.encodingExists(options.encoding))
  {
    encoding = options.encoding;
  }

  // File options.
  this.options = {
    // MARC format variation (MARC21, UNIMARC).
    format: (options.format || 'UNIMARC').toUpperCase(),
    // Input data encoding (null means utf-8).
    encoding: encoding,
    // Permissive mode (ignore minor errors).
    permissive: options.permissive || false,
    // Template of the control field tag.
    controlFieldRegexp: options.controlFieldRegexp || /^00[1-9]$/
  };
}

/*
 * Constants.
 */
// Size of the record leader, in bytes.
MarcIsoReader.ISO_LEADER_SIZE = 24;
// Size of one directory entry, in bytes.
MarcIsoReader.ISO_DIR_ENTRY_SIZE = 12;
56
/*
 * Opens records file by descriptor.
 *
 * Marks the reader as ready, resets the position to 0 and overrides the
 * reader options with every option that is explicitly present in `options`
 * (format, encoding, permissive, controlFieldRegexp).
 *
 * The encoding is kept only when it is not 'utf-8', iconv-lite is loaded and
 * iconv-lite knows the encoding; in every other case it is reset to null,
 * which makes the parser decode data as utf-8.
 */
MarcIsoReader.prototype.openFile = function(recordsFile, options) {
  this.recordsFile = recordsFile;
  this.readyToRead = true;
  this.position = 0;

  options = options || {};

  // Safe own-property test (works for objects without Object.prototype too).
  var hasOption = function(name) {
    return Object.prototype.hasOwnProperty.call(options, name);
  };

  if (hasOption('format')) {
    this.options.format = (options.format || 'UNIMARC').toUpperCase();
  }

  if (hasOption('encoding')) {
    if (options.encoding && options.encoding !== 'utf-8'
      && iconv && iconv.encodingExists(options.encoding))
    {
      this.options.encoding = options.encoding;
    } else {
      // Reset the option actually read by the parser (this.options.encoding),
      // not an unrelated property on the reader itself.
      this.options.encoding = null;
    }
  }

  if (hasOption('permissive')) {
    this.options.permissive = options.permissive;
  }

  if (hasOption('controlFieldRegexp')) {
    this.options.controlFieldRegexp = options.controlFieldRegexp;
  }
};
89
/*
 * Opens records file by name.
 *
 * Call as open(recordsFileName, callback) or
 * open(recordsFileName, options, callback); the three-argument form is
 * detected by the number of arguments.
 *
 * `options.flags` (default 'r') and `options.mode` (default '0666') are
 * passed to fs.open(); the whole options object is then handed to
 * openFile(). The callback receives the fs.open() error, or no arguments on
 * success.
 */
MarcIsoReader.prototype.open = function(recordsFileName) {
  var reader = this;
  var hasOptions = arguments.length === 3;
  var options = hasOptions ? arguments[1] : undefined;
  var callback = hasOptions ? arguments[2] : arguments[1];

  var settings = options || {};
  var flags = settings.flags || 'r';
  var mode = settings.mode || '0666';

  fs.open(recordsFileName, flags, mode, function(err, recordsFile) {
    if (err) {
      callback(err);
      return;
    }
    reader.openFile(recordsFile, options);
    callback();
  });
};
106
/*
 * Opens records file by name (sync version).
 *
 * `options.flags` (default 'r') and `options.mode` (default '0666') are
 * passed to fs.openSync(); the resulting descriptor and the options are then
 * handed to openFile(). Errors from fs.openSync() propagate to the caller.
 */
MarcIsoReader.prototype.openSync = function(recordsFileName, options) {
  var settings = options || {};
  var descriptor = fs.openSync(recordsFileName,
    settings.flags || 'r', settings.mode || '0666');
  this.openFile(descriptor, options);
};
116
/*
 * Closes records file.
 *
 * The callback (optional) is always invoked exactly once: with the
 * fs.close() error when a file was open, or with null on a later tick when
 * there was nothing to close. After fs.close() completes the reader state
 * (readyToRead, recordsFile, position) is reset regardless of the error.
 */
MarcIsoReader.prototype.close = function(callback) {
  var self = this;
  var done = typeof callback === 'function' ? callback : function() {};

  if (self.recordsFile === null) {
    // Nothing is open. Still report completion, asynchronously, so the
    // caller sees the same callback timing as on the fs.close() path.
    process.nextTick(function() {
      done(null);
    });
    return;
  }

  fs.close(self.recordsFile, function(err) {
    self.readyToRead = false;
    self.recordsFile = null;
    self.position = null;
    done(err);
  });
};
131
/*
 * Closes records file (sync version).
 *
 * Does nothing when no file is open. Otherwise closes the descriptor with
 * fs.closeSync() and resets readyToRead, recordsFile and position; if
 * fs.closeSync() throws, the state is left as it was.
 */
MarcIsoReader.prototype.closeSync = function() {
  var descriptor = this.recordsFile;
  if (descriptor === null) {
    return;
  }

  fs.closeSync(descriptor);
  this.readyToRead = false;
  this.recordsFile = null;
  this.position = null;
};
143
/*
 * Returns true if next record available to read.
 *
 * This only reports the reader's readyToRead flag; it does not touch the
 * file.
 */
MarcIsoReader.prototype.hasNext = function() {
  var canRead = this.readyToRead;
  return canRead;
};
150
/*
 * Reads next record from the file.
 *
 * Reads the 5-byte record length at the current file position, then the
 * rest of the record, and parses it with MarcIsoReader.parseRecord().
 *
 * callback(err, record):
 *   - record is null (and err is null) at the end of the file; readyToRead
 *     is cleared in that case so hasNext() stops reporting true;
 *   - err is set when the file is not open, a read fails, the length prefix
 *     is incomplete or invalid, the record is truncated, or parsing throws.
 *
 * this.position is advanced by the number of bytes actually read.
 */
MarcIsoReader.prototype.next = function(callback) {
  var self = this;
  if (self.recordsFile === null) {
    return callback(new Error('records file must be opened'));
  }

  // Read record length.
  fs.read(self.recordsFile, self.recordBuffer, 0, 5, null,
    function(err, bytesRead) {
      if (err) { return callback(err); }
      self.position += bytesRead;

      if (bytesRead === 0) {
        // End of file: nothing more to read.
        self.readyToRead = false;
        return callback(null, null);
      } else if (bytesRead !== 5) {
        return callback(new Error('failed to read record length'));
      }

      // Parse record length. A record can not be shorter than its leader;
      // rejecting such values here also keeps a negative length from
      // reaching fs.read(), which would throw inside this callback.
      var recordLength =
        parseInt(self.recordBuffer.toString('ascii', 0, 5), 10);
      if (isNaN(recordLength)
        || recordLength < MarcIsoReader.ISO_LEADER_SIZE)
      {
        return callback(new Error('invalid record length'));
      }

      // Read the record.
      fs.read(self.recordsFile, self.recordBuffer, 5, recordLength - 5, null,
        function(err, bytesRead) {
          if (err) { return callback(err); }
          self.position += bytesRead;

          if (bytesRead !== recordLength - 5) {
            self.readyToRead = false;
            return callback(new Error('unexpected end of file'));
          }

          // Parse record data from the buffer. Only the parser is guarded:
          // an exception thrown by the callback itself must not cause the
          // callback to be invoked a second time.
          var record;
          try {
            record =
              MarcIsoReader.parseRecord(self.recordBuffer, self.options);
          } catch (parseErr) {
            return callback(parseErr);
          }
          callback(null, record);
        }
      );
    }
  );
};
201
/*
 * Reads next record from the file (sync version).
 *
 * Reads the 5-byte record length at the current file position, then the
 * rest of the record, and returns the result of
 * MarcIsoReader.parseRecord(). Returns null at the end of the file (and
 * clears readyToRead so hasNext() stops reporting true).
 *
 * Throws an Error when the file is not open, the length prefix is
 * incomplete or invalid, or the record is truncated; errors from
 * fs.readSync() and from the parser propagate unchanged.
 *
 * this.position is advanced by the number of bytes actually read.
 */
MarcIsoReader.prototype.nextSync = function() {
  if (this.recordsFile === null) {
    throw new Error('records file must be opened');
  }

  // Read record length.
  var lengthBytesRead =
    fs.readSync(this.recordsFile, this.recordBuffer, 0, 5, null);
  this.position += lengthBytesRead;
  if (lengthBytesRead === 0) {
    // End of file: nothing more to read.
    this.readyToRead = false;
    return null;
  } else if (lengthBytesRead !== 5) {
    throw new Error('failed to read record length');
  }

  // Parse record length. A record can not be shorter than its leader;
  // rejecting such values here also keeps a negative length from reaching
  // fs.readSync().
  var recordLength = parseInt(this.recordBuffer.toString('ascii', 0, 5), 10);
  if (isNaN(recordLength) || recordLength < MarcIsoReader.ISO_LEADER_SIZE) {
    throw new Error('invalid record length');
  }

  // Read the record.
  var bodyBytesRead = fs.readSync(this.recordsFile, this.recordBuffer,
    5, recordLength - 5, null);
  this.position += bodyBytesRead;
  if (bodyBytesRead !== recordLength - 5) {
    this.readyToRead = false;
    throw new Error('unexpected end of file');
  }

  // Parse record data from the buffer.
  return MarcIsoReader.parseRecord(this.recordBuffer, this.options);
};
237
/*
 * Reads specified record from the file.
 *
 * Reads `size` bytes starting at byte offset `position` of the file into
 * the record buffer and parses them with MarcIsoReader.parseRecord().
 * this.position is not changed.
 *
 * callback(err, record):
 *   - record is null (and err is null) when nothing could be read;
 *   - err is set when the file is not open, the read fails, fewer than
 *     `size` bytes are available, or parsing throws.
 */
MarcIsoReader.prototype.read = function(position, size, callback) {
  var self = this;
  if (self.recordsFile === null) {
    return callback(new Error('records file must be opened'));
  }

  // Read the record.
  fs.read(self.recordsFile, self.recordBuffer, 0, size, position,
    function(err, bytesRead) {
      if (err) { return callback(err); }
      if (bytesRead === 0) {
        return callback(null, null);
      } else if (bytesRead !== size) {
        return callback(new Error('unexpected end of file'));
      }

      // Parse record data from the buffer. Only the parser is guarded: an
      // exception thrown by the callback itself must not cause the callback
      // to be invoked a second time.
      var record;
      try {
        record = MarcIsoReader.parseRecord(self.recordBuffer, self.options);
      } catch (parseErr) {
        return callback(parseErr);
      }
      callback(null, record);
    }
  );
};
267
/*
 * Reads specified record from the file (sync version).
 *
 * Reads `size` bytes starting at byte offset `position` of the file into
 * the record buffer and returns the result of MarcIsoReader.parseRecord().
 * Returns null when nothing could be read. Throws when the file is not open
 * or fewer than `size` bytes are available; this.position is not changed.
 */
MarcIsoReader.prototype.readSync = function(position, size) {
  var descriptor = this.recordsFile;
  if (descriptor === null) {
    throw new Error('records file must be opened');
  }

  // Fill the record buffer from the requested offset.
  var count = fs.readSync(descriptor, this.recordBuffer, 0, size, position);
  if (count === 0) {
    return null;
  }
  if (count != size) {
    throw new Error('unexpected end of file');
  }

  // Hand the filled buffer to the parser.
  return MarcIsoReader.parseRecord(this.recordBuffer, this.options);
};
288
/*
 * Returns current position in file.
 *
 * This is the reader's own counter: 0 after openFile(), advanced by next()
 * and nextSync(), null while no file is open.
 */
MarcIsoReader.prototype.getPosition = function() {
  var currentPosition = this.position;
  return currentPosition;
};
295
/*
 * Parses record data from the buffer.
 *
 * Layout read from the buffer: the leader (ISO_LEADER_SIZE bytes), then the
 * directory made of ISO_DIR_ENTRY_SIZE-byte entries (3 characters of tag,
 * 4 of field length, 5 of field starting position), then the field data
 * beginning at the base address taken from leader characters 12-16.
 *
 * Every directory entry is parsed with MarcIsoReader.parseField() and
 * appended to a new MarcRecord, which is returned. Throws when the record
 * length, the base address, or a directory entry is not numeric.
 */
MarcIsoReader.parseRecord = function(buffer, options) {
  var leaderSize = MarcIsoReader.ISO_LEADER_SIZE;
  var entrySize = MarcIsoReader.ISO_DIR_ENTRY_SIZE;

  // Start a fresh record and copy the leader into it.
  var record = new MarcRecord();
  record.leader = buffer.toString('ascii', 0, leaderSize);

  // The record length is only validated here, it is not used afterwards.
  if (isNaN(parseInt(record.leader.slice(0, 5)))) {
    throw new Error('invalid record length');
  }

  // Offset of the first field's data.
  var baseAddress = parseInt(record.leader.slice(12, 17));
  if (isNaN(baseAddress)) {
    throw new Error('invalid base address of the record data');
  }

  // The directory occupies the space between the leader and the base
  // address, minus one byte.
  var numFields = (baseAddress - record.leader.length - 1) / entrySize;

  // Walk the directory one entry at a time.
  var entryPos = leaderSize;
  for (var fieldNo = 0; fieldNo < numFields; fieldNo++) {
    var entry = buffer.toString('ascii', entryPos, entryPos + entrySize);
    var tag = entry.slice(0, 3);
    var length = parseInt(entry.slice(3, 7));
    var offset = parseInt(entry.slice(7, 12));
    if (isNaN(length) || isNaN(offset)) {
      throw new Error('invalid field length or starting position');
    }

    // Parse the field and append it to the record.
    record.addVariableField(MarcIsoReader.parseField(buffer, tag,
      baseAddress + offset, length, options));

    entryPos += entrySize;
  }

  return record;
};
345
/*
 * Parses field data from the buffer.
 *
 * buffer        - buffer holding the record.
 * fieldTag      - 3-character tag of the field.
 * fieldStartPos - offset of the field data in the buffer.
 * fieldLength   - length of the field data; a trailing 0x1e byte, when
 *                 present, is not treated as data.
 * options       - reader options (format, encoding, permissive,
 *                 controlFieldRegexp).
 *
 * Returns a MarcControlField when the tag matches
 * options.controlFieldRegexp, otherwise a MarcDataField with its subfields
 * (each subfield starts with a 0x1f byte). For UNIMARC, subfields with code
 * '1' open an embedded field and the following subfields are attached to
 * it. Text is decoded with iconv-lite when options.encoding is set and
 * iconv-lite is loaded, and as utf-8 otherwise.
 *
 * Throws Error('invalid embedded field') for a malformed embedded field
 * header (in permissive mode a header of 3 or more characters is padded or
 * cut to 5 characters instead).
 */
MarcIsoReader.parseField = function(buffer, fieldTag,
  fieldStartPos, fieldLength, options)
{
  // Adjust field length.
  if (buffer.readUInt8(fieldStartPos + fieldLength - 1) === 0x1e) {
    fieldLength--;
  }

  if (options.controlFieldRegexp.test(fieldTag)) {
    // Parse control field.
    var fieldDataBuffer =
      buffer.slice(fieldStartPos, fieldStartPos + fieldLength);
    var fieldData;
    if (options.encoding && iconv) {
      fieldData = iconv.decode(fieldDataBuffer, options.encoding);
    } else {
      // No encoding requested, or iconv-lite is not available.
      fieldData = fieldDataBuffer.toString('utf-8');
    }
    return new MarcControlField(fieldTag, fieldData);
  }

  var subfieldStartPos = fieldStartPos;

  // Parse data field. An indicator is missing ('?') when the subfield
  // delimiter is found in its place.
  var ind1 = '?';
  if (buffer.readUInt8(subfieldStartPos) !== 0x1f) {
    ind1 = buffer.toString('ascii', subfieldStartPos, subfieldStartPos + 1);
    subfieldStartPos++;
  }
  var ind2 = '?';
  if (buffer.readUInt8(subfieldStartPos) !== 0x1f) {
    ind2 = buffer.toString('ascii', subfieldStartPos, subfieldStartPos + 1);
    subfieldStartPos++;
  }
  var dataField = new MarcDataField(fieldTag, ind1, ind2);

  // Initialize embedded field for UNIMARC records.
  var embeddedField = null;

  // Parse list of subfields. `symbolPos` is declared locally so that it
  // does not leak into (or depend on) the global scope.
  for (var symbolPos = 2; symbolPos <= fieldLength; symbolPos++) {
    // A subfield ends at the next delimiter or at the end of the field.
    // The end-of-field test comes first so that no byte past the field is
    // read.
    if (symbolPos !== fieldLength
      && buffer.readUInt8(fieldStartPos + symbolPos) !== 0x1f)
    {
      continue;
    }

    if (symbolPos > 2) {
      // Parse subfield data from the buffer.
      var subfield = MarcIsoReader.parseSubfield(buffer,
        subfieldStartPos, fieldStartPos + symbolPos - subfieldStartPos,
        options);

      if (subfield.code === '1' && options.format === 'UNIMARC') {
        // Add embedded field (for UNIMARC).
        if (subfield.data.length < 3) {
          throw new Error('invalid embedded field');
        }

        var embeddedFieldTag = subfield.data.slice(0, 3);
        if (options.controlFieldRegexp.test(embeddedFieldTag)) {
          // Add embedded control field.
          dataField.addSubfield(new MarcSubfield('1',
            new MarcControlField(embeddedFieldTag, subfield.data.slice(3))));
        } else {
          if (subfield.data.length !== 5) {
            if (options.permissive) {
              // Pad or cut the header to exactly 5 characters (3 of tag and
              // 2 indicators); the template therefore has 5 characters.
              subfield.data = (subfield.data
                + '000  '.slice(subfield.data.length)).slice(0, 5);
            } else {
              throw new Error('invalid embedded field');
            }
          }

          // Add embedded data field.
          embeddedField = new MarcDataField(embeddedFieldTag,
            subfield.data.slice(3, 4), subfield.data.slice(4, 5));
          dataField.addSubfield(new MarcSubfield('1', embeddedField));
        }
      } else if (embeddedField) {
        // Add subfield to the embedded data field.
        embeddedField.addSubfield(subfield);
      } else {
        // Add subfield.
        dataField.addSubfield(subfield);
      }
    }

    subfieldStartPos = fieldStartPos + symbolPos;
  }
  return dataField;
};
445
/*
 * Parses subfield data from the buffer.
 *
 * buffer           - buffer holding the record.
 * subfieldStartPos - offset of the subfield; the byte at this offset is
 *                    skipped, the next byte is the subfield code and the
 *                    remaining bytes are the subfield data.
 * subfieldLength   - total length of the subfield, counted from
 *                    subfieldStartPos.
 * options          - reader options; options.encoding selects the iconv-lite
 *                    encoding of the data.
 *
 * Data is decoded with iconv-lite when an encoding is set and iconv-lite is
 * loaded, and as utf-8 otherwise. Returns a new MarcSubfield.
 */
MarcIsoReader.parseSubfield = function(buffer,
  subfieldStartPos, subfieldLength, options)
{
  var subfieldCode =
    buffer.toString('ascii', subfieldStartPos + 1, subfieldStartPos + 2);
  var subfieldDataBuffer =
    buffer.slice(subfieldStartPos + 2, subfieldStartPos + subfieldLength);

  // Declared locally: the value must not leak into the global scope.
  var subfieldData;
  if (options && options.encoding && iconv) {
    subfieldData = iconv.decode(subfieldDataBuffer, options.encoding);
  } else {
    // No encoding requested, or iconv-lite is not available.
    subfieldData = subfieldDataBuffer.toString('utf-8');
  }
  return new MarcSubfield(subfieldCode, subfieldData);
};
463
464module.exports = {
465 MarcIsoReader: MarcIsoReader
466};