1 | var fs = require('fs');
|
2 |
|
3 | var iconv = null;
|
4 | try {
|
5 | iconv = require('iconv-lite');
|
6 | } catch (err) {
|
7 |
|
8 | iconv = null;
|
9 | }
|
10 |
|
11 | var bufferWrapper = require('./bufferwrapper');
|
12 | var MarcRecord = require('./record').MarcRecord;
|
13 |
|
14 | var field = require('./field');
|
15 | var MarcVariableField = field.MarcVariableField;
|
16 | var MarcControlField = field.MarcControlField;
|
17 | var MarcDataField = field.MarcDataField;
|
18 | var MarcSubfield = field.MarcSubfield;
|
19 |
|
20 |
|
21 |
|
22 |
|
/**
 * Reader for MARC records stored in an ISO-format file.
 *
 * Works with or without `new`: a plain call re-invokes itself as a
 * constructor.
 *
 * @param {Object} [options] Reader options.
 * @param {string} [options.format='UNIMARC'] Record format name; stored
 *   upper-cased.
 * @param {?string} [options.encoding=null] Encoding handed to iconv when
 *   field data is decoded; a falsy value means plain UTF-8 decoding.
 * @param {boolean} [options.permissive=false] Tolerate malformed embedded
 *   fields instead of throwing.
 * @param {RegExp} [options.controlFieldRegexp=/^00[1-9]$/] Pattern that
 *   identifies control field tags.
 */
function MarcIsoReader(options) {
  if (this instanceof MarcIsoReader === false) {
    return new MarcIsoReader(options);
  }

  // No file is attached until one of the open* methods is called.
  this.recordsFile = null;

  // Scratch buffer that each record is read into before it is parsed.
  this.recordBuffer = bufferWrapper.allocUnsafe(100000);

  this.readyToRead = false;
  this.position = null;

  var given = options || {};
  var settings = {};
  settings.format = (given.format || 'UNIMARC').toUpperCase();
  settings.encoding = given.encoding || null;
  settings.permissive = given.permissive || false;
  settings.controlFieldRegexp = given.controlFieldRegexp || /^00[1-9]$/;
  this.options = settings;
}
|
50 |
|
51 |
|
52 |
|
53 |
|
// Number of bytes occupied by the record leader at the start of a record.
MarcIsoReader.ISO_LEADER_SIZE = 24;
// Number of bytes in one directory entry: parseRecord slices it into a
// 3-character tag, a 4-character field length and a 5-character start offset.
MarcIsoReader.ISO_DIR_ENTRY_SIZE = 12;
|
56 |
|
57 |
|
58 |
|
59 |
|
/**
 * Attaches an already opened file descriptor to the reader and resets the
 * reading state (position 0, ready to read).
 *
 * Only the options explicitly present on `options` override the settings
 * chosen at construction time.
 *
 * @param {number} recordsFile File descriptor of the records file.
 * @param {Object} [options] Per-file overrides.
 * @param {string} [options.format] Record format; a falsy value selects
 *   'UNIMARC'. Stored upper-cased.
 * @param {?string} [options.encoding] Source encoding. It is kept only when
 *   it is not 'utf-8' and iconv-lite is available and knows it; otherwise
 *   the reader falls back to plain UTF-8 decoding (encoding = null).
 * @param {boolean} [options.permissive] Tolerate malformed embedded fields.
 * @param {RegExp} [options.controlFieldRegexp] Control field tag pattern.
 */
MarcIsoReader.prototype.openFile = function(recordsFile, options) {
  this.recordsFile = recordsFile;
  this.readyToRead = true;
  this.position = 0;

  options = options || {};

  // Works for option objects without Object.prototype in their chain too.
  var has = function(name) {
    return Object.prototype.hasOwnProperty.call(options, name);
  };

  if (has('format')) {
    this.options.format = (options.format || 'UNIMARC').toUpperCase();
  }

  if (has('encoding')) {
    var encoding = options.encoding;
    var usable = encoding && encoding !== 'utf-8'
      && iconv && iconv.encodingExists(encoding);
    // The fallback has to reset the option the parser actually reads;
    // otherwise an encoding configured earlier would silently stay active.
    this.options.encoding = usable ? encoding : null;
  }

  if (has('permissive')) {
    this.options.permissive = options.permissive;
  }

  if (has('controlFieldRegexp')) {
    this.options.controlFieldRegexp = options.controlFieldRegexp;
  }
};
|
89 |
|
90 |
|
91 |
|
92 |
|
/**
 * Opens a records file asynchronously and attaches it to the reader.
 *
 * Call forms: `open(fileName, callback)` or
 * `open(fileName, options, callback)`. The options form is recognised only
 * when exactly three arguments are passed.
 *
 * @param {string} recordsFileName Path of the file to open.
 * @param {Object} [options] `flags` (default 'r') and `mode` (default
 *   '0666') go to fs.open; the whole object is forwarded to openFile().
 * @param {function(?Error)} callback Called with the fs.open error, or with
 *   no arguments once the file is attached.
 */
MarcIsoReader.prototype.open = function(recordsFileName) {
  var reader = this;
  var options;
  var callback;

  if (arguments.length === 3) {
    options = arguments[1];
    callback = arguments[2];
  } else {
    callback = arguments[1];
  }

  var fsOptions = options || {};
  var flags = fsOptions.flags || 'r';
  var mode = fsOptions.mode || '0666';

  fs.open(recordsFileName, flags, mode, function(err, descriptor) {
    if (err) {
      return callback(err);
    }
    reader.openFile(descriptor, options);
    callback();
  });
};
|
106 |
|
107 |
|
108 |
|
109 |
|
/**
 * Opens a records file synchronously and attaches it to the reader.
 *
 * @param {string} recordsFileName Path of the file to open.
 * @param {Object} [options] `flags` (default 'r') and `mode` (default
 *   '0666') go to fs.openSync; the whole object is forwarded to openFile().
 * @throws {Error} Whatever fs.openSync throws when the file cannot be opened.
 */
MarcIsoReader.prototype.openSync = function(recordsFileName, options) {
  var fsOptions = options || {};
  var descriptor = fs.openSync(
    recordsFileName, fsOptions.flags || 'r', fsOptions.mode || '0666');
  this.openFile(descriptor, options);
};
|
116 |
|
117 |
|
118 |
|
119 |
|
/**
 * Closes the attached records file asynchronously and resets the reader
 * state (not ready, no file, no position).
 *
 * The callback is always invoked asynchronously, including when no file is
 * open, so callers waiting on it never hang.
 *
 * @param {function(?Error)} [callback] Called with the fs.close error, or
 *   with null on success or when there was nothing to close. When it is
 *   omitted, a close error is thrown from the fs callback instead of being
 *   dropped.
 */
MarcIsoReader.prototype.close = function(callback) {
  var self = this;
  var finish = function(err) {
    if (typeof callback === 'function') {
      callback(err);
    } else if (err) {
      throw err;
    }
  };

  if (self.recordsFile === null) {
    // Nothing to close; defer so the callback timing matches the fs.close path.
    process.nextTick(function() {
      finish(null);
    });
    return;
  }

  fs.close(self.recordsFile, function(err) {
    self.readyToRead = false;
    self.recordsFile = null;
    self.position = null;
    finish(err);
  });
};
|
131 |
|
132 |
|
133 |
|
134 |
|
/**
 * Closes the attached records file synchronously and resets the reader
 * state. Does nothing when no file is attached.
 *
 * @throws {Error} Whatever fs.closeSync throws; the state is left untouched
 *   in that case.
 */
MarcIsoReader.prototype.closeSync = function() {
  var descriptor = this.recordsFile;
  if (descriptor === null) {
    return;
  }

  fs.closeSync(descriptor);

  this.position = null;
  this.recordsFile = null;
  this.readyToRead = false;
};
|
143 |
|
144 |
|
145 |
|
146 |
|
/**
 * Reports the reader's `readyToRead` flag, which openFile() sets and
 * close()/closeSync() clear.
 *
 * @returns {boolean} Current value of the flag.
 */
MarcIsoReader.prototype.hasNext = function() {
  var ready = this.readyToRead;
  return ready;
};
|
150 |
|
151 |
|
152 |
|
153 |
|
/**
 * Reads and parses the next record from the current file position.
 *
 * The first 5 bytes are read as the decimal record length, then the rest of
 * the record is read into the shared record buffer and handed to
 * MarcIsoReader.parseRecord().
 *
 * @param {function(?Error, ?Object=)} callback Called with
 *   `(null, record)` on success, `(null, null)` at end of file, or with an
 *   Error when the file is not open, the length prefix is unreadable or out
 *   of range, the record is truncated, or parsing fails.
 */
MarcIsoReader.prototype.next = function(callback) {
  var self = this;
  if (self.recordsFile === null) {
    return callback(new Error('records file must be opened'));
  }

  var buffer = self.recordBuffer;

  fs.read(self.recordsFile, buffer, 0, 5, null, function(err, bytesRead) {
    if (err) { return callback(err); }
    self.position += bytesRead;

    if (bytesRead === 0) {
      return callback(null, null);
    }
    if (bytesRead !== 5) {
      return callback(new Error('failed to read record length'));
    }

    var recordLength = parseInt(buffer.toString('ascii', 0, 5), 10);
    // A record can be neither shorter than its leader nor larger than the
    // buffer. Rejecting such lengths here keeps fs.read from throwing a
    // RangeError inside this callback, where nothing could catch it.
    if (isNaN(recordLength)
      || recordLength < MarcIsoReader.ISO_LEADER_SIZE
      || recordLength > buffer.length)
    {
      return callback(new Error('invalid record length'));
    }

    var remaining = recordLength - 5;
    fs.read(self.recordsFile, buffer, 5, remaining, null,
      function(err, bytesRead) {
        if (err) { return callback(err); }
        self.position += bytesRead;

        if (bytesRead !== remaining) {
          self.readyToRead = false;
          return callback(new Error('unexpected end of file'));
        }

        // Only parsing is guarded: an exception thrown by the callback
        // itself must not cause the callback to be invoked a second time.
        var record;
        try {
          record = MarcIsoReader.parseRecord(buffer, self.options);
        } catch (parseError) {
          return callback(parseError);
        }
        callback(null, record);
      }
    );
  });
};
|
201 |
|
202 |
|
203 |
|
204 |
|
/**
 * Synchronously reads and parses the next record from the current file
 * position.
 *
 * @returns {?Object} The record produced by MarcIsoReader.parseRecord(), or
 *   null at end of file.
 * @throws {Error} When the file is not open, the 5-byte length prefix is
 *   incomplete, not a number or out of range, the record is truncated, or
 *   parsing fails.
 */
MarcIsoReader.prototype.nextSync = function() {
  if (this.recordsFile === null) {
    throw new Error('records file must be opened');
  }

  var buffer = this.recordBuffer;

  var bytesRead = fs.readSync(this.recordsFile, buffer, 0, 5, null);
  this.position += bytesRead;
  if (bytesRead === 0) {
    return null;
  }
  if (bytesRead !== 5) {
    throw new Error('failed to read record length');
  }

  var recordLength = parseInt(buffer.toString('ascii', 0, 5), 10);
  // A record can be neither shorter than its leader nor larger than the
  // buffer; report that as a data error instead of letting fs.readSync
  // fail with an unrelated RangeError.
  if (isNaN(recordLength)
    || recordLength < MarcIsoReader.ISO_LEADER_SIZE
    || recordLength > buffer.length)
  {
    throw new Error('invalid record length');
  }

  var remaining = recordLength - 5;
  bytesRead = fs.readSync(this.recordsFile, buffer, 5, remaining, null);
  this.position += bytesRead;
  if (bytesRead !== remaining) {
    this.readyToRead = false;
    throw new Error('unexpected end of file');
  }

  return MarcIsoReader.parseRecord(buffer, this.options);
};
|
237 |
|
238 |
|
239 |
|
240 |
|
/**
 * Reads `size` bytes at an explicit file position and parses them as one
 * record. The reader's own position counter is not touched.
 *
 * @param {number} position Byte offset of the record in the file.
 * @param {number} size Record size in bytes.
 * @param {function(?Error, ?Object=)} callback Called with
 *   `(null, record)` on success, `(null, null)` when nothing could be read
 *   at that position, or with an Error when the file is not open, fewer
 *   than `size` bytes were available, or parsing fails.
 */
MarcIsoReader.prototype.read = function(position, size, callback) {
  var self = this;
  if (self.recordsFile === null) {
    return callback(new Error('records file must be opened'));
  }

  fs.read(self.recordsFile, self.recordBuffer, 0, size, position,
    function(err, bytesRead) {
      if (err) { return callback(err); }
      if (bytesRead === 0) {
        return callback(null, null);
      }
      if (bytesRead !== size) {
        return callback(new Error('unexpected end of file'));
      }

      // Only parsing is guarded: an exception thrown by the callback itself
      // must not cause the callback to be invoked a second time.
      var record;
      try {
        record = MarcIsoReader.parseRecord(self.recordBuffer, self.options);
      } catch (parseError) {
        return callback(parseError);
      }
      callback(null, record);
    }
  );
};
|
267 |
|
268 |
|
269 |
|
270 |
|
/**
 * Synchronously reads `size` bytes at an explicit file position and parses
 * them as one record. The reader's own position counter is not touched.
 *
 * @param {number} position Byte offset of the record in the file.
 * @param {number} size Record size in bytes.
 * @returns {?Object} The parsed record, or null when nothing could be read
 *   at that position.
 * @throws {Error} When the file is not open, fewer than `size` bytes were
 *   available, or parsing fails.
 */
MarcIsoReader.prototype.readSync = function(position, size) {
  var descriptor = this.recordsFile;
  if (descriptor === null) {
    throw new Error('records file must be opened');
  }

  var target = this.recordBuffer;
  var count = fs.readSync(descriptor, target, 0, size, position);

  if (count === 0) {
    return null;
  }
  if (count != size) {
    throw new Error('unexpected end of file');
  }
  return MarcIsoReader.parseRecord(target, this.options);
};
|
288 |
|
289 |
|
290 |
|
291 |
|
/**
 * Returns the reader's position counter: 0 after openFile(), advanced by
 * the byte counts read in next()/nextSync(), and null when no file is
 * attached.
 *
 * @returns {?number} Current value of the position counter.
 */
MarcIsoReader.prototype.getPosition = function() {
  var offset = this.position;
  return offset;
};
|
295 |
|
296 |
|
297 |
|
298 |
|
/**
 * Parses one record from a buffer: the leader first, then every directory
 * entry together with the field it points to.
 *
 * All numeric leader and directory values are parsed strictly as decimal.
 *
 * @param {Buffer} buffer Buffer holding the record, starting at offset 0.
 * @param {Object} options Reader options, forwarded to parseField().
 * @returns {MarcRecord} Record with `leader` set and one field added per
 *   directory entry.
 * @throws {Error} When the record length or base address in the leader is
 *   not a number, the base address points inside the leader, or a directory
 *   entry has a non-numeric length or start position.
 */
MarcIsoReader.parseRecord = function(buffer, options) {
  var leaderSize = MarcIsoReader.ISO_LEADER_SIZE;
  var entrySize = MarcIsoReader.ISO_DIR_ENTRY_SIZE;

  var record = new MarcRecord();
  record.leader = buffer.toString('ascii', 0, leaderSize);

  // Leader bytes 0-4: total record length.
  var recordLength = parseInt(record.leader.slice(0, 5), 10);
  if (isNaN(recordLength)) {
    throw new Error('invalid record length');
  }

  // Leader bytes 12-16: offset at which the field data starts.
  var baseAddress = parseInt(record.leader.slice(12, 17), 10);
  if (isNaN(baseAddress) || baseAddress < leaderSize) {
    // Field data can never start inside the leader.
    throw new Error('invalid base address of the record data');
  }

  // The directory fills the space between the leader and the field data,
  // minus one byte for the directory terminator.
  var numFields = (baseAddress - record.leader.length - 1) / entrySize;

  for (var fieldNo = 0; fieldNo < numFields; fieldNo++) {
    var directoryEntryPos = leaderSize + (fieldNo * entrySize);
    var directoryEntry = buffer.toString('ascii', directoryEntryPos,
      directoryEntryPos + entrySize);

    // Entry layout: 3-char tag, 4-char field length, 5-char start offset.
    var fieldTag = directoryEntry.slice(0, 3);
    var fieldLength = parseInt(directoryEntry.slice(3, 7), 10);
    var fieldStartPos = parseInt(directoryEntry.slice(7, 12), 10);
    if (isNaN(fieldLength) || isNaN(fieldStartPos)) {
      throw new Error('invalid field length or starting position');
    }

    // Start offsets in the directory are relative to the base address.
    var parsedField = MarcIsoReader.parseField(buffer, fieldTag,
      baseAddress + fieldStartPos, fieldLength, options);

    record.addVariableField(parsedField);
  }

  return record;
};
|
345 |
|
346 |
|
347 |
|
348 |
|
/**
 * Parses one variable field from a record buffer.
 *
 * Tags matching `options.controlFieldRegexp` yield a control field holding
 * the decoded field data. Any other tag yields a data field with two
 * indicators and its subfields. With format 'UNIMARC', a subfield with code
 * '1' starts an embedded field: it is attached as the data of a '1'
 * subfield, and subsequent subfields go into the most recent embedded data
 * field.
 *
 * @param {Buffer} buffer Buffer holding the whole record.
 * @param {string} fieldTag Three-character field tag.
 * @param {number} fieldStartPos Absolute offset of the field in `buffer`.
 * @param {number} fieldLength Field length in bytes; a trailing field
 *   terminator (0x1e) is excluded automatically.
 * @param {Object} options Reader options (`controlFieldRegexp`, `encoding`,
 *   `format`, `permissive`).
 * @returns {MarcControlField|MarcDataField} The parsed field.
 * @throws {Error} 'invalid embedded field' when an embedded field header is
 *   shorter than a tag, or is not exactly tag plus two indicators and
 *   `options.permissive` is off.
 */
MarcIsoReader.parseField = function(buffer, fieldTag,
  fieldStartPos, fieldLength, options)
{
  var FIELD_TERMINATOR = 0x1e;
  var SUBFIELD_DELIMITER = 0x1f;

  // Leave the trailing field terminator out of the field data.
  if (buffer.readUInt8(fieldStartPos + fieldLength - 1) === FIELD_TERMINATOR) {
    fieldLength--;
  }

  if (options.controlFieldRegexp.test(fieldTag)) {
    // Control field: the whole field body is its data.
    var fieldDataBuffer =
      buffer.slice(fieldStartPos, fieldStartPos + fieldLength);
    var fieldData = options.encoding
      ? iconv.decode(fieldDataBuffer, options.encoding)
      : fieldDataBuffer.toString('utf-8');
    return new MarcControlField(fieldTag, fieldData);
  }

  // Data field. An indicator is taken only when the byte is not already a
  // subfield delimiter; a missing indicator is reported as '?'.
  var subfieldStartPos = fieldStartPos;
  var ind1 = '?';
  var ind2 = '?';
  if (buffer.readUInt8(subfieldStartPos) !== SUBFIELD_DELIMITER) {
    ind1 = buffer.toString('ascii', subfieldStartPos, subfieldStartPos + 1);
    subfieldStartPos++;
  }
  if (buffer.readUInt8(subfieldStartPos) !== SUBFIELD_DELIMITER) {
    ind2 = buffer.toString('ascii', subfieldStartPos, subfieldStartPos + 1);
    subfieldStartPos++;
  }
  var dataField = new MarcDataField(fieldTag, ind1, ind2);

  // Most recent embedded data field, if any; later subfields belong to it.
  var embeddedField = null;

  // Walk the field body. A subfield ends at the next delimiter or at the end
  // of the field. The end is tested first so that no byte past the field is
  // ever read.
  for (var symbolPos = 2; symbolPos <= fieldLength; symbolPos++) {
    if (symbolPos !== fieldLength
      && buffer.readUInt8(fieldStartPos + symbolPos) !== SUBFIELD_DELIMITER)
    {
      continue;
    }

    if (symbolPos > 2) {
      var subfield = MarcIsoReader.parseSubfield(buffer,
        subfieldStartPos, fieldStartPos + symbolPos - subfieldStartPos,
        options);

      if (subfield.code === '1' && options.format === 'UNIMARC') {
        // Embedded field header: tag, optionally followed by data
        // (control field) or by two indicators (data field).
        if (subfield.data.length < 3) {
          throw new Error('invalid embedded field');
        }

        var embeddedFieldTag = subfield.data.slice(0, 3);
        if (options.controlFieldRegexp.test(embeddedFieldTag)) {
          dataField.addSubfield(new MarcSubfield('1',
            new MarcControlField(embeddedFieldTag, subfield.data.slice(3))));
        } else {
          if (subfield.data.length !== 5) {
            if (options.permissive) {
              // Pad with blank indicators or truncate to exactly
              // tag + 2 indicators. The template must be five characters
              // long, otherwise the second indicator would come out empty.
              subfield.data = (subfield.data
                + '000  '.slice(subfield.data.length)).slice(0, 5);
            } else {
              throw new Error('invalid embedded field');
            }
          }

          embeddedField = new MarcDataField(embeddedFieldTag,
            subfield.data.slice(3, 4), subfield.data.slice(4, 5));
          dataField.addSubfield(new MarcSubfield('1', embeddedField));
        }
      } else if (embeddedField) {
        embeddedField.addSubfield(subfield);
      } else {
        dataField.addSubfield(subfield);
      }
    }

    subfieldStartPos = fieldStartPos + symbolPos;
  }

  return dataField;
};
|
445 |
|
446 |
|
447 |
|
448 |
|
/**
 * Parses one subfield from a record buffer.
 *
 * The byte at `subfieldStartPos` is the subfield delimiter, the next byte
 * is the subfield code and the remaining bytes are the subfield data.
 *
 * @param {Buffer} buffer Buffer holding the whole record.
 * @param {number} subfieldStartPos Absolute offset of the subfield
 *   delimiter.
 * @param {number} subfieldLength Subfield length in bytes, delimiter and
 *   code included.
 * @param {Object} [options] Reader options; `options.encoding`, when set,
 *   is used to decode the data through iconv, otherwise UTF-8 is assumed.
 * @returns {MarcSubfield} The parsed subfield.
 */
MarcIsoReader.parseSubfield = function(buffer,
  subfieldStartPos, subfieldLength, options)
{
  var subfieldCode =
    buffer.toString('ascii', subfieldStartPos + 1, subfieldStartPos + 2);
  var subfieldDataBuffer =
    buffer.slice(subfieldStartPos + 2, subfieldStartPos + subfieldLength);

  // Declared locally: assigning to an undeclared name would create a global
  // and fails outright in strict mode.
  var subfieldData;
  if (!options || !options.encoding) {
    subfieldData = subfieldDataBuffer.toString('utf-8');
  } else {
    subfieldData = iconv.decode(subfieldDataBuffer, options.encoding);
  }
  return new MarcSubfield(subfieldCode, subfieldData);
};
|
463 |
|
464 | module.exports = {
|
465 | MarcIsoReader: MarcIsoReader
|
466 | };
|