UNPKG

12.6 kBJavaScriptView Raw
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
14'use strict';
15
/**
 * Advances a generator by one step and routes the outcome.
 *
 * Calls `gen[key](arg)` (key is the generator method name, e.g. "next" or
 * "throw"). If that call throws, `reject` receives the error. If the generator
 * is done, `resolve` receives the final value. Otherwise the yielded value is
 * wrapped with `Promise.resolve` and its settlement is forwarded to `_next`
 * (fulfilment) or `_throw` (rejection).
 *
 * @param {object} gen the generator object being driven
 * @param {function} resolve called with the generator's return value
 * @param {function} reject called with any error thrown by the generator
 * @param {function} _next continuation invoked with a fulfilled yielded value
 * @param {function} _throw continuation invoked with a rejected yielded value
 * @param {string} key name of the generator method to invoke
 * @param {*} arg argument passed to that generator method
 */
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) {
  let info;
  let value;

  try {
    info = gen[key](arg);
    // Read inside the try so a throwing `value` getter is also rejected.
    value = info.value;
  } catch (error) {
    reject(error);
    return;
  }

  if (info.done) {
    resolve(value);
  } else {
    Promise.resolve(value).then(_next, _throw);
  }
}
17
/**
 * Wraps a generator function so that calling it returns a Promise.
 *
 * The returned function invokes `fn` with the caller's `this` and arguments,
 * then drives the resulting generator with `asyncGeneratorStep`: each yielded
 * value is awaited, fulfilled values are fed back via "next" and rejections
 * via "throw". The Promise settles with the generator's return value or with
 * the error it throws.
 *
 * @param {function} fn a generator function
 * @returns {function} a function returning a Promise for the generator's result
 */
function _asyncToGenerator(fn) {
  // A regular function (not an arrow) so `this` is taken from the call site.
  return function (...args) {
    return new Promise((resolve, reject) => {
      const gen = fn.apply(this, args);

      function _next(value) {
        asyncGeneratorStep(gen, resolve, reject, _next, _throw, 'next', value);
      }

      function _throw(err) {
        asyncGeneratorStep(gen, resolve, reject, _next, _throw, 'throw', err);
      }

      _next(undefined);
    });
  };
}
19
20const PDFParser = require('pdf2json');
21
22const CiceroMarkTransformer = require('@accordproject/markdown-cicero').CiceroMarkTransformer;
23
24const PdfPrinter = require('pdfmake');
25
26const ToPdfMakeVisitor = require('./ToPdfMakeVisitor');
27
/**
 * Builds the path of a TrueType file shipped in the `fonts` folder that sits
 * next to this module.
 * @param {string} fileName the font file name, including extension
 * @returns {string} `__dirname` joined to `/fonts/<fileName>`
 */
const bundledFont = fileName => `${__dirname}/fonts/${fileName}`;

/**
 * Font table handed to the PdfPrinter constructor. Each family maps the
 * `normal` / `bold` / `italics` / `bolditalics` variants either to a plain
 * font name (Courier, Helvetica, Times, Symbol, ZapfDingbats) or to a
 * Liberation .ttf file bundled with this package.
 */
const fonts = {
  Courier: {
    normal: 'Courier',
    bold: 'Courier-Bold',
    italics: 'Courier-Oblique',
    bolditalics: 'Courier-BoldOblique'
  },
  Helvetica: {
    normal: 'Helvetica',
    bold: 'Helvetica-Bold',
    italics: 'Helvetica-Oblique',
    bolditalics: 'Helvetica-BoldOblique'
  },
  Times: {
    normal: 'Times-Roman',
    bold: 'Times-Bold',
    italics: 'Times-Italic',
    bolditalics: 'Times-BoldItalic'
  },
  Symbol: {
    normal: 'Symbol'
  },
  ZapfDingbats: {
    normal: 'ZapfDingbats'
  },
  LiberationSerif: {
    normal: bundledFont('LiberationSerif-Regular.ttf'),
    bold: bundledFont('LiberationSerif-Bold.ttf'),
    italics: bundledFont('LiberationSerif-Italic.ttf'),
    bolditalics: bundledFont('LiberationSerif-BoldItalic.ttf')
  },
  LiberationSans: {
    normal: bundledFont('LiberationSans-Regular.ttf'),
    bold: bundledFont('LiberationSans-Bold.ttf'),
    italics: bundledFont('LiberationSans-Italic.ttf'),
    bolditalics: bundledFont('LiberationSans-BoldItalic.ttf')
  },
  LiberationMono: {
    normal: bundledFont('LiberationMono-Regular.ttf'),
    bold: bundledFont('LiberationMono-Bold.ttf'),
    italics: bundledFont('LiberationMono-Italic.ttf'),
    bolditalics: bundledFont('LiberationMono-BoldItalic.ttf')
  }
};
/**
 * Converts between PDF and the CiceroMark DOM: parses a PDF buffer into a
 * CommonMark-style document tree, and renders a CiceroMark DOM to a PDF stream.
 */
class PdfTransformer {
  /**
   * Construct the transformer.
   * @param {object} [options] configuration options; stored on the instance
   * and handed to ToPdfMakeVisitor when generating a PDF
   */
  constructor(options = {}) {
    this.options = options;
    this.ciceroMarkTransformer = new CiceroMarkTransformer();
  }

  /**
   * Converts a PDF buffer to a CiceroMark DOM.
   *
   * Text items are grouped into paragraphs by their vertical position, runs
   * flagged bold/italic are wrapped in Strong/Emph nodes, and adjacent runs on
   * the same line are merged (see pushNode).
   *
   * @param {Buffer} input - pdf buffer
   * @param {string} [format] result format, defaults to 'concerto'. Accepted
   * for API compatibility; this implementation does not consult it and always
   * resolves with the plain JSON document.
   * @param {object} [options] - the PDF parsing options
   * @param {number} [options.paragraphVerticalOffset] - the vertical offset used to detect paragraphs (defaults to 1)
   * @param {boolean} [options.preservePages] - whether to preserve page breaks (defaults to true)
   * @returns {Promise} a Promise to the CiceroMark DOM. Rejects with a string
   * message when the parser reports an error, or with the thrown error when
   * the parsed data cannot be converted.
   */
  // eslint-disable-next-line no-unused-vars
  toCiceroMark(input, format = 'concerto', options) {
    // Apply defaults per option so that passing only one of them does not
    // silently disable the other.
    const requested = options || {};
    const paragraphVerticalOffset = requested.paragraphVerticalOffset !== undefined
      ? requested.paragraphVerticalOffset
      : 1;
    const preservePages = requested.preservePages !== undefined
      ? requested.preservePages
      : true;

    return new Promise((resolve, reject) => {
      const pdfParser = new PDFParser(null, false);

      const errorCallback = errData => reject(`PDF parsing failed with error ${errData.parserError}`);

      const conversionCallback = pdfData => {
        // This runs as an event handler: without the try/catch an exception
        // here would escape the promise and leave it pending forever.
        try {
          const document = {
            $class: 'org.accordproject.commonmark.Document',
            xmlns: pdfData.formImage.Id.Name,
            nodes: []
          };

          // Deliberately declared outside the page loop: a paragraph may
          // continue across a page boundary.
          let currentPara = null;

          for (const page of pdfData.formImage.Pages) {
            let lastY = 0;

            for (const text of page.Texts) {
              if (!currentPara || Math.abs(lastY - text.y) > paragraphVerticalOffset) {
                currentPara = {
                  $class: 'org.accordproject.commonmark.Paragraph',
                  nodes: []
                };
                document.nodes.push(currentPara);
              }

              for (const run of text.R) {
                // run.TS is [fontFaceId, fontSize, bold, italic]
                const [, , bold, italic] = run.TS;
                let node = {
                  $class: 'org.accordproject.commonmark.Text',
                  text: run.T ? decodeURIComponent(run.T) : ''
                };

                // Wrap inside-out so bold+italic becomes Strong > Emph > Text.
                if (italic) {
                  node = {
                    $class: 'org.accordproject.commonmark.Emph',
                    nodes: [node]
                  };
                }

                if (bold) {
                  node = {
                    $class: 'org.accordproject.commonmark.Strong',
                    nodes: [node]
                  };
                }

                PdfTransformer.pushNode(currentPara, node, lastY, text.y);
              }

              lastY = text.y;
            }

            if (preservePages) {
              document.nodes.push({
                $class: 'org.accordproject.commonmark.ThematicBreak'
              });
            }
          }

          resolve(document);
        } catch (error) {
          reject(error);
        }
      };

      // trigger parsing
      pdfParser.on('pdfParser_dataError', errorCallback);
      pdfParser.on('pdfParser_dataReady', conversionCallback);
      pdfParser.parseBuffer(input);
    });
  }

  /**
   * Converts a CiceroMark DOM to a PDF written to an output stream.
   *
   * Besides the keys read explicitly below (tocHeading, headerText,
   * footerText, footerPageNumber, styles), every key of `options` is copied
   * onto the pdfmake document definition, so callers can override top-level
   * settings such as pageSize or pageMargins.
   *
   * @param {*} input - CiceroMark DOM (JSON is converted through the
   * CiceroMark serializer when it has no getType method)
   * @param {*} [options] - the PDF generation options
   * @param {*} outputStream - the output stream
   * @returns {Promise} resolves once the document has been piped to the
   * output stream and ended (it does not wait for the stream to finish);
   * rejects if building the document throws
   */
  toPdf(input, options, outputStream) {
    // The executor runs synchronously; anything thrown inside it becomes a
    // rejection of the returned promise.
    return new Promise(resolve => {
      const settings = options || {};
      const printer = new PdfPrinter(fonts);

      let dom = input;
      if (!dom.getType) {
        dom = this.ciceroMarkTransformer.getSerializer().fromJSON(dom);
      }

      const parameters = {
        result: '',
        first: true,
        indent: 0
      };
      const visitor = new ToPdfMakeVisitor(this.options);
      dom.accept(visitor, parameters);

      // The visitor leaves the pdfmake document definition in parameters.result.
      const dd = parameters.result;
      dd.defaultStyle = {
        fontSize: 12,
        font: 'LiberationSerif',
        lineHeight: 1.5
      };
      dd.pageSize = 'LETTER';
      dd.pageOrientation = 'portrait';
      dd.pageMargins = [80, 80, 80, 80];

      // allow overriding top-level options
      Object.assign(dd, settings);

      if (settings.tocHeading) {
        // Table of contents on its own leading page, followed by the content.
        dd.content = [{
          toc: {
            title: {
              text: settings.tocHeading,
              style: 'toc'
            }
          }
        }, {
          text: '',
          pageBreak: 'after'
        }].concat(dd.content);
      }

      if (settings.headerText) {
        dd.header = {
          text: settings.headerText,
          style: 'Header'
        };
      }

      if (settings.footerText || settings.footerPageNumber) {
        dd.footer = function (currentPage, pageCount) {
          const footer = [{
            text: settings.footerText ? settings.footerText : '',
            style: 'Footer'
          }];

          if (settings.footerPageNumber) {
            footer.push({
              text: `${currentPage} / ${pageCount}`,
              style: 'PageNumber'
            });
          }

          return footer;
        };
      }

      const defaultStyles = {
        Footer: {
          alignment: 'left',
          margin: [10, 10, 0, 0]
        },
        PageNumber: {
          alignment: 'center',
          margin: [0, 0, 0, 0]
        },
        Header: {
          alignment: 'right',
          margin: [0, 10, 10, 0]
        },
        heading_one: {
          fontSize: 30,
          bold: true,
          alignment: 'center'
        },
        heading_two: {
          fontSize: 28,
          bold: true
        },
        heading_three: {
          fontSize: 26,
          bold: true
        },
        heading_four: {
          fontSize: 24,
          bold: true
        },
        heading_five: {
          fontSize: 22,
          bold: true
        },
        heading_six: {
          fontSize: 20,
          bold: true
        },
        Code: {
          font: 'LiberationMono'
        },
        CodeBlock: {
          font: 'LiberationMono'
        },
        HtmlInline: {
          font: 'LiberationMono'
        },
        HtmlBlock: {
          font: 'LiberationMono'
        },
        Paragraph: {
          alignment: 'justify'
        },
        toc: {
          fontSize: 30,
          bold: true,
          alignment: 'center'
        },
        Link: {
          color: 'blue'
        },
        BlockQuote: {
          margin: [20, 0]
        }
      };

      // allow the caller to override default styles (per style name)
      dd.styles = defaultStyles;

      if (settings.styles) {
        Object.assign(dd.styles, settings.styles);
      }

      const pdfDoc = printer.createPdfKitDocument(dd);
      pdfDoc.pipe(outputStream);
      pdfDoc.end();
      resolve();
    });
  }

  /**
   * Utility to get the last child of a node.
   * @param {object} node a commonmark node with a `nodes` array
   * @returns {object} the last child node, or null when there are no children
   */
  static getLastChildNode(node) {
    const children = node.nodes;
    return children.length > 0 ? children[children.length - 1] : null;
  }

  /**
   * Utility to merge text nodes. It recurses so that it can deal with
   * bold, italic, bold+italic text: two wrappers of the same class are merged
   * by merging their last children.
   * @param {object} srcNode a commonmark node
   * @param {object} destNode a commonmark node
   * @returns {object} the Text node inside destNode that received the source
   * text, or null when the nodes could not be merged
   */
  static mergeTextNode(srcNode, destNode) {
    if (!srcNode || !destNode || srcNode.$class !== destNode.$class) {
      return null;
    }

    if (srcNode.$class === 'org.accordproject.commonmark.Text') {
      destNode.text = destNode.text + srcNode.text;
      return destNode;
    }

    return PdfTransformer.mergeTextNode(
      PdfTransformer.getLastChildNode(srcNode),
      PdfTransformer.getLastChildNode(destNode)
    );
  }

  /**
   * Utility to merge adjacent text runs from a PDF.
   *
   * When the Y position changed, a Softbreak is appended before the node
   * (this also applies to the first node of a fresh paragraph). When it did
   * not change, the node is merged into the paragraph's last node if both
   * have the same structure, and appended otherwise.
   *
   * @param {*} currentPara CommonMark paragraph node
   * @param {*} node the current node
   * @param {*} lastY the last Y offset position from PDF
   * @param {*} textY the current Y offset position from PDF
   */
  static pushNode(currentPara, node, lastY, textY) {
    if (lastY !== textY) {
      currentPara.nodes.push({
        $class: 'org.accordproject.commonmark.Softbreak'
      });
      currentPara.nodes.push(node);
      return;
    }

    const lastNode = PdfTransformer.getLastChildNode(currentPara);

    if (!PdfTransformer.mergeTextNode(node, lastNode)) {
      currentPara.nodes.push(node);
    }
  }

}
398
399module.exports = PdfTransformer;
\No newline at end of file