UNPKG

8.51 kBJavaScriptView Raw
1/**
2 * @licstart The following is the entire license notice for the
3 * JavaScript code in this page
4 *
5 * Copyright 2022 Mozilla Foundation
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 * @licend The above is the entire license notice for the
20 * JavaScript code in this page
21 */
22"use strict";
23
24Object.defineProperty(exports, "__esModule", {
25 value: true
26});
27exports.StructTreeRoot = exports.StructTreePage = void 0;
28
29var _primitives = require("./primitives.js");
30
31var _util = require("../shared/util.js");
32
33var _name_number_tree = require("./name_number_tree.js");
34
/**
 * Upper bound on recursion depth. It is checked both when walking up the
 * chain of /P (parent) entries and when serializing the tree, so a malformed
 * or cyclic structure cannot recurse without limit.
 */
const MAX_DEPTH = 40;

/**
 * Classification of a structure element's kid:
 *  - PAGE_CONTENT:   the kid is an integer (a marked-content id).
 *  - STREAM_CONTENT: the kid is a dictionary whose /Type is "MCR".
 *  - OBJECT:         the kid is a dictionary whose /Type is "OBJR".
 *  - ELEMENT:        the kid is any other dictionary.
 *
 * Frozen so that an accidental write cannot alter the classification values
 * shared by every class in this module.
 */
const StructElementType = Object.freeze({
  PAGE_CONTENT: "PAGE_CONTENT",
  STREAM_CONTENT: "STREAM_CONTENT",
  OBJECT: "OBJECT",
  ELEMENT: "ELEMENT",
});
42
/**
 * Holds the structure-tree root dictionary together with the role map read
 * from its /RoleMap entry.
 */
class StructTreeRoot {
  /**
   * @param {Dict} rootDict - The structure-tree root dictionary.
   */
  constructor(rootDict) {
    this.dict = rootDict;
    // Filled by `readRoleMap`; maps a role key to the mapped name string.
    this.roleMap = new Map();
  }

  /** Populates the derived state (currently only the role map). */
  init() {
    this.readRoleMap();
  }

  /**
   * Copies every /RoleMap entry whose value is a Name into `this.roleMap`.
   * A missing or non-dictionary /RoleMap leaves the map untouched, and
   * entries whose value is not a Name are skipped.
   */
  readRoleMap() {
    const roleMapDict = this.dict.get("RoleMap");

    if (roleMapDict instanceof _primitives.Dict) {
      roleMapDict.forEach((roleKey, mapped) => {
        if (mapped instanceof _primitives.Name) {
          this.roleMap.set(roleKey, mapped.name);
        }
      });
    }
  }
}
70
71exports.StructTreeRoot = StructTreeRoot;
72
/**
 * Wraps one structure-element dictionary and, on construction, classifies
 * the entries of its /K value into `StructElement` records.
 */
class StructElementNode {
  /**
   * @param {StructTreePage} tree - Owning page tree. `tree.root.roleMap` and
   *   `tree.pageDict.objId` are read by this class.
   * @param {Dict} dict - The structure-element dictionary to wrap.
   */
  constructor(tree, dict) {
    this.tree = tree;
    this.dict = dict;
    this.kids = [];
    this.parseKids();
  }

  /**
   * The element's /S name after one lookup in the root's role map.
   *
   * @returns {string} The mapped role, the raw /S name when it has no
   *   mapping, or "" when /S is not a Name.
   */
  get role() {
    const nameObj = this.dict.get("S");
    const name = nameObj instanceof _primitives.Name ? nameObj.name : "";
    const { roleMap } = this.tree.root;

    return roleMap.has(name) ? roleMap.get(name) : name;
  }

  /**
   * Reads /K and appends one `StructElement` to `this.kids` for every kid
   * that `parseKid` accepts. /K may hold a single kid or an array of kids.
   */
  parseKids() {
    // The element's own /Pg is the default page for kids without their own.
    const pageRef = this.dict.getRaw("Pg");
    const pageObjId =
      pageRef instanceof _primitives.Ref ? pageRef.toString() : null;

    const kids = this.dict.get("K");
    const kidList = Array.isArray(kids) ? kids : [kids];

    for (const kid of kidList) {
      const element = this.parseKid(pageObjId, kid);

      if (element) {
        this.kids.push(element);
      }
    }
  }

  /**
   * Classifies a single kid.
   *
   * @param {string|null} pageObjId - Page id inherited from the enclosing
   *   element; replaced by the kid's own /Pg when that is a reference.
   * @param {*} kid - An integer, a reference, or a dictionary. Anything else
   *   is ignored.
   * @returns {StructElement|null} The classified kid, or `null` when the kid
   *   is not usable or is content belonging to a different page than
   *   `tree.pageDict`.
   */
  parseKid(pageObjId, kid) {
    const currentPageId = this.tree.pageDict.objId;

    // An integer kid is a marked-content id on the inherited page.
    if (Number.isInteger(kid)) {
      if (currentPageId !== pageObjId) {
        return null;
      }

      return new StructElement({
        type: StructElementType.PAGE_CONTENT,
        mcid: kid,
        pageObjId,
      });
    }

    let kidDict = null;

    if (kid instanceof _primitives.Ref) {
      kidDict = this.dict.xref.fetch(kid);
    } else if (kid instanceof _primitives.Dict) {
      kidDict = kid;
    }

    // A reference in a damaged file can resolve to something other than a
    // dictionary; skip it rather than failing on `getRaw` below.
    if (!(kidDict instanceof _primitives.Dict)) {
      return null;
    }

    const pageRef = kidDict.getRaw("Pg");

    if (pageRef instanceof _primitives.Ref) {
      pageObjId = pageRef.toString();
    }

    const typeObj = kidDict.get("Type");
    const type = typeObj instanceof _primitives.Name ? typeObj.name : null;

    if (type === "MCR") {
      if (currentPageId !== pageObjId) {
        return null;
      }

      const streamRef = kidDict.getRaw("Stm");
      return new StructElement({
        type: StructElementType.STREAM_CONTENT,
        refObjId:
          streamRef instanceof _primitives.Ref ? streamRef.toString() : null,
        pageObjId,
        mcid: kidDict.get("MCID"),
      });
    }

    if (type === "OBJR") {
      if (currentPageId !== pageObjId) {
        return null;
      }

      const objectRef = kidDict.getRaw("Obj");
      return new StructElement({
        type: StructElementType.OBJECT,
        refObjId:
          objectRef instanceof _primitives.Ref ? objectRef.toString() : null,
        pageObjId,
      });
    }

    // Any other dictionary is a nested structure element. It is recorded
    // regardless of page, since its own kids decide what belongs to the page.
    return new StructElement({
      type: StructElementType.ELEMENT,
      dict: kidDict,
    });
  }
}
187
/**
 * Plain record describing one classified kid of a structure element.
 */
class StructElement {
  /**
   * @param {Object} fields
   * @param {string} fields.type - One of the `StructElementType` values.
   * @param {Dict|null} [fields.dict] - The kid's dictionary (element kids).
   * @param {*} [fields.mcid] - The marked-content id (content kids).
   * @param {string|null} [fields.pageObjId] - Id of the page the kid is on.
   * @param {string|null} [fields.refObjId] - Id of the referenced object.
   */
  constructor(fields) {
    const {
      type,
      dict = null,
      mcid = null,
      pageObjId = null,
      refObjId = null,
    } = fields;

    Object.assign(this, {
      type,
      dict,
      mcid,
      pageObjId,
      refObjId,
      // Assigned later by the page tree: the node built for this kid's own
      // dictionary, which serialization then descends into.
      parentNode: null,
    });
  }
}
205
/**
 * Builds the part of the structure tree that is relevant to a single page and
 * exposes it in a plain, serializable form.
 */
class StructTreePage {
  /**
   * @param {StructTreeRoot|null} structTreeRoot - The document's structure
   *   tree root, or a falsy value when the document has none.
   * @param {Dict} pageDict - The page dictionary; its /StructParents entry
   *   and `objId` are read.
   */
  constructor(structTreeRoot, pageDict) {
    this.root = structTreeRoot;
    this.rootDict = structTreeRoot ? structTreeRoot.dict : null;
    this.pageDict = pageDict;
    // Top-level nodes, stored at the index they occupy in the root's /K.
    // The array may be sparse.
    this.nodes = [];
  }

  /**
   * Looks up the page's /StructParents key in the root's /ParentTree and
   * registers every referenced structure element, together with its
   * ancestors. Returns silently when any required piece is missing.
   */
  parse() {
    if (!this.root || !this.rootDict) {
      return;
    }

    const parentTree = this.rootDict.get("ParentTree");

    if (!parentTree) {
      return;
    }

    const id = this.pageDict.get("StructParents");

    if (!Number.isInteger(id)) {
      return;
    }

    const numberTree = new _name_number_tree.NumberTree(
      parentTree,
      this.rootDict.xref
    );
    const parentArray = numberTree.get(id);

    if (!Array.isArray(parentArray)) {
      return;
    }

    // Dictionary -> node cache shared by all `addNode` calls of this parse.
    const map = new Map();

    for (const ref of parentArray) {
      if (ref instanceof _primitives.Ref) {
        this.addNode(this.rootDict.xref.fetch(ref), map);
      }
    }
  }

  /**
   * Creates (or reuses) the node for `dict`, then walks up its /P chain so
   * the node ends up attached either to a parent node or to `this.nodes`.
   *
   * @param {Dict} dict - Structure-element dictionary.
   * @param {Map} map - Cache of already created nodes, keyed by dictionary.
   * @param {number} [level=0] - Current depth of the upward walk.
   * @returns {StructElementNode|null} The node, or `null` when `dict` is not
   *   a dictionary or the depth limit was exceeded.
   */
  addNode(dict, map, level = 0) {
    if (level > MAX_DEPTH) {
      (0, _util.warn)("StructTree MAX_DEPTH reached.");
      return null;
    }

    // The parent tree of a damaged file may reference something that is not
    // a dictionary; ignore it instead of failing while building the node.
    if (!(dict instanceof _primitives.Dict)) {
      return null;
    }

    if (map.has(dict)) {
      return map.get(dict);
    }

    const element = new StructElementNode(this, dict);
    map.set(dict, element);
    const parent = dict.get("P");

    // A missing or non-dictionary /P is handled like a parent that is the
    // tree root: the element can only be a top-level node.
    if (
      !(parent instanceof _primitives.Dict) ||
      (0, _primitives.isName)(parent.get("Type"), "StructTreeRoot")
    ) {
      if (!this.addTopLevelNode(dict, element)) {
        map.delete(dict);
      }

      return element;
    }

    const parentNode = this.addNode(parent, map, level + 1);

    if (!parentNode) {
      return element;
    }

    let save = false;

    for (const kid of parentNode.kids) {
      if (kid.type === StructElementType.ELEMENT && kid.dict === dict) {
        // Link the parent's kid record to the node built for that kid.
        kid.parentNode = element;
        save = true;
      }
    }

    // The parent does not list this element among its kids: forget the node.
    if (!save) {
      map.delete(dict);
    }

    return element;
  }

  /**
   * Stores `element` in `this.nodes` at the position(s) where `dict` appears
   * in the root's /K entry.
   *
   * @param {Dict} dict - Dictionary of the candidate top-level element.
   * @param {StructElementNode} element - Node built for `dict`.
   * @returns {boolean} `false` when /K is missing or does not contain `dict`;
   *   `true` when the node was stored, and also when /K is neither a
   *   dictionary nor an array (in which case nothing is stored).
   */
  addTopLevelNode(dict, element) {
    const obj = this.rootDict.get("K");

    if (!obj) {
      return false;
    }

    if (obj instanceof _primitives.Dict) {
      if (obj.objId !== dict.objId) {
        return false;
      }

      this.nodes[0] = element;
      return true;
    }

    if (!Array.isArray(obj)) {
      return true;
    }

    let save = false;

    for (let i = 0; i < obj.length; i++) {
      const kidRef = obj[i];

      if (kidRef && kidRef.toString() === dict.objId) {
        this.nodes[i] = element;
        save = true;
      }
    }

    return save;
  }

  /**
   * The page's structure tree as nested prototype-less objects.
   *
   * Element nodes become `{ role, children, alt?, lang? }`; content kids
   * become `{ type: "content", id }` and object kids `{ type: "object", id }`.
   * Element kids that were never linked to a node are omitted.
   *
   * @returns {Object} The synthetic root, with `role` set to "Root".
   */
  get serializable() {
    function nodeToSerializable(node, parent, level = 0) {
      if (level > MAX_DEPTH) {
        (0, _util.warn)("StructTree too deep to be fully serialized.");
        return;
      }

      const obj = Object.create(null);
      obj.role = node.role;
      obj.children = [];
      parent.children.push(obj);
      const alt = node.dict.get("Alt");

      if (typeof alt === "string") {
        obj.alt = (0, _util.stringToPDFString)(alt);
      }

      const lang = node.dict.get("Lang");

      if (typeof lang === "string") {
        obj.lang = (0, _util.stringToPDFString)(lang);
      }

      for (const kid of node.kids) {
        // For element kids, `parentNode` holds the node built for the kid.
        const kidElement =
          kid.type === StructElementType.ELEMENT ? kid.parentNode : null;

        if (kidElement) {
          nodeToSerializable(kidElement, obj, level + 1);
        } else if (
          kid.type === StructElementType.PAGE_CONTENT ||
          kid.type === StructElementType.STREAM_CONTENT
        ) {
          obj.children.push({
            type: "content",
            id: `page${kid.pageObjId}_mcid${kid.mcid}`,
          });
        } else if (kid.type === StructElementType.OBJECT) {
          obj.children.push({
            type: "object",
            id: kid.refObjId,
          });
        }
      }
    }

    const root = Object.create(null);
    root.children = [];
    root.role = "Root";

    for (const child of this.nodes) {
      // `this.nodes` can be sparse; skip the holes.
      if (!child) {
        continue;
      }

      nodeToSerializable(child, root);
    }

    return root;
  }
}
384
385exports.StructTreePage = StructTreePage;
\No newline at end of file