1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 | "use strict";
|
23 |
|
24 | Object.defineProperty(exports, "__esModule", {
|
25 | value: true
|
26 | });
|
27 | exports.StructTreeRoot = exports.StructTreePage = void 0;
|
28 |
|
29 | var _primitives = require("./primitives.js");
|
30 |
|
31 | var _util = require("../shared/util.js");
|
32 |
|
33 | var _name_number_tree = require("./name_number_tree.js");
|
34 |
|
/**
 * Depth limit for the structure tree. Both the parent-chain walk in
 * `StructTreePage.addNode` and the recursive serializer stop (with a warning)
 * once their `level` exceeds this value.
 */
const MAX_DEPTH = 40;

/**
 * Kinds of kid entries a structure element can hold.
 *
 * - PAGE_CONTENT: kid given directly as an integer (stored as `mcid`).
 * - STREAM_CONTENT: kid dictionary whose `Type` is `MCR`.
 * - OBJECT: kid dictionary whose `Type` is `OBJR`.
 * - ELEMENT: any other kid dictionary (treated as a nested element).
 *
 * Frozen so the shared lookup table cannot be mutated by accident.
 */
const StructElementType = Object.freeze({
  PAGE_CONTENT: "PAGE_CONTENT",
  STREAM_CONTENT: "STREAM_CONTENT",
  OBJECT: "OBJECT",
  ELEMENT: "ELEMENT",
});
|
42 |
|
/**
 * Wrapper around the structure tree root dictionary that exposes its
 * `RoleMap` entry as a plain `Map` of key -> mapped role name.
 */
class StructTreeRoot {
  /**
   * @param {Object} rootDict - Dictionary of the structure tree root; only
   *   its `get` method is used here.
   */
  constructor(rootDict) {
    this.dict = rootDict;
    this.roleMap = new Map();
  }

  /** Populates `roleMap` from the root dictionary. */
  init() {
    this.readRoleMap();
  }

  /**
   * Copies every `RoleMap` entry whose value is a Name into `roleMap`.
   * Does nothing when `RoleMap` is missing or is not a dictionary; entries
   * with non-Name values are skipped.
   */
  readRoleMap() {
    const roles = this.dict.get("RoleMap");

    if (roles instanceof _primitives.Dict) {
      roles.forEach((customRole, mappedRole) => {
        if (mappedRole instanceof _primitives.Name) {
          this.roleMap.set(customRole, mappedRole.name);
        }
      });
    }
  }
}
|
70 |
|
71 | exports.StructTreeRoot = StructTreeRoot;
|
72 |
|
/**
 * One structure element of the tree, together with the parsed list of its
 * kids (the `K` entry) that are relevant to the page being processed.
 */
class StructElementNode {
  /**
   * @param {Object} tree - Owning page tree; `tree.pageDict.objId` and
   *   `tree.root.roleMap` are read.
   * @param {Object} dict - Dictionary of this structure element.
   */
  constructor(tree, dict) {
    this.tree = tree;
    this.dict = dict;
    this.kids = [];
    this.parseKids();
  }

  /**
   * Role of the element: the name stored under `S`, translated through the
   * root's role map when a mapping exists. An `S` entry that is not a Name
   * is treated as the empty string.
   *
   * @returns {string}
   */
  get role() {
    const nameObj = this.dict.get("S");
    const name = nameObj instanceof _primitives.Name ? nameObj.name : "";
    const { root } = this.tree;

    if (root.roleMap.has(name)) {
      return root.roleMap.get(name);
    }

    return name;
  }

  /**
   * Fills `this.kids` from the `K` entry, which may be a single kid or an
   * array of kids. Kids that `parseKid` rejects are dropped.
   */
  parseKids() {
    // The element's own `Pg` reference is the default page for its kids.
    let pageObjId = null;
    const objRef = this.dict.getRaw("Pg");

    if (objRef instanceof _primitives.Ref) {
      pageObjId = objRef.toString();
    }

    const kids = this.dict.get("K");
    const kidList = Array.isArray(kids) ? kids : [kids];

    for (const kid of kidList) {
      const element = this.parseKid(pageObjId, kid);

      if (element) {
        this.kids.push(element);
      }
    }
  }

  /**
   * Converts one raw kid into a `StructElement`.
   *
   * @param {string|null} pageObjId - Page id inherited from the parent
   *   element; overridden by the kid's own `Pg` reference when present.
   * @param {*} kid - An integer, a reference, or a dictionary.
   * @returns {StructElement|null} `null` when the kid is not usable: it
   *   belongs to another page, or it does not resolve to a dictionary.
   */
  parseKid(pageObjId, kid) {
    // An integer kid is used directly as the `mcid` of page content.
    if (Number.isInteger(kid)) {
      if (this.tree.pageDict.objId !== pageObjId) {
        return null;
      }

      return new StructElement({
        type: StructElementType.PAGE_CONTENT,
        mcid: kid,
        pageObjId,
      });
    }

    // Find the dictionary for the kid.
    let kidDict = null;

    if (kid instanceof _primitives.Ref) {
      kidDict = this.dict.xref.fetch(kid);
    } else if (kid instanceof _primitives.Dict) {
      kidDict = kid;
    }

    // A reference in a malformed file can resolve to something that is not
    // a dictionary; skip it rather than throwing on `getRaw` below.
    if (!(kidDict instanceof _primitives.Dict)) {
      return null;
    }

    const pageRef = kidDict.getRaw("Pg");

    if (pageRef instanceof _primitives.Ref) {
      pageObjId = pageRef.toString();
    }

    const typeObj = kidDict.get("Type");
    const type = typeObj instanceof _primitives.Name ? typeObj.name : null;

    if (type === "MCR") {
      if (this.tree.pageDict.objId !== pageObjId) {
        return null;
      }

      const stmRef = kidDict.getRaw("Stm");
      return new StructElement({
        type: StructElementType.STREAM_CONTENT,
        refObjId: stmRef instanceof _primitives.Ref ? stmRef.toString() : null,
        pageObjId,
        mcid: kidDict.get("MCID"),
      });
    }

    if (type === "OBJR") {
      if (this.tree.pageDict.objId !== pageObjId) {
        return null;
      }

      const objRef = kidDict.getRaw("Obj");
      return new StructElement({
        type: StructElementType.OBJECT,
        refObjId: objRef instanceof _primitives.Ref ? objRef.toString() : null,
        pageObjId,
      });
    }

    // Anything else is kept as a nested element; no page check is done here.
    return new StructElement({
      type: StructElementType.ELEMENT,
      dict: kidDict,
    });
  }
}
|
187 |
|
/**
 * Plain record describing one kid of a structure element.
 */
class StructElement {
  /**
   * @param {Object} options
   * @param {string} options.type - One of the `StructElementType` values.
   * @param {Object|null} [options.dict] - Kid dictionary (nested elements).
   * @param {*} [options.mcid] - Value stored as the content id.
   * @param {string|null} [options.pageObjId] - Id of the page the kid is on.
   * @param {string|null} [options.refObjId] - Id of a referenced object.
   */
  constructor(options) {
    const {
      type,
      dict = null,
      mcid = null,
      pageObjId = null,
      refObjId = null,
    } = options;

    Object.assign(this, {
      type,
      dict,
      mcid,
      pageObjId,
      refObjId,
      // Starts empty; assigned later by the code that links the tree.
      parentNode: null,
    });
  }
}
|
205 |
|
/**
 * Builds the part of the structure tree that belongs to a single page and
 * exposes it as a plain, serializable object.
 */
class StructTreePage {
  /**
   * @param {Object|null} structTreeRoot - Root wrapper exposing `dict` and
   *   `roleMap`; when falsy, `parse` does nothing.
   * @param {Object} pageDict - Dictionary of the page; `StructParents` and
   *   `objId` are read from it.
   */
  constructor(structTreeRoot, pageDict) {
    this.root = structTreeRoot;
    this.rootDict = structTreeRoot ? structTreeRoot.dict : null;
    this.pageDict = pageDict;
    this.nodes = [];
  }

  /**
   * Looks up the page's `StructParents` index in the root's `ParentTree`
   * and adds a node for every referenced structure element. Returns early,
   * leaving `nodes` empty, when any of the required pieces is missing.
   */
  parse() {
    if (!this.root || !this.rootDict) {
      return;
    }

    const parentTree = this.rootDict.get("ParentTree");

    if (!parentTree) {
      return;
    }

    const id = this.pageDict.get("StructParents");

    if (!Number.isInteger(id)) {
      return;
    }

    const numberTree = new _name_number_tree.NumberTree(
      parentTree,
      this.rootDict.xref
    );
    const parentArray = numberTree.get(id);

    if (!Array.isArray(parentArray)) {
      return;
    }

    // Dictionary -> node cache shared by all `addNode` calls for this page.
    const map = new Map();

    for (const ref of parentArray) {
      if (ref instanceof _primitives.Ref) {
        this.addNode(this.rootDict.xref.fetch(ref), map);
      }
    }
  }

  /**
   * Creates the node for `dict` and, by following `P` links upwards, the
   * nodes of its ancestors, linking each node into its parent's kid list.
   *
   * @param {*} dict - Structure element dictionary. Anything that is not a
   *   dictionary (e.g. a dangling or malformed reference) is ignored.
   * @param {Map} map - Cache of dictionaries already turned into nodes.
   * @param {number} [level] - Current recursion depth.
   * @returns {StructElementNode|null} The node, or `null` when the depth
   *   limit was exceeded or `dict` is not a dictionary.
   */
  addNode(dict, map, level = 0) {
    if (level > MAX_DEPTH) {
      _util.warn("StructTree MAX_DEPTH reached.");
      return null;
    }

    // Building a node calls dictionary methods on `dict`, so reject other
    // values up front instead of throwing half-way through.
    if (!(dict instanceof _primitives.Dict)) {
      return null;
    }

    if (map.has(dict)) {
      return map.get(dict);
    }

    const element = new StructElementNode(this, dict);
    map.set(dict, element);
    const parent = dict.get("P");

    // A missing or non-dictionary parent is handled like a direct child of
    // the root: the element is kept only if the root's `K` lists it.
    if (
      !(parent instanceof _primitives.Dict) ||
      _primitives.isName(parent.get("Type"), "StructTreeRoot")
    ) {
      if (!this.addTopLevelNode(dict, element)) {
        map.delete(dict);
      }

      return element;
    }

    const parentNode = this.addNode(parent, map, level + 1);

    if (!parentNode) {
      return element;
    }

    let save = false;

    for (const kid of parentNode.kids) {
      if (kid.type === StructElementType.ELEMENT && kid.dict === dict) {
        // Despite its name, `parentNode` holds the node built for this kid;
        // the serializer follows it to descend into the kid.
        kid.parentNode = element;
        save = true;
      }
    }

    // The parent does not list this element as a kid: forget it.
    if (!save) {
      map.delete(dict);
    }

    return element;
  }

  /**
   * Stores `element` in `nodes` at the position(s) where the root's `K`
   * entry refers to `dict`.
   *
   * @param {Object} dict - Dictionary of the element; `objId` is compared.
   * @param {StructElementNode} element - Node built for `dict`.
   * @returns {boolean} `false` when `K` is missing or does not refer to
   *   `dict`; `true` otherwise (including when `K` is neither a dictionary
   *   nor an array, in which case nothing is stored).
   */
  addTopLevelNode(dict, element) {
    const obj = this.rootDict.get("K");

    if (!obj) {
      return false;
    }

    if (obj instanceof _primitives.Dict) {
      if (obj.objId !== dict.objId) {
        return false;
      }

      this.nodes[0] = element;
      return true;
    }

    if (!Array.isArray(obj)) {
      return true;
    }

    let save = false;

    for (let i = 0; i < obj.length; i++) {
      const kidRef = obj[i];

      if (kidRef && kidRef.toString() === dict.objId) {
        this.nodes[i] = element;
        save = true;
      }
    }

    return save;
  }

  /**
   * Plain-object form of the page's tree: a `Root` object whose `children`
   * hold element objects (`role`, `children`, optional `alt` / `lang`),
   * content leaves (`type: "content"`) and object leaves (`type: "object"`).
   * Branches deeper than `MAX_DEPTH` are cut off with a warning.
   *
   * @returns {Object}
   */
  get serializable() {
    function nodeToSerializable(node, parent, level = 0) {
      if (level > MAX_DEPTH) {
        _util.warn("StructTree too deep to be fully serialized.");
        return;
      }

      const obj = Object.create(null);
      obj.role = node.role;
      obj.children = [];
      parent.children.push(obj);
      const alt = node.dict.get("Alt");

      if (typeof alt === "string") {
        obj.alt = _util.stringToPDFString(alt);
      }

      const lang = node.dict.get("Lang");

      if (typeof lang === "string") {
        obj.lang = _util.stringToPDFString(lang);
      }

      for (const kid of node.kids) {
        // Only element kids that were linked to a node can be descended.
        const kidElement =
          kid.type === StructElementType.ELEMENT ? kid.parentNode : null;

        if (kidElement) {
          nodeToSerializable(kidElement, obj, level + 1);
        } else if (
          kid.type === StructElementType.PAGE_CONTENT ||
          kid.type === StructElementType.STREAM_CONTENT
        ) {
          obj.children.push({
            type: "content",
            id: `page${kid.pageObjId}_mcid${kid.mcid}`,
          });
        } else if (kid.type === StructElementType.OBJECT) {
          obj.children.push({
            type: "object",
            id: kid.refObjId,
          });
        }
      }
    }

    const root = Object.create(null);
    root.children = [];
    root.role = "Root";

    // `nodes` can be sparse (indexed by position in the root's `K` array).
    for (const child of this.nodes) {
      if (!child) {
        continue;
      }

      nodeToSerializable(child, root);
    }

    return root;
  }
}
|
384 |
|
385 | exports.StructTreePage = StructTreePage; |
\ | No newline at end of file |