UNPKG

49.3 kBJavaScriptView Raw
1/**
2 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
3 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
4 */
5import { Plugin } from '@ckeditor/ckeditor5-core/dist/index.js';
6import { ClipboardPipeline } from '@ckeditor/ckeditor5-clipboard/dist/index.js';
7import { UpcastWriter, Matcher, ViewDocument, DomConverter } from '@ckeditor/ckeditor5-engine/dist/index.js';
8
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * @module paste-from-office/filters/utils
 */
/**
 * Converts a CSS length given in `pt`, `pc`, `in`, `cm` or `mm` to pixels (at 96px per inch).
 *
 * @internal
 * @param value CSS length string, for example `'12pt'`.
 * @returns The length formatted by `toPx()`, or `value` unchanged when it ends with none of the units above.
 */
function convertCssLengthToPx(value) {
    // Each converter keeps a fixed order of arithmetic operations so the floating point result is stable.
    const conversions = [
        // 1pt = 1in / 72
        ['pt', (amount) => amount * 96 / 72],
        // 1pc = 12pt = 1in / 6
        ['pc', (amount) => amount * 12 * 96 / 72],
        // 1in = 2.54cm = 96px
        ['in', (amount) => amount * 96],
        // 1cm = 96px / 2.54
        ['cm', (amount) => amount * 96 / 2.54],
        // 1mm = 1cm / 10
        ['mm', (amount) => amount / 10 * 96 / 2.54]
    ];
    const conversion = conversions.find(([unit]) => value.endsWith(unit));
    if (!conversion) {
        return value;
    }
    const [, toPixels] = conversion;
    return toPx(toPixels(parseFloat(value)));
}
/**
 * Tells whether a CSS length is expressed in pixels.
 *
 * @internal
 * @param value CSS length string, or `undefined` when there is no value.
 * @returns `true` only for a defined value that ends with `px`.
 */
function isPx(value) {
    if (value === undefined) {
        return false;
    }
    return value.endsWith('px');
}
/**
 * Formats a number as a `px` length rounded to at most two decimal places.
 *
 * @internal
 * @param value Numeric length in pixels.
 * @returns The value with trailing zeros (and a dangling decimal point) removed, followed by `px`.
 */
function toPx(value) {
    const fixed = value.toFixed(2);
    // "16.00" -> "16", "37.80" -> "37.8".
    const trimmed = fixed.replace(/\.?0+$/, '');
    return `${trimmed}px`;
}
52
/**
 * Transforms Word specific list-like elements to the semantic HTML lists.
 *
 * Lists in Word are represented by block elements with special attributes like:
 *
 * ```xml
 * <p class=MsoListParagraphCxSpFirst style='mso-list:l1 level1 lfo1'>...</p> // Paragraph based list.
 * <h1 style='mso-list:l0 level1 lfo1'>...</h1> // Heading 1 based list.
 * ```
 *
 * The function walks the items returned by `findAllItemLikeElements()` and keeps a stack with one entry
 * per currently open nesting level. Each entry holds the item data that opened the level, the created
 * `<ol>`/`<ul>` (`listElement`) and the `<li>` elements appended to it so far (`listItemElements`).
 *
 * @param documentFragment The view structure to be transformed (modified in place).
 * @param stylesString Styles from which list-like elements styling will be extracted.
 * @param hasMultiLevelListPlugin Passed through to `createNewEmptyList()`.
 */
function transformListItemLikeElementsIntoLists(documentFragment, stylesString, hasMultiLevelListPlugin) {
    if (!documentFragment.childCount) {
        return;
    }
    const writer = new UpcastWriter(documentFragment.document);
    const itemLikeElements = findAllItemLikeElements(documentFragment, writer);
    if (!itemLikeElements.length) {
        return;
    }
    // Next item number of every top-level list, keyed by `<list id>:<level>`.
    const encounteredLists = {};
    const stack = [];
    for (const itemLikeElement of itemLikeElements) {
        if (itemLikeElement.indent !== undefined) {
            if (!isListContinuation(itemLikeElement)) {
                stack.length = 0;
            }
            // Combined list ID for addressing encounter lists counters.
            const originalListId = `${itemLikeElement.id}:${itemLikeElement.indent}`;
            // Normalized list item indentation (never deeper than one level below the open levels).
            const indent = Math.min(itemLikeElement.indent - 1, stack.length);
            // Trimming of the list stack on list ID change.
            if (indent < stack.length && stack[indent].id !== itemLikeElement.id) {
                stack.length = indent;
            }
            // Trimming of the list stack on lower indent list encountered.
            if (indent < stack.length - 1) {
                stack.length = indent + 1;
            } else {
                const listStyle = detectListStyle(itemLikeElement, stylesString);
                // Create a new OL/UL if required (greater indent or different list type).
                if (indent > stack.length - 1 || stack[indent].listElement.name != listStyle.type) {
                    // Check if there is some start index to set from a previous list.
                    if (indent == 0 && listStyle.type == 'ol' && itemLikeElement.id !== undefined && encounteredLists[originalListId]) {
                        listStyle.startIndex = encounteredLists[originalListId];
                    }
                    const listElement = createNewEmptyList(listStyle, writer, hasMultiLevelListPlugin);
                    // Apply list padding only if we have margins for the item and the parent item.
                    if (isPx(itemLikeElement.marginLeft) && (indent == 0 || isPx(stack[indent - 1].marginLeft))) {
                        let marginLeft = itemLikeElement.marginLeft;
                        if (indent > 0) {
                            // Convert the padding from absolute to relative.
                            marginLeft = toPx(parseFloat(marginLeft) - parseFloat(stack[indent - 1].marginLeft));
                        }
                        writer.setStyle('padding-left', marginLeft, listElement);
                    }
                    // Insert the new OL/UL.
                    // A top-level list always goes right after the block in its parent. Checking `indent`
                    // (not `stack.length`) also covers a top-level item that changes the list type while
                    // the stack is not empty; there is no parent stack entry to read in that case.
                    if (indent == 0) {
                        const parent = itemLikeElement.element.parent;
                        const index = parent.getChildIndex(itemLikeElement.element) + 1;
                        writer.insertChild(index, listElement, parent);
                    } else {
                        const parentListItems = stack[indent - 1].listItemElements;
                        writer.appendChild(listElement, parentListItems[parentListItems.length - 1]);
                    }
                    // Update the list stack for other items to reference.
                    stack[indent] = {
                        ...itemLikeElement,
                        listElement,
                        listItemElements: []
                    };
                    // Prepare list counter for start index.
                    if (indent == 0 && itemLikeElement.id !== undefined) {
                        encounteredLists[originalListId] = listStyle.startIndex || 1;
                    }
                }
            }
            // Use LI if it is already it or create a new LI element.
            // https://github.com/ckeditor/ckeditor5/issues/15964
            const listItem = itemLikeElement.element.name == 'li' ? itemLikeElement.element : writer.createElement('li');
            // Append the LI to OL/UL.
            writer.appendChild(listItem, stack[indent].listElement);
            stack[indent].listItemElements.push(listItem);
            // Increment list counter.
            if (indent == 0 && itemLikeElement.id !== undefined) {
                encounteredLists[originalListId]++;
            }
            // Append list block to LI.
            if (itemLikeElement.element != listItem) {
                writer.appendChild(itemLikeElement.element, listItem);
            }
            // Clean list block.
            removeBulletElement(itemLikeElement.element, writer);
            writer.removeStyle('text-indent', itemLikeElement.element); // #12361
            writer.removeStyle('margin-left', itemLikeElement.element);
        } else {
            // Other blocks in a list item: matched to an open level by their left margin.
            const stackItem = stack.find((stackItem) => stackItem.marginLeft == itemLikeElement.marginLeft);
            // This might be a paragraph that has known margin, but it is not a real list block.
            if (stackItem) {
                const listItems = stackItem.listItemElements;
                // Append block to LI.
                writer.appendChild(itemLikeElement.element, listItems[listItems.length - 1]);
                writer.removeStyle('margin-left', itemLikeElement.element);
            } else {
                stack.length = 0;
            }
        }
    }
}
/**
 * Unwraps a paragraph that is the first child of a list item.
 *
 * @param documentFragment The view structure to be processed.
 * @param writer Writer used to walk the structure and to unwrap the paragraphs.
 */
function unwrapParagraphInListItem(documentFragment, writer) {
    for (const { item } of writer.createRangeIn(documentFragment)) {
        if (!item.is('element', 'li')) {
            continue;
        }
        // Google Docs allows for single paragraph inside LI.
        const leadingChild = item.getChild(0);
        if (leadingChild && leadingChild.is('element', 'p')) {
            writer.unwrapElement(leadingChild);
        }
    }
}
/**
 * Collects the blocks of a document fragment that are list items or that follow a list item
 * with the same left margin.
 *
 * @param documentFragment Document fragment in which to look for list-like nodes.
 * @param writer Writer used to create the range to walk.
 * @returns Array of found list-like items. Each item is an object with `element`, `id`, `order`,
 * `indent` (all three as returned by `getListItemData()`) and `marginLeft`.
 */
function findAllItemLikeElements(documentFragment, writer) {
    const collected = [];
    // Left margins of the list-like blocks seen since the last block that was not list-like.
    const knownMargins = new Set();
    const hasMsoListClass = (node) => Array.from(node.getClassNames()).some((className) => className.startsWith('MsoList'));
    for (const node of writer.createRangeIn(documentFragment).getItems()) {
        // https://github.com/ckeditor/ckeditor5/issues/15964
        if (!node.is('element') || !node.name.match(/^(p|h\d+|li|div)$/)) {
            continue;
        }
        // Try to rely on margin-left style to find paragraphs visually aligned with previously encountered list item.
        let marginLeft = getMarginLeftNormalized(node);
        // Ignore margin-left 0 style if there is no MsoList... class.
        if (marginLeft !== undefined && parseFloat(marginLeft) == 0 && !hasMsoListClass(node)) {
            marginLeft = undefined;
        }
        // List item or a following list item block.
        const belongsToList = node.hasStyle('mso-list') || (marginLeft !== undefined && knownMargins.has(marginLeft));
        if (!belongsToList) {
            knownMargins.clear();
            continue;
        }
        const { id, order, indent } = getListItemData(node);
        collected.push({
            element: node,
            id,
            order,
            indent,
            marginLeft
        });
        if (marginLeft !== undefined) {
            knownMargins.add(marginLeft);
        }
    }
    return collected;
}
/**
 * Whether the given item is possibly a list continuation: either its previous sibling is a list
 * or, when it has no previous sibling, its parent is a list.
 *
 * @param currentItem List-like item data with an `element` property.
 */
function isListContinuation(currentItem) {
    const { element } = currentItem;
    const previousSibling = element.previousSibling;
    // Without a previous sibling this may be a `li` inside `ul` or `ol`, like in
    // https://github.com/ckeditor/ckeditor5/issues/15964.
    // Otherwise only the sibling decides: even with the same id the list does not have to be continuous (#43).
    return previousSibling ? isList(previousSibling) : isList(element.parent);
}
/**
 * Returns `true` when the given node is an `<ol>` or `<ul>` element.
 */
function isList(element) {
    return ['ol', 'ul'].some((listName) => element.is('element', listName));
}
/**
 * Extracts list item style from the provided CSS.
 *
 * Word describes every list level with a dedicated rule in its stylesheet. For an item styled with
 * `mso-list:l1 level1 lfo1` the rule selector looks like:
 *
 * ```css
 * @list l1:level1 { ... }
 * ```
 *
 * The rule may contain the `mso-level-number-format` property (numbering/bullet style, `decimal` when
 * missing) and `mso-level-start-at` (start index). The raw CSS string is searched with regular
 * expressions because these are not valid CSS properties and would be removed during CSS parsing.
 *
 * @param listLikeItem List-like item for which list style will be searched for. Usually
 * a result of `findAllItemLikeElements()` function.
 * @param stylesString CSS stylesheet.
 * @returns An object with properties:
 *
 * * type - List type, could be `ul` or `ol`.
 * * startIndex - List start index (`null` when not found), read only for non-bullet lists.
 * * style - The value returned by `mapListStyleDefinition()` for the detected Word style.
 * * isLegalStyleList - Truthy when the list has a legal-style `mso-level-text` and no
 * `mso-level-number-format` on any level.
 */
function detectListStyle(listLikeItem, stylesString) {
    const { id, indent, element } = listLikeItem;
    const levelRulePattern = new RegExp(`@list l${id}:level${indent}\\s*({[^}]*)`, 'gi');
    const legalTextPattern = new RegExp(`@list\\s+l${id}:level\\d\\s*{[^{]*mso-level-text:"%\\d\\\\.`, 'gi');
    const anyLevelNumberFormatPattern = new RegExp(`@list l${id}:level\\d\\s*{[^{]*mso-level-number-format:`, 'gi');
    const legalTextMatch = legalTextPattern.exec(stylesString);
    const anyLevelNumberFormatMatch = anyLevelNumberFormatPattern.exec(stylesString);
    // Multi level lists in Word have mso-level-number-format attribute except legal lists.
    // A list with a legal-style text and without that attribute is therefore a legal list.
    const isLegalStyleList = legalTextMatch && !anyLevelNumberFormatMatch;
    const levelRuleMatch = levelRulePattern.exec(stylesString);
    const levelRuleBody = levelRuleMatch && levelRuleMatch[1];
    // Defaults: a decimal <ol> without an explicit start index.
    let listStyleType = 'decimal';
    let type = 'ol';
    let startIndex = null;
    if (levelRuleBody) {
        const numberFormatMatch = /mso-level-number-format:([^;]{0,100});/gi.exec(levelRuleBody);
        if (numberFormatMatch && numberFormatMatch[1]) {
            listStyleType = numberFormatMatch[1].trim();
            type = listStyleType === 'bullet' || listStyleType === 'image' ? 'ul' : 'ol';
        }
        if (listStyleType === 'bullet') {
            // Styles for the numbered lists are always defined in the Word CSS stylesheet.
            // Unordered lists MAY contain `mso-level-text` but it is sometimes missing, so the bullet
            // style is predicted from the list marker element instead.
            listStyleType = findBulletedListStyle(element) || listStyleType;
        } else {
            const startAtMatch = /mso-level-start-at:\s{0,100}([0-9]{0,10})\s{0,100};/gi.exec(levelRuleBody);
            if (startAtMatch && startAtMatch[1]) {
                startIndex = Number.parseInt(startAtMatch[1], 10);
            }
        }
        if (isLegalStyleList) {
            type = 'ol';
        }
    }
    return {
        type,
        startIndex,
        style: mapListStyleDefinition(listStyleType),
        isLegalStyleList
    };
}
/**
 * Tries to extract the `list-style-type` value for a bulleted list item.
 *
 * @param element The list item block.
 * @returns The `type` attribute of the parent `<ul>` when the element is an `<li>` in a `<ul>` that has it.
 * Otherwise `circle`, `disc` or `square` depending on the marker text found by `findListMarkerNode()`,
 * or `null` when the marker is missing or not recognized.
 */
function findBulletedListStyle(element) {
    // https://github.com/ckeditor/ckeditor5/issues/15964
    const isItemOfTypedList = element.name == 'li' && element.parent.name == 'ul' && element.parent.hasAttribute('type');
    if (isItemOfTypedList) {
        return element.parent.getAttribute('type');
    }
    const markerNode = findListMarkerNode(element);
    if (!markerNode) {
        return null;
    }
    switch (markerNode._data) {
        case 'o':
            return 'circle';
        case '·':
            return 'disc';
        case '§':
            return 'square';
        default:
            return null;
    }
}
/**
 * Tries to find a text node that represents the marker element (list-style-type).
 *
 * @param element The list item block to search in.
 * @returns `null` when the element has no children or starts with a text node. Otherwise the first
 * child of the first non-empty `<span>` when that child is a text node, or that child's own first
 * child when it is not.
 */
function findListMarkerNode(element) {
    const firstChild = element.getChild(0);
    // An empty element has no marker. If the first child is a text node, it is the data for the
    // element and the list-style marker is not present either.
    if (!firstChild || firstChild.is('$text')) {
        return null;
    }
    for (const childNode of element.getChildren()) {
        // The list-style marker will be inside the `<span>` element. Let's ignore all non-span elements.
        // It may happen that the `<a>` element is added as the first child. Most probably, it's an anchor element.
        if (!childNode.is('element', 'span')) {
            continue;
        }
        const textNodeOrElement = childNode.getChild(0);
        if (!textNodeOrElement) {
            continue;
        }
        // If already found the marker element, use it.
        if (textNodeOrElement.is('$text')) {
            return textNodeOrElement;
        }
        return textNodeOrElement.getChild(0);
    }
    /* istanbul ignore next -- @preserve */ return null;
}
/**
 * Translates a list style name taken from the Word stylesheet to a CSS `list-style-type` value.
 *
 * @param value Word list style name (or an already valid `circle`, `disc`, `square`).
 * @returns The CSS value, or `null` when the name has no mapping.
 */
function mapListStyleDefinition(value) {
    if (value.startsWith('arabic-leading-zero')) {
        return 'decimal-leading-zero';
    }
    const wordToCss = new Map([
        ['alpha-upper', 'upper-alpha'],
        ['alpha-lower', 'lower-alpha'],
        ['roman-upper', 'upper-roman'],
        ['roman-lower', 'lower-roman'],
        ['circle', 'circle'],
        ['disc', 'disc'],
        ['square', 'square']
    ]);
    return wordToCss.has(value) ? wordToCss.get(value) : null;
}
/**
 * Creates a new, empty OL/UL element described by the given list style.
 *
 * @param listStyle Object with `type`, `style`, `startIndex` and `isLegalStyleList` properties.
 * @param writer Writer used to create and decorate the element.
 * @param hasMultiLevelListPlugin When truthy, legal-style lists get the `legal-list` class.
 * @returns The created list element.
 */
function createNewEmptyList(listStyle, writer, hasMultiLevelListPlugin) {
    const { type, style, startIndex, isLegalStyleList } = listStyle;
    const list = writer.createElement(type);
    // We do not support modifying the marker for a particular list item.
    // Set the value for the `list-style-type` property directly to the list container.
    if (style) {
        writer.setStyle('list-style-type', style, list);
    }
    // A start index of 1 is the default, so only greater values are written.
    if (startIndex > 1) {
        writer.setAttribute('start', startIndex, list);
    }
    if (isLegalStyleList && hasMultiLevelListPlugin) {
        writer.addClass('legal-list', list);
    }
    return list;
}
/**
 * Extracts list item information from Word specific list-like element style:
 *
 * ```
 * `style="mso-list:l1 level1 lfo1"`
 * ```
 *
 * where:
 *
 * ```
 * * `l1` is a list id (however it does not mean this is a continuous list - see #43),
 * * `level1` is a list item indentation level,
 * * `lfo1` is a list insertion order in a document.
 * ```
 *
 * @param element Element from which style data is extracted.
 * @returns An empty object when the element has no `mso-list` style, `{ id, order, indent }` when all
 * three parts are present, and `{ indent: 1 }` otherwise.
 */
function getListItemData(element) {
    const msoList = element.getStyle('mso-list');
    if (msoList === undefined) {
        return {};
    }
    const idPart = msoList.match(/(^|\s{1,100})l(\d+)/i);
    const orderPart = msoList.match(/\s{0,100}lfo(\d+)/i);
    const levelPart = msoList.match(/\s{0,100}level(\d+)/i);
    if (!idPart || !orderPart || !levelPart) {
        // Handle empty mso-list style as a marker for a default list item.
        return {
            indent: 1
        };
    }
    return {
        id: idPart[2],
        order: orderPart[1],
        indent: Number.parseInt(levelPart[1], 10)
    };
}
/**
 * Removes the `<span>` elements holding the list numbering/bullet from a given element.
 *
 * @param element Element to clean.
 * @param writer Writer used to walk the element and remove the matching spans.
 */
function removeBulletElement(element, writer) {
    // Word marks the numbering/bullet span with the `mso-list:Ignore` style.
    const bulletMatcher = new Matcher({
        name: 'span',
        styles: {
            'mso-list': 'Ignore'
        }
    });
    for (const step of writer.createRangeIn(element)) {
        const isBulletSpan = step.type === 'elementStart' && bulletMatcher.match(step.item);
        if (isBulletSpan) {
            writer.remove(step.item);
        }
    }
}
/**
 * Returns the `margin-left` style of an element, converted by `convertCssLengthToPx()` unless it is
 * missing or already ends with `px`.
 *
 * @param element Element whose `margin-left` style is read.
 */
function getMarginLeftNormalized(element) {
    const marginLeft = element.getStyle('margin-left');
    const needsConversion = marginLeft !== undefined && !marginLeft.endsWith('px');
    return needsConversion ? convertCssLengthToPx(marginLeft) : marginLeft;
}
454
/**
 * Replaces source attribute of all `<img>` elements representing regular
 * images (not the Word shapes) with inlined base64 image representation extracted from RTF data.
 *
 * Before the replacement the fragment is cleaned up: `<img>` elements representing Word shapes are
 * removed, missing `<img>` elements are inserted after shapes and all shape elements are removed.
 *
 * @param documentFragment Document fragment on which transform images.
 * @param rtfData The RTF data from which images representation will be used.
 */
function replaceImagesSourceWithBase64(documentFragment, rtfData) {
    if (!documentFragment.childCount) {
        return;
    }
    const writer = new UpcastWriter(documentFragment.document);
    const wordShapeIds = findAllShapesIds(documentFragment, writer);
    removeAllImgElementsRepresentingShapes(wordShapeIds, documentFragment, writer);
    insertMissingImgs(wordShapeIds, documentFragment, writer);
    removeAllShapeElements(documentFragment, writer);
    const localImages = findAllImageElementsWithLocalSource(documentFragment, writer);
    if (!localImages.length) {
        return;
    }
    // The RTF data is parsed only when there is something to replace.
    replaceImagesFileSourceWithInlineRepresentation(localImages, extractImageDataFromRtf(rtfData), writer);
}
/**
 * Converts given HEX string to base64 representation.
 *
 * The string is read in pairs of characters, each pair being one byte. A string with no complete
 * pair (for example an empty one) converts to an empty string.
 *
 * @internal
 * @param hexString The HEX string to be converted.
 * @returns Base64 representation of a given HEX string.
 */
function _convertHexToBase64(hexString) {
    // `match()` returns `null` when there is no pair at all.
    const bytePairs = hexString.match(/\w{2}/g) || [];
    const binary = bytePairs.map((pair) => String.fromCharCode(parseInt(pair, 16))).join('');
    return btoa(binary);
}
/**
 * Finds ids of all shape elements (`<v:*>...</v:*>`) that represent Word shapes. Shapes can represent
 * images (canvas) or Word shapes (which do not have an RTF or Blob representation).
 *
 * A shape element is reported when it has the `o:gfxdata` attribute, does not directly follow
 * a `<v:shapetype>` element and its id does not contain any of the excepted names.
 *
 * @param documentFragment Document fragment from which to extract shape ids.
 * @param writer Writer used to create the range to walk.
 * @returns Array of shape ids, as returned by `getAttribute( 'id' )` of the matching elements.
 */
function findAllShapesIds(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapeElementsMatcher = new Matcher({
        name: /v:(.+)/
    });
    // List of ids which should not be considered as shapes.
    // https://github.com/ckeditor/ckeditor5/pull/15847#issuecomment-1941543983
    const exceptionIds = [
        'Chart'
    ];
    const shapesIds = [];
    for (const value of range) {
        if (value.type != 'elementStart') {
            continue;
        }
        const el = value.item;
        const id = el.getAttribute('id');
        const previousSibling = el.previousSibling;
        const prevSiblingName = previousSibling && previousSibling.is('element') ? previousSibling.name : null;
        const isElementAShape = shapeElementsMatcher.match(el);
        const hasElementGfxdataAttribute = el.getAttribute('o:gfxdata');
        const isPreviousSiblingAShapeType = prevSiblingName === 'v:shapetype';
        // An element without an id cannot match an exception; checking the type first avoids calling
        // `includes()` on a missing attribute value.
        const isElementIdInExceptionsArray = hasElementGfxdataAttribute && typeof id == 'string' && exceptionIds.some((item) => id.includes(item));
        // If shape element has 'o:gfxdata' attribute and is not directly before
        // `<v:shapetype>` element it means that it represents a Word shape.
        if (isElementAShape && hasElementGfxdataAttribute && !isPreviousSiblingAShapeType && !isElementIdInExceptionsArray) {
            shapesIds.push(id);
        }
    }
    return shapesIds;
}
/**
 * Removes all `<img>` elements which represent Word shapes and not regular images, as well as
 * `<img>` elements without a source.
 *
 * @param shapesIds Shape ids which will be checked against the `v:shapes` attribute of `<img>` elements.
 * @param documentFragment Document fragment from which to remove `<img>` elements.
 * @param writer Writer used to walk the fragment and remove the elements.
 */
function removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const imageElementsMatcher = new Matcher({
        name: 'img'
    });
    // Collected first and removed after the walk is finished.
    const imagesToRemove = [];
    for (const { item } of range) {
        if (!item.is('element') || !imageElementsMatcher.match(item)) {
            continue;
        }
        const shapesAttribute = item.getAttribute('v:shapes');
        const referencedShapes = shapesAttribute ? shapesAttribute.split(' ') : [];
        const refersOnlyToWordShapes = referencedShapes.length && referencedShapes.every((shapeId) => shapesIds.indexOf(shapeId) > -1);
        // Shapes may also have empty source while content is pasted in some browsers (Safari).
        if (refersOnlyToWordShapes || !item.getAttribute('src')) {
            imagesToRemove.push(item);
        }
    }
    for (const image of imagesToRemove) {
        writer.remove(image);
    }
}
/**
 * Removes all shape elements (`<v:*>...</v:*>`) so they do not pollute the output structure.
 *
 * @param documentFragment Document fragment from which to remove shape elements.
 * @param writer Writer used to walk the fragment and remove the elements.
 */
function removeAllShapeElements(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapeElementsMatcher = new Matcher({
        name: /v:(.+)/
    });
    // The whole range is walked before anything is removed.
    const shapeElements = [...range]
        .filter((step) => step.type == 'elementStart' && shapeElementsMatcher.match(step.item))
        .map((step) => step.item);
    shapeElements.forEach((shapeElement) => {
        writer.remove(shapeElement);
    });
}
/**
 * Inserts an `<img>` element after every `<v:shape>` that is not listed in `shapeIds` and has no
 * `<img>` with a matching `v:shapes` attribute among its parent's descendants.
 *
 * @param shapeIds Ids of the shapes to skip.
 * @param documentFragment Document fragment to process.
 * @param writer Writer used to walk the fragment and to create and insert the images.
 */
function insertMissingImgs(shapeIds, documentFragment, writer) {
    // Recursively looks for an `<img>` whose `v:shapes` attribute equals the given id.
    const containsMatchingImg = (nodes, id) => {
        for (const node of nodes) {
            /* istanbul ignore else -- @preserve */ if (node.is('element')) {
                if (node.name == 'img' && node.getAttribute('v:shapes') == id) {
                    return true;
                }
                if (containsMatchingImg(node.getChildren(), id)) {
                    return true;
                }
            }
        }
        return false;
    };
    // Returns the `src` of the first direct child element that has one (`undefined` when none does).
    const findSrc = (shape) => {
        for (const child of shape.getChildren()) {
            /* istanbul ignore else -- @preserve */ if (child.is('element') && child.getAttribute('src')) {
                return child.getAttribute('src');
            }
        }
    };
    const shapesWithoutImg = [];
    for (const { type, item } of writer.createRangeIn(documentFragment)) {
        if (type != 'elementStart' || !item.is('element', 'v:shape')) {
            continue;
        }
        const id = item.getAttribute('id');
        if (shapeIds.includes(id)) {
            continue;
        }
        if (!containsMatchingImg(item.parent.getChildren(), id)) {
            shapesWithoutImg.push(item);
        }
    }
    // Insertions happen only after the walk is finished.
    for (const shape of shapesWithoutImg) {
        const attrs = {
            src: findSrc(shape)
        };
        if (shape.hasAttribute('alt')) {
            attrs.alt = shape.getAttribute('alt');
        }
        const img = writer.createElement('img', attrs);
        writer.insertChild(shape.index + 1, img, shape.parent);
    }
}
/**
 * Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
 *
 * An `<img>` without a `src` attribute is skipped.
 *
 * @param documentFragment Document fragment in which to look for `<img>` elements.
 * @param writer Writer used to create the range to walk.
 * @returns Array of the found `<img>` elements.
 */
function findAllImageElementsWithLocalSource(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const imageElementsMatcher = new Matcher({
        name: 'img'
    });
    const imgs = [];
    for (const value of range) {
        if (value.item.is('element') && imageElementsMatcher.match(value.item)) {
            const src = value.item.getAttribute('src');
            // The attribute may be missing, so it is checked before calling a string method on it.
            if (src && src.startsWith('file://')) {
                imgs.push(value.item);
            }
        }
    }
    return imgs;
}
/**
 * Extracts all images HEX representations from a given RTF data.
 *
 * Only pictures marked as `\pngblip` or `\jpegblip` are returned; other pictures are skipped.
 *
 * @param rtfData The RTF data from which to extract images HEX representation.
 * @returns Array of found HEX representations. Each array item is an object containing:
 *
 * * hex Image representation in HEX format.
 * * type Type of image, `image/png` or `image/jpeg`.
 */
function extractImageDataFromRtf(rtfData) {
    if (!rtfData) {
        return [];
    }
    const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
    // A whole picture: the header followed by the HEX data and the closing brace.
    const regexPicture = new RegExp(`(?:(${regexPictureHeader.source}))([\\da-fA-F\\s]+)\\}`, 'g');
    const pictures = rtfData.match(regexPicture) || [];
    const detectType = (picture) => {
        if (picture.includes('\\pngblip')) {
            return 'image/png';
        }
        if (picture.includes('\\jpegblip')) {
            return 'image/jpeg';
        }
        return false;
    };
    const result = [];
    for (const picture of pictures) {
        const type = detectType(picture);
        if (!type) {
            continue;
        }
        result.push({
            // Drop the header, then everything that is not a HEX digit (whitespace, braces).
            hex: picture.replace(regexPictureHeader, '').replace(/[^\da-fA-F]/g, ''),
            type
        });
    }
    return result;
}
/**
 * Replaces `src` attribute value of all given images with the corresponding base64 image representation.
 *
 * Nothing is changed when the number of images differs from the number of HEX sources.
 *
 * @param imageElements Array of image elements which will have its source replaced.
 * @param imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
 * The array should be the same length as `imageElements` parameter.
 * @param writer Writer used to set the attribute.
 */
function replaceImagesFileSourceWithInlineRepresentation(imageElements, imagesHexSources, writer) {
    // Images and HEX sources are paired by position, which is only possible for equal counts.
    if (imageElements.length !== imagesHexSources.length) {
        return;
    }
    for (const [position, imageElement] of imageElements.entries()) {
        const { type, hex } = imagesHexSources[position];
        writer.setAttribute('src', `data:${type};base64,${_convertHexToBase64(hex)}`, imageElement);
    }
}
684
/**
 * Cleans up MS specific classes, styles and wrapper elements.
 *
 * Removes every class and style whose name matches `/\bmso/gi`, then unwraps `<w:sdt>` elements and
 * empty `<w:sdtpr>` and `<o:p>` elements (their children are moved to the parent in their place).
 *
 * @param documentFragment element `data.content` obtained from clipboard.
 */
function removeMSAttributes(documentFragment) {
    const wrappersToUnwrap = [];
    const writer = new UpcastWriter(documentFragment.document);
    // The literal is evaluated on every call, so the global flag carries no state between names.
    const isMsoName = (name) => Boolean(/\bmso/gi.exec(name));
    const isWrapper = (element) => element.is('element', 'w:sdt') || element.is('element', 'w:sdtpr') && element.isEmpty || element.is('element', 'o:p') && element.isEmpty;
    for (const { item } of writer.createRangeIn(documentFragment)) {
        if (!item.is('element')) {
            continue;
        }
        for (const className of item.getClassNames()) {
            if (isMsoName(className)) {
                writer.removeClass(className, item);
            }
        }
        for (const styleName of item.getStyleNames()) {
            if (isMsoName(styleName)) {
                writer.removeStyle(styleName, item);
            }
        }
        if (isWrapper(item)) {
            wrappersToUnwrap.push(item);
        }
    }
    // Unwrapping is postponed until the walk is finished.
    for (const wrapper of wrappersToUnwrap) {
        const wrapperParent = wrapper.parent;
        const wrapperIndex = wrapperParent.getChildIndex(wrapper);
        writer.insertChild(wrapperIndex, wrapper.getChildren(), wrapperParent);
        writer.remove(wrapper);
    }
}
717
// Word's `<meta name=Generator content="Microsoft Word NN">` tag.
const msWordMatch1 = /<meta\s*name="?generator"?\s*content="?microsoft\s*word\s*\d+"?\/?>/i;
// Microsoft Office XML namespace declaration.
const msWordMatch2 = /xmlns:o="urn:schemas-microsoft-com/i;
/**
 * Normalizer for the content pasted from Microsoft Word.
 */
class MSWordNormalizer {
    document;
    hasMultiLevelListPlugin;
    /**
     * Creates a new `MSWordNormalizer` instance.
     *
     * @param document View document.
     * @param hasMultiLevelListPlugin Passed to the list transformation, `false` by default.
     */
    constructor(document, hasMultiLevelListPlugin = false) {
        this.document = document;
        this.hasMultiLevelListPlugin = hasMultiLevelListPlugin;
    }
    /**
     * Returns `true` when the HTML string matches one of the Word markers.
     *
     * @param htmlString The pasted HTML.
     */
    isActive(htmlString) {
        return [msWordMatch1, msWordMatch2].some((pattern) => pattern.test(htmlString));
    }
    /**
     * Transforms lists, inlines images from the `text/rtf` clipboard data and removes MS specific
     * attributes, then stores the processed fragment in `data.content`.
     *
     * @param data Clipboard input data with `_parsedData` and `dataTransfer` properties.
     */
    execute(data) {
        const { body, stylesString } = data._parsedData;
        const rtfData = data.dataTransfer.getData('text/rtf');
        transformListItemLikeElementsIntoLists(body, stylesString, this.hasMultiLevelListPlugin);
        replaceImagesSourceWithBase64(body, rtfData);
        removeMSAttributes(body);
        data.content = body;
    }
}
748
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * @module paste-from-office/filters/removeboldwrapper
 */
/**
 * Removes the `<b>` tag wrapper added by Google Docs to a copied content.
 *
 * Every direct child of the fragment that is a `<b>` element with `font-weight: normal` is replaced
 * with its own children.
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer Writer used to modify the fragment.
 */
function removeBoldWrapper(documentFragment, writer) {
    for (const node of documentFragment.getChildren()) {
        const isBoldWrapper = node.is('element', 'b') && node.getStyle('font-weight') === 'normal';
        if (!isBoldWrapper) {
            continue;
        }
        // The index has to be read before the wrapper is removed.
        const wrapperIndex = documentFragment.getChildIndex(node);
        writer.remove(node);
        writer.insertChild(wrapperIndex, node.getChildren(), documentFragment);
    }
}
767
/**
 * Transforms `<br>` elements that are siblings to some block element into paragraphs.
 *
 * A `<br>` with the `Apple-interchange-newline` class is removed instead of being replaced.
 *
 * @param documentFragment The view structure to be transformed.
 * @param writer Writer used to walk and modify the structure.
 */
function transformBlockBrsToParagraphs(documentFragment, writer) {
    const viewDocument = new ViewDocument(writer.document.stylesProcessor);
    // The converter is created only to read its lists of block and inline object element names.
    const { blockElements, inlineObjectElements } = new DomConverter(viewDocument, {
        renderingMode: 'data'
    });
    const siblingOptions = {
        blockElements,
        inlineObjectElements
    };
    const blockBrs = [];
    for (const { item } of writer.createRangeIn(documentFragment)) {
        if (!item.is('element', 'br')) {
            continue;
        }
        const nextSibling = findSibling(item, 'forward', writer, siblingOptions);
        const previousSibling = findSibling(item, 'backward', writer, siblingOptions);
        const nextSiblingIsBlock = isBlockViewElement(nextSibling, blockElements);
        const previousSiblingIsBlock = isBlockViewElement(previousSibling, blockElements);
        // If the <br> is surrounded by blocks then convert it to a paragraph:
        // * <p>foo</p>[<br>]<p>bar</p> -> <p>foo</p>[<p></p>]<p>bar</p>
        // * <p>foo</p>[<br>] -> <p>foo</p>[<p></p>]
        // * [<br>]<p>foo</p> -> [<p></p>]<p>foo</p>
        if (previousSiblingIsBlock || nextSiblingIsBlock) {
            blockBrs.push(item);
        }
    }
    // The structure is modified only after the walk is finished.
    for (const br of blockBrs) {
        if (br.hasClass('Apple-interchange-newline')) {
            writer.remove(br);
            continue;
        }
        writer.replace(br, writer.createElement('p'));
    }
}
/**
 * Returns the sibling node in the given direction, treating inline elements as transparent
 * (but stopping on inline objects).
 *
 * @param viewElement Element whose sibling is searched for.
 * @param direction Either `'forward'` or `'backward'`.
 * @param writer Writer used to create the starting position.
 * @param options Object with `blockElements` and `inlineObjectElements` name lists.
 */
function findSibling(viewElement, direction, writer, { blockElements, inlineObjectElements }) {
    const isForward = direction == 'forward';
    const startPosition = writer.createPositionAt(viewElement, isForward ? 'after' : 'before');
    // Inline (non-object) elements like span, strong, etc. are skipped.
    const isTransparentInline = ({ item }) => item.is('element') && !blockElements.includes(item.name) && !inlineObjectElements.includes(item.name);
    // Move to the first position that is just before a first:
    // * text node,
    // * block element,
    // * inline object element.
    const boundaryPosition = startPosition.getLastMatchingPosition(isTransparentInline, {
        direction
    });
    return isForward ? boundaryPosition.nodeAfter : boundaryPosition.nodeBefore;
}
/**
 * Tells whether `node` is a view element whose name is on the `blockElements` list.
 * An empty `node` (e.g. `null` or `undefined`) yields `false`.
 */
function isBlockViewElement(node, blockElements) {
    if (!node) {
        return false;
    }

    return node.is('element') && blockElements.includes(node.name);
}
829
// Matches the `id="docs-internal-guid-…"` attribute (single or double quoted, case-insensitive).
const googleDocsMatch = /id=("|')docs-internal-guid-[-0-9a-f]+("|')/i;
/**
 * Normalizer for the content pasted from Google Docs.
 */
class GoogleDocsNormalizer {
    document;

    /**
     * Creates a new `GoogleDocsNormalizer` instance.
     *
     * @param document View document.
     */
    constructor(document) {
        this.document = document;
    }

    /**
     * Checks whether the given HTML string contains the Google Docs marker.
     *
     * @inheritDoc
     */
    isActive(htmlString) {
        return googleDocsMatch.test(htmlString);
    }

    /**
     * Runs the Google Docs specific filters on the parsed body (`data._parsedData.body`)
     * and stores the result in `data.content`.
     *
     * @inheritDoc
     */
    execute(data) {
        const writer = new UpcastWriter(this.document);
        const documentFragment = data._parsedData.body;
        const filters = [
            removeBoldWrapper,
            unwrapParagraphInListItem,
            transformBlockBrsToParagraphs
        ];

        // The filters modify the fragment in place and are applied in the order listed above.
        for (const applyFilter of filters) {
            applyFilter(documentFragment, writer);
        }

        data.content = documentFragment;
    }
}
858
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * @module paste-from-office/filters/removexmlns
 */
/**
 * Strips the `xmlns` attribute from `<table>` elements pasted from Google Sheets.
 * Only direct children of the given fragment are inspected.
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer The writer used to remove the attribute.
 */
function removeXmlns(documentFragment, writer) {
    for (const node of documentFragment.getChildren()) {
        const isTableWithXmlns = node.is('element', 'table') && node.hasAttribute('xmlns');

        if (isTableWithXmlns) {
            writer.removeAttribute('xmlns', node);
        }
    }
}
875
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * @module paste-from-office/filters/removegooglesheetstag
 */
/**
 * Removes the `<google-sheets-html-origin>` tag wrapper added by Google Sheets to a copied content.
 * Each wrapper that is a direct child of the fragment is replaced, in place, by its own children.
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer The writer used to remove the wrapper and re-insert its children.
 */
function removeGoogleSheetsTag(documentFragment, writer) {
    // Iterate over a snapshot: the loop body removes and inserts children of `documentFragment`,
    // and doing that while walking the live children iterator can make it skip a sibling
    // (for example a wrapper that directly follows an empty wrapper).
    const children = Array.from(documentFragment.getChildren());

    for (const child of children) {
        if (!child.is('element', 'google-sheets-html-origin')) {
            continue;
        }

        // The index has to be read at this point, as earlier unwrapping may have shifted it.
        const childIndex = documentFragment.getChildIndex(child);

        writer.remove(child);
        writer.insertChild(childIndex, child.getChildren(), documentFragment);
    }
}
894
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * @module paste-from-office/filters/removeinvalidtablewidth
 */
/**
 * Drops the `width` style from `<table>` elements pasted from Google Sheets when its value is exactly `0px`.
 * Only direct children of the given fragment are inspected.
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer The writer used to remove the style.
 */
function removeInvalidTableWidth(documentFragment, writer) {
    for (const node of documentFragment.getChildren()) {
        const isZeroWidthTable = node.is('element', 'table') && node.getStyle('width') === '0px';

        if (isZeroWidthTable) {
            writer.removeStyle('width', node);
        }
    }
}
911
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * @module paste-from-office/filters/removestyleblock
 */
/**
 * Deletes `<style>` elements added by Google Sheets to a copied content.
 * Only direct children of the given fragment are inspected.
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer The writer used to remove the elements.
 */
function removeStyleBlock(documentFragment, writer) {
    // Work on a copy of the children list, because removing nodes changes the fragment.
    const styleElements = [...documentFragment.getChildren()]
        .filter((node) => node.is('element', 'style'));

    for (const styleElement of styleElements) {
        writer.remove(styleElement);
    }
}
928
// Matches the opening of the `<google-sheets-html-origin>` tag (case-insensitive).
const googleSheetsMatch = /<google-sheets-html-origin/i;
/**
 * Normalizer for the content pasted from Google Sheets.
 */
class GoogleSheetsNormalizer {
    document;

    /**
     * Creates a new `GoogleSheetsNormalizer` instance.
     *
     * @param document View document.
     */
    constructor(document) {
        this.document = document;
    }

    /**
     * Checks whether the given HTML string contains the Google Sheets marker tag.
     *
     * @inheritDoc
     */
    isActive(htmlString) {
        return googleSheetsMatch.test(htmlString);
    }

    /**
     * Runs the Google Sheets specific filters on the parsed body (`data._parsedData.body`)
     * and stores the result in `data.content`.
     *
     * @inheritDoc
     */
    execute(data) {
        const writer = new UpcastWriter(this.document);
        const documentFragment = data._parsedData.body;
        const filters = [
            removeGoogleSheetsTag,
            removeXmlns,
            removeInvalidTableWidth,
            removeStyleBlock
        ];

        // The filters modify the fragment in place and are applied in the order listed above.
        for (const applyFilter of filters) {
            applyFilter(documentFragment, writer);
        }

        data.content = documentFragment;
    }
}
958
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * @module paste-from-office/filters/space
 */
/**
 * Normalizes spacing in the pasted HTML string.
 *
 * The last space preceding an element's closing tag is replaced with a non-breaking space, so that it is not removed
 * during further DOM/View processing (see especially {@link module:engine/view/domconverter~DomConverter#_processDomInlineNodes}).
 * Word specific `<o:p></o:p>` empty tags are taken into account as well.
 * Additionally, whitespace sequences between tags that contain a line break are removed (see #39 and #40).
 *
 * @param htmlString HTML string in which spacing should be normalized.
 * @returns Input HTML with spaces normalized.
 */
function normalizeSpacing(htmlString) {
    // Applied one after another, in this exact order.
    const replacements = [
        // Remove all \r\n from "spacerun spans" so the last replacement doesn't strip all whitespaces.
        [/(<span\s+style=['"]mso-spacerun:yes['"]>[^\S\r\n]*?)[\r\n]+([^\S\r\n]*<\/span>)/g, '$1$2'],
        // Drop "spacerun spans" that have no content at all.
        [/<span\s+style=['"]mso-spacerun:yes['"]><\/span>/g, ''],
        // A "letter-spacing span" containing only line breaks gets a single space instead.
        [/(<span\s+style=['"]letter-spacing:[^'"]+?['"]>)[\r\n]+(<\/span>)/g, '$1 $2'],
        // A space right before a closing tag becomes a non-breaking space.
        [/ <\//g, '\u00A0</'],
        // The same for a space right before an empty <o:p> element.
        [/ <o:p><\/o:p>/g, '\u00A0<o:p></o:p>'],
        // Remove <o:p> block filler from empty paragraph. Safari uses \u00A0 instead of &nbsp;.
        [/<o:p>(&nbsp;|\u00A0)<\/o:p>/g, ''],
        // Remove all whitespaces between tags when they contain any \r or \n.
        [/>([^\S\r\n]*[\r\n]\s*)</g, '><']
    ];

    // Run normalizeSafariSpaceSpans() two times to cover nested spans.
    const safariNormalized = normalizeSafariSpaceSpans(normalizeSafariSpaceSpans(htmlString));

    return replacements.reduce(
        (html, [pattern, replacement]) => html.replace(pattern, replacement),
        safariNormalized
    );
}
/**
 * Normalizes spacing in special Word `spacerun spans` (`<span style='mso-spacerun:yes'>\s+</span>`) by replacing
 * the whole text of each such span with an alternating non-breaking space / regular space sequence of the same
 * length. This prevents spaces from being removed during further DOM/View processing
 * (see especially {@link module:engine/view/domconverter~DomConverter#_processDomInlineNodes}).
 *
 * The spans are selected with the `span[style*=spacerun]` selector and are modified in place.
 *
 * @param htmlDocument Native `Document` object in which spacing should be normalized.
 */
function normalizeSpacerunSpans(htmlDocument) {
    htmlDocument.querySelectorAll('span[style*=spacerun]').forEach((spanElement) => {
        const length = spanElement.innerText.length;

        // Build just enough "\u00A0 " pairs and cut to the exact length (an odd length ends with \u00A0).
        // `repeat()`/`slice()` are used in place of the deprecated `substr()`.
        spanElement.innerText = '\u00A0 '.repeat(Math.ceil(length / 2)).slice(0, length);
    });
}
/**
 * Normalizes specific spacing generated by Safari when content pasted from Word (`<span class="Apple-converted-space"> </span>`).
 * Every `<span>` (with that class or with no attributes at all) that contains only whitespace is replaced:
 * * a single whitespace character becomes one regular space,
 * * a longer sequence becomes an alternating non-breaking space / regular space sequence of the same length.
 *
 * This prevents spaces from being removed during further DOM/View processing
 * (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
 *
 * This function is similar to {@link module:clipboard/utils/normalizeclipboarddata normalizeClipboardData util} but uses
 * regular spaces / &nbsp; sequence for replacement.
 *
 * @param htmlString HTML string in which spacing should be normalized
 * @returns Input HTML with spaces normalized.
 */
function normalizeSafariSpaceSpans(htmlString) {
    return htmlString.replace(/<span(?: class="Apple-converted-space"|)>(\s+)<\/span>/g, (fullMatch, spaces) => {
        const length = spaces.length;

        if (length === 1) {
            return ' ';
        }

        // Build just enough "\u00A0 " pairs and cut to the exact length (an odd length ends with \u00A0).
        // `repeat()`/`slice()` are used in place of the deprecated `substr()`.
        return '\u00A0 '.repeat(Math.ceil(length / 2)).slice(0, length);
    });
}
1007
/**
 * Parses the provided HTML and extracts the contents of its `<body>` and `<style>` tags.
 *
 * @param htmlString HTML string to be parsed.
 * @param stylesProcessor Styles processor passed on to the view document created for the conversion.
 * @returns An object with `body` (the body converted to a view), `bodyString` (the body `innerHTML`),
 * `styles` and `stylesString` (as returned by `extractStyles()`).
 */
function parseHtml(htmlString, stylesProcessor) {
    const domParser = new DOMParser();

    const preparedHtml = htmlString
        // Remove Word specific "if comments" so content inside is not omitted by the parser.
        .replace(/<!--\[if gte vml 1]>/g, '')
        // Clean the <head> section of MS Windows specific tags. See https://github.com/ckeditor/ckeditor5/issues/15333.
        // The regular expression matches the <o:SmartTagType> tag with optional attributes (with or without values).
        .replace(/<o:SmartTagType(?:\s+[^\s>=]+(?:="[^"]*")?)*\s*\/?>/gi, '');

    // Parse the cleaned and space-normalized HTML as a native Document object.
    const htmlDocument = domParser.parseFromString(
        normalizeSpacing(cleanContentAfterBody(preparedHtml)),
        'text/html'
    );

    normalizeSpacerunSpans(htmlDocument);

    // Read `innerHTML` before the conversion, as transforming to View modifies the source document.
    const bodyString = htmlDocument.body.innerHTML;
    const body = documentToView(htmlDocument, stylesProcessor);
    const { styles, stylesString } = extractStyles(htmlDocument);

    return { body, bodyString, styles, stylesString };
}
/**
 * Converts the body of a native `Document` object into a {@link module:engine/view/documentfragment~DocumentFragment}.
 * Comments are skipped. All body child nodes are moved out of `htmlDocument.body` in the process.
 *
 * @param htmlDocument Native `Document` object to be transformed.
 * @param stylesProcessor Styles processor used to create the view document for the conversion.
 */
function documentToView(htmlDocument, stylesProcessor) {
    const domConverter = new DomConverter(new ViewDocument(stylesProcessor), { renderingMode: 'data' });
    const fragment = htmlDocument.createDocumentFragment();
    const { body } = htmlDocument;

    // Appending a node to the fragment detaches it from the body, so the loop ends once the body is empty.
    while (body.firstChild) {
        fragment.appendChild(body.firstChild);
    }

    return domConverter.domToView(fragment, { skipComments: true });
}
/**
 * Collects the style sheets and their string representation from the `style` elements of the provided `htmlDocument`.
 * A `style` element is taken into account only when it has a sheet with at least one CSS rule.
 *
 * @param htmlDocument Native `Document` object from which styles will be extracted.
 * @returns An object with `styles` (array of the elements' `sheet` objects) and `stylesString`
 * (their `innerHTML` values joined with a single space).
 */
function extractStyles(htmlDocument) {
    const usableStyleTags = Array.from(htmlDocument.getElementsByTagName('style'))
        .filter((tag) => tag.sheet && tag.sheet.cssRules && tag.sheet.cssRules.length);

    return {
        styles: usableStyleTags.map((tag) => tag.sheet),
        stylesString: usableStyleTags.map((tag) => tag.innerHTML).join(' ')
    };
}
/**
 * Strips whatever is placed between the closing `</body>` tag and the closing `</html>` tag:
 *
 * ```html
 * <html><body><p>Foo Bar</p></body><span>Fo</span></html> -> <html><body><p>Foo Bar</p></body></html>
 * ```
 *
 * This function is used as specific browsers (Edge) add some random content after `body` tag when pasting from Word.
 *
 * A string without `</body>` is returned untouched. When there is no `</html>` after `</body>`,
 * everything after `</body>` is dropped.
 *
 * @param htmlString The HTML string to be cleaned.
 * @returns The HTML string with leftover content removed.
 */
function cleanContentAfterBody(htmlString) {
    const bodyEndTag = '</body>';
    const bodyEndTagStart = htmlString.indexOf(bodyEndTag);

    if (bodyEndTagStart === -1) {
        return htmlString;
    }

    const keepUntil = bodyEndTagStart + bodyEndTag.length;
    const htmlEndTagStart = htmlString.indexOf('</html>', keepUntil);
    const kept = htmlString.slice(0, keepUntil);
    const closing = htmlEndTagStart === -1 ? '' : htmlString.slice(htmlEndTagStart);

    return kept + closing;
}
1093
/**
 * The Paste from Office plugin.
 *
 * This plugin listens to the `inputTransformation` event of the clipboard pipeline. When the pasted HTML is
 * recognized by one of its {@link module:paste-from-office/normalizer~Normalizer normalizers}, that normalizer
 * transforms the content to a valid structure which can then be understood by the editor features.
 *
 * Normalizers registered by this plugin (checked in this order, the first active one is used):
 * * {@link module:paste-from-office/normalizers/mswordnormalizer~MSWordNormalizer Microsoft Word normalizer}
 * * {@link module:paste-from-office/normalizers/googledocsnormalizer~GoogleDocsNormalizer Google Docs normalizer}
 * * Google Sheets normalizer (`GoogleSheetsNormalizer`)
 *
 * For more information about this feature check the {@glink api/paste-from-office package page}.
 */
class PasteFromOffice extends Plugin {
    /**
     * @inheritDoc
     */
    static get pluginName() {
        return 'PasteFromOffice';
    }

    /**
     * @inheritDoc
     */
    static get isOfficialPlugin() {
        return true;
    }

    /**
     * @inheritDoc
     */
    static get requires() {
        return [ClipboardPipeline];
    }

    /**
     * Creates the normalizers and registers the high-priority `inputTransformation` listener.
     *
     * @inheritDoc
     */
    init() {
        const editor = this.editor;
        const clipboardPipeline = editor.plugins.get('ClipboardPipeline');
        const viewDocument = editor.editing.view.document;
        const hasMultiLevelListPlugin = editor.plugins.has('MultiLevelList');
        const normalizers = [
            new MSWordNormalizer(viewDocument, hasMultiLevelListPlugin),
            new GoogleDocsNormalizer(viewDocument),
            new GoogleSheetsNormalizer(viewDocument)
        ];

        const normalizePastedContent = (evt, data) => {
            // The content has already been processed by this plugin.
            if (data._isTransformedWithPasteFromOffice) {
                return;
            }

            // Nothing is transformed when the selection starts inside a code block.
            const selectionParent = editor.model.document.selection.getFirstPosition().parent;

            if (selectionParent.is('element', 'codeBlock')) {
                return;
            }

            const htmlString = data.dataTransfer.getData('text/html');
            const activeNormalizer = normalizers.find((normalizer) => normalizer.isActive(htmlString));

            if (!activeNormalizer) {
                return;
            }

            // Parse the HTML only when no parsed data has been attached to the event data yet.
            if (!data._parsedData) {
                data._parsedData = parseHtml(htmlString, viewDocument.stylesProcessor);
            }

            activeNormalizer.execute(data);
            data._isTransformedWithPasteFromOffice = true;
        };

        clipboardPipeline.on('inputTransformation', normalizePastedContent, { priority: 'high' });
    }
}
1157
1158export { MSWordNormalizer, PasteFromOffice, parseHtml };
1159//# sourceMappingURL=index.js.map