UNPKG

10 kBJavaScriptView Raw
1/**
2 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
3 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
4 */
5/**
6 * @module paste-from-office/filters/image
7 */
8/* globals btoa */
9import { Matcher, UpcastWriter } from 'ckeditor5/src/engine.js';
/**
 * Entry point of the image filter. Cleans up Word shape markup in the given document fragment and then
 * swaps the `src` of every remaining `<img>` that points to a local `file://` resource for an inline
 * base64 representation built from the RTF data.
 *
 * Does nothing when the document fragment has no children.
 *
 * @param documentFragment Document fragment on which transform images.
 * @param rtfData The RTF data from which images representation will be used.
 */
export function replaceImagesSourceWithBase64(documentFragment, rtfData) {
    if (!documentFragment.childCount) {
        return;
    }

    const writer = new UpcastWriter(documentFragment.document);
    const wordShapeIds = findAllShapesIds(documentFragment, writer);

    // The order of the steps below matters: shape ids have to be collected before any element is removed,
    // and `<v:*>` elements may only be dropped after the missing `<img>` elements were created from them.
    removeAllImgElementsRepresentingShapes(wordShapeIds, documentFragment, writer);
    insertMissingImgs(wordShapeIds, documentFragment, writer);
    removeAllShapeElements(documentFragment, writer);

    const localImages = findAllImageElementsWithLocalSource(documentFragment, writer);

    if (localImages.length === 0) {
        return;
    }

    // The RTF data is parsed only when there is at least one image to update.
    const hexSources = extractImageDataFromRtf(rtfData);

    replaceImagesFileSourceWithInlineRepresentation(localImages, hexSources, writer);
}
/**
 * Converts given HEX string to base64 representation.
 *
 * The string is consumed two characters at a time, each pair being parsed as one byte. A trailing
 * unpaired character is ignored. An input with no complete pair (e.g. an empty string) yields
 * an empty string.
 *
 * @internal
 * @param hexString The HEX string to be converted.
 * @returns Base64 representation of a given HEX string.
 */
export function _convertHexToBase64(hexString) {
    // `String#match()` with a global regexp returns `null` (not an empty array) when nothing matches,
    // so fall back to an empty list instead of crashing on `null.map()`.
    const bytePairs = hexString.match(/\w{2}/g) || [];
    const binaryString = bytePairs
        .map(pair => String.fromCharCode(parseInt(pair, 16)))
        .join('');

    return btoa(binaryString);
}
/**
 * Finds all shapes (`<v:*>...</v:*>`) ids. Shapes can represent images (canvas)
 * or Word shapes (which does not have RTF or Blob representation).
 *
 * Only ids of elements recognized as Word shapes are returned. Elements without an `id` attribute
 * are skipped because there is no id to collect for them.
 *
 * @param documentFragment Document fragment from which to extract shape ids.
 * @param writer Writer used to create the range spanning the document fragment.
 * @returns Array of shape ids.
 */
function findAllShapesIds(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapeElementsMatcher = new Matcher({
        name: /v:(.+)/
    });
    // List of ids which should not be considered as shapes.
    // https://github.com/ckeditor/ckeditor5/pull/15847#issuecomment-1941543983
    const exceptionIds = ['Chart'];
    const shapesIds = [];

    for (const value of range) {
        if (value.type != 'elementStart') {
            continue;
        }

        const el = value.item;

        // Only `<v:*>` elements carrying the `o:gfxdata` attribute are candidates.
        if (!shapeElementsMatcher.match(el) || !el.getAttribute('o:gfxdata')) {
            continue;
        }

        const id = el.getAttribute('id');

        // A missing `id` used to crash on `undefined.includes()`. Such an element is skipped.
        if (!id) {
            continue;
        }

        const previousSibling = el.previousSibling;
        const prevSiblingName = previousSibling && previousSibling.is('element') ? previousSibling.name : null;
        const isPreviousSiblingAShapeType = prevSiblingName === 'v:shapetype';
        const isElementIdInExceptionsArray = exceptionIds.some(item => id.includes(item));

        // If shape element has 'o:gfxdata' attribute and is not directly after
        // `<v:shapetype>` element it means that it represents a Word shape.
        if (!isPreviousSiblingAShapeType && !isElementIdInExceptionsArray) {
            shapesIds.push(id);
        }
    }

    return shapesIds;
}
/**
 * Removes `<img>` elements that stand for Word shapes rather than regular images.
 *
 * An `<img>` is removed when every id listed in its `v:shapes` attribute is one of `shapesIds`,
 * or, failing that, when it has no `src` attribute value.
 *
 * @param shapesIds Shape ids which will be checked against `<img>` elements.
 * @param documentFragment Document fragment from which to remove `<img>` elements.
 * @param writer Writer used to walk the document fragment and to remove the elements.
 */
function removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const imageElementsMatcher = new Matcher({
        name: 'img'
    });
    const imgsToRemove = [];

    for (const { item } of range) {
        if (!item.is('element') || !imageElementsMatcher.match(item)) {
            continue;
        }

        const shapesAttribute = item.getAttribute('v:shapes');
        const referencedShapes = shapesAttribute ? shapesAttribute.split(' ') : [];
        const representsOnlyShapes = referencedShapes.length > 0 &&
            referencedShapes.every(shape => shapesIds.includes(shape));

        // Shapes may also have empty source while content is paste in some browsers (Safari).
        if (representsOnlyShapes || !item.getAttribute('src')) {
            imgsToRemove.push(item);
        }
    }

    // Removal is deferred so the tree is not modified while it is being walked.
    imgsToRemove.forEach(img => {
        writer.remove(img);
    });
}
/**
 * Strips every shape element (`<v:*>...</v:*>`) from the document fragment so that none of them
 * ends up in the output structure.
 *
 * @param documentFragment Document fragment from which to remove shape elements.
 * @param writer Writer used to walk the document fragment and to remove the elements.
 */
function removeAllShapeElements(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapeElementsMatcher = new Matcher({
        name: /v:(.+)/
    });
    const shapesToRemove = [];

    for (const { type, item } of range) {
        if (type != 'elementStart') {
            continue;
        }

        if (shapeElementsMatcher.match(item)) {
            shapesToRemove.push(item);
        }
    }

    // Collect first, remove afterwards: the tree is left untouched while it is being walked.
    shapesToRemove.forEach(shape => {
        writer.remove(shape);
    });
}
/**
 * Inserts an `<img>` element right after every `<v:shape>` which has no matching `<img>` yet.
 *
 * Shapes whose id is listed in `shapeIds` are left alone. For the remaining ones, an `<img>` counts as
 * matching when it sits anywhere under the shape's parent and its `v:shapes` attribute equals the shape id.
 * The new `<img>` takes its `src` from the first child element of the shape that has one, and copies
 * the `alt` attribute of the shape when present.
 *
 * @param shapeIds Ids of shapes for which no `<img>` should be inserted.
 * @param documentFragment Document fragment in which to look for `<v:shape>` elements.
 * @param writer Writer used to walk the document fragment and to create and insert the `<img>` elements.
 */
function insertMissingImgs(shapeIds, documentFragment, writer) {
    // Checks recursively whether `nodes` contain an `<img>` whose `v:shapes` attribute equals `id`.
    function containsMatchingImg(nodes, id) {
        for (const node of nodes) {
            /* istanbul ignore else -- @preserve */
            if (node.is('element')) {
                const isMatchingImg = node.name == 'img' && node.getAttribute('v:shapes') == id;

                if (isMatchingImg || containsMatchingImg(node.getChildren(), id)) {
                    return true;
                }
            }
        }

        return false;
    }

    // Returns the `src` of the first direct child element having one, or `undefined` if there is none.
    function findSrc(shape) {
        for (const child of shape.getChildren()) {
            /* istanbul ignore else -- @preserve */
            if (child.is('element')) {
                const src = child.getAttribute('src');

                if (src) {
                    return src;
                }
            }
        }
    }

    const shapesWithoutImg = [];

    for (const { type, item } of writer.createRangeIn(documentFragment)) {
        if (type != 'elementStart' || !item.is('element', 'v:shape')) {
            continue;
        }

        const id = item.getAttribute('id');
        const isExcluded = shapeIds.includes(id);

        if (!isExcluded && !containsMatchingImg(item.parent.getChildren(), id)) {
            shapesWithoutImg.push(item);
        }
    }

    // Insertion happens after the walk so the tree is not modified while it is being iterated.
    for (const shape of shapesWithoutImg) {
        const attrs = {
            src: findSrc(shape)
        };

        if (shape.hasAttribute('alt')) {
            attrs.alt = shape.getAttribute('alt');
        }

        const img = writer.createElement('img', attrs);

        writer.insertChild(shape.index + 1, img, shape.parent);
    }
}
/**
 * Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
 *
 * `<img>` elements without a `src` attribute value are ignored instead of causing an error.
 *
 * @param documentFragment Document fragment in which to look for `<img>` elements.
 * @param writer Writer used to create the range spanning the document fragment.
 * @returns Array of found `<img>` elements, in the order in which they were encountered.
 */
function findAllImageElementsWithLocalSource(documentFragment, writer) {
    const imgs = [];

    for (const { item } of writer.createRangeIn(documentFragment)) {
        if (!item.is('element', 'img')) {
            continue;
        }

        const src = item.getAttribute('src');

        // `src` may be missing (for example on an `<img>` created for a shape with no source),
        // so it has to be checked before calling `startsWith()` on it.
        if (src && src.startsWith('file://')) {
            imgs.push(item);
        }
    }

    return imgs;
}
/**
 * Pulls the HEX representations of images out of the given RTF data.
 *
 * Only pictures marked as `\pngblip` or `\jpegblip` are returned, pictures of any other kind are skipped.
 *
 * @param rtfData The RTF data from which to extract images HEX representation.
 * @returns Array of found HEX representations (empty when there is no RTF data or no picture matched).
 * Each array item is an object containing:
 *
 * * hex Image representation in HEX format.
 * * type Type of image, `image/png` or `image/jpeg`.
 */
function extractImageDataFromRtf(rtfData) {
    if (!rtfData) {
        return [];
    }

    const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
    const regexPicture = new RegExp('(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g');
    const pictures = rtfData.match(regexPicture) || [];
    const extracted = [];

    pictures.forEach(picture => {
        let type = null;

        if (picture.includes('\\pngblip')) {
            type = 'image/png';
        } else if (picture.includes('\\jpegblip')) {
            type = 'image/jpeg';
        }

        if (type === null) {
            return;
        }

        // Drop the picture header first, then everything that is not a HEX digit (whitespace, braces).
        const hex = picture
            .replace(regexPictureHeader, '')
            .replace(/[^\da-fA-F]/g, '');

        extracted.push({ hex, type });
    });

    return extracted;
}
/**
 * Sets the `src` attribute of each given image to a `data:` URL holding the base64 form of the
 * HEX source found at the same position in `imagesHexSources`.
 *
 * Nothing is changed when the two arrays differ in length.
 *
 * @param imageElements Array of image elements which will have its source replaced.
 * @param imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
 * The array should be the same length as `imageElements` parameter.
 * @param writer Writer used to set the `src` attribute.
 */
function replaceImagesFileSourceWithInlineRepresentation(imageElements, imagesHexSources, writer) {
    // Elements and HEX sources are paired purely by position, which is only meaningful
    // when there is exactly one source per element.
    if (imageElements.length !== imagesHexSources.length) {
        return;
    }

    for (const [index, imageElement] of imageElements.entries()) {
        const { type, hex } = imagesHexSources[index];
        const inlineSrc = `data:${type};base64,${_convertHexToBase64(hex)}`;

        writer.setAttribute('src', inlineSrc, imageElement);
    }
}