UNPKG

8.05 kBJavaScriptView Raw
1/**
2 * @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
3 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
4 */
5
6/**
7 * @module paste-from-office/filters/image
8 */
9
10/* globals btoa */
11
12import { Matcher, UpcastWriter } from 'ckeditor5/src/engine';
13
/**
 * Swaps the `src` attribute of every `<img>` element that stands for a regular image
 * (not a Word shape) for an inline base64 representation taken from the RTF data.
 *
 * As part of the process, `<img>` elements representing Word shapes and all shape
 * (`<v:*>`) elements are removed from the given document fragment.
 *
 * @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment whose images are transformed.
 * @param {String} rtfData The RTF data from which the image representations are taken.
 */
export function replaceImagesSourceWithBase64( documentFragment, rtfData ) {
	// An empty fragment has nothing to transform.
	if ( !documentFragment.childCount ) {
		return;
	}

	const writer = new UpcastWriter();

	// Shape ids are gathered before the shape elements themselves are removed below.
	const wordShapeIds = findAllShapesIds( documentFragment, writer );

	removeAllImgElementsRepresentingShapes( wordShapeIds, documentFragment, writer );
	removeAllShapeElements( documentFragment, writer );

	const localImages = findAllImageElementsWithLocalSource( documentFragment, writer );

	// The RTF data is parsed only when there is at least one image to fill in.
	if ( !localImages.length ) {
		return;
	}

	const hexSources = extractImageDataFromRtf( rtfData );

	replaceImagesFileSourceWithInlineRepresentation( localImages, hexSources, writer );
}
38
/**
 * Converts given HEX string to base64 representation.
 *
 * The string is read two characters at a time, each pair being parsed as one byte.
 * A string that contains no complete pair (for example an empty string) yields an empty
 * base64 string instead of throwing.
 *
 * @protected
 * @param {String} hexString The HEX string to be converted.
 * @returns {String} Base64 representation of a given HEX string.
 */
export function _convertHexToBase64( hexString ) {
	// `String#match()` with a global regex returns `null` (not an empty array) when nothing matches.
	const bytePairs = hexString.match( /\w{2}/g ) || [];

	const binaryString = bytePairs
		.map( pair => String.fromCharCode( parseInt( pair, 16 ) ) )
		.join( '' );

	return btoa( binaryString );
}
51
// Collects the ids of shape elements (`<v:*>...</v:*>`) that represent Word shapes.
// Shapes can represent images (canvas) or Word shapes (which do not have RTF or Blob representation).
//
// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment
// from which to extract shape ids.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
// @returns {Array.<String>} Array of shape ids.
function findAllShapesIds( documentFragment, writer ) {
	const shapeMatcher = new Matcher( {
		name: /v:(.+)/
	} );
	const wordShapeIds = [];

	for ( const { type, item } of writer.createRangeIn( documentFragment ) ) {
		// Only shape elements carrying the 'o:gfxdata' attribute are candidates.
		if ( type !== 'elementStart' || !shapeMatcher.match( item ) || !item.getAttribute( 'o:gfxdata' ) ) {
			continue;
		}

		const previous = item.previousSibling;
		const followsShapeType = Boolean( previous ) && previous.name === 'v:shapetype';

		// A candidate that is not immediately preceded by a `<v:shapetype>` element is treated as a Word shape.
		if ( !followsShapeType ) {
			wordShapeIds.push( item.getAttribute( 'id' ) );
		}
	}

	return wordShapeIds;
}
84
// Removes every `<img>` element that represents a Word shape rather than a regular image.
//
// An `<img>` is removed when all ids listed in its `v:shapes` attribute are known shape ids,
// or when it has no `src` attribute value.
//
// @param {Array.<String>} shapesIds Shape ids which will be checked against `<img>` elements.
// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment from which to remove `<img>` elements.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
function removeAllImgElementsRepresentingShapes( shapesIds, documentFragment, writer ) {
	const imgMatcher = new Matcher( {
		name: 'img'
	} );
	const imagesToRemove = [];

	for ( const { item } of writer.createRangeIn( documentFragment ) ) {
		if ( !imgMatcher.match( item ) ) {
			continue;
		}

		const shapesAttribute = item.getAttribute( 'v:shapes' );
		const referencedShapes = shapesAttribute ? shapesAttribute.split( ' ' ) : [];
		const referencesOnlyWordShapes = referencedShapes.length > 0 &&
			referencedShapes.every( shapeId => shapesIds.includes( shapeId ) );

		// Shapes may also have an empty source when content is pasted in some browsers (Safari).
		if ( referencesOnlyWordShapes || !item.getAttribute( 'src' ) ) {
			imagesToRemove.push( item );
		}
	}

	// Removal happens after the walk, once all candidates are known.
	imagesToRemove.forEach( img => writer.remove( img ) );
}
117
// Strips all shape elements (`<v:*>...</v:*>`) from the fragment so they do not pollute the output structure.
//
// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment from which to remove shape elements.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
function removeAllShapeElements( documentFragment, writer ) {
	const shapeMatcher = new Matcher( {
		name: /v:(.+)/
	} );

	// The whole range is walked and the matches are collected before anything is removed.
	const shapeElements = Array.from( writer.createRangeIn( documentFragment ) )
		.filter( ( { type, item } ) => type == 'elementStart' && shapeMatcher.match( item ) )
		.map( ( { item } ) => item );

	shapeElements.forEach( shapeElement => writer.remove( shapeElement ) );
}
141
// Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
//
// `<img>` elements without a `src` attribute are skipped.
//
// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment in which to look for `<img>` elements.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
// @returns {Array.<module:engine/view/element~Element>} Array of found `<img>` elements with `file://` source,
// in the order in which they appear in the range.
function findAllImageElementsWithLocalSource( documentFragment, writer ) {
	const range = writer.createRangeIn( documentFragment );

	const imageElementsMatcher = new Matcher( {
		name: 'img'
	} );

	const imgs = [];

	for ( const { item } of range ) {
		if ( !imageElementsMatcher.match( item ) ) {
			continue;
		}

		const src = item.getAttribute( 'src' );

		// The attribute may be missing; calling `startsWith()` on a non-string value would throw.
		if ( typeof src === 'string' && src.startsWith( 'file://' ) ) {
			imgs.push( item );
		}
	}

	return imgs;
}
168
// Pulls the HEX representation of every PNG or JPEG picture out of the given RTF data.
// Pictures of any other type are skipped.
//
// @param {String} rtfData The RTF data from which to extract images HEX representation.
// @returns {Array.<Object>} Array of found HEX representations. Each array item is an object containing:
//
// * {String} hex Image representation in HEX format.
// * {String} type Type of image, `image/png` or `image/jpeg`.
function extractImageDataFromRtf( rtfData ) {
	if ( !rtfData ) {
		return [];
	}

	const headerPattern = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
	// A whole picture group: the header followed by the HEX payload and the closing brace.
	const picturePattern = new RegExp( `(?:(${ headerPattern.source }))([\\da-fA-F\\s]+)\\}`, 'g' );

	// RTF marker -> MIME type; the first marker found in a picture wins.
	const supportedTypes = [
		[ '\\pngblip', 'image/png' ],
		[ '\\jpegblip', 'image/jpeg' ]
	];

	const extracted = [];

	for ( const picture of rtfData.match( picturePattern ) || [] ) {
		const matchedType = supportedTypes.find( ( [ marker ] ) => picture.includes( marker ) );

		if ( !matchedType ) {
			continue;
		}

		extracted.push( {
			// Drop the header, then everything that is not a HEX digit (whitespace, braces).
			hex: picture.replace( headerPattern, '' ).replace( /[^\da-fA-F]/g, '' ),
			type: matchedType[ 1 ]
		} );
	}

	return extracted;
}
207
// Sets the `src` attribute of each given image to a `data:` URL built from the HEX source at the same index.
// Nothing is changed when the number of images differs from the number of HEX sources.
//
// @param {Array.<module:engine/view/element~Element>} imageElements Array of image elements which will have its source replaced.
// @param {Array.<Object>} imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
// The array should be the same length as `imageElements` parameter.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
function replaceImagesFileSourceWithInlineRepresentation( imageElements, imagesHexSources, writer ) {
	// Images and HEX sources are paired purely by position, so the counts have to agree.
	if ( imageElements.length !== imagesHexSources.length ) {
		return;
	}

	imageElements.forEach( ( imageElement, index ) => {
		const { type, hex } = imagesHexSources[ index ];
		const inlineSource = `data:${ type };base64,${ _convertHexToBase64( hex ) }`;

		writer.setAttribute( 'src', inlineSource, imageElement );
	} );
}