;
/*!
 * Copyright 2022 Google LLC. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.TransferManager = void 0;
const pLimit = require("p-limit");
const path = require("path");
const extend = require("extend");
const fs_1 = require("fs");
const crc32c_1 = require("./crc32c");
/**
 * Default number of concurrently executing promises to use when calling uploadManyFiles.
 * @experimental
 */
const DEFAULT_PARALLEL_UPLOAD_LIMIT = 2;
/**
 * Default number of concurrently executing promises to use when calling downloadManyFiles.
 * @experimental
 */
const DEFAULT_PARALLEL_DOWNLOAD_LIMIT = 2;
/**
 * Default number of concurrently executing promises to use when calling downloadFileInChunks.
 * @experimental
 */
const DEFAULT_PARALLEL_CHUNKED_DOWNLOAD_LIMIT = 2;
/**
 * The minimum size threshold in bytes at which to apply a chunked download strategy when calling downloadFileInChunks.
 * @experimental
 */
const DOWNLOAD_IN_CHUNKS_FILE_SIZE_THRESHOLD = 32 * 1024 * 1024;
/**
 * The chunk size in bytes to use when calling downloadFileInChunks.
 * @experimental
 */
const DOWNLOAD_IN_CHUNKS_DEFAULT_CHUNK_SIZE = 10 * 1024 * 1024;
// Regex source that matches the empty string; used when no stripPrefix is given
// so the same replace() code path can run unconditionally.
const EMPTY_REGEX = '(?:)';
/**
 * Create a TransferManager object to perform parallel transfer operations on a Cloud Storage bucket.
 *
 * @class
 * @hideconstructor
 *
 * @param {Bucket} bucket A {@link Bucket} instance
 * @experimental
 */
class TransferManager {
    constructor(bucket) {
        this.bucket = bucket;
    }
    /**
     * @typedef {object} UploadManyFilesOptions
     * @property {number} [concurrencyLimit] The number of concurrently executing promises
     * to use when uploading the files.
     * @property {boolean} [skipIfExists] Do not upload the file if it already exists in
     * the bucket. This will set the precondition ifGenerationMatch = 0.
     * @property {string} [prefix] A prefix to append to all of the uploaded files.
     * @property {object} [passthroughOptions] {@link UploadOptions} Options to be passed through
     * to each individual upload operation.
     * @experimental
     */
    /**
     * Upload multiple files in parallel to the bucket. This is a convenience method
     * that utilizes {@link Bucket#upload} to perform the upload.
     *
     * @param {array | string} [filePathsOrDirectory] An array of fully qualified paths to the
     * files to be uploaded to the bucket, or a directory name. If a directory name is provided,
     * the directory will be recursively walked and all files will be added to the upload list.
     * @param {UploadManyFilesOptions} [options] Configuration options.
     * @returns {Promise<UploadResponse[]>}
     *
     * @example
     * ```
     * const {Storage} = require('@google-cloud/storage');
     * const storage = new Storage();
     * const bucket = storage.bucket('my-bucket');
     * const transferManager = new TransferManager(bucket);
     *
     * //-
     * // Upload multiple files in parallel.
     * //-
     * const response = await transferManager.uploadManyFiles(['/local/path/file1.txt', '/local/path/file2.txt']);
     * // Your bucket now contains:
     * // - "local/path/file1.txt" (with the contents of '/local/path/file1.txt')
     * // - "local/path/file2.txt" (with the contents of '/local/path/file2.txt')
     * const response = await transferManager.uploadManyFiles('/local/directory');
     * // Your bucket will now contain all files contained in '/local/directory' maintaining the subdirectory structure.
     * ```
     * @experimental
     */
    async uploadManyFiles(filePathsOrDirectory, options = {}) {
        // skipIfExists is implemented with the ifGenerationMatch = 0 precondition:
        // the upload only succeeds when no generation of the object exists yet.
        if (options.skipIfExists &&
            options.passthroughOptions &&
            options.passthroughOptions.preconditionOpts) {
            options.passthroughOptions.preconditionOpts.ifGenerationMatch = 0;
        }
        else if (options.skipIfExists &&
            options.passthroughOptions === undefined) {
            options.passthroughOptions = {
                preconditionOpts: {
                    ifGenerationMatch: 0,
                },
            };
        }
        const limit = pLimit(options.concurrencyLimit || DEFAULT_PARALLEL_UPLOAD_LIMIT);
        const promises = [];
        let allPaths = [];
        if (!Array.isArray(filePathsOrDirectory)) {
            // A directory name was given: walk it recursively to build the upload list.
            for await (const curPath of this.getPathsFromDirectory(filePathsOrDirectory)) {
                allPaths.push(curPath);
            }
        }
        else {
            allPaths = filePathsOrDirectory;
        }
        for (const filePath of allPaths) {
            const stat = await fs_1.promises.lstat(filePath);
            if (stat.isDirectory()) {
                continue;
            }
            // Deep-copy the passthrough options so per-file destination tweaks
            // do not leak across iterations.
            const passThroughOptionsCopy = extend(true, {}, options.passthroughOptions);
            passThroughOptionsCopy.destination = filePath;
            if (options.prefix) {
                passThroughOptionsCopy.destination = path.join(options.prefix, passThroughOptionsCopy.destination);
            }
            promises.push(limit(() => this.bucket.upload(filePath, passThroughOptionsCopy)));
        }
        return Promise.all(promises);
    }
    /**
     * @typedef {object} DownloadManyFilesOptions
     * @property {number} [concurrencyLimit] The number of concurrently executing promises
     * to use when downloading the files.
     * @property {string} [prefix] A prefix to append to all of the downloaded files.
     * @property {string} [stripPrefix] A prefix to remove from all of the downloaded files.
     * @property {object} [passthroughOptions] {@link DownloadOptions} Options to be passed through
     * to each individual download operation.
     * @experimental
     */
    /**
     * Download multiple files in parallel to the local filesystem. This is a convenience method
     * that utilizes {@link File#download} to perform the download.
     *
     * @param {array | string} [filesOrFolder] An array of file name strings or file objects to be downloaded. If
     * a string is provided this will be treated as a GCS prefix and all files with that prefix will be downloaded.
     * @param {DownloadManyFilesOptions} [options] Configuration options.
     * @returns {Promise<DownloadResponse[]>}
     *
     * @example
     * ```
     * const {Storage} = require('@google-cloud/storage');
     * const storage = new Storage();
     * const bucket = storage.bucket('my-bucket');
     * const transferManager = new TransferManager(bucket);
     *
     * //-
     * // Download multiple files in parallel.
     * //-
     * const response = await transferManager.downloadManyFiles(['file1.txt', 'file2.txt']);
     * // The following files have been downloaded:
     * // - "file1.txt" (with the contents from my-bucket.file1.txt)
     * // - "file2.txt" (with the contents from my-bucket.file2.txt)
     * const response = await transferManager.downloadManyFiles([bucket.File('file1.txt'), bucket.File('file2.txt')]);
     * // The following files have been downloaded:
     * // - "file1.txt" (with the contents from my-bucket.file1.txt)
     * // - "file2.txt" (with the contents from my-bucket.file2.txt)
     * const response = await transferManager.downloadManyFiles('test-folder');
     * // All files with GCS prefix of 'test-folder' have been downloaded.
     * ```
     * @experimental
     */
    async downloadManyFiles(filesOrFolder, options = {}) {
        const limit = pLimit(options.concurrencyLimit || DEFAULT_PARALLEL_DOWNLOAD_LIMIT);
        const promises = [];
        let files = [];
        if (!Array.isArray(filesOrFolder)) {
            // Treat the string as a GCS prefix and download everything under it.
            const directoryFiles = await this.bucket.getFiles({
                prefix: filesOrFolder,
            });
            files = directoryFiles[0];
        }
        else {
            files = filesOrFolder.map(curFile => {
                if (typeof curFile === 'string') {
                    return this.bucket.file(curFile);
                }
                return curFile;
            });
        }
        // NOTE(review): stripPrefix is used verbatim as a regex source, so any
        // regex metacharacters in the prefix are interpreted as such — confirm
        // whether it should be escaped before building the pattern.
        const stripRegexString = options.stripPrefix
            ? `^${options.stripPrefix}`
            : EMPTY_REGEX;
        const regex = new RegExp(stripRegexString, 'g');
        for (const file of files) {
            // Deep-copy so per-file destination tweaks do not leak across iterations.
            const passThroughOptionsCopy = extend(true, {}, options.passthroughOptions);
            if (options.prefix) {
                passThroughOptionsCopy.destination = path.join(options.prefix || '', passThroughOptionsCopy.destination || '', file.name);
            }
            if (options.stripPrefix) {
                // NOTE(review): when both prefix and stripPrefix are set, this
                // overwrites the prefixed destination computed above — confirm
                // that is the intended precedence.
                passThroughOptionsCopy.destination = file.name.replace(regex, '');
            }
            promises.push(limit(() => file.download(passThroughOptionsCopy)));
        }
        return Promise.all(promises);
    }
    /**
     * @typedef {object} DownloadFileInChunksOptions
     * @property {number} [concurrencyLimit] The number of concurrently executing promises
     * to use when downloading the file.
     * @property {number} [chunkSizeBytes] The size in bytes of each chunk to be downloaded.
     * @property {string | boolean} [validation] Whether or not to perform a CRC32C validation check when download is complete.
     * @experimental
     */
    /**
     * Download a large file in chunks utilizing parallel download operations. This is a convenience method
     * that utilizes {@link File#download} to perform the download.
     *
     * @param {object} [file | string] {@link File} to download.
     * @param {DownloadFileInChunksOptions} [options] Configuration options.
     * @returns {Promise<DownloadResponse>}
     *
     * @example
     * ```
     * const {Storage} = require('@google-cloud/storage');
     * const storage = new Storage();
     * const bucket = storage.bucket('my-bucket');
     * const transferManager = new TransferManager(bucket);
     *
     * //-
     * // Download a large file in chunks utilizing parallel operations.
     * //-
     * const response = await transferManager.downloadFileInChunks(bucket.file('large-file.txt'));
     * // Your local directory now contains:
     * // - "large-file.txt" (with the contents from my-bucket.large-file.txt)
     * * ```
     * @experimental
     */
    async downloadFileInChunks(fileOrName, options = {}) {
        let chunkSize = options.chunkSizeBytes || DOWNLOAD_IN_CHUNKS_DEFAULT_CHUNK_SIZE;
        let limit = pLimit(options.concurrencyLimit || DEFAULT_PARALLEL_CHUNKED_DOWNLOAD_LIMIT);
        const promises = [];
        const file = typeof fileOrName === 'string'
            ? this.bucket.file(fileOrName)
            : fileOrName;
        const fileInfo = await file.get();
        const size = parseInt(fileInfo[0].metadata.size, 10);
        // If the file size does not meet the threshold download it as a single chunk.
        if (size < DOWNLOAD_IN_CHUNKS_FILE_SIZE_THRESHOLD) {
            limit = pLimit(1);
            chunkSize = size;
        }
        let start = 0;
        const filePath = options.destination || path.basename(file.name);
        const fileToWrite = await fs_1.promises.open(filePath, 'w+');
        while (start < size) {
            const chunkStart = start;
            // Byte ranges are inclusive; clamp the final chunk so it does not
            // extend past the object size.
            let chunkEnd = start + chunkSize - 1;
            chunkEnd = chunkEnd > size ? size : chunkEnd;
            promises.push(limit(() => file.download({ start: chunkStart, end: chunkEnd }).then(resp => {
                // Write each downloaded chunk at its own offset in the target file.
                return fileToWrite.write(resp[0], 0, resp[0].length, chunkStart);
            })));
            start += chunkSize;
        }
        return new Promise((resolve, reject) => {
            let results;
            Promise.all(promises)
                .then(data => {
                results = data.map(result => result.buffer);
                if (options.validation === 'crc32c') {
                    // NOTE(review): the CRC32C of the written file is computed but
                    // never compared against the object's stored checksum — confirm
                    // whether a mismatch should reject the returned promise.
                    return crc32c_1.CRC32C.fromFile(filePath);
                }
                return;
            })
                .then(() => {
                resolve(results);
            })
                .catch(e => {
                reject(e);
            })
                .finally(() => {
                // Always release the file handle, even on failure.
                fileToWrite.close();
            });
        });
    }
    /**
     * Recursively walks `directory`, yielding the fully qualified path of every
     * entry found: files are yielded directly, subdirectories are descended into.
     *
     * @param {string} directory The directory to walk.
     * @yields {string} The full path of each file under the directory tree.
     */
    async *getPathsFromDirectory(directory) {
        const filesAndSubdirectories = await fs_1.promises.readdir(directory, {
            withFileTypes: true,
        });
        for (const curFileOrDirectory of filesAndSubdirectories) {
            const fullPath = path.join(directory, curFileOrDirectory.name);
            if (curFileOrDirectory.isDirectory()) {
                yield* this.getPathsFromDirectory(fullPath);
            }
            else {
                yield fullPath;
            }
        }
    }
}
exports.TransferManager = TransferManager;
//# sourceMappingURL=transfer-manager.js.map