// Retrieved from the UNPKG file viewer (8.61 kB, JavaScript).
1import {
2 open as zarrOpen,
3 root as zarrRoot,
4 get as zarrGet,
5 slice,
6 FetchStore,
7} from 'zarrita';
8
/**
 * Assemble the zarr chunks that make up one tile into a single dense
 * Float32Array.
 *
 * In single-row mode the chunks are concatenated in order. In multi-row mode
 * the result is laid out sample by sample: sample `i` occupies the elements
 * `[i * tileShape[1], (i + 1) * tileShape[1])`, and within that span the
 * chunks are concatenated in order.
 *
 * Positions not covered by any chunk (e.g. when the chunks span fewer than
 * `tileShape[1]` columns) keep the typed array's initial value of 0.
 *
 * @param {Array<{data: ArrayLike<number>, stride: number[]}>} chunks
 *   One entry per chromosome chunk. `data` must be a typed array in
 *   multi-row mode (its `subarray` method is used); `stride[0]` is taken to be
 *   the number of elements per sample in `data` (assumes the last axis is
 *   contiguous — TODO confirm against zarrita's chunk layout).
 *   `stride` is not read in single-row mode.
 * @param {number[]} tileShape `[numSamples, tileSize]`.
 * @param {boolean} isRow True when each chunk holds a single sample only.
 * @returns {Float32Array} Array of length `tileSize` (single row) or
 *   `numSamples * tileSize` (multi-row).
 * @throws {RangeError} If the chunk data does not fit in the tile array.
 */
function multivecChunksToTileDenseArray(chunks, tileShape, isRow) {
  const [numSamples, tileWidth] = tileShape;

  if (isRow) {
    // Single row: no need to iterate over samples, just concatenate.
    const rowArray = new Float32Array(tileWidth);
    let offset = 0;
    for (const { data } of chunks) {
      rowArray.set(data, offset);
      offset += data.length;
    }
    return rowArray;
  }

  const tileArray = new Float32Array(numSamples * tileWidth);
  for (let sampleI = 0; sampleI < numSamples; sampleI++) {
    // Anchor every sample at a multiple of the tile width. Carrying one
    // running offset across samples would shift all later samples whenever
    // the chunks cover fewer than `tileWidth` columns (partial tiles).
    let offset = sampleI * tileWidth;
    for (const { data, stride } of chunks) {
      const sampleLength = stride[0];
      const sampleData = data.subarray(
        sampleI * sampleLength,
        (sampleI + 1) * sampleLength,
      );
      tileArray.set(sampleData, offset);
      offset += sampleData.length;
    }
  }
  return tileArray;
}
38
/**
 * Data fetcher that reads multivec tiles from a zarr store.
 *
 * Must be invoked with `new`; the call returns an instance of an inner class
 * that closes over the helpers taken from `HGC`.
 *
 * @param {object} HGC Object providing `libraries.slugid` and the `utils`
 *   helpers `absToChr`, `parseChromsizesRows`,
 *   `genomicRangeToChromosomeChunks`, `DenseDataExtrema1D`, `minNonZero` and
 *   `maxNonZero`.
 * @param {...*} args Forwarded to the inner class constructor; the first
 *   argument is the `dataConfig` object (`url`, optional `options`, optional
 *   `row`).
 * @throws {Error} If called without `new`.
 */
const ZarrMultivecDataFetcher = function ZarrMultivecDataFetcher(HGC, ...args) {
  if (!new.target) {
    throw new Error(
      'Uncaught TypeError: Class constructor cannot be invoked without "new"',
    );
  }

  const { slugid } = HGC.libraries;
  const {
    absToChr,
    parseChromsizesRows,
    genomicRangeToChromosomeChunks,
    DenseDataExtrema1D,
    minNonZero,
    maxNonZero,
  } = HGC.utils;

  /**
   * Minimum and maximum of a dense array in a single pass.
   *
   * Yields the same values as `Math.min(...dense)` / `Math.max(...dense)`
   * (NaN propagates; an empty input gives `[Infinity, -Infinity]`) without
   * passing every element as a call argument, which throws a RangeError once
   * the array is larger than the engine's argument limit.
   *
   * @param {ArrayLike<number>} dense
   * @returns {number[]} `[min, max]`
   */
  function denseMinMax(dense) {
    let min = Infinity;
    let max = -Infinity;
    for (let i = 0; i < dense.length; i++) {
      min = Math.min(min, dense[i]);
      max = Math.max(max, dense[i]);
    }
    return [min, max];
  }

  class ZarrMultivecDataFetcherClass {
    /**
     * @param {object} dataConfig
     * @param {string} [dataConfig.url] Location of the zarr store.
     * @param {object} [dataConfig.options] Passed to `FetchStore`.
     * @param {number} [dataConfig.row] When set, only this row is fetched
     *   for each tile.
     */
    constructor(dataConfig) {
      this.dataConfig = dataConfig;
      this.trackUid = slugid.nice();

      if (dataConfig.url) {
        // S3 bucket must have a CORS policy to allow reading from any origin.
        const { url, options = {} } = dataConfig;
        this.store = new FetchStore(url, options);
        this.storeRoot = Promise.resolve(zarrRoot(this.store));
      }

      if (dataConfig.row !== undefined) {
        this.row = dataConfig.row;
      }
    }

    /**
     * Build the tileset info from the attributes of the zarr root group.
     *
     * @param {function(object): void} [callback] Called with the tileset
     *   info on success, or with `{ error: string }` on failure.
     * @returns {Promise<object|undefined>} Resolves with the tileset info,
     *   or with `undefined` when loading failed (the promise never rejects
     *   unless the callback itself throws while reporting the error).
     */
    tilesetInfo(callback) {
      this.tilesetInfoLoading = true;

      // Without a URL no store was created; route that through the promise
      // chain so it is reported like any other loading error instead of
      // throwing synchronously.
      const rootPromise = this.storeRoot
        || Promise.reject(new Error('dataConfig.url is required to open the zarr store'));

      // Use the tileset_info stored as JSON in file.zarr/.zattrs
      return rootPromise
        .then(root => zarrOpen(root))
        .then(grp => {
          const { attrs } = grp;
          this.tilesetInfoLoading = false;

          const chromSizes = attrs.multiscales.map(d => [d.name, d.metadata.chromsize]);

          // The genome ends where the last listed chromosome ends.
          const finalChrom = attrs.multiscales[attrs.multiscales.length - 1];
          const maxPos = finalChrom.metadata.chromoffset + finalChrom.metadata.chromsize;
          const tileSize = attrs.shape[1];
          const retVal = {
            ...attrs,
            // Reported shape swaps the two axes of the stored shape.
            shape: [attrs.shape[1], attrs.shape[0]],
            chromSizes,
            tile_size: tileSize,
            max_width: maxPos,
            min_pos: [0],
            max_pos: [maxPos],
            max_zoom: Math.ceil(Math.log(maxPos / tileSize) / Math.log(2)),
          };

          if (callback) {
            callback(retVal);
          }

          return retVal;
        })
        .catch(err => {
          this.tilesetInfoLoading = false;

          if (callback) {
            callback({
              error: `Error parsing zarr multivec: ${err}`,
            });
          } else {
            // Nobody to report to: at least leave a trace of the failure.
            console.warn(`Error parsing zarr multivec: ${err}`);
          }
        });
    }

    /**
     * Fetch several tiles and hand them to `receivedTiles` once all of them
     * have loaded.
     *
     * @param {function(object): void} receivedTiles Called with an object
     *   mapping each valid tile id to its tile. Not called if any tile fails.
     * @param {string[]} tileIds Ids of the form `"<zoom>.<x>"`; ids whose
     *   zoom or position is not a number are skipped with a warning.
     * @returns {object} The (initially empty) object that is filled in and
     *   passed to `receivedTiles` later.
     */
    fetchTilesDebounced(receivedTiles, tileIds) {
      const tiles = {};

      const validTileIds = [];
      const tilePromises = [];

      for (const tileId of tileIds) {
        const parts = tileId.split('.');
        const z = parseInt(parts[0], 10);
        const x = parseInt(parts[1], 10);

        if (Number.isNaN(x) || Number.isNaN(z)) {
          console.warn('Invalid tile zoom or position:', z, x);
          continue;
        }

        validTileIds.push(tileId);
        tilePromises.push(this.tile(z, x, tileId));
      }

      Promise.all(tilePromises)
        .then(values => {
          for (let i = 0; i < values.length; i++) {
            const validTileId = validTileIds[i];
            tiles[validTileId] = values[i];
            tiles[validTileId].tilePositionId = validTileId;
          }
          receivedTiles(tiles);
        })
        .catch(err => {
          // The promise is not returned to the caller, so a failure must be
          // handled here or it surfaces as an unhandled rejection.
          console.error('Error fetching zarr multivec tiles:', err);
        });
      return tiles;
    }

    /**
     * Fetch a single tile.
     *
     * @param {number} z Zoom level; used as an index into the tileset
     *   info's `resolutions`.
     * @param {number} x Tile position at that zoom level.
     * @param {string} tileId Id stored on the returned tile.
     * @returns {Promise<object>} Resolves with the tile (`dense` data plus
     *   summary statistics and metadata); rejects if the tileset info or any
     *   chunk cannot be loaded.
     */
    tile(z, x, tileId) {
      const { storeRoot } = this;
      return this.tilesetInfo().then(tsInfo => {
        if (!tsInfo) {
          // tilesetInfo() resolves with undefined when loading failed.
          throw new Error(`Cannot fetch tile ${tileId}: tileset info is unavailable`);
        }

        // Adapted from clodius.tiles.multivec.get_single_tile
        // Reference: https://github.com/higlass/clodius/blob/develop/clodius/tiles/multivec.py#L66

        // z is the index of the resolution that should be selected.
        // Resolution is size of each bin (except for the last bin in each chromosome).
        const resolution = +tsInfo.resolutions[z];
        const tileSize = +tsInfo.tile_size;
        const binSize = resolution;

        // Where in the data does the tile start and end?
        const tileStart = x * tileSize * resolution;
        const tileEnd = tileStart + tileSize * resolution;

        // chromSizes is an array of "tuples" [ ["chr1", 1000], ... ]
        const { chromSizes } = tsInfo;

        // Adapted from clodius.tiles.multivec.get_tile
        // Reference: https://github.com/higlass/clodius/blob/develop/clodius/tiles/multivec.py#L110

        const chromInfo = parseChromsizesRows(chromSizes);
        const [chrStart, chrStartPos] = absToChr(tileStart, chromInfo);
        const [chrEnd, chrEndPos] = absToChr(tileEnd, chromInfo);
        const genomicStart = { chr: chrStart, pos: chrStartPos };
        const genomicEnd = { chr: chrEnd, pos: chrEndPos };

        // Using the [genomicStart, genomicEnd] range, get an array of "chromosome chunks",
        // where each chunk range starts and ends with the same chromosome.
        // Start a new chromosome chunk at each chromosome boundary.
        const chrChunks = genomicRangeToChromosomeChunks(
          chromSizes,
          genomicStart,
          genomicEnd,
          binSize,
          tileSize,
        );

        const isRow = this.row !== undefined;
        // Select one row, or all rows (null) when no row was configured.
        const rowSelection = isRow ? this.row : null;

        // Get the zarr data for each chromosome chunk,
        // since data for each chromosome is stored in a separate zarr array.
        return Promise.all(
          chrChunks.map(([chrName, zStart, zEnd]) => storeRoot
            .then(root => zarrOpen(root.resolve(`/chromosomes/${chrName}/${resolution}/`), { kind: "array" }))
            .then(arr => zarrGet(arr, [rowSelection, slice(zStart, zEnd)]))),
        ).then(chunks => {
          const dense = multivecChunksToTileDenseArray(
            chunks,
            [tsInfo.shape[1], tsInfo.shape[0]],
            isRow,
          );
          const [minValue, maxValue] = denseMinMax(dense);
          return {
            dense,
            denseDataExtrema: new DenseDataExtrema1D(dense),
            dtype: 'float32',
            min_value: minValue,
            max_value: maxValue,
            minNonZero: minNonZero(dense),
            maxNonZero: maxNonZero(dense),
            server: null,
            size: 1,
            shape: tsInfo.shape,
            tileId,
            tilePos: [x],
            tilePositionId: tileId,
            tilesetUid: null,
            zoomLevel: z,
          };
        });
      });
    }
  } // end class
  return new ZarrMultivecDataFetcherClass(...args);
}; // end function wrapper
226
227
// Static metadata attached to the fetcher: the data fetcher type string.
// NOTE(review): presumably matched against `dataConfig.type` by the host
// application when selecting a data fetcher — confirm.
ZarrMultivecDataFetcher.config = { type: 'zarr-multivec' };

export default ZarrMultivecDataFetcher;