UNPKG

8.86 kBJavaScriptView Raw
1// Implemented by Zoe Xi @zoexi for GSOC 2016
2// https://github.com/cytoscape/cytoscape.js-hierarchical
3
4// Implemented from the reference library: https://harthur.github.io/clusterfck/
5
6import * as util from '../../util';
7import clusteringDistance from './clustering-distances';
8
9const defaults = util.defaults({
10 distance: 'euclidean', // distance metric to compare nodes
11 linkage: 'min', // linkage criterion : how to determine the distance between clusters of nodes
12 mode: 'threshold',
13 // mode:'threshold' => clusters must be threshold distance apart
14 threshold: Infinity, // the distance threshold
15 // mode:'dendrogram' => the nodes are organised as leaves in a tree (siblings are close), merging makes clusters
16 addDendrogram: false, // whether to add the dendrogram to the graph for viz
17 dendrogramDepth: 0, // depth at which dendrogram branches are merged into the returned clusters
18 attributes: [] // array of attr functions
19});
20
21const linkageAliases = {
22 'single': 'min',
23 'complete': 'max'
24};
25
26let setOptions = ( options ) => {
27 let opts = defaults( options );
28
29 let preferredAlias = linkageAliases[ opts.linkage ];
30
31 if( preferredAlias != null ){
32 opts.linkage = preferredAlias;
33 }
34
35 return opts;
36};
37
38let mergeClosest = function( clusters, index, dists, mins, opts ) {
39 // Find two closest clusters from cached mins
40 let minKey = 0;
41 let min = Infinity;
42 let dist;
43 let attrs = opts.attributes;
44
45 let getDist = (n1, n2) => clusteringDistance( opts.distance, attrs.length, i => attrs[i](n1), i => attrs[i](n2), n1, n2 );
46
47 for ( let i = 0; i < clusters.length; i++ ) {
48 let key = clusters[i].key;
49 let dist = dists[key][mins[key]];
50 if ( dist < min ) {
51 minKey = key;
52 min = dist;
53 }
54 }
55 if ( (opts.mode === 'threshold' && min >= opts.threshold) ||
56 (opts.mode === 'dendrogram' && clusters.length === 1) ) {
57 return false;
58 }
59
60 let c1 = index[minKey];
61 let c2 = index[mins[minKey]];
62 let merged;
63
64 // Merge two closest clusters
65 if ( opts.mode === 'dendrogram' ) {
66 merged = {
67 left: c1,
68 right: c2,
69 key: c1.key
70 };
71 }
72 else {
73 merged = {
74 value: c1.value.concat(c2.value),
75 key: c1.key
76 };
77 }
78
79 clusters[c1.index] = merged;
80 clusters.splice(c2.index, 1);
81
82 index[c1.key] = merged;
83
84 // Update distances with new merged cluster
85 for ( let i = 0; i < clusters.length; i++ ) {
86 let cur = clusters[i];
87
88 if ( c1.key === cur.key ) {
89 dist = Infinity;
90 }
91 else if ( opts.linkage === 'min' ) {
92 dist = dists[c1.key][cur.key];
93 if ( dists[c1.key][cur.key] > dists[c2.key][cur.key] ) {
94 dist = dists[c2.key][cur.key];
95 }
96 }
97 else if ( opts.linkage === 'max' ) {
98 dist = dists[c1.key][cur.key];
99 if ( dists[c1.key][cur.key] < dists[c2.key][cur.key] ) {
100 dist = dists[c2.key][cur.key];
101 }
102 }
103 else if ( opts.linkage === 'mean' ) {
104 dist = (dists[c1.key][cur.key] * c1.size + dists[c2.key][cur.key] * c2.size) / (c1.size + c2.size);
105 }
106 else {
107 if ( opts.mode === 'dendrogram' )
108 dist = getDist( cur.value, c1.value );
109 else
110 dist = getDist( cur.value[0], c1.value[0] );
111 }
112
113 dists[c1.key][cur.key] = dists[cur.key][c1.key] = dist; // distance matrix is symmetric
114 }
115
116 // Update cached mins
117 for ( let i = 0; i < clusters.length; i++ ) {
118 let key1 = clusters[i].key;
119 if ( mins[key1] === c1.key || mins[key1] === c2.key ) {
120 let min = key1;
121 for ( let j = 0; j < clusters.length; j++ ) {
122 let key2 = clusters[j].key;
123 if ( dists[key1][key2] < dists[key1][min] ) {
124 min = key2;
125 }
126 }
127 mins[key1] = min;
128 }
129 clusters[i].index = i;
130 }
131
132 // Clean up meta data used for clustering
133 c1.key = c2.key = c1.index = c2.index = null;
134
135 return true;
136};
137
138let getAllChildren = function( root, arr, cy ) {
139
140 if ( !root )
141 return;
142
143 if ( root.value ) {
144 arr.push( root.value );
145 }
146 else {
147 if ( root.left )
148 getAllChildren( root.left, arr, cy );
149 if ( root.right )
150 getAllChildren( root.right, arr, cy );
151 }
152};
153
154let buildDendrogram = function ( root, cy ) {
155
156 if ( !root )
157 return '';
158
159 if ( root.left && root.right ) {
160
161 let leftStr = buildDendrogram( root.left, cy );
162 let rightStr = buildDendrogram( root.right, cy );
163
164 let node = cy.add({group:'nodes', data: {id: leftStr + ',' + rightStr}});
165
166 cy.add({group:'edges', data: { source: leftStr, target: node.id() }});
167 cy.add({group:'edges', data: { source: rightStr, target: node.id() }});
168
169 return node.id();
170 }
171 else if ( root.value ) {
172 return root.value.id();
173 }
174
175};
176
177let buildClustersFromTree = function( root, k, cy ) {
178
179 if ( !root )
180 return [];
181
182 let left = [], right = [], leaves = [];
183
184 if ( k === 0 ) { // don't cut tree, simply return all nodes as 1 single cluster
185 if ( root.left )
186 getAllChildren( root.left, left, cy );
187 if ( root.right )
188 getAllChildren( root.right, right, cy );
189
190 leaves = left.concat(right);
191 return [ cy.collection(leaves) ];
192 }
193 else if ( k === 1 ) { // cut at root
194
195 if ( root.value ) { // leaf node
196 return [ cy.collection( root.value ) ];
197 }
198 else {
199 if ( root.left )
200 getAllChildren( root.left, left, cy );
201 if ( root.right )
202 getAllChildren( root.right, right, cy );
203
204 return [ cy.collection(left), cy.collection(right) ];
205 }
206 }
207 else {
208 if ( root.value ) {
209 return [ cy.collection(root.value) ];
210 }
211 else {
212 if ( root.left )
213 left = buildClustersFromTree( root.left, k - 1, cy );
214 if ( root.right )
215 right = buildClustersFromTree( root.right, k - 1, cy );
216
217 return left.concat(right);
218 }
219 }
220};
221
222if( process.env.NODE_ENV !== 'production' ){ /* eslint-disable no-console, no-unused-vars */
223 let printMatrix = function( M ) { // used for debugging purposes only
224 let n = M.length;
225 for(let i = 0; i < n; i++ ) {
226 let row = '';
227 for ( let j = 0; j < n; j++ ) {
228 row += Math.round(M[i][j]*100)/100 + ' ';
229 }
230 console.log(row);
231 }
232 console.log('');
233 };
234} /* eslint-enable */
235
236let hierarchicalClustering = function( options ){
237 let cy = this.cy();
238 let nodes = this.nodes();
239
240 // Set parameters of algorithm: linkage type, distance metric, etc.
241 let opts = setOptions( options );
242
243 let attrs = opts.attributes;
244 let getDist = (n1, n2) => clusteringDistance( opts.distance, attrs.length, i => attrs[i](n1), i => attrs[i](n2), n1, n2 );
245
246 // Begin hierarchical algorithm
247 let clusters = [];
248 let dists = []; // distances between each pair of clusters
249 let mins = []; // closest cluster for each cluster
250 let index = []; // hash of all clusters by key
251
252 // In agglomerative (bottom-up) clustering, each node starts as its own cluster
253 for ( let n = 0; n < nodes.length; n++ ) {
254 let cluster = {
255 value: (opts.mode === 'dendrogram') ? nodes[n] : [ nodes[n] ],
256 key: n,
257 index: n
258 };
259 clusters[n] = cluster;
260 index[n] = cluster;
261 dists[n] = [];
262 mins[n] = 0;
263 }
264
265 // Calculate the distance between each pair of clusters
266 for ( let i = 0; i < clusters.length; i++ ) {
267 for ( let j = 0; j <= i; j++ ) {
268 let dist;
269
270 if ( opts.mode === 'dendrogram' ){ // modes store cluster values differently
271 dist = (i === j) ? Infinity : getDist( clusters[i].value, clusters[j].value );
272 } else {
273 dist = (i === j) ? Infinity : getDist( clusters[i].value[0], clusters[j].value[0] );
274 }
275
276 dists[i][j] = dist;
277 dists[j][i] = dist;
278
279 if ( dist < dists[i][mins[i]] ) {
280 mins[i] = j; // Cache mins: closest cluster to cluster i is cluster j
281 }
282 }
283 }
284
285 // Find the closest pair of clusters and merge them into a single cluster.
286 // Update distances between new cluster and each of the old clusters, and loop until threshold reached.
287 let merged = mergeClosest( clusters, index, dists, mins, opts );
288 while ( merged ) {
289 merged = mergeClosest( clusters, index, dists, mins, opts );
290 }
291
292 let retClusters;
293
294 // Dendrogram mode builds the hierarchy and adds intermediary nodes + edges
295 // in addition to returning the clusters.
296 if ( opts.mode === 'dendrogram') {
297 retClusters = buildClustersFromTree( clusters[0], opts.dendrogramDepth, cy );
298
299 if ( opts.addDendrogram )
300 buildDendrogram( clusters[0], cy );
301 }
302 else { // Regular mode simply returns the clusters
303
304 retClusters = new Array(clusters.length);
305 clusters.forEach( function( cluster, i ) {
306 // Clean up meta data used for clustering
307 cluster.key = cluster.index = null;
308
309 retClusters[i] = cy.collection( cluster.value );
310 });
311 }
312
313 return retClusters;
314};
315
316export default { hierarchicalClustering, hca: hierarchicalClustering };