1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 | import * as util from '../../util';
|
7 | import clusteringDistance from './clustering-distances';
|
8 |
|
9 | const defaults = util.defaults({
|
10 | distance: 'euclidean',
|
11 | linkage: 'min',
|
12 | mode: 'threshold',
|
13 |
|
14 | threshold: Infinity,
|
15 |
|
16 | addDendrogram: false,
|
17 | dendrogramDepth: 0,
|
18 | attributes: []
|
19 | });
|
20 |
|
21 | const linkageAliases = {
|
22 | 'single': 'min',
|
23 | 'complete': 'max'
|
24 | };
|
25 |
|
26 | let setOptions = ( options ) => {
|
27 | let opts = defaults( options );
|
28 |
|
29 | let preferredAlias = linkageAliases[ opts.linkage ];
|
30 |
|
31 | if( preferredAlias != null ){
|
32 | opts.linkage = preferredAlias;
|
33 | }
|
34 |
|
35 | return opts;
|
36 | };
|
37 |
|
38 | let mergeClosest = function( clusters, index, dists, mins, opts ) {
|
39 |
|
40 | let minKey = 0;
|
41 | let min = Infinity;
|
42 | let dist;
|
43 | let attrs = opts.attributes;
|
44 |
|
45 | let getDist = (n1, n2) => clusteringDistance( opts.distance, attrs.length, i => attrs[i](n1), i => attrs[i](n2), n1, n2 );
|
46 |
|
47 | for ( let i = 0; i < clusters.length; i++ ) {
|
48 | let key = clusters[i].key;
|
49 | let dist = dists[key][mins[key]];
|
50 | if ( dist < min ) {
|
51 | minKey = key;
|
52 | min = dist;
|
53 | }
|
54 | }
|
55 | if ( (opts.mode === 'threshold' && min >= opts.threshold) ||
|
56 | (opts.mode === 'dendrogram' && clusters.length === 1) ) {
|
57 | return false;
|
58 | }
|
59 |
|
60 | let c1 = index[minKey];
|
61 | let c2 = index[mins[minKey]];
|
62 | let merged;
|
63 |
|
64 |
|
65 | if ( opts.mode === 'dendrogram' ) {
|
66 | merged = {
|
67 | left: c1,
|
68 | right: c2,
|
69 | key: c1.key
|
70 | };
|
71 | }
|
72 | else {
|
73 | merged = {
|
74 | value: c1.value.concat(c2.value),
|
75 | key: c1.key
|
76 | };
|
77 | }
|
78 |
|
79 | clusters[c1.index] = merged;
|
80 | clusters.splice(c2.index, 1);
|
81 |
|
82 | index[c1.key] = merged;
|
83 |
|
84 |
|
85 | for ( let i = 0; i < clusters.length; i++ ) {
|
86 | let cur = clusters[i];
|
87 |
|
88 | if ( c1.key === cur.key ) {
|
89 | dist = Infinity;
|
90 | }
|
91 | else if ( opts.linkage === 'min' ) {
|
92 | dist = dists[c1.key][cur.key];
|
93 | if ( dists[c1.key][cur.key] > dists[c2.key][cur.key] ) {
|
94 | dist = dists[c2.key][cur.key];
|
95 | }
|
96 | }
|
97 | else if ( opts.linkage === 'max' ) {
|
98 | dist = dists[c1.key][cur.key];
|
99 | if ( dists[c1.key][cur.key] < dists[c2.key][cur.key] ) {
|
100 | dist = dists[c2.key][cur.key];
|
101 | }
|
102 | }
|
103 | else if ( opts.linkage === 'mean' ) {
|
104 | dist = (dists[c1.key][cur.key] * c1.size + dists[c2.key][cur.key] * c2.size) / (c1.size + c2.size);
|
105 | }
|
106 | else {
|
107 | if ( opts.mode === 'dendrogram' )
|
108 | dist = getDist( cur.value, c1.value );
|
109 | else
|
110 | dist = getDist( cur.value[0], c1.value[0] );
|
111 | }
|
112 |
|
113 | dists[c1.key][cur.key] = dists[cur.key][c1.key] = dist;
|
114 | }
|
115 |
|
116 |
|
117 | for ( let i = 0; i < clusters.length; i++ ) {
|
118 | let key1 = clusters[i].key;
|
119 | if ( mins[key1] === c1.key || mins[key1] === c2.key ) {
|
120 | let min = key1;
|
121 | for ( let j = 0; j < clusters.length; j++ ) {
|
122 | let key2 = clusters[j].key;
|
123 | if ( dists[key1][key2] < dists[key1][min] ) {
|
124 | min = key2;
|
125 | }
|
126 | }
|
127 | mins[key1] = min;
|
128 | }
|
129 | clusters[i].index = i;
|
130 | }
|
131 |
|
132 |
|
133 | c1.key = c2.key = c1.index = c2.index = null;
|
134 |
|
135 | return true;
|
136 | };
|
137 |
|
138 | let getAllChildren = function( root, arr, cy ) {
|
139 |
|
140 | if ( !root )
|
141 | return;
|
142 |
|
143 | if ( root.value ) {
|
144 | arr.push( root.value );
|
145 | }
|
146 | else {
|
147 | if ( root.left )
|
148 | getAllChildren( root.left, arr, cy );
|
149 | if ( root.right )
|
150 | getAllChildren( root.right, arr, cy );
|
151 | }
|
152 | };
|
153 |
|
154 | let buildDendrogram = function ( root, cy ) {
|
155 |
|
156 | if ( !root )
|
157 | return '';
|
158 |
|
159 | if ( root.left && root.right ) {
|
160 |
|
161 | let leftStr = buildDendrogram( root.left, cy );
|
162 | let rightStr = buildDendrogram( root.right, cy );
|
163 |
|
164 | let node = cy.add({group:'nodes', data: {id: leftStr + ',' + rightStr}});
|
165 |
|
166 | cy.add({group:'edges', data: { source: leftStr, target: node.id() }});
|
167 | cy.add({group:'edges', data: { source: rightStr, target: node.id() }});
|
168 |
|
169 | return node.id();
|
170 | }
|
171 | else if ( root.value ) {
|
172 | return root.value.id();
|
173 | }
|
174 |
|
175 | };
|
176 |
|
177 | let buildClustersFromTree = function( root, k, cy ) {
|
178 |
|
179 | if ( !root )
|
180 | return [];
|
181 |
|
182 | let left = [], right = [], leaves = [];
|
183 |
|
184 | if ( k === 0 ) {
|
185 | if ( root.left )
|
186 | getAllChildren( root.left, left, cy );
|
187 | if ( root.right )
|
188 | getAllChildren( root.right, right, cy );
|
189 |
|
190 | leaves = left.concat(right);
|
191 | return [ cy.collection(leaves) ];
|
192 | }
|
193 | else if ( k === 1 ) {
|
194 |
|
195 | if ( root.value ) {
|
196 | return [ cy.collection( root.value ) ];
|
197 | }
|
198 | else {
|
199 | if ( root.left )
|
200 | getAllChildren( root.left, left, cy );
|
201 | if ( root.right )
|
202 | getAllChildren( root.right, right, cy );
|
203 |
|
204 | return [ cy.collection(left), cy.collection(right) ];
|
205 | }
|
206 | }
|
207 | else {
|
208 | if ( root.value ) {
|
209 | return [ cy.collection(root.value) ];
|
210 | }
|
211 | else {
|
212 | if ( root.left )
|
213 | left = buildClustersFromTree( root.left, k - 1, cy );
|
214 | if ( root.right )
|
215 | right = buildClustersFromTree( root.right, k - 1, cy );
|
216 |
|
217 | return left.concat(right);
|
218 | }
|
219 | }
|
220 | };
|
221 |
|
222 | if( process.env.NODE_ENV !== 'production' ){
|
223 | let printMatrix = function( M ) {
|
224 | let n = M.length;
|
225 | for(let i = 0; i < n; i++ ) {
|
226 | let row = '';
|
227 | for ( let j = 0; j < n; j++ ) {
|
228 | row += Math.round(M[i][j]*100)/100 + ' ';
|
229 | }
|
230 | console.log(row);
|
231 | }
|
232 | console.log('');
|
233 | };
|
234 | }
|
235 |
|
236 | let hierarchicalClustering = function( options ){
|
237 | let cy = this.cy();
|
238 | let nodes = this.nodes();
|
239 |
|
240 |
|
241 | let opts = setOptions( options );
|
242 |
|
243 | let attrs = opts.attributes;
|
244 | let getDist = (n1, n2) => clusteringDistance( opts.distance, attrs.length, i => attrs[i](n1), i => attrs[i](n2), n1, n2 );
|
245 |
|
246 |
|
247 | let clusters = [];
|
248 | let dists = [];
|
249 | let mins = [];
|
250 | let index = [];
|
251 |
|
252 |
|
253 | for ( let n = 0; n < nodes.length; n++ ) {
|
254 | let cluster = {
|
255 | value: (opts.mode === 'dendrogram') ? nodes[n] : [ nodes[n] ],
|
256 | key: n,
|
257 | index: n
|
258 | };
|
259 | clusters[n] = cluster;
|
260 | index[n] = cluster;
|
261 | dists[n] = [];
|
262 | mins[n] = 0;
|
263 | }
|
264 |
|
265 |
|
266 | for ( let i = 0; i < clusters.length; i++ ) {
|
267 | for ( let j = 0; j <= i; j++ ) {
|
268 | let dist;
|
269 |
|
270 | if ( opts.mode === 'dendrogram' ){
|
271 | dist = (i === j) ? Infinity : getDist( clusters[i].value, clusters[j].value );
|
272 | } else {
|
273 | dist = (i === j) ? Infinity : getDist( clusters[i].value[0], clusters[j].value[0] );
|
274 | }
|
275 |
|
276 | dists[i][j] = dist;
|
277 | dists[j][i] = dist;
|
278 |
|
279 | if ( dist < dists[i][mins[i]] ) {
|
280 | mins[i] = j;
|
281 | }
|
282 | }
|
283 | }
|
284 |
|
285 |
|
286 |
|
287 | let merged = mergeClosest( clusters, index, dists, mins, opts );
|
288 | while ( merged ) {
|
289 | merged = mergeClosest( clusters, index, dists, mins, opts );
|
290 | }
|
291 |
|
292 | let retClusters;
|
293 |
|
294 |
|
295 |
|
296 | if ( opts.mode === 'dendrogram') {
|
297 | retClusters = buildClustersFromTree( clusters[0], opts.dendrogramDepth, cy );
|
298 |
|
299 | if ( opts.addDendrogram )
|
300 | buildDendrogram( clusters[0], cy );
|
301 | }
|
302 | else {
|
303 |
|
304 | retClusters = new Array(clusters.length);
|
305 | clusters.forEach( function( cluster, i ) {
|
306 |
|
307 | cluster.key = cluster.index = null;
|
308 |
|
309 | retClusters[i] = cy.collection( cluster.value );
|
310 | });
|
311 | }
|
312 |
|
313 | return retClusters;
|
314 | };
|
315 |
|
316 | export default { hierarchicalClustering, hca: hierarchicalClustering };
|