UNPKG

6.32 kBJavaScriptView Raw
1/**
2 * Chinese Whispers Algorithm for Node.js
3 *
4 * https://github.com/huan/chinese-whispers
5 * License: Apache-2.0
6 * Author: Huan LI <zixia@zixia.net>
7 *
8 * Inspired by:
9 * - http://blog.csdn.net/liyuan123zhouhui/article/details/70312716
10 * - http://alexloveless.co.uk/data/chinese-whispers-graph-clustering-in-python/
11 * - https://github.com/uhh-lt/chinese-whispers
12 * - https://github.com/anvaka/ngraph.cw
13 *
14 */
15import { EventEmitter } from 'events';
16export { VERSION } from './version';
17const jsnx = require('jsnetworkx');
18const { knuthShuffle } = require('knuth-shuffle');
19export class ChineseWhispers extends EventEmitter {
20 constructor(options) {
21 super();
22 this.weightFunc = options.weightFunc;
23 this.epochs = options.epochs || 15;
24 this.threshold = options.threshold;
25 this.on('change', () => this.changeCounter++);
26 }
27 on(event, listener) {
28 super.on(event, listener);
29 return this;
30 }
31 emit(event, ...args) {
32 return super.emit(event, ...args);
33 }
34 cluster(dataList) {
35 // this.dataList = dataList
36 this.graph = this.buildNetwork(dataList, this.weightFunc, this.threshold);
37 // initial epoch
38 this.emit('epoch', this.graph, -1);
39 // run Chinese Whispers
40 // I default to 10 iterations. This number is usually low.
41 // After a certain number (individual to the data set) no further clustering occurs
42 let epochs = this.epochs;
43 while (epochs--) {
44 this.changeCounter = 0;
45 this.iterate();
46 this.emit('epoch', this.graph, this.changeCounter);
47 }
48 const clusterList = this.buildClusterList(this.graph);
49 return clusterList;
50 }
51 iterate() {
52 const nodeList = this.graph.nodes();
53 // I randomize the nodes to give me an arbitrary start point
54 knuthShuffle(nodeList); // orignal array modified
55 for (const node of nodeList) {
56 this.relabelNode(node);
57 }
58 }
59 relabelNode(node) {
60 const newLabel = this.recalcLabel(node);
61 const nodeAttr = this.graph.node.get(node);
62 if (nodeAttr.label !== newLabel) {
63 // set the label of target node to the winning local label
64 this.emit('change', node, nodeAttr.label, newLabel);
65 nodeAttr.label = newLabel;
66 // console.log('set node ' + this.data[node] + ' label to ' + this.data[parseInt(newLabel)])
67 }
68 }
69 recalcLabel(node) {
70 const nxNode = this.graph.get(node);
71 const neighborList = this.graph.neighbors(node);
72 if (neighborList.length === 0) {
73 const attr = this.graph.node.get(node);
74 return attr.label; // return self label if no neighbors
75 }
76 // console.log('neighbors of node ' + this.data[node]
77 // + ' is: ', neighborList.map((i: number) => this.data[i]))
78 const labelWeightMap = {};
79 // do an inventory of the given nodes neighbours and edge weights
80 for (const neighbor of neighborList) {
81 const neighborAttr = this.graph.node.get(neighbor);
82 const label = neighborAttr.label;
83 if (!(label in labelWeightMap)) {
84 labelWeightMap[label] = 0;
85 }
86 const edgeAttr = nxNode.get(neighbor);
87 const weight = edgeAttr.weight;
88 // console.log('### ', weight, ' ###')
89 labelWeightMap[label] += weight;
90 }
91 // find the label with the highest edge weight sum
92 let max = 0;
93 // In javascript the keys of object can only be strings
94 // - https://stackoverflow.com/a/41870625/1123955
95 let maxLabel = '-1';
96 // for (const label in labelWeightMap) {
97 // console.log('labelWeight node: ' + this.data[node] + ' - '
98 // + this.data[parseInt(label)] + ' is: ', labelWeightMap[label])
99 // }
100 for (const label in labelWeightMap) {
101 if (labelWeightMap[label] > max) {
102 max = labelWeightMap[label];
103 maxLabel = label;
104 }
105 }
106 return maxLabel;
107 }
108 buildClusterList(G) {
109 const clusterList = [];
110 const labelIndexMap = {};
111 let index = 0;
112 for (const node of G.nodes()) {
113 const label = G.node.get(node).label;
114 if (!(label in labelIndexMap)) {
115 labelIndexMap[label] = index++;
116 }
117 const labelIndex = labelIndexMap[label];
118 if (!(labelIndex in clusterList)) {
119 clusterList[labelIndex] = [];
120 }
121 clusterList[labelIndex].push(node);
122 }
123 return clusterList;
124 }
125 buildNetwork(dataList, weightFunc, threshold) {
126 const nodeList = Object.keys(dataList).map(k => parseInt(k, 10)); // [0, 1, 2, ..., data.length - 1]
127 // const nodeList: CWNode[] = [...data.keys()] // [0, 1, 2, ..., data.length - 1]
128 const edgeList = [];
129 for (let i = 0; i < dataList.length; i++) {
130 for (let j = i + 1; j < dataList.length; j++) {
131 const weight = weightFunc(dataList[i], dataList[j]);
132 if (threshold && threshold > weight) {
133 // console.log('threshold: ', threshold, ' weight: ', weight)
134 // skip this edge because it's weight is below threshold
135 this.emit('edge');
136 }
137 else {
138 const edge = [i, j, { weight }];
139 edgeList.push(edge);
140 this.emit('edge', edge);
141 }
142 }
143 }
144 // initialize the graph
145 const G = new jsnx.Graph();
146 // Add nodes
147 G.addNodesFrom(nodeList);
148 // CW needs an arbitrary, unique label for each node before initialisation
149 // Here I use the ID of the node since I know it's unique
150 // You could use a random number or a counter or anything really
151 for (const n of G.nodes()) {
152 const nodeAttr = G.node.get(n);
153 nodeAttr.label = n;
154 }
155 // add edges
156 G.addEdgesFrom(edgeList);
157 // console.log(edgeList)
158 return G;
159 }
160}
161export default ChineseWhispers;
162//# sourceMappingURL=chinese-whispers.js.map
\No newline at end of file