1 | /**
|
2 | * Chinese Whispers Algorithm for Node.js
|
3 | *
|
4 | * https://github.com/huan/chinese-whispers
|
5 | * License: Apache-2.0
|
6 | * Author: Huan LI <zixia@zixia.net>
|
7 | *
|
8 | * Inspired by:
|
9 | * - http://blog.csdn.net/liyuan123zhouhui/article/details/70312716
|
10 | * - http://alexloveless.co.uk/data/chinese-whispers-graph-clustering-in-python/
|
11 | * - https://github.com/uhh-lt/chinese-whispers
|
12 | * - https://github.com/anvaka/ngraph.cw
|
13 | *
|
14 | */
|
15 | import { EventEmitter } from 'events';
|
16 | export { VERSION } from './version';
|
17 | const jsnx = require('jsnetworkx');
|
18 | const { knuthShuffle } = require('knuth-shuffle');
|
19 | export class ChineseWhispers extends EventEmitter {
|
20 | constructor(options) {
|
21 | super();
|
22 | this.weightFunc = options.weightFunc;
|
23 | this.epochs = options.epochs || 15;
|
24 | this.threshold = options.threshold;
|
25 | this.on('change', () => this.changeCounter++);
|
26 | }
|
27 | on(event, listener) {
|
28 | super.on(event, listener);
|
29 | return this;
|
30 | }
|
31 | emit(event, ...args) {
|
32 | return super.emit(event, ...args);
|
33 | }
|
34 | cluster(dataList) {
|
35 | // this.dataList = dataList
|
36 | this.graph = this.buildNetwork(dataList, this.weightFunc, this.threshold);
|
37 | // initial epoch
|
38 | this.emit('epoch', this.graph, -1);
|
39 | // run Chinese Whispers
|
40 | // I default to 10 iterations. This number is usually low.
|
41 | // After a certain number (individual to the data set) no further clustering occurs
|
42 | let epochs = this.epochs;
|
43 | while (epochs--) {
|
44 | this.changeCounter = 0;
|
45 | this.iterate();
|
46 | this.emit('epoch', this.graph, this.changeCounter);
|
47 | }
|
48 | const clusterList = this.buildClusterList(this.graph);
|
49 | return clusterList;
|
50 | }
|
51 | iterate() {
|
52 | const nodeList = this.graph.nodes();
|
53 | // I randomize the nodes to give me an arbitrary start point
|
54 | knuthShuffle(nodeList); // orignal array modified
|
55 | for (const node of nodeList) {
|
56 | this.relabelNode(node);
|
57 | }
|
58 | }
|
59 | relabelNode(node) {
|
60 | const newLabel = this.recalcLabel(node);
|
61 | const nodeAttr = this.graph.node.get(node);
|
62 | if (nodeAttr.label !== newLabel) {
|
63 | // set the label of target node to the winning local label
|
64 | this.emit('change', node, nodeAttr.label, newLabel);
|
65 | nodeAttr.label = newLabel;
|
66 | // console.log('set node ' + this.data[node] + ' label to ' + this.data[parseInt(newLabel)])
|
67 | }
|
68 | }
|
69 | recalcLabel(node) {
|
70 | const nxNode = this.graph.get(node);
|
71 | const neighborList = this.graph.neighbors(node);
|
72 | if (neighborList.length === 0) {
|
73 | const attr = this.graph.node.get(node);
|
74 | return attr.label; // return self label if no neighbors
|
75 | }
|
76 | // console.log('neighbors of node ' + this.data[node]
|
77 | // + ' is: ', neighborList.map((i: number) => this.data[i]))
|
78 | const labelWeightMap = {};
|
79 | // do an inventory of the given nodes neighbours and edge weights
|
80 | for (const neighbor of neighborList) {
|
81 | const neighborAttr = this.graph.node.get(neighbor);
|
82 | const label = neighborAttr.label;
|
83 | if (!(label in labelWeightMap)) {
|
84 | labelWeightMap[label] = 0;
|
85 | }
|
86 | const edgeAttr = nxNode.get(neighbor);
|
87 | const weight = edgeAttr.weight;
|
88 | // console.log('### ', weight, ' ###')
|
89 | labelWeightMap[label] += weight;
|
90 | }
|
91 | // find the label with the highest edge weight sum
|
92 | let max = 0;
|
93 | // In javascript the keys of object can only be strings
|
94 | // - https://stackoverflow.com/a/41870625/1123955
|
95 | let maxLabel = '-1';
|
96 | // for (const label in labelWeightMap) {
|
97 | // console.log('labelWeight node: ' + this.data[node] + ' - '
|
98 | // + this.data[parseInt(label)] + ' is: ', labelWeightMap[label])
|
99 | // }
|
100 | for (const label in labelWeightMap) {
|
101 | if (labelWeightMap[label] > max) {
|
102 | max = labelWeightMap[label];
|
103 | maxLabel = label;
|
104 | }
|
105 | }
|
106 | return maxLabel;
|
107 | }
|
108 | buildClusterList(G) {
|
109 | const clusterList = [];
|
110 | const labelIndexMap = {};
|
111 | let index = 0;
|
112 | for (const node of G.nodes()) {
|
113 | const label = G.node.get(node).label;
|
114 | if (!(label in labelIndexMap)) {
|
115 | labelIndexMap[label] = index++;
|
116 | }
|
117 | const labelIndex = labelIndexMap[label];
|
118 | if (!(labelIndex in clusterList)) {
|
119 | clusterList[labelIndex] = [];
|
120 | }
|
121 | clusterList[labelIndex].push(node);
|
122 | }
|
123 | return clusterList;
|
124 | }
|
125 | buildNetwork(dataList, weightFunc, threshold) {
|
126 | const nodeList = Object.keys(dataList).map(k => parseInt(k, 10)); // [0, 1, 2, ..., data.length - 1]
|
127 | // const nodeList: CWNode[] = [...data.keys()] // [0, 1, 2, ..., data.length - 1]
|
128 | const edgeList = [];
|
129 | for (let i = 0; i < dataList.length; i++) {
|
130 | for (let j = i + 1; j < dataList.length; j++) {
|
131 | const weight = weightFunc(dataList[i], dataList[j]);
|
132 | if (threshold && threshold > weight) {
|
133 | // console.log('threshold: ', threshold, ' weight: ', weight)
|
134 | // skip this edge because it's weight is below threshold
|
135 | this.emit('edge');
|
136 | }
|
137 | else {
|
138 | const edge = [i, j, { weight }];
|
139 | edgeList.push(edge);
|
140 | this.emit('edge', edge);
|
141 | }
|
142 | }
|
143 | }
|
144 | // initialize the graph
|
145 | const G = new jsnx.Graph();
|
146 | // Add nodes
|
147 | G.addNodesFrom(nodeList);
|
148 | // CW needs an arbitrary, unique label for each node before initialisation
|
149 | // Here I use the ID of the node since I know it's unique
|
150 | // You could use a random number or a counter or anything really
|
151 | for (const n of G.nodes()) {
|
152 | const nodeAttr = G.node.get(n);
|
153 | nodeAttr.label = n;
|
154 | }
|
155 | // add edges
|
156 | G.addEdgesFrom(edgeList);
|
157 | // console.log(edgeList)
|
158 | return G;
|
159 | }
|
160 | }
|
161 | export default ChineseWhispers;
|
162 | //# sourceMappingURL=chinese-whispers.js.map |
\ | No newline at end of file |