1 |
|
2 |
|
3 | var dgram = require('dgram')
|
4 | , net = require('net')
|
5 | , events = require('events')
|
6 | , logger = require('./lib/logger')
|
7 | , hashring = require('hashring')
|
8 | , cluster = require('cluster')
|
9 | , helpers = require('./lib/helpers')
|
10 | , mgmt_server = require('./lib/mgmt_server')
|
11 | , configlib = require('./lib/config');
|
12 |
|
// Module-level state shared by every invocation of the config callback below.
//
//   packet         internal event bus; 'send' events carry (key, message)
//   startup_time   process start, whole seconds since the epoch
//   node_status    failed-health-check counter per "host:port" node id
//   workers        cluster workers that have come online (master only)
//   node_ring      "host:port" -> value map handed to the hash ring
//   servers_loaded set to true once the servers have been started
//   config         most recently loaded configuration object
//   l              logger instance, created when the configuration is loaded
var packet = new events.EventEmitter()
  , startup_time = Math.round(Date.now() / 1000)
  , node_status = {}
  , workers = []
  , node_ring = {}
  , servers_loaded
  , config
  , l;
|
21 |
|
22 | configlib.configFile(process.argv[2], function (conf, oldConfig) {
|
// Keep the freshly loaded configuration in the module-level `config`.
config = conf;

// Socket family used for the outgoing UDP client.
var udp_version = config.address_ipv6 ? 'udp6' : 'udp4';
// Backend nodes; each entry is read for `host`, `port` and `adminport`.
var nodes = config.nodes;
l = new logger.Logger(config.log || {});

// Number of processes to run; 'auto' means one per CPU reported by the OS.
var forkCount = config.forkCount;
if (forkCount === 'auto') {
  forkCount = require('os').cpus().length;
}

// Every log line is prefixed with the pid (the master appends "[master] ").
var logPrefix = "[" + process.pid + "] ";
var log = function(msg, type) {
  l.log(logPrefix + msg, type);
};

// Initial health state reported by the "health" management command.
// Every other setting is read from the loaded configuration, so honour
// `config.healthStatus` first; the lookup on the config module is kept as a
// fallback so behaviour is unchanged when the setting is absent.
// NOTE(review): confirm that lib/config never exports `healthStatus`.
var healthStatus = config.healthStatus || configlib.healthStatus || 'up';
// Milliseconds between health-check rounds; also used as the per-check
// socket timeout.
var healthCheckInterval = config.checkInterval || 10000;
|
40 |
|
// Relay `msg` to every worker currently tracked in `workers`. The master
// registers this as the 'message' handler of each forked worker.
var broadcastMsg = function(msg) {
  var idx = 0;
  while (idx < workers.length) {
    workers[idx].send(msg);
    idx += 1;
  }
};
|
46 |
|
47 | if (forkCount > 1) {
|
48 | if (cluster.isMaster) {
|
49 | var worker;
|
50 | logPrefix += "[master] ";
|
51 | log("forking " + forkCount + " childs", "INFO");
|
52 |
|
53 | for (var i = 0; i < forkCount; i++) {
|
54 | worker = cluster.fork();
|
55 | worker.on('message', broadcastMsg);
|
56 | }
|
57 |
|
58 | cluster.on('online', function(worker) {
|
59 | log('worker ' + worker.process.pid + ' is online', 'INFO');
|
60 | workers.push(worker);
|
61 | });
|
62 |
|
63 | cluster.on('exit', function(worker, code, signal) {
|
64 | log('worker ' + worker.process.pid + ' died with exit code:' + code + " restarting", 'ERROR');
|
65 |
|
66 |
|
67 | for (var i = 0; i < workers.length; i++) {
|
68 | if (workers[i].process.pid == worker.process.pid) {
|
69 | workers.splice(i, 1);
|
70 | }
|
71 | }
|
72 |
|
73 | worker = cluster.fork();
|
74 | worker.on('message', broadcastMsg);
|
75 | });
|
76 |
|
77 | return;
|
78 |
|
79 | } else {
|
80 | process.on('message', function(msg) {
|
81 | if (msg.healthStatus) {
|
82 | healthStatus = msg.healthStatus;
|
83 | }
|
84 | });
|
85 | }
|
86 | }
|
87 |
|
88 |
|
89 |
|
// Register every configured backend under its "host:port" id with the
// value 100.
nodes.forEach(function(node) {
  var node_id = node.host + ':' + node.port;
  node_ring[node_id] = 100;
});

// Options handed to the hash ring constructor.
var ring_options = {};
ring_options['max cache size'] = config.cacheSize || 10000;
ring_options['replicas'] = 0;

var ring = new hashring(node_ring, 'md5', ring_options);
|
100 |
|
101 | if (!servers_loaded) {
|
102 |
|
// Run one round of health checks right away, before the periodic timer is
// installed further down.
doHealthChecks();

// Load the configured server module (UDP by default) and start it.
var server_config = config.server || './servers/udp';
var servermod = require(server_config);

// `new Buffer(string)` is deprecated. Use Buffer.from where the runtime has
// its own implementation (very old runtimes only inherit the incompatible
// Uint8Array.from), and fall back to the constructor otherwise.
var toBuffer = (typeof Buffer.from === 'function' && Buffer.from !== Uint8Array.from)
  ? function(text) { return Buffer.from(text); }
  : function(text) { return new Buffer(text); };

// Every non-empty metric line is re-emitted as a 'send' event, keyed by the
// text before its first ':'.
var server = servermod.start(config, function (msg, rinfo) {
  var packet_data = msg.toString();

  if (packet_data.indexOf("\n") > -1) {
    // Several newline-separated metrics in one packet: forward each on its
    // own. Use an index loop; for...in also visits inherited enumerable
    // properties of the array.
    var metrics = packet_data.split("\n");

    for (var midx = 0; midx < metrics.length; midx++) {
      var current_metric = metrics[midx];
      if (current_metric === '') {
        continue;
      }
      packet.emit('send', current_metric.split(':')[0], toBuffer(current_metric));
    }

  } else if (packet_data !== '') {
    // Single metric: forward the original buffer untouched.
    packet.emit('send', packet_data.split(':')[0], msg);
  }
});
|
// Outgoing UDP socket used to forward metrics to the backend nodes.
var client = dgram.createSocket(udp_version);

// client.send() below is called without a callback, so a failed send is
// emitted as an 'error' event on the socket. Without a listener that event
// would be thrown as an uncaught exception and take the proxy down.
client.on('error', function(err) {
  log('Error sending to backend statsd node: ' + err, 'ERROR');
});

// Forward each metric to the node the hash ring selects for its key.
packet.on('send', function(key, msg) {
  var statsd_host = ring.get(key);

  if (statsd_host === undefined) {
    log('Warning: No backend statsd nodes available!', 'WARNING');
    return;
  }

  // Node ids are built as host + ':' + port. Split on the LAST colon so that
  // IPv6 literal hosts, which contain colons themselves, keep their full
  // address.
  var separator = statsd_host.lastIndexOf(':');
  var host = statsd_host.slice(0, separator);
  var port = statsd_host.slice(separator + 1);

  client.send(msg, 0, msg.length, port, host);
});
|
155 |
|
// Management interface. The first callback handles one command
// (`cmd` plus its `parameters`) and writes the answer to `stream`; the second
// one is handed errors.
mgmt_server.start(
  config,
  function(cmd, parameters, stream) {
    switch(cmd) {
      case "help":
        stream.write("Commands: config, health, status, quit\n\n");
        break;

      case "config":
        helpers.writeConfig(config, stream);
        break;

      case "health":
        // "health up" / "health down" change the reported state; a bare
        // "health" (or any other argument) only reports it.
        if (parameters.length > 0) {
          var cmdaction = parameters[0].toLowerCase();
          if (cmdaction === 'up' || cmdaction === 'down') {
            healthStatus = cmdaction;
            // Tell the parent so it can relay the new state to the other
            // forks. process.send only exists when this process has an IPC
            // channel; with forkCount === 1 nothing is forked, and calling
            // it unconditionally would throw.
            if (forkCount > 0 && typeof process.send === 'function') {
              process.send({ healthStatus: healthStatus });
            }
          }
        }
        stream.write("health: " + healthStatus + "\n");
        break;

      case "status":
        var now = Math.round(new Date().getTime() / 1000);
        var uptime = now - startup_time;

        stream.write("uptime: " + uptime + "\n");

        // List the nodes currently in the ring.
        stream.write("nodes: ");
        ring.servers.forEach(function(server, index, array) {
          stream.write(server.string + " ");
        });
        stream.write("\n");
        break;

      case "quit":
        stream.end();
        break;

      default:
        stream.write("ERROR\n");
        break;
    }
  },
  function(err, stream) {
    log("MGMT: Caught " + err + ", Moving on", "WARNING");
  }
);

// Mark the servers as started so later invocations of this callback skip
// the setup above.
servers_loaded = true;
log("server is up", "INFO");

// Re-check all backend nodes periodically.
setInterval(doHealthChecks, healthCheckInterval);
|
220 | }
|
221 |
|
222 |
|
/**
 * Start one health check for every configured backend node.
 */
function doHealthChecks() {
  var probe = function(node) {
    healthcheck(node);
  };

  nodes.forEach(probe);
}
|
228 |
|
/**
 * Record a successful health check for a node.
 *
 * If failed checks were recorded for the node, it is added back to the ring
 * with the value 100. In every case its failure counter is reset to 0.
 *
 * @param {string} node_id "host:port" id of the node.
 */
function markNodeAsHealthy(node_id) {
  var failures = node_status[node_id];

  if (failures !== undefined && failures > 0) {
    var entry = {};
    entry[node_id] = 100;
    log('Adding node ' + node_id + ' to the ring.', 'WARNING');
    ring.add(entry);
  }

  node_status[node_id] = 0;
}
|
241 |
|
/**
 * Record a failed health check for a node.
 *
 * The node's failure counter is incremented (starting at 1 for a node that
 * has no counter yet). The node is removed from the ring only while the
 * counter is below 2, i.e. on the first failure after a reset, so repeated
 * failures do not remove it again.
 *
 * @param {string} node_id "host:port" id of the node.
 */
function markNodeAsUnhealthy(node_id) {
  var previous = node_status[node_id];
  node_status[node_id] = (previous === undefined) ? 1 : previous + 1;

  if (node_status[node_id] < 2) {
    log('Removing node ' + node_id + ' from the ring.', 'WARNING');
    ring.remove(node_id);
  }
}
|
253 |
|
254 |
|
/**
 * Probe one backend node and record the outcome.
 *
 * Connects to `node.adminport` on `node.host`, sends "health\r\n" and treats
 * a reply containing "up" as healthy. Any other reply, a socket error, or no
 * activity for `healthCheckInterval` milliseconds marks the node unhealthy.
 * Each check settles at most once: whichever of reply / error / timeout
 * comes first wins and later events are ignored.
 *
 * @param {Object} node Node entry providing `host`, `port` and `adminport`.
 */
function healthcheck(node) {
  var ended = false;
  var node_id = node.host + ':' + node.port;
  var client = net.connect(
    {port: node.adminport, host: node.host},
    function onConnect() {
      if (!ended) {
        client.write('health\r\n');
      }
    }
  );

  client.setTimeout(healthCheckInterval, function() {
    // The idle timer can still fire after the check already settled (for
    // example when the peer is slow to close after our end()); it must not
    // override a result that has been recorded.
    if (ended) {
      return;
    }
    ended = true;

    // A timeout does not close the socket by itself. Destroy it instead of
    // half-closing, so connections to unresponsive nodes do not linger.
    client.destroy();
    markNodeAsUnhealthy(node_id);
  });

  client.on('data', function(data) {
    if (ended) {
      return;
    }
    ended = true;

    var health_status = data.toString();
    client.end();

    if (health_status.indexOf('up') < 0) {
      markNodeAsUnhealthy(node_id);
    } else {
      markNodeAsHealthy(node_id);
    }
  });

  client.on('error', function(e) {
    if (ended) {
      return;
    }
    ended = true;

    // Refused / unreachable / reset connections are the expected way for a
    // dead node to fail; only other error codes are logged.
    if (e.code !== 'ECONNREFUSED' && e.code !== 'EHOSTUNREACH' && e.code !== 'ECONNRESET') {
      log('Error during healthcheck on node ' + node_id + ' with ' + e.code, 'ERROR');
    }

    markNodeAsUnhealthy(node_id);
  });
}
|
302 |
|
303 | });
|