1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 | import { ChannelOptions } from './channel-options';
|
19 | import { ConnectivityState } from './connectivity-state';
|
20 | import { LogVerbosity, Status } from './constants';
|
21 | import { Duration, durationToMs, isDuration, msToDuration } from './duration';
|
22 | import {
|
23 | ChannelControlHelper,
|
24 | createChildChannelControlHelper,
|
25 | registerLoadBalancerType,
|
26 | } from './experimental';
|
27 | import {
|
28 | selectLbConfigFromList,
|
29 | LoadBalancer,
|
30 | TypedLoadBalancingConfig,
|
31 | } from './load-balancer';
|
32 | import { ChildLoadBalancerHandler } from './load-balancer-child-handler';
|
33 | import { PickArgs, Picker, PickResult, PickResultType } from './picker';
|
34 | import {
|
35 | Endpoint,
|
36 | EndpointMap,
|
37 | SubchannelAddress,
|
38 | endpointToString,
|
39 | } from './subchannel-address';
|
40 | import {
|
41 | BaseSubchannelWrapper,
|
42 | SubchannelInterface,
|
43 | } from './subchannel-interface';
|
44 | import * as logging from './logging';
|
45 | import { LoadBalancingConfig } from './service-config';
|
46 |
|
/** Tracer name under which this module's debug output is logged. */
const TRACER_NAME = 'outlier_detection';

/**
 * Log a message at DEBUG verbosity, tagged with this module's tracer name.
 *
 * @param text Message to log.
 */
function trace(text: string): void {
  const verbosity = LogVerbosity.DEBUG;
  logging.trace(verbosity, TRACER_NAME, text);
}
|
52 |
|
/** Name reported by this policy's config and balancer, and used to register it. */
const TYPE_NAME = 'outlier_detection';

/**
 * Feature switch read once at module load. The policy is enabled unless the
 * GRPC_EXPERIMENTAL_ENABLE_OUTLIER_DETECTION environment variable is set to
 * anything other than the exact string 'true'.
 */
const OUTLIER_DETECTION_ENABLED =
  'true' === (process.env.GRPC_EXPERIMENTAL_ENABLE_OUTLIER_DETECTION ?? 'true');
|
57 |
|
/**
 * Fully-populated settings for the success-rate ejection check.
 */
export interface SuccessRateEjectionConfig {
  /**
   * Standard-deviation multiplier expressed in thousandths: the ejection
   * threshold is `mean - stdev * (stdev_factor / 1000)`.
   */
  readonly stdev_factor: number;
  /**
   * Percentage (0-100) compared against a random draw to decide whether an
   * address below the threshold is actually ejected.
   */
  readonly enforcement_percentage: number;
  /**
   * Minimum number of addresses that reached `request_volume` for the check
   * to be performed at all.
   */
  readonly minimum_hosts: number;
  /**
   * Minimum number of finished calls (successes + failures) in the last
   * interval for an address to be considered.
   */
  readonly request_volume: number;
}
|
64 |
|
/**
 * Fully-populated settings for the failure-percentage ejection check.
 */
export interface FailurePercentageEjectionConfig {
  /**
   * Failure percentage (0-100) that an address must exceed to become an
   * ejection candidate.
   */
  readonly threshold: number;
  /**
   * Percentage (0-100) compared against a random draw to decide whether a
   * candidate is actually ejected.
   */
  readonly enforcement_percentage: number;
  /**
   * Minimum number of addresses that reached `request_volume` for the check
   * to be performed at all.
   */
  readonly minimum_hosts: number;
  /**
   * Minimum number of finished calls (successes + failures) in the last
   * interval for an address to be considered.
   */
  readonly request_volume: number;
}
|
71 |
|
/**
 * Shape of the policy configuration before defaults are applied. The field
 * names mirror the ones read by
 * `OutlierDetectionLoadBalancingConfig.createFromJson`.
 */
export interface OutlierDetectionRawConfig {
  /** Time between ejection checks; the config class falls back to 10 s. */
  interval?: Duration;
  /** Base ejection duration; the config class falls back to 30 s. */
  base_ejection_time?: Duration;
  /** Upper bound on ejection duration; the config class falls back to 300 s. */
  max_ejection_time?: Duration;
  /** Cap on the percentage of ejected addresses; the config class falls back to 10. */
  max_ejection_percent?: number;
  /** Success-rate check settings; the check is disabled when omitted. */
  success_rate_ejection?: Partial<SuccessRateEjectionConfig>;
  /** Failure-percentage check settings; the check is disabled when omitted. */
  failure_percentage_ejection?: Partial<FailurePercentageEjectionConfig>;
  /** Candidate child policies, in order of preference. */
  child_policy: LoadBalancingConfig[];
}
|
81 |
|
/**
 * Values used for any success-rate setting the user leaves out. Key order is
 * significant: it determines the key order of the merged configuration.
 */
const defaultSuccessRateEjectionConfig: SuccessRateEjectionConfig = {
  stdev_factor: 1900, // i.e. 1.9 standard deviations
  enforcement_percentage: 100,
  minimum_hosts: 5,
  request_volume: 100,
};
|
88 |
|
/**
 * Values used for any failure-percentage setting the user leaves out. Key
 * order is significant: it determines the key order of the merged
 * configuration.
 */
const defaultFailurePercentageEjectionConfig: FailurePercentageEjectionConfig = {
  threshold: 85,
  enforcement_percentage: 100,
  minimum_hosts: 5,
  request_volume: 50,
};
|
96 |
|
/** The `typeof` results that a config field may be required to have. */
type TypeofValues =
  | 'boolean'
  | 'function'
  | 'number'
  | 'object'
  | 'string'
  | 'undefined';

/**
 * Check that an optional field, when present, has the expected `typeof`.
 *
 * @param obj Object holding the field.
 * @param fieldName Name of the field to inspect.
 * @param expectedType Required `typeof` result for the field's value.
 * @param objectName Optional parent name, used as a prefix in the error text.
 * @throws Error when the field is present, not undefined, and of another type.
 */
function validateFieldType(
  obj: any,
  fieldName: string,
  expectedType: TypeofValues,
  objectName?: string
) {
  // A missing or explicitly-undefined field is treated as "not configured".
  if (!(fieldName in obj)) {
    return;
  }
  const value = obj[fieldName];
  if (value === undefined) {
    return;
  }
  const actualType = typeof value;
  if (actualType === expectedType) {
    return;
  }
  const fullFieldName = objectName ? `${objectName}.${fieldName}` : fieldName;
  throw new Error(
    `outlier detection config ${fullFieldName} parse error: expected ${expectedType}, got ${actualType}`
  );
}
|
124 |
|
/**
 * Check that an optional field, when present, is a Duration whose components
 * are within range. Despite the name, a zero duration is accepted: both
 * components only need to be non-negative.
 *
 * @param obj Object holding the field.
 * @param fieldName Name of the field to inspect.
 * @param objectName Optional parent name, used as a prefix in the error text.
 * @throws Error when the value is not a Duration or a component is out of range.
 */
function validatePositiveDuration(
  obj: any,
  fieldName: string,
  objectName?: string
) {
  const fullFieldName = objectName ? `${objectName}.${fieldName}` : fieldName;
  // A missing or explicitly-undefined field is treated as "not configured".
  if (!(fieldName in obj) || obj[fieldName] === undefined) {
    return;
  }
  const duration = obj[fieldName];
  if (!isDuration(duration)) {
    throw new Error(
      `outlier detection config ${fullFieldName} parse error: expected Duration, got ${typeof duration}`
    );
  }
  // NOTE(review): the seconds ceiling presumably mirrors the protobuf
  // Duration limit — confirm against the Duration definition.
  const secondsInRange =
    duration.seconds >= 0 && duration.seconds <= 315_576_000_000;
  const nanosInRange = duration.nanos >= 0 && duration.nanos <= 999_999_999;
  // Negated conjunction so that NaN components are rejected as well.
  if (!(secondsInRange && nanosInRange)) {
    throw new Error(
      `outlier detection config ${fullFieldName} parse error: values out of range for non-negative Duaration`
    );
  }
}
|
153 |
|
/**
 * Check that an optional field, when present, is a number between 0 and 100
 * inclusive.
 *
 * @param obj Object holding the field.
 * @param fieldName Name of the field to inspect.
 * @param objectName Optional parent name, used as a prefix in the error text.
 * @throws Error when the value is not a number or lies outside 0-100.
 */
function validatePercentage(obj: any, fieldName: string, objectName?: string) {
  const fullFieldName = objectName ? `${objectName}.${fieldName}` : fieldName;
  validateFieldType(obj, fieldName, 'number', objectName);
  if (!(fieldName in obj)) {
    return;
  }
  const value = obj[fieldName];
  if (value === undefined) {
    return;
  }
  // Expressed as "not within range" so that NaN fails the check too.
  const withinRange = value >= 0 && value <= 100;
  if (!withinRange) {
    throw new Error(
      `outlier detection config ${fullFieldName} parse error: value out of range for percentage (0-100)`
    );
  }
}
|
167 |
|
/**
 * Parsed, default-filled configuration for the outlier_detection policy.
 */
export class OutlierDetectionLoadBalancingConfig
  implements TypedLoadBalancingConfig
{
  private readonly intervalMs: number;
  private readonly baseEjectionTimeMs: number;
  private readonly maxEjectionTimeMs: number;
  private readonly maxEjectionPercent: number;
  private readonly successRateEjection: SuccessRateEjectionConfig | null;
  private readonly failurePercentageEjection: FailurePercentageEjectionConfig | null;

  /**
   * @param intervalMs Time between ejection checks in ms; null means 10 000.
   * @param baseEjectionTimeMs Base ejection duration in ms; null means 30 000.
   * @param maxEjectionTimeMs Maximum ejection duration in ms; null means 300 000.
   * @param maxEjectionPercent Cap on ejected addresses in percent; null means 10.
   * @param successRateEjection Success-rate settings (missing fields are filled
   *     from the defaults), or null/undefined to disable that check.
   * @param failurePercentageEjection Failure-percentage settings (missing
   *     fields are filled from the defaults), or null/undefined to disable
   *     that check.
   * @param childPolicy Configuration of the wrapped child policy.
   * @throws Error when the child policy is pick_first.
   */
  constructor(
    intervalMs: number | null,
    baseEjectionTimeMs: number | null,
    maxEjectionTimeMs: number | null,
    maxEjectionPercent: number | null,
    successRateEjection: Partial<SuccessRateEjectionConfig> | null,
    failurePercentageEjection: Partial<FailurePercentageEjectionConfig> | null,
    private readonly childPolicy: TypedLoadBalancingConfig
  ) {
    if (childPolicy.getLoadBalancerName() === 'pick_first') {
      throw new Error(
        'outlier_detection LB policy cannot have a pick_first child policy'
      );
    }
    this.intervalMs = intervalMs ?? 10_000;
    this.baseEjectionTimeMs = baseEjectionTimeMs ?? 30_000;
    this.maxEjectionTimeMs = maxEjectionTimeMs ?? 300_000;
    this.maxEjectionPercent = maxEjectionPercent ?? 10;
    // A falsy sub-config (null or undefined) disables the corresponding check.
    this.successRateEjection = successRateEjection
      ? { ...defaultSuccessRateEjectionConfig, ...successRateEjection }
      : null;
    this.failurePercentageEjection = failurePercentageEjection
      ? {
          ...defaultFailurePercentageEjectionConfig,
          ...failurePercentageEjection,
        }
      : null;
  }

  /** @returns The policy name, 'outlier_detection'. */
  getLoadBalancerName(): string {
    return TYPE_NAME;
  }

  /**
   * @returns The configuration in its JSON form, keyed by the policy name,
   *     with all defaults made explicit.
   */
  toJsonObject(): object {
    return {
      outlier_detection: {
        interval: msToDuration(this.intervalMs),
        base_ejection_time: msToDuration(this.baseEjectionTimeMs),
        max_ejection_time: msToDuration(this.maxEjectionTimeMs),
        max_ejection_percent: this.maxEjectionPercent,
        success_rate_ejection: this.successRateEjection ?? undefined,
        failure_percentage_ejection:
          this.failurePercentageEjection ?? undefined,
        child_policy: [this.childPolicy.toJsonObject()],
      },
    };
  }

  getIntervalMs(): number {
    return this.intervalMs;
  }
  getBaseEjectionTimeMs(): number {
    return this.baseEjectionTimeMs;
  }
  getMaxEjectionTimeMs(): number {
    return this.maxEjectionTimeMs;
  }
  getMaxEjectionPercent(): number {
    return this.maxEjectionPercent;
  }
  /** @returns The success-rate settings, or null when that check is disabled. */
  getSuccessRateEjectionConfig(): SuccessRateEjectionConfig | null {
    return this.successRateEjection;
  }
  /** @returns The failure-percentage settings, or null when that check is disabled. */
  getFailurePercentageEjectionConfig(): FailurePercentageEjectionConfig | null {
    return this.failurePercentageEjection;
  }
  getChildPolicy(): TypedLoadBalancingConfig {
    return this.childPolicy;
  }

  /**
   * Validate a raw JSON configuration object and build a config from it.
   *
   * @param obj The policy's JSON configuration (the value under the policy name).
   * @returns The parsed configuration.
   * @throws Error when a field has the wrong type or is out of range, when
   *     child_policy is not an array, or when no child policy is recognized.
   */
  static createFromJson(obj: any): OutlierDetectionLoadBalancingConfig {
    validatePositiveDuration(obj, 'interval');
    validatePositiveDuration(obj, 'base_ejection_time');
    validatePositiveDuration(obj, 'max_ejection_time');
    validatePercentage(obj, 'max_ejection_percent');
    if (
      'success_rate_ejection' in obj &&
      obj.success_rate_ejection !== undefined
    ) {
      // typeof null is 'object', so null must be rejected explicitly; it
      // would otherwise fail below with an opaque TypeError from `in`.
      if (
        typeof obj.success_rate_ejection !== 'object' ||
        obj.success_rate_ejection === null
      ) {
        throw new Error(
          'outlier detection config success_rate_ejection must be an object'
        );
      }
      validateFieldType(
        obj.success_rate_ejection,
        'stdev_factor',
        'number',
        'success_rate_ejection'
      );
      validatePercentage(
        obj.success_rate_ejection,
        'enforcement_percentage',
        'success_rate_ejection'
      );
      validateFieldType(
        obj.success_rate_ejection,
        'minimum_hosts',
        'number',
        'success_rate_ejection'
      );
      validateFieldType(
        obj.success_rate_ejection,
        'request_volume',
        'number',
        'success_rate_ejection'
      );
    }
    if (
      'failure_percentage_ejection' in obj &&
      obj.failure_percentage_ejection !== undefined
    ) {
      // Same null guard as for success_rate_ejection above.
      if (
        typeof obj.failure_percentage_ejection !== 'object' ||
        obj.failure_percentage_ejection === null
      ) {
        throw new Error(
          'outlier detection config failure_percentage_ejection must be an object'
        );
      }
      validatePercentage(
        obj.failure_percentage_ejection,
        'threshold',
        'failure_percentage_ejection'
      );
      validatePercentage(
        obj.failure_percentage_ejection,
        'enforcement_percentage',
        'failure_percentage_ejection'
      );
      validateFieldType(
        obj.failure_percentage_ejection,
        'minimum_hosts',
        'number',
        'failure_percentage_ejection'
      );
      validateFieldType(
        obj.failure_percentage_ejection,
        'request_volume',
        'number',
        'failure_percentage_ejection'
      );
    }

    if (!('child_policy' in obj) || !Array.isArray(obj.child_policy)) {
      throw new Error('outlier detection config child_policy must be an array');
    }
    const childPolicy = selectLbConfigFromList(obj.child_policy);
    if (!childPolicy) {
      throw new Error(
        'outlier detection config child_policy: no valid recognized policy found'
      );
    }

    return new OutlierDetectionLoadBalancingConfig(
      obj.interval ? durationToMs(obj.interval) : null,
      obj.base_ejection_time ? durationToMs(obj.base_ejection_time) : null,
      obj.max_ejection_time ? durationToMs(obj.max_ejection_time) : null,
      obj.max_ejection_percent ?? null,
      obj.success_rate_ejection,
      obj.failure_percentage_ejection,
      childPolicy
    );
  }
}
|
338 |
|
/**
 * Subchannel wrapper handed to the child policy. It keeps its own reference
 * count so it can remove itself from its map entry once released, and it
 * expresses ejection through the base wrapper's health flag.
 */
class OutlierDetectionSubchannelWrapper
  extends BaseSubchannelWrapper
  implements SubchannelInterface
{
  private refCount = 0;

  /**
   * @param childSubchannel The subchannel being wrapped.
   * @param mapEntry Tracking entry this wrapper belongs to, if there is one.
   */
  constructor(
    childSubchannel: SubchannelInterface,
    private mapEntry?: MapEntry
  ) {
    super(childSubchannel);
  }

  /** Take a reference on the wrapped subchannel and on this wrapper. */
  ref() {
    this.child.ref();
    this.refCount++;
  }

  /**
   * Release a reference. When the wrapper's own count reaches zero it is
   * removed from its map entry's wrapper list.
   */
  unref() {
    this.child.unref();
    this.refCount--;
    if (this.refCount > 0 || !this.mapEntry) {
      return;
    }
    const siblings = this.mapEntry.subchannelWrappers;
    const position = siblings.indexOf(this);
    if (position !== -1) {
      siblings.splice(position, 1);
    }
  }

  /** Mark this wrapper unhealthy. */
  eject() {
    this.setHealthy(false);
  }

  /** Mark this wrapper healthy again. */
  uneject() {
    this.setHealthy(true);
  }

  /** @returns The tracking entry given at construction, if any. */
  getMapEntry(): MapEntry | undefined {
    return this.mapEntry;
  }

  /** @returns The subchannel this wrapper was constructed around. */
  getWrappedSubchannel(): SubchannelInterface {
    return this.child;
  }
}
|
385 |
|
/** Success and failure tallies for one measurement interval. */
interface CallCountBucket {
  success: number;
  failure: number;
}

/** @returns A new bucket with both tallies at zero. */
function createEmptyBucket(): CallCountBucket {
  const bucket: CallCountBucket = { success: 0, failure: 0 };
  return bucket;
}

/**
 * Two-bucket call counter: calls are recorded into the active bucket, while
 * the inactive bucket holds the totals captured at the last switch.
 */
class CallCounter {
  private activeBucket: CallCountBucket = createEmptyBucket();
  private inactiveBucket: CallCountBucket = createEmptyBucket();

  /** Record one successful call in the active bucket. */
  addSuccess() {
    this.activeBucket.success++;
  }

  /** Record one failed call in the active bucket. */
  addFailure() {
    this.activeBucket.failure++;
  }

  /** Freeze the active bucket as the "last" one and start a fresh active bucket. */
  switchBuckets() {
    [this.inactiveBucket, this.activeBucket] = [
      this.activeBucket,
      createEmptyBucket(),
    ];
  }

  /** @returns Successes recorded in the bucket frozen by the last switch. */
  getLastSuccesses() {
    return this.inactiveBucket.success;
  }

  /** @returns Failures recorded in the bucket frozen by the last switch. */
  getLastFailures() {
    return this.inactiveBucket.failure;
  }
}
|
418 |
|
/**
 * Picker that unwraps the subchannel wrapper chosen by the child picker and,
 * when counting is enabled, records each call's outcome against the
 * subchannel's map entry.
 */
class OutlierDetectionPicker implements Picker {
  /**
   * @param wrappedPicker The child policy's picker.
   * @param countCalls Whether call outcomes should be tallied.
   */
  constructor(private wrappedPicker: Picker, private countCalls: boolean) {}

  /**
   * @param pickArgs Arguments forwarded unchanged to the child picker.
   * @returns The child's result; for COMPLETE picks the subchannel is replaced
   *     by the wrapped one and onCallEnded may be replaced by a counting
   *     callback that still invokes the child's callback.
   */
  pick(pickArgs: PickArgs): PickResult {
    const childResult = this.wrappedPicker.pick(pickArgs);
    if (childResult.pickResultType !== PickResultType.COMPLETE) {
      return childResult;
    }

    const wrapper = childResult.subchannel as OutlierDetectionSubchannelWrapper;
    const entry = wrapper.getMapEntry();
    if (!entry) {
      // Untracked subchannel: only unwrap, leave the callback untouched.
      return {
        ...childResult,
        subchannel: wrapper.getWrappedSubchannel(),
      };
    }

    let onCallEnded = childResult.onCallEnded;
    if (this.countCalls) {
      onCallEnded = statusCode => {
        if (statusCode !== Status.OK) {
          entry.counter.addFailure();
        } else {
          entry.counter.addSuccess();
        }
        childResult.onCallEnded?.(statusCode);
      };
    }
    return {
      ...childResult,
      subchannel: wrapper.getWrappedSubchannel(),
      onCallEnded,
    };
  }
}
|
455 |
|
/** Per-endpoint bookkeeping kept by the outlier detection balancer. */
interface MapEntry {
  /** Call outcome tallies for this endpoint. */
  counter: CallCounter;
  /** When the endpoint was ejected, or null while it is not ejected. */
  currentEjectionTimestamp: Date | null;
  /**
   * Factor applied to the base ejection time. Incremented on every ejection
   * and decremented (down to 0) on each check during which the endpoint is
   * not ejected.
   */
  ejectionTimeMultiplier: number;
  /** Live subchannel wrappers created for this endpoint. */
  subchannelWrappers: OutlierDetectionSubchannelWrapper[];
}
|
462 |
|
/**
 * Load balancer that wraps a child policy and periodically ejects endpoints
 * whose recent call outcomes are outliers, according to the configured
 * success-rate and/or failure-percentage checks. Ejected endpoints' subchannel
 * wrappers are marked unhealthy until their ejection time has elapsed.
 */
export class OutlierDetectionLoadBalancer implements LoadBalancer {
  private childBalancer: ChildLoadBalancerHandler;
  private entryMap = new EndpointMap<MapEntry>();
  private latestConfig: OutlierDetectionLoadBalancingConfig | null = null;
  private ejectionTimer: NodeJS.Timeout;
  /** Start of the current check interval, or null when no timer is active. */
  private timerStartTime: Date | null = null;

  /**
   * @param channelControlHelper Helper of the parent channel/policy; subchannel
   *     creation and state updates from the child are intercepted and forwarded
   *     to it.
   * @param options Channel options passed through to the child handler.
   */
  constructor(
    channelControlHelper: ChannelControlHelper,
    options: ChannelOptions
  ) {
    this.childBalancer = new ChildLoadBalancerHandler(
      createChildChannelControlHelper(channelControlHelper, {
        createSubchannel: (
          subchannelAddress: SubchannelAddress,
          subchannelArgs: ChannelOptions
        ) => {
          const originalSubchannel = channelControlHelper.createSubchannel(
            subchannelAddress,
            subchannelArgs
          );
          const mapEntry =
            this.entryMap.getForSubchannelAddress(subchannelAddress);
          const subchannelWrapper = new OutlierDetectionSubchannelWrapper(
            originalSubchannel,
            mapEntry
          );
          // Only tracked addresses can be ejected. A wrapper without a map
          // entry must stay healthy, because nothing could ever uneject it.
          if (mapEntry) {
            if (mapEntry.currentEjectionTimestamp !== null) {
              // The address is currently ejected: propagate that to the new
              // wrapper.
              subchannelWrapper.eject();
            }
            mapEntry.subchannelWrappers.push(subchannelWrapper);
          }
          return subchannelWrapper;
        },
        updateState: (connectivityState: ConnectivityState, picker: Picker) => {
          if (connectivityState === ConnectivityState.READY) {
            // Only READY pickers can return COMPLETE picks that need
            // unwrapping and counting.
            channelControlHelper.updateState(
              connectivityState,
              new OutlierDetectionPicker(picker, this.isCountingEnabled())
            );
          } else {
            channelControlHelper.updateState(connectivityState, picker);
          }
        },
      }),
      options
    );
    // Create and immediately cancel a timer so the field always holds a
    // valid timer handle.
    this.ejectionTimer = setInterval(() => {}, 0);
    clearInterval(this.ejectionTimer);
  }

  /** @returns True when the latest config enables at least one ejection check. */
  private isCountingEnabled(): boolean {
    return (
      this.latestConfig !== null &&
      (this.latestConfig.getSuccessRateEjectionConfig() !== null ||
        this.latestConfig.getFailurePercentageEjectionConfig() !== null)
    );
  }

  /**
   * @returns Percentage (0-100) of tracked endpoints that are currently
   *     ejected; 0 when no endpoints are tracked.
   */
  private getCurrentEjectionPercent() {
    const totalEndpoints = this.entryMap.size;
    if (totalEndpoints === 0) {
      // Avoid 0/0 = NaN when nothing is tracked.
      return 0;
    }
    let ejectionCount = 0;
    for (const mapEntry of this.entryMap.values()) {
      if (mapEntry.currentEjectionTimestamp !== null) {
        ejectionCount += 1;
      }
    }
    return (ejectionCount * 100) / totalEndpoints;
  }

  /**
   * Eject endpoints whose success rate over the last interval is more than
   * the configured number of standard deviations below the mean.
   *
   * @param ejectionTimestamp Time at which the check timer fired; recorded as
   *     the ejection time of any endpoint ejected by this check.
   */
  private runSuccessRateCheck(ejectionTimestamp: Date) {
    if (!this.latestConfig) {
      return;
    }
    const successRateConfig = this.latestConfig.getSuccessRateEjectionConfig();
    if (!successRateConfig) {
      return;
    }
    trace('Running success rate check');

    // Step 1: collect the success rate of every endpoint with enough traffic.
    const targetRequestVolume = successRateConfig.request_volume;
    const successRates: number[] = [];
    for (const [endpoint, mapEntry] of this.entryMap.entries()) {
      const successes = mapEntry.counter.getLastSuccesses();
      const failures = mapEntry.counter.getLastFailures();
      trace(
        'Stats for ' +
          endpointToString(endpoint) +
          ': successes=' +
          successes +
          ' failures=' +
          failures +
          ' targetRequestVolume=' +
          targetRequestVolume
      );
      if (successes + failures >= targetRequestVolume) {
        successRates.push(successes / (successes + failures));
      }
    }
    const addressesWithTargetVolume = successRates.length;
    trace(
      'Found ' +
        addressesWithTargetVolume +
        ' success rate candidates; currentEjectionPercent=' +
        this.getCurrentEjectionPercent() +
        ' successRates=[' +
        successRates +
        ']'
    );
    if (addressesWithTargetVolume < successRateConfig.minimum_hosts) {
      return;
    }
    // With minimum_hosts = 0 the list may still be empty; there is nothing to
    // evaluate, and reduce() without an initial value would throw.
    if (addressesWithTargetVolume === 0) {
      return;
    }

    // Step 2: compute mean, population standard deviation, and threshold.
    const successRateMean =
      successRates.reduce((a, b) => a + b, 0) / successRates.length;
    let successRateDeviationSum = 0;
    for (const rate of successRates) {
      const deviation = rate - successRateMean;
      successRateDeviationSum += deviation * deviation;
    }
    const successRateVariance = successRateDeviationSum / successRates.length;
    const successRateStdev = Math.sqrt(successRateVariance);
    // stdev_factor is expressed in thousandths.
    const ejectionThreshold =
      successRateMean -
      successRateStdev * (successRateConfig.stdev_factor / 1000);
    trace(
      'stdev=' + successRateStdev + ' ejectionThreshold=' + ejectionThreshold
    );

    // Step 3: eject candidates below the threshold, subject to the cap.
    for (const [address, mapEntry] of this.entryMap.entries()) {
      // Stop as soon as the ejection cap is reached.
      if (
        this.getCurrentEjectionPercent() >=
        this.latestConfig.getMaxEjectionPercent()
      ) {
        break;
      }
      // Skip endpoints without enough traffic in the last interval.
      const successes = mapEntry.counter.getLastSuccesses();
      const failures = mapEntry.counter.getLastFailures();
      if (successes + failures < targetRequestVolume) {
        continue;
      }

      const addressText = endpointToString(address);
      const successRate = successes / (successes + failures);
      trace('Checking candidate ' + addressText + ' successRate=' + successRate);
      if (successRate < ejectionThreshold) {
        const randomNumber = Math.random() * 100;
        trace(
          'Candidate ' +
            addressText +
            ' randomNumber=' +
            randomNumber +
            ' enforcement_percentage=' +
            successRateConfig.enforcement_percentage
        );
        if (randomNumber < successRateConfig.enforcement_percentage) {
          trace('Ejecting candidate ' + addressText);
          this.eject(mapEntry, ejectionTimestamp);
        }
      }
    }
  }

  /**
   * Eject endpoints whose failure percentage over the last interval exceeds
   * the configured threshold.
   *
   * @param ejectionTimestamp Time at which the check timer fired; recorded as
   *     the ejection time of any endpoint ejected by this check.
   */
  private runFailurePercentageCheck(ejectionTimestamp: Date) {
    if (!this.latestConfig) {
      return;
    }
    const failurePercentageConfig =
      this.latestConfig.getFailurePercentageEjectionConfig();
    if (!failurePercentageConfig) {
      return;
    }
    trace(
      'Running failure percentage check. threshold=' +
        failurePercentageConfig.threshold +
        ' request volume threshold=' +
        failurePercentageConfig.request_volume
    );

    // Step 1: make sure enough endpoints saw enough traffic.
    let addressesWithTargetVolume = 0;
    for (const mapEntry of this.entryMap.values()) {
      const successes = mapEntry.counter.getLastSuccesses();
      const failures = mapEntry.counter.getLastFailures();
      if (successes + failures >= failurePercentageConfig.request_volume) {
        addressesWithTargetVolume += 1;
      }
    }
    if (addressesWithTargetVolume < failurePercentageConfig.minimum_hosts) {
      return;
    }

    // Step 2: eject candidates above the threshold, subject to the cap.
    for (const [address, mapEntry] of this.entryMap.entries()) {
      // Stop as soon as the ejection cap is reached.
      if (
        this.getCurrentEjectionPercent() >=
        this.latestConfig.getMaxEjectionPercent()
      ) {
        break;
      }
      // Skip endpoints without enough traffic in the last interval.
      const successes = mapEntry.counter.getLastSuccesses();
      const failures = mapEntry.counter.getLastFailures();
      trace('Candidate successes=' + successes + ' failures=' + failures);
      if (successes + failures < failurePercentageConfig.request_volume) {
        continue;
      }

      const failurePercentage = (failures * 100) / (failures + successes);
      if (failurePercentage > failurePercentageConfig.threshold) {
        const addressText = endpointToString(address);
        const randomNumber = Math.random() * 100;
        trace(
          'Candidate ' +
            addressText +
            ' randomNumber=' +
            randomNumber +
            ' enforcement_percentage=' +
            failurePercentageConfig.enforcement_percentage
        );
        if (randomNumber < failurePercentageConfig.enforcement_percentage) {
          trace('Ejecting candidate ' + addressText);
          this.eject(mapEntry, ejectionTimestamp);
        }
      }
    }
  }

  /**
   * Mark an endpoint as ejected and propagate that to all of its wrappers.
   *
   * @param mapEntry Entry of the endpoint to eject.
   * @param ejectionTimestamp Time recorded as the start of the ejection.
   */
  private eject(mapEntry: MapEntry, ejectionTimestamp: Date) {
    // Store a private copy so entries never share a mutable Date instance.
    mapEntry.currentEjectionTimestamp = new Date(ejectionTimestamp.getTime());
    mapEntry.ejectionTimeMultiplier += 1;
    for (const subchannelWrapper of mapEntry.subchannelWrappers) {
      subchannelWrapper.eject();
    }
  }

  /**
   * Clear an endpoint's ejection and mark all of its wrappers healthy.
   *
   * @param mapEntry Entry of the endpoint to uneject.
   */
  private uneject(mapEntry: MapEntry) {
    mapEntry.currentEjectionTimestamp = null;
    for (const subchannelWrapper of mapEntry.subchannelWrappers) {
      subchannelWrapper.uneject();
    }
  }

  /** Rotate every endpoint's call counter to start a new measurement interval. */
  private switchAllBuckets() {
    for (const mapEntry of this.entryMap.values()) {
      mapEntry.counter.switchBuckets();
    }
  }

  /**
   * Schedule the next run of the ejection checks.
   *
   * @param delayMs Delay before the checks run, in milliseconds.
   */
  private startTimer(delayMs: number) {
    this.ejectionTimer = setTimeout(() => this.runChecks(), delayMs);
    // Do not keep the process alive just for this timer.
    this.ejectionTimer.unref?.();
  }

  /**
   * Timer callback: rotate the counters, schedule the next run, perform the
   * enabled ejection checks, then decay multipliers of healthy endpoints and
   * uneject endpoints whose ejection time has elapsed.
   */
  private runChecks() {
    const ejectionTimestamp = new Date();
    trace('Ejection timer running');

    this.switchAllBuckets();

    if (!this.latestConfig) {
      return;
    }
    this.timerStartTime = ejectionTimestamp;
    this.startTimer(this.latestConfig.getIntervalMs());

    this.runSuccessRateCheck(ejectionTimestamp);
    this.runFailurePercentageCheck(ejectionTimestamp);

    const baseEjectionTimeMs = this.latestConfig.getBaseEjectionTimeMs();
    const maxEjectionTimeMs = this.latestConfig.getMaxEjectionTimeMs();
    for (const [address, mapEntry] of this.entryMap.entries()) {
      if (mapEntry.currentEjectionTimestamp === null) {
        if (mapEntry.ejectionTimeMultiplier > 0) {
          mapEntry.ejectionTimeMultiplier -= 1;
        }
      } else {
        const ejectionDurationMs = Math.min(
          baseEjectionTimeMs * mapEntry.ejectionTimeMultiplier,
          Math.max(baseEjectionTimeMs, maxEjectionTimeMs)
        );
        // Plain epoch arithmetic: Date#setMilliseconds works on local-time
        // components and can be off across a DST transition.
        const returnTimeMs =
          mapEntry.currentEjectionTimestamp.getTime() + ejectionDurationMs;
        if (returnTimeMs < Date.now()) {
          trace('Unejecting ' + endpointToString(address));
          this.uneject(mapEntry);
        }
      }
    }
  }

  /**
   * Apply a new endpoint list and configuration: sync the tracking map,
   * update the child policy, and start, adjust, or cancel the check timer.
   * Updates whose config is not an outlier detection config are ignored.
   *
   * @param endpointList Endpoints to balance across.
   * @param lbConfig Configuration for this policy.
   * @param attributes Attributes passed through to the child policy.
   */
  updateAddressList(
    endpointList: Endpoint[],
    lbConfig: TypedLoadBalancingConfig,
    attributes: { [key: string]: unknown }
  ): void {
    if (!(lbConfig instanceof OutlierDetectionLoadBalancingConfig)) {
      return;
    }
    for (const endpoint of endpointList) {
      if (!this.entryMap.has(endpoint)) {
        trace('Adding map entry for ' + endpointToString(endpoint));
        this.entryMap.set(endpoint, {
          counter: new CallCounter(),
          currentEjectionTimestamp: null,
          ejectionTimeMultiplier: 0,
          subchannelWrappers: [],
        });
      }
    }
    this.entryMap.deleteMissing(endpointList);
    const childPolicy = lbConfig.getChildPolicy();
    this.childBalancer.updateAddressList(endpointList, childPolicy, attributes);

    if (
      lbConfig.getSuccessRateEjectionConfig() ||
      lbConfig.getFailurePercentageEjectionConfig()
    ) {
      if (this.timerStartTime) {
        trace('Previous timer existed. Replacing timer');
        clearTimeout(this.ejectionTimer);
        const remainingDelay =
          lbConfig.getIntervalMs() -
          (new Date().getTime() - this.timerStartTime.getTime());
        // The new interval may already have elapsed; never schedule with a
        // negative delay.
        this.startTimer(Math.max(remainingDelay, 0));
      } else {
        trace('Starting new timer');
        this.timerStartTime = new Date();
        this.startTimer(lbConfig.getIntervalMs());
        this.switchAllBuckets();
      }
    } else {
      trace('Counting disabled. Cancelling timer.');
      this.timerStartTime = null;
      clearTimeout(this.ejectionTimer);
      for (const mapEntry of this.entryMap.values()) {
        this.uneject(mapEntry);
        mapEntry.ejectionTimeMultiplier = 0;
      }
    }

    // NOTE(review): the config is stored only after the child update above,
    // so pickers built synchronously during that update see the previous
    // config's counting flag — confirm whether that ordering is intended.
    this.latestConfig = lbConfig;
  }

  /** Forward to the child policy. */
  exitIdle(): void {
    this.childBalancer.exitIdle();
  }

  /** Forward to the child policy. */
  resetBackoff(): void {
    this.childBalancer.resetBackoff();
  }

  /** Cancel the check timer and destroy the child policy. */
  destroy(): void {
    clearTimeout(this.ejectionTimer);
    this.childBalancer.destroy();
  }

  /** @returns The policy name, 'outlier_detection'. */
  getTypeName(): string {
    return TYPE_NAME;
  }
}
|
826 |
|
/**
 * Register the outlier_detection policy and its config class, unless the
 * feature has been disabled through the environment.
 */
export function setup() {
  if (!OUTLIER_DETECTION_ENABLED) {
    return;
  }
  registerLoadBalancerType(
    TYPE_NAME,
    OutlierDetectionLoadBalancer,
    OutlierDetectionLoadBalancingConfig
  );
}
|