Getting the data
# Read the two CSV exports belonging to one sample.
#
# name:      sample identifier; it is used both as the directory name and as
#            the prefix of the files inside that directory.
# has.issue: accepted for callers that pass it, but never read here.
#
# Returns a list with two data frames:
#   gc   - contents of <name>/<name>-gcevents.csv
#   stat - contents of <name>/<name>-processstat.csv
load.data = function (name, has.issue) {
  # Both files share the same location and options and differ only in suffix.
  read.sample.csv = function (suffix) {
    read.csv(paste0(name, '/', name, suffix), strip.white=TRUE)
  }
  gc.events = read.sample.csv('-gcevents.csv')
  process.stat = read.sample.csv('-processstat.csv')
  list(gc=gc.events, stat=process.stat)
}
# Load the three sample captures; each argument names both the directory and
# the file prefix that load.data reads from.
dat.sample.1 = load.data('10145.clinic-doctor')
dat.sample.2 = load.data('12318.clinic-doctor')
dat.sample.3 = load.data('3139.clinic-doctor')
# Show the GC event table of the first sample.
print.data.table(dat.sample.1$gc)
| 0 |
1.51188e+12 |
1.51188e+12 |
SCAVENGE |
| 0 |
1.51188e+12 |
1.51188e+12 |
SCAVENGE |
| 0 |
1.51188e+12 |
1.51188e+12 |
SCAVENGE |
| 0 |
1.51188e+12 |
1.51188e+12 |
SCAVENGE |
| 0 |
1.51188e+12 |
1.51188e+12 |
SCAVENGE |
| 0 |
1.51188e+12 |
1.51188e+12 |
INCREMENTAL_MARKING |
# Show the process statistics table of the first sample.
print.data.table(dat.sample.1$stat)
| 1.51188e+12 |
0 |
167.104022 |
1.1124310 |
44048384 |
25067520 |
18032920 |
408161 |
3 |
| 1.51188e+12 |
0 |
0.764206 |
0.2614220 |
44142592 |
25067520 |
18121384 |
408350 |
3 |
| 1.51188e+12 |
0 |
2.871670 |
0.0258708 |
44146688 |
25067520 |
18129704 |
408419 |
3 |
| 1.51188e+12 |
0 |
0.255304 |
0.0182345 |
44146688 |
25067520 |
18134104 |
408488 |
3 |
| 1.51188e+12 |
0 |
1.186313 |
0.0172532 |
44146688 |
25067520 |
18138376 |
408557 |
3 |
| 1.51188e+12 |
0 |
1.871463 |
0.0173525 |
44146688 |
25067520 |
18142648 |
408626 |
3 |
# Keep only the rows recorded during interval 1 and add time columns that are
# measured relative to the first retained statistics row.
#
# dat: list with data frames `gc` (columns interval, startTimestamp,
#      endTimestamp) and `stat` (columns interval, timestamp).
#
# Returns a list with the filtered `gc` (plus startTime and endTime) and the
# filtered `stat` (plus time). The added columns are POSIXct values in GMT.
subset.interval = function (dat) {
  # Timestamps are divided by 1000 before being handed to as.POSIXct, which
  # interprets its numeric input as seconds since the given origin.
  as.elapsed = function (ms) {
    as.POSIXct(ms / 1000, origin="1970-01-01", tz="GMT")
  }

  gc.events = dat$gc[dat$gc$interval == 1, ]
  stat.rows = dat$stat[dat$stat$interval == 1, ]

  # The first retained statistics row defines time zero for both tables.
  t0 = stat.rows[1, 'timestamp']

  stat.rows$time = as.elapsed(stat.rows$timestamp - t0)
  gc.events$startTime = as.elapsed(gc.events$startTimestamp - t0)
  gc.events$endTime = as.elapsed(gc.events$endTimestamp - t0)

  list(gc=gc.events, stat=stat.rows)
}
# Restrict every sample to interval 1 and add the relative time columns.
# Each sample is derived from its own loaded data, so dat.sample.3 keeps the
# '3139.clinic-doctor' capture instead of a copy of sample 2.
dat.sample.1 = subset.interval(dat.sample.1)
dat.sample.2 = subset.interval(dat.sample.2)
dat.sample.3 = subset.interval(dat.sample.3)
# Show the GC event table of the third sample after subsetting.
print.data.table(dat.sample.3$gc)
| 10 |
1 |
1.511966e+12 |
1.511966e+12 |
SCAVENGE |
| 11 |
1 |
1.511966e+12 |
1.511966e+12 |
INCREMENTAL_MARKING |
| 12 |
1 |
1.511966e+12 |
1.511966e+12 |
MARK_SWEEP_COMPACT |
| 13 |
1 |
1.511966e+12 |
1.511966e+12 |
SCAVENGE |
| 14 |
1 |
1.511966e+12 |
1.511966e+12 |
SCAVENGE |
| 15 |
1 |
1.511966e+12 |
1.511966e+12 |
SCAVENGE |
# Show the process statistics table of the third sample after subsetting.
print.data.table(dat.sample.3$stat)
| 113 |
1.511966e+12 |
1 |
29.262148 |
1.4371603 |
59179008 |
37085184 |
24149248 |
58063 |
871 |
| 114 |
1.511966e+12 |
1 |
19.561119 |
1.3971054 |
61825024 |
55959552 |
24500528 |
57963 |
999 |
| 115 |
1.511966e+12 |
1 |
44.112346 |
1.3991631 |
62373888 |
59629568 |
26785712 |
58063 |
1127 |
| 116 |
1.511966e+12 |
1 |
16.410012 |
1.4713359 |
62566400 |
59629568 |
29982552 |
58193 |
1200 |
| 117 |
1.511966e+12 |
1 |
4.488328 |
0.9864492 |
63397888 |
59629568 |
32472776 |
58263 |
1256 |
| 118 |
1.511966e+12 |
1 |
4.557432 |
0.9692644 |
65548288 |
59629568 |
34636368 |
58333 |
1265 |
Plot data
# Melt the statistics of sample 3, keeping time, timestamp and interval as
# identifier columns so every other column becomes a (variable, value) pair.
dat = melt(dat.sample.3$stat, id.vars=c("time", "timestamp", "interval"))

# One facet per melted variable, each with its own y range. The GC events are
# added first as full-height rectangles filled by type, then the line layer.
p = ggplot(dat) +
  geom_rect(data = dat.sample.3$gc, aes(xmin=startTime, xmax=endTime, ymin=-Inf, ymax=Inf, fill=type), alpha=0.8) +
  geom_line(aes(x = time, y = value)) +
  facet_grid(variable ~ ., scales='free_y') +
  scale_x_datetime(labels = date_format("%S sec")) +
  scale_y_continuous(limits = c(0, NA)) +
  theme(legend.position="bottom")
print(p)

Model hypothesis
Hypothesis: delay and MARK_SWEEP_COMPACT garbage-collection events are correlated.
Model
# Flag every statistics row that was sampled during, or shortly after, a
# MARK_SWEEP_COMPACT garbage-collection event.
#
# dat: list with data frames `gc` (columns type, startTimestamp,
#      endTimestamp) and `stat` (column timestamp).
#
# Returns a list with `gc` unchanged and a copy of `stat` carrying an extra
# logical column `msc`. When there are no MARK_SWEEP_COMPACT events, `msc` is
# FALSE for every row.
dat.annotate.msc = function (dat) {
  msc = dat$gc[dat$gc$type == 'MARK_SWEEP_COMPACT', ]
  annotate = rep(FALSE, nrow(dat$stat))
  # seq_len yields an empty sequence for zero events; 1:nrow(msc) would run
  # with i = 1 and i = 0 and corrupt the mask.
  for (i in seq_len(nrow(msc))) {
    intervalStart = msc[i, 'startTimestamp']
    # Extend each window by 20 timestamp units past the end of the event
    # (presumably milliseconds - confirm against the data source).
    intervalEnd = msc[i, 'endTimestamp'] + 20
    annotate = annotate | (intervalStart <= dat$stat$timestamp & dat$stat$timestamp <= intervalEnd)
  }
  dat.stat = data.frame(dat$stat)
  dat.stat$msc = annotate
  return(list(
    gc=dat$gc,
    stat=dat.stat
  ))
}
# Add the logical `msc` column to the statistics of sample 3.
dat.sample.3 = dat.annotate.msc(dat.sample.3)

# Scatter plot of delay over time, coloured by the `msc` flag.
p = ggplot(dat.sample.3$stat, aes(x = time, y = delay, colour=msc)) +
  geom_point() +
  scale_x_datetime(labels = date_format("%S sec")) +
  scale_y_continuous(limits = c(0, NA)) +
  theme(legend.position="bottom")
print(p)

# Compare the delay of statistics rows flagged by `msc` with the delay of the
# remaining rows.
#
# dat: list whose `stat` data frame has a logical column `msc` and a numeric
#      column `delay`.
#
# Returns a two-row data frame with columns `msc` (TRUE, then FALSE),
# `median` and `max` of the delay within each group.
analysis.msc.delay = function (dat) {
  stat = dat$stat
  delay.flagged = stat[stat$msc == TRUE, 'delay']
  delay.other = stat[stat$msc == FALSE, 'delay']
  data.frame(
    msc = c(TRUE, FALSE),
    median = c(median(delay.flagged), median(delay.other)),
    max = c(max(delay.flagged), max(delay.other))
  )
}
# Render the median and max delay for rows flagged by `msc` versus the rest.
kable(analysis.msc.delay(dat.sample.3))
| TRUE |
230.559838 |
807.8275 |
| FALSE |
3.332706 |
356.5879 |