Skip to content

Commit 01b488f

Browse files
committed
Provide generalized aggregation
This implements a generalized method for aggregating time-series data. Data can be aggregated over week or month intervals with a variety of aggregation methods to choose from. This will be useful for providing chart views at different levels (such as two-year periods vs. just showing the last month). Additionally, the generalized form of aggregation can be used to smooth out graphs where the sampling frequency changed with an update to Hubble Enterprise. The aggregation is done by splitting the time data into consecutive, gapless periods of time (weeks starting on Mondays, or months), for each of which the aggregated values are then computed and returned. Aggregation methods define how to aggregate the values within individual time periods. The following aggregation methods are supported: sum, mean, min, max, first (the chronologically first available value for that period), last (the chronologically last available value for that period), and median. Additionally, periods at the beginning or the end of the time series may or may not be included if they are incomplete (that is, there isn’t data for each day in the period). This is controlled by the setting includeIncomplete, which supports the following values: none, start (includes an incomplete period at the beginning of the series), end (includes an incomplete period at the end of the series), and both. Finally, the pull request usage chart is changed to make use of the new aggregation facilities to reduce the granularity from daily to monthly data for now. This might be changed when we implement detail views. I also added several unit tests to check the aggregation methods (for off-by-one errors in particular).
1 parent 12d7c13 commit 01b488f

File tree

5 files changed

+276
-41
lines changed

5 files changed

+276
-41
lines changed

docs/_layouts/default.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
<script src="{{ site.baseurl }}/assets/js/vendor/moment-with-locales.min.js"></script>
2828
<script src="{{ site.baseurl }}/assets/js/vendor/Chart-2.7.1.min.js"></script>
2929
<script src="{{ site.baseurl }}/assets/js/vendor/spin-2.3.2.min.js"></script>
30-
<script src="{{ site.baseurl }}/assets/js/charts.js?version=1ff0187"></script>
30+
<script src="{{ site.baseurl }}/assets/js/charts.js?version=e7e9c5a"></script>
3131
</head>
3232
<body>
3333
<section class="page-header">

docs/assets/js/charts.js

Lines changed: 113 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,116 @@ function createSpinner(canvas)
142142
};
143143
}
144144

145+
/**
 * Aggregates a time series into consecutive, gapless week or month periods.
 *
 * The input is not modified: rows are copied, their dates parsed with
 * d3.isoParse, and the copies are sorted chronologically before aggregation.
 *
 * @param {Array<Object>} data - Rows with a 'date' entry (ISO date string or
 *     Date) plus any number of value entries. The set of value keys is taken
 *     from the chronologically first row.
 * @param {Object} aggregationConfig
 * @param {string} aggregationConfig.period - 'week' (weeks starting on
 *     Monday, UTC) or 'month' (UTC).
 * @param {string} aggregationConfig.method - One of 'sum', 'mean', 'median',
 *     'first', 'last', 'min', 'max'.
 * @param {string} [aggregationConfig.includeIncomplete] - 'start', 'end' or
 *     'both' to keep a partially covered period at the respective end of the
 *     series; any other value drops partially covered periods at both ends.
 * @returns {Array<Object>} One row per period, with 'date' set to the start
 *     of the period (always the first key) and each value key set to the
 *     aggregated value, or undefined if the period contains no rows.
 *     An empty input yields an empty array.
 * @throws {string} If data is not an array, or the period or method is unknown.
 */
function aggregateTimeData(data, aggregationConfig)
{
	if (!Array.isArray(data))
		throw 'expected data array as input';

	if (data.length < 1)
		return [];

	let period;

	switch (aggregationConfig['period'])
	{
		case 'week':
			period = d3.utcMonday;
			break;
		case 'month':
			period = d3.utcMonth;
			break;
		default:
			throw 'unknown aggregation period "' + aggregationConfig['period'] + '"';
	}

	// Each aggregator receives the (non-empty, sorted) rows of one period
	// and an accessor for the value currently being aggregated
	const aggregators = {
		'sum': (rows, accessor) => d3.sum(rows, accessor),
		'mean': (rows, accessor) => d3.mean(rows, accessor),
		'median': (rows, accessor) => d3.median(rows, accessor),
		'first': (rows, accessor) => accessor(rows[0]),
		'last': (rows, accessor) => accessor(rows[rows.length - 1]),
		'min': (rows, accessor) => d3.min(rows, accessor),
		'max': (rows, accessor) => d3.max(rows, accessor),
	};

	// Validate the method up front, so that a bad configuration is reported
	// even if no period happens to contain data
	const method = aggregationConfig['method'];

	if (!Object.prototype.hasOwnProperty.call(aggregators, method))
		throw 'unknown aggregation method "' + method + '"';

	const aggregate = aggregators[method];

	// Work on copies with proper date objects, so the caller’s rows and
	// their order stay untouched
	const rows = data
		.map(row => Object.assign({}, row, {'date': d3.isoParse(row['date'])}))
		.sort((row1, row2) => row1['date'] - row2['date']);

	// Snap to whole days, so a time-of-day component on the first or last
	// row cannot shift the period boundaries
	const dateStart = d3.utcDay.floor(rows[0]['date']);
	// Ranges are exclusive, so add one more day to include the last date
	const dateEnd = d3.utcDay.offset(d3.utcDay.floor(rows[rows.length - 1]['date']), 1);

	const includeIncomplete = aggregationConfig['includeIncomplete'];

	const rangeStart = ['start', 'both'].includes(includeIncomplete)
		? period.floor(dateStart)
		: period.ceil(dateStart);
	const rangeEnd = ['end', 'both'].includes(includeIncomplete)
		? period.ceil(dateEnd)
		: period.floor(dateEnd);

	// In d3, ranges include the start value but exclude the end value.
	// rangeEnd is needed as the closing boundary of the last period, so
	// extend the range by one more period
	const boundaries = period.range(rangeStart, period.offset(rangeEnd, 1));

	const seriesKeys = Object.keys(rows[0]).filter(key => key !== 'date');
	const aggregatedData = [];

	for (let i = 0; i < boundaries.length - 1; i++)
	{
		const periodStart = boundaries[i];
		const periodEnd = boundaries[i + 1];

		const periodRows = rows.filter(row => row['date'] >= periodStart && row['date'] < periodEnd);

		const aggregatedRow = {'date': periodStart};

		for (const key of seriesKeys)
		{
			// Periods without any data keep the key, but with no value
			aggregatedRow[key] = periodRows.length === 0
				? undefined
				: aggregate(periodRows, row => row[key]);
		}

		aggregatedData.push(aggregatedRow);
	}

	return aggregatedData;
}
254+
145255
function createHistoryChart(canvas)
146256
{
147257
const url = $(canvas).data('url');
@@ -169,47 +279,12 @@ function createHistoryChart(canvas)
169279

170280
const context = canvas.getContext('2d');
171281

172-
if (readConfig($(canvas), 'aggregate') == 'weekly')
173-
{
174-
let aggregatedData = Array();
175-
data.sort(
176-
function(row1, row2)
177-
{
178-
let date1 = new Date(row1['date']);
179-
let date2 = new Date(row2['date']);
180-
return date1 - date2;
181-
});
182-
183-
let currentRow = Object();
184-
185-
for (let i = 0; i < data.length; i++)
186-
{
187-
if (i % 7 == 0)
188-
$.each(Object.keys(data[i]).slice(1),
189-
function(keyID, key)
190-
{
191-
currentRow[key] = 0;
192-
});
193-
194-
currentRow['date'] = data[i]['date'];
195-
196-
$.each(Object.keys(data[i]).slice(1),
197-
function(keyID, key)
198-
{
199-
currentRow[key] += data[i][key];
200-
});
201-
202-
if (i % 7 == 6)
203-
// Store a copy of the aggregated data
204-
aggregatedData.push($.extend({}, currentRow));
205-
}
206-
207-
data = aggregatedData;
208-
}
209-
210282
if (hasConfig($(canvas), 'sliceData'))
211283
data = data.slice(readConfig($(canvas), 'sliceData')[0], readConfig($(canvas), 'sliceData')[1]);
212284

285+
if (hasConfig($(canvas), 'aggregate'))
286+
data = aggregateTimeData(data, $(canvas).data('config').aggregate);
287+
213288
const originalDataSeries = Object.keys(data[0]).slice(1);
214289

215290
const dataSeries = hasConfig($(canvas), 'series')

docs/pr-total.html

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ <h3>Pull Requests (Total, by Week)</h3>
1717
"visibleSeries": [
1818
"merged"
1919
],
20-
"aggregate": "weekly"
20+
"aggregate": {
21+
"period": "week",
22+
"method": "sum",
23+
"includeIncomplete": "both"
24+
}
2125
}'></canvas>
2226
<div class="info-box">
2327
<p>

docs/pr-usage.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ <h3>Pull Request Usage</h3>
99
<canvas
1010
data-url="{{ site.dataURL }}/pull-request-usage.tsv"
1111
data-type="history"
12+
data-config='{"aggregate": {"period": "month", "method": "first", "includeIncomplete": "both"}}'
1213
></canvas>
1314
<div class="info-box">
1415
<p>

docs/spec/charts.js

Lines changed: 156 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
1-
/* global createChordChart, createHistoryChart, createList, createTable, createSpinner */
1+
/* global
2+
aggregateTimeData,
3+
createChordChart,
4+
createHistoryChart,
5+
createList,
6+
createTable,
7+
createSpinner,
8+
d3,
9+
*/
210

311
describe('global charts.js', function()
412
{
@@ -62,4 +70,151 @@ describe('global charts.js', function()
6270
});
6371
});
6472
});
73+
// Specs for aggregateTimeData: period boundaries (weeks start on Mondays),
// handling of incomplete periods, and the individual aggregation methods.
describe('aggregation for time series', function()
{
	// Generate one row per day from startDate to endDate (both inclusive);
	// the value of the i-th day is generator(i)
	function generateData(startDate, endDate, generator)
	{
		let dates = d3.utcDay.range(d3.isoParse(startDate), d3.utcDay.offset(d3.isoParse(endDate), 1));
		let data = Array();

		for (let i = 0; i < dates.length; i++)
			data.push({'date': dates[i], 'value': generator(i)});

		return data;
	}

	// Integer range generator: start, start + 1, … (wrapping at modulo, if given)
	function integerRangeGenerator(start, modulo)
	{
		if (modulo)
			return (i => (start + i) % modulo);

		return (i => start + i);
	}

	const dateToString = d3.utcFormat('%Y-%m-%d');

	it('should aggregate over weeks correctly', function()
	{
		const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'both'};
		const generator = integerRangeGenerator(0, 28);
		// 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
		const data = generateData('2018-01-01', '2018-09-30', generator);
		const aggregatedData = aggregateTimeData(data, aggregationConfig);

		// Compare the length with a matcher (an assignment inside expect()
		// would overwrite the length and assert nothing)
		expect(aggregatedData.length).toEqual(39);
		expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
		expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
		expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
		expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
		expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
		expect(aggregatedData[0]['value']).toEqual(6);
		expect(aggregatedData[1]['value']).toEqual(13);
		expect(aggregatedData[2]['value']).toEqual(20);
		expect(aggregatedData[4]['value']).toEqual(6);
		expect(aggregatedData[5]['value']).toEqual(13);
		expect(aggregatedData[36]['value']).toEqual(6);
		expect(aggregatedData[37]['value']).toEqual(13);
		expect(aggregatedData[38]['value']).toEqual(20);
	});

	it('should not have off-by-one errors (1)', function()
	{
		const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'both'};
		const generator = integerRangeGenerator(27, 28);
		// 2017-12-31 is a Sunday, and 2018-10-01 is a Monday
		const data = generateData('2017-12-31', '2018-10-01', generator);
		const aggregatedData = aggregateTimeData(data, aggregationConfig);

		expect(aggregatedData.length).toEqual(41);
		expect(dateToString(aggregatedData[0]['date'])).toEqual('2017-12-25');
		expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-01');
		expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-08');
		expect(dateToString(aggregatedData[3]['date'])).toEqual('2018-01-15');
		expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-17');
		expect(dateToString(aggregatedData[39]['date'])).toEqual('2018-09-24');
		expect(dateToString(aggregatedData[40]['date'])).toEqual('2018-10-01');
		expect(aggregatedData[0]['value']).toEqual(27);
		expect(aggregatedData[1]['value']).toEqual(6);
		expect(aggregatedData[39]['value']).toEqual(20);
		expect(aggregatedData[40]['value']).toEqual(21);
	});

	it('should not have off-by-one errors (2)', function()
	{
		const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'both'};
		const generator = integerRangeGenerator(1, 28);
		// 2018-01-02 is a Tuesday, and 2018-09-29 is a Saturday
		const data = generateData('2018-01-02', '2018-09-29', generator);
		const aggregatedData = aggregateTimeData(data, aggregationConfig);

		expect(aggregatedData.length).toEqual(39);
		expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
		expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
		expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
		expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
		expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
		expect(aggregatedData[0]['value']).toEqual(6);
		expect(aggregatedData[1]['value']).toEqual(13);
		expect(aggregatedData[37]['value']).toEqual(13);
		expect(aggregatedData[38]['value']).toEqual(19);
	});

	it('should not include incomplete periods with incomplete data if requested', function()
	{
		const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'none'};
		const generator = integerRangeGenerator(1, 28);
		// 2018-01-02 is a Tuesday, and 2018-09-29 is a Saturday
		const data = generateData('2018-01-02', '2018-09-29', generator);
		const aggregatedData = aggregateTimeData(data, aggregationConfig);

		expect(aggregatedData.length).toEqual(37);
		expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-08');
		expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-15');
		expect(dateToString(aggregatedData[35]['date'])).toEqual('2018-09-10');
		expect(dateToString(aggregatedData[36]['date'])).toEqual('2018-09-17');
		expect(aggregatedData[0]['value']).toEqual(13);
		expect(aggregatedData[1]['value']).toEqual(20);
		expect(aggregatedData[35]['value']).toEqual(6);
		expect(aggregatedData[36]['value']).toEqual(13);
	});

	it('should aggregate sums correctly', function()
	{
		const aggregationConfig = {'period': 'week', 'method': 'sum', 'includeIncomplete': 'both'};
		const generator = integerRangeGenerator(0, 10);
		// 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
		const data = generateData('2018-01-01', '2018-09-30', generator);
		const aggregatedData = aggregateTimeData(data, aggregationConfig);

		expect(aggregatedData.length).toEqual(39);
		expect(aggregatedData[0]['value']).toEqual(21);
		expect(aggregatedData[1]['value']).toEqual(30);
		expect(aggregatedData[2]['value']).toEqual(39);
		expect(aggregatedData[36]['value']).toEqual(35);
		expect(aggregatedData[37]['value']).toEqual(24);
		expect(aggregatedData[38]['value']).toEqual(33);
	});

	it('should aggregate over months correctly', function()
	{
		const aggregationConfig = {'period': 'month', 'method': 'first', 'includeIncomplete': 'both'};
		const generator = integerRangeGenerator(9, 10);
		const data = generateData('2017-12-31', '2019-01-01', generator);
		const aggregatedData = aggregateTimeData(data, aggregationConfig);

		expect(aggregatedData.length).toEqual(14);
		expect(dateToString(aggregatedData[0]['date'])).toEqual('2017-12-01');
		expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-01');
		expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-02-01');
		expect(dateToString(aggregatedData[12]['date'])).toEqual('2018-12-01');
		expect(dateToString(aggregatedData[13]['date'])).toEqual('2019-01-01');
		expect(aggregatedData[0]['value']).toEqual(9);
		expect(aggregatedData[1]['value']).toEqual(0);
		expect(aggregatedData[2]['value']).toEqual(1);
		expect(aggregatedData[12]['value']).toEqual(4);
		expect(aggregatedData[13]['value']).toEqual(5);
	});
});
65220
});

0 commit comments

Comments
 (0)