Skip to content

Commit 7dfc075

Browse files
committed
Provide generalized aggregation
This implements a generalized method for aggregating time-series data. Data can be aggregated over week or month intervals with a variety of aggregation methods to choose from. This will be useful for providing chart views at different levels (such as two-year periods vs. just showing the last month). Additionally, the generalized form of aggregation can be used to smooth out graphs where the sampling frequency changed with an update to Hubble Enterprise.

The aggregation is done by splitting the time data into subsequent, gapless periods of time (weeks starting with Mondays or months), for each of which the aggregated values are then computed and returned.

Aggregation methods define how to aggregate the values within individual time periods. The following aggregation methods are supported:
- sum
- mean
- min
- max
- first (the chronologically first available value for that period)
- last
- median

Additionally, periods at the beginning or the end of the time series may or may not be included if they are incomplete (there isn’t data for each day in the period). This is controlled by the setting includeIncomplete, which supports the following values:
- none
- start (includes an incomplete period at the beginning of the series)
- end
- both

Finally, the pull request usage chart is changed to make use of the new aggregation facilities to reduce the granularity from daily to monthly data for now. This might be changed when we implement detail views.

I also added several unit tests to check the aggregation methods (for off-by-one errors in particular).
1 parent 4fc9b8d commit 7dfc075

File tree

4 files changed

+271
-41
lines changed

4 files changed

+271
-41
lines changed

docs/_layouts/default.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
<script src="{{ site.baseurl }}/assets/js/vendor/moment-with-locales.min.js"></script>
2828
<script src="{{ site.baseurl }}/assets/js/vendor/Chart-2.7.1.min.js"></script>
2929
<script src="{{ site.baseurl }}/assets/js/vendor/spin-2.3.2.min.js"></script>
30-
<script src="{{ site.baseurl }}/assets/js/charts.js?version=1ff0187"></script>
30+
<script src="{{ site.baseurl }}/assets/js/charts.js?version=e7e9c5a"></script>
3131
</head>
3232
<body>
3333
<section class="page-header">

docs/assets/js/charts.js

Lines changed: 113 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,116 @@ function createSpinner(canvas)
127127
};
128128
}
129129

130+
// Aggregates time-series rows into consecutive, gapless periods (weeks starting on Mondays
// or calendar months, both in UTC) and returns one aggregated row per period.
//
// Parameters:
//   data               Array of row objects. Each row carries a 'date' entry that d3.isoParse
//                      can parse, plus any number of value entries. The value keys are taken
//                      from the chronologically first row. The input is not modified.
//   aggregationConfig  Object with the following entries:
//     'period'             'week' or 'month'
//     'method'             'sum', 'mean', 'median', 'min', 'max', 'first' or 'last'
//     'includeIncomplete'  'start', 'end' or 'both' to keep a partially covered period at the
//                          respective end of the series; any other value (such as 'none')
//                          drops partially covered periods at both ends
//
// Returns an array of rows, each with 'date' set to the start of its period and one entry per
// value key. A value is undefined if the period contains no rows. Empty input yields an empty
// array.
//
// Throws a string if data is not an array, if a date cannot be parsed, or if the period or the
// method is unknown.
function aggregateTimeData(data, aggregationConfig)
{
    if (!Array.isArray(data))
        throw 'expected data array as input';

    // Always hand back an array so that callers don’t need a special case for empty input
    if (data.length < 1)
        return [];

    // Turn date strings into proper date objects. This works on copies so that the caller’s
    // rows are neither re-typed nor reordered
    const rows = data.map(
        function(row)
        {
            const date = d3.isoParse(row['date']);

            // d3.isoParse signals failure with null, which would silently coerce to the epoch
            if (date === null)
                throw 'invalid date "' + row['date'] + '" in input data';

            return Object.assign({}, row, {'date': date});
        });

    // Sort data, just in case it isn’t already
    rows.sort((row1, row2) => row1['date'] - row2['date']);

    let period;

    switch (aggregationConfig['period'])
    {
        case 'week':
            period = d3.utcMonday;
            break;
        case 'month':
            period = d3.utcMonth;
            break;
        default:
            throw 'unknown aggregation period "' + aggregationConfig['period'] + '"';
    }

    // Resolve the method once, up front, so that a misspelled method is reported even if no
    // period ends up containing any data
    const methods = new Map([
        ['sum', (bucket, accessor) => d3.sum(bucket, accessor)],
        ['mean', (bucket, accessor) => d3.mean(bucket, accessor)],
        ['median', (bucket, accessor) => d3.median(bucket, accessor)],
        ['min', (bucket, accessor) => d3.min(bucket, accessor)],
        ['max', (bucket, accessor) => d3.max(bucket, accessor)],
        // Buckets are sorted chronologically, so first/last are the boundary rows
        ['first', (bucket, accessor) => accessor(bucket[0])],
        ['last', (bucket, accessor) => accessor(bucket[bucket.length - 1])],
    ]);
    const aggregate = methods.get(aggregationConfig['method']);

    if (aggregate === undefined)
        throw 'unknown aggregation method "' + aggregationConfig['method'] + '"';

    const dateStart = rows[0]['date'];
    // Ranges are exclusive, so add one more day to include the last date
    const dateEnd = d3.utcDay.offset(rows[rows.length - 1]['date'], 1);

    // Rounding outwards keeps a partially covered period, rounding inwards drops it
    const includeIncomplete = aggregationConfig['includeIncomplete'];
    const t0 = ['start', 'both'].includes(includeIncomplete) ? period.floor(dateStart) : period.ceil(dateStart);
    const t1 = ['end', 'both'].includes(includeIncomplete) ? period.ceil(dateEnd) : period.floor(dateEnd);

    const keys = Object.keys(rows[0]).filter(key => key !== 'date');

    // In d3, ranges include the start value but exclude the end value, so this yields the
    // start of every period within [t0, t1) and nothing at all if t0 is not before t1
    return period.range(t0, t1).map(
        function(periodStart)
        {
            const periodEnd = period.offset(periodStart, 1);
            const bucket = rows.filter(row => row['date'] >= periodStart && row['date'] < periodEnd);

            const aggregatedRow = {'date': periodStart};

            for (const key of keys)
                aggregatedRow[key] = (bucket.length === 0) ? undefined : aggregate(bucket, row => row[key]);

            return aggregatedRow;
        });
}
239+
130240
function createHistoryChart(canvas)
131241
{
132242
const url = $(canvas).data('url');
@@ -153,48 +263,12 @@ function createHistoryChart(canvas)
153263

154264
const context = canvas.getContext('2d');
155265

156-
if ($(canvas).data('config') && 'aggregate' in $(canvas).data('config') &&
157-
$(canvas).data('config').aggregate == 'weekly')
158-
{
159-
let aggregatedData = Array();
160-
data.sort(
161-
function(row1, row2)
162-
{
163-
let date1 = new Date(row1['date']);
164-
let date2 = new Date(row2['date']);
165-
return date1 - date2;
166-
});
167-
168-
let currentRow = Object();
169-
170-
for (let i = 0; i < data.length; i++)
171-
{
172-
if (i % 7 == 0)
173-
$.each(Object.keys(data[i]).slice(1),
174-
function(keyID, key)
175-
{
176-
currentRow[key] = 0;
177-
});
178-
179-
currentRow['date'] = data[i]['date'];
180-
181-
$.each(Object.keys(data[i]).slice(1),
182-
function(keyID, key)
183-
{
184-
currentRow[key] += data[i][key];
185-
});
186-
187-
if (i % 7 == 6)
188-
// Store a copy of the aggregated data
189-
aggregatedData.push($.extend({}, currentRow));
190-
}
191-
192-
data = aggregatedData;
193-
}
194-
195266
if ($(canvas).data('config') && 'sliceData' in $(canvas).data('config'))
196267
data = data.slice($(canvas).data('config').sliceData[0], $(canvas).data('config').sliceData[1]);
197268

269+
if ($(canvas).data('config') && 'aggregate' in $(canvas).data('config'))
270+
data = aggregateTimeData(data, $(canvas).data('config').aggregate);
271+
198272
const originalDataSeries = Object.keys(data[0]).slice(1);
199273

200274
let dataSeries, visibleDataSeries;

docs/pr-usage.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ <h3>Pull Request Usage</h3>
1313
class="chart"
1414
data-url="{{ site.dataURL }}/pull-request-usage.tsv"
1515
data-type="history"
16+
data-config='{"aggregate": {"period": "month", "method": "first", "includeIncomplete": "both"}}'
1617
></canvas>
1718
</div>
1819
</div>

docs/spec/charts.js

Lines changed: 156 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
1-
/* global createCollaborationChart, createHistoryChart, createList, createTable, createSpinner */
1+
/* global
2+
aggregateTimeData,
3+
createCollaborationChart,
4+
createHistoryChart,
5+
createList,
6+
createTable,
7+
createSpinner,
8+
d3,
9+
*/
210

311
describe('global charts.js', function()
412
{
@@ -62,4 +70,151 @@ describe('global charts.js', function()
6270
});
6371
});
6472
});
73+
describe('aggregation for time series', function()
74+
{
75+
// Generate one row per day from startDate to endDate (both inclusive, given as ISO date
// strings). Each row has the form {'date': <Date>, 'value': generator(i)}, where i is the
// zero-based index of the day within the range
function generateData(startDate, endDate, generator)
{
    const firstDay = d3.isoParse(startDate);
    // d3 ranges exclude the end value, so move it one day past the last requested date
    const dayAfterLast = d3.utcDay.offset(d3.isoParse(endDate), 1);

    return d3.utcDay.range(firstDay, dayAfterLast)
        .map((date, i) => ({'date': date, 'value': generator(i)}));
}
86+
87+
// Builds a generator that maps an index i to start + i. If a (truthy) modulo is given,
// the result is additionally reduced modulo that value
function integerRangeGenerator(start, modulo)
{
    const shifted = (index => start + index);

    return modulo ? (index => shifted(index) % modulo) : shifted;
}
95+
96+
// Formats a date as a YYYY-MM-DD string in UTC, for comparing period starts against expected dates
const dateToString = d3.utcFormat('%Y-%m-%d');
97+
98+
it('should aggregate over weeks correctly', function()
{
    const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'both'};
    // Values cycle through 0..27, that is, they repeat every four weeks
    const generator = integerRangeGenerator(0, 28);
    // 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
    const data = generateData('2018-01-01', '2018-09-30', generator);
    const aggregatedData = aggregateTimeData(data, aggregationConfig);

    // 273 days starting on a Monday make exactly 39 full weeks
    expect(aggregatedData.length).toEqual(39);
    expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
    expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
    expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
    expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
    expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
    expect(aggregatedData[0]['value']).toEqual(6);
    expect(aggregatedData[1]['value']).toEqual(13);
    expect(aggregatedData[2]['value']).toEqual(20);
    expect(aggregatedData[4]['value']).toEqual(6);
    expect(aggregatedData[5]['value']).toEqual(13);
    expect(aggregatedData[36]['value']).toEqual(6);
    expect(aggregatedData[37]['value']).toEqual(13);
    expect(aggregatedData[38]['value']).toEqual(20);
});
121+
122+
it('should not have off-by-one errors (1)', function()
{
    const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'both'};
    const generator = integerRangeGenerator(27, 28);
    // 2017-12-31 is a Sunday, and 2018-10-01 is a Monday
    const data = generateData('2017-12-31', '2018-10-01', generator);
    const aggregatedData = aggregateTimeData(data, aggregationConfig);

    // One partial week up front (only its Sunday), 39 full weeks, and one partial week
    // at the end (only its Monday)
    expect(aggregatedData.length).toEqual(41);
    expect(dateToString(aggregatedData[0]['date'])).toEqual('2017-12-25');
    expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-01');
    expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-08');
    expect(dateToString(aggregatedData[3]['date'])).toEqual('2018-01-15');
    expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-17');
    expect(dateToString(aggregatedData[39]['date'])).toEqual('2018-09-24');
    expect(dateToString(aggregatedData[40]['date'])).toEqual('2018-10-01');
    expect(aggregatedData[0]['value']).toEqual(27);
    expect(aggregatedData[1]['value']).toEqual(6);
    expect(aggregatedData[39]['value']).toEqual(20);
    expect(aggregatedData[40]['value']).toEqual(21);
});
143+
144+
it('should not have off-by-one errors (2)', function()
{
    const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'both'};
    const generator = integerRangeGenerator(1, 28);
    // 2018-01-02 is a Tuesday, and 2018-09-29 is a Saturday
    const data = generateData('2018-01-02', '2018-09-29', generator);
    const aggregatedData = aggregateTimeData(data, aggregationConfig);

    // The first week lacks its Monday and the last week lacks its Sunday, but both
    // incomplete weeks are kept
    expect(aggregatedData.length).toEqual(39);
    expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
    expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
    expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
    expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
    expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
    expect(aggregatedData[0]['value']).toEqual(6);
    expect(aggregatedData[1]['value']).toEqual(13);
    expect(aggregatedData[37]['value']).toEqual(13);
    expect(aggregatedData[38]['value']).toEqual(19);
});
163+
164+
it('should not include incomplete periods with incomplete data if requested', function()
{
    const aggregationConfig = {'period': 'week', 'method': 'max', 'includeIncomplete': 'none'};
    const generator = integerRangeGenerator(1, 28);
    // 2018-01-02 is a Tuesday, and 2018-09-29 is a Saturday
    const data = generateData('2018-01-02', '2018-09-29', generator);
    const aggregatedData = aggregateTimeData(data, aggregationConfig);

    // The data touches 39 weeks, of which the incomplete first and last ones are dropped
    expect(aggregatedData.length).toEqual(37);
    expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-08');
    expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-15');
    expect(dateToString(aggregatedData[35]['date'])).toEqual('2018-09-10');
    expect(dateToString(aggregatedData[36]['date'])).toEqual('2018-09-17');
    expect(aggregatedData[0]['value']).toEqual(13);
    expect(aggregatedData[1]['value']).toEqual(20);
    expect(aggregatedData[35]['value']).toEqual(6);
    expect(aggregatedData[36]['value']).toEqual(13);
});
182+
183+
it('should aggregate sums correctly', function()
{
    const aggregationConfig = {'period': 'week', 'method': 'sum', 'includeIncomplete': 'both'};
    // Values cycle through 0..9, so weekly sums differ from week to week
    const generator = integerRangeGenerator(0, 10);
    // 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
    const data = generateData('2018-01-01', '2018-09-30', generator);
    const aggregatedData = aggregateTimeData(data, aggregationConfig);

    // 273 days starting on a Monday make exactly 39 full weeks
    expect(aggregatedData.length).toEqual(39);
    expect(aggregatedData[0]['value']).toEqual(21);
    expect(aggregatedData[1]['value']).toEqual(30);
    expect(aggregatedData[2]['value']).toEqual(39);
    expect(aggregatedData[36]['value']).toEqual(35);
    expect(aggregatedData[37]['value']).toEqual(24);
    expect(aggregatedData[38]['value']).toEqual(33);
});
199+
200+
it('should aggregate over months correctly', function()
{
    const aggregationConfig = {'period': 'month', 'method': 'first', 'includeIncomplete': 'both'};
    const generator = integerRangeGenerator(9, 10);
    // The data covers one day of December 2017, all of 2018, and one day of January 2019
    const data = generateData('2017-12-31', '2019-01-01', generator);
    const aggregatedData = aggregateTimeData(data, aggregationConfig);

    // Two incomplete boundary months plus the twelve months of 2018
    expect(aggregatedData.length).toEqual(14);
    expect(dateToString(aggregatedData[0]['date'])).toEqual('2017-12-01');
    expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-01');
    expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-02-01');
    expect(dateToString(aggregatedData[12]['date'])).toEqual('2018-12-01');
    expect(dateToString(aggregatedData[13]['date'])).toEqual('2019-01-01');
    expect(aggregatedData[0]['value']).toEqual(9);
    expect(aggregatedData[1]['value']).toEqual(0);
    expect(aggregatedData[2]['value']).toEqual(1);
    expect(aggregatedData[12]['value']).toEqual(4);
    expect(aggregatedData[13]['value']).toEqual(5);
});
219+
});
65220
});

0 commit comments

Comments
 (0)