Skip to content

Commit 01e714c

Browse files
committed
Provide generalized aggregation
This implements a generalized method for aggregating time-series data. Data can be aggregated over week or month intervals with a variety of aggregation methods to choose from. This will be useful for providing chart views at different levels (such as two-year periods vs. just showing the last month). Additionally, the generalized form of aggregation can be used to smooth out graphs where the sampling frequency changed with an update to Hubble Enterprise.

The aggregation is done by splitting the time data into consecutive, gapless periods of time (weeks starting with Mondays or months), for each of which the aggregated values are then computed and returned.

Aggregation methods define how to aggregate the values within individual time periods. The following aggregation methods are supported:

- sum
- mean
- min
- max
- first (the chronologically first available value for that period)
- last
- median

Periods with incomplete data at the beginning or the end of the time series are excluded from the aggregation.

Finally, the pull request usage chart is changed to make use of the new aggregation facilities to reduce the granularity from daily to monthly data for now. This might be changed when we implement detail views.

I also added several unit tests to check the aggregation methods (for off-by-one errors in particular) as well as a short piece of documentation on the new configuration options.
1 parent 05528b6 commit 01e714c

File tree

6 files changed

+256
-41
lines changed

6 files changed

+256
-41
lines changed

docs/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ For details on how each kind of chart is rendered, take a look at [`charts.js`](
4949
| `series ` | array of strings | only include these data series and drop all others (referenced by TSV table headings) |
5050
| `visibleSeries ` | array of strings | only show the listed data series and hide all others initially (referenced by TSV table headings) |
5151
| `sliceData ` | array `[t0, t1]` | slice the data from the TSV file as if `data.slice(t0, t1)` was called |
52-
| `aggregate ` | weekly | if set to `weekly`, aggregate the data by week by computing the sum of the values within each week |
52+
| `aggregate ` | dictionary (see below) | defines how data should be aggregated (default: `undefined`, which leaves the data untouched) |
53+
| `aggregate.period` | `week`, `month` | specifies the range over which the data shall be aggregated |
54+
| `aggregate.method` | `sum`, `mean`, `min`, `max`, `first`, `last`, `median` | specifies the aggregation method; `first` and `last` select the chronologically first or last data point present in each period, respectively |
5355
| `showRawDataLink` | `true`, `false` | show the link to download the chart’s raw data (default: `true`) |
5456

5557
##### List Charts

docs/assets/js/charts.js

Lines changed: 108 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,111 @@ function createSpinner(canvas)
142142
};
143143
}
144144

145+
/**
 * Aggregates time-series rows over consecutive, gapless periods of time.
 *
 * The rows are sorted chronologically and split into periods (UTC weeks starting on Mondays, or UTC
 * months). For every complete period, one output row is produced whose 'date' is the start of the
 * period and whose remaining keys hold the aggregated values of the input rows within that period.
 * Incomplete periods at the beginning and the end of the data are excluded.
 *
 * The input array and its rows are not modified.
 *
 * @param {Array<Object>} data - rows with a 'date' property that d3.isoParse can handle, plus the values to
 *     aggregate (the set of keys is taken from the chronologically first row)
 * @param {Object} aggregationConfig - aggregation settings
 * @param {string} aggregationConfig.period - 'week' or 'month'
 * @param {string} aggregationConfig.method - 'sum', 'mean', 'median', 'first', 'last', 'min', or 'max'
 * @returns {Array<Object>} one row per complete period (empty if there is no input data); values of periods
 *     without any data points are undefined
 * @throws {string} if data is not an array, or if the aggregation period or method is unknown
 */
function aggregateTimeData(data, aggregationConfig)
{
    if (!Array.isArray(data))
        throw 'expected data array as input';

    // Always hand back an array so that callers can treat the result uniformly
    if (data.length < 1)
        return [];

    // Turn date strings into proper date objects and sort the rows, just in case they aren't already.
    // This is done on shallow copies so that the caller's rows (and their order) are left untouched
    const rows = data
        .map(row => Object.assign({}, row, {'date': d3.isoParse(row['date'])}))
        .sort((row1, row2) => row1['date'] - row2['date']);

    const dateStart = rows[0]['date'];
    // Ranges are exclusive, so add one more day to include the last date
    const dateEnd = d3.utcDay.offset(rows[rows.length - 1]['date'], 1);

    let period;

    switch (aggregationConfig['period'])
    {
        case 'week':
            period = d3.utcMonday;
            break;
        case 'month':
            period = d3.utcMonth;
            break;
        default:
            throw 'unknown aggregation period "' + aggregationConfig['period'] + '"';
    }

    // Don't use incomplete periods at the beginning and the end of the data
    const t0 = period.ceil(dateStart);
    // In d3, ranges include the start value but exclude the end value.
    // The boundary that closes the last complete period is needed as well, so add one more period
    const t1 = period.offset(period.floor(dateEnd), 1);
    // Period boundaries: each pair of neighboring entries delimits one complete period
    const periods = period.range(t0, t1);

    // Exclude the date itself from aggregation
    const keys = Object.keys(rows[0]).filter(key => key !== 'date');

    const aggregatedData = [];

    for (let i = 0; i < periods.length - 1; i++)
    {
        const periodStart = periods[i];
        const periodEnd = periods[i + 1];

        // Note that this assumes complete data in the period.
        // Should data points be missing, aggregation methods such as the sum will lead to results that can't be
        // compared to periods with complete data.
        // Hence, the maintainers of the data need to ensure that the input is well-formed
        const rowsInPeriod = rows.filter(row => row['date'] >= periodStart && row['date'] < periodEnd);

        // The date comes first, followed by the aggregated values in the order of the input columns
        const aggregatedRow = {'date': periodStart};

        for (const key of keys)
        {
            if (rowsInPeriod.length === 0)
            {
                aggregatedRow[key] = undefined;
                continue;
            }

            const accessor = (row => row[key]);

            switch (aggregationConfig['method'])
            {
                case 'sum':
                    aggregatedRow[key] = d3.sum(rowsInPeriod, accessor);
                    break;
                case 'mean':
                    aggregatedRow[key] = d3.mean(rowsInPeriod, accessor);
                    break;
                case 'median':
                    aggregatedRow[key] = d3.median(rowsInPeriod, accessor);
                    break;
                case 'first':
                    // The rows are sorted, so this is the chronologically first data point
                    aggregatedRow[key] = rowsInPeriod[0][key];
                    break;
                case 'last':
                    aggregatedRow[key] = rowsInPeriod[rowsInPeriod.length - 1][key];
                    break;
                case 'min':
                    aggregatedRow[key] = d3.min(rowsInPeriod, accessor);
                    break;
                case 'max':
                    aggregatedRow[key] = d3.max(rowsInPeriod, accessor);
                    break;
                default:
                    throw 'unknown aggregation method "' + aggregationConfig['method'] + '"';
            }
        }

        aggregatedData.push(aggregatedRow);
    }

    return aggregatedData;
}
249+
145250
function createHistoryChart(canvas)
146251
{
147252
const url = $(canvas).data('url');
@@ -169,47 +274,12 @@ function createHistoryChart(canvas)
169274

170275
const context = canvas.getContext('2d');
171276

172-
if (readConfig($(canvas), 'aggregate') == 'weekly')
173-
{
174-
let aggregatedData = Array();
175-
data.sort(
176-
function(row1, row2)
177-
{
178-
let date1 = new Date(row1['date']);
179-
let date2 = new Date(row2['date']);
180-
return date1 - date2;
181-
});
182-
183-
let currentRow = Object();
184-
185-
for (let i = 0; i < data.length; i++)
186-
{
187-
if (i % 7 == 0)
188-
$.each(Object.keys(data[i]).slice(1),
189-
function(keyID, key)
190-
{
191-
currentRow[key] = 0;
192-
});
193-
194-
currentRow['date'] = data[i]['date'];
195-
196-
$.each(Object.keys(data[i]).slice(1),
197-
function(keyID, key)
198-
{
199-
currentRow[key] += data[i][key];
200-
});
201-
202-
if (i % 7 == 6)
203-
// Store a copy of the aggregated data
204-
aggregatedData.push($.extend({}, currentRow));
205-
}
206-
207-
data = aggregatedData;
208-
}
209-
210277
if (hasConfig($(canvas), 'sliceData'))
211278
data = data.slice(readConfig($(canvas), 'sliceData')[0], readConfig($(canvas), 'sliceData')[1]);
212279

280+
if (hasConfig($(canvas), 'aggregate'))
281+
data = aggregateTimeData(data, $(canvas).data('config').aggregate);
282+
213283
const originalDataSeries = Object.keys(data[0]).slice(1);
214284

215285
const dataSeries = hasConfig($(canvas), 'series')

docs/pr-total.html

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ <h3>Pull Requests (Total, by Week)</h3>
1717
"visibleSeries": [
1818
"merged"
1919
],
20-
"aggregate": "weekly"
20+
"aggregate": {
21+
"period": "week",
22+
"method": "sum"
23+
}
2124
}'></canvas>
2225
<div class="info-box">
2326
<p>

docs/pr-usage.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ <h3>Pull Request Usage</h3>
99
<canvas
1010
data-url="{{ site.dataURL }}/pull-request-usage.tsv"
1111
data-type="history"
12+
data-config='{"aggregate": {"period": "month", "method": "first"}}'
1213
></canvas>
1314
<div class="info-box">
1415
<p>

docs/spec/.eslintrc.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,8 @@
66
"node": true,
77
"jasmine": true,
88
"jquery": true
9+
},
10+
"globals": {
11+
"d3": false
912
}
1013
}

docs/spec/charts.js

Lines changed: 137 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
1-
/* global createChordChart, createHistoryChart, createList, createTable, createSpinner */
1+
/* global
2+
aggregateTimeData,
3+
createChordChart,
4+
createHistoryChart,
5+
createList,
6+
createTable,
7+
createSpinner,
8+
*/
29

310
describe('global charts.js', function()
411
{
@@ -62,4 +69,133 @@ describe('global charts.js', function()
6269
});
6370
});
6471
});
72+
describe('aggregation for time series', function()
{
    // Generate one row per day from startDate to endDate (both inclusive, ISO date strings).
    // The value of the i-th row is generator(i)
    function generateData(startDate, endDate, generator)
    {
        // d3 ranges exclude the end value, so add one more day to include endDate
        let dates = d3.utcDay.range(d3.isoParse(startDate), d3.utcDay.offset(d3.isoParse(endDate), 1));
        let data = Array();

        for (let i = 0; i < dates.length; i++)
            data.push({'date': dates[i], 'value': generator(i)});

        return data;
    }

    // Integer range generator: yields start, start + 1, ... and wraps around at modulo (if given)
    function integerRangeGenerator(start, modulo)
    {
        if (modulo)
            return (i => (start + i) % modulo);

        return (i => start + i);
    }

    const dateToString = d3.utcFormat('%Y-%m-%d');

    it('should aggregate over weeks correctly', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max'};
        const generator = integerRangeGenerator(0, 28);
        // 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
        const data = generateData('2018-01-01', '2018-09-30', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        // Compare the length with a matcher (assigning to length would assert nothing and resize the array)
        expect(aggregatedData.length).toEqual(39);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
        expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
        expect(aggregatedData[0]['value']).toEqual(6);
        expect(aggregatedData[1]['value']).toEqual(13);
        expect(aggregatedData[2]['value']).toEqual(20);
        expect(aggregatedData[4]['value']).toEqual(6);
        expect(aggregatedData[5]['value']).toEqual(13);
        expect(aggregatedData[36]['value']).toEqual(6);
        expect(aggregatedData[37]['value']).toEqual(13);
        expect(aggregatedData[38]['value']).toEqual(20);
    });

    it('should not have off-by-one errors (1)', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max'};
        const generator = integerRangeGenerator(27, 28);
        // 2017-12-31 is a Sunday, and 2018-10-01 is a Monday
        const data = generateData('2017-12-31', '2018-10-01', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        // The single days before the first and after the last full week must not form periods of their own
        expect(aggregatedData.length).toEqual(39);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
        expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
        expect(aggregatedData[0]['value']).toEqual(6);
        expect(aggregatedData[1]['value']).toEqual(13);
        expect(aggregatedData[2]['value']).toEqual(20);
        expect(aggregatedData[4]['value']).toEqual(6);
        expect(aggregatedData[5]['value']).toEqual(13);
        expect(aggregatedData[36]['value']).toEqual(6);
        expect(aggregatedData[37]['value']).toEqual(13);
        expect(aggregatedData[38]['value']).toEqual(20);
    });

    it('should not have off-by-one errors (2)', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max'};
        const generator = integerRangeGenerator(1, 28);
        // 2018-01-02 is a Tuesday, and 2018-09-29 is a Saturday
        const data = generateData('2018-01-02', '2018-09-29', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        // The incomplete first and last weeks must be dropped
        expect(aggregatedData.length).toEqual(37);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[35]['date'])).toEqual('2018-09-10');
        expect(dateToString(aggregatedData[36]['date'])).toEqual('2018-09-17');
        expect(aggregatedData[0]['value']).toEqual(13);
        expect(aggregatedData[1]['value']).toEqual(20);
        expect(aggregatedData[3]['value']).toEqual(6);
        expect(aggregatedData[4]['value']).toEqual(13);
        expect(aggregatedData[35]['value']).toEqual(6);
        expect(aggregatedData[36]['value']).toEqual(13);
    });

    it('should aggregate sums correctly', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'sum'};
        const generator = integerRangeGenerator(0, 10);
        // 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
        const data = generateData('2018-01-01', '2018-09-30', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(39);
        expect(aggregatedData[0]['value']).toEqual(21);
        expect(aggregatedData[1]['value']).toEqual(30);
        expect(aggregatedData[2]['value']).toEqual(39);
        expect(aggregatedData[36]['value']).toEqual(35);
        expect(aggregatedData[37]['value']).toEqual(24);
        expect(aggregatedData[38]['value']).toEqual(33);
    });

    it('should aggregate over months correctly', function()
    {
        const aggregationConfig = {'period': 'month', 'method': 'first'};
        const generator = integerRangeGenerator(9, 10);
        // The data starts one day before and ends one day after the year 2018
        const data = generateData('2017-12-31', '2019-01-01', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(12);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-02-01');
        expect(dateToString(aggregatedData[10]['date'])).toEqual('2018-11-01');
        expect(dateToString(aggregatedData[11]['date'])).toEqual('2018-12-01');
        expect(aggregatedData[0]['value']).toEqual(0);
        expect(aggregatedData[1]['value']).toEqual(1);
        expect(aggregatedData[10]['value']).toEqual(4);
        expect(aggregatedData[11]['value']).toEqual(4);
    });
});
65201
});

0 commit comments

Comments
 (0)