Skip to content

Commit 379562a

Browse files
committed
Provide generalized aggregation
This implements a generalized method for aggregating time-series data. Data can be aggregated over week or month intervals with a variety of aggregation methods to choose from. This will be useful for providing chart views at different levels (such as two-year periods vs. just showing the last month). Additionally, the generalized form of aggregation can be used to smooth out graphs where the sampling frequency changed with an update to Hubble Enterprise.

The aggregation is done by splitting the time data into subsequent, gapless periods of time (weeks starting with Mondays or months), for each of which the aggregated values are then computed and returned. Aggregation methods define how to aggregate the values within individual time periods. The following aggregation methods are supported:

- sum
- mean
- min
- max
- first (the chronologically first available value for that period)
- last
- median

Periods with incomplete data at the beginning or the end of the time series are excluded from the aggregation.

Finally, the pull request usage chart is changed to make use of the new aggregation facilities to reduce the granularity from daily to monthly data for now. This might be changed when we implement detail views.

I also added several unit tests to check the aggregation methods (for off-by-one errors in particular) as well as a short piece of documentation on the new configuration options.
1 parent 05528b6 commit 379562a

File tree

5 files changed

+253
-41
lines changed

5 files changed

+253
-41
lines changed

docs/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ For details on how each kind of chart is rendered, take a look at [`charts.js`](
4949
| `series ` | array of strings | only include these data series and drop all others (referenced by TSV table headings) |
5050
| `visibleSeries ` | array of strings | only show the listed data series and hide all others initially (referenced by TSV table headings) |
5151
| `sliceData ` | array `[t0, t1]` | slice the data from the TSV file as if `data.slice(t0, t1)` was called |
52-
| `aggregate ` | weekly | if set to `weekly`, aggregate the data by week by computing the sum of the values within each week |
52+
| `aggregate ` | dictionary (see below) | defines how data should be aggregated (default: `undefined`, which leaves the data untouched) |
53+
| `aggregate.period` | `week`, `month` | specifies the range over which the data shall be aggregated |
54+
| `aggregate.method` | `sum`, `mean`, `min`, `max`, `first`, `last`, `median` | specifies the aggregation method; `first` and `last` select the chronologically first or last data point present in each period, respectively |
5355
| `showRawDataLink` | `true`, `false` | show the link to download the chart’s raw data (default: `true`) |
5456

5557
##### List Charts

docs/assets/js/charts.js

Lines changed: 107 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,110 @@ function createSpinner(canvas)
142142
};
143143
}
144144

145+
/**
 * Aggregates time-series rows over consecutive, gapless periods (weeks starting on Mondays, or months).
 *
 * Every input row is expected to carry a 'date' entry (anything d3.isoParse accepts) plus one entry per data
 * series. The set of series is taken from the chronologically first row. The input array and its rows are
 * left untouched; aggregation works on a sorted copy.
 *
 * Periods that are only partially covered at the beginning or the end of the data are dropped. The end of the
 * covered range is computed as "last date plus one day", so this assumes daily sampling.
 *
 * @param {Array<Object>} data - rows of the form {date: ..., <series>: <value>, ...}
 * @param {Object} aggregationConfig - aggregation settings
 * @param {string} aggregationConfig.period - 'week' or 'month'
 * @param {string} aggregationConfig.method - 'sum', 'mean', 'median', 'min', 'max', 'first', or 'last'
 * @returns {Array<Object>} one row per complete period; 'date' is the start of the period (a Date object) and
 *     every series holds the aggregated value, or undefined if the period contains no data points
 * @throws {string} if data is not an array, or if the period or the method is unknown (errors are thrown as
 *     plain strings, which is kept for backward compatibility)
 */
function aggregateTimeData(data, aggregationConfig)
{
    if (!(data instanceof Array))
        throw 'expected data array as input';

    // Nothing to aggregate; still return an array so that the return type stays consistent
    if (data.length < 1)
        return [];

    const periodName = aggregationConfig['period'];
    const methodName = aggregationConfig['method'];

    let interval;

    switch (periodName)
    {
        case 'week':
            interval = d3.utcMonday;
            break;
        case 'month':
            interval = d3.utcMonth;
            break;
        default:
            throw 'unknown aggregation period "' + periodName + '"';
    }

    // Each aggregator takes the rows of one period and an accessor that extracts the value of one series
    const aggregators = new Map([
        ['sum', d3.sum],
        ['mean', d3.mean],
        ['median', d3.median],
        ['min', d3.min],
        ['max', d3.max],
        ['first', (rows, accessor) => accessor(rows[0])],
        ['last', (rows, accessor) => accessor(rows[rows.length - 1])],
    ]);

    // Reject unknown methods up front instead of only when the first non-empty period is processed
    if (!aggregators.has(methodName))
        throw 'unknown aggregation method "' + methodName + '"';

    const aggregate = aggregators.get(methodName);

    // Work on shallow copies with proper date objects, so that the caller's data is neither re-parsed nor
    // re-ordered
    const rows = data.map(row => Object.assign({}, row, {'date': d3.isoParse(row['date'])}));

    // Sort data, just in case it isn't already
    rows.sort((row1, row2) => row1['date'] - row2['date']);

    const dateStart = rows[0]['date'];
    // Ranges are exclusive, so add one more day to include the last date
    const dateEnd = d3.utcDay.offset(rows[rows.length - 1]['date'], 1);

    // Don't use incomplete periods at the beginning and the end of the data
    const rangeStart = interval.ceil(dateStart);
    // In d3, ranges include the start value but exclude the end value
    // The end of the last complete period is needed as a boundary as well, so add one more period
    const rangeEnd = interval.offset(interval.floor(dateEnd), 1);
    const boundaries = interval.range(rangeStart, rangeEnd);

    const seriesKeys = Object.keys(rows[0]).filter(key => key !== 'date');

    const aggregatedData = [];

    // Each pair of neighboring boundaries delimits one complete period
    for (let i = 0; i + 1 < boundaries.length; i++)
    {
        const periodStart = boundaries[i];
        const periodEnd = boundaries[i + 1];

        // Note that this assumes complete data in the period
        // Should data points be missing, aggregation methods such as the sum will lead to results that can't be
        // compared to periods with complete data
        // Hence, the maintainers of the data need to ensure that the input is well-formed
        const rowsInPeriod = rows.filter(row => row['date'] >= periodStart && row['date'] < periodEnd);

        const aggregatedRow = {'date': periodStart};

        for (const key of seriesKeys)
        {
            aggregatedRow[key] = rowsInPeriod.length === 0
                ? undefined
                : aggregate(rowsInPeriod, row => row[key]);
        }

        aggregatedData.push(aggregatedRow);
    }

    return aggregatedData;
}
248+
145249
function createHistoryChart(canvas)
146250
{
147251
const url = $(canvas).data('url');
@@ -169,47 +273,12 @@ function createHistoryChart(canvas)
169273

170274
const context = canvas.getContext('2d');
171275

172-
if (readConfig($(canvas), 'aggregate') == 'weekly')
173-
{
174-
let aggregatedData = Array();
175-
data.sort(
176-
function(row1, row2)
177-
{
178-
let date1 = new Date(row1['date']);
179-
let date2 = new Date(row2['date']);
180-
return date1 - date2;
181-
});
182-
183-
let currentRow = Object();
184-
185-
for (let i = 0; i < data.length; i++)
186-
{
187-
if (i % 7 == 0)
188-
$.each(Object.keys(data[i]).slice(1),
189-
function(keyID, key)
190-
{
191-
currentRow[key] = 0;
192-
});
193-
194-
currentRow['date'] = data[i]['date'];
195-
196-
$.each(Object.keys(data[i]).slice(1),
197-
function(keyID, key)
198-
{
199-
currentRow[key] += data[i][key];
200-
});
201-
202-
if (i % 7 == 6)
203-
// Store a copy of the aggregated data
204-
aggregatedData.push($.extend({}, currentRow));
205-
}
206-
207-
data = aggregatedData;
208-
}
209-
210276
if (hasConfig($(canvas), 'sliceData'))
211277
data = data.slice(readConfig($(canvas), 'sliceData')[0], readConfig($(canvas), 'sliceData')[1]);
212278

279+
if (hasConfig($(canvas), 'aggregate'))
280+
data = aggregateTimeData(data, $(canvas).data('config').aggregate);
281+
213282
const originalDataSeries = Object.keys(data[0]).slice(1);
214283

215284
const dataSeries = hasConfig($(canvas), 'series')

docs/pr-total.html

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ <h3>Pull Requests (Total, by Week)</h3>
1717
"visibleSeries": [
1818
"merged"
1919
],
20-
"aggregate": "weekly"
20+
"aggregate": {
21+
"period": "week",
22+
"method": "sum"
23+
}
2124
}'></canvas>
2225
<div class="info-box">
2326
<p>

docs/pr-usage.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ <h3>Pull Request Usage</h3>
99
<canvas
1010
data-url="{{ site.dataURL }}/pull-request-usage.tsv"
1111
data-type="history"
12+
data-config='{"aggregate": {"period": "month", "method": "first"}}'
1213
></canvas>
1314
<div class="info-box">
1415
<p>

docs/spec/charts.js

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
1-
/* global createChordChart, createHistoryChart, createList, createTable, createSpinner */
1+
/* global
2+
aggregateTimeData,
3+
createChordChart,
4+
createHistoryChart,
5+
createList,
6+
createTable,
7+
createSpinner,
8+
d3,
9+
*/
210

311
describe('global charts.js', function()
412
{
@@ -62,4 +70,133 @@ describe('global charts.js', function()
6270
});
6371
});
6472
});
73+
// Test suite for aggregateTimeData: checks period boundaries (off-by-one errors in particular) and the
// aggregated values for weekly and monthly aggregation
describe('aggregation for time series', function()
{
    // Generate daily data from startDate to endDate (both inclusive) with a generator functor
    // The generator is called with the zero-based index of each day and returns the value for that day
    function generateData(startDate, endDate, generator)
    {
        // d3 ranges exclude the end value, so add one more day to include endDate
        const dates = d3.utcDay.range(d3.isoParse(startDate), d3.utcDay.offset(d3.isoParse(endDate), 1));
        const data = [];

        for (let i = 0; i < dates.length; i++)
            data.push({'date': dates[i], 'value': generator(i)});

        return data;
    }

    // Integer range generator: yields start, start + 1, ... (wrapping around at modulo if one is given)
    function integerRangeGenerator(start, modulo)
    {
        if (modulo)
            return (i => (start + i) % modulo);

        return (i => start + i);
    }

    const dateToString = d3.utcFormat('%Y-%m-%d');

    it('should aggregate over weeks correctly', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max'};
        const generator = integerRangeGenerator(0, 28);
        // 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
        const data = generateData('2018-01-01', '2018-09-30', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(39);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
        expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
        expect(aggregatedData[0]['value']).toEqual(6);
        expect(aggregatedData[1]['value']).toEqual(13);
        expect(aggregatedData[2]['value']).toEqual(20);
        expect(aggregatedData[4]['value']).toEqual(6);
        expect(aggregatedData[5]['value']).toEqual(13);
        expect(aggregatedData[36]['value']).toEqual(6);
        expect(aggregatedData[37]['value']).toEqual(13);
        expect(aggregatedData[38]['value']).toEqual(20);
    });

    it('should not have off-by-one errors (1)', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max'};
        const generator = integerRangeGenerator(27, 28);
        // 2017-12-31 is a Sunday, and 2018-10-01 is a Monday
        // Both lie in incomplete weeks, which must be dropped
        const data = generateData('2017-12-31', '2018-10-01', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(39);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
        expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
        expect(aggregatedData[0]['value']).toEqual(6);
        expect(aggregatedData[1]['value']).toEqual(13);
        expect(aggregatedData[2]['value']).toEqual(20);
        expect(aggregatedData[4]['value']).toEqual(6);
        expect(aggregatedData[5]['value']).toEqual(13);
        expect(aggregatedData[36]['value']).toEqual(6);
        expect(aggregatedData[37]['value']).toEqual(13);
        expect(aggregatedData[38]['value']).toEqual(20);
    });

    it('should not have off-by-one errors (2)', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max'};
        const generator = integerRangeGenerator(1, 28);
        // 2018-01-02 is a Tuesday, and 2018-09-29 is a Saturday
        // The first and the last week are incomplete and must be dropped
        const data = generateData('2018-01-02', '2018-09-29', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(37);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[35]['date'])).toEqual('2018-09-10');
        expect(dateToString(aggregatedData[36]['date'])).toEqual('2018-09-17');
        expect(aggregatedData[0]['value']).toEqual(13);
        expect(aggregatedData[1]['value']).toEqual(20);
        expect(aggregatedData[3]['value']).toEqual(6);
        expect(aggregatedData[4]['value']).toEqual(13);
        expect(aggregatedData[35]['value']).toEqual(6);
        expect(aggregatedData[36]['value']).toEqual(13);
    });

    it('should aggregate sums correctly', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'sum'};
        const generator = integerRangeGenerator(0, 10);
        // 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
        const data = generateData('2018-01-01', '2018-09-30', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(39);
        expect(aggregatedData[0]['value']).toEqual(21);
        expect(aggregatedData[1]['value']).toEqual(30);
        expect(aggregatedData[2]['value']).toEqual(39);
        expect(aggregatedData[36]['value']).toEqual(35);
        expect(aggregatedData[37]['value']).toEqual(24);
        expect(aggregatedData[38]['value']).toEqual(33);
    });

    it('should aggregate over months correctly', function()
    {
        const aggregationConfig = {'period': 'month', 'method': 'first'};
        const generator = integerRangeGenerator(9, 10);
        // 2017-12-31 and 2019-01-01 lie in incomplete months, which must be dropped
        const data = generateData('2017-12-31', '2019-01-01', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(12);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-02-01');
        expect(dateToString(aggregatedData[10]['date'])).toEqual('2018-11-01');
        expect(dateToString(aggregatedData[11]['date'])).toEqual('2018-12-01');
        expect(aggregatedData[0]['value']).toEqual(0);
        expect(aggregatedData[1]['value']).toEqual(1);
        expect(aggregatedData[10]['value']).toEqual(4);
        expect(aggregatedData[11]['value']).toEqual(4);
    });
});
65202
});

0 commit comments

Comments
 (0)