From 03a3d08d9964da634654619c34a4be00a87d52fb Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 9 Oct 2018 08:01:35 +0100 Subject: [PATCH 001/146] Adding skeleton provided by jturner --- src/histogram.rs | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 ++- 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 src/histogram.rs diff --git a/src/histogram.rs b/src/histogram.rs new file mode 100644 index 00000000..fe09326d --- /dev/null +++ b/src/histogram.rs @@ -0,0 +1,58 @@ +/// Wrapper around `Array1` that makes sure the elements are in ascending order. +struct Edges { + edges: Array1, +} + +impl From> for Edges { + fn from(mut edges: Array1) -> Self { + // sort the array in-place + Edges { edges } + } +} + +impl Edges { + fn view(&self) -> ArrayView1 { + self.edges.view() + } + + /// Returns the index of the bin containing the given value, + /// or `None` if none of the bins contain the value. + fn bin_index(&self, value: &A) -> Option { + // binary search for the correct bin + } + + /// Returns the range of the bin containing the given value. 
+ fn bin_range(&self, value: &A) -> Option> + where + A: Clone, + { + let i = self.bin_index(value); + Range { start: self.edges[i].clone(), end: self.edges[i + 1].clone() } + } +} + +struct HistogramCounts { + counts: ArrayD, + edges: Vec>, +} + +struct HistogramDensity { + density: ArrayD, + edges: Vec>, +} + +impl HistogramCounts { + pub fn new(edges: Vec>) -> Self { + let counts = ArrayD::zeros(edges.iter().map(|e| e.len() - 1).collect::>()); + HistogramCounts { counts, edges } + } + + pub fn add_observation(observation: ArrayView1) -> Result<(), NotFound> { + let bin = observation + .iter() + .zip(&self.edges) + .map(|(v, e)| e.bin_index(v).ok_or(NotFound)) + .collect::, _>>()?; + self.counts[bin] += 1; + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index cf8e56d8..57dae2d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,4 +19,5 @@ pub use correlation::CorrelationExt; mod maybe_nan; mod quantile; mod sort; -mod correlation; \ No newline at end of file +mod correlation; +mod histogram; \ No newline at end of file From d1e20bfb39203d804d7d4bf46877dd97d09374da Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 9 Oct 2018 08:02:14 +0100 Subject: [PATCH 002/146] Added some IDE-related files --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 69369904..91a0d835 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ /target **/*.rs.bk Cargo.lock + +# IDE-related +tags +rusty-tags.vi +.vscode \ No newline at end of file From 3cdb3badec251709e7ee7e71652e10ea428155f9 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 9 Oct 2018 08:53:44 +0100 Subject: [PATCH 003/146] Adding missing pieces - now it compiles --- src/histogram.rs | 77 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 10 deletions(-) diff --git a/src/histogram.rs b/src/histogram.rs index fe09326d..61a37499 100644 --- a/src/histogram.rs +++ b/src/histogram.rs @@ -1,24 +1,74 @@ -/// Wrapper 
around `Array1` that makes sure the elements are in ascending order. +use ndarray::prelude::*; +use std::ops::Range; +use std::error; +use std::fmt; + +#[derive(Debug, Clone)] +struct BinNotFound; + +impl fmt::Display for BinNotFound { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "No bin has been found.") + } +} + +impl error::Error for BinNotFound { + fn description(&self) -> &str { + "No bin has been found." + } + + fn cause(&self) -> Option<&error::Error> { + // Generic error, underlying cause isn't tracked. + None + } +} + +/// Wrapper around `Vec` that makes sure the elements are in ascending order. struct Edges { - edges: Array1, + edges: Vec, +} + +impl From> for Edges { + fn from(edges: Array1) -> Self { + let mut edges = edges.to_vec(); + // sort the array in-place + edges.sort_unstable(); + Edges { edges } + } } -impl From> for Edges { - fn from(mut edges: Array1) -> Self { +impl From> for Edges { + fn from(mut edges: Vec) -> Self { // sort the array in-place + edges.sort_unstable(); Edges { edges } } } impl Edges { - fn view(&self) -> ArrayView1 { - self.edges.view() + fn len(&self) -> usize { + self.edges.len() + } + + fn slice(&self) -> &[A] { + &self.edges } /// Returns the index of the bin containing the given value, /// or `None` if none of the bins contain the value. fn bin_index(&self, value: &A) -> Option { // binary search for the correct bin + let n = self.len(); + match self.edges.binary_search(value) { + Ok(i) => Some(i), + Err(i) => { + match i { + 0 => None, + j if j == n => None, + _ => Some(i - 1), + } + } + } } /// Returns the range of the bin containing the given value. 
@@ -27,7 +77,13 @@ impl Edges { A: Clone, { let i = self.bin_index(value); - Range { start: self.edges[i].clone(), end: self.edges[i + 1].clone() } + match i { + Some(j) => Some( + Range { start: self.edges[j].clone(), + end: self.edges[j + 1].clone() } + ), + None => None, + } } } @@ -47,12 +103,13 @@ impl HistogramCounts { HistogramCounts { counts, edges } } - pub fn add_observation(observation: ArrayView1) -> Result<(), NotFound> { + pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { let bin = observation .iter() .zip(&self.edges) - .map(|(v, e)| e.bin_index(v).ok_or(NotFound)) + .map(|(v, e)| e.bin_index(v).ok_or(BinNotFound)) .collect::, _>>()?; - self.counts[bin] += 1; + self.counts[IxDyn(&bin)] += 1; + Ok(()) } } \ No newline at end of file From 3eedbbc4086fc08e8417d7f1fb1db6006461a09e Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 13 Oct 2018 10:52:34 +0100 Subject: [PATCH 004/146] Reusing code of from in from --- src/histogram.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/histogram.rs b/src/histogram.rs index 61a37499..989bc445 100644 --- a/src/histogram.rs +++ b/src/histogram.rs @@ -28,20 +28,18 @@ struct Edges { edges: Vec, } -impl From> for Edges { - fn from(edges: Array1) -> Self { - let mut edges = edges.to_vec(); +impl From> for Edges { + fn from(mut edges: Vec) -> Self { // sort the array in-place edges.sort_unstable(); Edges { edges } } } -impl From> for Edges { - fn from(mut edges: Vec) -> Self { - // sort the array in-place - edges.sort_unstable(); - Edges { edges } +impl From> for Edges { + fn from(edges: Array1) -> Self { + let mut edges = edges.to_vec(); + Self::from(edges) } } From bbaee7df25d43018bb8c9a2cda91b8c2a0b950c6 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 13 Oct 2018 11:54:39 +0100 Subject: [PATCH 005/146] Fixed bugs, better method names, exported methods needed for doc tests. 
--- src/histogram.rs | 81 +++++++++++++++++++++++++++++++++++------------- src/lib.rs | 1 + 2 files changed, 61 insertions(+), 21 deletions(-) diff --git a/src/histogram.rs b/src/histogram.rs index 989bc445..e99e6474 100644 --- a/src/histogram.rs +++ b/src/histogram.rs @@ -4,7 +4,7 @@ use std::error; use std::fmt; #[derive(Debug, Clone)] -struct BinNotFound; +pub struct BinNotFound; impl fmt::Display for BinNotFound { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -23,8 +23,31 @@ impl error::Error for BinNotFound { } } -/// Wrapper around `Vec` that makes sure the elements are in ascending order. -struct Edges { +/// `Edges` is a sorted collection of `A` elements used +/// to represent the boundaries of intervals on +/// a 1-dimensional axis. +/// +/// # Example: +/// +/// ``` +/// extern crate ndarray_stats; +/// extern crate noisy_float; +/// use ndarray_stats::Edges; +/// use noisy_float::types::n64; +/// +/// let unit_interval = Edges::from(vec![n64(0.), n64(1.)]); +/// // left inclusive +/// assert_eq!( +/// unit_interval.bin_range(&n64(0.)).unwrap(), +/// n64(0.)..n64(1.), +/// ); +/// // right exclusive +/// assert_eq!( +/// unit_interval.bin_range(&n64(1.)), +/// None +/// ); +/// ``` +pub struct Edges { edges: Vec, } @@ -44,48 +67,64 @@ impl From> for Edges { } impl Edges { - fn len(&self) -> usize { + pub fn n_intervals(&self) -> usize { + match self.n_edges() { + 0 => 0, + n => n - 1, + } + } + + pub fn n_edges(&self) -> usize { self.edges.len() } - fn slice(&self) -> &[A] { + pub fn slice(&self) -> &[A] { &self.edges } /// Returns the index of the bin containing the given value, /// or `None` if none of the bins contain the value. 
- fn bin_index(&self, value: &A) -> Option { + fn edges_indexes(&self, value: &A) -> Option<(usize, usize)> { // binary search for the correct bin - let n = self.len(); + let n_edges = self.n_edges(); match self.edges.binary_search(value) { - Ok(i) => Some(i), + Ok(i) if i == n_edges-1 => None, + Ok(i) => Some((i, i+1)), Err(i) => { match i { 0 => None, - j if j == n => None, - _ => Some(i - 1), + j if j == n_edges => None, + j => Some((j-1, j)), } } } } + /// Returns the index of the bin containing the given value, + /// or `None` if none of the bins contain the value. + fn bin_index(&self, value: &A) -> Option { + self.edges_indexes(value).map(|t| t.0) + } + /// Returns the range of the bin containing the given value. - fn bin_range(&self, value: &A) -> Option> + pub fn bin_range(&self, value: &A) -> Option> where A: Clone, { - let i = self.bin_index(value); - match i { - Some(j) => Some( - Range { start: self.edges[j].clone(), - end: self.edges[j + 1].clone() } - ), - None => None, - } + let edges_indexes= self.edges_indexes(value); + edges_indexes.map( + |t| { + let (left, right) = t; + Range { + start: self.edges[left].clone(), + end: self.edges[right].clone() + } + } + ) } } -struct HistogramCounts { +pub struct HistogramCounts { counts: ArrayD, edges: Vec>, } @@ -97,7 +136,7 @@ struct HistogramDensity { impl HistogramCounts { pub fn new(edges: Vec>) -> Self { - let counts = ArrayD::zeros(edges.iter().map(|e| e.len() - 1).collect::>()); + let counts = ArrayD::zeros(edges.iter().map(|e| e.n_intervals()).collect::>()); HistogramCounts { counts, edges } } diff --git a/src/lib.rs b/src/lib.rs index 57dae2d9..dfea5aa7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,7 @@ pub use maybe_nan::{MaybeNan, MaybeNanExt}; pub use quantile::{interpolate, QuantileExt}; pub use sort::Sort1dExt; pub use correlation::CorrelationExt; +pub use histogram::{Edges, HistogramCounts, BinNotFound}; mod maybe_nan; mod quantile; From 7b1906123d8b8c3c5eb3cdb5e137906d2c75e1c5 Mon Sep 
17 00:00:00 2001 From: LukeMathWalker Date: Sun, 14 Oct 2018 14:52:51 +0100 Subject: [PATCH 006/146] Reorganized code in a submodule --- src/{histogram.rs => histogram/bins.rs} | 66 ++++--------------------- src/histogram/errors.rs | 22 +++++++++ src/histogram/histograms.rs | 30 +++++++++++ src/histogram/mod.rs | 7 +++ 4 files changed, 68 insertions(+), 57 deletions(-) rename src/{histogram.rs => histogram/bins.rs} (60%) create mode 100644 src/histogram/errors.rs create mode 100644 src/histogram/histograms.rs create mode 100644 src/histogram/mod.rs diff --git a/src/histogram.rs b/src/histogram/bins.rs similarity index 60% rename from src/histogram.rs rename to src/histogram/bins.rs index e99e6474..947d8acd 100644 --- a/src/histogram.rs +++ b/src/histogram/bins.rs @@ -1,27 +1,6 @@ use ndarray::prelude::*; use std::ops::Range; -use std::error; -use std::fmt; - -#[derive(Debug, Clone)] -pub struct BinNotFound; - -impl fmt::Display for BinNotFound { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "No bin has been found.") - } -} - -impl error::Error for BinNotFound { - fn description(&self) -> &str { - "No bin has been found." - } - - fn cause(&self) -> Option<&error::Error> { - // Generic error, underlying cause isn't tracked. - None - } -} +use super::errors::BinNotFound; /// `Edges` is a sorted collection of `A` elements used /// to represent the boundaries of intervals on @@ -88,13 +67,13 @@ impl Edges { // binary search for the correct bin let n_edges = self.n_edges(); match self.edges.binary_search(value) { - Ok(i) if i == n_edges-1 => None, - Ok(i) => Some((i, i+1)), + Ok(i) if i == n_edges - 1 => None, + Ok(i) => Some((i, i + 1)), Err(i) => { match i { 0 => None, j if j == n_edges => None, - j => Some((j-1, j)), + j => Some((j - 1, j)), } } } @@ -102,51 +81,24 @@ impl Edges { /// Returns the index of the bin containing the given value, /// or `None` if none of the bins contain the value. 
- fn bin_index(&self, value: &A) -> Option { + pub (crate) fn bin_index(&self, value: &A) -> Option { self.edges_indexes(value).map(|t| t.0) } /// Returns the range of the bin containing the given value. pub fn bin_range(&self, value: &A) -> Option> - where - A: Clone, + where + A: Clone, { - let edges_indexes= self.edges_indexes(value); + let edges_indexes = self.edges_indexes(value); edges_indexes.map( |t| { let (left, right) = t; Range { start: self.edges[left].clone(), - end: self.edges[right].clone() + end: self.edges[right].clone(), } } ) } } - -pub struct HistogramCounts { - counts: ArrayD, - edges: Vec>, -} - -struct HistogramDensity { - density: ArrayD, - edges: Vec>, -} - -impl HistogramCounts { - pub fn new(edges: Vec>) -> Self { - let counts = ArrayD::zeros(edges.iter().map(|e| e.n_intervals()).collect::>()); - HistogramCounts { counts, edges } - } - - pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { - let bin = observation - .iter() - .zip(&self.edges) - .map(|(v, e)| e.bin_index(v).ok_or(BinNotFound)) - .collect::, _>>()?; - self.counts[IxDyn(&bin)] += 1; - Ok(()) - } -} \ No newline at end of file diff --git a/src/histogram/errors.rs b/src/histogram/errors.rs new file mode 100644 index 00000000..22d40a1b --- /dev/null +++ b/src/histogram/errors.rs @@ -0,0 +1,22 @@ +use std::error; +use std::fmt; + +#[derive(Debug, Clone)] +pub struct BinNotFound; + +impl fmt::Display for BinNotFound { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "No bin has been found.") + } +} + +impl error::Error for BinNotFound { + fn description(&self) -> &str { + "No bin has been found." + } + + fn cause(&self) -> Option<&error::Error> { + // Generic error, underlying cause isn't tracked. 
+ None + } +} diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs new file mode 100644 index 00000000..26df2c61 --- /dev/null +++ b/src/histogram/histograms.rs @@ -0,0 +1,30 @@ +use ndarray::prelude::*; +use super::bins::Edges; +use super::errors::BinNotFound; + +pub struct HistogramCounts { + counts: ArrayD, + edges: Vec>, +} + +struct HistogramDensity { + density: ArrayD, + edges: Vec>, +} + +impl HistogramCounts { + pub fn new(edges: Vec>) -> Self { + let counts = ArrayD::zeros(edges.iter().map(|e| e.n_intervals()).collect::>()); + HistogramCounts { counts, edges } + } + + pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { + let bin = observation + .iter() + .zip(&self.edges) + .map(|(v, e)| e.bin_index(v).ok_or(BinNotFound)) + .collect::, _>>()?; + self.counts[IxDyn(&bin)] += 1; + Ok(()) + } +} diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs new file mode 100644 index 00000000..d46c82c0 --- /dev/null +++ b/src/histogram/mod.rs @@ -0,0 +1,7 @@ +pub use self::histograms::HistogramCounts; +pub use self::bins::Edges; +pub use self::errors::BinNotFound; + +mod histograms; +mod bins; +mod errors; From acea0a6938c360cb1a62665cab0620bf94347a09 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 14 Oct 2018 15:20:40 +0100 Subject: [PATCH 007/146] Created Bins struct - split code between Bins and Edges --- src/histogram/bins.rs | 62 +++++++++++++++++++++++-------------- src/histogram/histograms.rs | 18 ++++++----- src/histogram/mod.rs | 2 +- src/lib.rs | 3 +- 4 files changed, 50 insertions(+), 35 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 947d8acd..81f92cfb 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -1,6 +1,5 @@ use ndarray::prelude::*; -use std::ops::Range; -use super::errors::BinNotFound; +use std::ops::{Index, Range}; /// `Edges` is a sorted collection of `A` elements used /// to represent the boundaries of intervals on @@ -11,18 +10,19 @@ 
use super::errors::BinNotFound; /// ``` /// extern crate ndarray_stats; /// extern crate noisy_float; -/// use ndarray_stats::Edges; +/// use ndarray_stats::histogram::{Edges, Bins}; /// use noisy_float::types::n64; /// -/// let unit_interval = Edges::from(vec![n64(0.), n64(1.)]); +/// let unit_edges = Edges::from(vec![n64(0.), n64(1.)]); +/// let unit_interval = Bins::new(unit_edges); /// // left inclusive /// assert_eq!( -/// unit_interval.bin_range(&n64(0.)).unwrap(), +/// unit_interval.range(&n64(0.)).unwrap(), /// n64(0.)..n64(1.), /// ); /// // right exclusive /// assert_eq!( -/// unit_interval.bin_range(&n64(1.)), +/// unit_interval.range(&n64(1.)), /// None /// ); /// ``` @@ -40,20 +40,21 @@ impl From> for Edges { impl From> for Edges { fn from(edges: Array1) -> Self { - let mut edges = edges.to_vec(); + let edges = edges.to_vec(); Self::from(edges) } } -impl Edges { - pub fn n_intervals(&self) -> usize { - match self.n_edges() { - 0 => 0, - n => n - 1, - } +impl Index for Edges{ + type Output = A; + + fn index(&self, i: usize) -> &A { + &self.edges[i] } +} - pub fn n_edges(&self) -> usize { +impl Edges { + pub fn len(&self) -> usize { self.edges.len() } @@ -61,11 +62,9 @@ impl Edges { &self.edges } - /// Returns the index of the bin containing the given value, - /// or `None` if none of the bins contain the value. 
- fn edges_indexes(&self, value: &A) -> Option<(usize, usize)> { + pub fn indexes(&self, value: &A) -> Option<(usize, usize)> { // binary search for the correct bin - let n_edges = self.n_edges(); + let n_edges = self.len(); match self.edges.binary_search(value) { Ok(i) if i == n_edges - 1 => None, Ok(i) => Some((i, i + 1)), @@ -78,19 +77,34 @@ impl Edges { } } } +} + +pub struct Bins { + edges: Edges, +} + +impl Bins { + pub fn new(edges: Edges) -> Self { + Bins { edges } + } + + pub fn len(&self) -> usize { + match self.edges.len() { + 0 => 0, + n => n - 1, + } + } - /// Returns the index of the bin containing the given value, - /// or `None` if none of the bins contain the value. - pub (crate) fn bin_index(&self, value: &A) -> Option { - self.edges_indexes(value).map(|t| t.0) + pub fn index(&self, value: &A) -> Option { + self.edges.indexes(value).map(|t| t.0) } /// Returns the range of the bin containing the given value. - pub fn bin_range(&self, value: &A) -> Option> + pub fn range(&self, value: &A) -> Option> where A: Clone, { - let edges_indexes = self.edges_indexes(value); + let edges_indexes = self.edges.indexes(value); edges_indexes.map( |t| { let (left, right) = t; diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 26df2c61..1c947645 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -1,28 +1,30 @@ use ndarray::prelude::*; -use super::bins::Edges; +use super::bins::Bins; use super::errors::BinNotFound; pub struct HistogramCounts { counts: ArrayD, - edges: Vec>, + bins: Vec>, } struct HistogramDensity { density: ArrayD, - edges: Vec>, + bins: Vec>, } impl HistogramCounts { - pub fn new(edges: Vec>) -> Self { - let counts = ArrayD::zeros(edges.iter().map(|e| e.n_intervals()).collect::>()); - HistogramCounts { counts, edges } + pub fn new(edges: Vec>) -> Self { + let counts = ArrayD::zeros( + edges.iter().map(|e| e.len() + ).collect::>()); + HistogramCounts { counts, bins: edges } } pub fn 
add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { let bin = observation .iter() - .zip(&self.edges) - .map(|(v, e)| e.bin_index(v).ok_or(BinNotFound)) + .zip(&self.bins) + .map(|(v, e)| e.index(v).ok_or(BinNotFound)) .collect::, _>>()?; self.counts[IxDyn(&bin)] += 1; Ok(()) diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index d46c82c0..49e1ac8c 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -1,5 +1,5 @@ pub use self::histograms::HistogramCounts; -pub use self::bins::Edges; +pub use self::bins::{Edges, Bins}; pub use self::errors::BinNotFound; mod histograms; diff --git a/src/lib.rs b/src/lib.rs index dfea5aa7..715e9ca7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,10 +15,9 @@ pub use maybe_nan::{MaybeNan, MaybeNanExt}; pub use quantile::{interpolate, QuantileExt}; pub use sort::Sort1dExt; pub use correlation::CorrelationExt; -pub use histogram::{Edges, HistogramCounts, BinNotFound}; mod maybe_nan; mod quantile; mod sort; mod correlation; -mod histogram; \ No newline at end of file +pub mod histogram; \ No newline at end of file From b8624a0624de0c2ec6d4b889001d1f46d41d92e1 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 14 Oct 2018 17:01:28 +0100 Subject: [PATCH 008/146] Added get method to Bins --- src/histogram/bins.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 81f92cfb..480586e8 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -116,3 +116,12 @@ impl Bins { ) } } + +impl Bins { + pub fn get(&self, index: usize) -> Range { + Range { + start: self.edges[index].clone(), + end: self.edges[index+1].clone(), + } + } +} From cfc59c8769e9bdb4d621a1fd48bc7d315a544539 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 14 Oct 2018 17:11:16 +0100 Subject: [PATCH 009/146] Implemented IntoIterator for Edges --- src/histogram/bins.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git 
a/src/histogram/bins.rs b/src/histogram/bins.rs index 480586e8..5fc05c4d 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -48,11 +48,20 @@ impl From> for Edges { impl Index for Edges{ type Output = A; - fn index(&self, i: usize) -> &A { + fn index(&self, i: usize) -> &Self::Output { &self.edges[i] } } +impl IntoIterator for Edges { + type Item = A; + type IntoIter = ::std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.edges.into_iter() + } +} + impl Edges { pub fn len(&self) -> usize { self.edges.len() From e8535a47d4dbfae72c5c5f172f62189fdf3661cf Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 14 Oct 2018 17:42:45 +0100 Subject: [PATCH 010/146] Added doc tests for all methods of Edges --- src/histogram/bins.rs | 112 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 2 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 5fc05c4d..9a74ffbe 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -2,9 +2,10 @@ use ndarray::prelude::*; use std::ops::{Index, Range}; /// `Edges` is a sorted collection of `A` elements used -/// to represent the boundaries of intervals on +/// to represent the boundaries of intervals ([`Bins`]) on /// a 1-dimensional axis. /// +/// [`Bins`]: struct.Bins.html /// # Example: /// /// ``` @@ -31,6 +32,28 @@ pub struct Edges { } impl From> for Edges { + + /// Get an `Edges` instance from a `Vec`: + /// the vector will be sorted in increasing order + /// using an unstable sorting algorithm. + /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// #[macro_use(array)] + /// extern crate ndarray; + /// use ndarray_stats::histogram::Edges; + /// + /// # fn main() { + /// let edges = Edges::from(array![1, 15, 10, 20]); + /// // The array gets sorted! 
+ /// assert_eq!( + /// edges[2], + /// 15 + /// ); + /// # } + /// ``` fn from(mut edges: Vec) -> Self { // sort the array in-place edges.sort_unstable(); @@ -39,6 +62,23 @@ impl From> for Edges { } impl From> for Edges { + /// Get an `Edges` instance from a `Array1`: + /// the array elements will be sorted in increasing order + /// using an unstable sorting algorithm. + /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// use ndarray_stats::histogram::Edges; + /// + /// let edges = Edges::from(vec![1, 15, 10, 20]); + /// // The vec gets sorted! + /// assert_eq!( + /// edges[1], + /// 10 + /// ); + /// ``` fn from(edges: Array1) -> Self { let edges = edges.to_vec(); Self::from(edges) @@ -48,6 +88,22 @@ impl From> for Edges { impl Index for Edges{ type Output = A; + /// Get the `i`-th edge. + /// + /// **Panics** if the index `i` is out of bounds. + /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// use ndarray_stats::histogram::Edges; + /// + /// let edges = Edges::from(vec![1, 5, 10, 20]); + /// assert_eq!( + /// edges[1], + /// 5 + /// ); + /// ``` fn index(&self, i: usize) -> &Self::Output { &self.edges[i] } @@ -63,14 +119,66 @@ impl IntoIterator for Edges { } impl Edges { + /// Number of edges in `Self`. + /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// extern crate noisy_float; + /// use ndarray_stats::histogram::Edges; + /// use noisy_float::types::n64; + /// + /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]); + /// assert_eq!( + /// edges.len(), + /// 3 + /// ); + /// ``` pub fn len(&self) -> usize { self.edges.len() } - pub fn slice(&self) -> &[A] { + /// Borrow an immutable reference to the edges as a vector + /// slice. 
+ /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// use ndarray_stats::histogram::Edges; + /// + /// let edges = Edges::from(vec![0, 5, 3]); + /// assert_eq!( + /// edges.as_slice(), + /// vec![0, 3, 5].as_slice() + /// ); + /// ``` + pub fn as_slice(&self) -> &[A] { &self.edges } + /// Given `value`, it returns an option: + /// - `Some((left, right))`, where `right=left+1`, if there are two consecutive edges in + /// Self such that `self[left] <= value < self[right]`; + /// - `None`, otherwise. + /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// use ndarray_stats::histogram::Edges; + /// + /// let edges = Edges::from(vec![0, 2, 3]); + /// assert_eq!( + /// edges.indexes(&1), + /// Some((0, 1)) + /// ); + /// assert_eq!( + /// edges.indexes(&5), + /// None + /// ); + /// ``` pub fn indexes(&self, value: &A) -> Option<(usize, usize)> { // binary search for the correct bin let n_edges = self.len(); From 837cb675c74782ec487e1ce47c1b50432d7b585b Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 14 Oct 2018 17:45:45 +0100 Subject: [PATCH 011/146] Fixed typos --- src/histogram/bins.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 9a74ffbe..9a810387 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -119,7 +119,7 @@ impl IntoIterator for Edges { } impl Edges { - /// Number of edges in `Self`. + /// Number of edges in `self`. /// /// # Example: /// @@ -160,7 +160,7 @@ impl Edges { /// Given `value`, it returns an option: /// - `Some((left, right))`, where `right=left+1`, if there are two consecutive edges in - /// Self such that `self[left] <= value < self[right]`; + /// `self` such that `self[left] <= value < self[right]`; /// - `None`, otherwise. 
/// /// # Example: From 082ed402ae9f8210e5c278a579e05a49fde02ac7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 14 Oct 2018 18:03:44 +0100 Subject: [PATCH 012/146] All Bins' methods have been documented --- src/histogram/bins.rs | 105 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 9a810387..67791b34 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -196,6 +196,32 @@ impl Edges { } } +/// `Bins` is a sorted collection of non-overlapping +/// 1-dimensional intervals. +/// +/// All intervals are left-inclusive and right-exclusive. +/// +/// # Example: +/// +/// ``` +/// extern crate ndarray_stats; +/// extern crate noisy_float; +/// use ndarray_stats::histogram::{Edges, Bins}; +/// use noisy_float::types::n64; +/// +/// let edges = Edges::from(vec![n64(0.), n64(1.), n64(2.)]); +/// let bins = Bins::new(edges); +/// // first bin +/// assert_eq!( +/// bins.get(0), +/// n64(0.)..n64(1.) // n63(1.) is not included! +/// ); +/// // second bin +/// assert_eq!( +/// bins.get(1), +/// n64(1.)..n64(2.) +/// ); +/// ``` pub struct Bins { edges: Edges, } @@ -205,6 +231,23 @@ impl Bins { Bins { edges } } + /// Returns the number of bins. + /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// extern crate noisy_float; + /// use ndarray_stats::histogram::{Edges, Bins}; + /// use noisy_float::types::n64; + /// + /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(2.)]); + /// let bins = Bins::new(edges); + /// assert_eq!( + /// bins.len(), + /// 2 + /// ); + /// ``` pub fn len(&self) -> usize { match self.edges.len() { 0 => 0, @@ -212,11 +255,54 @@ impl Bins { } } + /// Given `value`, it returns an option: + /// - `Some(i)`, if the `i`-th bin in `self` contains `value`; + /// - `None`, if `value` does not belong to any of the bins in `self`. 
+ /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// use ndarray_stats::histogram::{Edges, Bins}; + /// + /// let edges = Edges::from(vec![0, 2, 4, 6]); + /// let bins = Bins::new(edges); + /// let value = 1; + /// assert_eq!( + /// bins.index(&1), + /// Some(0) + /// ); + /// assert_eq!( + /// bins.get(bins.index(&1).unwrap()), + /// 0..2 + /// ); + /// ``` pub fn index(&self, value: &A) -> Option { self.edges.indexes(value).map(|t| t.0) } - /// Returns the range of the bin containing the given value. + /// Given `value`, it returns an option: + /// - `Some(left_edge..right_edge))`, if there exists a bin in `self` such that + /// `left_edge <= value < right_edge`; + /// - `None`, otherwise. + /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// use ndarray_stats::histogram::{Edges, Bins}; + /// + /// let edges = Edges::from(vec![0, 2, 4, 6]); + /// let bins = Bins::new(edges); + /// assert_eq!( + /// bins.range(&1), + /// Some(0..2) + /// ); + /// assert_eq!( + /// bins.range(&10), + /// None + /// ); + /// ``` pub fn range(&self, value: &A) -> Option> where A: Clone, @@ -235,6 +321,23 @@ impl Bins { } impl Bins { + /// Get the `i`-th bin. + /// + /// **Panics** if the index `i` is out of bounds. 
+ /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// use ndarray_stats::histogram::{Edges, Bins}; + /// + /// let edges = Edges::from(vec![1, 5, 10, 20]); + /// let bins = Bins::new(edges); + /// assert_eq!( + /// bins.get(1), + /// 5..10 + /// ); + /// ``` pub fn get(&self, index: usize) -> Range { Range { start: self.edges[index].clone(), From 3897b7023a1c5bd724ea21726d099ca15f2c28d8 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 14 Oct 2018 18:04:50 +0100 Subject: [PATCH 013/146] Fixed typo --- src/histogram/bins.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 67791b34..33ac876f 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -214,7 +214,7 @@ impl Edges { /// // first bin /// assert_eq!( /// bins.get(0), -/// n64(0.)..n64(1.) // n63(1.) is not included! +/// n64(0.)..n64(1.) // n64(1.) is not included in the bin! /// ); /// // second bin /// assert_eq!( From 6f8d28fea350ff4b2fa8f0b731ab1b9895f26030 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 14 Oct 2018 18:05:51 +0100 Subject: [PATCH 014/146] Better formulation in docs --- src/histogram/bins.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 33ac876f..288b47fb 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -255,7 +255,7 @@ impl Bins { } } - /// Given `value`, it returns an option: + /// Given `value`, it returns: /// - `Some(i)`, if the `i`-th bin in `self` contains `value`; /// - `None`, if `value` does not belong to any of the bins in `self`. 
/// From 9b19ad6bf70b564bed6792e16990e2cdadfe373f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 14 Oct 2018 18:07:06 +0100 Subject: [PATCH 015/146] Fixed typo, better wording --- src/histogram/bins.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 288b47fb..3927ed27 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -281,8 +281,8 @@ impl Bins { self.edges.indexes(value).map(|t| t.0) } - /// Given `value`, it returns an option: - /// - `Some(left_edge..right_edge))`, if there exists a bin in `self` such that + /// Given `value`, it returns: + /// - `Some(left_edge..right_edge)`, if there exists a bin in `self` such that /// `left_edge <= value < right_edge`; /// - `None`, otherwise. /// From d93c5d0733e29b3477759b0fb422c1e01bb22fc9 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:01:47 +0100 Subject: [PATCH 016/146] Added short docstring to BinNotFound --- src/histogram/errors.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/histogram/errors.rs b/src/histogram/errors.rs index 22d40a1b..7afaea1f 100644 --- a/src/histogram/errors.rs +++ b/src/histogram/errors.rs @@ -1,6 +1,7 @@ use std::error; use std::fmt; +/// Error to denote that no bin has been found for a certain observation. #[derive(Debug, Clone)] pub struct BinNotFound; From f7f9dc72e7475db479b1f57f3e7d8a1df41d5829 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:03:09 +0100 Subject: [PATCH 017/146] Improved docstring for `get` --- src/histogram/bins.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 3927ed27..9cef29fc 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -323,7 +323,7 @@ impl Bins { impl Bins { /// Get the `i`-th bin. /// - /// **Panics** if the index `i` is out of bounds. + /// **Panics** if `index` is out of bounds. 
/// /// # Example: /// From 6419da2ca15991d87e6dbf7841992d92b59129b0 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:06:11 +0100 Subject: [PATCH 018/146] HistogramExt trait has been added with a minimal signature --- src/histogram/histograms.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 1c947645..62403c3e 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -1,4 +1,5 @@ use ndarray::prelude::*; +use ndarray::Data; use super::bins::Bins; use super::errors::BinNotFound; @@ -7,11 +8,6 @@ pub struct HistogramCounts { bins: Vec>, } -struct HistogramDensity { - density: ArrayD, - bins: Vec>, -} - impl HistogramCounts { pub fn new(edges: Vec>) -> Self { let counts = ArrayD::zeros( @@ -30,3 +26,14 @@ impl HistogramCounts { Ok(()) } } + +/// Histogram methods. +pub trait HistogramExt + where + S: Data, + D: Dimension, +{ + fn histogram(&self, bins: Vec>) -> HistogramCounts + where + A: Ord; +} From 94d71a2dd05d34aed40f1e2cccfa7ecd459c931d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:08:18 +0100 Subject: [PATCH 019/146] Removed trait parameter D from HistogramExt trait signature --- src/histogram/histograms.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 62403c3e..9eb6662e 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -28,11 +28,11 @@ impl HistogramCounts { } /// Histogram methods. 
-pub trait HistogramExt +pub trait HistogramExt where S: Data, - D: Dimension, { + fn histogram(&self, bins: Vec>) -> HistogramCounts where A: Ord; From 41cc373dd421389558c02a9a9724554276d60d6a Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:11:35 +0100 Subject: [PATCH 020/146] Added docstrings to histogram method --- src/histogram/histograms.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 9eb6662e..c026a55c 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -32,7 +32,18 @@ pub trait HistogramExt where S: Data, { - + /// Return the [histogram](https://en.wikipedia.org/wiki/Histogram) + /// for a 2-dimensional array of points `M`. + /// + /// Let `(n, d)` be the shape of `M`: + /// - `n` is the number of points; + /// - `d` is the number of dimensions of the space those points belong to. + /// It follows that every column in `M` is a `d`-dimensional point. + /// + /// For example: a (3, 4) matrix `M` is a collection of 3 points in a + /// 4-dimensional space. + /// + /// **Panics** if `d` is different from `bins.len()`. 
fn histogram(&self, bins: Vec>) -> HistogramCounts where A: Ord; From 03654809094366ee1a81cba4b88867a83baca9c5 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:21:12 +0100 Subject: [PATCH 021/146] Implemented histogram method; renamed edges to bins in HistogramCounts constructor --- src/histogram/histograms.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index c026a55c..1dddfab8 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -9,11 +9,11 @@ pub struct HistogramCounts { } impl HistogramCounts { - pub fn new(edges: Vec>) -> Self { + pub fn new(bins: Vec>) -> Self { let counts = ArrayD::zeros( - edges.iter().map(|e| e.len() + bins.iter().map(|e| e.len() ).collect::>()); - HistogramCounts { counts, bins: edges } + HistogramCounts { counts, bins } } pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { @@ -48,3 +48,18 @@ pub trait HistogramExt where A: Ord; } + +impl HistogramExt for ArrayBase + where + S: Data, + A: Ord, +{ + fn histogram(&self, bins: Vec>) -> HistogramCounts + { + let mut histogram = HistogramCounts::new(bins); + for point in self.axis_iter(Axis(0)) { + histogram.add_observation(point); + } + histogram + } +} From 6420b3fcbde46b778a8ac50d7c1bdb50053e5368 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:23:39 +0100 Subject: [PATCH 022/146] Exporting HistogramExt trait --- src/histogram/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index 49e1ac8c..0fdb7a9f 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -1,4 +1,4 @@ -pub use self::histograms::HistogramCounts; +pub use self::histograms::{HistogramCounts, HistogramExt}; pub use self::bins::{Edges, Bins}; pub use self::errors::BinNotFound; From f728a63b295028753ae8bc652c85b4acdbd55d37 Mon Sep 17 00:00:00 2001 
From: LukeMathWalker Date: Thu, 18 Oct 2018 08:28:41 +0100 Subject: [PATCH 023/146] Added ndim field to HistogramCounts to implement dimensionality check in add_dimensions --- src/histogram/histograms.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 1dddfab8..23d48e66 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -6,17 +6,25 @@ use super::errors::BinNotFound; pub struct HistogramCounts { counts: ArrayD, bins: Vec>, + ndim: usize, } impl HistogramCounts { pub fn new(bins: Vec>) -> Self { + let ndim = bins.len(); let counts = ArrayD::zeros( bins.iter().map(|e| e.len() ).collect::>()); - HistogramCounts { counts, bins } + HistogramCounts { counts, bins, ndim } } pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { + assert_eq!( + self.ndim, + observation.len(), + "Dimensions do not match: observation has {0} dimensions, \ + while the histogram has {1}.", observation.len(), self.ndim + ); let bin = observation .iter() .zip(&self.bins) From 62c46e8288b14c414562072f76fae638fa91dbf4 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:34:05 +0100 Subject: [PATCH 024/146] Improving docstring --- src/histogram/histograms.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 23d48e66..d3813234 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -10,6 +10,13 @@ pub struct HistogramCounts { } impl HistogramCounts { + /// Return a new instance of HistogramCounts given + /// a vector of [`Bins`]. + /// + /// The `i`-th element in `Vec>` represents the 1-dimensional + /// projection of the bin grid on the `i`-th axis. 
+ /// + /// [`Bins`]: struct.Bins.html pub fn new(bins: Vec>) -> Self { let ndim = bins.len(); let counts = ArrayD::zeros( @@ -18,6 +25,9 @@ impl HistogramCounts { HistogramCounts { counts, bins, ndim } } + /// Add a single observation to the histogram. + /// + /// **Panics** if dimensions do not match: `self.ndim != observation.len()`. pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { assert_eq!( self.ndim, From 068c893ba6b50b133864e4e6223f661b9df7cb46 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:36:12 +0100 Subject: [PATCH 025/146] Added docstring to Bins::new --- src/histogram/bins.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 9cef29fc..2c9464ba 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -227,6 +227,9 @@ pub struct Bins { } impl Bins { + /// Given a collection of [`Edges`], it returns the corresponding `Bins` instance. + /// + /// [`Edges`]: struct.Edges.html pub fn new(edges: Edges) -> Self { Bins { edges } } From 6d0f7b694d6bf2e0d8f36509fc0e7c3468861bbf Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:38:17 +0100 Subject: [PATCH 026/146] Removed trailing white line at the end of the file --- src/histogram/histograms.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index d3813234..8ebebc97 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -80,4 +80,4 @@ impl HistogramExt for ArrayBase } histogram } -} +} \ No newline at end of file From c03945ea9dc09a311e379c7d03e3ad4e4d45bb55 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:48:59 +0100 Subject: [PATCH 027/146] Checked Edges::from methods --- src/histogram/bins.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 2c9464ba..f160ca77 
100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -348,3 +348,29 @@ impl Bins { } } } + +#[cfg(test)] +mod edges_tests { + use super::*; + + quickcheck! { + fn check_sorted_from_vec(v: Vec) -> bool { + let edges = Edges::from(v); + let n = edges.len(); + for i in 1..n { + assert!(edges[i-1] <= edges[i]); + } + true + } + + fn check_sorted_from_array(v: Vec) -> bool { + let a = Array1::from_vec(v); + let edges = Edges::from(a); + let n = edges.len(); + for i in 1..n { + assert!(edges[i-1] <= edges[i]); + } + true + } + } +} From 71c58ffdc4964bb867ce8c200c76b9646ab28729 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 18 Oct 2018 08:57:44 +0100 Subject: [PATCH 028/146] Checked right-exclusiveness and left-inclusiveness --- src/histogram/bins.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index f160ca77..2911acaa 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -372,5 +372,27 @@ mod edges_tests { } true } + + fn edges_are_right_exclusive(v: Vec) -> bool { + let edges = Edges::from(v); + let last = edges.as_slice().last(); + match last { + None => true, + Some(x) => { + edges.indexes(x).is_none() + } + } + } + + fn edges_are_left_inclusive(v: Vec) -> bool { + let edges = Edges::from(v); + let first = edges.as_slice().first(); + match first { + None => true, + Some(x) => { + edges.indexes(x).is_some() + } + } + } } } From 992f969f77d9e77a9d32ee8c5a57dc4b97f03db0 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 08:36:20 +0100 Subject: [PATCH 029/146] Edges are now duplicates-free --- src/histogram/bins.rs | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 2911acaa..195c046c 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -35,7 +35,8 @@ impl From> for Edges { /// Get an `Edges` instance from a `Vec`: /// the vector will be 
sorted in increasing order - /// using an unstable sorting algorithm. + /// using an unstable sorting algorithm and duplicates + /// will be removed. /// /// # Example: /// @@ -46,7 +47,7 @@ impl From> for Edges { /// use ndarray_stats::histogram::Edges; /// /// # fn main() { - /// let edges = Edges::from(array![1, 15, 10, 20]); + /// let edges = Edges::from(array![1, 15, 10, 10, 20]); /// // The array gets sorted! /// assert_eq!( /// edges[2], @@ -57,6 +58,8 @@ impl From> for Edges { fn from(mut edges: Vec) -> Self { // sort the array in-place edges.sort_unstable(); + // remove duplicates + edges.dedup(); Edges { edges } } } @@ -386,13 +389,25 @@ mod edges_tests { fn edges_are_left_inclusive(v: Vec) -> bool { let edges = Edges::from(v); - let first = edges.as_slice().first(); - match first { - None => true, - Some(x) => { - edges.indexes(x).is_some() + match edges.len() { + 1 => true, + _ => { + let first = edges.as_slice().first(); + match first { + None => true, + Some(x) => { + edges.indexes(x).is_some() + } + } } } } } + + #[test] + fn check_degenerate_bins() { + let v = vec![2, 4, 4, 5, 6]; + let edges = Edges::from(v); + assert_eq!(edges.indexes(&4), Some((1, 2))); + } } From 1ac254ce7a0b01c9ba4f4df6b6fc5d90f8bd7bdd Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 08:57:19 +0100 Subject: [PATCH 030/146] Removed ndim from the HistogramCounts struct; created a ndim method instead --- src/histogram/histograms.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 8ebebc97..a7f759fa 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -6,7 +6,6 @@ use super::errors::BinNotFound; pub struct HistogramCounts { counts: ArrayD, bins: Vec>, - ndim: usize, } impl HistogramCounts { @@ -22,12 +21,12 @@ impl HistogramCounts { let counts = ArrayD::zeros( bins.iter().map(|e| e.len() ).collect::>()); - HistogramCounts { counts, bins, ndim } + 
HistogramCounts { counts, bins } } /// Add a single observation to the histogram. /// - /// **Panics** if dimensions do not match: `self.ndim != observation.len()`. + /// **Panics** if dimensions do not match: `self.ndim() != observation.len()`. pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { assert_eq!( self.ndim, @@ -43,6 +42,12 @@ impl HistogramCounts { self.counts[IxDyn(&bin)] += 1; Ok(()) } + + /// Returns the number of dimensions of the space the histogram is covering. + pub fn ndim(&self) -> usize { + debug_assert_eq!(self.counts.ndim(), self.bins.len()); + self.counts.len() + } } /// Histogram methods. From 4e39913de5f8fccea08072f82c9a1fdf8e7046c6 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 08:59:10 +0100 Subject: [PATCH 031/146] HistogramCounts renamed to Histogram --- src/histogram/histograms.rs | 19 +++++++++---------- src/histogram/mod.rs | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index a7f759fa..58de1e8b 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -3,12 +3,12 @@ use ndarray::Data; use super::bins::Bins; use super::errors::BinNotFound; -pub struct HistogramCounts { +pub struct Histogram { counts: ArrayD, bins: Vec>, } -impl HistogramCounts { +impl Histogram { /// Return a new instance of HistogramCounts given /// a vector of [`Bins`]. /// @@ -17,11 +17,10 @@ impl HistogramCounts { /// /// [`Bins`]: struct.Bins.html pub fn new(bins: Vec>) -> Self { - let ndim = bins.len(); let counts = ArrayD::zeros( bins.iter().map(|e| e.len() ).collect::>()); - HistogramCounts { counts, bins } + Histogram { counts, bins } } /// Add a single observation to the histogram. @@ -29,10 +28,10 @@ impl HistogramCounts { /// **Panics** if dimensions do not match: `self.ndim() != observation.len()`. 
pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { assert_eq!( - self.ndim, + self.ndim(), observation.len(), "Dimensions do not match: observation has {0} dimensions, \ - while the histogram has {1}.", observation.len(), self.ndim + while the histogram has {1}.", observation.len(), self.ndim() ); let bin = observation .iter() @@ -46,7 +45,7 @@ impl HistogramCounts { /// Returns the number of dimensions of the space the histogram is covering. pub fn ndim(&self) -> usize { debug_assert_eq!(self.counts.ndim(), self.bins.len()); - self.counts.len() + self.counts.ndim() } } @@ -67,7 +66,7 @@ pub trait HistogramExt /// 4-dimensional space. /// /// **Panics** if `d` is different from `bins.len()`. - fn histogram(&self, bins: Vec>) -> HistogramCounts + fn histogram(&self, bins: Vec>) -> Histogram where A: Ord; } @@ -77,9 +76,9 @@ impl HistogramExt for ArrayBase S: Data, A: Ord, { - fn histogram(&self, bins: Vec>) -> HistogramCounts + fn histogram(&self, bins: Vec>) -> Histogram { - let mut histogram = HistogramCounts::new(bins); + let mut histogram = Histogram::new(bins); for point in self.axis_iter(Axis(0)) { histogram.add_observation(point); } diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index 0fdb7a9f..592a71df 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -1,4 +1,4 @@ -pub use self::histograms::{HistogramCounts, HistogramExt}; +pub use self::histograms::{Histogram, HistogramExt}; pub use self::bins::{Edges, Bins}; pub use self::errors::BinNotFound; From 5315286e797b0dd5ef2bff4aa0aa4eba05a25813 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 09:02:52 +0100 Subject: [PATCH 032/146] Added comment to get explaining why it's not implemented using the Index trait for Bins --- src/histogram/bins.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 195c046c..3a4b2873 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ 
-345,6 +345,10 @@ impl Bins { /// ); /// ``` pub fn get(&self, index: usize) -> Range { + // It was not possible to implement this functionality + // using the `Index` trait unless we were willing to + // allocate a `Vec>` in the struct. + // Index, in fact, forces you to return a reference. Range { start: self.edges[index].clone(), end: self.edges[index+1].clone(), From 810d25e440de169c16008e664a41fbcd50165d06 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 09:04:32 +0100 Subject: [PATCH 033/146] Fixed docs --- src/histogram/histograms.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 58de1e8b..252e2986 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -3,13 +3,14 @@ use ndarray::Data; use super::bins::Bins; use super::errors::BinNotFound; +/// Histogram data structure. pub struct Histogram { counts: ArrayD, bins: Vec>, } impl Histogram { - /// Return a new instance of HistogramCounts given + /// Return a new instance of Histogram given /// a vector of [`Bins`]. /// /// The `i`-th element in `Vec>` represents the 1-dimensional From 89a4af9c36cd2a121231a6b13dc73bece8ca8f7e Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 09:07:53 +0100 Subject: [PATCH 034/146] Added docs to histogram submodule; exported HistogramExt as top level trait --- src/histogram/mod.rs | 1 + src/lib.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index 592a71df..d2d5a1b2 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -1,3 +1,4 @@ +//! Histogram functionalities. 
pub use self::histograms::{Histogram, HistogramExt}; pub use self::bins::{Edges, Bins}; pub use self::errors::BinNotFound; diff --git a/src/lib.rs b/src/lib.rs index 715e9ca7..371b032c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,7 @@ pub use maybe_nan::{MaybeNan, MaybeNanExt}; pub use quantile::{interpolate, QuantileExt}; pub use sort::Sort1dExt; pub use correlation::CorrelationExt; +pub use histogram::HistogramExt; mod maybe_nan; mod quantile; From f3f383557c804587eb4b7f6c3ff8f2d07ae4ce28 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 09:09:26 +0100 Subject: [PATCH 035/146] Revised docstring for HistogramExt --- src/histogram/histograms.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 252e2986..23c44123 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -50,7 +50,7 @@ impl Histogram { } } -/// Histogram methods. +/// Extension trait for `ArrayBase` providing methods to compute histograms. pub trait HistogramExt where S: Data, From a9dbb1f0a44ae1e42c9eaaab92b625e621792ea1 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 20:15:06 +0100 Subject: [PATCH 036/146] Added quickcheck test for deduplication --- src/histogram/bins.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 3a4b2873..ef89b767 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -359,6 +359,8 @@ impl Bins { #[cfg(test)] mod edges_tests { use super::*; + use std::collections::BTreeSet; + use std::iter::FromIterator; quickcheck! 
{ fn check_sorted_from_vec(v: Vec) -> bool { @@ -406,12 +408,12 @@ mod edges_tests { } } } - } - #[test] - fn check_degenerate_bins() { - let v = vec![2, 4, 4, 5, 6]; - let edges = Edges::from(v); - assert_eq!(edges.indexes(&4), Some((1, 2))); + fn edges_are_deduped(v: Vec) -> bool { + let unique_elements = BTreeSet::from_iter(v.iter()); + let edges = Edges::from(v.clone()); + let unique_edges = BTreeSet::from_iter(edges.as_slice().iter()); + unique_edges == unique_elements + } } } From d5266c13a43b7f592eddef9d6c1bf28f4c8310d7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 20:20:10 +0100 Subject: [PATCH 037/146] Added test for out of bounds access to bins using get --- src/histogram/bins.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index ef89b767..4dfe9456 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -417,3 +417,17 @@ mod edges_tests { } } } + +#[cfg(test)] +mod bins_tests { + use super::*; + + #[test] + #[should_panic] + fn get_panics_for_out_of_bound_indexes() { + let edges = Edges::from(vec![0]); + let bins = Bins::new(edges); + // we need at least two edges to make a valid bin! + bins.get(0); + } +} From 728556c146b850c747a1ace024876219c3723cfb Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 20:27:14 +0100 Subject: [PATCH 038/146] Added as_view method to Histogram --- src/histogram/histograms.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 23c44123..c6c9507a 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -48,6 +48,11 @@ impl Histogram { debug_assert_eq!(self.counts.ndim(), self.bins.len()); self.counts.ndim() } + + /// Borrow a view to the histogram matrix. + pub fn as_view(&self) -> ArrayViewD { + self.counts.view() + } } /// Extension trait for `ArrayBase` providing methods to compute histograms. 
@@ -85,4 +90,4 @@ impl HistogramExt for ArrayBase } histogram } -} \ No newline at end of file +} From 226ea87d1028118da9a7d90630ad3a744758b073 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 20:41:53 +0100 Subject: [PATCH 039/146] Added a doc_test to add_observation --- src/histogram/histograms.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index c6c9507a..b36e6e0d 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -27,6 +27,30 @@ impl Histogram { /// Add a single observation to the histogram. /// /// **Panics** if dimensions do not match: `self.ndim() != observation.len()`. + /// + /// # Example: + /// ``` + /// extern crate ndarray_stats; + /// #[macro_use(array)] + /// extern crate ndarray; + /// extern crate noisy_float; + /// use ndarray_stats::histogram::{Edges, Bins, Histogram}; + /// use noisy_float::types::n64; + /// + /// # fn main() { + /// let edges = Edges::from(vec![n64(-1.), n64(0.), n64(1.)]); + /// let bins = Bins::new(edges); + /// let square_grid = vec![bins.clone(), bins.clone()]; + /// let histogram = Histogram::new(square_grid); + /// + /// let observation = array![n64(0.5), n64(0.6)]; + /// + /// histogram.add_observation(observation.view()); + /// + /// let histogram_matrix = histogram.as_view(); + /// assert_eq!(histogram_matrix[[1, 1]], 1); + /// # } + /// ``` pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { assert_eq!( self.ndim(), From 810f74b2509f404f7f98a162de4a99a37fc42dda Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 20:42:52 +0100 Subject: [PATCH 040/146] Bins and Edges are now clonable --- src/histogram/bins.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 4dfe9456..d9b4a7cb 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -27,6 +27,7 @@ use std::ops::{Index, 
Range}; /// None /// ); /// ``` +#[derive(Clone)] pub struct Edges { edges: Vec, } @@ -225,6 +226,7 @@ impl Edges { /// n64(1.)..n64(2.) /// ); /// ``` +#[derive(Clone)] pub struct Bins { edges: Edges, } From 848b61f4cbeb802f35bdc9480299475032ca7642 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 20:43:18 +0100 Subject: [PATCH 041/146] add_observation doctest is now green --- src/histogram/histograms.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index b36e6e0d..1e076f6e 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -41,7 +41,7 @@ impl Histogram { /// let edges = Edges::from(vec![n64(-1.), n64(0.), n64(1.)]); /// let bins = Bins::new(edges); /// let square_grid = vec![bins.clone(), bins.clone()]; - /// let histogram = Histogram::new(square_grid); + /// let mut histogram = Histogram::new(square_grid); /// /// let observation = array![n64(0.5), n64(0.6)]; /// From 18d64f368a91ca6b8a5f4e84105bd95f4d59e884 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 23 Oct 2018 21:07:42 +0100 Subject: [PATCH 042/146] Added `grid` method to Histogram to access self.bins --- src/histogram/histograms.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 1e076f6e..8d0b2d76 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -77,6 +77,12 @@ impl Histogram { pub fn as_view(&self) -> ArrayViewD { self.counts.view() } + + /// Borrow an immutable reference to the histogram grid as a vector + /// slice. + pub fn grid(&self) -> &[Bins] { + &self.bins + } } /// Extension trait for `ArrayBase` providing methods to compute histograms. 
From a4490a472fc0edeb811af7ebc3553570337eb9d3 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 25 Oct 2018 08:49:47 +0100 Subject: [PATCH 043/146] Added grid submodule to histogram submodule --- src/histogram/grid.rs | 0 src/histogram/mod.rs | 1 + 2 files changed, 1 insertion(+) create mode 100644 src/histogram/grid.rs diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs new file mode 100644 index 00000000..e69de29b diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index d2d5a1b2..0dc8778d 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -5,4 +5,5 @@ pub use self::errors::BinNotFound; mod histograms; mod bins; +mod grid; mod errors; From f762f63baa570dfdb3764d37d26e1577fa8d971e Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 25 Oct 2018 08:51:14 +0100 Subject: [PATCH 044/146] Added Grid struct --- src/histogram/grid.rs | 5 +++++ src/histogram/mod.rs | 1 + 2 files changed, 6 insertions(+) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index e69de29b..7b6b698a 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -0,0 +1,5 @@ +use super::bins::Bins; + +pub struct Grid { + grid: Vec>, +} diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index 0dc8778d..955d07aa 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -1,6 +1,7 @@ //! Histogram functionalities. 
pub use self::histograms::{Histogram, HistogramExt}; pub use self::bins::{Edges, Bins}; +pub use self::grid::Grid; pub use self::errors::BinNotFound; mod histograms; From f7fa2de19b36cb86c52d3eec5bdf490390304fa2 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 25 Oct 2018 09:01:15 +0100 Subject: [PATCH 045/146] Replaced bins with grid in Histogram --- src/histogram/histograms.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 8d0b2d76..02998696 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -1,27 +1,28 @@ use ndarray::prelude::*; use ndarray::Data; use super::bins::Bins; +use super::grid::Grid; use super::errors::BinNotFound; /// Histogram data structure. pub struct Histogram { counts: ArrayD, - bins: Vec>, + grid: Grid, } impl Histogram { /// Return a new instance of Histogram given - /// a vector of [`Bins`]. + /// a [`Grid`]. /// - /// The `i`-th element in `Vec>` represents the 1-dimensional + /// The `i`-th element in `Grid` represents the 1-dimensional /// projection of the bin grid on the `i`-th axis. /// - /// [`Bins`]: struct.Bins.html - pub fn new(bins: Vec>) -> Self { + /// [`Grid`]: struct.Grid.html + pub fn new(grid: Grid) -> Self { let counts = ArrayD::zeros( - bins.iter().map(|e| e.len() + grid.iter().map(|e| e.len() ).collect::>()); - Histogram { counts, bins } + Histogram { counts, grid } } /// Add a single observation to the histogram. @@ -60,7 +61,7 @@ impl Histogram { ); let bin = observation .iter() - .zip(&self.bins) + .zip(&self.grid) .map(|(v, e)| e.index(v).ok_or(BinNotFound)) .collect::, _>>()?; self.counts[IxDyn(&bin)] += 1; @@ -69,7 +70,7 @@ impl Histogram { /// Returns the number of dimensions of the space the histogram is covering. 
pub fn ndim(&self) -> usize { - debug_assert_eq!(self.counts.ndim(), self.bins.len()); + debug_assert_eq!(self.counts.ndim(), self.grid.len()); self.counts.ndim() } @@ -80,8 +81,8 @@ impl Histogram { /// Borrow an immutable reference to the histogram grid as a vector /// slice. - pub fn grid(&self) -> &[Bins] { - &self.bins + pub fn grid(&self) -> &Grid { + &self.grid } } @@ -101,8 +102,8 @@ pub trait HistogramExt /// For example: a (3, 4) matrix `M` is a collection of 3 points in a /// 4-dimensional space. /// - /// **Panics** if `d` is different from `bins.len()`. - fn histogram(&self, bins: Vec>) -> Histogram + /// **Panics** if `d` is different from `grid.ndim()`. + fn histogram(&self, grid: Grid) -> Histogram where A: Ord; } @@ -112,9 +113,9 @@ impl HistogramExt for ArrayBase S: Data, A: Ord, { - fn histogram(&self, bins: Vec>) -> Histogram + fn histogram(&self, grid: Grid) -> Histogram { - let mut histogram = Histogram::new(bins); + let mut histogram = Histogram::new(grid); for point in self.axis_iter(Axis(0)) { histogram.add_observation(point); } From 327cb482fd3332a92ff6b210e2da5a1eef38332d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 25 Oct 2018 09:08:47 +0100 Subject: [PATCH 046/146] Compilation is green --- src/histogram/grid.rs | 11 +++++++++++ src/histogram/histograms.rs | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 7b6b698a..757a1788 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -1,5 +1,16 @@ use super::bins::Bins; +use std::slice::Iter; pub struct Grid { grid: Vec>, } + +impl Grid { + pub fn iter(&self) -> Iter> { + self.grid.iter() + } + + pub fn ndim(&self) -> usize { + self.grid.len() + } +} diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 02998696..f581ca05 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -61,7 +61,7 @@ impl Histogram { ); let bin = observation .iter() - 
.zip(&self.grid) + .zip(self.grid.iter()) .map(|(v, e)| e.index(v).ok_or(BinNotFound)) .collect::, _>>()?; self.counts[IxDyn(&bin)] += 1; @@ -70,7 +70,7 @@ impl Histogram { /// Returns the number of dimensions of the space the histogram is covering. pub fn ndim(&self) -> usize { - debug_assert_eq!(self.counts.ndim(), self.grid.len()); + debug_assert_eq!(self.counts.ndim(), self.grid.ndim()); self.counts.ndim() } From 2425da5086155c7e68b016ee6857bc14cd05c22b Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 26 Oct 2018 08:20:54 +0100 Subject: [PATCH 047/146] Implement From>> for Grid. Tests are green --- src/histogram/grid.rs | 14 +++++++++++--- src/histogram/histograms.rs | 4 ++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 757a1788..34beccef 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -2,15 +2,23 @@ use super::bins::Bins; use std::slice::Iter; pub struct Grid { - grid: Vec>, + projections: Vec>, +} + +impl From>> for Grid { + + /// Get a `Grid` instance from a `Vec>`. 
+ fn from(mut projections: Vec>) -> Self { + Grid { projections } + } } impl Grid { pub fn iter(&self) -> Iter> { - self.grid.iter() + self.projections.iter() } pub fn ndim(&self) -> usize { - self.grid.len() + self.projections.len() } } diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index f581ca05..d69deed8 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -35,13 +35,13 @@ impl Histogram { /// #[macro_use(array)] /// extern crate ndarray; /// extern crate noisy_float; - /// use ndarray_stats::histogram::{Edges, Bins, Histogram}; + /// use ndarray_stats::histogram::{Edges, Bins, Histogram, Grid}; /// use noisy_float::types::n64; /// /// # fn main() { /// let edges = Edges::from(vec![n64(-1.), n64(0.), n64(1.)]); /// let bins = Bins::new(edges); - /// let square_grid = vec![bins.clone(), bins.clone()]; + /// let square_grid = Grid::from(vec![bins.clone(), bins.clone()]); /// let mut histogram = Histogram::new(square_grid); /// /// let observation = array![n64(0.5), n64(0.6)]; From 69710bd5ec848a04aea74e29dbae50df51e8472e Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 26 Oct 2018 08:23:32 +0100 Subject: [PATCH 048/146] Implement From>> for Grid. 
Tests are green --- src/histogram/grid.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 34beccef..f0cff0f3 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -21,4 +21,8 @@ impl Grid { pub fn ndim(&self) -> usize { self.projections.len() } + + pub fn projections(&self) -> &[Bins] { + &self.projections + } } From e1b3d2fa500f0150d818ba43751ccbd168c406d7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 26 Oct 2018 08:25:02 +0100 Subject: [PATCH 049/146] grid.iter renamed to iter_projections --- src/histogram/grid.rs | 2 +- src/histogram/histograms.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index f0cff0f3..af911d05 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -14,7 +14,7 @@ impl From>> for Grid { } impl Grid { - pub fn iter(&self) -> Iter> { + pub fn iter_projections(&self) -> Iter> { self.projections.iter() } diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index d69deed8..30ddaea9 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -20,7 +20,7 @@ impl Histogram { /// [`Grid`]: struct.Grid.html pub fn new(grid: Grid) -> Self { let counts = ArrayD::zeros( - grid.iter().map(|e| e.len() + grid.iter_projections().map(|e| e.len() ).collect::>()); Histogram { counts, grid } } @@ -61,7 +61,7 @@ impl Histogram { ); let bin = observation .iter() - .zip(self.grid.iter()) + .zip(self.grid.iter_projections()) .map(|(v, e)| e.index(v).ok_or(BinNotFound)) .collect::, _>>()?; self.counts[IxDyn(&bin)] += 1; From 4d5119c102a9b6f4a0935fbf76c748dedf9c17b1 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 26 Oct 2018 08:32:02 +0100 Subject: [PATCH 050/146] grid.iter renamed to iter_projections --- src/histogram/grid.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index af911d05..19b15c1e 
100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -1,5 +1,7 @@ use super::bins::Bins; +use super::errors::BinNotFound; use std::slice::Iter; +use ndarray::ArrayView1; pub struct Grid { projections: Vec>, @@ -25,4 +27,15 @@ impl Grid { pub fn projections(&self) -> &[Bins] { &self.projections } + + pub fn index(&self, observation: ArrayView1) -> Result, BinNotFound> { + assert_eq!(observation.len(), self.ndim(), + "Dimension mismatch: the observation has {0:?} dimensions, the grid \ + instead has {1:?} dimensions.", observation.len(), self.ndim()); + observation + .iter() + .zip(self.grid.iter_projections()) + .map(|(v, e)| e.index(v).ok_or(BinNotFound)) + .collect::, _>>()? + } } From 812aff5574d88fdceb4774aeaca6bca144aca9e6 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 26 Oct 2018 08:36:29 +0100 Subject: [PATCH 051/146] index method added to Grid. Histogram.add_observation refactored to use it --- src/histogram/grid.rs | 16 ++++++++-------- src/histogram/histograms.rs | 9 ++------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 19b15c1e..6c18a433 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -10,7 +10,7 @@ pub struct Grid { impl From>> for Grid { /// Get a `Grid` instance from a `Vec>`. 
- fn from(mut projections: Vec>) -> Self { + fn from(projections: Vec>) -> Self { Grid { projections } } } @@ -28,14 +28,14 @@ impl Grid { &self.projections } - pub fn index(&self, observation: ArrayView1) -> Result, BinNotFound> { - assert_eq!(observation.len(), self.ndim(), - "Dimension mismatch: the observation has {0:?} dimensions, the grid \ - instead has {1:?} dimensions.", observation.len(), self.ndim()); - observation + pub fn index(&self, point: ArrayView1) -> Result, BinNotFound> { + assert_eq!(point.len(), self.ndim(), + "Dimension mismatch: the point has {0:?} dimensions, the grid \ + expected {1:?} dimensions.", point.len(), self.ndim()); + point .iter() - .zip(self.grid.iter_projections()) + .zip(self.iter_projections()) .map(|(v, e)| e.index(v).ok_or(BinNotFound)) - .collect::, _>>()? + .collect::, _>>() } } diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 30ddaea9..850ae441 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -1,6 +1,5 @@ use ndarray::prelude::*; use ndarray::Data; -use super::bins::Bins; use super::grid::Grid; use super::errors::BinNotFound; @@ -59,12 +58,8 @@ impl Histogram { "Dimensions do not match: observation has {0} dimensions, \ while the histogram has {1}.", observation.len(), self.ndim() ); - let bin = observation - .iter() - .zip(self.grid.iter_projections()) - .map(|(v, e)| e.index(v).ok_or(BinNotFound)) - .collect::, _>>()?; - self.counts[IxDyn(&bin)] += 1; + let bin_index = self.grid.index(observation)?; + self.counts[IxDyn(&bin_index)] += 1; Ok(()) } From 29a26f19dd2795285bbe768f8f95c589bdb9f383 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 26 Oct 2018 08:54:49 +0100 Subject: [PATCH 052/146] Improved docs --- src/histogram/grid.rs | 3 +++ src/histogram/histograms.rs | 6 +----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 6c18a433..33f50d98 100644 --- a/src/histogram/grid.rs +++ 
b/src/histogram/grid.rs @@ -10,6 +10,9 @@ pub struct Grid { impl From>> for Grid { /// Get a `Grid` instance from a `Vec>`. + /// + /// The `i`-th element in `Vec>` represents the 1-dimensional + /// projection of the bin grid on the `i`-th axis. fn from(projections: Vec>) -> Self { Grid { projections } } diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 850ae441..fef5b884 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -10,11 +10,7 @@ pub struct Histogram { } impl Histogram { - /// Return a new instance of Histogram given - /// a [`Grid`]. - /// - /// The `i`-th element in `Grid` represents the 1-dimensional - /// projection of the bin grid on the `i`-th axis. + /// Return a new instance of Histogram given a [`Grid`]. /// /// [`Grid`]: struct.Grid.html pub fn new(grid: Grid) -> Self { From 38f40638652c7c5f2680e255c5b90500f84ab6f9 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 26 Oct 2018 08:55:09 +0100 Subject: [PATCH 053/146] Added Grid::shape and used it in Histogram constructor --- src/histogram/grid.rs | 4 ++++ src/histogram/histograms.rs | 4 +--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 33f50d98..3c74816f 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -27,6 +27,10 @@ impl Grid { self.projections.len() } + pub fn shape(&self) -> Vec { + self.iter_projections().map(|e| e.len()).collect::>() + } + pub fn projections(&self) -> &[Bins] { &self.projections } diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index fef5b884..590cb3b8 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -14,9 +14,7 @@ impl Histogram { /// /// [`Grid`]: struct.Grid.html pub fn new(grid: Grid) -> Self { - let counts = ArrayD::zeros( - grid.iter_projections().map(|e| e.len() - ).collect::>()); + let counts = ArrayD::zeros(grid.shape()); Histogram { counts, grid } } From 
40d294f060cd9fdc14d010b374fb395323fd92a2 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 26 Oct 2018 08:56:10 +0100 Subject: [PATCH 054/146] Panic is ensured by grid.index method --- src/histogram/histograms.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 590cb3b8..e2318877 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -46,12 +46,6 @@ impl Histogram { /// # } /// ``` pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { - assert_eq!( - self.ndim(), - observation.len(), - "Dimensions do not match: observation has {0} dimensions, \ - while the histogram has {1}.", observation.len(), self.ndim() - ); let bin_index = self.grid.index(observation)?; self.counts[IxDyn(&bin_index)] += 1; Ok(()) From bd940e7282f81946e8ca1e36bf0ee76d56b4359e Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 28 Oct 2018 22:13:03 +0000 Subject: [PATCH 055/146] Added get method to Grid --- src/histogram/grid.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 3c74816f..631152f0 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -1,5 +1,6 @@ use super::bins::Bins; use super::errors::BinNotFound; +use std::ops::Range; use std::slice::Iter; use ndarray::ArrayView1; @@ -46,3 +47,16 @@ impl Grid { .collect::, _>>() } } + +impl Grid { + fn get(&self, index: &[usize]) -> Vec> { + assert_eq!(index.len(), self.ndim(), + "Dimension mismatch: the index has {0:?} dimensions, the grid \ + expected {1:?} dimensions.", index.len(), self.ndim()); + let mut bin = vec![]; + for (axis_index, i) in index.iter().enumerate() { + bin.push(self.projections[axis_index].get(*i)); + } + bin + } +} From 5623ab9534c118f882ee27815a615835e90ccf34 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 28 Oct 2018 22:25:52 +0000 Subject: [PATCH 056/146] Testing the whole histogram matrix 
in doctest, instead of a single entry --- src/histogram/histograms.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index e2318877..1a90acb4 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -42,7 +42,11 @@ impl Histogram { /// histogram.add_observation(observation.view()); /// /// let histogram_matrix = histogram.as_view(); - /// assert_eq!(histogram_matrix[[1, 1]], 1); + /// let expected = array![ + /// [0, 0], + /// [0, 1], + /// ]; + /// assert_eq!(histogram_matrix, expected.into_dyn()); /// # } /// ``` pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { @@ -62,8 +66,7 @@ impl Histogram { self.counts.view() } - /// Borrow an immutable reference to the histogram grid as a vector - /// slice. + /// Borrow an immutable reference to the histogram grid. pub fn grid(&self) -> &Grid { &self.grid } From 6d5c0188508acb38334b046a140e269931ca7672 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 28 Oct 2018 22:39:36 +0000 Subject: [PATCH 057/146] Added docstring to Grid --- src/histogram/grid.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 631152f0..fac9124d 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -4,6 +4,34 @@ use std::ops::Range; use std::slice::Iter; use ndarray::ArrayView1; +/// A `Grid` is a partition of a rectangular region of an `n`-dimensional +/// space (e.g. `[a_1, b_1]x...x[a_n, b_n]`) into a collection of +/// rectangular `n`-dimensional bins. +/// +/// The grid is **fully determined by its 1-dimensional projections** on the +/// coordinate axes. 
For example, this is a partition that can be represented +/// as a `Grid` struct: +/// ``` +/// +---+-------+-+ +/// | | | | +/// +---+-------+-+ +/// | | | | +/// | | | | +/// | | | | +/// | | | | +/// +---+-------+-+ +/// ``` +/// while the next one can't: +/// ``` +/// +---+-------+-+ +/// | | | | +/// | +-------+-+ +/// | | | +/// | | | +/// | | | +/// | | | +/// +---+-------+-+ +/// ``` pub struct Grid { projections: Vec>, } From 735079e92176f9f3b8f531b152aaa8c3f41b15d0 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 28 Oct 2018 22:41:01 +0000 Subject: [PATCH 058/146] Added dosctring to Grid::ndim --- src/histogram/grid.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index fac9124d..1711262e 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -52,6 +52,7 @@ impl Grid { self.projections.iter() } + /// Returns `n`, the number of dimensions of the region partitioned by the grid. pub fn ndim(&self) -> usize { self.projections.len() } From 715e1b609561c14bf3e981c189b22ab52fa41a9b Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 28 Oct 2018 22:57:42 +0000 Subject: [PATCH 059/146] Added docstring to all method on Grid. Remove iter_projections --- src/histogram/grid.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 1711262e..c833adea 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -48,37 +48,43 @@ impl From>> for Grid { } impl Grid { - pub fn iter_projections(&self) -> Iter> { - self.projections.iter() - } - /// Returns `n`, the number of dimensions of the region partitioned by the grid. pub fn ndim(&self) -> usize { self.projections.len() } + /// Returns `v=(v_i)_i`, a vector, where `v_i` is the number of bins in the grid projection + /// on the `i`-th coordinate axis. 
pub fn shape(&self) -> Vec { - self.iter_projections().map(|e| e.len()).collect::>() + self.projections.iter().map(|e| e.len()).collect::>() } + /// Returns the grid projections on the coordinate axes as a slice of immutable references. pub fn projections(&self) -> &[Bins] { &self.projections } + /// Given `P=(p_1, ..., p_n)`, a point, it returns: + /// - `Ok(i)`, where `i=(i_1, ..., i_n)`, if `p_j` belongs to `i_j`-th bin + /// on the `j`-th grid projection on the coordinate axes for all `j` in `{1, ..., n}`; + /// - `Err(BinNotFound)`, if `P` does not belong to the region of space covered by the grid. pub fn index(&self, point: ArrayView1) -> Result, BinNotFound> { assert_eq!(point.len(), self.ndim(), "Dimension mismatch: the point has {0:?} dimensions, the grid \ expected {1:?} dimensions.", point.len(), self.ndim()); point .iter() - .zip(self.iter_projections()) + .zip(self.projections.iter()) .map(|(v, e)| e.index(v).ok_or(BinNotFound)) .collect::, _>>() } } impl Grid { - fn get(&self, index: &[usize]) -> Vec> { + /// Given `i=(i_1, ..., i_n)`, an `n`-dimensional index, it returns `I_{i_1}x...xI_{i_n}`, an + /// `n`-dimensional bin, where `I_{i_j}` is the `i_j`-th interval on the `j`-th projection + /// of the grid on the coordinate axes. 
+ pub fn get(&self, index: &[usize]) -> Vec> { assert_eq!(index.len(), self.ndim(), "Dimension mismatch: the index has {0:?} dimensions, the grid \ expected {1:?} dimensions.", index.len(), self.ndim()); From b35e376d3a350634be85e404f9d6d8c91255b540 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 28 Oct 2018 23:01:26 +0000 Subject: [PATCH 060/146] Ignoring Grid sketches, they are not tests --- src/histogram/grid.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index c833adea..0ecaf67a 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -11,7 +11,7 @@ use ndarray::ArrayView1; /// The grid is **fully determined by its 1-dimensional projections** on the /// coordinate axes. For example, this is a partition that can be represented /// as a `Grid` struct: -/// ``` +/// ```rust,ignore /// +---+-------+-+ /// | | | | /// +---+-------+-+ @@ -22,7 +22,7 @@ use ndarray::ArrayView1; /// +---+-------+-+ /// ``` /// while the next one can't: -/// ``` +/// ```rust,ignore /// +---+-------+-+ /// | | | | /// | +-------+-+ From 7147798856ce6070c1b24fd4ade192635de5c52e Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 29 Oct 2018 08:11:17 +0000 Subject: [PATCH 061/146] Added panics notice on Grid::get --- src/histogram/grid.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 0ecaf67a..5812fe4b 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -84,6 +84,9 @@ impl Grid { /// Given `i=(i_1, ..., i_n)`, an `n`-dimensional index, it returns `I_{i_1}x...xI_{i_n}`, an /// `n`-dimensional bin, where `I_{i_j}` is the `i_j`-th interval on the `j`-th projection /// of the grid on the coordinate axes. + /// + /// *Panics* if at least one among `(i_1, ..., i_n)` is out of bounds on the respective + /// coordinate axis - i.e. if there exists `j` such that `i_j >= self.projections[j].len()`. 
pub fn get(&self, index: &[usize]) -> Vec> { assert_eq!(index.len(), self.ndim(), "Dimension mismatch: the index has {0:?} dimensions, the grid \ From 847b955fcb28f072eae779ef60e471f32c5f611d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 29 Oct 2018 08:16:02 +0000 Subject: [PATCH 062/146] Re-indexed all docs from 0 instead of 1, for consistency with rust notation. --- src/histogram/grid.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 5812fe4b..ff9d868d 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -5,7 +5,7 @@ use std::slice::Iter; use ndarray::ArrayView1; /// A `Grid` is a partition of a rectangular region of an `n`-dimensional -/// space (e.g. `[a_1, b_1]x...x[a_n, b_n]`) into a collection of +/// space - e.g. `[a_0, b_0)x...x[a_{n-1}, b_{n-1})` - into a collection of /// rectangular `n`-dimensional bins. /// /// The grid is **fully determined by its 1-dimensional projections** on the @@ -65,8 +65,8 @@ impl Grid { } /// Given `P=(p_1, ..., p_n)`, a point, it returns: - /// - `Ok(i)`, where `i=(i_1, ..., i_n)`, if `p_j` belongs to `i_j`-th bin - /// on the `j`-th grid projection on the coordinate axes for all `j` in `{1, ..., n}`; + /// - `Ok(i)`, where `i=(i_0, ..., i_{n-1})`, if `p_j` belongs to `i_j`-th bin + /// on the `j`-th grid projection on the coordinate axes for all `j` in `{0, ..., n-1}`; /// - `Err(BinNotFound)`, if `P` does not belong to the region of space covered by the grid. pub fn index(&self, point: ArrayView1) -> Result, BinNotFound> { assert_eq!(point.len(), self.ndim(), @@ -81,11 +81,11 @@ impl Grid { } impl Grid { - /// Given `i=(i_1, ..., i_n)`, an `n`-dimensional index, it returns `I_{i_1}x...xI_{i_n}`, an - /// `n`-dimensional bin, where `I_{i_j}` is the `i_j`-th interval on the `j`-th projection - /// of the grid on the coordinate axes. 
+ /// Given `i=(i_0, ..., i_{n-1})`, an `n`-dimensional index, it returns + /// `I_{i_0}x...xI_{i_{n-1}}`, an `n`-dimensional bin, where `I_{i_j}` is + /// the `i_j`-th interval on the `j`-th projection of the grid on the coordinate axes. /// - /// *Panics* if at least one among `(i_1, ..., i_n)` is out of bounds on the respective + /// *Panics* if at least one among `(i_0, ..., i_{n-1})` is out of bounds on the respective /// coordinate axis - i.e. if there exists `j` such that `i_j >= self.projections[j].len()`. pub fn get(&self, index: &[usize]) -> Vec> { assert_eq!(index.len(), self.ndim(), From d7839c26a7fe381c9171342c2d40835143047fb6 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 29 Oct 2018 08:17:07 +0000 Subject: [PATCH 063/146] Minor doc fixes --- src/histogram/grid.rs | 2 +- src/histogram/histograms.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index ff9d868d..87b22344 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -85,7 +85,7 @@ impl Grid { /// `I_{i_0}x...xI_{i_{n-1}}`, an `n`-dimensional bin, where `I_{i_j}` is /// the `i_j`-th interval on the `j`-th projection of the grid on the coordinate axes. /// - /// *Panics* if at least one among `(i_0, ..., i_{n-1})` is out of bounds on the respective + /// **Panics** if at least one among `(i_0, ..., i_{n-1})` is out of bounds on the respective /// coordinate axis - i.e. if there exists `j` such that `i_j >= self.projections[j].len()`. pub fn get(&self, index: &[usize]) -> Vec> { assert_eq!(index.len(), self.ndim(), diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 1a90acb4..643ddda9 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -10,7 +10,7 @@ pub struct Histogram { } impl Histogram { - /// Return a new instance of Histogram given a [`Grid`]. + /// Returns a new instance of Histogram given a [`Grid`]. 
/// /// [`Grid`]: struct.Grid.html pub fn new(grid: Grid) -> Self { @@ -18,7 +18,7 @@ impl Histogram { Histogram { counts, grid } } - /// Add a single observation to the histogram. + /// Adds a single observation to the histogram. /// /// **Panics** if dimensions do not match: `self.ndim() != observation.len()`. /// @@ -61,12 +61,12 @@ impl Histogram { self.counts.ndim() } - /// Borrow a view to the histogram matrix. + /// Borrows a view on the histogram matrix. pub fn as_view(&self) -> ArrayViewD { self.counts.view() } - /// Borrow an immutable reference to the histogram grid. + /// Borrows an immutable reference to the histogram grid. pub fn grid(&self) -> &Grid { &self.grid } @@ -77,7 +77,7 @@ pub trait HistogramExt where S: Data, { - /// Return the [histogram](https://en.wikipedia.org/wiki/Histogram) + /// Returns the [histogram](https://en.wikipedia.org/wiki/Histogram) /// for a 2-dimensional array of points `M`. /// /// Let `(n, d)` be the shape of `M`: From 3e4022de1940f2576a67f9f105d4cb2d2e6deee3 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 31 Oct 2018 07:58:23 +0000 Subject: [PATCH 064/146] Bugfix in matrixmultiply is now available in ndarray master branch --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b10c3149..3c4b17ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,5 @@ quickcheck = "0.7" ndarray-rand = "0.8" [patch.crates-io] -ndarray = { git = "https://github.com/jturner314/ndarray.git", branch = "master" } +ndarray = { git = "https://github.com/rust-ndarray/ndarray.git", branch = "master" } noisy_float = { git = "https://github.com/SergiusIW/noisy_float-rs.git", rev = "c33a94803987475bbd205c9ff5a697af533f9a17" } -matrixmultiply = { git = "https://github.com/jturner314/matrixmultiply.git", rev = "344f4b43c55fcf7b20be20baff38406ebe9afbfb" } From a28aeabff4ea75c4a8e319832f4915488a382bc0 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 31 Oct 2018 08:16:57 +0000 
Subject: [PATCH 065/146] Adding bins builders --- src/histogram/builders.rs | 133 ++++++++++++++++++++++++++++++++++++++ src/histogram/grid.rs | 1 - src/histogram/mod.rs | 1 + 3 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 src/histogram/builders.rs diff --git a/src/histogram/builders.rs b/src/histogram/builders.rs new file mode 100644 index 00000000..11899141 --- /dev/null +++ b/src/histogram/builders.rs @@ -0,0 +1,133 @@ +use ndarray::prelude::*; +use ndarray::Data; +use num_traits::{FromPrimitive, NumOps}; +use super::super::QuantileExt; +use super::{Edges, Bins}; + +pub trait BinsBuilder + where + T: Ord +{ + fn from_array(array: ArrayBase) -> Self + where + S: Data; + + fn build(&self) -> Bins; +} + +pub struct EquiSpaced { + n_bins: usize, + min: T, + max: T, +} + +pub struct Sqrt { + builder: EquiSpaced, +} + +pub struct Rice { + builder: EquiSpaced, +} + +pub struct Sturges { + builder: EquiSpaced, +} + +pub struct FreedmanDiaconis { + builder: EquiSpaced, +} + +impl EquiSpaced + where + T: Ord + Clone + FromPrimitive + NumOps +{ + fn new(n_bins: usize, min: T, max: T) -> Self + { + Self { n_bins, min, max } + } + + fn build(&self) -> Bins { + let edges = match self.n_bins { + 0 => Edges::from(vec![]), + 1 => { + Edges::from( + vec![self.min.clone(), self.max.clone()] + ) + }, + _ => { + let range = self.max.clone() - self.min.clone(); + let step = range / T::from_usize(self.n_bins).unwrap(); + let mut edges: Vec = vec![]; + for i in 0..(self.n_bins+1) { + let edge = self.min.clone() + T::from_usize(i).unwrap()*step.clone(); + edges.push(edge); + } + Edges::from(edges) + }, + }; + Bins::new(edges) + } +} + +impl BinsBuilder for Sqrt + where + T: Ord + Clone + FromPrimitive + NumOps +{ + fn from_array(a: ArrayBase) -> Self + where + S: Data, + { + let n_elems = a.len(); + let n_bins = (n_elems as f64).sqrt().round() as usize; + let min = a.min().clone(); + let max = a.max().clone(); + let builder = EquiSpaced::new(n_bins, min, max); + 
Self { builder } + } + + fn build(&self) -> Bins { + self.builder.build() + } +} + +impl BinsBuilder for Rice + where + T: Ord + Clone + FromPrimitive + NumOps +{ + fn from_array(a: ArrayBase) -> Self + where + S: Data, + { + let n_elems = a.len(); + let n_bins = (2.*n_elems as f64).powf(1./3.).round() as usize; + let min = a.min().clone(); + let max = a.max().clone(); + let builder = EquiSpaced::new(n_bins, min, max); + Self { builder } + } + + fn build(&self) -> Bins { + self.builder.build() + } +} + +impl BinsBuilder for Sturges + where + T: Ord + Clone + FromPrimitive + NumOps +{ + fn from_array(a: ArrayBase) -> Self + where + S: Data, + { + let n_elems = a.len(); + let n_bins = (n_elems as f64).log2().round() as usize + 1; + let min = a.min().clone(); + let max = a.max().clone(); + let builder = EquiSpaced::new(n_bins, min, max); + Self { builder } + } + + fn build(&self) -> Bins { + self.builder.build() + } +} diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 87b22344..7a444a5f 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -1,7 +1,6 @@ use super::bins::Bins; use super::errors::BinNotFound; use std::ops::Range; -use std::slice::Iter; use ndarray::ArrayView1; /// A `Grid` is a partition of a rectangular region of an `n`-dimensional diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index 955d07aa..4534b843 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -6,5 +6,6 @@ pub use self::errors::BinNotFound; mod histograms; mod bins; +mod builders; mod grid; mod errors; From 2d4e455ae3503e64571ff649b885404b33f0ea75 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 31 Oct 2018 09:06:37 +0000 Subject: [PATCH 066/146] Brittle implementation of quantile_mut for 1d arrays --- src/quantile.rs | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/quantile.rs b/src/quantile.rs index e61e008c..c948f398 100644 --- a/src/quantile.rs +++ b/src/quantile.rs @@ -410,3 
+410,55 @@ where }) } } + +pub trait QuantileExt1d + where + S: Data, +{ + fn quantile_mut(&mut self, q: f64) -> A + where + A: Ord + Clone, + S: DataMut, + I: Interpolate; +} + +impl QuantileExt1d for ArrayBase + where + S: Data, +{ + fn quantile_mut(&mut self, q: f64) -> A + where + A: Ord + Clone, + S: DataMut, + I: Interpolate, + { + assert!((0. <= q) && (q <= 1.)); + let mut lower = None; + let mut higher = None; + let len = self.len(); + if I::needs_lower(q, len) { + let lower_index = I::lower_index(q,len); + lower = Some(self.sorted_get_mut(lower_index)); + if I::needs_higher(q, len) { + let higher_index = I::higher_index(q, len); + let relative_higher_index = higher_index - lower_index; + higher = Some( + self. + slice_mut(s![lower_index..]). + sorted_get_mut(relative_higher_index) + ); + }; + } else { + higher = Some( + self.sorted_get_mut(I::higher_index(q, len)), + ); + }; + I::interpolate( + lower.map(|x| Array::from_elem((1,), x)), + higher.map(|x| Array::from_elem((1,), x)), + q, + len + )[0].clone() + } +} + From 592f7496aa87676c0d4ace47d8960ba186d7914f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 1 Nov 2018 08:27:20 +0000 Subject: [PATCH 067/146] Refactored FreedmanDiaconis implementation --- src/histogram/builders.rs | 46 ++++++++++++++++++++++++++++++++++++++- src/lib.rs | 2 +- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/histogram/builders.rs b/src/histogram/builders.rs index 11899141..2a8948a5 100644 --- a/src/histogram/builders.rs +++ b/src/histogram/builders.rs @@ -1,7 +1,8 @@ use ndarray::prelude::*; use ndarray::Data; use num_traits::{FromPrimitive, NumOps}; -use super::super::QuantileExt; +use super::super::{QuantileExt, QuantileExt1d}; +use super::super::interpolate::Nearest; use super::{Edges, Bins}; pub trait BinsBuilder @@ -131,3 +132,46 @@ impl BinsBuilder for Sturges self.builder.build() } } + +impl BinsBuilder for FreedmanDiaconis + where + T: Ord + Clone + FromPrimitive + NumOps +{ + fn 
from_array(a: ArrayBase) -> Self + where + S: Data, + { + let n_bins = a.len(); + + let mut a_copy = a.to_owned(); + let first_quartile = a_copy.quantile_mut::(0.25); + let third_quartile = a_copy.quantile_mut::(0.75); + let iqr = third_quartile - first_quartile; + + let bin_width = FreedmanDiaconis::bin_width(n_bins, iqr); + let min = a_copy.min().clone(); + let max = a_copy.max().clone(); + let mut max_edge = min.clone(); + while max_edge < max { + max_edge = max_edge + bin_width.clone(); + } + let builder = EquiSpaced::new(n_bins, min, max_edge); + Self { builder } + } + + fn build(&self) -> Bins { + self.builder.build() + } +} + +impl FreedmanDiaconis + where + T: Ord + Clone + FromPrimitive + NumOps +{ + fn bin_width(n_bins: usize, iqr: T) -> T + { + let denominator = (n_bins as f64).powf(1. / 3.); + let bin_width = T::from_usize(2).unwrap() * iqr / T::from_f64(denominator).unwrap(); + bin_width + } +} diff --git a/src/lib.rs b/src/lib.rs index 371b032c..4b8a81ff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,7 @@ extern crate ndarray_rand; extern crate quickcheck; pub use maybe_nan::{MaybeNan, MaybeNanExt}; -pub use quantile::{interpolate, QuantileExt}; +pub use quantile::{interpolate, QuantileExt, QuantileExt1d}; pub use sort::Sort1dExt; pub use correlation::CorrelationExt; pub use histogram::HistogramExt; From 5ff4b21b8764c9a4280ab313fb6b86325f8d65dd Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 1 Nov 2018 09:01:57 +0000 Subject: [PATCH 068/146] All important bin building strategies are covered --- src/histogram/builders.rs | 131 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 124 insertions(+), 7 deletions(-) diff --git a/src/histogram/builders.rs b/src/histogram/builders.rs index 2a8948a5..61e8c813 100644 --- a/src/histogram/builders.rs +++ b/src/histogram/builders.rs @@ -14,6 +14,8 @@ pub trait BinsBuilder S: Data; fn build(&self) -> Bins; + + fn n_bins(&self) -> usize; } pub struct EquiSpaced { @@ -38,16 +40,25 @@ pub struct 
FreedmanDiaconis { builder: EquiSpaced, } +enum SturgesOrFD { + Sturges(Sturges), + FreedmanDiaconis(FreedmanDiaconis), +} + +pub struct Auto { + builder: SturgesOrFD, +} + impl EquiSpaced where T: Ord + Clone + FromPrimitive + NumOps { - fn new(n_bins: usize, min: T, max: T) -> Self + pub fn new(n_bins: usize, min: T, max: T) -> Self { Self { n_bins, min, max } } - fn build(&self) -> Bins { + pub fn build(&self) -> Bins { let edges = match self.n_bins { 0 => Edges::from(vec![]), 1 => { @@ -56,11 +67,10 @@ impl EquiSpaced ) }, _ => { - let range = self.max.clone() - self.min.clone(); - let step = range / T::from_usize(self.n_bins).unwrap(); + let bin_width = self.bin_width(); let mut edges: Vec = vec![]; for i in 0..(self.n_bins+1) { - let edge = self.min.clone() + T::from_usize(i).unwrap()*step.clone(); + let edge = self.min.clone() + T::from_usize(i).unwrap()*bin_width.clone(); edges.push(edge); } Edges::from(edges) @@ -68,6 +78,16 @@ impl EquiSpaced }; Bins::new(edges) } + + pub fn n_bins(&self) -> usize { + self.n_bins + } + + pub fn bin_width(&self) -> T { + let range = self.max.clone() - self.min.clone(); + let bin_width = range / T::from_usize(self.n_bins).unwrap(); + bin_width + } } impl BinsBuilder for Sqrt @@ -89,6 +109,19 @@ impl BinsBuilder for Sqrt fn build(&self) -> Bins { self.builder.build() } + + fn n_bins(&self) -> usize { + self.builder.n_bins() + } +} + +impl Sqrt + where + T: Ord + Clone + FromPrimitive + NumOps +{ + pub fn bin_width(&self) -> T { + self.builder.bin_width() + } } impl BinsBuilder for Rice @@ -110,6 +143,19 @@ impl BinsBuilder for Rice fn build(&self) -> Bins { self.builder.build() } + + fn n_bins(&self) -> usize { + self.builder.n_bins() + } +} + +impl Rice + where + T: Ord + Clone + FromPrimitive + NumOps +{ + pub fn bin_width(&self) -> T { + self.builder.bin_width() + } } impl BinsBuilder for Sturges @@ -131,6 +177,19 @@ impl BinsBuilder for Sturges fn build(&self) -> Bins { self.builder.build() } + + fn n_bins(&self) -> 
usize { + self.builder.n_bins() + } +} + +impl Sturges + where + T: Ord + Clone + FromPrimitive + NumOps +{ + pub fn bin_width(&self) -> T { + self.builder.bin_width() + } } impl BinsBuilder for FreedmanDiaconis @@ -148,7 +207,7 @@ impl BinsBuilder for FreedmanDiaconis let third_quartile = a_copy.quantile_mut::(0.75); let iqr = third_quartile - first_quartile; - let bin_width = FreedmanDiaconis::bin_width(n_bins, iqr); + let bin_width = FreedmanDiaconis::compute_bin_width(n_bins, iqr); let min = a_copy.min().clone(); let max = a_copy.max().clone(); let mut max_edge = min.clone(); @@ -162,16 +221,74 @@ impl BinsBuilder for FreedmanDiaconis fn build(&self) -> Bins { self.builder.build() } + + fn n_bins(&self) -> usize { + self.builder.n_bins() + } } impl FreedmanDiaconis where T: Ord + Clone + FromPrimitive + NumOps { - fn bin_width(n_bins: usize, iqr: T) -> T + fn compute_bin_width(n_bins: usize, iqr: T) -> T { let denominator = (n_bins as f64).powf(1. / 3.); let bin_width = T::from_usize(2).unwrap() * iqr / T::from_f64(denominator).unwrap(); bin_width } + + pub fn bin_width(&self) -> T { + self.builder.bin_width() + } +} + +impl BinsBuilder for Auto + where + T: Ord + Clone + FromPrimitive + NumOps +{ + fn from_array(a: ArrayBase) -> Self + where + S: Data, + { + let fd_builder = FreedmanDiaconis::from_array(a.view()); + let sturges_builder = Sturges::from_array(a.view()); + let builder = { + if fd_builder.bin_width() > sturges_builder.bin_width() { + SturgesOrFD::Sturges(sturges_builder) + } else { + SturgesOrFD::FreedmanDiaconis(fd_builder) + } + }; + Self { builder } + } + + fn build(&self) -> Bins { + // Ugly + match &self.builder { + SturgesOrFD::FreedmanDiaconis(b) => b.build(), + SturgesOrFD::Sturges(b) => b.build(), + } + } + + fn n_bins(&self) -> usize { + // Ugly + match &self.builder { + SturgesOrFD::FreedmanDiaconis(b) => b.n_bins(), + SturgesOrFD::Sturges(b) => b.n_bins(), + } + } +} + +impl Auto + where + T: Ord + Clone + FromPrimitive + NumOps +{ + 
pub fn bin_width(&self) -> T { + // Ugly + match &self.builder { + SturgesOrFD::FreedmanDiaconis(b) => b.bin_width(), + SturgesOrFD::Sturges(b) => b.bin_width(), + } + } } From c949d45911dff0107ac7f6ceab18796adcffe6d2 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 7 Nov 2018 08:39:21 +0000 Subject: [PATCH 069/146] Added signature for GridBuilder --- src/histogram/grid.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 7a444a5f..1bfce486 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -1,7 +1,9 @@ use super::bins::Bins; use super::errors::BinNotFound; +use super::builders::BinsBuilder; use std::ops::Range; -use ndarray::ArrayView1; +use std::marker::PhantomData; +use ndarray::{ArrayView1, ArrayBase, Data, Dimension}; /// A `Grid` is a partition of a rectangular region of an `n`-dimensional /// space - e.g. `[a_0, b_0)x...x[a_{n-1}, b_{n-1})` - into a collection of @@ -97,3 +99,22 @@ impl Grid { bin } } + +pub struct GridBuilder> { + bin_builder: B, + phantom: PhantomData +} + +impl> GridBuilder { + pub fn from_array(array: ArrayBase) -> Self + where + S: Data, + D: Dimension, + { + unimplemented!() + } + + pub fn build(&self) -> Grid { + unimplemented!() + } +} From 3db50547d308d20e595b2726def610a83e419f26 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 7 Nov 2018 08:52:12 +0000 Subject: [PATCH 070/146] Implemented from_array for GridBuilder --- src/histogram/grid.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 1bfce486..9baec520 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -3,7 +3,7 @@ use super::errors::BinNotFound; use super::builders::BinsBuilder; use std::ops::Range; use std::marker::PhantomData; -use ndarray::{ArrayView1, ArrayBase, Data, Dimension}; +use ndarray::{ArrayView1, ArrayBase, Data, Axis, Ix2}; /// A `Grid` is a 
partition of a rectangular region of an `n`-dimensional /// space - e.g. `[a_0, b_0)x...x[a_{n-1}, b_{n-1})` - into a collection of @@ -101,17 +101,21 @@ impl Grid { } pub struct GridBuilder> { - bin_builder: B, + bin_builders: Vec, phantom: PhantomData } impl> GridBuilder { - pub fn from_array(array: ArrayBase) -> Self + pub fn from_array(array: ArrayBase) -> Self where S: Data, - D: Dimension, { - unimplemented!() + let mut bin_builders = vec![]; + for subview in array.axis_iter(Axis(1)) { + let bin_builder = B::from_array(subview); + bin_builders.push(bin_builder); + } + Self { bin_builders, phantom: PhantomData } } pub fn build(&self) -> Grid { From 35f3af04aa65dc6cddacbb6badd4ebc5361f2fc8 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 7 Nov 2018 08:54:52 +0000 Subject: [PATCH 071/146] Implemented build for GridBuilder --- src/histogram/grid.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 9baec520..0baeaee9 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -119,6 +119,10 @@ impl> GridBuilder { } pub fn build(&self) -> Grid { - unimplemented!() + let mut projections = vec![]; + for bin_builder in &self.bin_builders { + projections.push(bin_builder.build()); + } + Grid::from(projections) } } From cdaf4832247be72b0e2613291d4804576037c300 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 7 Nov 2018 09:00:46 +0000 Subject: [PATCH 072/146] GridBuilder is exported as top level struct --- src/histogram/grid.rs | 35 +++++++++++++++++++++++++++++++++++ src/histogram/mod.rs | 4 ++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 0baeaee9..3e08ce51 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -33,6 +33,41 @@ use ndarray::{ArrayView1, ArrayBase, Data, Axis, Ix2}; /// | | | /// +---+-------+-+ /// ``` +/// +/// # Example: +/// +/// ``` +/// extern crate ndarray_stats; +/// 
#[macro_use(array)] +/// extern crate ndarray; +/// extern crate noisy_float; +/// use ndarray_stats::histogram::{Edges, Bins, Histogram, Grid, GridBuilder}; +/// use noisy_float::types::n64; +/// +/// # fn main() { +/// let observations = array![ +/// [n64(1.), n64(0.5)], +/// [n64(-0.5), n64(1.)], +/// [n64(-1.), n64(-0.5)], +/// [n64(0.5), n64(-1.)] +/// ]; +/// let grid = GridBuilder +/// let bins = Bins::new(edges); +/// let square_grid = Grid::from(vec![bins.clone(), bins.clone()]); +/// let mut histogram = Histogram::new(square_grid); +/// +/// let observation = array![n64(0.5), n64(0.6)]; +/// +/// histogram.add_observation(observation.view()); +/// +/// let histogram_matrix = histogram.as_view(); +/// let expected = array![ +/// [0, 0], +/// [0, 1], +/// ]; +/// assert_eq!(histogram_matrix, expected.into_dyn()); +/// # } +/// ``` pub struct Grid { projections: Vec>, } diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index 4534b843..c43a67b9 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -1,11 +1,11 @@ //! Histogram functionalities. pub use self::histograms::{Histogram, HistogramExt}; pub use self::bins::{Edges, Bins}; -pub use self::grid::Grid; +pub use self::grid::{Grid, GridBuilder}; pub use self::errors::BinNotFound; mod histograms; mod bins; -mod builders; +pub mod builders; mod grid; mod errors; From ce0180f0828d9cd1d5b0138d875622c23cd58d9d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 7 Nov 2018 09:16:52 +0000 Subject: [PATCH 073/146] Explicitly require a view. 
Refactored builders to match it --- src/histogram/builders.rs | 24 ++++++------------------ src/histogram/grid.rs | 26 ++++++++++---------------- 2 files changed, 16 insertions(+), 34 deletions(-) diff --git a/src/histogram/builders.rs b/src/histogram/builders.rs index 61e8c813..24770a42 100644 --- a/src/histogram/builders.rs +++ b/src/histogram/builders.rs @@ -9,9 +9,7 @@ pub trait BinsBuilder where T: Ord { - fn from_array(array: ArrayBase) -> Self - where - S: Data; + fn from_array(array: ArrayView1) -> Self; fn build(&self) -> Bins; @@ -94,9 +92,7 @@ impl BinsBuilder for Sqrt where T: Ord + Clone + FromPrimitive + NumOps { - fn from_array(a: ArrayBase) -> Self - where - S: Data, + fn from_array(a: ArrayView1) -> Self { let n_elems = a.len(); let n_bins = (n_elems as f64).sqrt().round() as usize; @@ -128,9 +124,7 @@ impl BinsBuilder for Rice where T: Ord + Clone + FromPrimitive + NumOps { - fn from_array(a: ArrayBase) -> Self - where - S: Data, + fn from_array(a: ArrayView1) -> Self { let n_elems = a.len(); let n_bins = (2.*n_elems as f64).powf(1./3.).round() as usize; @@ -162,9 +156,7 @@ impl BinsBuilder for Sturges where T: Ord + Clone + FromPrimitive + NumOps { - fn from_array(a: ArrayBase) -> Self - where - S: Data, + fn from_array(a: ArrayView1) -> Self { let n_elems = a.len(); let n_bins = (n_elems as f64).log2().round() as usize + 1; @@ -196,9 +188,7 @@ impl BinsBuilder for FreedmanDiaconis where T: Ord + Clone + FromPrimitive + NumOps { - fn from_array(a: ArrayBase) -> Self - where - S: Data, + fn from_array(a: ArrayView1) -> Self { let n_bins = a.len(); @@ -247,9 +237,7 @@ impl BinsBuilder for Auto where T: Ord + Clone + FromPrimitive + NumOps { - fn from_array(a: ArrayBase) -> Self - where - S: Data, + fn from_array(a: ArrayView1) -> Self { let fd_builder = FreedmanDiaconis::from_array(a.view()); let sturges_builder = Sturges::from_array(a.view()); diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 3e08ce51..b975b338 100644 --- 
a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -3,7 +3,7 @@ use super::errors::BinNotFound; use super::builders::BinsBuilder; use std::ops::Range; use std::marker::PhantomData; -use ndarray::{ArrayView1, ArrayBase, Data, Axis, Ix2}; +use ndarray::{ArrayView1, ArrayView2, Axis}; /// A `Grid` is a partition of a rectangular region of an `n`-dimensional /// space - e.g. `[a_0, b_0)x...x[a_{n-1}, b_{n-1})` - into a collection of @@ -41,8 +41,10 @@ use ndarray::{ArrayView1, ArrayBase, Data, Axis, Ix2}; /// #[macro_use(array)] /// extern crate ndarray; /// extern crate noisy_float; -/// use ndarray_stats::histogram::{Edges, Bins, Histogram, Grid, GridBuilder}; -/// use noisy_float::types::n64; +/// use ndarray_stats::HistogramExt; +/// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder}; +/// use ndarray_stats::histogram::builders::Sqrt; +/// use noisy_float::types::{N64, n64}; /// /// # fn main() { /// let observations = array![ @@ -51,19 +53,13 @@ use ndarray::{ArrayView1, ArrayBase, Data, Axis, Ix2}; /// [n64(-1.), n64(-0.5)], /// [n64(0.5), n64(-1.)] /// ]; -/// let grid = GridBuilder -/// let bins = Bins::new(edges); -/// let square_grid = Grid::from(vec![bins.clone(), bins.clone()]); -/// let mut histogram = Histogram::new(square_grid); -/// -/// let observation = array![n64(0.5), n64(0.6)]; -/// -/// histogram.add_observation(observation.view()); +/// let grid = GridBuilder::>::from_array(observations.view()).build(); +/// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.as_view(); /// let expected = array![ -/// [0, 0], -/// [0, 1], +/// [1, 0], +/// [1, 0], /// ]; /// assert_eq!(histogram_matrix, expected.into_dyn()); /// # } @@ -141,9 +137,7 @@ pub struct GridBuilder> { } impl> GridBuilder { - pub fn from_array(array: ArrayBase) -> Self - where - S: Data, + pub fn from_array(array: ArrayView2) -> Self { let mut bin_builders = vec![]; for subview in array.axis_iter(Axis(1)) { From 
4cbd66d43b9d917a4c941ed2083d357639fcfc68 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 07:31:29 +0000 Subject: [PATCH 074/146] Added comment to clarify --- src/histogram/grid.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index b975b338..8c866688 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -57,6 +57,7 @@ use ndarray::{ArrayView1, ArrayView2, Axis}; /// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.as_view(); +/// // Bins are left inclusive, right exclusive! /// let expected = array![ /// [1, 0], /// [1, 0], From 7bb35098877336b3dd7f7b387bcc225ed92d88f5 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 07:44:59 +0000 Subject: [PATCH 075/146] More docs for Grid --- src/histogram/grid.rs | 26 +++++++++++++++----------- src/histogram/histograms.rs | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 8c866688..5b94a439 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -43,25 +43,24 @@ use ndarray::{ArrayView1, ArrayView2, Axis}; /// extern crate noisy_float; /// use ndarray_stats::HistogramExt; /// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder}; -/// use ndarray_stats::histogram::builders::Sqrt; +/// use ndarray_stats::histogram::builders::Auto; /// use noisy_float::types::{N64, n64}; /// /// # fn main() { -/// let observations = array![ -/// [n64(1.), n64(0.5)], -/// [n64(-0.5), n64(1.)], -/// [n64(-1.), n64(-0.5)], -/// [n64(0.5), n64(-1.)] +/// // 1-dimensional observations, as a (n_observations, 1) 2-d matrix +/// let mut observations = array![ +/// [1, 4, 5, 2, 100, 20, 50, 65, 27, 40, 45, 23] /// ]; -/// let grid = GridBuilder::>::from_array(observations.view()).build(); +/// observations.swap_axes(0, 1); +/// +/// // The optimal grid layout is inferred from the data, +/// // 
specifying a strategy (Auto in this case) +/// let grid = GridBuilder::>::from_array(observations.view()).build(); /// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.as_view(); /// // Bins are left inclusive, right exclusive! -/// let expected = array![ -/// [1, 0], -/// [1, 0], -/// ]; +/// let expected = array![4, 1, 2, 1, 2, 0, 1, 0, 0, 1, 0, 0]; /// assert_eq!(histogram_matrix, expected.into_dyn()); /// # } /// ``` @@ -75,6 +74,11 @@ impl From>> for Grid { /// /// The `i`-th element in `Vec>` represents the 1-dimensional /// projection of the bin grid on the `i`-th axis. + /// + /// Alternatively, a `Grid` can be build directly from data using a + /// [`GridBuilder`]. + /// + /// [`GridBuilder`]: struct.GridBuilder.html fn from(projections: Vec>) -> Self { Grid { projections } } diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 643ddda9..e38a9026 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -89,6 +89,38 @@ pub trait HistogramExt /// 4-dimensional space. /// /// **Panics** if `d` is different from `grid.ndim()`. + /// + /// # Example: + /// + /// ``` + /// extern crate ndarray_stats; + /// #[macro_use(array)] + /// extern crate ndarray; + /// extern crate noisy_float; + /// use ndarray_stats::HistogramExt; + /// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder}; + /// use ndarray_stats::histogram::builders::Sqrt; + /// use noisy_float::types::{N64, n64}; + /// + /// # fn main() { + /// let observations = array![ + /// [n64(1.), n64(0.5)], + /// [n64(-0.5), n64(1.)], + /// [n64(-1.), n64(-0.5)], + /// [n64(0.5), n64(-1.)] + /// ]; + /// let grid = GridBuilder::>::from_array(observations.view()).build(); + /// let histogram = observations.histogram(grid); + /// + /// let histogram_matrix = histogram.as_view(); + /// // Bins are left inclusive, right exclusive! 
+ /// let expected = array![ + /// [1, 0], + /// [1, 0], + /// ]; + /// assert_eq!(histogram_matrix, expected.into_dyn()); + /// # } + /// ``` fn histogram(&self, grid: Grid) -> Histogram where A: Ord; From f2bec0ee0023d3de49c34ff596c93874c4b69260 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:02:35 +0000 Subject: [PATCH 076/146] Added docs to GridBuilder --- src/histogram/grid.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 5b94a439..ef15a571 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -136,6 +136,12 @@ impl Grid { } } +/// `GridBuilder`, given a [`strategy`] and some observations, returns a [`Grid`] +/// instance for [`histogram`] computation. +/// +/// [`Grid`]: struct.Grid.html +/// [`histogram`]: trait.HistogramExt.html +/// [`strategy`]: builders/index.html pub struct GridBuilder> { bin_builders: Vec, phantom: PhantomData From 4f2f96b8a2505ea498835f115c4d0ef04d248254 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:07:08 +0000 Subject: [PATCH 077/146] Added docs to GridBuilder's method --- src/histogram/grid.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index ef15a571..aeb3ed49 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -148,6 +148,12 @@ pub struct GridBuilder> { } impl> GridBuilder { + /// Given some observations in a 2-dimensional array with shape `(n_observations, n_dimension)` + /// it returns a `GridBuilder` instance that has learned the required parameter + /// to build a [`Grid`] according to the specified [`strategy`]. 
+ /// + /// [`Grid`]: struct.Grid.html + /// [`strategy`]: builders/index.html pub fn from_array(array: ArrayView2) -> Self { let mut bin_builders = vec![]; @@ -158,6 +164,12 @@ impl> GridBuilder { Self { bin_builders, phantom: PhantomData } } + /// Returns a [`Grid`] instance, built accordingly to the specified [`strategy`] + /// using the parameters inferred from observations in [`from_array`]. + /// + /// [`Grid`]: struct.Grid.html + /// [`strategy`]: builders/index.html + /// [`from_array`]: #method.from_array.html pub fn build(&self) -> Grid { let mut projections = vec![]; for bin_builder in &self.bin_builders { From 2c3c5904da7c1b1dcadc817ba201201715fcd83b Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:08:44 +0000 Subject: [PATCH 078/146] Renamed BinsBuilder to BinsBuildingStrategy --- src/histogram/builders.rs | 12 ++++++------ src/histogram/grid.rs | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/histogram/builders.rs b/src/histogram/builders.rs index 24770a42..4274ab63 100644 --- a/src/histogram/builders.rs +++ b/src/histogram/builders.rs @@ -5,7 +5,7 @@ use super::super::{QuantileExt, QuantileExt1d}; use super::super::interpolate::Nearest; use super::{Edges, Bins}; -pub trait BinsBuilder +pub trait BinsBuildingStrategy where T: Ord { @@ -88,7 +88,7 @@ impl EquiSpaced } } -impl BinsBuilder for Sqrt +impl BinsBuildingStrategy for Sqrt where T: Ord + Clone + FromPrimitive + NumOps { @@ -120,7 +120,7 @@ impl Sqrt } } -impl BinsBuilder for Rice +impl BinsBuildingStrategy for Rice where T: Ord + Clone + FromPrimitive + NumOps { @@ -152,7 +152,7 @@ impl Rice } } -impl BinsBuilder for Sturges +impl BinsBuildingStrategy for Sturges where T: Ord + Clone + FromPrimitive + NumOps { @@ -184,7 +184,7 @@ impl Sturges } } -impl BinsBuilder for FreedmanDiaconis +impl BinsBuildingStrategy for FreedmanDiaconis where T: Ord + Clone + FromPrimitive + NumOps { @@ -233,7 +233,7 @@ impl FreedmanDiaconis } } -impl BinsBuilder 
for Auto +impl BinsBuildingStrategy for Auto where T: Ord + Clone + FromPrimitive + NumOps { diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index aeb3ed49..faf24bca 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -1,6 +1,6 @@ use super::bins::Bins; use super::errors::BinNotFound; -use super::builders::BinsBuilder; +use super::builders::BinsBuildingStrategy; use std::ops::Range; use std::marker::PhantomData; use ndarray::{ArrayView1, ArrayView2, Axis}; @@ -142,12 +142,12 @@ impl Grid { /// [`Grid`]: struct.Grid.html /// [`histogram`]: trait.HistogramExt.html /// [`strategy`]: builders/index.html -pub struct GridBuilder> { +pub struct GridBuilder> { bin_builders: Vec, phantom: PhantomData } -impl> GridBuilder { +impl> GridBuilder { /// Given some observations in a 2-dimensional array with shape `(n_observations, n_dimension)` /// it returns a `GridBuilder` instance that has learned the required parameter /// to build a [`Grid`] according to the specified [`strategy`]. 
From 3dce9899b2a0b7d24e0776dccd6a8255dc332b76 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:09:15 +0000 Subject: [PATCH 079/146] EquiSpaces is private --- src/histogram/builders.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/histogram/builders.rs b/src/histogram/builders.rs index 4274ab63..11cd51f2 100644 --- a/src/histogram/builders.rs +++ b/src/histogram/builders.rs @@ -16,7 +16,7 @@ pub trait BinsBuildingStrategy fn n_bins(&self) -> usize; } -pub struct EquiSpaced { +struct EquiSpaced { n_bins: usize, min: T, max: T, @@ -51,12 +51,12 @@ impl EquiSpaced where T: Ord + Clone + FromPrimitive + NumOps { - pub fn new(n_bins: usize, min: T, max: T) -> Self + fn new(n_bins: usize, min: T, max: T) -> Self { Self { n_bins, min, max } } - pub fn build(&self) -> Bins { + fn build(&self) -> Bins { let edges = match self.n_bins { 0 => Edges::from(vec![]), 1 => { @@ -77,11 +77,11 @@ impl EquiSpaced Bins::new(edges) } - pub fn n_bins(&self) -> usize { + fn n_bins(&self) -> usize { self.n_bins } - pub fn bin_width(&self) -> T { + fn bin_width(&self) -> T { let range = self.max.clone() - self.min.clone(); let bin_width = range / T::from_usize(self.n_bins).unwrap(); bin_width From 2d563ab56ad42cbeba7203d06e4c56a3e2436acf Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:15:08 +0000 Subject: [PATCH 080/146] Added docs to BinsBuildingStrategy --- src/histogram/builders.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/histogram/builders.rs b/src/histogram/builders.rs index 11cd51f2..577047eb 100644 --- a/src/histogram/builders.rs +++ b/src/histogram/builders.rs @@ -5,6 +5,16 @@ use super::super::{QuantileExt, QuantileExt1d}; use super::super::interpolate::Nearest; use super::{Edges, Bins}; +/// A trait implemented by all strategies to build [`Bins`] +/// with parameters inferred from observations. 
+/// +/// A `BinsBuildingStrategy` is required by [`GridBuilder`] +/// to know how to build a [`Grid`]'s projections on the +/// coordinate axes. +/// +/// [`Bins`]: ../struct.Bins.html +/// [`Grid`]: ../struct.Grid.html +/// [`GridBuilder`]: ../struct.GridBuilder.html pub trait BinsBuildingStrategy where T: Ord From 0f05d73662f9544c25f34a5baa3ece5c7617fbf8 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:16:42 +0000 Subject: [PATCH 081/146] Documented bin_width method --- src/histogram/builders.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/histogram/builders.rs b/src/histogram/builders.rs index 577047eb..feb319fe 100644 --- a/src/histogram/builders.rs +++ b/src/histogram/builders.rs @@ -91,6 +91,7 @@ impl EquiSpaced self.n_bins } + /// The bin width (or bin length) according to the fitted strategy. fn bin_width(&self) -> T { let range = self.max.clone() - self.min.clone(); let bin_width = range / T::from_usize(self.n_bins).unwrap(); @@ -125,6 +126,7 @@ impl Sqrt where T: Ord + Clone + FromPrimitive + NumOps { + /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { self.builder.bin_width() } @@ -157,6 +159,7 @@ impl Rice where T: Ord + Clone + FromPrimitive + NumOps { + /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { self.builder.bin_width() } @@ -189,6 +192,7 @@ impl Sturges where T: Ord + Clone + FromPrimitive + NumOps { + /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { self.builder.bin_width() } @@ -238,6 +242,7 @@ impl FreedmanDiaconis bin_width } + /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { self.builder.bin_width() } @@ -282,6 +287,7 @@ impl Auto where T: Ord + Clone + FromPrimitive + NumOps { + /// The bin width (or bin length) according to the fitted strategy. 
pub fn bin_width(&self) -> T { // Ugly match &self.builder { From 0d14e5393b59163540c85f318b115bc863d2de74 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:30:50 +0000 Subject: [PATCH 082/146] Docs for all strategies --- src/histogram/builders.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/histogram/builders.rs b/src/histogram/builders.rs index feb319fe..01c261eb 100644 --- a/src/histogram/builders.rs +++ b/src/histogram/builders.rs @@ -1,3 +1,12 @@ +//! Strategies to build [`Bins`]s and [`Grid`]s (using [`GridBuilder`]) inferring +//! optimal parameters directly from data. +//! +//! The docs for each strategy have been taken almost verbatim from [`NumPy`]. +//! +//! [`Bins`]: ../struct.Bins.html +//! [`Grid`]: ../struct.Grid.html +//! [`GridBuilder`]: ../struct.GridBuilder.html +//! [`NumPy`]: https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges use ndarray::prelude::*; use ndarray::Data; use num_traits::{FromPrimitive, NumOps}; @@ -5,6 +14,7 @@ use super::super::{QuantileExt, QuantileExt1d}; use super::super::interpolate::Nearest; use super::{Edges, Bins}; + /// A trait implemented by all strategies to build [`Bins`] /// with parameters inferred from observations. /// @@ -19,10 +29,23 @@ pub trait BinsBuildingStrategy where T: Ord { + /// Given some observations in a 1-dimensional array it returns a `BinsBuildingStrategy` + /// that has learned the required parameter to build a collection of [`Bins`]. + /// + /// [`Bins`]: ../struct.Bins.html fn from_array(array: ArrayView1) -> Self; + /// Returns a [`Bins`] instance, built accordingly to the parameters + /// inferred from observations in [`from_array`]. + /// + /// [`Bins`]: ../struct.Bins.html + /// [`from_array`]: #method.from_array.html fn build(&self) -> Bins; + /// Returns the optimal number of bins, according to the parameters + /// inferred from observations in [`from_array`]. 
+ /// + /// [`from_array`]: #method.from_array.html fn n_bins(&self) -> usize; } @@ -32,18 +55,26 @@ struct EquiSpaced { max: T, } +/// Square root (of data size) strategy, used by Excel and other programs +/// for its speed and simplicity. pub struct Sqrt { builder: EquiSpaced, } +/// A strategy that does not take variability into account, only data size. Commonly +/// overestimates number of bins required. pub struct Rice { builder: EquiSpaced, } +/// R’s default strategy, only accounts for data size. Only optimal for gaussian data and +/// underestimates number of bins for large non-gaussian datasets. pub struct Sturges { builder: EquiSpaced, } +/// Robust (resilient to outliers) strategy that takes into +/// account data variability and data size. pub struct FreedmanDiaconis { builder: EquiSpaced, } @@ -53,6 +84,11 @@ enum SturgesOrFD { FreedmanDiaconis(FreedmanDiaconis), } +/// Maximum of the [`Sturges`] and [`FreedmanDiaconis`] strategies. +/// Provides good all around performance. +/// +/// [`Sturges`]: struct.Sturges.html +/// [`FreedmanDiaconis`]: struct.FreedmanDiaconis.html pub struct Auto { builder: SturgesOrFD, } From a48afbb52a2d122efb182cfed1ab532b8545c418 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:42:30 +0000 Subject: [PATCH 083/146] Added detailed docs to all strategies using NumPy as base (almost verbatim) --- src/histogram/builders.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/histogram/builders.rs b/src/histogram/builders.rs index 01c261eb..669e6384 100644 --- a/src/histogram/builders.rs +++ b/src/histogram/builders.rs @@ -57,24 +57,53 @@ struct EquiSpaced { /// Square root (of data size) strategy, used by Excel and other programs /// for its speed and simplicity. +/// +/// Let `n` be the number of observations. Then +/// +/// `n_bins` = `sqrt(n)` pub struct Sqrt { builder: EquiSpaced, } /// A strategy that does not take variability into account, only data size. 
Commonly /// overestimates number of bins required. +/// +/// Let `n` be the number of observations and `n_bins` the number of bins. +/// +/// `n_bins` = 2 * `n^(1/3)` +/// +/// `n_bins` is only proportional to cube root of `n`. It tends to overestimate +/// the `n_bins` and it does not take into account data variability. pub struct Rice { builder: EquiSpaced, } /// R’s default strategy, only accounts for data size. Only optimal for gaussian data and /// underestimates number of bins for large non-gaussian datasets. +/// +/// Let `n` be the number of observations. +/// The number of bins is the base 2 log of `n`. This estimator assumes normality of data and +/// is too conservative for larger, non-normal datasets. +/// +/// This is the default method in R’s hist method. pub struct Sturges { builder: EquiSpaced, } /// Robust (resilient to outliers) strategy that takes into /// account data variability and data size. +/// +/// Let `n` be the number of observations. +/// +/// `bin_width` = 2 * `IQR` / `n^(1/3)` +/// +/// The bin width is proportional to the interquartile range ([`IQR`]) and inversely proportional to +/// cube root of `n`. It can be too conservative for small datasets, but it is quite good for +/// large datasets. +/// +/// The [`IQR`] is very robust to outliers. +/// +/// [`IQR`]: https://en.wikipedia.org/wiki/Interquartile_range pub struct FreedmanDiaconis { builder: EquiSpaced, } @@ -87,6 +116,11 @@ enum SturgesOrFD { /// Maximum of the [`Sturges`] and [`FreedmanDiaconis`] strategies. /// Provides good all around performance. /// +/// A compromise to get a good value. For small datasets the [`Sturges`] value will usually be chosen, +/// while larger datasets will usually default to [`FreedmanDiaconis`]. Avoids the overly +/// conservative behaviour of [`FreedmanDiaconis`] and [`Sturges`] for +/// small and large datasets respectively. 
+/// /// [`Sturges`]: struct.Sturges.html /// [`FreedmanDiaconis`]: struct.FreedmanDiaconis.html pub struct Auto { From af41380931a2a9d31762aa4bf86e4a2eaae5e1a6 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:44:30 +0000 Subject: [PATCH 084/146] Renamed `builders` submodule to `strategies` --- src/histogram/grid.rs | 10 +++++----- src/histogram/histograms.rs | 2 +- src/histogram/mod.rs | 2 +- src/histogram/{builders.rs => strategies.rs} | 0 4 files changed, 7 insertions(+), 7 deletions(-) rename src/histogram/{builders.rs => strategies.rs} (100%) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index faf24bca..25437564 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -1,6 +1,6 @@ use super::bins::Bins; use super::errors::BinNotFound; -use super::builders::BinsBuildingStrategy; +use super::strategies::BinsBuildingStrategy; use std::ops::Range; use std::marker::PhantomData; use ndarray::{ArrayView1, ArrayView2, Axis}; @@ -43,7 +43,7 @@ use ndarray::{ArrayView1, ArrayView2, Axis}; /// extern crate noisy_float; /// use ndarray_stats::HistogramExt; /// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder}; -/// use ndarray_stats::histogram::builders::Auto; +/// use ndarray_stats::histogram::strategies::Auto; /// use noisy_float::types::{N64, n64}; /// /// # fn main() { @@ -141,7 +141,7 @@ impl Grid { /// /// [`Grid`]: struct.Grid.html /// [`histogram`]: trait.HistogramExt.html -/// [`strategy`]: builders/index.html +/// [`strategy`]: strategies/index.html pub struct GridBuilder> { bin_builders: Vec, phantom: PhantomData @@ -153,7 +153,7 @@ impl> GridBuilder { /// to build a [`Grid`] according to the specified [`strategy`]. 
/// /// [`Grid`]: struct.Grid.html - /// [`strategy`]: builders/index.html + /// [`strategy`]: strategies/index.html pub fn from_array(array: ArrayView2) -> Self { let mut bin_builders = vec![]; @@ -168,7 +168,7 @@ impl> GridBuilder { /// using the parameters inferred from observations in [`from_array`]. /// /// [`Grid`]: struct.Grid.html - /// [`strategy`]: builders/index.html + /// [`strategy`]: strategies/index.html /// [`from_array`]: #method.from_array.html pub fn build(&self) -> Grid { let mut projections = vec![]; diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index e38a9026..85e49ff1 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -99,7 +99,7 @@ pub trait HistogramExt /// extern crate noisy_float; /// use ndarray_stats::HistogramExt; /// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder}; - /// use ndarray_stats::histogram::builders::Sqrt; + /// use ndarray_stats::histogram::strategies::Sqrt; /// use noisy_float::types::{N64, n64}; /// /// # fn main() { diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index c43a67b9..c2b64660 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -6,6 +6,6 @@ pub use self::errors::BinNotFound; mod histograms; mod bins; -pub mod builders; +pub mod strategies; mod grid; mod errors; diff --git a/src/histogram/builders.rs b/src/histogram/strategies.rs similarity index 100% rename from src/histogram/builders.rs rename to src/histogram/strategies.rs From 543742da63a848f7095de59d78d8a246fce151c9 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:45:59 +0000 Subject: [PATCH 085/146] Errors is not top-level in histogram --- src/histogram/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index c2b64660..9176aee1 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -2,10 +2,9 @@ pub use self::histograms::{Histogram, HistogramExt}; pub use 
self::bins::{Edges, Bins}; pub use self::grid::{Grid, GridBuilder}; -pub use self::errors::BinNotFound; mod histograms; mod bins; pub mod strategies; mod grid; -mod errors; +pub mod errors; From 1a02954e40c9a65d2486743c35f87f2e1f721fae Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:48:24 +0000 Subject: [PATCH 086/146] Documented quantile_mut --- src/quantile.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/quantile.rs b/src/quantile.rs index c948f398..be830b1e 100644 --- a/src/quantile.rs +++ b/src/quantile.rs @@ -415,6 +415,33 @@ pub trait QuantileExt1d where S: Data, { + /// Return the qth quantile of the data. + /// + /// `q` needs to be a float between 0 and 1, bounds included. + /// The qth quantile for a 1-dimensional array of length `N` is defined + /// as the element that would be indexed as `(N-1)q` if the array were to be sorted + /// in increasing order. + /// If `(N-1)q` is not an integer the desired quantile lies between + /// two data points: we return the lower, nearest, higher or interpolated + /// value depending on the type `Interpolate` bound `I`. + /// + /// Some examples: + /// - `q=0.` returns the minimum; + /// - `q=0.5` returns the median; + /// - `q=1.` returns the maximum. + /// (`q=0` and `q=1` are considered improper quantiles) + /// + /// The array is shuffled **in place** in order to produce the required quantile + /// without allocating a copy. + /// No assumptions should be made on the ordering of the array elements + /// after this computation. + /// + /// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)): + /// - average case: O(`m`); + /// - worst case: O(`m`^2); + /// where `m` is the number of elements in the array. + /// + /// **Panics** if `q` is not between `0.` and `1.` (inclusive). 
fn quantile_mut(&mut self, q: f64) -> A where A: Ord + Clone, From eb78f6d9bb6367a8a95d2ebf081267c2262d7e35 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Thu, 8 Nov 2018 08:50:00 +0000 Subject: [PATCH 087/146] Added docs to QuantileExt1d --- src/quantile.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/quantile.rs b/src/quantile.rs index be830b1e..2b1a0058 100644 --- a/src/quantile.rs +++ b/src/quantile.rs @@ -169,7 +169,7 @@ pub mod interpolate { } } -/// Quantile methods. +/// Quantile methods for `ArrayBase`. pub trait QuantileExt where S: Data, @@ -411,6 +411,7 @@ where } } +/// Quantile methods for 1-dimensional arrays. pub trait QuantileExt1d where S: Data, From 7206e725970df7329934fd7c4ddebfe3bcc0c208 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 11 Nov 2018 22:34:24 +0000 Subject: [PATCH 088/146] Return false for failing tests in quickcheck Co-Authored-By: LukeMathWalker --- src/histogram/bins.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index d9b4a7cb..24308b64 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -369,7 +369,9 @@ mod edges_tests { let edges = Edges::from(v); let n = edges.len(); for i in 1..n { - assert!(edges[i-1] <= edges[i]); + if edges[i-1] > edges[i] { + return false; + } } true } From cc2376dd9a38b1a0094a27064f137976416d2ca4 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 11 Nov 2018 22:35:17 +0000 Subject: [PATCH 089/146] Return false for failing tests in quickcheck Co-Authored-By: LukeMathWalker --- src/histogram/bins.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 24308b64..5f91a3e3 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -381,7 +381,9 @@ mod edges_tests { let edges = Edges::from(a); let n = edges.len(); for i in 1..n { - assert!(edges[i-1] <= edges[i]); + if edges[i-1] > edges[i]) { + return false; + } 
} true } From e11cce747109c6b8ff5dc1a3c50139775c50aaa9 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 11 Nov 2018 22:36:01 +0000 Subject: [PATCH 090/146] rust,ignore => text Co-Authored-By: LukeMathWalker --- src/histogram/grid.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 25437564..f926dfbc 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -12,7 +12,7 @@ use ndarray::{ArrayView1, ArrayView2, Axis}; /// The grid is **fully determined by its 1-dimensional projections** on the /// coordinate axes. For example, this is a partition that can be represented /// as a `Grid` struct: -/// ```rust,ignore +/// ```text /// +---+-------+-+ /// | | | | /// +---+-------+-+ From 31392254fe7250688119bd53d3fbb55db039576b Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 11 Nov 2018 22:36:07 +0000 Subject: [PATCH 091/146] rust,ignore => text Co-Authored-By: LukeMathWalker --- src/histogram/grid.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index f926dfbc..6f0f6d09 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -23,7 +23,7 @@ use ndarray::{ArrayView1, ArrayView2, Axis}; /// +---+-------+-+ /// ``` /// while the next one can't: -/// ```rust,ignore +/// ```text /// +---+-------+-+ /// | | | | /// | +-------+-+ From 3ac076f3c9bf863124e184d5c062504045c91bbc Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 07:59:45 +0000 Subject: [PATCH 092/146] Fixed typo --- src/histogram/bins.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 5f91a3e3..4c9529f9 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -381,7 +381,7 @@ mod edges_tests { let edges = Edges::from(a); let n = edges.len(); for i in 1..n { - if edges[i-1] > edges[i]) { + if edges[i-1] > edges[i] { return false; } } From 
b6efcfaf04843c6c4378467ddcee6da315d3b1c5 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:00:33 +0000 Subject: [PATCH 093/146] Added mention of duplicates getting dropped --- src/histogram/bins.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 4c9529f9..016fdd4c 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -68,7 +68,7 @@ impl From> for Edges { impl From> for Edges { /// Get an `Edges` instance from a `Array1`: /// the array elements will be sorted in increasing order - /// using an unstable sorting algorithm. + /// using an unstable sorting algorithm and duplicates will be removed. /// /// # Example: /// From ca2da4c392f1a9d73222da4223ecb665c03a3823 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:01:10 +0000 Subject: [PATCH 094/146] Destructure a tuple in one go --- src/histogram/bins.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 016fdd4c..36180270 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -317,8 +317,7 @@ impl Bins { { let edges_indexes = self.edges.indexes(value); edges_indexes.map( - |t| { - let (left, right) = t; + |(left, right)| { Range { start: self.edges[left].clone(), end: self.edges[right].clone(), From 5801ee04dd81a724cb2320c5d33cf2e81da49fa4 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:02:07 +0000 Subject: [PATCH 095/146] All methods in one impl block --- src/histogram/bins.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 36180270..ffca2ac2 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -325,9 +325,7 @@ impl Bins { } ) } -} -impl Bins { /// Get the `i`-th bin. /// /// **Panics** if `index` is out of bounds. 
@@ -345,7 +343,10 @@ impl Bins { /// 5..10 /// ); /// ``` - pub fn get(&self, index: usize) -> Range { + pub fn get(&self, index: usize) -> Range + where + A: Clone, + { // It was not possible to implement this functionality // using the `Index` trait unless we were willing to // allocate a `Vec>` in the struct. From 2571829c67a2a9b9a64aad2f6050cf6317bc27af Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:05:10 +0000 Subject: [PATCH 096/146] index => index_of; range => range_of --- src/histogram/bins.rs | 16 ++++++++-------- src/histogram/grid.rs | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index ffca2ac2..2d7f4d89 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -18,12 +18,12 @@ use std::ops::{Index, Range}; /// let unit_interval = Bins::new(unit_edges); /// // left inclusive /// assert_eq!( -/// unit_interval.range(&n64(0.)).unwrap(), +/// unit_interval.range_of(&n64(0.)).unwrap(), /// n64(0.)..n64(1.), /// ); /// // right exclusive /// assert_eq!( -/// unit_interval.range(&n64(1.)), +/// unit_interval.range_of(&n64(1.)), /// None /// ); /// ``` @@ -277,15 +277,15 @@ impl Bins { /// let bins = Bins::new(edges); /// let value = 1; /// assert_eq!( - /// bins.index(&1), + /// bins.index_of(&1), /// Some(0) /// ); /// assert_eq!( - /// bins.get(bins.index(&1).unwrap()), + /// bins.get(bins.index_of(&1).unwrap()), /// 0..2 /// ); /// ``` - pub fn index(&self, value: &A) -> Option { + pub fn index_of(&self, value: &A) -> Option { self.edges.indexes(value).map(|t| t.0) } @@ -303,15 +303,15 @@ impl Bins { /// let edges = Edges::from(vec![0, 2, 4, 6]); /// let bins = Bins::new(edges); /// assert_eq!( - /// bins.range(&1), + /// bins.range_of(&1), /// Some(0..2) /// ); /// assert_eq!( - /// bins.range(&10), + /// bins.range_of(&10), /// None /// ); /// ``` - pub fn range(&self, value: &A) -> Option> + pub fn range_of(&self, value: &A) -> Option> where A: Clone, { 
diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 6f0f6d09..968d553a 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -112,7 +112,7 @@ impl Grid { point .iter() .zip(self.projections.iter()) - .map(|(v, e)| e.index(v).ok_or(BinNotFound)) + .map(|(v, e)| e.index_of(v).ok_or(BinNotFound)) .collect::, _>>() } } From 0042c175ec3add13eac21139fec1656b6883a298 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:07:00 +0000 Subject: [PATCH 097/146] indexes => indices_of --- src/histogram/bins.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 2d7f4d89..0dd72c64 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -175,15 +175,15 @@ impl Edges { /// /// let edges = Edges::from(vec![0, 2, 3]); /// assert_eq!( - /// edges.indexes(&1), + /// edges.indices_of(&1), /// Some((0, 1)) /// ); /// assert_eq!( - /// edges.indexes(&5), + /// edges.indices_of(&5), /// None /// ); /// ``` - pub fn indexes(&self, value: &A) -> Option<(usize, usize)> { + pub fn indices_of(&self, value: &A) -> Option<(usize, usize)> { // binary search for the correct bin let n_edges = self.len(); match self.edges.binary_search(value) { @@ -286,7 +286,7 @@ impl Bins { /// ); /// ``` pub fn index_of(&self, value: &A) -> Option { - self.edges.indexes(value).map(|t| t.0) + self.edges.indices_of(value).map(|t| t.0) } /// Given `value`, it returns: @@ -315,7 +315,7 @@ impl Bins { where A: Clone, { - let edges_indexes = self.edges.indexes(value); + let edges_indexes = self.edges.indices_of(value); edges_indexes.map( |(left, right)| { Range { @@ -394,7 +394,7 @@ mod edges_tests { match last { None => true, Some(x) => { - edges.indexes(x).is_none() + edges.indices_of(x).is_none() } } } @@ -408,7 +408,7 @@ mod edges_tests { match first { None => true, Some(x) => { - edges.indexes(x).is_some() + edges.indices_of(x).is_some() } } } From 
803ba61dd8b18ab270be7aa83572bc6740caa1d3 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:13:28 +0000 Subject: [PATCH 098/146] Clearer inizialization --- src/histogram/grid.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 968d553a..e51a5e6f 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -48,10 +48,10 @@ use ndarray::{ArrayView1, ArrayView2, Axis}; /// /// # fn main() { /// // 1-dimensional observations, as a (n_observations, 1) 2-d matrix -/// let mut observations = array![ -/// [1, 4, 5, 2, 100, 20, 50, 65, 27, 40, 45, 23] -/// ]; -/// observations.swap_axes(0, 1); +/// let observations = Array::from_shape_vec( +/// (12, 1), +/// vec![1, 4, 5, 2, 100, 20, 50, 65, 27, 40, 45, 23], +/// ).unwrap(); /// /// // The optimal grid layout is inferred from the data, /// // specifying a strategy (Auto in this case) From c2f375bbb177df4f80ba32b178eca3553ba97d62 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:16:55 +0000 Subject: [PATCH 099/146] Using reference instead of view --- src/histogram/grid.rs | 9 ++++++--- src/histogram/histograms.rs | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index e51a5e6f..bb95223a 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -3,7 +3,7 @@ use super::errors::BinNotFound; use super::strategies::BinsBuildingStrategy; use std::ops::Range; use std::marker::PhantomData; -use ndarray::{ArrayView1, ArrayView2, Axis}; +use ndarray::{ArrayBase, Data, Ix2, ArrayView1, Axis}; /// A `Grid` is a partition of a rectangular region of an `n`-dimensional /// space - e.g. 
`[a_0, b_0)x...x[a_{n-1}, b_{n-1})` - into a collection of @@ -41,6 +41,7 @@ use ndarray::{ArrayView1, ArrayView2, Axis}; /// #[macro_use(array)] /// extern crate ndarray; /// extern crate noisy_float; +/// use ndarray::Array; /// use ndarray_stats::HistogramExt; /// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder}; /// use ndarray_stats::histogram::strategies::Auto; @@ -55,7 +56,7 @@ use ndarray::{ArrayView1, ArrayView2, Axis}; /// /// // The optimal grid layout is inferred from the data, /// // specifying a strategy (Auto in this case) -/// let grid = GridBuilder::>::from_array(observations.view()).build(); +/// let grid = GridBuilder::>::from_array(&observations).build(); /// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.as_view(); @@ -154,7 +155,9 @@ impl> GridBuilder { /// /// [`Grid`]: struct.Grid.html /// [`strategy`]: strategies/index.html - pub fn from_array(array: ArrayView2) -> Self + pub fn from_array(array: &ArrayBase) -> Self + where + S: Data, { let mut bin_builders = vec![]; for subview in array.axis_iter(Axis(1)) { diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 85e49ff1..b85415a1 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -109,7 +109,7 @@ pub trait HistogramExt /// [n64(-1.), n64(-0.5)], /// [n64(0.5), n64(-1.)] /// ]; - /// let grid = GridBuilder::>::from_array(observations.view()).build(); + /// let grid = GridBuilder::>::from_array(&observations).build(); /// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.as_view(); From 9a7170b2cf9ceb09ae5c5ac79982c4b680e57880 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:18:12 +0000 Subject: [PATCH 100/146] Dervice Clone, Debug, Eq and PartialEq for all relevant structs --- src/histogram/bins.rs | 4 ++-- src/histogram/grid.rs | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/histogram/bins.rs 
b/src/histogram/bins.rs index 0dd72c64..95a577c9 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -27,7 +27,7 @@ use std::ops::{Index, Range}; /// None /// ); /// ``` -#[derive(Clone)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct Edges { edges: Vec, } @@ -226,7 +226,7 @@ impl Edges { /// n64(1.)..n64(2.) /// ); /// ``` -#[derive(Clone)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct Bins { edges: Edges, } diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index bb95223a..03ac20d1 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -65,6 +65,7 @@ use ndarray::{ArrayBase, Data, Ix2, ArrayView1, Axis}; /// assert_eq!(histogram_matrix, expected.into_dyn()); /// # } /// ``` +#[derive(Clone, Debug, Eq, PartialEq)] pub struct Grid { projections: Vec>, } From 3ea161c74cfb4140b3e400b790f3441398aeb52f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:18:54 +0000 Subject: [PATCH 101/146] Typo --- src/histogram/grid.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 03ac20d1..cf510a8f 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -77,7 +77,7 @@ impl From>> for Grid { /// The `i`-th element in `Vec>` represents the 1-dimensional /// projection of the bin grid on the `i`-th axis. /// - /// Alternatively, a `Grid` can be build directly from data using a + /// Alternatively, a `Grid` can be built directly from data using a /// [`GridBuilder`]. 
/// /// [`GridBuilder`]: struct.GridBuilder.html From 1b5c8e3a42dc2022c093e8f677b557817a56a11d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:19:45 +0000 Subject: [PATCH 102/146] Omitting turbofish notation --- src/histogram/grid.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index cf510a8f..37b858fe 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -95,7 +95,7 @@ impl Grid { /// Returns `v=(v_i)_i`, a vector, where `v_i` is the number of bins in the grid projection /// on the `i`-th coordinate axis. pub fn shape(&self) -> Vec { - self.projections.iter().map(|e| e.len()).collect::>() + self.projections.iter().map(|e| e.len()).collect() } /// Returns the grid projections on the coordinate axes as a slice of immutable references. From 872cdbd7bcde084fb83d751491b05bceab45be80 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:20:18 +0000 Subject: [PATCH 103/146] Better doc --- src/histogram/grid.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 37b858fe..de837d7e 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -92,8 +92,7 @@ impl Grid { self.projections.len() } - /// Returns `v=(v_i)_i`, a vector, where `v_i` is the number of bins in the grid projection - /// on the `i`-th coordinate axis. + /// Returns the number of bins along each coordinate axis. 
pub fn shape(&self) -> Vec { self.projections.iter().map(|e| e.len()).collect() } From 9e3cc84a860bbb95aa408d34e99c3852215b089d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:20:47 +0000 Subject: [PATCH 104/146] No need to use counters --- src/histogram/grid.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index de837d7e..9f6a5649 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -108,8 +108,8 @@ impl Grid { /// - `Err(BinNotFound)`, if `P` does not belong to the region of space covered by the grid. pub fn index(&self, point: ArrayView1) -> Result, BinNotFound> { assert_eq!(point.len(), self.ndim(), - "Dimension mismatch: the point has {0:?} dimensions, the grid \ - expected {1:?} dimensions.", point.len(), self.ndim()); + "Dimension mismatch: the point has {:?} dimensions, the grid \ + expected {:?} dimensions.", point.len(), self.ndim()); point .iter() .zip(self.projections.iter()) From a89daca3a0b8e848e64ea99e6685cf7ae70d863d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:21:08 +0000 Subject: [PATCH 105/146] Omitting turbofish notation --- src/histogram/grid.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 9f6a5649..aef50593 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -114,7 +114,7 @@ impl Grid { .iter() .zip(self.projections.iter()) .map(|(v, e)| e.index_of(v).ok_or(BinNotFound)) - .collect::, _>>() + .collect() } } From 9bc0daa281baebd3be9394eb309ec96ea6b1e086 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:27:28 +0000 Subject: [PATCH 106/146] Using reference instead of view, index => index_of --- src/histogram/grid.rs | 7 +++++-- src/histogram/histograms.rs | 9 ++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index aef50593..565141c7 100644 
--- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -3,7 +3,7 @@ use super::errors::BinNotFound; use super::strategies::BinsBuildingStrategy; use std::ops::Range; use std::marker::PhantomData; -use ndarray::{ArrayBase, Data, Ix2, ArrayView1, Axis}; +use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// A `Grid` is a partition of a rectangular region of an `n`-dimensional /// space - e.g. `[a_0, b_0)x...x[a_{n-1}, b_{n-1})` - into a collection of @@ -106,7 +106,10 @@ impl Grid { /// - `Ok(i)`, where `i=(i_0, ..., i_{n-1})`, if `p_j` belongs to `i_j`-th bin /// on the `j`-th grid projection on the coordinate axes for all `j` in `{0, ..., n-1}`; /// - `Err(BinNotFound)`, if `P` does not belong to the region of space covered by the grid. - pub fn index(&self, point: ArrayView1) -> Result, BinNotFound> { + pub fn index_of(&self, point: &ArrayBase) -> Result, BinNotFound> + where + S: Data, + { assert_eq!(point.len(), self.ndim(), "Dimension mismatch: the point has {:?} dimensions, the grid \ expected {:?} dimensions.", point.len(), self.ndim()); diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index b85415a1..440be362 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -49,8 +49,11 @@ impl Histogram { /// assert_eq!(histogram_matrix, expected.into_dyn()); /// # } /// ``` - pub fn add_observation(&mut self, observation: ArrayView1) -> Result<(), BinNotFound> { - let bin_index = self.grid.index(observation)?; + pub fn add_observation(&mut self, observation: &ArrayBase) -> Result<(), BinNotFound> + where + S: Data, + { + let bin_index = self.grid.index_of(observation)?; self.counts[IxDyn(&bin_index)] += 1; Ok(()) } @@ -135,7 +138,7 @@ impl HistogramExt for ArrayBase { let mut histogram = Histogram::new(grid); for point in self.axis_iter(Axis(0)) { - histogram.add_observation(point); + histogram.add_observation(&point); } histogram } From 506768651b32f1be7f2b1c4ba12bcb6b661e588a Mon Sep 17 00:00:00 2001 From: 
LukeMathWalker Date: Mon, 12 Nov 2018 08:28:27 +0000 Subject: [PATCH 107/146] Avoids cloning --- src/histogram/histograms.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 440be362..5133e7d7 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -54,7 +54,7 @@ impl Histogram { S: Data, { let bin_index = self.grid.index_of(observation)?; - self.counts[IxDyn(&bin_index)] += 1; + self.counts[&*bin_index] += 1; Ok(()) } From fe86055c7e49dcc67b14e7c6a9acf518628fe91d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:29:00 +0000 Subject: [PATCH 108/146] Shortened imports --- src/histogram/histograms.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 5133e7d7..20110d1b 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -101,8 +101,7 @@ pub trait HistogramExt /// extern crate ndarray; /// extern crate noisy_float; /// use ndarray_stats::HistogramExt; - /// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder}; - /// use ndarray_stats::histogram::strategies::Sqrt; + /// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder, strategies::Sqrt}; /// use noisy_float::types::{N64, n64}; /// /// # fn main() { From e348299e8baa8d7f6f0ad35c913f1bd46b36e9a9 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:30:53 +0000 Subject: [PATCH 109/146] Add note on ignoring points outside the grid --- src/histogram/histograms.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 20110d1b..846c8d44 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -91,6 +91,8 @@ pub trait HistogramExt /// For example: a (3, 4) matrix `M` is a collection of 3 points in a /// 4-dimensional space. /// + /// Important: points outside the grid are ignored! 
+ /// **Panics** if `d` is different from `grid.ndim()`. /// /// # Example: From 17253e569cbdda9df099f060d686a7453afab977 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:31:22 +0000 Subject: [PATCH 110/146] Fix doctest --- src/histogram/histograms.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 846c8d44..88e1d0a2 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -39,7 +39,7 @@ impl Histogram { /// /// let observation = array![n64(0.5), n64(0.6)]; /// - /// histogram.add_observation(observation.view()); + /// histogram.add_observation(&observation); /// /// let histogram_matrix = histogram.as_view(); /// let expected = array![ From c969265ae573bb845015064c83408f8a5ab1c47d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:32:26 +0000 Subject: [PATCH 111/146] Better formatting --- src/histogram/strategies.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 669e6384..970fab3f 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -70,7 +70,7 @@ pub struct Sqrt { /// /// Let `n` be the number of observations and `n_bins` the number of bins. /// -/// `n_bins` = 2 * `n^(1/3)` +/// `n_bins` = 2`n`<sup>1/3</sup> /// /// `n_bins` is only proportional to cube root of `n`. It tends to overestimate /// the `n_bins` and it does not take into account data variability.
From 56873fef97b25072f5241aca194b36be207561aa Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:32:43 +0000 Subject: [PATCH 112/146] Better formatting --- src/histogram/strategies.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 970fab3f..85064c10 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -95,7 +95,7 @@ pub struct Sturges { /// /// Let `n` be the number of observations. /// -/// `bin_width` = 2 * `IQR` / `n^(1/3)` +/// `bin_width` = 2×`IQR`×`n`<sup>−1/3</sup> /// /// The bin width is proportional to the interquartile range ([`IQR`]) and inversely proportional to /// cube root of `n`. It can be too conservative for small datasets, but it is quite good for From b1ce607fb249a735d8a2a34b174d8dfe9cb86ae3 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:33:44 +0000 Subject: [PATCH 113/146] Fix parenthesis --- src/histogram/strategies.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 85064c10..4bbbe308 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -209,7 +209,7 @@ impl BinsBuildingStrategy for Rice fn from_array(a: ArrayView1) -> Self { let n_elems = a.len(); - let n_bins = (2.*n_elems as f64).powf(1./3.).round() as usize; + let n_bins = (2.
* (n_elems as f64).powf(1./3.)).round() as usize; let min = a.min().clone(); let max = a.max().clone(); let builder = EquiSpaced::new(n_bins, min, max); From e8b70eebcbec06b0626cadb19452b717f9ef3e0e Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:34:42 +0000 Subject: [PATCH 114/146] Fixed docs --- src/histogram/strategies.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 4bbbe308..bb9b67e6 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -82,7 +82,7 @@ pub struct Rice { /// underestimates number of bins for large non-gaussian datasets. /// /// Let `n` be the number of observations. -/// The number of bins is the base 2 log of `n`. This estimator assumes normality of data and +/// The number of bins is 1 plus the base 2 log of `n`. This estimator assumes normality of data and /// is too conservative for larger, non-normal datasets. /// /// This is the default method in R’s hist method. From b52fa1f25c0e341a72c58ba3f4b8d69b28c0bc47 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:36:39 +0000 Subject: [PATCH 115/146] get => index --- src/histogram/bins.rs | 12 ++++++------ src/histogram/grid.rs | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 95a577c9..d85ded19 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -217,12 +217,12 @@ impl Edges { /// let bins = Bins::new(edges); /// // first bin /// assert_eq!( -/// bins.get(0), +/// bins.index(0), /// n64(0.)..n64(1.) // n64(1.) is not included in the bin! /// ); /// // second bin /// assert_eq!( -/// bins.get(1), +/// bins.index(1), /// n64(1.)..n64(2.) 
/// ); /// ``` @@ -281,7 +281,7 @@ impl Bins { /// Some(0) /// ); /// assert_eq!( - /// bins.get(bins.index_of(&1).unwrap()), + /// bins.index(bins.index_of(&1).unwrap()), /// 0..2 /// ); /// ``` @@ -339,11 +339,11 @@ impl Bins { /// let edges = Edges::from(vec![1, 5, 10, 20]); /// let bins = Bins::new(edges); /// assert_eq!( - /// bins.get(1), + /// bins.index(1), /// 5..10 /// ); /// ``` - pub fn get(&self, index: usize) -> Range + pub fn index(&self, index: usize) -> Range where A: Clone, { @@ -434,6 +434,6 @@ mod bins_tests { let edges = Edges::from(vec![0]); let bins = Bins::new(edges); // we need at least two edges to make a valid bin! - bins.get(0); + bins.index(0); } } diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 565141c7..85fdffd5 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -134,7 +134,7 @@ impl Grid { expected {1:?} dimensions.", index.len(), self.ndim()); let mut bin = vec![]; for (axis_index, i) in index.iter().enumerate() { - bin.push(self.projections[axis_index].get(*i)); + bin.push(self.projections[axis_index].index(*i)); } bin } From 6ac45f5996f7cf74eca7052db21df8b266d3a94f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:37:20 +0000 Subject: [PATCH 116/146] Better formatting --- src/histogram/grid.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 85fdffd5..1b195d4b 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -5,9 +5,9 @@ use std::ops::Range; use std::marker::PhantomData; use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; -/// A `Grid` is a partition of a rectangular region of an `n`-dimensional -/// space - e.g. `[a_0, b_0)x...x[a_{n-1}, b_{n-1})` - into a collection of -/// rectangular `n`-dimensional bins. +/// A `Grid` is a partition of a rectangular region of an *n*-dimensional +/// space—e.g. 
[*a*<sub>0</sub>, *b*<sub>0</sub>) × ⋯ × [*a*<sub>*n*−1</sub>, +/// *b*<sub>*n*−1</sub>)—into a collection of rectangular *n*-dimensional bins. /// /// The grid is **fully determined by its 1-dimensional projections** on the /// coordinate axes. For example, this is a partition that can be represented From eb7d204e7f304268af082387cd45505121a24d88 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:43:07 +0000 Subject: [PATCH 117/146] Importing macro without macro_use --- src/histogram/grid.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 1b195d4b..dc0a3a50 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -38,10 +38,9 @@ use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// /// ``` /// extern crate ndarray_stats; -/// #[macro_use(array)] /// extern crate ndarray; /// extern crate noisy_float; -/// use ndarray::Array; +/// use ndarray::{Array, array}; /// use ndarray_stats::HistogramExt; /// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder}; /// use ndarray_stats::histogram::strategies::Auto; From 095b7597ed724d4a29ebf9de702d0ce02cc8b1b1 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:50:33 +0000 Subject: [PATCH 118/146] Return option instead of Result from index_of --- src/histogram/grid.rs | 14 ++++++++------ src/histogram/histograms.rs | 10 +++++++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index dc0a3a50..df0ca94d 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -101,11 +101,13 @@ impl Grid { &self.projections } - /// Given `P=(p_1, ..., p_n)`, a point, it returns: - /// - `Ok(i)`, where `i=(i_0, ..., i_{n-1})`, if `p_j` belongs to `i_j`-th bin - /// on the `j`-th grid projection on the coordinate axes for all `j` in `{0, ..., n-1}`; - /// - `Err(BinNotFound)`, if `P` does not belong to the region of space covered by the grid.
- pub fn index_of(&self, point: &ArrayBase) -> Result, BinNotFound> + /// Returns the index of the *n*-dimensional bin containing the point, if + /// one exists. + /// + /// Returns `None` if the point is outside the grid. + /// + /// **Panics** if `point.len()` does not equal `self.ndim()`. + pub fn index_of(&self, point: &ArrayBase) -> Option> where S: Data, { @@ -115,7 +117,7 @@ impl Grid { point .iter() .zip(self.projections.iter()) - .map(|(v, e)| e.index_of(v).ok_or(BinNotFound)) + .map(|(v, e)| e.index_of(v)) .collect() } } diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 88e1d0a2..f46e72f4 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -53,9 +53,13 @@ impl Histogram { where S: Data, { - let bin_index = self.grid.index_of(observation)?; - self.counts[&*bin_index] += 1; - Ok(()) + match self.grid.index_of(observation) { + Some(bin_index) => { + self.counts[&*bin_index] += 1; + Ok(()) + }, + None => Err(BinNotFound) + } } /// Returns the number of dimensions of the space the histogram is covering. From 76d60c78b5c2ccb98e902fd029dcfbc6ab37c341 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:51:46 +0000 Subject: [PATCH 119/146] get => index --- src/histogram/grid.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index df0ca94d..1be1fcc8 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -129,7 +129,7 @@ impl Grid { /// /// **Panics** if at least one among `(i_0, ..., i_{n-1})` is out of bounds on the respective /// coordinate axis - i.e. if there exists `j` such that `i_j >= self.projections[j].len()`. 
- pub fn get(&self, index: &[usize]) -> Vec> { + pub fn index(&self, index: &[usize]) -> Vec> { assert_eq!(index.len(), self.ndim(), "Dimension mismatch: the index has {0:?} dimensions, the grid \ expected {1:?} dimensions.", index.len(), self.ndim()); From d21b031da12cdfdf13dd2c37833adc2894f6cba5 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:54:19 +0000 Subject: [PATCH 120/146] Simplified method body --- Cargo.toml | 1 + src/histogram/grid.rs | 10 ++++------ src/histogram/strategies.rs | 1 - src/lib.rs | 1 + 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3c4b17ac..ff5dc083 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ ndarray = "0.12" noisy_float = "0.1" num-traits = "0.2" rand = "0.5" +itertools = { version = "0.7.0", default-features = false } [dev-dependencies] quickcheck = "0.7" diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 1be1fcc8..a250863f 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -1,8 +1,8 @@ use super::bins::Bins; -use super::errors::BinNotFound; use super::strategies::BinsBuildingStrategy; use std::ops::Range; use std::marker::PhantomData; +use itertools::izip; use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// A `Grid` is a partition of a rectangular region of an *n*-dimensional @@ -133,11 +133,9 @@ impl Grid { assert_eq!(index.len(), self.ndim(), "Dimension mismatch: the index has {0:?} dimensions, the grid \ expected {1:?} dimensions.", index.len(), self.ndim()); - let mut bin = vec![]; - for (axis_index, i) in index.iter().enumerate() { - bin.push(self.projections[axis_index].index(*i)); - } - bin + izip!(&self.projections, index) + .map(|(bins, &i)| bins.index(i)) + .collect() } } diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index bb9b67e6..6d3b1f9e 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -8,7 +8,6 @@ //! [`GridBuilder`]: ../struct.GridBuilder.html //! 
[`NumPy`]: https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges use ndarray::prelude::*; -use ndarray::Data; use num_traits::{FromPrimitive, NumOps}; use super::super::{QuantileExt, QuantileExt1d}; use super::super::interpolate::Nearest; diff --git a/src/lib.rs b/src/lib.rs index 4b8a81ff..b2543b71 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ extern crate ndarray; extern crate noisy_float; extern crate num_traits; extern crate rand; +extern crate itertools; #[cfg(test)] extern crate ndarray_rand; From 96632c790902a2c94bf479743956015f5dc10965 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:55:30 +0000 Subject: [PATCH 121/146] Simplified method body --- src/histogram/grid.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index a250863f..ebd04958 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -161,11 +161,10 @@ impl> GridBuilder { where S: Data, { - let mut bin_builders = vec![]; - for subview in array.axis_iter(Axis(1)) { - let bin_builder = B::from_array(subview); - bin_builders.push(bin_builder); - } + let bin_builders = array + .axis_iter(Axis(1)) + .map(|data| B::from_array(data)) + .collect(); Self { bin_builders, phantom: PhantomData } } From 85cfc26e873f2c41b724c0662b2622a615caa538 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:56:44 +0000 Subject: [PATCH 122/146] Simplified method body --- src/histogram/grid.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index ebd04958..dcc99200 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -175,10 +175,7 @@ impl> GridBuilder { /// [`strategy`]: strategies/index.html /// [`from_array`]: #method.from_array.html pub fn build(&self) -> Grid { - let mut projections = vec![]; - for bin_builder in &self.bin_builders { - 
projections.push(bin_builder.build()); - } + let projections: Vec<_> = self.bin_builders.iter().map(|b| b.build()).collect(); Grid::from(projections) } } From 3ed4d3a64efef9136e6d29b80d53491b2728b6fb Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:57:30 +0000 Subject: [PATCH 123/146] Silence compiler warning --- src/histogram/histograms.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index f46e72f4..11c37a6d 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -143,7 +143,7 @@ impl HistogramExt for ArrayBase { let mut histogram = Histogram::new(grid); for point in self.axis_iter(Axis(0)) { - histogram.add_observation(&point); + let _ = histogram.add_observation(&point); } histogram } From d265ea03ddf7e1d4b4405b29ef35457ae30137ce Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:58:59 +0000 Subject: [PATCH 124/146] Using ? syntax --- src/histogram/histograms.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 11c37a6d..149be2db 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -25,13 +25,13 @@ impl Histogram { /// # Example: /// ``` /// extern crate ndarray_stats; - /// #[macro_use(array)] /// extern crate ndarray; /// extern crate noisy_float; + /// use ndarray::array; /// use ndarray_stats::histogram::{Edges, Bins, Histogram, Grid}; /// use noisy_float::types::n64; /// - /// # fn main() { + /// # fn main() -> Result<(), Box> { /// let edges = Edges::from(vec![n64(-1.), n64(0.), n64(1.)]); /// let bins = Bins::new(edges); /// let square_grid = Grid::from(vec![bins.clone(), bins.clone()]); @@ -39,7 +39,7 @@ impl Histogram { /// /// let observation = array![n64(0.5), n64(0.6)]; /// - /// histogram.add_observation(&observation); + /// histogram.add_observation(&observation)?; /// /// let histogram_matrix = 
histogram.as_view(); /// let expected = array![ From 10a5b46776678d7ee5166f789eb8a1c27e38c9b7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 08:59:40 +0000 Subject: [PATCH 125/146] Add Ok(()) --- src/histogram/histograms.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 149be2db..d86d737f 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -47,6 +47,7 @@ impl Histogram { /// [0, 1], /// ]; /// assert_eq!(histogram_matrix, expected.into_dyn()); + /// # Ok(()) /// # } /// ``` pub fn add_observation(&mut self, observation: &ArrayBase) -> Result<(), BinNotFound> From 42f086a5fbb6871a8162177e35ae8a0b7e1c6269 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 09:01:55 +0000 Subject: [PATCH 126/146] as_view => counts --- src/histogram/histograms.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index d86d737f..9302a9d1 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -69,8 +69,8 @@ impl Histogram { self.counts.ndim() } - /// Borrows a view on the histogram matrix. - pub fn as_view(&self) -> ArrayViewD { + /// Borrows a view on the histogram counts matrix. 
+ pub fn counts(&self) -> ArrayViewD { self.counts.view() } From 3019d9b2d3dd1c3494fcc489e0bc12cedd591b23 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 09:05:06 +0000 Subject: [PATCH 127/146] Fixed doc tests --- src/histogram/bins.rs | 13 ++++--------- src/histogram/grid.rs | 2 +- src/histogram/histograms.rs | 4 ++-- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index d85ded19..941b6beb 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -113,15 +113,6 @@ impl Index for Edges{ } } -impl IntoIterator for Edges { - type Item = A; - type IntoIter = ::std::vec::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.edges.into_iter() - } -} - impl Edges { /// Number of edges in `self`. /// @@ -198,6 +189,10 @@ impl Edges { } } } + + pub fn iter(&self) -> impl Iterator { + self.edges.iter() + } } /// `Bins` is a sorted collection of non-overlapping diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index dcc99200..845a6c06 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -58,7 +58,7 @@ use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// let grid = GridBuilder::>::from_array(&observations).build(); /// let histogram = observations.histogram(grid); /// -/// let histogram_matrix = histogram.as_view(); +/// let histogram_matrix = histogram.counts(); /// // Bins are left inclusive, right exclusive! 
/// let expected = array![4, 1, 2, 1, 2, 0, 1, 0, 0, 1, 0, 0]; /// assert_eq!(histogram_matrix, expected.into_dyn()); diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 9302a9d1..5b92a7c0 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -41,7 +41,7 @@ impl Histogram { /// /// histogram.add_observation(&observation)?; /// - /// let histogram_matrix = histogram.as_view(); + /// let histogram_matrix = histogram.counts(); /// let expected = array![ /// [0, 0], /// [0, 1], @@ -121,7 +121,7 @@ pub trait HistogramExt /// let grid = GridBuilder::>::from_array(&observations).build(); /// let histogram = observations.histogram(grid); /// - /// let histogram_matrix = histogram.as_view(); + /// let histogram_matrix = histogram.counts(); /// // Bins are left inclusive, right exclusive! /// let expected = array![ /// [1, 0], From 8c6411e14dd9480771e02a649aae6d844f8695b4 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 20:17:58 +0000 Subject: [PATCH 128/146] Bumped Rust version to 1.30 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1c07183a..1063b4c4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ addons: - libssl-dev cache: cargo rust: - - 1.28.0 + - 1.30.0 - stable - beta - nightly From f09720cfa8fb7a87a42e435a01725bcc2799e924 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 20:19:32 +0000 Subject: [PATCH 129/146] Reuse quantile_axis_mut implementation --- src/quantile.rs | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/src/quantile.rs b/src/quantile.rs index 2b1a0058..3112d6ce 100644 --- a/src/quantile.rs +++ b/src/quantile.rs @@ -460,33 +460,7 @@ impl QuantileExt1d for ArrayBase S: DataMut, I: Interpolate, { - assert!((0. 
<= q) && (q <= 1.)); - let mut lower = None; - let mut higher = None; - let len = self.len(); - if I::needs_lower(q, len) { - let lower_index = I::lower_index(q,len); - lower = Some(self.sorted_get_mut(lower_index)); - if I::needs_higher(q, len) { - let higher_index = I::higher_index(q, len); - let relative_higher_index = higher_index - lower_index; - higher = Some( - self. - slice_mut(s![lower_index..]). - sorted_get_mut(relative_higher_index) - ); - }; - } else { - higher = Some( - self.sorted_get_mut(I::higher_index(q, len)), - ); - }; - I::interpolate( - lower.map(|x| Array::from_elem((1,), x)), - higher.map(|x| Array::from_elem((1,), x)), - q, - len - )[0].clone() + self.quantile_axis_mut::(Axis(0), q)[()].clone() } } From 3f089a4e028b3171396d7066a76bac02706ba200 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 20:22:57 +0000 Subject: [PATCH 130/146] Convert as_slice to as_array_view --- src/histogram/bins.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 941b6beb..15fe3892 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -134,23 +134,24 @@ impl Edges { self.edges.len() } - /// Borrow an immutable reference to the edges as a vector - /// slice. + /// Borrow an immutable reference to the edges as a 1-dimensional + /// array view. 
/// /// # Example: /// /// ``` /// extern crate ndarray_stats; + /// use ndarray::array; /// use ndarray_stats::histogram::Edges; /// /// let edges = Edges::from(vec![0, 5, 3]); /// assert_eq!( - /// edges.as_slice(), - /// vec![0, 3, 5].as_slice() + /// edges.as_array_view(), + /// array![0, 3, 5].view() /// ); /// ``` - pub fn as_slice(&self) -> &[A] { - &self.edges + pub fn as_array_view(&self) -> ArrayView1 { + ArrayView1::from(&self.edges) } /// Given `value`, it returns an option: From bddf12ccc7b679ed84c79aa3570d3c605801efde Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 12 Nov 2018 20:34:01 +0000 Subject: [PATCH 131/146] Fixed broken tests --- src/histogram/bins.rs | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 15fe3892..6c018359 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -140,6 +140,7 @@ impl Edges { /// # Example: /// /// ``` + /// extern crate ndarray; /// extern crate ndarray_stats; /// use ndarray::array; /// use ndarray_stats::histogram::Edges; @@ -386,12 +387,12 @@ mod edges_tests { fn edges_are_right_exclusive(v: Vec) -> bool { let edges = Edges::from(v); - let last = edges.as_slice().last(); - match last { - None => true, - Some(x) => { - edges.indices_of(x).is_none() - } + let view = edges.as_array_view(); + if view.len() == 0 { + true + } else { + let last = view[view.len()-1]; + edges.indices_of(&last).is_none() } } @@ -400,12 +401,12 @@ mod edges_tests { match edges.len() { 1 => true, _ => { - let first = edges.as_slice().first(); - match first { - None => true, - Some(x) => { - edges.indices_of(x).is_some() - } + let view = edges.as_array_view(); + if view.len() == 0 { + true + } else { + let first = view[0]; + edges.indices_of(&first).is_some() } } } @@ -414,7 +415,8 @@ mod edges_tests { fn edges_are_deduped(v: Vec) -> bool { let unique_elements = BTreeSet::from_iter(v.iter()); let edges = 
Edges::from(v.clone()); - let unique_edges = BTreeSet::from_iter(edges.as_slice().iter()); + let view = edges.as_array_view(); + let unique_edges = BTreeSet::from_iter(view.iter()); unique_edges == unique_elements } } From 9e432debc4118967dd00a74ff805891a956aba25 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 13 Nov 2018 06:40:30 +0000 Subject: [PATCH 132/146] Added expected grid --- src/histogram/grid.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 845a6c06..2ff373a5 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -41,9 +41,9 @@ use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// extern crate ndarray; /// extern crate noisy_float; /// use ndarray::{Array, array}; -/// use ndarray_stats::HistogramExt; -/// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder}; -/// use ndarray_stats::histogram::strategies::Auto; +/// use ndarray_stats::{HistogramExt, +/// histogram::{Histogram, Grid, GridBuilder, +/// Edges, Bins, strategies::Auto}}; /// use noisy_float::types::{N64, n64}; /// /// # fn main() { @@ -56,6 +56,11 @@ use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// // The optimal grid layout is inferred from the data, /// // specifying a strategy (Auto in this case) /// let grid = GridBuilder::>::from_array(&observations).build(); +/// let expected_grid = Grid::from(vec![Bins::new(Edges::from(vec![ +/// 1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, 111, 121, +/// ]))]); +/// assert_eq!(grid, expected_grid); +/// /// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.counts(); From f42dcd7c39b84db34a7cb3f7371a7526eabe0d0c Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 13 Nov 2018 06:55:28 +0000 Subject: [PATCH 133/146] Fixed FD --- src/histogram/grid.rs | 4 ++-- src/histogram/strategies.rs | 30 +++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git 
a/src/histogram/grid.rs b/src/histogram/grid.rs index 2ff373a5..8a461f19 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -43,7 +43,7 @@ use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// use ndarray::{Array, array}; /// use ndarray_stats::{HistogramExt, /// histogram::{Histogram, Grid, GridBuilder, -/// Edges, Bins, strategies::Auto}}; +/// Edges, Bins, strategies::FreedmanDiaconis}}; /// use noisy_float::types::{N64, n64}; /// /// # fn main() { @@ -55,7 +55,7 @@ use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// /// // The optimal grid layout is inferred from the data, /// // specifying a strategy (Auto in this case) -/// let grid = GridBuilder::>::from_array(&observations).build(); +/// let grid = GridBuilder::>::from_array(&observations).build(); /// let expected_grid = Grid::from(vec![Bins::new(Edges::from(vec![ /// 1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, 111, 121, /// ]))]); diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 6d3b1f9e..3dfa26fe 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -8,6 +8,7 @@ //! [`GridBuilder`]: ../struct.GridBuilder.html //! 
[`NumPy`]: https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges use ndarray::prelude::*; +use ndarray::Data; use num_traits::{FromPrimitive, NumOps}; use super::super::{QuantileExt, QuantileExt1d}; use super::super::interpolate::Nearest; @@ -273,21 +274,16 @@ impl BinsBuildingStrategy for FreedmanDiaconis { fn from_array(a: ArrayView1) -> Self { - let n_bins = a.len(); + let n_points = a.len(); let mut a_copy = a.to_owned(); let first_quartile = a_copy.quantile_mut::(0.25); let third_quartile = a_copy.quantile_mut::(0.75); let iqr = third_quartile - first_quartile; - let bin_width = FreedmanDiaconis::compute_bin_width(n_bins, iqr); - let min = a_copy.min().clone(); - let max = a_copy.max().clone(); - let mut max_edge = min.clone(); - while max_edge < max { - max_edge = max_edge + bin_width.clone(); - } - let builder = EquiSpaced::new(n_bins, min, max_edge); + let bin_width = FreedmanDiaconis::compute_bin_width(n_points, iqr); + let (max_edge, min_edge, n_bins) = FreedmanDiaconis::compute_equispaced_parameters(a, &bin_width); + let builder = EquiSpaced::new(n_bins, min_edge, max_edge); Self { builder } } @@ -311,6 +307,22 @@ impl FreedmanDiaconis bin_width } + fn compute_equispaced_parameters(a: ArrayBase, bin_width: &T) -> (T, T, usize) + where + S: Data, + { + let min_edge = a.min().clone(); + let max = a.max().clone(); + let mut max_edge = min_edge.clone(); + let mut n_bins = 0; + while max_edge < max { + max_edge = max_edge + bin_width.clone(); + n_bins += 1; + } + n_bins = usize::max(n_bins, 1); + return (max_edge, min_edge, n_bins) + } + /// The bin width (or bin length) according to the fitted strategy. 
pub fn bin_width(&self) -> T { self.builder.bin_width() From f6b5dbd852953162b4958ed6ebb065229b07163d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 13 Nov 2018 06:57:53 +0000 Subject: [PATCH 134/146] Fixed doctest --- src/histogram/grid.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 8a461f19..043fdf76 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -43,7 +43,7 @@ use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// use ndarray::{Array, array}; /// use ndarray_stats::{HistogramExt, /// histogram::{Histogram, Grid, GridBuilder, -/// Edges, Bins, strategies::FreedmanDiaconis}}; +/// Edges, Bins, strategies::Auto}}; /// use noisy_float::types::{N64, n64}; /// /// # fn main() { @@ -55,17 +55,15 @@ use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// /// // The optimal grid layout is inferred from the data, /// // specifying a strategy (Auto in this case) -/// let grid = GridBuilder::>::from_array(&observations).build(); -/// let expected_grid = Grid::from(vec![Bins::new(Edges::from(vec![ -/// 1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, 111, 121, -/// ]))]); +/// let grid = GridBuilder::>::from_array(&observations).build(); +/// let expected_grid = Grid::from(vec![Bins::new(Edges::from(vec![1, 20, 39, 58, 77, 96]))]); /// assert_eq!(grid, expected_grid); /// /// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.counts(); /// // Bins are left inclusive, right exclusive! 
-/// let expected = array![4, 1, 2, 1, 2, 0, 1, 0, 0, 1, 0, 0]; +/// let expected = array![4, 3, 3, 1, 0]; /// assert_eq!(histogram_matrix, expected.into_dyn()); /// # } /// ``` From 004585fa52141e3239c7ab9f31548e08c0b15b3f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 13 Nov 2018 07:34:33 +0000 Subject: [PATCH 135/146] Refactored bin strategies - one extra bin is now added to the right to make sure the maximum does not get dropped --- src/histogram/grid.rs | 4 +- src/histogram/histograms.rs | 22 ++++++-- src/histogram/strategies.rs | 110 ++++++++++++++++-------------------- 3 files changed, 70 insertions(+), 66 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 043fdf76..5af4a095 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -56,14 +56,14 @@ use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// // The optimal grid layout is inferred from the data, /// // specifying a strategy (Auto in this case) /// let grid = GridBuilder::>::from_array(&observations).build(); -/// let expected_grid = Grid::from(vec![Bins::new(Edges::from(vec![1, 20, 39, 58, 77, 96]))]); +/// let expected_grid = Grid::from(vec![Bins::new(Edges::from(vec![1, 20, 39, 58, 77, 96, 115]))]); /// assert_eq!(grid, expected_grid); /// /// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.counts(); /// // Bins are left inclusive, right exclusive! 
-/// let expected = array![4, 3, 3, 1, 0]; +/// let expected = array![4, 3, 3, 1, 0, 1]; /// assert_eq!(histogram_matrix, expected.into_dyn()); /// # } /// ``` diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 5b92a7c0..b120d67d 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -107,8 +107,13 @@ pub trait HistogramExt /// #[macro_use(array)] /// extern crate ndarray; /// extern crate noisy_float; - /// use ndarray_stats::HistogramExt; - /// use ndarray_stats::histogram::{Histogram, Grid, GridBuilder, strategies::Sqrt}; + /// use ndarray_stats::{ + /// HistogramExt, + /// histogram::{ + /// Histogram, Grid, GridBuilder, + /// Edges, Bins, + /// strategies::Sqrt}, + /// }; /// use noisy_float::types::{N64, n64}; /// /// # fn main() { @@ -119,13 +124,22 @@ pub trait HistogramExt /// [n64(0.5), n64(-1.)] /// ]; /// let grid = GridBuilder::>::from_array(&observations).build(); + /// let expected_grid = Grid::from( + /// vec![ + /// Bins::new(Edges::from(vec![n64(-1.), n64(0.), n64(1.), n64(2.)])), + /// Bins::new(Edges::from(vec![n64(-1.), n64(0.), n64(1.), n64(2.)])), + /// ] + /// ); + /// assert_eq!(grid, expected_grid); + /// /// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.counts(); /// // Bins are left inclusive, right exclusive! /// let expected = array![ - /// [1, 0], - /// [1, 0], + /// [1, 0, 1], + /// [1, 0, 0], + /// [0, 1, 0], /// ]; /// assert_eq!(histogram_matrix, expected.into_dyn()); /// # } diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 3dfa26fe..c5558a13 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -8,8 +8,7 @@ //! [`GridBuilder`]: ../struct.GridBuilder.html //! 
[`NumPy`]: https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges use ndarray::prelude::*; -use ndarray::Data; -use num_traits::{FromPrimitive, NumOps}; +use num_traits::{FromPrimitive, NumOps, Zero}; use super::super::{QuantileExt, QuantileExt1d}; use super::super::interpolate::Nearest; use super::{Edges, Bins}; @@ -50,7 +49,7 @@ pub trait BinsBuildingStrategy } struct EquiSpaced { - n_bins: usize, + bin_width: T, min: T, max: T, } @@ -129,49 +128,42 @@ pub struct Auto { impl EquiSpaced where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { - fn new(n_bins: usize, min: T, max: T) -> Self + fn new(bin_width: T, min: T, max: T) -> Self { - Self { n_bins, min, max } + assert!(bin_width > T::zero()); + Self { bin_width, min, max } } fn build(&self) -> Bins { - let edges = match self.n_bins { - 0 => Edges::from(vec![]), - 1 => { - Edges::from( - vec![self.min.clone(), self.max.clone()] - ) - }, - _ => { - let bin_width = self.bin_width(); - let mut edges: Vec = vec![]; - for i in 0..(self.n_bins+1) { - let edge = self.min.clone() + T::from_usize(i).unwrap()*bin_width.clone(); - edges.push(edge); - } - Edges::from(edges) - }, - }; - Bins::new(edges) + let n_bins = self.n_bins(); + let mut edges: Vec = vec![]; + for i in 0..(n_bins+1) { + let edge = self.min.clone() + T::from_usize(i).unwrap()*self.bin_width.clone(); + edges.push(edge); + } + Bins::new(Edges::from(edges)) } fn n_bins(&self) -> usize { - self.n_bins + let mut max_edge = self.min.clone(); + let mut n_bins = 0; + while max_edge <= self.max { + max_edge = max_edge + self.bin_width.clone(); + n_bins += 1; + } + return n_bins } - /// The bin width (or bin length) according to the fitted strategy. 
fn bin_width(&self) -> T { - let range = self.max.clone() - self.min.clone(); - let bin_width = range / T::from_usize(self.n_bins).unwrap(); - bin_width + self.bin_width.clone() } } impl BinsBuildingStrategy for Sqrt where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { fn from_array(a: ArrayView1) -> Self { @@ -179,7 +171,8 @@ impl BinsBuildingStrategy for Sqrt let n_bins = (n_elems as f64).sqrt().round() as usize; let min = a.min().clone(); let max = a.max().clone(); - let builder = EquiSpaced::new(n_bins, min, max); + let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins); + let builder = EquiSpaced::new(bin_width, min, max); Self { builder } } @@ -194,7 +187,7 @@ impl BinsBuildingStrategy for Sqrt impl Sqrt where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { @@ -204,7 +197,7 @@ impl Sqrt impl BinsBuildingStrategy for Rice where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { fn from_array(a: ArrayView1) -> Self { @@ -212,7 +205,8 @@ impl BinsBuildingStrategy for Rice let n_bins = (2. * (n_elems as f64).powf(1./3.)).round() as usize; let min = a.min().clone(); let max = a.max().clone(); - let builder = EquiSpaced::new(n_bins, min, max); + let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins); + let builder = EquiSpaced::new(bin_width, min, max); Self { builder } } @@ -227,7 +221,7 @@ impl BinsBuildingStrategy for Rice impl Rice where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { /// The bin width (or bin length) according to the fitted strategy. 
pub fn bin_width(&self) -> T { @@ -237,7 +231,7 @@ impl Rice impl BinsBuildingStrategy for Sturges where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { fn from_array(a: ArrayView1) -> Self { @@ -245,7 +239,8 @@ impl BinsBuildingStrategy for Sturges let n_bins = (n_elems as f64).log2().round() as usize + 1; let min = a.min().clone(); let max = a.max().clone(); - let builder = EquiSpaced::new(n_bins, min, max); + let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins); + let builder = EquiSpaced::new(bin_width, min, max); Self { builder } } @@ -260,7 +255,7 @@ impl BinsBuildingStrategy for Sturges impl Sturges where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { @@ -270,7 +265,7 @@ impl Sturges impl BinsBuildingStrategy for FreedmanDiaconis where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { fn from_array(a: ArrayView1) -> Self { @@ -282,8 +277,9 @@ impl BinsBuildingStrategy for FreedmanDiaconis let iqr = third_quartile - first_quartile; let bin_width = FreedmanDiaconis::compute_bin_width(n_points, iqr); - let (max_edge, min_edge, n_bins) = FreedmanDiaconis::compute_equispaced_parameters(a, &bin_width); - let builder = EquiSpaced::new(n_bins, min_edge, max_edge); + let min = a_copy.min().clone(); + let max = a_copy.max().clone(); + let builder = EquiSpaced::new(bin_width, min, max); Self { builder } } @@ -298,7 +294,7 @@ impl BinsBuildingStrategy for FreedmanDiaconis impl FreedmanDiaconis where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { fn compute_bin_width(n_bins: usize, iqr: T) -> T { @@ -307,22 +303,6 @@ impl FreedmanDiaconis bin_width } - fn compute_equispaced_parameters(a: ArrayBase, bin_width: &T) -> (T, T, usize) - where - S: Data, - { - let min_edge = 
a.min().clone(); - let max = a.max().clone(); - let mut max_edge = min_edge.clone(); - let mut n_bins = 0; - while max_edge < max { - max_edge = max_edge + bin_width.clone(); - n_bins += 1; - } - n_bins = usize::max(n_bins, 1); - return (max_edge, min_edge, n_bins) - } - /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { self.builder.bin_width() @@ -331,7 +311,7 @@ impl FreedmanDiaconis impl BinsBuildingStrategy for Auto where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { fn from_array(a: ArrayView1) -> Self { @@ -366,7 +346,7 @@ impl BinsBuildingStrategy for Auto impl Auto where - T: Ord + Clone + FromPrimitive + NumOps + T: Ord + Clone + FromPrimitive + NumOps + Zero { /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { @@ -377,3 +357,13 @@ impl Auto } } } + +/// The bin width (or bin length) according to the fitted strategy. +fn compute_bin_width(min: T, max: T, n_bins: usize) -> T +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, +{ + let range = max.clone() - min.clone(); + let bin_width = range / T::from_usize(n_bins).unwrap(); + bin_width +} From 74723f7f4071adbc1be3f6fef42aa3be5adea9c7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 13 Nov 2018 07:43:50 +0000 Subject: [PATCH 136/146] Added explanation for extra bin to the docs --- src/histogram/strategies.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index c5558a13..5787010d 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -3,6 +3,17 @@ //! //! The docs for each strategy have been taken almost verbatim from [`NumPy`]. //! +//! Each strategy specifies how to compute the optimal number of [`Bins`] or +//! the optimal bin width. +//! For those strategies that prescribe the optimal number +//! 
of [`Bins`] we then compute the optimal bin width with +//! +//! `bin_width = (max - min)/n` +//! +//! All our bins are left-inclusive and right-exclusive: we make sure to add an extra bin +//! if it is necessary to include the maximum value of the array that has been passed as argument +//! to the `from_array` method. +//! //! [`Bins`]: ../struct.Bins.html //! [`Grid`]: ../struct.Grid.html //! [`GridBuilder`]: ../struct.GridBuilder.html From a3fc2de0e66ebd4a5f9fc2a398089cd1cabe9fec Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 13 Nov 2018 07:54:21 +0000 Subject: [PATCH 137/146] Using an associated type for BinsBuildingStrategy - reduced type parameters from 2 to 1 --- src/histogram/grid.rs | 14 ++++++++------ src/histogram/histograms.rs | 2 +- src/histogram/strategies.rs | 29 +++++++++++++++++++---------- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 5af4a095..328ee888 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -1,7 +1,6 @@ use super::bins::Bins; use super::strategies::BinsBuildingStrategy; use std::ops::Range; -use std::marker::PhantomData; use itertools::izip; use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; @@ -55,7 +54,7 @@ use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; /// /// // The optimal grid layout is inferred from the data, /// // specifying a strategy (Auto in this case) -/// let grid = GridBuilder::>::from_array(&observations).build(); +/// let grid = GridBuilder::>::from_array(&observations).build(); /// let expected_grid = Grid::from(vec![Bins::new(Edges::from(vec![1, 20, 39, 58, 77, 96, 115]))]); /// assert_eq!(grid, expected_grid); /// @@ -148,12 +147,15 @@ impl Grid { /// [`Grid`]: struct.Grid.html /// [`histogram`]: trait.HistogramExt.html /// [`strategy`]: strategies/index.html -pub struct GridBuilder> { +pub struct GridBuilder { bin_builders: Vec, - phantom: PhantomData } -impl> GridBuilder { +impl GridBuilder +where + A: Ord, + B: 
BinsBuildingStrategy, +{ /// Given some observations in a 2-dimensional array with shape `(n_observations, n_dimension)` /// it returns a `GridBuilder` instance that has learned the required parameter /// to build a [`Grid`] according to the specified [`strategy`]. @@ -168,7 +170,7 @@ impl> GridBuilder { .axis_iter(Axis(1)) .map(|data| B::from_array(data)) .collect(); - Self { bin_builders, phantom: PhantomData } + Self { bin_builders } } /// Returns a [`Grid`] instance, built accordingly to the specified [`strategy`] diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index b120d67d..825aadb7 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -123,7 +123,7 @@ pub trait HistogramExt /// [n64(-1.), n64(-0.5)], /// [n64(0.5), n64(-1.)] /// ]; - /// let grid = GridBuilder::>::from_array(&observations).build(); + /// let grid = GridBuilder::>::from_array(&observations).build(); /// let expected_grid = Grid::from( /// vec![ /// Bins::new(Edges::from(vec![n64(-1.), n64(0.), n64(1.), n64(2.)])), diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 5787010d..6f985671 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -35,22 +35,21 @@ use super::{Edges, Bins}; /// [`Bins`]: ../struct.Bins.html /// [`Grid`]: ../struct.Grid.html /// [`GridBuilder`]: ../struct.GridBuilder.html -pub trait BinsBuildingStrategy - where - T: Ord +pub trait BinsBuildingStrategy { + type Elem: Ord; /// Given some observations in a 1-dimensional array it returns a `BinsBuildingStrategy` /// that has learned the required parameter to build a collection of [`Bins`]. /// /// [`Bins`]: ../struct.Bins.html - fn from_array(array: ArrayView1) -> Self; + fn from_array(array: ArrayView1) -> Self; /// Returns a [`Bins`] instance, built accordingly to the parameters /// inferred from observations in [`from_array`]. 
/// /// [`Bins`]: ../struct.Bins.html /// [`from_array`]: #method.from_array.html - fn build(&self) -> Bins; + fn build(&self) -> Bins; /// Returns the optimal number of bins, according to the parameters /// inferred from observations in [`from_array`]. @@ -172,10 +171,12 @@ impl EquiSpaced } } -impl BinsBuildingStrategy for Sqrt +impl BinsBuildingStrategy for Sqrt where T: Ord + Clone + FromPrimitive + NumOps + Zero { + type Elem = T; + fn from_array(a: ArrayView1) -> Self { let n_elems = a.len(); @@ -206,10 +207,12 @@ impl Sqrt } } -impl BinsBuildingStrategy for Rice +impl BinsBuildingStrategy for Rice where T: Ord + Clone + FromPrimitive + NumOps + Zero { + type Elem = T; + fn from_array(a: ArrayView1) -> Self { let n_elems = a.len(); @@ -240,10 +243,12 @@ impl Rice } } -impl BinsBuildingStrategy for Sturges +impl BinsBuildingStrategy for Sturges where T: Ord + Clone + FromPrimitive + NumOps + Zero { + type Elem = T; + fn from_array(a: ArrayView1) -> Self { let n_elems = a.len(); @@ -274,10 +279,12 @@ impl Sturges } } -impl BinsBuildingStrategy for FreedmanDiaconis +impl BinsBuildingStrategy for FreedmanDiaconis where T: Ord + Clone + FromPrimitive + NumOps + Zero { + type Elem = T; + fn from_array(a: ArrayView1) -> Self { let n_points = a.len(); @@ -320,10 +327,12 @@ impl FreedmanDiaconis } } -impl BinsBuildingStrategy for Auto +impl BinsBuildingStrategy for Auto where T: Ord + Clone + FromPrimitive + NumOps + Zero { + type Elem = T; + fn from_array(a: ArrayView1) -> Self { let fd_builder = FreedmanDiaconis::from_array(a.view()); From 5cb53b8dd5a3e45ca47b3b304dbe2532ae38556b Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 13 Nov 2018 08:19:59 +0000 Subject: [PATCH 138/146] Added panics conditions. 
--- src/histogram/strategies.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 6f985671..00ef1a4a 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -140,6 +140,7 @@ impl EquiSpaced where T: Ord + Clone + FromPrimitive + NumOps + Zero { + /// **Panics** if `bin_width<=0`. fn new(bin_width: T, min: T, max: T) -> Self { assert!(bin_width > T::zero()); @@ -177,6 +178,7 @@ impl BinsBuildingStrategy for Sqrt { type Elem = T; + /// **Panics** if the array is constant or if `a.len()==0` and division by 0 panics for `T`. fn from_array(a: ArrayView1) -> Self { let n_elems = a.len(); @@ -213,6 +215,7 @@ impl BinsBuildingStrategy for Rice { type Elem = T; + /// **Panics** if the array is constant or if `a.len()==0` and division by 0 panics for `T`. fn from_array(a: ArrayView1) -> Self { let n_elems = a.len(); @@ -249,6 +252,7 @@ impl BinsBuildingStrategy for Sturges { type Elem = T; + /// **Panics** if the array is constant or if `a.len()==0` and division by 0 panics for `T`. fn from_array(a: ArrayView1) -> Self { let n_elems = a.len(); @@ -285,6 +289,7 @@ impl BinsBuildingStrategy for FreedmanDiaconis { type Elem = T; + /// **Panics** if `IQR==0` or if `a.len()==0` and division by 0 panics for `T`. fn from_array(a: ArrayView1) -> Self { let n_points = a.len(); @@ -333,6 +338,8 @@ impl BinsBuildingStrategy for Auto { type Elem = T; + /// **Panics** if `IQR==0`, the array is constant or if + /// `a.len()==0` and division by 0 panics for `T`. fn from_array(a: ArrayView1) -> Self { let fd_builder = FreedmanDiaconis::from_array(a.view()); @@ -378,7 +385,12 @@ impl Auto } } -/// The bin width (or bin length) according to the fitted strategy. +/// Given a range (max, min) and the number of bins, it returns +/// the associated bin_width: +/// +/// `bin_width = (max - min)/n` +/// +/// **Panics** if division by 0 panics for `T`. 
fn compute_bin_width(min: T, max: T, n_bins: usize) -> T where T: Ord + Clone + FromPrimitive + NumOps + Zero, From b3c8e0b57f6f691eefc874346a206cf3e58ce6e4 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 18 Nov 2018 16:16:19 +0000 Subject: [PATCH 139/146] Update src/quantile.rs Co-Authored-By: LukeMathWalker --- src/quantile.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/quantile.rs b/src/quantile.rs index 3112d6ce..3e179539 100644 --- a/src/quantile.rs +++ b/src/quantile.rs @@ -460,7 +460,7 @@ impl QuantileExt1d for ArrayBase S: DataMut, I: Interpolate, { - self.quantile_axis_mut::(Axis(0), q)[()].clone() + self.quantile_axis_mut::(Axis(0), q).into_scalar() } } From af4a198540b2d0809e3255b95df8935df0402de7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 18 Nov 2018 16:25:02 +0000 Subject: [PATCH 140/146] Added test for panic condition --- src/histogram/strategies.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 00ef1a4a..f88ca931 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -399,3 +399,14 @@ where let bin_width = range / T::from_usize(n_bins).unwrap(); bin_width } + +#[cfg(test)] +mod equispaced_tests { + use super::*; + + #[should_panic] + #[test] + fn bin_width_has_to_be_positive() { + EquiSpaced::new(0, 0, 200); + } +} \ No newline at end of file From 45cea6a3fba6533469a853db8221f78712dd2d07 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 18 Nov 2018 16:35:02 +0000 Subject: [PATCH 141/146] For strategies, ask for a reference instead of a view --- src/histogram/grid.rs | 2 +- src/histogram/strategies.rs | 42 +++++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 328ee888..32a7161b 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -168,7 +168,7 @@ where { let bin_builders = array 
.axis_iter(Axis(1)) - .map(|data| B::from_array(data)) + .map(|data| B::from_array(&data)) .collect(); Self { bin_builders } } diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index f88ca931..f66f339a 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -19,6 +19,7 @@ //! [`GridBuilder`]: ../struct.GridBuilder.html //! [`NumPy`]: https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges use ndarray::prelude::*; +use ndarray::Data; use num_traits::{FromPrimitive, NumOps, Zero}; use super::super::{QuantileExt, QuantileExt1d}; use super::super::interpolate::Nearest; @@ -42,7 +43,9 @@ pub trait BinsBuildingStrategy /// that has learned the required parameter to build a collection of [`Bins`]. /// /// [`Bins`]: ../struct.Bins.html - fn from_array(array: ArrayView1) -> Self; + fn from_array(array: &ArrayBase) -> Self + where + S: Data; /// Returns a [`Bins`] instance, built accordingly to the parameters /// inferred from observations in [`from_array`]. @@ -179,7 +182,9 @@ impl BinsBuildingStrategy for Sqrt type Elem = T; /// **Panics** if the array is constant or if `a.len()==0` and division by 0 panics for `T`. - fn from_array(a: ArrayView1) -> Self + fn from_array(a: &ArrayBase) -> Self + where + S: Data { let n_elems = a.len(); let n_bins = (n_elems as f64).sqrt().round() as usize; @@ -216,7 +221,9 @@ impl BinsBuildingStrategy for Rice type Elem = T; /// **Panics** if the array is constant or if `a.len()==0` and division by 0 panics for `T`. - fn from_array(a: ArrayView1) -> Self + fn from_array(a: &ArrayBase) -> Self + where + S: Data { let n_elems = a.len(); let n_bins = (2. * (n_elems as f64).powf(1./3.)).round() as usize; @@ -253,7 +260,9 @@ impl BinsBuildingStrategy for Sturges type Elem = T; /// **Panics** if the array is constant or if `a.len()==0` and division by 0 panics for `T`. 
- fn from_array(a: ArrayView1) -> Self + fn from_array(a: &ArrayBase) -> Self + where + S: Data { let n_elems = a.len(); let n_bins = (n_elems as f64).log2().round() as usize + 1; @@ -290,7 +299,9 @@ impl BinsBuildingStrategy for FreedmanDiaconis type Elem = T; /// **Panics** if `IQR==0` or if `a.len()==0` and division by 0 panics for `T`. - fn from_array(a: ArrayView1) -> Self + fn from_array(a: &ArrayBase) -> Self + where + S: Data { let n_points = a.len(); @@ -340,10 +351,12 @@ impl BinsBuildingStrategy for Auto /// **Panics** if `IQR==0`, the array is constant or if /// `a.len()==0` and division by 0 panics for `T`. - fn from_array(a: ArrayView1) -> Self + fn from_array(a: &ArrayBase) -> Self + where + S: Data { - let fd_builder = FreedmanDiaconis::from_array(a.view()); - let sturges_builder = Sturges::from_array(a.view()); + let fd_builder = FreedmanDiaconis::from_array(&a); + let sturges_builder = Sturges::from_array(&a); let builder = { if fd_builder.bin_width() > sturges_builder.bin_width() { SturgesOrFD::Sturges(sturges_builder) @@ -409,4 +422,15 @@ mod equispaced_tests { fn bin_width_has_to_be_positive() { EquiSpaced::new(0, 0, 200); } -} \ No newline at end of file +} + +#[cfg(test)] +mod sqrt_tests { + use super::*; + + #[should_panic] + #[test] + fn constant_array_are_bad() { + Sqrt::from_array(&array![1, 1, 1, 1, 1, 1, 1]); + } +} From fe943e5a7195bff5fee9eef5cff43faccd4afd50 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 18 Nov 2018 16:38:04 +0000 Subject: [PATCH 142/146] Test panics for Sqrt --- src/histogram/strategies.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index f66f339a..85e36be9 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -433,4 +433,10 @@ mod sqrt_tests { fn constant_array_are_bad() { Sqrt::from_array(&array![1, 1, 1, 1, 1, 1, 1]); } + + #[should_panic] + #[test] + fn empty_arrays_cause_panic() { + let _: Sqrt = 
Sqrt::from_array(&array![]); + } } From bd2570ba502fed1722874a6eee1d61828a5a0fd6 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 18 Nov 2018 16:39:38 +0000 Subject: [PATCH 143/146] Test Rice panics --- src/histogram/strategies.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 85e36be9..3d4978ff 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -440,3 +440,20 @@ mod sqrt_tests { let _: Sqrt = Sqrt::from_array(&array![]); } } + +#[cfg(test)] +mod rice_tests { + use super::*; + + #[should_panic] + #[test] + fn constant_array_are_bad() { + Rice::from_array(&array![1, 1, 1, 1, 1, 1, 1]); + } + + #[should_panic] + #[test] + fn empty_arrays_cause_panic() { + let _: Rice = Rice::from_array(&array![]); + } +} From 9cfc01320e827e6b6a9d2619cbc4466d5f58353d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 18 Nov 2018 16:40:20 +0000 Subject: [PATCH 144/146] Test Sturges panics --- src/histogram/strategies.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 3d4978ff..a7cd3a1c 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -457,3 +457,20 @@ mod rice_tests { let _: Rice = Rice::from_array(&array![]); } } + +#[cfg(test)] +mod sturges_tests { + use super::*; + + #[should_panic] + #[test] + fn constant_array_are_bad() { + Sturges::from_array(&array![1, 1, 1, 1, 1, 1, 1]); + } + + #[should_panic] + #[test] + fn empty_arrays_cause_panic() { + let _: Sturges = Sturges::from_array(&array![]); + } +} From 77be941b6773093fa19c23be3bbb3789f32408d4 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 18 Nov 2018 16:41:45 +0000 Subject: [PATCH 145/146] Test FreedmanDiaconis panics --- src/histogram/strategies.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs 
index a7cd3a1c..d1d077d4 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -474,3 +474,26 @@ mod sturges_tests { let _: Sturges = Sturges::from_array(&array![]); } } + +#[cfg(test)] +mod fd_tests { + use super::*; + + #[should_panic] + #[test] + fn constant_array_are_bad() { + FreedmanDiaconis::from_array(&array![1, 1, 1, 1, 1, 1, 1]); + } + + #[should_panic] + #[test] + fn zero_iqr_causes_panic() { + FreedmanDiaconis::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]); + } + + #[should_panic] + #[test] + fn empty_arrays_cause_panic() { + let _: FreedmanDiaconis = FreedmanDiaconis::from_array(&array![]); + } +} From e66f8a64c01140dbc49f9a15dfd340565cbee096 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 18 Nov 2018 16:42:19 +0000 Subject: [PATCH 146/146] Tested Auto panics --- src/histogram/strategies.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index d1d077d4..d25eb2c3 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -497,3 +497,26 @@ mod fd_tests { let _: FreedmanDiaconis = FreedmanDiaconis::from_array(&array![]); } } + +#[cfg(test)] +mod auto_tests { + use super::*; + + #[should_panic] + #[test] + fn constant_array_are_bad() { + Auto::from_array(&array![1, 1, 1, 1, 1, 1, 1]); + } + + #[should_panic] + #[test] + fn zero_iqr_causes_panic() { + Auto::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]); + } + + #[should_panic] + #[test] + fn empty_arrays_cause_panic() { + let _: Auto = Auto::from_array(&array![]); + } +}