From 5e6a6b40a712d4699c55624e72a6a6e8192f3417 Mon Sep 17 00:00:00 2001
From: Richard Janis Goldschmidt
Date: Sat, 18 Mar 2017 18:44:45 +0100
Subject: [PATCH 1/4] Check if all array elements are equidistant

---
 src/dimension/dimension_trait.rs | 25 +++++++++++++++++++------
 tests/dimension.rs               | 12 ++++++++++++
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/dimension/dimension_trait.rs b/src/dimension/dimension_trait.rs
index 88b806658..4686ce0c1 100644
--- a/src/dimension/dimension_trait.rs
+++ b/src/dimension/dimension_trait.rs
@@ -269,20 +269,33 @@ pub unsafe trait Dimension : Clone + Eq + Debug + Send + Sync + Default +
             return true;
         }
         if dim.ndim() == 1 { return false; }
+
+        match Self::equispaced_stride(dim, strides) {
+            Some(1) => true,
+            _ => false,
+        }
+    }
+
+    /// Return the equispaced stride between all the array elements.
+    ///
+    /// Returns `Some(n)` if the strides in all dimensions are equispaced. Returns `None` if not.
+    #[doc(hidden)]
+    fn equispaced_stride(dim: &Self, strides: &Self) -> Option<usize> {
         let order = strides._fastest_varying_stride_order();
-        let strides = strides.slice();
+        let base_stride = strides[order[0]];
 
         // FIXME: Negative strides
         let dim_slice = dim.slice();
-        let mut cstride = 1;
+        let mut next_stride = base_stride;
+        let strides = strides.slice();
         for &i in order.slice() {
             // a dimension of length 1 can have unequal strides
-            if dim_slice[i] != 1 && strides[i] != cstride {
-                return false;
+            if dim_slice[i] != 1 && strides[i] != next_stride {
+                return None;
             }
-            cstride *= dim_slice[i];
+            next_stride *= dim_slice[i];
         }
-        true
+        Some(base_stride)
     }
 
     /// Return the axis ordering corresponding to the fastest variation
diff --git a/tests/dimension.rs b/tests/dimension.rs
index 37d366700..b68fa32de 100644
--- a/tests/dimension.rs
+++ b/tests/dimension.rs
@@ -42,6 +42,18 @@ fn dyn_dimension()
     assert_eq!(z.shape(), &dim[..]);
 }
 
+#[test]
+fn equidistance_strides() {
+    let strides = Dim([4,2,1]);
+    assert_eq!(Dimension::equispaced_stride(&Dim([2,2,2]), &strides), Some(1));
+
+    let strides = Dim([8,4,2]);
+    assert_eq!(Dimension::equispaced_stride(&Dim([2,2,2]), &strides), Some(2));
+
+    let strides = Dim([16,4,1]);
+    assert_eq!(Dimension::equispaced_stride(&Dim([2,2,2]), &strides), None);
+}
+
 #[test]
 fn fastest_varying_order() {
     let strides = Dim([2, 8, 4, 1]);
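
Note on PATCH 1/4: the contiguity check now reduces to a more general
question: do all elements of the array sit at one fixed spacing in memory?
Walking the axes from fastest- to slowest-varying, each axis's stride must
equal the base stride times the product of the lengths of all faster axes; a
length-1 axis is exempt because its stride is never actually used. Below is a
standalone sketch of the same idea on plain slices (illustrative only; the
patch itself works through the `Dimension` trait and
`_fastest_varying_stride_order`):

    // Returns Some(base_stride) if every element lies `base_stride` units
    // apart in memory, None otherwise. Positive strides only, as in the patch.
    fn equispaced_stride(dim: &[usize], strides: &[usize]) -> Option<usize> {
        // Visit axes fastest-varying (smallest stride) first.
        let mut order: Vec<usize> = (0..dim.len()).collect();
        order.sort_by_key(|&i| strides[i]);

        let base_stride = strides[order[0]];
        let mut next_stride = base_stride;
        for &i in &order {
            // A dimension of length 1 can have an arbitrary stride.
            if dim[i] != 1 && strides[i] != next_stride {
                return None;
            }
            next_stride *= dim[i];
        }
        Some(base_stride)
    }

    fn main() {
        assert_eq!(equispaced_stride(&[2, 2, 2], &[4, 2, 1]), Some(1)); // contiguous
        assert_eq!(equispaced_stride(&[2, 2, 2], &[8, 4, 2]), Some(2)); // every 2nd element
        assert_eq!(equispaced_stride(&[2, 2, 2], &[16, 4, 1]), None);   // pairs with gaps
    }

Contiguity is then just the special case `Some(1)`, which is what the `match`
in the patch expresses.
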
From 35a7dc6e00f40f897e24d4aa4955863e6b53856f Mon Sep 17 00:00:00 2001
From: Richard Janis Goldschmidt
Date: Sat, 18 Mar 2017 23:33:39 +0100
Subject: [PATCH 2/4] Use axpy for scaled_add

---
 benches/bench1.rs                | 13 +++++
 src/dimension/dimension_trait.rs |  5 +-
 src/linalg/impl_linalg.rs        | 83 ++++++++++++++++++++++++++++++++
 tests/oper.rs                    | 16 ++++++
 4 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/benches/bench1.rs b/benches/bench1.rs
index 7d948a6a7..173309187 100644
--- a/benches/bench1.rs
+++ b/benches/bench1.rs
@@ -1,6 +1,7 @@
 #![feature(test)]
 #![allow(unused_imports)]
 
+extern crate blas_sys;
 extern crate test;
 #[macro_use(s)]
 extern crate ndarray;
@@ -468,6 +469,18 @@ fn scaled_add_2d_f32_regular(bench: &mut test::Bencher)
     });
 }
 
+#[bench]
+fn scaled_add_2d_f32_axpy(bench: &mut test::Bencher)
+{
+    let mut av = Array::<f32, _>::zeros((64, 64));
+    let bv = Array::<f32, _>::zeros((64, 64));
+    let scalar = 3.1415926535;
+    bench.iter(|| {
+        av.scaled_add_axpy(scalar, &bv);
+    });
+}
+
+
 #[bench]
 fn assign_scalar_2d_corder(bench: &mut test::Bencher)
 {
diff --git a/src/dimension/dimension_trait.rs b/src/dimension/dimension_trait.rs
index 4686ce0c1..dabeb1cc4 100644
--- a/src/dimension/dimension_trait.rs
+++ b/src/dimension/dimension_trait.rs
@@ -280,7 +280,7 @@ pub unsafe trait Dimension : Clone + Eq + Debug + Send + Sync + Default +
     ///
     /// Returns `Some(n)` if the strides in all dimensions are equispaced. Returns `None` if not.
     #[doc(hidden)]
-    fn equispaced_stride(dim: &Self, strides: &Self) -> Option<usize> {
+    fn equispaced_stride(dim: &Self, strides: &Self) -> Option<isize> {
         let order = strides._fastest_varying_stride_order();
         let base_stride = strides[order[0]];
 
@@ -295,7 +295,8 @@ pub unsafe trait Dimension : Clone + Eq + Debug + Send + Sync + Default +
             }
             next_stride *= dim_slice[i];
         }
-        Some(base_stride)
+
+        Some(base_stride as isize)
     }
 
     /// Return the axis ordering corresponding to the fastest variation
diff --git a/src/linalg/impl_linalg.rs b/src/linalg/impl_linalg.rs
index 1662f5c37..4d7ca0e1f 100644
--- a/src/linalg/impl_linalg.rs
+++ b/src/linalg/impl_linalg.rs
@@ -295,6 +295,60 @@ impl<A, S, D> ArrayBase<S, D>
     {
         self.zip_mut_with(rhs, move |y, &x| *y = *y + (alpha * x));
     }
+
+    /// Perform the operation `self += alpha * rhs` efficiently, where
+    /// `alpha` is a scalar and `rhs` is another array. This operation is
+    /// also known as `axpy` in BLAS.
+    ///
+    /// If their shapes disagree, `rhs` is broadcast to the shape of `self`.
+    ///
+    /// **Panics** if broadcasting isn’t possible.
+    #[cfg(feature="blas")]
+    pub fn scaled_add_axpy<S2, E>(&mut self, alpha: A, rhs: &ArrayBase<S2, E>)
+        where S: DataMut,
+              S2: Data<Elem=A>,
+              A: LinalgScalar + ::std::fmt::Debug,
+              E: Dimension,
+    {
+        debug_assert_eq!(self.len(), rhs.len());
+        assert!(self.len() == rhs.len());
+
+        {
+            macro_rules! axpy {
+                ($ty:ty, $func:ident) => {{
+                    if blas_compat::<$ty, _, _>(self) && blas_compat::<$ty, _, _>(rhs) {
+                        let order = Dimension::_fastest_varying_stride_order(&self.strides);
+                        let incx = self.strides()[order[0]];
+
+                        let order = Dimension::_fastest_varying_stride_order(&rhs.strides);
+                        let incy = rhs.strides()[order[0]];
+
+                        unsafe {
+                            let (lhs_ptr, n, incx) = blas_1d_params(self.ptr,
+                                                                    self.len(),
+                                                                    incx);
+                            let (rhs_ptr, _, incy) = blas_1d_params(rhs.ptr,
+                                                                    rhs.len(),
+                                                                    incy);
+                            blas_sys::c::$func(
+                                n,
+                                cast_as(&alpha),
+                                rhs_ptr as *const $ty,
+                                incy,
+                                lhs_ptr as *mut $ty,
+                                incx);
+                            return;
+                        }
+                    }
+                }}
+            }
+
+            axpy!{f32, cblas_saxpy};
+            axpy!{f64, cblas_daxpy};
+        }
+
+        self.scaled_add(alpha, rhs);
+    }
 }
 
 // mat_mul_impl uses ArrayView arguments to send all array kinds into
@@ -531,6 +585,35 @@ fn blas_compat_1d<A, S>(a: &ArrayBase<S, Ix1>) -> bool
     true
 }
 
+#[cfg(feature="blas")]
+fn blas_compat<A, S, D>(a: &ArrayBase<S, D>) -> bool
+    where S: Data,
+          A: 'static,
+          S::Elem: 'static,
+          D: Dimension,
+{
+    if !same_type::<A, S::Elem>() {
+        return false;
+    }
+
+    if a.len() > blas_index::max_value() as usize {
+        return false;
+    }
+
+    match D::equispaced_stride(&a.raw_dim(), &a.strides) {
+        Some(stride) => {
+            if stride > blas_index::max_value() as isize ||
+               stride < blas_index::min_value() as isize {
+                return false;
+            }
+        },
+        None => {
+            return false;
+        }
+    }
+    true
+}
+
 #[cfg(feature="blas")]
 fn blas_row_major_2d<A, S>(a: &ArrayBase<S, Ix2>) -> bool
     where S: Data,
diff --git a/tests/oper.rs b/tests/oper.rs
index a8f8dffac..ed9684fad 100644
--- a/tests/oper.rs
+++ b/tests/oper.rs
@@ -459,6 +459,22 @@ fn scaled_add() {
 }
 
+#[cfg(feature = "blas")]
+#[test]
+fn scaled_add_axpy() {
+    let a = range_mat(16, 15);
+    let mut b = range_mat(16, 15);
+    b.mapv_inplace(f32::exp);
+
+    let alpha = 0.2_f32;
+    let mut c = a.clone();
+    c.scaled_add(alpha, &b);
+
+    let mut d = a.clone();
+    d.scaled_add_axpy(alpha, &b);
+    assert_eq!(c, d);
+}
+
 #[test]
 fn gen_mat_mul() {
     let alpha = -2.3;
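
Note on PATCH 2/4: `axpy` is the BLAS level-1 routine that computes
`y = alpha * x + y` over `n` elements read at fixed increments; `cblas_saxpy`
and `cblas_daxpy` are the `f32` and `f64` variants. This is where
`equispaced_stride` pays off: an array of any dimensionality whose elements
are equispaced in memory can be described to BLAS as a single strided vector.
A plain-Rust model of what the FFI call computes (positive increments only,
matching the patch's `// FIXME: Negative strides`; BLAS itself also accepts
negative increments by starting from the far end of the vector):

    // The semantics of saxpy: y[i*incy] += alpha * x[i*incx] for i in 0..n.
    fn axpy(n: usize, alpha: f32, x: &[f32], incx: usize, y: &mut [f32], incy: usize) {
        for i in 0..n {
            y[i * incy] += alpha * x[i * incx];
        }
    }

    fn main() {
        // Four logical elements spaced 2 apart in an 8-element buffer,
        // i.e. an equispaced stride of 2 as detected by PATCH 1/4.
        let x = [1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 0.0];
        let mut y = [0.0f32; 8];
        axpy(4, 0.5, &x, 2, &mut y, 2);
        assert_eq!(y, [0.5, 0.0, 1.0, 0.0, 1.5, 0.0, 2.0, 0.0]);
    }

Each operand keeps its own increment (the patch derives one from `self` and
one from `rhs`), which is why `blas_compat` has to hold for both arrays
independently before the fast path is taken.
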
From 38cdcc3babc46fa09b845ea6aa7c92abcc9e8fe5 Mon Sep 17 00:00:00 2001
From: Richard Janis Goldschmidt
Date: Sun, 19 Mar 2017 11:18:28 +0100
Subject: [PATCH 3/4] Move axpy into a generic scaled_add interface

---
 benches/bench1.rs         | 12 -----------
 src/linalg/impl_linalg.rs | 44 ++++++++++++++++++++++-----------------
 tests/oper.rs             | 16 --------------
 3 files changed, 25 insertions(+), 47 deletions(-)

diff --git a/benches/bench1.rs b/benches/bench1.rs
index 173309187..68d3da4bc 100644
--- a/benches/bench1.rs
+++ b/benches/bench1.rs
@@ -1,7 +1,6 @@
 #![feature(test)]
 #![allow(unused_imports)]
 
-extern crate blas_sys;
 extern crate test;
 #[macro_use(s)]
 extern crate ndarray;
@@ -469,17 +468,6 @@ fn scaled_add_2d_f32_regular(bench: &mut test::Bencher)
     });
 }
 
-#[bench]
-fn scaled_add_2d_f32_axpy(bench: &mut test::Bencher)
-{
-    let mut av = Array::<f32, _>::zeros((64, 64));
-    let bv = Array::<f32, _>::zeros((64, 64));
-    let scalar = 3.1415926535;
-    bench.iter(|| {
-        av.scaled_add_axpy(scalar, &bv);
-    });
-}
-
 
 #[bench]
 fn assign_scalar_2d_corder(bench: &mut test::Bencher)
diff --git a/src/linalg/impl_linalg.rs b/src/linalg/impl_linalg.rs
index 4d7ca0e1f..be1c32046 100644
--- a/src/linalg/impl_linalg.rs
+++ b/src/linalg/impl_linalg.rs
@@ -292,27 +292,38 @@ impl<A, S, D> ArrayBase<S, D>
               S2: Data<Elem=A>,
               A: LinalgScalar,
               E: Dimension,
+    {
+        self.scaled_add_impl(alpha, rhs);
+    }
+
+    fn scaled_add_generic<S2, E>(&mut self, alpha: A, rhs: &ArrayBase<S2, E>)
+        where S: DataMut,
+              S2: Data<Elem=A>,
+              A: LinalgScalar,
+              E: Dimension,
     {
         self.zip_mut_with(rhs, move |y, &x| *y = *y + (alpha * x));
     }
 
-    /// Perform the operation `self += alpha * rhs` efficiently, where
-    /// `alpha` is a scalar and `rhs` is another array. This operation is
-    /// also known as `axpy` in BLAS.
-    ///
-    /// If their shapes disagree, `rhs` is broadcast to the shape of `self`.
-    ///
-    /// **Panics** if broadcasting isn’t possible.
-    #[cfg(feature="blas")]
-    pub fn scaled_add_axpy<S2, E>(&mut self, alpha: A, rhs: &ArrayBase<S2, E>)
+    #[cfg(not(feature = "blas"))]
+    fn scaled_add_impl<S2, E>(&mut self, alpha: A, rhs: &ArrayBase<S2, E>)
         where S: DataMut,
               S2: Data<Elem=A>,
-              A: LinalgScalar + ::std::fmt::Debug,
+              A: LinalgScalar,
+              E: Dimension,
+    {
+        self.scaled_add_generic(alpha, rhs);
+    }
+
+    #[cfg(feature = "blas")]
+    fn scaled_add_impl<S2, E>(&mut self, alpha: A, rhs: &ArrayBase<S2, E>)
+        where S: DataMut,
+              S2: Data<Elem=A>,
+              A: LinalgScalar,
               E: Dimension,
     {
         debug_assert_eq!(self.len(), rhs.len());
         assert!(self.len() == rhs.len());
-
         {
             macro_rules! axpy {
                 ($ty:ty, $func:ident) => {{
@@ -342,12 +353,10 @@ impl<A, S, D> ArrayBase<S, D>
                     }
                 }}
             }
-
             axpy!{f32, cblas_saxpy};
             axpy!{f64, cblas_daxpy};
         }
-
-        self.scaled_add(alpha, rhs);
+        self.scaled_add_generic(alpha, rhs);
     }
 }
 
@@ -596,13 +605,9 @@ fn blas_compat<A, S, D>(a: &ArrayBase<S, D>) -> bool
         return false;
     }
 
-    if a.len() > blas_index::max_value() as usize {
-        return false;
-    }
-
     match D::equispaced_stride(&a.raw_dim(), &a.strides) {
         Some(stride) => {
-            if stride > blas_index::max_value() as isize ||
+            if a.len() as isize * stride > blas_index::max_value() as isize ||
                stride < blas_index::min_value() as isize {
                 return false;
             }
@@ -611,6 +616,7 @@ fn blas_compat<A, S, D>(a: &ArrayBase<S, D>) -> bool
             return false;
         }
     }
+
     true
 }
diff --git a/tests/oper.rs b/tests/oper.rs
index ed9684fad..a8f8dffac 100644
--- a/tests/oper.rs
+++ b/tests/oper.rs
@@ -459,22 +459,6 @@ fn scaled_add() {
 }
 
-#[cfg(feature = "blas")]
-#[test]
-fn scaled_add_axpy() {
-    let a = range_mat(16, 15);
-    let mut b = range_mat(16, 15);
-    b.mapv_inplace(f32::exp);
-
-    let alpha = 0.2_f32;
-    let mut c = a.clone();
-    c.scaled_add(alpha, &b);
-
-    let mut d = a.clone();
-    d.scaled_add_axpy(alpha, &b);
-    assert_eq!(c, d);
-}
-
 #[test]
 fn gen_mat_mul() {
     let alpha = -2.3;
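
Note on PATCH 3/4: two things change shape here. First, the BLAS call moves
behind the stable `scaled_add` name via compile-time dispatch: the public
method forwards to `scaled_add_impl`, exactly one definition of which is
compiled depending on the `blas` feature, and both definitions can fall back
to the shared `scaled_add_generic` loop. A minimal sketch of the pattern on
free functions (names follow the patch; the real methods are generic over
element type, storage, and dimension):

    pub fn scaled_add(y: &mut [f32], alpha: f32, x: &[f32]) {
        scaled_add_impl(y, alpha, x);
    }

    #[cfg(feature = "blas")]
    fn scaled_add_impl(y: &mut [f32], alpha: f32, x: &[f32]) {
        // Try the BLAS fast path and return early on success; the patch
        // falls through to the generic loop when the layout is unsuitable.
        scaled_add_generic(y, alpha, x);
    }

    #[cfg(not(feature = "blas"))]
    fn scaled_add_impl(y: &mut [f32], alpha: f32, x: &[f32]) {
        scaled_add_generic(y, alpha, x);
    }

    fn scaled_add_generic(y: &mut [f32], alpha: f32, x: &[f32]) {
        for (yi, &xi) in y.iter_mut().zip(x) {
            *yi += alpha * xi;
        }
    }

    fn main() {
        let mut y = [1.0, 2.0];
        scaled_add(&mut y, 10.0, &[3.0, 4.0]);
        assert_eq!(y, [31.0, 42.0]);
    }

Second, the `blas_compat` bound tightens: it is not enough for the stride
alone to fit in the BLAS integer type, because the largest offset actually
addressed is on the order of `len * stride`; that product is what the new
`a.len() as isize * stride > blas_index::max_value() as isize` comparison
guards against overflowing.
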
From d1f50b0d3cc66020cd2ad3c544d3c9a14f233b58 Mon Sep 17 00:00:00 2001
From: Richard Janis Goldschmidt
Date: Tue, 21 Mar 2017 12:21:14 +0100
Subject: [PATCH 4/4] Add bench for scaled add

---
 benches/bench1.rs | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/benches/bench1.rs b/benches/bench1.rs
index 68d3da4bc..218dbefca 100644
--- a/benches/bench1.rs
+++ b/benches/bench1.rs
@@ -457,17 +457,31 @@ fn iadd_2d_strided(bench: &mut test::Bencher)
     });
 }
 
+const SCALE_ADD_SZ: usize = 64;
+
 #[bench]
 fn scaled_add_2d_f32_regular(bench: &mut test::Bencher)
 {
-    let mut av = Array::<f32, _>::zeros((64, 64));
-    let bv = Array::<f32, _>::zeros((64, 64));
+    let mut av = Array::<f32, _>::zeros((SCALE_ADD_SZ, SCALE_ADD_SZ));
+    let bv = Array::<f32, _>::zeros((SCALE_ADD_SZ, SCALE_ADD_SZ));
     let scalar = 3.1415926535;
     bench.iter(|| {
         av.scaled_add(scalar, &bv);
     });
 }
 
+#[bench]
+fn scaled_add_2d_f32_stride(bench: &mut test::Bencher)
+{
+    let mut av = Array::<f32, _>::zeros((SCALE_ADD_SZ, 2 * SCALE_ADD_SZ));
+    let bv = Array::<f32, _>::zeros((SCALE_ADD_SZ, 2 * SCALE_ADD_SZ));
+    let mut av = av.slice_mut(s![.., ..;2]);
+    let bv = bv.slice(s![.., ..;2]);
+    let scalar = 3.1415926535;
+    bench.iter(|| {
+        av.scaled_add(scalar, &bv);
+    });
+}
 
 #[bench]
 fn assign_scalar_2d_corder(bench: &mut test::Bencher)
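
Note on PATCH 4/4: the new `scaled_add_2d_f32_stride` bench slices every
second column out of a 64x128 array, leaving a view of shape (64, 64) with
strides (128, 2). By the PATCH 1/4 check that is an equispaced stride of 2, so
the BLAS path can still be taken with a non-unit increment. A quick worked
check of the addressing: element `(i, j)` of the view lives at flat offset
`i*128 + j*2`, and row-major traversal advances by exactly 2 everywhere,
including across row boundaries:

    fn main() {
        let offset = |i: usize, j: usize| i * 128 + j * 2;
        let offsets: Vec<usize> = (0..64)
            .flat_map(|i| (0..64).map(move |j| offset(i, j)))
            .collect();
        // Last element of row i is at i*128 + 126; row i+1 starts at
        // (i+1)*128, a gap of exactly 2, so the whole view is one vector.
        assert!(offsets.windows(2).all(|w| w[1] - w[0] == 2));
    }

Together with `scaled_add_2d_f32_regular`, this compares the unit-stride and
strided BLAS cases of `scaled_add` on the same element count.
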