diff --git a/src/x86/cpuid.rs b/src/x86/cpuid.rs
new file mode 100644
index 0000000000..16267becf5
--- /dev/null
+++ b/src/x86/cpuid.rs
@@ -0,0 +1,136 @@
+//! `cpuid` intrinsics
+
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+
+/// Result of the `cpuid` instruction.
+#[derive(Copy, Clone, Eq, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
+pub struct CpuidResult {
+    /// EAX register.
+    pub eax: u32,
+    /// EBX register.
+    pub ebx: u32,
+    /// ECX register.
+    pub ecx: u32,
+    /// EDX register.
+    pub edx: u32,
+}
+
+/// `cpuid` instruction.
+///
+/// Executes `cpuid` with the given values loaded into the `eax` and `ecx`
+/// registers and returns the resulting contents of `eax...edx`.
+///
+/// The [CPUID Wikipedia page][wiki_cpuid] describes which information can
+/// be queried through the `eax` and `ecx` registers, and the format in
+/// which this information is returned in `eax...edx`.
+///
+/// The definitive references are:
+/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
+///   Instruction Set Reference, A-Z][intel64_ref].
+/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
+///   System Instructions][amd64_ref].
+///
+/// # Safety
+///
+/// The `cpuid` instruction must be available on the host; the `has_cpuid()`
+/// intrinsic can be used to query whether it is.
+///
+/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
+/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cpuid))]
+#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
+pub unsafe fn __cpuid_count(eax: u32, ecx: u32) -> CpuidResult {
+    // The asm block writes each output register into its own local, so no
+    // uninitialized `CpuidResult` has to be created up front.
+    let out_eax: u32;
+    let out_ebx: u32;
+    let out_ecx: u32;
+    let out_edx: u32;
+    asm!("cpuid"
+         : "={eax}"(out_eax), "={ebx}"(out_ebx),
+           "={ecx}"(out_ecx), "={edx}"(out_edx)
+         : "{eax}"(eax), "{ecx}"(ecx)
+         : :);
+    CpuidResult {
+        eax: out_eax,
+        ebx: out_ebx,
+        ecx: out_ecx,
+        edx: out_edx,
+    }
+}
+
+/// `cpuid` instruction.
+///
+/// See `__cpuid_count`.
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cpuid))]
+#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
+pub unsafe fn __cpuid(eax: u32) -> CpuidResult {
+    __cpuid_count(eax, 0)
+}
+
+/// Does the host support the `cpuid` instruction?
+#[inline(always)]
+#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
+pub fn has_cpuid() -> bool {
+    #[cfg(target_arch = "x86_64")]
+    {
+        true
+    }
+    #[cfg(target_arch = "x86")]
+    {
+        use super::ia32::{__readeflags, __writeeflags};
+
+        // On `x86` the `cpuid` instruction is not always available.
+        // This follows the approach indicated in:
+        // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
+        unsafe {
+            // Read EFLAGS:
+            let eflags: u32 = __readeflags();
+
+            // Invert the ID bit (XOR, so it works whether the bit was 0 or 1):
+            let eflags_mod: u32 = eflags ^ 0x0020_0000;
+
+            // Store the modified EFLAGS (ID bit may or may not be inverted)
+            __writeeflags(eflags_mod);
+
+            // Read EFLAGS again:
+            let eflags_after: u32 = __readeflags();
+
+            // Only the ID bit counts; other flags may differ between reads:
+            (eflags_after ^ eflags) & 0x0020_0000 != 0
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_always_has_cpuid() {
+        // all currently-tested targets have the instruction
+        // FIXME: add targets without `cpuid` to CI
+        assert!(has_cpuid());
+    }
+
+    #[cfg(target_arch = "x86")]
+    #[test]
+    fn test_has_cpuid() {
+        use vendor::__readeflags;
+        unsafe {
+            let before = __readeflags();
+
+            if has_cpuid() {
+                assert!((before ^ __readeflags()) & 0x0020_0000 != 0);
+            } else {
+                assert!((before ^ __readeflags()) & 0x0020_0000 == 0);
+            }
+        }
+    }
+
+}
diff --git a/src/x86/ia32.rs b/src/x86/ia32.rs
new file mode 100644
index 0000000000..ac7ab8b080
--- /dev/null
+++ b/src/x86/ia32.rs
@@ -0,0 +1,50 @@
+//! `i386/ia32` intrinsics
+
+/// Reads EFLAGS.
+#[cfg(target_arch = "x86")]
+#[inline(always)]
+pub unsafe fn __readeflags() -> u32 {
+    let eflags: u32;
+    asm!("pushfd; popl $0" : "=r"(eflags) : : : "volatile");
+    eflags
+}
+
+/// Reads EFLAGS.
+#[cfg(target_arch = "x86_64")]
+#[inline(always)]
+pub unsafe fn __readeflags() -> u64 {
+    let eflags: u64;
+    asm!("pushfq; popq $0" : "=r"(eflags) : : : "volatile");
+    eflags
+}
+
+/// Writes `eflags` to the EFLAGS register (32-bit `x86` variant).
+#[cfg(target_arch = "x86")]
+#[inline(always)]
+pub unsafe fn __writeeflags(eflags: u32) {
+    asm!("pushl $0; popfd" : : "r"(eflags) : "cc", "flags" : "volatile");
+}
+
+/// Writes `eflags` to the EFLAGS register (64-bit `x86_64` variant).
+#[cfg(target_arch = "x86_64")]
+#[inline(always)]
+pub unsafe fn __writeeflags(eflags: u64) {
+    asm!("pushq $0; popfq" : : "r"(eflags) : "cc", "flags" : "volatile");
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_eflags() {
+        unsafe {
+            // Reads EFLAGS, writes the same value back, reads it again,
+            // and compares the two reads for equality:
+            let v = __readeflags();
+            __writeeflags(v);
+            let u = __readeflags();
+            assert_eq!(v, u);
+        }
+    }
+}
diff --git a/src/x86/mod.rs b/src/x86/mod.rs
index ba84f9d890..66e9eadd16 100644
--- a/src/x86/mod.rs
+++ b/src/x86/mod.rs
@@ -1,5 +1,9 @@
 //! `x86` and `x86_64` intrinsics.
 
+pub use self::ia32::*;
+pub use self::cpuid::*;
+pub use self::xsave::*;
+
 pub use self::sse::*;
 pub use self::sse2::*;
 pub use self::sse3::*;
@@ -28,6 +32,10 @@ mod macros;
 #[macro_use]
 mod runtime;
 
+mod ia32;
+mod cpuid;
+mod xsave;
+
 mod sse;
 mod sse2;
 mod sse3;
diff --git a/src/x86/runtime.rs b/src/x86/runtime.rs
index 4b7e3aa56e..e3809bc0ed 100644
--- a/src/x86/runtime.rs
+++ b/src/x86/runtime.rs
@@ -159,31 +159,39 @@ fn test_bit(x: usize, bit: u32) -> bool {
 /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
 fn detect_features() -> usize {
-    let extended_features_ebx;
-    let proc_info_ecx;
-    let proc_info_edx;
+    use super::cpuid::{__cpuid, has_cpuid, CpuidResult};
+    let mut value: usize = 0;
 
-    unsafe {
-        /// To obtain all feature flags we need two CPUID queries:
+    // If the x86 CPU does not support the CPUID instruction then it is too
+    // old to support any of the currently-detectable features.
+    if !has_cpuid() {
+        return value;
+    }
 
-        /// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
-        /// This gives us most of the CPU features in ECX and EDX (see
-        /// below).
-        asm!("cpuid"
-             : "={ecx}"(proc_info_ecx), "={edx}"(proc_info_edx)
-             : "{eax}"(0x0000_0001_u32), "{ecx}"(0 as u32)
-             : :);
+    // Calling `cpuid` from here on is safe because the CPU has the `cpuid`
+    // instruction.
 
-        /// 2. EAX=7, ECX=0: Queries "Extended Features"
-        /// This gives us information about bmi,bmi2, and avx2 support
-        /// (see below); the result in ECX is not currently needed.
-        asm!("cpuid"
-             : "={ebx}"(extended_features_ebx)
-             : "{eax}"(0x0000_0007_u32), "{ecx}"(0 as u32)
-             : :);
-    }
+    // 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits";
+    // Contains information about most x86 features.
+    let CpuidResult {
+        ecx: proc_info_ecx,
+        edx: proc_info_edx,
+        ..
+    } = unsafe { __cpuid(0x0000_0001_u32) };
 
-    let mut value: usize = 0;
+    // 2. EAX=7, ECX=0: Queries "Extended Features";
+    // Contains information about bmi, bmi2, and avx2 support.
+    // Leaf 7 is only queried if EAX=0 reports it as a supported basic leaf.
+    let extended_features_ebx = if unsafe { __cpuid(0) }.eax >= 7 {
+        unsafe { __cpuid(0x0000_0007_u32) }.ebx
+    } else {
+        0
+    };
+
+    let proc_info_ecx = proc_info_ecx as usize;
+    let proc_info_edx = proc_info_edx as usize;
+
+    let extended_features_ebx = extended_features_ebx as usize;
 
     if test_bit(extended_features_ebx, 3) {
         value = set_bit(value, __Feature::bmi as u32);
@@ -233,21 +241,10 @@ fn detect_features() -> usize {
     // org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
     //
     if test_bit(proc_info_ecx, 26) && test_bit(proc_info_ecx, 27) {
-        /// XGETBV: reads the contents of the extended control
-        /// register (XCR).
-        unsafe fn xgetbv(xcr_no: u32) -> u64 {
-            let eax: u32;
-            let edx: u32;
-            // xgetbv
-            asm!("xgetbv"
-                 : "={eax}"(eax), "={edx}"(edx)
-                 : "{ecx}"(xcr_no)
-                 : :);
-            ((edx as u64) << 32) | (eax as u64)
-        }
+        use super::xsave::_xgetbv;
 
         // This is safe because on x86 `xgetbv` is always available.
-        if unsafe { xgetbv(0) } & 6 == 6 {
+        if unsafe { _xgetbv(0) } & 6 == 6 {
             if test_bit(proc_info_ecx, 28) {
                 value = set_bit(value, __Feature::avx as u32);
             }
diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index 9d2ee6f47e..104f43cf16 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -1792,7 +1792,9 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvtsd2si))]
-pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 { _mm_cvtsd_si64(a) }
+pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 {
+    _mm_cvtsd_si64(a)
+}
 
 /// Convert the lower double-precision (64-bit) floating-point element in `b`
 /// to a single-precision (32-bit) floating-point element, store the result in
@@ -1857,7 +1859,9 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvttsd2si))]
-pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 { _mm_cvttsd_si64(a) }
+pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 {
+    _mm_cvttsd_si64(a)
+}
 
 /// Convert packed single-precision (32-bit) floating-point elements in `a` to
 /// packed 32-bit integers with truncation.
diff --git a/src/x86/xsave.rs b/src/x86/xsave.rs
new file mode 100644
index 0000000000..4458b00201
--- /dev/null
+++ b/src/x86/xsave.rs
@@ -0,0 +1,22 @@
+//! `xsave` target feature intrinsics
+
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+
+/// Reads the contents of the extended control register `XCR`
+/// specified in `xcr_no`.
+#[inline(always)]
+// #[target_feature = "+xsave"] // FIXME: see
+// https://github.com/rust-lang-nursery/stdsimd/issues/167
+#[cfg_attr(test, assert_instr(xgetbv))]
+pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
+    let eax: u32;
+    let edx: u32;
+
+    asm!("xgetbv"
+         : "={eax}"(eax), "={edx}"(edx)
+         : "{ecx}"(xcr_no)
+         : :);
+
+    ((edx as u64) << 32) | (eax as u64)
+}