From a16d5768313fc27ce0e673adb1ddbed514aac47b Mon Sep 17 00:00:00 2001 From: Jason White Date: Thu, 27 Oct 2022 10:56:09 -0700 Subject: [PATCH] Fix perf timer related things on aarch64 Summary: To do proper feature detection on the CPU to know if we can get perf events, there is more work to do here. See the comment on `get_rcb_perf_config` for more info. Reviewed By: VladimirMakaev Differential Revision: D40701837 fbshipit-source-id: 4c7a7c00be0ab14e4fe9f4be09c8094567d5d031 --- reverie-ptrace/src/perf.rs | 25 +++++++++++++-- reverie-ptrace/src/timer.rs | 52 +++++++++++++++++++++++--------- reverie-ptrace/src/validation.rs | 44 +++++++++++---------------- 3 files changed, 78 insertions(+), 43 deletions(-) diff --git a/reverie-ptrace/src/perf.rs b/reverie-ptrace/src/perf.rs index 232ea29..390e96e 100644 --- a/reverie-ptrace/src/perf.rs +++ b/reverie-ptrace/src/perf.rs @@ -503,10 +503,9 @@ unsafe fn read_once(v: *mut u32) -> u32 { } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn smp_rmb() { - use std::sync::atomic::compiler_fence; - use std::sync::atomic::Ordering::SeqCst; + use core::sync::atomic::compiler_fence; + use core::sync::atomic::Ordering::SeqCst; compiler_fence(SeqCst); } @@ -589,6 +588,21 @@ pub fn do_branches(mut count: u64) { assert_eq!(count, 0); } +#[cfg(target_arch = "aarch64")] +#[inline(never)] +pub fn do_branches(mut count: u64) { + unsafe { + core::arch::asm!( + "2:", + "subs {0}, {0}, #0x1", + "b.ne 2b", + inout(reg) count, + ) + } + + assert_eq!(count, 0); +} + /// Perform exactly `count+1` conditional branch instructions. Useful for /// testing timer-related code.
#[cfg(target_arch = "x86_64")] @@ -618,6 +632,11 @@ mod test { use super::*; + #[test] + fn test_do_branches() { + do_branches(1000); + } + #[test] fn trace_self() { ret_without_perf!(); diff --git a/reverie-ptrace/src/timer.rs b/reverie-ptrace/src/timer.rs index 7aa102d..2d81726 100644 --- a/reverie-ptrace/src/timer.rs +++ b/reverie-ptrace/src/timer.rs @@ -29,7 +29,6 @@ //! - before resumption of the guest, //! which _usually_ means immediately after the tool callback returns. -use raw_cpuid::CpuId; use reverie::Errno; use reverie::Pid; use reverie::Signal; @@ -47,11 +46,9 @@ use crate::perf::*; // This signal is unused, in that the kernel will never send it to a process. const MARKER_SIGNAL: Signal = reverie::PERF_EVENT_SIGNAL; -pub(crate) const AMD_VENDOR: &str = "AuthenticAMD"; -pub(crate) const INTEL_VENDOR: &str = "GenuineIntel"; - +#[cfg(target_arch = "x86_64")] pub(crate) fn get_rcb_perf_config() -> u64 { - let c = CpuId::new(); + let c = raw_cpuid::CpuId::new(); let fi = c.get_feature_info().unwrap(); // based on rr's PerfCounters_x86.h and PerfCounters.cc match (fi.family_id(), fi.model_id()) { @@ -75,6 +72,41 @@ pub(crate) fn get_rcb_perf_config() -> u64 { } } +#[cfg(target_arch = "aarch64")] +pub(crate) fn get_rcb_perf_config() -> u64 { + // TODO: + // 1. Compute the microarchitecture from + // `/sys/devices/system/cpu/cpu*/regs/identification/midr_el1` + // 2. Look up the microarchitecture in a table to determine what features + // we can enable. + // References: + // - https://github.com/rr-debugger/rr/blob/master/src/PerfCounters.cc#L156 + const BR_RETIRED: u64 = 0x21; + + // For now, always assume that we can get retired branch events. + BR_RETIRED +} + +/// Returns true if the current CPU supports precise_ip. 
+#[cfg(target_arch = "x86_64")] +pub(crate) fn has_precise_ip() -> bool { + let cpu = raw_cpuid::CpuId::new(); + let has_debug_store = cpu.get_feature_info().map_or(false, |info| info.has_ds()); + + debug!( + "Setting precise_ip to {} for cpu {:?}", + has_debug_store, cpu + ); + + has_debug_store +} + +#[cfg(target_arch = "aarch64")] +pub(crate) fn has_precise_ip() -> bool { + // Assume, for now, that aarch64 can use precise_ip. + true +} + /// A timer monitoring a single thread. The underlying implementation is eagerly /// initialized, but left empty if perf is not supported. In that case, any /// methods with semantics that require a functioning clock or timer will panic. @@ -356,9 +388,6 @@ const MAX_SINGLE_STEP_COUNT: u64 = SKID_MARGIN_RCBS + SINGLESTEP_TIMEOUT_RCBS; impl TimerImpl { pub fn new(guest_pid: Pid, guest_tid: Tid) -> Result { - let cpu = CpuId::new(); - let has_debug_store = cpu.get_feature_info().map_or(false, |info| info.has_ds()); - let evt = Event::Raw(get_rcb_perf_config()); // measure the target tid irrespective of CPU @@ -367,12 +396,7 @@ impl TimerImpl { .sample_period(PerfCounter::DISABLE_SAMPLE_PERIOD) .event(evt); - // Check if we can set precise_ip = 1 by checking if debug store is enabled. - debug!( - "Setting precise_ip to {} for cpu {:?}", - has_debug_store, cpu - ); - if has_debug_store { + if has_precise_ip() { // set precise_ip to lowest value to enable PEBS (TODO: AMD?) 
builder.precise_ip(1); } diff --git a/reverie-ptrace/src/validation.rs b/reverie-ptrace/src/validation.rs index 003afd2..1103f41 100644 --- a/reverie-ptrace/src/validation.rs +++ b/reverie-ptrace/src/validation.rs @@ -9,8 +9,6 @@ use core::mem; use perf_event_open_sys::bindings as perf; -use raw_cpuid::CpuId; -use raw_cpuid::FeatureInfo; use reverie::Errno; use thiserror::Error; use tracing::error; @@ -19,8 +17,7 @@ use tracing::warn; use crate::perf::do_branches; use crate::perf::PerfCounter; use crate::timer::get_rcb_perf_config; -use crate::timer::AMD_VENDOR; -use crate::timer::INTEL_VENDOR; +use crate::timer::has_precise_ip; const IN_TXCP: u64 = 1 << 33; const NUM_BRANCHES: u64 = 500; @@ -92,11 +89,7 @@ fn init_perf_event_attr( result.set_exclude_guest(1); result.set_exclude_kernel(1); - if precise_ip - && CpuId::new() - .get_feature_info() - .map_or(false, |info| info.has_ds()) - { + if precise_ip && has_precise_ip() { result.set_precise_ip(1); // This prevents EINVAL when creating a counter with precise_ip enabled @@ -332,7 +325,8 @@ fn check_working_counters(precise_ip: bool) -> Result<(), PmuValidationError> { /// check the cpu feature id to determine if it is a AMD-Zen vs AmdF15R30 /// This is much simpler in c++ because eax is available directly -fn is_amd_zen(cpu_feature: FeatureInfo) -> bool { +#[cfg(target_arch = "x86_64")] +fn is_amd_zen(cpu_feature: raw_cpuid::FeatureInfo) -> bool { let family_id = cpu_feature.base_family_id(); // 4 bits let model_id = cpu_feature.base_model_id(); // 4 bits let ext_model_id = cpu_feature.extended_model_id(); // 4 bits @@ -342,7 +336,7 @@ fn is_amd_zen(cpu_feature: FeatureInfo) -> bool { let cpu_type: u32 = ((model_id as u32) << 4) | ((family_id as u32) << 8) | ((ext_model_id as u32) << 16); - // There are lots of magic numbers here. They come directly from + // There are lots of magic numbers here. 
They come directly from // https://github.com/rr-debugger/rr/blob/master/src/PerfCounters_x86.h matches!( (cpu_type, ext_family_id), @@ -367,8 +361,9 @@ fn is_amd_zen(cpu_feature: FeatureInfo) -> bool { /// This is a transcription of the function with the same name in Mozilla-RR it will /// check for bugs specific to cpu architectures +#[cfg(target_arch = "x86_64")] fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> { - let c = CpuId::new(); + let c = raw_cpuid::CpuId::new(); let vendor = c.get_vendor_info().unwrap(); let feature_info = c .get_feature_info() @@ -376,8 +371,8 @@ fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> { let vendor_str = vendor.as_str(); match vendor_str { - AMD_VENDOR if is_amd_zen(feature_info) => check_for_zen_speclockmap(), - INTEL_VENDOR => { + "AuthenticAMD" if is_amd_zen(feature_info) => check_for_zen_speclockmap(), + "GenuineIntel" => { check_for_kvm_in_txcp_bug()?; #[cfg(feature = "llvm_asm")] check_for_xen_pmi_bug(_precise_ip)?; @@ -387,6 +382,12 @@ fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> { } } +#[cfg(target_arch = "aarch64")] +fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> { + // TODO: Do some aarch64-specific testing? + Ok(()) +} + fn check_for_zen_speclockmap() -> Result<(), PmuValidationError> { // When the SpecLockMap optimization is not disabled, rr will not work // reliably (e.g. 
it would work fine on a single process with a single @@ -653,10 +654,7 @@ mod test { fn test_check_for_ioc_period_bug_precise_ip() { // This assumes the machine running the test will not have this bug and only runs // if precise_ip will be enabled - if CpuId::new() - .get_feature_info() - .map_or(false, |info| info.has_ds()) - { + if has_precise_ip() { if let Err(pmu_err) = check_for_ioc_period_bug(true) { panic!( "Ioc period bug check failed when precise_ip was enabled - {}", @@ -670,10 +668,7 @@ mod test { fn test_check_working_counters_precise_ip() { // This assumes the machine running the test will have working counters and only runs // if precise_ip will be enabled - if CpuId::new() - .get_feature_info() - .map_or(false, |info| info.has_ds()) - { + if has_precise_ip() { if let Err(pmu_err) = check_working_counters(true) { panic!( "Working counters check failed when precise_ip was enabled - {}", @@ -687,10 +682,7 @@ mod test { fn test_check_for_arch_bugs_precise_ip() { // This assumes the machine running the test will not have arch bugs and only runs // if precise_ip will be enabled - if CpuId::new() - .get_feature_info() - .map_or(false, |info| info.has_ds()) - { + if has_precise_ip() { if let Err(pmu_err) = check_for_arch_bugs(true) { panic!( "Architecture-specific bug check failed when precise_ip was enabled - {}",