mirror of
https://github.com/facebookexperimental/reverie.git
synced 2025-01-23 13:10:04 +00:00
Fix perf timer related things on aarch64
Summary: More work is still needed here to do proper CPU feature detection and determine whether perf events are available. See the comment on `get_rcb_perf_config` for more info. Reviewed By: VladimirMakaev Differential Revision: D40701837 fbshipit-source-id: 4c7a7c00be0ab14e4fe9f4be09c8094567d5d031
This commit is contained in:
parent
caf0f12131
commit
a16d576831
3 changed files with 78 additions and 43 deletions
|
@ -503,10 +503,9 @@ unsafe fn read_once(v: *mut u32) -> u32 {
|
|||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
fn smp_rmb() {
|
||||
use std::sync::atomic::compiler_fence;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
use core::sync::atomic::compiler_fence;
|
||||
use core::sync::atomic::Ordering::SeqCst;
|
||||
compiler_fence(SeqCst);
|
||||
}
|
||||
|
||||
|
@ -589,6 +588,21 @@ pub fn do_branches(mut count: u64) {
|
|||
assert_eq!(count, 0);
|
||||
}
|
||||
|
||||
/// Spin through a counted loop of conditional branches. Useful for testing
/// timer-related code.
///
/// Every iteration decrements `count` and then takes a conditional backward
/// branch until the counter reaches zero, so the loop executes `count`
/// `b.ne` instructions.
///
/// `count` must be non-zero: a zero would wrap around on the first decrement
/// and the loop would then run roughly 2^64 times.
///
/// # Panics
///
/// Panics if the counter is not zero once the loop has finished.
#[cfg(target_arch = "aarch64")]
#[inline(never)]
pub fn do_branches(mut count: u64) {
    // SAFETY: The assembly only modifies the register holding `count` and the
    // condition flags (which Rust treats as clobbered unless
    // `preserves_flags` is specified). It does not access memory or the stack.
    unsafe {
        core::arch::asm!(
            "2:",
            // `subs` rather than `sub`: only the flag-setting form updates the
            // condition flags that `b.ne` tests. With plain `sub` the branch
            // would act on stale flags and either exit immediately or never
            // terminate.
            "subs {0}, {0}, #0x1",
            "b.ne 2b",
            inout(reg) count,
        )
    }

    assert_eq!(count, 0);
}
|
||||
|
||||
/// Perform exactly `count+1` conditional branch instructions. Useful for
|
||||
/// testing timer-related code.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
|
@ -618,6 +632,11 @@ mod test {
|
|||
|
||||
use super::*;
|
||||
|
||||
/// Smoke test: `do_branches` must run to completion for a moderately sized
/// count (it asserts internally that its counter reached zero).
#[test]
fn test_do_branches() {
    const BRANCH_COUNT: u64 = 1000;
    do_branches(BRANCH_COUNT);
}
|
||||
|
||||
#[test]
|
||||
fn trace_self() {
|
||||
ret_without_perf!();
|
||||
|
|
|
@ -29,7 +29,6 @@
|
|||
//! - before resumption of the guest,
|
||||
//! which _usually_ means immediately after the tool callback returns.
|
||||
|
||||
use raw_cpuid::CpuId;
|
||||
use reverie::Errno;
|
||||
use reverie::Pid;
|
||||
use reverie::Signal;
|
||||
|
@ -47,11 +46,9 @@ use crate::perf::*;
|
|||
// This signal is unused, in that the kernel will never send it to a process.
|
||||
const MARKER_SIGNAL: Signal = reverie::PERF_EVENT_SIGNAL;
|
||||
|
||||
pub(crate) const AMD_VENDOR: &str = "AuthenticAMD";
|
||||
pub(crate) const INTEL_VENDOR: &str = "GenuineIntel";
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub(crate) fn get_rcb_perf_config() -> u64 {
|
||||
let c = CpuId::new();
|
||||
let c = raw_cpuid::CpuId::new();
|
||||
let fi = c.get_feature_info().unwrap();
|
||||
// based on rr's PerfCounters_x86.h and PerfCounters.cc
|
||||
match (fi.family_id(), fi.model_id()) {
|
||||
|
@ -75,6 +72,41 @@ pub(crate) fn get_rcb_perf_config() -> u64 {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the raw perf event config used for counting retired branches on
/// aarch64.
///
/// No CPU feature detection is performed yet: the `BR_RETIRED` event number
/// is returned unconditionally.
#[cfg(target_arch = "aarch64")]
pub(crate) fn get_rcb_perf_config() -> u64 {
    // Raw event number for `BR_RETIRED`.
    const BR_RETIRED_EVENT: u64 = 0x21;

    // TODO: Replace the blanket assumption below with real detection:
    //  1. Compute the microarchitecture from
    //     `/sys/devices/system/cpu/cpu*/regs/identification/midr_el1`
    //  2. Look up the microarchitecture in a table to determine what features
    //     we can enable.
    // References:
    //  - https://github.com/rr-debugger/rr/blob/master/src/PerfCounters.cc#L156

    // Until then, always assume that retired branch events are available.
    BR_RETIRED_EVENT
}
|
||||
|
||||
/// Returns true if the current CPU supports precise_ip.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub(crate) fn has_precise_ip() -> bool {
|
||||
let cpu = raw_cpuid::CpuId::new();
|
||||
let has_debug_store = cpu.get_feature_info().map_or(false, |info| info.has_ds());
|
||||
|
||||
debug!(
|
||||
"Setting precise_ip to {} for cpu {:?}",
|
||||
has_debug_store, cpu
|
||||
);
|
||||
|
||||
has_debug_store
|
||||
}
|
||||
|
||||
/// Reports whether `precise_ip` can be used on the current CPU.
///
/// No detection is implemented for aarch64 yet, so this optimistically
/// answers `true`.
#[cfg(target_arch = "aarch64")]
pub(crate) fn has_precise_ip() -> bool {
    // Placeholder: for now, assume that aarch64 can use precise_ip.
    const ASSUME_PRECISE_IP: bool = true;
    ASSUME_PRECISE_IP
}
|
||||
|
||||
/// A timer monitoring a single thread. The underlying implementation is eagerly
|
||||
/// initialized, but left empty if perf is not supported. In that case, any
|
||||
/// methods with semantics that require a functioning clock or timer will panic.
|
||||
|
@ -356,9 +388,6 @@ const MAX_SINGLE_STEP_COUNT: u64 = SKID_MARGIN_RCBS + SINGLESTEP_TIMEOUT_RCBS;
|
|||
|
||||
impl TimerImpl {
|
||||
pub fn new(guest_pid: Pid, guest_tid: Tid) -> Result<Self, Errno> {
|
||||
let cpu = CpuId::new();
|
||||
let has_debug_store = cpu.get_feature_info().map_or(false, |info| info.has_ds());
|
||||
|
||||
let evt = Event::Raw(get_rcb_perf_config());
|
||||
|
||||
// measure the target tid irrespective of CPU
|
||||
|
@ -367,12 +396,7 @@ impl TimerImpl {
|
|||
.sample_period(PerfCounter::DISABLE_SAMPLE_PERIOD)
|
||||
.event(evt);
|
||||
|
||||
// Check if we can set precise_ip = 1 by checking if debug store is enabled.
|
||||
debug!(
|
||||
"Setting precise_ip to {} for cpu {:?}",
|
||||
has_debug_store, cpu
|
||||
);
|
||||
if has_debug_store {
|
||||
if has_precise_ip() {
|
||||
// set precise_ip to lowest value to enable PEBS (TODO: AMD?)
|
||||
builder.precise_ip(1);
|
||||
}
|
||||
|
|
|
@ -9,8 +9,6 @@
|
|||
use core::mem;
|
||||
|
||||
use perf_event_open_sys::bindings as perf;
|
||||
use raw_cpuid::CpuId;
|
||||
use raw_cpuid::FeatureInfo;
|
||||
use reverie::Errno;
|
||||
use thiserror::Error;
|
||||
use tracing::error;
|
||||
|
@ -19,8 +17,7 @@ use tracing::warn;
|
|||
use crate::perf::do_branches;
|
||||
use crate::perf::PerfCounter;
|
||||
use crate::timer::get_rcb_perf_config;
|
||||
use crate::timer::AMD_VENDOR;
|
||||
use crate::timer::INTEL_VENDOR;
|
||||
use crate::timer::has_precise_ip;
|
||||
|
||||
const IN_TXCP: u64 = 1 << 33;
|
||||
const NUM_BRANCHES: u64 = 500;
|
||||
|
@ -92,11 +89,7 @@ fn init_perf_event_attr(
|
|||
result.set_exclude_guest(1);
|
||||
result.set_exclude_kernel(1);
|
||||
|
||||
if precise_ip
|
||||
&& CpuId::new()
|
||||
.get_feature_info()
|
||||
.map_or(false, |info| info.has_ds())
|
||||
{
|
||||
if precise_ip && has_precise_ip() {
|
||||
result.set_precise_ip(1);
|
||||
|
||||
// This prevents EINVAL when creating a counter with precise_ip enabled
|
||||
|
@ -332,7 +325,8 @@ fn check_working_counters(precise_ip: bool) -> Result<(), PmuValidationError> {
|
|||
|
||||
/// check the cpu feature id to determine if it is a AMD-Zen vs AmdF15R30
|
||||
/// This is much simpler in c++ because eax is available directly
|
||||
fn is_amd_zen(cpu_feature: FeatureInfo) -> bool {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
fn is_amd_zen(cpu_feature: raw_cpuid::FeatureInfo) -> bool {
|
||||
let family_id = cpu_feature.base_family_id(); // 4 bits
|
||||
let model_id = cpu_feature.base_model_id(); // 4 bits
|
||||
let ext_model_id = cpu_feature.extended_model_id(); // 4 bits
|
||||
|
@ -367,8 +361,9 @@ fn is_amd_zen(cpu_feature: FeatureInfo) -> bool {
|
|||
|
||||
/// This is a transcription of the function with the same name in Mozilla-RR it will
|
||||
/// check for bugs specific to cpu architectures
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
|
||||
let c = CpuId::new();
|
||||
let c = raw_cpuid::CpuId::new();
|
||||
let vendor = c.get_vendor_info().unwrap();
|
||||
let feature_info = c
|
||||
.get_feature_info()
|
||||
|
@ -376,8 +371,8 @@ fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
|
|||
let vendor_str = vendor.as_str();
|
||||
|
||||
match vendor_str {
|
||||
AMD_VENDOR if is_amd_zen(feature_info) => check_for_zen_speclockmap(),
|
||||
INTEL_VENDOR => {
|
||||
"AuthenticAMD" if is_amd_zen(feature_info) => check_for_zen_speclockmap(),
|
||||
"GenuineIntel" => {
|
||||
check_for_kvm_in_txcp_bug()?;
|
||||
#[cfg(feature = "llvm_asm")]
|
||||
check_for_xen_pmi_bug(_precise_ip)?;
|
||||
|
@ -387,6 +382,12 @@ fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
|
|||
}
|
||||
}
|
||||
|
||||
/// aarch64 variant of the architecture-specific bug check.
///
/// No aarch64-specific checks are implemented, so this always succeeds and
/// ignores `_precise_ip`.
#[cfg(target_arch = "aarch64")]
fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
    // TODO: Do some aarch64-specific testing?
    Result::Ok(())
}
|
||||
|
||||
fn check_for_zen_speclockmap() -> Result<(), PmuValidationError> {
|
||||
// When the SpecLockMap optimization is not disabled, rr will not work
|
||||
// reliably (e.g. it would work fine on a single process with a single
|
||||
|
@ -653,10 +654,7 @@ mod test {
|
|||
fn test_check_for_ioc_period_bug_precise_ip() {
|
||||
// This assumes the machine running the test will not have this bug and only runs
|
||||
// if precise_ip will be enabled
|
||||
if CpuId::new()
|
||||
.get_feature_info()
|
||||
.map_or(false, |info| info.has_ds())
|
||||
{
|
||||
if has_precise_ip() {
|
||||
if let Err(pmu_err) = check_for_ioc_period_bug(true) {
|
||||
panic!(
|
||||
"Ioc period bug check failed when precise_ip was enabled - {}",
|
||||
|
@ -670,10 +668,7 @@ mod test {
|
|||
fn test_check_working_counters_precise_ip() {
|
||||
// This assumes the machine running the test will have working counters and only runs
|
||||
// if precise_ip will be enabled
|
||||
if CpuId::new()
|
||||
.get_feature_info()
|
||||
.map_or(false, |info| info.has_ds())
|
||||
{
|
||||
if has_precise_ip() {
|
||||
if let Err(pmu_err) = check_working_counters(true) {
|
||||
panic!(
|
||||
"Working counters check failed when precise_ip was enabled - {}",
|
||||
|
@ -687,10 +682,7 @@ mod test {
|
|||
fn test_check_for_arch_bugs_precise_ip() {
|
||||
// This assumes the machine running the test will not have arch bugs and only runs
|
||||
// if precise_ip will be enabled
|
||||
if CpuId::new()
|
||||
.get_feature_info()
|
||||
.map_or(false, |info| info.has_ds())
|
||||
{
|
||||
if has_precise_ip() {
|
||||
if let Err(pmu_err) = check_for_arch_bugs(true) {
|
||||
panic!(
|
||||
"Architecture-specific bug check failed when precise_ip was enabled - {}",
|
||||
|
|
Loading…
Reference in a new issue