mirror of
https://github.com/facebookexperimental/reverie.git
synced 2025-01-23 13:10:04 +00:00
Fix perf timer related things on aarch64
Summary: To do proper feature detection on the CPU to know if we can get perf events, there is more work to do here. See the comment on `get_rcb_perf_config` for more info. Reviewed By: VladimirMakaev Differential Revision: D40701837 fbshipit-source-id: 4c7a7c00be0ab14e4fe9f4be09c8094567d5d031
This commit is contained in:
parent
caf0f12131
commit
a16d576831
3 changed files with 78 additions and 43 deletions
|
@ -503,10 +503,9 @@ unsafe fn read_once(v: *mut u32) -> u32 {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// SMP read memory barrier: loads issued before this call must not be
/// reordered with loads issued after it.
///
/// On x86/x86_64 the hardware already keeps loads ordered with respect to
/// other loads, so it is enough to stop the compiler from reordering them. On
/// every other architecture (notably aarch64, which this function is now also
/// compiled for) the CPU itself may reorder loads, so a real hardware fence is
/// emitted; a compiler-only fence would not be a barrier there.
#[inline(always)]
fn smp_rmb() {
    use core::sync::atomic::Ordering::SeqCst;

    // Strongly ordered loads: restraining the compiler is sufficient.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    core::sync::atomic::compiler_fence(SeqCst);

    // Weakly ordered CPUs need an actual barrier instruction. An atomic fence
    // also constrains the compiler, so no separate compiler fence is needed.
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    core::sync::atomic::fence(SeqCst);
}
|
||||||
|
|
||||||
|
@ -589,6 +588,21 @@ pub fn do_branches(mut count: u64) {
|
||||||
assert_eq!(count, 0);
|
assert_eq!(count, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Run a tight countdown loop that executes one conditional branch
/// (`b.ne`) per iteration, `count` iterations in total. Useful for testing
/// timer-related code.
///
/// `count` must be non-zero: starting from zero the first decrement wraps
/// around to `u64::MAX` and the loop effectively never terminates.
///
/// # Panics
///
/// Panics if the counter register is not zero when the loop exits.
#[cfg(target_arch = "aarch64")]
#[inline(never)]
pub fn do_branches(mut count: u64) {
    // SAFETY: the assembly only modifies the register bound to `count` and the
    // condition flags (which `asm!` treats as clobbered unless
    // `preserves_flags` is given); it performs no memory or stack accesses.
    unsafe {
        core::arch::asm!(
            "2:",
            // `subs`, not `sub`: only the flag-setting form updates the Z flag
            // that `b.ne` tests. With plain `sub` the branch would depend on
            // whatever flags earlier code left behind.
            "subs {0}, {0}, #0x1",
            "b.ne 2b",
            inout(reg) count,
        )
    }

    assert_eq!(count, 0);
}
|
||||||
|
|
||||||
/// Perform exactly `count+1` conditional branch instructions. Useful for
|
/// Perform exactly `count+1` conditional branch instructions. Useful for
|
||||||
/// testing timer-related code.
|
/// testing timer-related code.
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
@ -618,6 +632,11 @@ mod test {
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
// Smoke test: `do_branches(1000)` must return without panicking, i.e. its
// internal assertion that the counter reached zero must hold.
#[test]
|
||||||
|
fn test_do_branches() {
|
||||||
|
do_branches(1000);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn trace_self() {
|
fn trace_self() {
|
||||||
ret_without_perf!();
|
ret_without_perf!();
|
||||||
|
|
|
@ -29,7 +29,6 @@
|
||||||
//! - before resumption of the guest,
|
//! - before resumption of the guest,
|
||||||
//! which _usually_ means immediately after the tool callback returns.
|
//! which _usually_ means immediately after the tool callback returns.
|
||||||
|
|
||||||
use raw_cpuid::CpuId;
|
|
||||||
use reverie::Errno;
|
use reverie::Errno;
|
||||||
use reverie::Pid;
|
use reverie::Pid;
|
||||||
use reverie::Signal;
|
use reverie::Signal;
|
||||||
|
@ -47,11 +46,9 @@ use crate::perf::*;
|
||||||
// This signal is unused, in that the kernel will never send it to a process.
|
// This signal is unused, in that the kernel will never send it to a process.
|
||||||
const MARKER_SIGNAL: Signal = reverie::PERF_EVENT_SIGNAL;
|
const MARKER_SIGNAL: Signal = reverie::PERF_EVENT_SIGNAL;
|
||||||
|
|
||||||
pub(crate) const AMD_VENDOR: &str = "AuthenticAMD";
|
#[cfg(target_arch = "x86_64")]
|
||||||
pub(crate) const INTEL_VENDOR: &str = "GenuineIntel";
|
|
||||||
|
|
||||||
pub(crate) fn get_rcb_perf_config() -> u64 {
|
pub(crate) fn get_rcb_perf_config() -> u64 {
|
||||||
let c = CpuId::new();
|
let c = raw_cpuid::CpuId::new();
|
||||||
let fi = c.get_feature_info().unwrap();
|
let fi = c.get_feature_info().unwrap();
|
||||||
// based on rr's PerfCounters_x86.h and PerfCounters.cc
|
// based on rr's PerfCounters_x86.h and PerfCounters.cc
|
||||||
match (fi.family_id(), fi.model_id()) {
|
match (fi.family_id(), fi.model_id()) {
|
||||||
|
@ -75,6 +72,41 @@ pub(crate) fn get_rcb_perf_config() -> u64 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the raw perf event config used for counting retired branch events
/// on aarch64.
///
/// This currently always returns `BR_RETIRED` (`0x21`) without inspecting the
/// CPU; see the TODO in the body for the intended feature detection.
#[cfg(target_arch = "aarch64")]
|
||||||
|
pub(crate) fn get_rcb_perf_config() -> u64 {
|
||||||
|
// TODO:
|
||||||
|
// 1. Compute the microarchitecture from
|
||||||
|
// `/sys/devices/system/cpu/cpu*/regs/identification/midr_el1`
|
||||||
|
// 2. Look up the microarchitecture in a table to determine what features
|
||||||
|
// we can enable.
|
||||||
|
// References:
|
||||||
|
// - https://github.com/rr-debugger/rr/blob/master/src/PerfCounters.cc#L156
|
||||||
|
const BR_RETIRED: u64 = 0x21;
|
||||||
|
|
||||||
|
// For now, always assume that we can get retired branch events.
|
||||||
|
BR_RETIRED
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if the current CPU supports precise_ip.
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
pub(crate) fn has_precise_ip() -> bool {
|
||||||
|
let cpu = raw_cpuid::CpuId::new();
|
||||||
|
let has_debug_store = cpu.get_feature_info().map_or(false, |info| info.has_ds());
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"Setting precise_ip to {} for cpu {:?}",
|
||||||
|
has_debug_store, cpu
|
||||||
|
);
|
||||||
|
|
||||||
|
has_debug_store
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reports whether `precise_ip` should be requested on aarch64.
///
/// Unconditionally returns `true`; no CPU feature detection is performed.
#[cfg(target_arch = "aarch64")]
|
||||||
|
pub(crate) fn has_precise_ip() -> bool {
|
||||||
|
// Assume, for now, that aarch64 can use precise_ip.
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
/// A timer monitoring a single thread. The underlying implementation is eagerly
|
/// A timer monitoring a single thread. The underlying implementation is eagerly
|
||||||
/// initialized, but left empty if perf is not supported. In that case, any
|
/// initialized, but left empty if perf is not supported. In that case, any
|
||||||
/// methods with semantics that require a functioning clock or timer will panic.
|
/// methods with semantics that require a functioning clock or timer will panic.
|
||||||
|
@ -356,9 +388,6 @@ const MAX_SINGLE_STEP_COUNT: u64 = SKID_MARGIN_RCBS + SINGLESTEP_TIMEOUT_RCBS;
|
||||||
|
|
||||||
impl TimerImpl {
|
impl TimerImpl {
|
||||||
pub fn new(guest_pid: Pid, guest_tid: Tid) -> Result<Self, Errno> {
|
pub fn new(guest_pid: Pid, guest_tid: Tid) -> Result<Self, Errno> {
|
||||||
let cpu = CpuId::new();
|
|
||||||
let has_debug_store = cpu.get_feature_info().map_or(false, |info| info.has_ds());
|
|
||||||
|
|
||||||
let evt = Event::Raw(get_rcb_perf_config());
|
let evt = Event::Raw(get_rcb_perf_config());
|
||||||
|
|
||||||
// measure the target tid irrespective of CPU
|
// measure the target tid irrespective of CPU
|
||||||
|
@ -367,12 +396,7 @@ impl TimerImpl {
|
||||||
.sample_period(PerfCounter::DISABLE_SAMPLE_PERIOD)
|
.sample_period(PerfCounter::DISABLE_SAMPLE_PERIOD)
|
||||||
.event(evt);
|
.event(evt);
|
||||||
|
|
||||||
// Check if we can set precise_ip = 1 by checking if debug store is enabled.
|
if has_precise_ip() {
|
||||||
debug!(
|
|
||||||
"Setting precise_ip to {} for cpu {:?}",
|
|
||||||
has_debug_store, cpu
|
|
||||||
);
|
|
||||||
if has_debug_store {
|
|
||||||
// set precise_ip to lowest value to enable PEBS (TODO: AMD?)
|
// set precise_ip to lowest value to enable PEBS (TODO: AMD?)
|
||||||
builder.precise_ip(1);
|
builder.precise_ip(1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,8 +9,6 @@
|
||||||
use core::mem;
|
use core::mem;
|
||||||
|
|
||||||
use perf_event_open_sys::bindings as perf;
|
use perf_event_open_sys::bindings as perf;
|
||||||
use raw_cpuid::CpuId;
|
|
||||||
use raw_cpuid::FeatureInfo;
|
|
||||||
use reverie::Errno;
|
use reverie::Errno;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tracing::error;
|
use tracing::error;
|
||||||
|
@ -19,8 +17,7 @@ use tracing::warn;
|
||||||
use crate::perf::do_branches;
|
use crate::perf::do_branches;
|
||||||
use crate::perf::PerfCounter;
|
use crate::perf::PerfCounter;
|
||||||
use crate::timer::get_rcb_perf_config;
|
use crate::timer::get_rcb_perf_config;
|
||||||
use crate::timer::AMD_VENDOR;
|
use crate::timer::has_precise_ip;
|
||||||
use crate::timer::INTEL_VENDOR;
|
|
||||||
|
|
||||||
const IN_TXCP: u64 = 1 << 33;
|
const IN_TXCP: u64 = 1 << 33;
|
||||||
const NUM_BRANCHES: u64 = 500;
|
const NUM_BRANCHES: u64 = 500;
|
||||||
|
@ -92,11 +89,7 @@ fn init_perf_event_attr(
|
||||||
result.set_exclude_guest(1);
|
result.set_exclude_guest(1);
|
||||||
result.set_exclude_kernel(1);
|
result.set_exclude_kernel(1);
|
||||||
|
|
||||||
if precise_ip
|
if precise_ip && has_precise_ip() {
|
||||||
&& CpuId::new()
|
|
||||||
.get_feature_info()
|
|
||||||
.map_or(false, |info| info.has_ds())
|
|
||||||
{
|
|
||||||
result.set_precise_ip(1);
|
result.set_precise_ip(1);
|
||||||
|
|
||||||
// This prevents EINVAL when creating a counter with precise_ip enabled
|
// This prevents EINVAL when creating a counter with precise_ip enabled
|
||||||
|
@ -332,7 +325,8 @@ fn check_working_counters(precise_ip: bool) -> Result<(), PmuValidationError> {
|
||||||
|
|
||||||
/// check the cpu feature id to determine if it is a AMD-Zen vs AmdF15R30
|
/// check the cpu feature id to determine if it is a AMD-Zen vs AmdF15R30
|
||||||
/// This is much simpler in c++ because eax is available directly
|
/// This is much simpler in c++ because eax is available directly
|
||||||
fn is_amd_zen(cpu_feature: FeatureInfo) -> bool {
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
fn is_amd_zen(cpu_feature: raw_cpuid::FeatureInfo) -> bool {
|
||||||
let family_id = cpu_feature.base_family_id(); // 4 bits
|
let family_id = cpu_feature.base_family_id(); // 4 bits
|
||||||
let model_id = cpu_feature.base_model_id(); // 4 bits
|
let model_id = cpu_feature.base_model_id(); // 4 bits
|
||||||
let ext_model_id = cpu_feature.extended_model_id(); // 4 bits
|
let ext_model_id = cpu_feature.extended_model_id(); // 4 bits
|
||||||
|
@ -367,8 +361,9 @@ fn is_amd_zen(cpu_feature: FeatureInfo) -> bool {
|
||||||
|
|
||||||
/// This is a transcription of the function with the same name in Mozilla-RR it will
|
/// This is a transcription of the function with the same name in Mozilla-RR it will
|
||||||
/// check for bugs specific to cpu architectures
|
/// check for bugs specific to cpu architectures
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
|
fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
|
||||||
let c = CpuId::new();
|
let c = raw_cpuid::CpuId::new();
|
||||||
let vendor = c.get_vendor_info().unwrap();
|
let vendor = c.get_vendor_info().unwrap();
|
||||||
let feature_info = c
|
let feature_info = c
|
||||||
.get_feature_info()
|
.get_feature_info()
|
||||||
|
@ -376,8 +371,8 @@ fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
|
||||||
let vendor_str = vendor.as_str();
|
let vendor_str = vendor.as_str();
|
||||||
|
|
||||||
match vendor_str {
|
match vendor_str {
|
||||||
AMD_VENDOR if is_amd_zen(feature_info) => check_for_zen_speclockmap(),
|
"AuthenticAMD" if is_amd_zen(feature_info) => check_for_zen_speclockmap(),
|
||||||
INTEL_VENDOR => {
|
"GenuineIntel" => {
|
||||||
check_for_kvm_in_txcp_bug()?;
|
check_for_kvm_in_txcp_bug()?;
|
||||||
#[cfg(feature = "llvm_asm")]
|
#[cfg(feature = "llvm_asm")]
|
||||||
check_for_xen_pmi_bug(_precise_ip)?;
|
check_for_xen_pmi_bug(_precise_ip)?;
|
||||||
|
@ -387,6 +382,12 @@ fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// aarch64 counterpart of the architecture-specific PMU bug check.
///
/// No aarch64-specific checks are implemented yet: `_precise_ip` is ignored
/// and the function always returns `Ok(())`.
#[cfg(target_arch = "aarch64")]
|
||||||
|
fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
|
||||||
|
// TODO: Do some aarch64-specific testing?
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn check_for_zen_speclockmap() -> Result<(), PmuValidationError> {
|
fn check_for_zen_speclockmap() -> Result<(), PmuValidationError> {
|
||||||
// When the SpecLockMap optimization is not disabled, rr will not work
|
// When the SpecLockMap optimization is not disabled, rr will not work
|
||||||
// reliably (e.g. it would work fine on a single process with a single
|
// reliably (e.g. it would work fine on a single process with a single
|
||||||
|
@ -653,10 +654,7 @@ mod test {
|
||||||
fn test_check_for_ioc_period_bug_precise_ip() {
|
fn test_check_for_ioc_period_bug_precise_ip() {
|
||||||
// This assumes the machine running the test will not have this bug and only runs
|
// This assumes the machine running the test will not have this bug and only runs
|
||||||
// if precise_ip will be enabled
|
// if precise_ip will be enabled
|
||||||
if CpuId::new()
|
if has_precise_ip() {
|
||||||
.get_feature_info()
|
|
||||||
.map_or(false, |info| info.has_ds())
|
|
||||||
{
|
|
||||||
if let Err(pmu_err) = check_for_ioc_period_bug(true) {
|
if let Err(pmu_err) = check_for_ioc_period_bug(true) {
|
||||||
panic!(
|
panic!(
|
||||||
"Ioc period bug check failed when precise_ip was enabled - {}",
|
"Ioc period bug check failed when precise_ip was enabled - {}",
|
||||||
|
@ -670,10 +668,7 @@ mod test {
|
||||||
fn test_check_working_counters_precise_ip() {
|
fn test_check_working_counters_precise_ip() {
|
||||||
// This assumes the machine running the test will have working counters and only runs
|
// This assumes the machine running the test will have working counters and only runs
|
||||||
// if precise_ip will be enabled
|
// if precise_ip will be enabled
|
||||||
if CpuId::new()
|
if has_precise_ip() {
|
||||||
.get_feature_info()
|
|
||||||
.map_or(false, |info| info.has_ds())
|
|
||||||
{
|
|
||||||
if let Err(pmu_err) = check_working_counters(true) {
|
if let Err(pmu_err) = check_working_counters(true) {
|
||||||
panic!(
|
panic!(
|
||||||
"Working counters check failed when precise_ip was enabled - {}",
|
"Working counters check failed when precise_ip was enabled - {}",
|
||||||
|
@ -687,10 +682,7 @@ mod test {
|
||||||
fn test_check_for_arch_bugs_precise_ip() {
|
fn test_check_for_arch_bugs_precise_ip() {
|
||||||
// This assumes the machine running the test will not have arch bugs and only runs
|
// This assumes the machine running the test will not have arch bugs and only runs
|
||||||
// if precise_ip will be enabled
|
// if precise_ip will be enabled
|
||||||
if CpuId::new()
|
if has_precise_ip() {
|
||||||
.get_feature_info()
|
|
||||||
.map_or(false, |info| info.has_ds())
|
|
||||||
{
|
|
||||||
if let Err(pmu_err) = check_for_arch_bugs(true) {
|
if let Err(pmu_err) = check_for_arch_bugs(true) {
|
||||||
panic!(
|
panic!(
|
||||||
"Architecture-specific bug check failed when precise_ip was enabled - {}",
|
"Architecture-specific bug check failed when precise_ip was enabled - {}",
|
||||||
|
|
Loading…
Reference in a new issue