Fix perf timer related things on aarch64

Summary: More work is still needed here to do proper CPU feature detection, so that we know whether perf events are available. See the comment on `get_rcb_perf_config` for more info.

Reviewed By: VladimirMakaev

Differential Revision: D40701837

fbshipit-source-id: 4c7a7c00be0ab14e4fe9f4be09c8094567d5d031
This commit is contained in:
Jason White 2022-10-27 10:56:09 -07:00 committed by Facebook GitHub Bot
parent caf0f12131
commit a16d576831
3 changed files with 78 additions and 43 deletions

View file

@ -503,10 +503,9 @@ unsafe fn read_once(v: *mut u32) -> u32 {
}
/// Read barrier used on x86 and x86_64.
///
/// This issues only a compiler fence: it stops the compiler from reordering
/// memory accesses across this point and emits no fence instruction.
/// NOTE(review): presumably that is sufficient because x86 does not reorder
/// loads with other loads — confirm against the callers' requirements.
#[inline(always)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn smp_rmb() {
    // Import each name exactly once; `core` and `std` expose the same items,
    // and importing both under the same name does not compile.
    use core::sync::atomic::compiler_fence;
    use core::sync::atomic::Ordering::SeqCst;

    compiler_fence(SeqCst);
}
@ -589,6 +588,21 @@ pub fn do_branches(mut count: u64) {
assert_eq!(count, 0);
}
/// Runs a countdown loop that executes one conditional branch per iteration
/// and then checks that the counter reached zero.
///
/// `count` is decremented *before* it is tested, so passing `0` wraps the
/// counter around to `u64::MAX` and the loop effectively never finishes.
/// Callers should pass a non-zero value.
///
/// # Panics
///
/// Panics if the counter is not zero when the loop exits.
#[cfg(target_arch = "aarch64")]
#[inline(never)]
pub fn do_branches(mut count: u64) {
    // SAFETY: the assembly only writes the `count` register operand and the
    // condition flags (which `asm!` treats as clobbered unless
    // `preserves_flags` is given). It does not access memory or the stack.
    unsafe {
        core::arch::asm!(
            "2:",
            // `subs`, not `sub`: only the flag-setting form updates the
            // condition flags that the following `b.ne` tests. With a plain
            // `sub` the branch would act on stale flags.
            "subs {0}, {0}, #0x1",
            "b.ne 2b",
            inout(reg) count,
        )
    }

    assert_eq!(count, 0);
}
/// Perform exactly `count+1` conditional branch instructions. Useful for
/// testing timer-related code.
#[cfg(target_arch = "x86_64")]
@ -618,6 +632,11 @@ mod test {
use super::*;
/// Smoke test: a 1000-iteration run of `do_branches` must return normally.
#[test]
fn test_do_branches() {
    let iterations: u64 = 1000;
    do_branches(iterations);
}
#[test]
fn trace_self() {
ret_without_perf!();

View file

@ -29,7 +29,6 @@
//! - before resumption of the guest,
//! which _usually_ means immediately after the tool callback returns.
use raw_cpuid::CpuId;
use reverie::Errno;
use reverie::Pid;
use reverie::Signal;
@ -47,11 +46,9 @@ use crate::perf::*;
// This signal is unused, in that the kernel will never send it to a process.
const MARKER_SIGNAL: Signal = reverie::PERF_EVENT_SIGNAL;
pub(crate) const AMD_VENDOR: &str = "AuthenticAMD";
pub(crate) const INTEL_VENDOR: &str = "GenuineIntel";
#[cfg(target_arch = "x86_64")]
pub(crate) fn get_rcb_perf_config() -> u64 {
let c = CpuId::new();
let c = raw_cpuid::CpuId::new();
let fi = c.get_feature_info().unwrap();
// based on rr's PerfCounters_x86.h and PerfCounters.cc
match (fi.family_id(), fi.model_id()) {
@ -75,6 +72,41 @@ pub(crate) fn get_rcb_perf_config() -> u64 {
}
}
/// Returns the perf event config value for retired branches on aarch64.
///
/// No CPU detection is done yet: the `BR_RETIRED` event number is returned
/// unconditionally, i.e. we always assume retired branch events are available.
///
/// TODO:
///  1. Compute the microarchitecture from
///     `/sys/devices/system/cpu/cpu*/regs/identification/midr_el1`
///  2. Look up the microarchitecture in a table to determine what features
///     we can enable.
///
/// References:
///  - https://github.com/rr-debugger/rr/blob/master/src/PerfCounters.cc#L156
#[cfg(target_arch = "aarch64")]
pub(crate) fn get_rcb_perf_config() -> u64 {
    // Event number of `BR_RETIRED`.
    const RETIRED_BRANCH_EVENT: u64 = 0x21;

    RETIRED_BRANCH_EVENT
}
/// Reports whether `precise_ip` can be enabled on the current CPU.
///
/// The answer is the CPUID "debug store" feature flag. If no feature info is
/// available from CPUID, the answer is `false`. The decision is logged at
/// debug level together with the CPUID data.
#[cfg(target_arch = "x86_64")]
pub(crate) fn has_precise_ip() -> bool {
    let cpuid = raw_cpuid::CpuId::new();

    // Missing feature info is treated the same as "debug store not present".
    let debug_store_present = cpuid
        .get_feature_info()
        .map(|features| features.has_ds())
        .unwrap_or(false);

    debug!(
        "Setting precise_ip to {} for cpu {:?}",
        debug_store_present, cpuid
    );

    debug_store_present
}
/// Reports whether `precise_ip` can be enabled on the current CPU.
///
/// On aarch64 no detection is performed yet; the answer is always `true`.
#[cfg(target_arch = "aarch64")]
pub(crate) fn has_precise_ip() -> bool {
    // Assume, for now, that aarch64 can use precise_ip.
    const ASSUME_PRECISE_IP: bool = true;

    ASSUME_PRECISE_IP
}
/// A timer monitoring a single thread. The underlying implementation is eagerly
/// initialized, but left empty if perf is not supported. In that case, any
/// methods with semantics that require a functioning clock or timer will panic.
@ -356,9 +388,6 @@ const MAX_SINGLE_STEP_COUNT: u64 = SKID_MARGIN_RCBS + SINGLESTEP_TIMEOUT_RCBS;
impl TimerImpl {
pub fn new(guest_pid: Pid, guest_tid: Tid) -> Result<Self, Errno> {
let cpu = CpuId::new();
let has_debug_store = cpu.get_feature_info().map_or(false, |info| info.has_ds());
let evt = Event::Raw(get_rcb_perf_config());
// measure the target tid irrespective of CPU
@ -367,12 +396,7 @@ impl TimerImpl {
.sample_period(PerfCounter::DISABLE_SAMPLE_PERIOD)
.event(evt);
// Check if we can set precise_ip = 1 by checking if debug store is enabled.
debug!(
"Setting precise_ip to {} for cpu {:?}",
has_debug_store, cpu
);
if has_debug_store {
if has_precise_ip() {
// set precise_ip to lowest value to enable PEBS (TODO: AMD?)
builder.precise_ip(1);
}

View file

@ -9,8 +9,6 @@
use core::mem;
use perf_event_open_sys::bindings as perf;
use raw_cpuid::CpuId;
use raw_cpuid::FeatureInfo;
use reverie::Errno;
use thiserror::Error;
use tracing::error;
@ -19,8 +17,7 @@ use tracing::warn;
use crate::perf::do_branches;
use crate::perf::PerfCounter;
use crate::timer::get_rcb_perf_config;
use crate::timer::AMD_VENDOR;
use crate::timer::INTEL_VENDOR;
use crate::timer::has_precise_ip;
const IN_TXCP: u64 = 1 << 33;
const NUM_BRANCHES: u64 = 500;
@ -92,11 +89,7 @@ fn init_perf_event_attr(
result.set_exclude_guest(1);
result.set_exclude_kernel(1);
if precise_ip
&& CpuId::new()
.get_feature_info()
.map_or(false, |info| info.has_ds())
{
if precise_ip && has_precise_ip() {
result.set_precise_ip(1);
// This prevents EINVAL when creating a counter with precise_ip enabled
@ -332,7 +325,8 @@ fn check_working_counters(precise_ip: bool) -> Result<(), PmuValidationError> {
/// check the cpu feature id to determine if it is a AMD-Zen vs AmdF15R30
/// This is much simpler in c++ because eax is available directly
fn is_amd_zen(cpu_feature: FeatureInfo) -> bool {
#[cfg(target_arch = "x86_64")]
fn is_amd_zen(cpu_feature: raw_cpuid::FeatureInfo) -> bool {
let family_id = cpu_feature.base_family_id(); // 4 bits
let model_id = cpu_feature.base_model_id(); // 4 bits
let ext_model_id = cpu_feature.extended_model_id(); // 4 bits
@ -342,7 +336,7 @@ fn is_amd_zen(cpu_feature: FeatureInfo) -> bool {
let cpu_type: u32 =
((model_id as u32) << 4) | ((family_id as u32) << 8) | ((ext_model_id as u32) << 16);
// There are lots of magic numbers here. They come directly from
// There are lots of magic numbers here. They come directly from
// https://github.com/rr-debugger/rr/blob/master/src/PerfCounters_x86.h
matches!(
(cpu_type, ext_family_id),
@ -367,8 +361,9 @@ fn is_amd_zen(cpu_feature: FeatureInfo) -> bool {
/// This is a transcription of the function with the same name in Mozilla-RR it will
/// check for bugs specific to cpu architectures
#[cfg(target_arch = "x86_64")]
fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
let c = CpuId::new();
let c = raw_cpuid::CpuId::new();
let vendor = c.get_vendor_info().unwrap();
let feature_info = c
.get_feature_info()
@ -376,8 +371,8 @@ fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
let vendor_str = vendor.as_str();
match vendor_str {
AMD_VENDOR if is_amd_zen(feature_info) => check_for_zen_speclockmap(),
INTEL_VENDOR => {
"AuthenticAMD" if is_amd_zen(feature_info) => check_for_zen_speclockmap(),
"GenuineIntel" => {
check_for_kvm_in_txcp_bug()?;
#[cfg(feature = "llvm_asm")]
check_for_xen_pmi_bug(_precise_ip)?;
@ -387,6 +382,12 @@ fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
}
}
/// aarch64 variant of the architecture-specific PMU bug check.
///
/// No aarch64-specific checks are implemented yet, so this always returns
/// `Ok(())`. The `_precise_ip` argument is accepted but not used.
#[cfg(target_arch = "aarch64")]
fn check_for_arch_bugs(_precise_ip: bool) -> Result<(), PmuValidationError> {
// TODO: Do some aarch64-specific testing?
Ok(())
}
fn check_for_zen_speclockmap() -> Result<(), PmuValidationError> {
// When the SpecLockMap optimization is not disabled, rr will not work
// reliably (e.g. it would work fine on a single process with a single
@ -653,10 +654,7 @@ mod test {
fn test_check_for_ioc_period_bug_precise_ip() {
// This assumes the machine running the test will not have this bug and only runs
// if precise_ip will be enabled
if CpuId::new()
.get_feature_info()
.map_or(false, |info| info.has_ds())
{
if has_precise_ip() {
if let Err(pmu_err) = check_for_ioc_period_bug(true) {
panic!(
"Ioc period bug check failed when precise_ip was enabled - {}",
@ -670,10 +668,7 @@ mod test {
fn test_check_working_counters_precise_ip() {
// This assumes the machine running the test will have working counters and only runs
// if precise_ip will be enabled
if CpuId::new()
.get_feature_info()
.map_or(false, |info| info.has_ds())
{
if has_precise_ip() {
if let Err(pmu_err) = check_working_counters(true) {
panic!(
"Working counters check failed when precise_ip was enabled - {}",
@ -687,10 +682,7 @@ mod test {
fn test_check_for_arch_bugs_precise_ip() {
// This assumes the machine running the test will not have arch bugs and only runs
// if precise_ip will be enabled
if CpuId::new()
.get_feature_info()
.map_or(false, |info| info.has_ds())
{
if has_precise_ip() {
if let Err(pmu_err) = check_for_arch_bugs(true) {
panic!(
"Architecture-specific bug check failed when precise_ip was enabled - {}",