/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

//! Timers monitor a specified thread using the PMU and deliver a signal
//! after a specified number of events occur. The signal is then identified
//! and transformed into a reverie timer event. This is intended to allow
//! tools to break busywaits or other spins in a reliable manner. Timers
//! are ideally deterministic so that `detcore` can use them.
//!
//! Due to PMU skid, precise timer events must be driven to completion via
//! single stepping. This means the PMI is scheduled early, and events with very
//! short timeouts require immediate single stepping. Immediate stepping is
//! achieved by artificially generating a signal that will then be delivered
//! immediately upon resumption of the guest.
//!
//! Proper use of timers requires that all delivered signals of type
//! `Timer::signal_type()` be passed through `Timer::handle_signal`, and that
//! `Timer::observe_event()` be called whenever a Tool-observable reverie event
//! occurs. Additionally, `Timer::finalize_requests()` must be called
//! - after the end of the tool callback in which the user could have
//!   requested a timer event, i.e. those with `&mut guest` access,
//! - after any reverie-critical single-stepping occurs (e.g. in syscall
//!   injections),
//! - before resumption of the guest,
//! which _usually_ means immediately after the tool callback returns.
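//!
//! As a rough sketch of the intended call pattern (the `timer`, `task`, and
//! guest handles here are illustrative placeholders, not APIs defined in this
//! module), a tool would drive a timer roughly like this:
//!
//! ```ignore
//! // Inside a tool callback with `&mut guest` access:
//! timer.request_event(TimerEventRequest::Precise(10_000))?;
//! // After the callback (and any reverie-critical single-stepping), before
//! // resuming the guest:
//! timer.finalize_requests();
//! // Later, when a signal of type `Timer::signal_type()` stops the guest:
//! let stopped = timer.handle_signal(task).await?;
//! ```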

use std::cmp::Ordering::Equal;
use std::cmp::Ordering::Greater;
use std::cmp::Ordering::Less;

use reverie::Errno;
use reverie::Pid;
use reverie::RegDisplay;
use reverie::RegDisplayOptions;
use reverie::Signal;
use reverie::Tid;
use safeptrace::Error as TraceError;
use safeptrace::Event as TraceEvent;
use safeptrace::Stopped;
use safeptrace::Wait;
use thiserror::Error;
use tracing::debug;
use tracing::trace;
use tracing::warn;

use crate::perf::*;

// This signal is unused, in that the kernel will never send it to a process.
const MARKER_SIGNAL: Signal = reverie::PERF_EVENT_SIGNAL;

#[cfg(target_arch = "x86_64")]
pub(crate) fn get_rcb_perf_config() -> u64 {
    let c = raw_cpuid::CpuId::new();
    let fi = c.get_feature_info().unwrap();
    // based on rr's PerfCounters_x86.h and PerfCounters.cc
    match (fi.family_id(), fi.model_id()) {
        (0x06, 0x1A) | (0x06, 0x1E) | (0x06, 0x2E) => 0x5101c4, // Intel Nehalem
        (0x06, 0x25) | (0x06, 0x2C) | (0x06, 0x2F) => 0x5101c4, // Intel Westmere
        (0x06, 0x2A) | (0x06, 0x2D) | (0x06, 0x3E) => 0x5101c4, // Intel SandyBridge
        (0x06, 0x3A) => 0x5101c4, // Intel IvyBridge
        (0x06, 0x3C) | (0x06, 0x3F) | (0x06, 0x45) | (0x06, 0x46) => 0x5101c4, // Intel Haswell
        (0x06, 0x3D) | (0x06, 0x47) | (0x06, 0x4F) | (0x06, 0x56) => 0x5101c4, // Intel Broadwell
        (0x06, 0x4E) | (0x06, 0x55) | (0x06, 0x5E) => 0x5101c4, // Intel Skylake
        (0x06, 0x8E) | (0x06, 0x9E) => 0x5101c4, // Intel Kabylake
        (0x06, 0xA5) | (0x06, 0xA6) => 0x5101c4, // Intel Cometlake
        (0x06, 0x8D) => 0x5101c4, // Intel Tiger Lake (e.g. i7-11800H laptop)
        (0x06, 0x9A) => 0x5101c4, // Intel Alder Lake (e.g. i7-12700H laptop)
        (0x06, 0x8F) => 0x5101c4, // Intel Sapphire Rapids
        (0x06, 0x86) => 0x5101c4, // Intel Icelake
        (0x17, 0x8) => 0x5100d1,  // AMD Zen, Pinnacle Ridge
        (0x17, 0x31) => 0x5100d1, // AMD Zen, Castle Peak
        (0x17, 0x71) => 0x5100d1, // AMD Zen 2, Matisse
        (0x19, 0x01) => 0x5100d1, // AMD Zen, Milan
        (0x19, 0x50) => 0x5100d1, // AMD Zen, Cezanne
        oth => panic!(
            "Unsupported processor with feature info: {:?}\n Full family_model: {:?}",
            fi, oth
        ),
    }
}

#[cfg(target_arch = "aarch64")]
pub(crate) fn get_rcb_perf_config() -> u64 {
    // TODO:
    // 1. Compute the microarchitecture from
    //    `/sys/devices/system/cpu/cpu*/regs/identification/midr_el1`
    // 2. Look up the microarchitecture in a table to determine what features
    //    we can enable.
    // References:
    //  - https://github.com/rr-debugger/rr/blob/master/src/PerfCounters.cc#L156
    const BR_RETIRED: u64 = 0x21;

    // For now, always assume that we can get retired branch events.
    BR_RETIRED
}

/// Returns true if the current CPU supports precise_ip.
#[cfg(target_arch = "x86_64")]
pub(crate) fn has_precise_ip() -> bool {
    let cpu = raw_cpuid::CpuId::new();
    let has_debug_store = cpu.get_feature_info().map_or(false, |info| info.has_ds());

    debug!(
        "Setting precise_ip to {} for cpu {:?}",
        has_debug_store, cpu
    );

    has_debug_store
}

#[cfg(target_arch = "aarch64")]
pub(crate) fn has_precise_ip() -> bool {
    // Assume, for now, that aarch64 can use precise_ip.
    true
}

/// A timer monitoring a single thread. The underlying implementation is eagerly
/// initialized, but left empty if perf is not supported. In that case, any
/// methods with semantics that require a functioning clock or timer will panic.
#[derive(Debug)]
pub struct Timer {
    inner: Option<TimerImpl>,
}

/// Data required to request a timer event.
#[derive(Debug, Copy, Clone)]
pub enum TimerEventRequest {
    /// Event should fire after precisely this many RCBs.
    Precise(u64),

    /// Event should fire after at least this many RCBs.
    Imprecise(u64),

    /// Event should fire after precisely this many RCBs and this many instructions.
    PreciseInstruction(u64, u64),
}

/// The possible results of handling a timer signal.
#[derive(Error, Debug, Eq, PartialEq)]
pub enum HandleFailure {
    #[error(transparent)]
    TraceError(#[from] TraceError),

    #[error("Unexpected event while single stepping")]
    Event(Wait),

    /// The timer signal was for a timer event that was otherwise cancelled. The
    /// task is returned unchanged.
    #[error("Timer event was cancelled and should not fire")]
    Cancelled(Stopped),

    /// The signal causing the signal-delivery stop was not actually meant for
    /// this timer. The task is returned unchanged.
    #[error("Pending signal was not for this timer")]
    ImproperSignal(Stopped),
}

impl Timer {
    /// Create a new timer monitoring the specified thread.
    pub fn new(guest_pid: Pid, guest_tid: Tid) -> Self {
        // No errors are exposed here, as the construction should be
        // bullet-proof, and if it wasn't, consumers wouldn't be able to
        // meaningfully handle the error anyway.
        Self {
            inner: if is_perf_supported() {
                Some(TimerImpl::new(guest_pid, guest_tid).unwrap())
            } else {
                None
            },
        }
    }

    fn inner(&self) -> &TimerImpl {
        self.inner.as_ref().expect("Perf support required")
    }

    fn inner_noinit(&self) -> Option<&TimerImpl> {
        self.inner.as_ref()
    }

    fn inner_mut_noinit(&mut self) -> Option<&mut TimerImpl> {
        self.inner.as_mut()
    }

    /// Read the thread-local deterministic clock. Represents total elapsed RCBs
    /// on this thread since the timer was constructed, which should be at or
    /// near thread creation time.
    pub fn read_clock(&self) -> u64 {
        self.inner().read_clock()
    }

    /// Approximately convert a duration to the internal notion of timer ticks.
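    ///
    /// As an illustrative example of the conversion (not taken from this
    /// crate's tests), one second maps to 600,000,000 ticks and one
    /// millisecond to 600,000 ticks under the assumptions noted in the body:
    ///
    /// ```ignore
    /// use core::time::Duration;
    ///
    /// assert_eq!(Timer::as_ticks(Duration::from_secs(1)), 600_000_000);
    /// assert_eq!(Timer::as_ticks(Duration::from_millis(1)), 600_000);
    /// ```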
    pub fn as_ticks(dur: core::time::Duration) -> u64 {
        // assumptions: 10% conditional branches, 3 GHz, avg 2 IPC
        // this gives: 0.6B branch / sec = 0.6 branch / ns
        (dur.as_secs() * 600_000_000) + (u64::from(dur.subsec_nanos()) * 6 / 10)
    }

    /// Return the signal type sent by the timer. This is intended to allow
    /// pre-filtering signals without the full overhead of gathering signal info
    /// to pass to [`Timer::generated_signal`].
    pub fn signal_type() -> Signal {
        MARKER_SIGNAL
    }

    /// Request a timer event to occur in the future at a time specified by
    /// `evt`.
    ///
    /// This is *not* idempotent and will replace the outstanding request. If it
    /// is called repeatedly, no events will be delivered.
    pub fn request_event(&mut self, evt: TimerEventRequest) -> Result<(), Errno> {
        self.inner_mut_noinit()
            .ok_or(Errno::ENODEV)?
            .request_event(evt)
    }

    /// Must be called whenever a Tool-observable reverie event occurs. This
    /// ensures proper cancellation semantics are observed. See the internal
    /// `timer::EventStatus` type for details.
    pub fn observe_event(&mut self) {
        if let Some(t) = self.inner_mut_noinit() {
            t.observe_event();
        }
    }

    /// Cancel pending timer notifications. This is idempotent.
    ///
    /// If there was a previous call to [`Timer::enable_interval`], this
    /// will prevent the delivery of that notification. This also has the effect
    /// of resetting the "elapsed ticks." That is, if the current notification
    /// duration is `N` ticks, then a full `N` ticks must elapse after the next
    /// call to [`enable_interval`](Timer::enable_interval) before a
    /// notification is delivered.
    ///
    /// While [`Timer::cancel`] actually disables the counting of RCBs, this
    /// method simply sets a flag that cancels subsequently delivered signals until
    /// [`Timer::request_event`] is called again. Thus, this method is lighter
    /// if called multiple times, but still results in a signal delivery, while
    /// [`Timer::cancel`] must perform a syscall, but will actually cancel the
    /// signal.
    #[allow(dead_code)]
    pub fn schedule_cancellation(&mut self) {
        if let Some(t) = self.inner_mut_noinit() {
            t.schedule_cancellation();
        }
    }

    /// Cancel pending timer notifications. This is idempotent.
    ///
    /// If there was a previous call to [`Timer::enable_interval`], this
    /// will prevent the delivery of that notification. This also has the effect
    /// of resetting the "elapsed ticks." That is, if the current notification
    /// duration is `N` ticks, then a full `N` ticks must elapse after the next
    /// call to [`enable_interval`](Timer::enable_interval) before a
    /// notification is delivered.
    ///
    /// See [`Timer::schedule_cancellation`] for a comparison with this
    /// method.
    #[allow(dead_code)]
    pub fn cancel(&self) -> Result<(), Errno> {
        self.inner_noinit().map(|t| t.cancel()).unwrap_or(Ok(()))
    }

    /// Perform finalization actions on requests for timer events before guest
    /// resumption. See the module-level documentation for rules about when this can and
    /// should be called.
    ///
    /// Currently, this will, if necessary, `tgkill` a timer signal to the guest
    /// thread.
    pub fn finalize_requests(&self) {
        if let Some(t) = self.inner_noinit() {
            t.finalize_requests();
        }
    }

    /// When a signal is received, this method drives the timer event to
    /// completion via single stepping, after checking that the signal was meant
    /// for this specific timer. This *must* be called when a timer signal is
    /// received for correctness.
    ///
    /// Preconditions: task is in signal-delivery-stop.
    /// Postconditions: if a signal meant for this timer was the cause of the
    /// stop, the tracee will be at the precise instruction the timer event
    /// should fire at.
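    ///
    /// A rough sketch of how a caller might dispatch the result (the
    /// surrounding helpers here are illustrative, not part of this crate):
    ///
    /// ```ignore
    /// match timer.handle_signal(task).await {
    ///     // A timer event fired; deliver it to the tool.
    ///     Ok(stopped) => deliver_timer_event(stopped),
    ///     // No timer event fires; resume the guest unchanged.
    ///     Err(HandleFailure::Cancelled(stopped))
    ///     | Err(HandleFailure::ImproperSignal(stopped)) => resume(stopped),
    ///     // Propagate trace errors and unexpected single-step events.
    ///     Err(err) => return Err(err),
    /// }
    /// ```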
    pub async fn handle_signal(&mut self, task: Stopped) -> Result<Stopped, HandleFailure> {
        match self.inner_mut_noinit() {
            Some(t) => t.handle_signal(task).await,
            None => {
                warn!("Stray SIGSTKFLT indicates a bug!");
                Err(HandleFailure::ImproperSignal(task))
            }
        }
    }
}

/// The lazy-initialized part of a `Timer` that holds the functionality.
#[derive(Debug)]
struct TimerImpl {
    /// A non-resetting counter functioning as a thread-local clock.
    clock: PerfCounter,

    /// A separate counter used to generate signals for timer events
    timer: PerfCounter,

    /// Information about the active timer event, including expected counter
    /// values.
    event: ActiveEvent,

    /// The cancellation status of the active timer event.
    timer_status: EventStatus,

    /// Whether or not the active timer event requires an artificial signal
    send_artificial_signal: bool,

    /// Pid (tgid) of the monitored thread
    guest_pid: Pid,

    /// Tid of the monitored thread
    guest_tid: Tid,
}

/// Tracks cancellation status of a timer event in response to other reverie
/// events.
///
/// Whenever a reverie event occurs, this should tick "forward" once. If the
/// timer signal is first to occur, then the cancellation will be pending, and
/// the event will fire. If instead some other event occurred, the tick will
/// result in `Cancelled` and the event will not fire.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum EventStatus {
    Scheduled,
    Armed,
    Cancelled,
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum ActiveEvent {
    Precise {
        /// Expected clock value when event fires.
        clock_target: u64,
        /// Instruction offset from clock target
        offset: u64,
    },
    Imprecise {
        /// Expected minimum clock value when event fires.
        clock_min: u64,
    },
}

impl ActiveEvent {
    /// Given the current clock, determine if another event is required to get the
    /// clock to its expected state
    fn reschedule_if_spurious_wakeup(&self, curr_clock: u64) -> Option<TimerEventRequest> {
        match self {
            ActiveEvent::Precise {
                clock_target,
                offset: _,
            } => {
                if clock_target.saturating_sub(curr_clock) > MAX_SINGLE_STEP_COUNT {
                    Some(TimerEventRequest::Precise(*clock_target - curr_clock))
                } else {
                    None
                }
            }
            ActiveEvent::Imprecise { clock_min } => {
                if *clock_min > curr_clock {
                    Some(TimerEventRequest::Imprecise(*clock_min - curr_clock))
                } else {
                    None
                }
            }
        }
    }
}

impl EventStatus {
    pub fn next(self) -> Self {
        match self {
            EventStatus::Scheduled => EventStatus::Armed,
            EventStatus::Armed => EventStatus::Cancelled,
            EventStatus::Cancelled => EventStatus::Cancelled,
        }
    }

    pub fn tick(&mut self) {
        *self = self.next()
    }
}

/// This is the experimentally determined maximum number of RCBs an overflow
/// interrupt is delivered after the originating RCB.
///
/// If this number is too small, timer event delivery will be delayed and
/// non-deterministic, which, if observed, will result in a panic.
/// If this number is too big, we degrade performance from excessive single
/// stepping.
///
/// `rr` uses a value of 100 for almost all platforms, but with precise_ip = 0.
/// Enabling Intel PEBS via precise_ip > 0 seems to reduce observed skid by 1/2,
/// in synthetic benchmarks, though it makes counter _values_ incorrect. As a
/// result, we choose 60.
const SKID_MARGIN_RCBS: u64 = 60;

/// We refuse to schedule a "perf timeout" for this or fewer RCBs, instead
/// choosing to directly single step. This is because I am somewhat paranoid
/// about perf event throttling, which isn't well-documented.
const SINGLESTEP_TIMEOUT_RCBS: u64 = 5;

/// The maximum single step count we expect can occur when a precise timer event
/// is requested that leaves less than the minimum perf timeout remaining.
const MAX_SINGLE_STEP_COUNT: u64 = SKID_MARGIN_RCBS + SINGLESTEP_TIMEOUT_RCBS;

/// This ClockCounter represents a pair in the form (rcb, instr) that gets increased
/// while single-stepping to reach the target (target_rcb, target_instr).
#[derive(Debug, Eq, PartialEq)]
struct ClockCounter {
    rcbs: u64,
    instr: u64,
    target_rcb: u64,
}

impl std::fmt::Display for ClockCounter {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "rcb: {}, instr: {}", self.rcbs, self.instr)
    }
}

impl ClockCounter {
    pub fn new(rcb: u64, instr: u64, target_rcb: u64) -> Self {
        Self {
            rcbs: rcb,
            instr,
            target_rcb,
        }
    }

    /// This method counts instructions & rcbs together in an attempt to reach target_rcb.
    ///
    /// With each attempt we either increment the rcb or the instruction counter based on
    /// the read clock value. Once we reach target_rcb, we no longer increase the rcb
    /// counter, which allows the instruction counter to catch up to its target.
    pub fn single_step_with_clock(&mut self, rcbs: u64) {
        match (self.rcbs.cmp(&self.target_rcb), self.rcbs.cmp(&rcbs)) {
            (Less, Less) => {
                self.instr = 0;
                self.rcbs = rcbs;
            }

            (Less | Equal, Equal) => {
                self.instr += 1;
            }

            (Equal, Less) => {
                self.instr += 1;
            }

            (_, Greater) => panic!(
                "current counter rcb value {} is greater than provided rcb value {}",
                self.rcbs, rcbs
            ),
            (Greater, _) => panic!(
                "current counter rcb value {} is greater than target rcb value {}",
                self.rcbs, self.target_rcb
            ),
        }
    }

    /// Whether this counter is behind a given (rcb, instr) pair.
    ///
    /// Note: this is not always comparable. [None] will be returned in that case.
    fn is_behind(&self, rcbs: u64, instr: u64) -> Option<bool> {
        match self.target_rcb.cmp(&rcbs) {
            Less => None,
            Greater | Equal => match self.rcbs.cmp(&rcbs) {
                Less => Some(true),
                Equal => Some(self.instr < instr),
                Greater => Some(false),
            },
        }
    }

    fn rcbs(&self) -> u64 {
        self.rcbs
    }
}

impl TimerImpl {
    pub fn new(guest_pid: Pid, guest_tid: Tid) -> Result<Self, Errno> {
        let evt = Event::Raw(get_rcb_perf_config());

        // measure the target tid irrespective of CPU
        let mut builder = Builder::new(guest_tid.as_raw(), -1);
        builder
            .sample_period(PerfCounter::DISABLE_SAMPLE_PERIOD)
            .event(evt);

        if has_precise_ip() {
            // set precise_ip to lowest value to enable PEBS (TODO: AMD?)
            builder.precise_ip(1);
        }

        let timer = builder.check_for_pmu_bugs().create()?;
        timer.set_signal_delivery(guest_tid, MARKER_SIGNAL)?;
        timer.reset()?;
        // measure the target tid irrespective of CPU
        let clock = Builder::new(guest_tid.as_raw(), -1)
            // counting event
            .sample_period(0)
            .event(evt)
            .fast_reads(true)
            .create()?;
        clock.reset()?;
        clock.enable()?;

        Ok(Self {
            timer,
            clock,
            event: ActiveEvent::Precise {
                clock_target: 0,
                offset: 0,
            },
            timer_status: EventStatus::Cancelled,
            send_artificial_signal: false,
            guest_pid,
            guest_tid,
        })
    }

    pub fn request_event(&mut self, evt: TimerEventRequest) -> Result<(), Errno> {
        let (delivery, notification) = match evt {
            TimerEventRequest::Precise(ticks) | TimerEventRequest::PreciseInstruction(ticks, _) => {
                (ticks, ticks.saturating_sub(SKID_MARGIN_RCBS))
            }
            TimerEventRequest::Imprecise(ticks) => (ticks, ticks),
        };
        if delivery == 0 {
            return Err(Errno::EINVAL); // bail before setting timer
        }
        self.send_artificial_signal = if notification <= SINGLESTEP_TIMEOUT_RCBS {
            // If there's an existing event making use of the timer counter,
            // we need to "overwrite" it the same way setting an actual RCB
            // notification does.
            self.timer.disable()?;
            true
        } else {
            self.timer.reset()?;
            self.timer.set_period(notification)?;
            self.timer.enable()?;
            false
        };
        let clock = self.read_clock() + delivery;
        self.event = match evt {
            TimerEventRequest::Precise(_) => ActiveEvent::Precise {
                clock_target: clock,
                offset: 0,
            },
            TimerEventRequest::PreciseInstruction(_, instr_offset) => ActiveEvent::Precise {
                clock_target: clock,
                offset: instr_offset,
            },
            TimerEventRequest::Imprecise(_) => ActiveEvent::Imprecise { clock_min: clock },
        };
        self.timer_status = EventStatus::Scheduled;
        Ok(())
    }

    pub fn observe_event(&mut self) {
        self.timer_status.tick()
    }

    pub fn schedule_cancellation(&mut self) {
        self.timer_status = EventStatus::Cancelled;
    }

    pub fn cancel(&self) -> Result<(), Errno> {
        self.timer.disable()
    }

    fn is_timer_generated_signal(signal: &libc::siginfo_t) -> bool {
        // The signal that gets sent is SIGPOLL. We reconfigured the signal
        // number, but the struct info is the same. Per the perf manpage, signal
        // notifications will come indicating either POLL_IN or POLL_HUP.
        signal.si_signo == MARKER_SIGNAL as i32
            && (signal.si_code == i32::from(libc::POLLIN)
                || signal.si_code == i32::from(libc::POLLHUP))
    }

    fn generated_signal(&self, signal: &libc::siginfo_t) -> bool {
        signal.si_signo == MARKER_SIGNAL as i32
            // If we sent an artificial signal, it doesn't have any siginfo
            && (self.send_artificial_signal
                // If not, the fd should match. This could possibly lead to a
                // collision, because an fd comparing-equal to this one in another
                // process could also send a signal. However, the chance that it
                // would also do so as SIGSTKFLT is effectively zero.
                || (Self::is_timer_generated_signal(signal)
                    && get_si_fd(signal) == self.timer.raw_fd()))
    }

    pub fn read_clock(&self) -> u64 {
        self.clock.ctr_value_fast().expect("Failed to read clock")
    }

    pub fn finalize_requests(&self) {
        if self.send_artificial_signal {
            debug!("Sending artificial timer signal");

            // Give the guest a kick via an "artificial signal". This gives us something
            // to handle in `handle_signal` and thus drives single-stepping.
            Errno::result(unsafe {
                libc::syscall(
                    libc::SYS_tgkill,
                    self.guest_pid.as_raw(),
                    self.guest_tid.as_raw(),
                    MARKER_SIGNAL as i32,
                )
            })
            .expect("Timer tgkill error indicates a bug");
        }
    }

    pub async fn handle_signal(&mut self, task: Stopped) -> Result<Stopped, HandleFailure> {
        let signal = task.getsiginfo()?;
        if !self.generated_signal(&signal) {
            warn!(
                ?signal,
                "Passed a signal that wasn't for this timer, likely indicating a bug!",
            );
            return Err(HandleFailure::ImproperSignal(task));
        }

        match self.timer_status {
            EventStatus::Scheduled => panic!(
                "Timer event status should tick at least once before the signal \
                is handled. This is a bug!"
            ),
            EventStatus::Armed => {}
            EventStatus::Cancelled => {
                debug!("Delivered timer signal cancelled due to status");
                self.disable_timer_before_stepping();
                return Err(HandleFailure::Cancelled(task));
            }
        };

        // At this point, we've decided that a timer event is to be delivered.

        // Ensure any new timer signals don't mess with us while single-stepping
        self.disable_timer_before_stepping();

        // Last check to see if this is an unexpected wakeup (a signal before the minimum expected)
        let ctr = self.read_clock();

        if let Some(additional_timer_request) = self.event.reschedule_if_spurious_wakeup(ctr) {
            debug!("Spurious wakeup - rescheduling new timer event");
            if let Err(errno) = self.request_event(additional_timer_request) {
                warn!(
                    "Attempted to reschedule a timer signal after an early wakeup, but failed with - {:?}. A panic will likely follow",
                    errno
                );
            } else {
                return Err(HandleFailure::Cancelled(task));
            };
        }

        // Before we drive the event to completion, clear the `send_artificial_signal` flag so that:
        // - another signal isn't generated anytime Timer::finalize_requests() is called
        // - spurious SIGSTKFLTs aren't errantly let through
        // Cancellations should prevent spurious timer events in any case.
        self.send_artificial_signal = false;

        match self.event {
            ActiveEvent::Precise {
                clock_target,
                offset,
            } => {
                self.attempt_single_step(task, ctr, clock_target, offset)
                    .await
            }
            ActiveEvent::Imprecise { clock_min } => {
                debug!(
                    "Imprecise timer event delivered. Ctr val: {}, min val: {}",
                    ctr, clock_min
                );
                assert!(ctr >= clock_min, "ctr = {}, clock_min = {}", ctr, clock_min);
                Ok(task)
            }
        }
    }

    async fn attempt_single_step(
        &self,
        task: Stopped,
        ctr_initial: u64,
        target_rcb: u64,
        target_instr: u64,
    ) -> Result<Stopped, HandleFailure> {
        assert!(
            ctr_initial <= target_rcb,
            "Clock perf counter exceeds target value at start of attempted single-step: \
            {} > {}. Consider increasing SKID_MARGIN_RCBS.",
            ctr_initial,
            target_rcb
        );
        let mut current = ClockCounter::new(ctr_initial, 0, target_rcb);
        assert!(
            target_rcb - current.rcbs() <= MAX_SINGLE_STEP_COUNT,
            "Single steps from {} to {} requested ({} steps), but that exceeds the skid margin + minimum perf timer steps ({}). \
            This probably indicates a bug",
            current.rcbs(),
            target_rcb,
            (target_rcb - current.rcbs()),
            MAX_SINGLE_STEP_COUNT
        );
        debug!(
            "Timer will single-step from ctr {} to {}",
            current, target_rcb
        );
        let mut task = task;
        loop {
            if !current
                .is_behind(target_rcb, target_instr)
                .expect("counter should increase monotonically and stay at target_rcb until equal. This is most likely a BUG with counter tracking")
            {
                break;
            }
            #[cfg(target_arch = "x86_64")]
            trace!(
                "[instruction]\n{}\n{}",
                crate::decoder::decode_instruction(&task)?,
                task.getregs()?
                    .display_with_options(RegDisplayOptions { multiline: true })
            );
            task = match task.step(None)?.next_state().await? {
                // a successful single step results in SIGTRAP stop
                Wait::Stopped(new_task, TraceEvent::Signal(Signal::SIGTRAP)) => new_task,
                wait => return Err(HandleFailure::Event(wait)),
            };
            current.single_step_with_clock(self.read_clock());
        }
        Ok(task)
    }

    /// Imagine our skid margin is 50 RCBs, and we set the timer for 5 RCBs.
    /// Since we step for 50, the timer will trigger multiple times unless we
    /// disable it before stepping. This would count as a state machine
    /// transition and errantly cancel the delivery of the timer event.
    fn disable_timer_before_stepping(&self) {
        self.timer
            .disable()
            .expect("Must be able to disable timer before stepping");
    }
}

#[cfg(target_os = "linux")]
fn get_si_fd(signal: &libc::siginfo_t) -> libc::c_int {
    // This is almost certainly broken for anything other than linux (glibc?).
    //
    // The `libc` crate doesn't expose these fields properly, because the
    // current version was released before union support, and `siginfo_t` is a
    // messy enum/union, making this super fragile.
    //
    // `libc` has an accessor system in place, but only for a few particular
    // signal types as of right now. We could submit a PR for SIGPOLL/SIGIO, but
    // until then, this copies the currently used accessor idea.

    #[repr(C)]
    #[derive(Copy, Clone)]
    struct sifields_sigpoll {
        si_band: libc::c_long,
        si_fd: libc::c_int,
    }
    #[repr(C)]
    union sifields {
        _align_pointer: *mut libc::c_void,
        sigpoll: sifields_sigpoll,
    }
    #[repr(C)]
    struct siginfo_f {
        _siginfo_base: [libc::c_int; 3],
        sifields: sifields,
        padding: [libc::c_int; 24],
    }

    // These compile to no-op or unconditional runtime panic, which is good,
    // because code not using timers continues to work.
    assert_eq!(
        core::mem::size_of::<siginfo_f>(),
        core::mem::size_of_val(signal),
    );
    assert_eq!(
        core::mem::align_of::<siginfo_f>(),
        core::mem::align_of_val(signal),
    );

    unsafe {
        (*(signal as *const _ as *const siginfo_f))
            .sifields
            .sigpoll
            .si_fd
    }
}

#[cfg(test)]
mod tests {
    use test_case::test_case;

    use super::ClockCounter;

    #[test_case(ClockCounter::new(0, 0, 10), 0, 1, Some(true))]
    #[test_case(ClockCounter::new(2, 100, 200), 3, 0, Some(true))]
    #[test_case(ClockCounter::new(1, 10, 200), 1, 11, Some(true))]
    #[test_case(ClockCounter::new(2, 100, 2), 3, 0, None)]
    #[test_case(ClockCounter::new(4, 4, 4), 4, 5, Some(true))]
    #[test_case(ClockCounter::new(4, 4, 4), 4, 3, Some(false))]
    #[test_case(ClockCounter::new(4, 4, 4), 4, 4, Some(false))]
    fn test_clock_counter_is_behind(
        counter: ClockCounter,
        target_rcb: u64,
        target_instr: u64,
        expected: Option<bool>,
    ) {
        assert_eq!(counter.is_behind(target_rcb, target_instr), expected);
    }

    #[test_case(ClockCounter::new(0, 0, 0), 0, (0, 1))]
    #[test_case(ClockCounter::new(0, 1, 0), 1, (0, 2))]
    #[test_case(ClockCounter::new(0, 1, 0), 2, (0, 2))]
    #[test_case(ClockCounter::new(0, 1, 1), 0, (0, 2))]
    #[test_case(ClockCounter::new(0, 1, 1), 1, (1, 0))]
    #[test_case(ClockCounter::new(0, 1, 1), 2, (2, 0))]
    #[test_case(ClockCounter::new(0, 1, 1), 3, (3, 0))]
    #[test_case(ClockCounter::new(10, 0, 11), 10, (10, 1))]
    #[test_case(ClockCounter::new(10, 1, 11), 10, (10, 2))]
    #[test_case(ClockCounter::new(10, 1, 11), 11, (11, 0))]
    #[test_case(ClockCounter::new(10, 1, 11), 12, (12, 0))]
    fn test_increment_counter_with_clock(
        mut counter: ClockCounter,
        new_clock: u64,
        expected: (u64, u64),
    ) {
        counter.single_step_with_clock(new_clock);
        assert_eq!((counter.rcbs, counter.instr), expected);
    }
}