Merge pull request #241 from matklad/oorandom

Switch from rand to oorandom
2025-02-02 09:46:06 +00:00 · 2020-07-29 12:11:46 -04:00 · 2020-07-29 12:11:46 -04:00 · 2e5b841405
commit 2e5b841405
parent 380c4c1dc8 35a420d0c0
3 changed files with 7 additions and 138 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,7 +16,7 @@ log = "0.4.5"
 parking_lot = "0.11.0"
 rustc-hash = "1.0"
 smallvec = "1.0.0"
-rand = { version = "0.7", features = [ "small_rng" ], default-features = false }
+oorandom = "11"

 salsa-macros = { version = "0.15.0", path = "components/salsa-macros" }

--- a/src/lru.rs
+++ b/src/lru.rs
@@ -1,14 +1,10 @@
 use parking_lot::Mutex;
-use rand::rngs::SmallRng;
-use rand::Rng;
-use rand::SeedableRng;
+use oorandom::Rand64;
 use std::fmt::Debug;
 use std::sync::atomic::AtomicUsize;
 use std::sync::atomic::Ordering;
 use std::sync::Arc;

-mod test;
-
 /// A simple and approximate concurrent lru list.
 ///
 /// We assume but do not verify that each node is only used with one
@@ -33,7 +29,7 @@ struct LruData<Node> {
    end_red_zone: usize,
    end_yellow_zone: usize,
    end_green_zone: usize,
-    rng: SmallRng,
+    rng: Rand64,
    entries: Vec<Arc<Node>>,
 }

@@ -141,7 +137,7 @@ where
        Self::with_rng(rng_with_seed(seed_str))
    }

-    fn with_rng(rng: SmallRng) -> Self {
+    fn with_rng(rng: Rand64) -> Self {
        LruData {
            end_yellow_zone: 0,
            end_green_zone: 0,
@@ -287,7 +283,7 @@ where

    fn pick_index(&mut self, zone: std::ops::Range<usize>) -> usize {
        let end_index = std::cmp::min(zone.end, self.entries.len());
-        self.rng.gen_range(zone.start, end_index)
+        self.rng.rand_range(zone.start as u64 .. end_index as u64) as usize
    }
 }

@@ -317,12 +313,12 @@ impl LruIndex {
    }
 }

-fn rng_with_seed(seed_str: &str) -> SmallRng {
+fn rng_with_seed(seed_str: &str) -> Rand64 {
    let mut seed: [u8; 16] = [0; 16];
    for (i, &b) in seed_str.as_bytes().iter().take(16).enumerate() {
        seed[i] = b;
    }
-    SmallRng::from_seed(seed)
+    Rand64::new(u128::from_le_bytes(seed))
 }

 // A note on ordering:
--- a/src/lru/test.rs
+++ b/src/lru/test.rs
@@ -1,127 +0,0 @@
-#![cfg(test)]
-
-use super::*;
-use linked_hash_map::LinkedHashMap;
-use rand_distr::{Distribution, Normal};
-
-#[derive(Debug)]
-struct TestNode {
-    id: usize,
-    index: LruIndex,
-}
-
-impl TestNode {
-    fn new(id: usize) -> Arc<Self> {
-        Arc::new(TestNode {
-            id,
-            index: Default::default(),
-        })
-    }
-}
-
-impl LruNode for TestNode {
-    fn lru_index(&self) -> &LruIndex {
-        &self.index
-    }
-}
-
-const PICK_SEED: &str = "Wippity WIP";
-
-/// Randomly requests nodes and compares the performance of a
-/// *perfect* LRU vs our more approximate version. Since all the
-/// random number generators use fixed seeds, these results are
-/// reproducible. Returns (oracle_hits, lru_hits) -- i.e., the number
-/// of times that the oracle had something in cache vs the number of
-/// times that our LRU did.
-fn compare(
-    standard_deviation: usize,
-    num_nodes: usize,
-    capacity: usize,
-    requests: usize,
-) -> (usize, usize) {
-    // Remember the clock each time we access a given element.
-    let mut last_access: Vec<usize> = (0..num_nodes).map(|_| 0).collect();
-
-    // Use a linked hash map as our *oracle* -- we track each node we
-    // requested and (as the value) the clock in which we requested
-    // it. When the capacity is exceed, we can pop the oldest.
-    let mut oracle = LinkedHashMap::new();
-
-    let lru = Lru::with_seed(super::LRU_SEED);
-    lru.set_lru_capacity(capacity);
-
-    let nodes: Vec<_> = (0..num_nodes).map(|i| TestNode::new(i)).collect();
-
-    let mut oracle_hits = 0;
-    let mut lru_hits = 0;
-
-    let mut pick_rng = super::rng_with_seed(PICK_SEED);
-    let normal = Normal::new((num_nodes / 2) as f64, standard_deviation as f64).unwrap();
-    for clock in (0..requests).map(|n| n + 1) {
-        let request_id = (normal.sample(&mut pick_rng) as usize).min(num_nodes - 1);
-        assert!(request_id < num_nodes);
-
-        last_access[request_id] = clock;
-
-        if oracle.contains_key(&request_id) {
-            oracle_hits += 1;
-        }
-
-        if nodes[request_id].index.is_in_lru() {
-            lru_hits += 1;
-        }
-
-        // maintain the oracle LRU
-        oracle.insert(request_id, ());
-        if oracle.len() > capacity {
-            oracle.pop_front().unwrap();
-        }
-
-        // maintain our own version
-        if let Some(lru_evicted) = lru.record_use(&nodes[request_id]) {
-            assert!(!lru_evicted.index.is_in_lru());
-        }
-    }
-
-    println!("oracle_hits = {}", oracle_hits);
-    println!("lru_hits = {}", lru_hits);
-    (oracle_hits, lru_hits)
-}
-
-// Compare performance of approximate LRU vs the perfect oracle in
-// various scenarios -- different standard deviations and total size.
-// Note that the `lru_hits` variable is just recording the current
-// state and would be expected to change if you tweak the
-// implementation (`oracle_hits` ought not to change).
-
-#[test]
-#[ignore] // these results seem to vary between CI and local machines, not sure why, maybe version of rand?
-fn scenario_20_of_1000() {
-    let (oracle_hits, lru_hits) = compare(20, 1000, 100, 10000);
-    assert_eq!(oracle_hits, 9662);
-    assert_eq!(lru_hits, 9428);
-}
-
-#[test]
-#[ignore] // these results seem to vary between CI and local machines, not sure why, maybe version of rand?
-fn scenario_200_of_1000() {
-    let (oracle_hits, lru_hits) = compare(200, 1000, 100, 10000);
-    assert_eq!(oracle_hits, 1496);
-    assert_eq!(lru_hits, 1488);
-}
-
-#[test]
-#[ignore] // these results seem to vary between CI and local machines, not sure why, maybe version of rand?
-fn scenario_500_of_1000() {
-    let (oracle_hits, lru_hits) = compare(500, 1000, 100, 10000);
-    assert_eq!(oracle_hits, 3835);
-    assert_eq!(lru_hits, 3839);
-}
-
-#[test]
-#[ignore] // these results seem to vary between CI and local machines, not sure why, maybe version of rand?
-fn scenario_2000_of_10000() {
-    let (oracle_hits, lru_hits) = compare(2000, 10000, 100, 10000);
-    assert_eq!(oracle_hits, 256);
-    assert_eq!(lru_hits, 229);
-}