settings: support human-readable byte sizes for max-new-file-size

This commit is contained in:
Benjamin Saunders 2023-08-11 12:52:58 -07:00
parent 351e7feef5
commit 6c4b8a7383
9 changed files with 154 additions and 23 deletions

1
Cargo.lock generated
View file

@ -1048,6 +1048,7 @@ dependencies = [
"rayon", "rayon",
"regex", "regex",
"rustix", "rustix",
"serde",
"serde_json", "serde_json",
"smallvec", "smallvec",
"strsim", "strsim",

View file

@ -335,9 +335,9 @@
"description": "Parameters governing automatic capture of files into the working copy commit", "description": "Parameters governing automatic capture of files into the working copy commit",
"properties": { "properties": {
"max-new-file-size": { "max-new-file-size": {
"type": "integer", "type": ["integer", "string"],
"description": "New files with a size in bytes above this threshold are not snapshotted, unless the threshold is 0", "description": "New files with a size in bytes above this threshold are not snapshotted, unless the threshold is 0",
"default": "1048576" "default": "1MiB"
} }
} }
} }

View file

@ -10,4 +10,4 @@ pager = { command = ["less", "-FRX"], env = { LESSCHARSET = "utf-8" } }
log-word-wrap = false log-word-wrap = false
[snapshot] [snapshot]
max-new-file-size = 1048576 max-new-file-size = "1MiB"

View file

@ -4,6 +4,7 @@ use std::sync::Mutex;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use crossterm::terminal::{Clear, ClearType}; use crossterm::terminal::{Clear, ClearType};
use jj_lib::fmt_util::binary_prefix;
use jj_lib::git; use jj_lib::git;
use jj_lib::repo_path::RepoPath; use jj_lib::repo_path::RepoPath;
@ -105,20 +106,6 @@ fn draw_progress(progress: f32, buffer: &mut String, width: usize) {
const UPDATE_HZ: u32 = 30; const UPDATE_HZ: u32 = 30;
const INITIAL_DELAY: Duration = Duration::from_millis(250); const INITIAL_DELAY: Duration = Duration::from_millis(250);
/// Scale `x` down by powers of 1024 until its whole part has at most three
/// digits, returning the scaled value together with the matching binary
/// prefix (`""`, `"Ki"`, `"Mi"`, ...).
fn binary_prefix(x: f32) -> (f32, &'static str) {
    const TABLE: [&str; 9] = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"];

    let mut value = x;
    let mut prefix = TABLE[0];
    // Walk the remaining prefixes; each step divides by 1024 and moves on to
    // the next larger unit. Running out of prefixes ends the scaling.
    for &larger in &TABLE[1..] {
        // Deliberately written as a negated `>=` so that NaN (for which every
        // comparison is false) stops the scaling immediately.
        if !(value.abs() >= 1000.0) {
            break;
        }
        value /= 1024.0;
        prefix = larger;
    }
    (value, prefix)
}
struct RateEstimate { struct RateEstimate {
state: Option<RateEstimateState>, state: Option<RateEstimateState>,
} }

View file

@ -39,6 +39,7 @@ rand.workspace = true
rand_chacha.workspace = true rand_chacha.workspace = true
rayon.workspace = true rayon.workspace = true
regex.workspace = true regex.workspace = true
serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
smallvec.workspace = true smallvec.workspace = true
strsim.workspace = true strsim.workspace = true

32
lib/src/fmt_util.rs Normal file
View file

@ -0,0 +1,32 @@
// Copyright 2023 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Common formatting helpers
/// Find the smallest binary prefix with which the whole part of `x` is at most
/// three digits, and return the scaled `x` and that prefix.
///
/// The prefix is returned without a unit suffix (e.g. `"Ki"`, not `"KiB"`);
/// values below 1000 in magnitude are returned unchanged with an empty
/// prefix. Scaling stops at the largest known prefix (`"Yi"`), so for
/// extremely large inputs the scaled value may still exceed three digits.
pub fn binary_prefix(x: f32) -> (f32, &'static str) {
    /// Binary prefixes in ascending order, starting with the empty prefix. The
    /// index of each prefix is the base-1024 exponent it represents.
    const TABLE: [&str; 9] = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"];

    let mut i = 0;
    let mut scaled = x;
    // The threshold is 1000 rather than 1024 so that the whole part never
    // needs a fourth digit; `i` is capped at the last table entry.
    while scaled.abs() >= 1000.0 && i < TABLE.len() - 1 {
        i += 1;
        scaled /= 1024.0;
    }
    (scaled, TABLE[i])
}

View file

@ -32,6 +32,7 @@ pub mod default_submodule_store;
pub mod diff; pub mod diff;
pub mod file_util; pub mod file_util;
pub mod files; pub mod files;
pub mod fmt_util;
pub mod fsmonitor; pub mod fsmonitor;
pub mod git; pub mod git;
pub mod git_backend; pub mod git_backend;

View file

@ -22,6 +22,7 @@ use rand::prelude::*;
use rand_chacha::ChaCha20Rng; use rand_chacha::ChaCha20Rng;
use crate::backend::{ChangeId, ObjectId, Signature, Timestamp}; use crate::backend::{ChangeId, ObjectId, Signature, Timestamp};
use crate::fmt_util::binary_prefix;
use crate::fsmonitor::FsmonitorKind; use crate::fsmonitor::FsmonitorKind;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -198,7 +199,10 @@ impl UserSettings {
} }
pub fn max_new_file_size(&self) -> Result<u64, config::ConfigError> { pub fn max_new_file_size(&self) -> Result<u64, config::ConfigError> {
let cfg = self.config.get::<u64>("snapshot.max-new-file-size"); let cfg = self
.config
.get::<HumanByteSize>("snapshot.max-new-file-size")
.map(|x| x.0);
match cfg { match cfg {
Ok(0) => Ok(u64::MAX), Ok(0) => Ok(u64::MAX),
x @ Ok(_) => x, x @ Ok(_) => x,
@ -247,3 +251,107 @@ impl<T> ConfigResultExt<T> for Result<T, config::ConfigError> {
} }
} }
} }
/// A size in bytes optionally formatted/serialized with binary prefixes
///
/// The wrapped `u64` is the exact byte count. Formatting with `Display`
/// converts it to `f32` and prints one decimal place with a binary prefix, so
/// the displayed value is approximate. Deserialization accepts either an
/// integer or a string with an optional binary unit.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub struct HumanByteSize(pub u64);
impl std::fmt::Display for HumanByteSize {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let (value, prefix) = binary_prefix(self.0 as f32);
write!(f, "{value:.1}{prefix}B")
}
}
impl<'de> serde::Deserialize<'de> for HumanByteSize {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
use serde::de::Error;
struct Visitor;
impl<'de> serde::de::Visitor<'de> for Visitor {
type Value = HumanByteSize;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(formatter, "a size in bytes with an optional binary unit")
}
fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
where
E: Error,
{
Ok(HumanByteSize(v))
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: Error,
{
let bytes = parse_human_byte_size(v).map_err(Error::custom)?;
Ok(HumanByteSize(bytes))
}
}
if deserializer.is_human_readable() {
deserializer.deserialize_any(Visitor)
} else {
deserializer.deserialize_u64(Visitor)
}
}
}
/// Parses a byte size written as a run of ASCII digits optionally followed by
/// whitespace and a binary unit.
///
/// Accepted units are `B` (or nothing) for plain bytes, and one of the
/// prefixes `K`, `M`, `G`, `T`, `P`, `E`, `Z`, `Y` optionally followed by
/// `B`, `i` or `iB`. Every prefix is interpreted as a power of 1024, so `KB`
/// and `KiB` are equivalent.
///
/// Values too large for a `u64` saturate at `u64::MAX` instead of failing.
///
/// # Errors
///
/// Returns a static message when the input does not start with a digit, when
/// the unit prefix is unknown, or when the text after the prefix is not a
/// recognized unit.
fn parse_human_byte_size(v: &str) -> Result<u64, &'static str> {
    /// Unit prefixes in ascending order; the n-th entry (counting from one)
    /// stands for 1024 to the n-th power.
    const PREFIXES: [char; 8] = ['K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];

    let digit_end = v.find(|c: char| !c.is_ascii_digit()).unwrap_or(v.len());
    if digit_end == 0 {
        return Err("must start with a number");
    }
    let (digits, trailing) = v.split_at(digit_end);

    let exponent = match trailing.trim_start() {
        "" | "B" => 0,
        unit => {
            // Pair each prefix with its exponent and peel it off the unit in
            // one step, leaving whatever follows the prefix character.
            let matched = PREFIXES
                .iter()
                .zip(1u32..)
                .find_map(|(&prefix, exp)| unit.strip_prefix(prefix).map(|rest| (exp, rest)));
            let Some((exp, suffix)) = matched else {
                return Err("unrecognized unit prefix");
            };
            if !matches!(suffix, "" | "B" | "i" | "iB") {
                return Err("unrecognized unit");
            }
            exp
        }
    };

    // A string consisting only of base 10 digits is either a valid u64 or really
    // huge.
    let factor = digits.parse::<u64>().unwrap_or(u64::MAX);
    Ok(factor.saturating_mul(1024u64.saturating_pow(exponent)))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Basic accepted spellings and the three error messages.
    #[test]
    fn byte_size_parse() {
        assert_eq!(parse_human_byte_size("0"), Ok(0));
        assert_eq!(parse_human_byte_size("42"), Ok(42));
        assert_eq!(parse_human_byte_size("42B"), Ok(42));
        assert_eq!(parse_human_byte_size("42 B"), Ok(42));
        assert_eq!(parse_human_byte_size("42K"), Ok(42 * 1024));
        assert_eq!(parse_human_byte_size("42 K"), Ok(42 * 1024));
        assert_eq!(parse_human_byte_size("42 KB"), Ok(42 * 1024));
        assert_eq!(parse_human_byte_size("42 KiB"), Ok(42 * 1024));
        assert_eq!(
            parse_human_byte_size("42 LiB"),
            Err("unrecognized unit prefix")
        );
        assert_eq!(parse_human_byte_size("42 KiC"), Err("unrecognized unit"));
        assert_eq!(parse_human_byte_size("42 KC"), Err("unrecognized unit"));
        assert_eq!(
            parse_human_byte_size("KiB"),
            Err("must start with a number")
        );
        assert_eq!(parse_human_byte_size(""), Err("must start with a number"));
    }

    /// Prefixes beyond `K` map to increasing powers of 1024.
    #[test]
    fn byte_size_parse_larger_prefixes() {
        assert_eq!(parse_human_byte_size("1M"), Ok(1 << 20));
        assert_eq!(parse_human_byte_size("1MiB"), Ok(1 << 20));
        assert_eq!(parse_human_byte_size("3GiB"), Ok(3 << 30));
        assert_eq!(parse_human_byte_size("1E"), Ok(1 << 60));
    }

    /// Values that do not fit in a `u64` saturate instead of failing.
    #[test]
    fn byte_size_parse_saturates() {
        assert_eq!(parse_human_byte_size("16E"), Ok(u64::MAX));
        assert_eq!(parse_human_byte_size("1Z"), Ok(u64::MAX));
        assert_eq!(parse_human_byte_size("1YiB"), Ok(u64::MAX));
        assert_eq!(
            parse_human_byte_size("99999999999999999999"),
            Ok(u64::MAX)
        );
        // Zero stays zero even when the unit alone overflows.
        assert_eq!(parse_human_byte_size("0Y"), Ok(0));
    }

    /// Signs and fractions are not part of the accepted grammar.
    #[test]
    fn byte_size_parse_rejects_sign_and_fraction() {
        assert_eq!(
            parse_human_byte_size("-1"),
            Err("must start with a number")
        );
        assert_eq!(
            parse_human_byte_size("1.5K"),
            Err("unrecognized unit prefix")
        );
    }
}

View file

@ -55,6 +55,7 @@ use crate::merge::Merge;
use crate::merged_tree::MergedTree; use crate::merged_tree::MergedTree;
use crate::op_store::{OperationId, WorkspaceId}; use crate::op_store::{OperationId, WorkspaceId};
use crate::repo_path::{FsPathParseError, RepoPath, RepoPathComponent, RepoPathJoin}; use crate::repo_path::{FsPathParseError, RepoPath, RepoPathComponent, RepoPathJoin};
use crate::settings::HumanByteSize;
use crate::store::Store; use crate::store::Store;
use crate::tree::{Diff, Tree}; use crate::tree::{Diff, Tree};
@ -310,11 +311,11 @@ pub enum SnapshotError {
InternalBackendError(#[from] BackendError), InternalBackendError(#[from] BackendError),
#[error(transparent)] #[error(transparent)]
TreeStateError(#[from] TreeStateError), TreeStateError(#[from] TreeStateError),
#[error("New file {path} of size {size} exceeds snapshot.max-new-file-size ({max_size})")] #[error("New file {path} of size ~{size} exceeds snapshot.max-new-file-size ({max_size})")]
NewFileTooLarge { NewFileTooLarge {
path: PathBuf, path: PathBuf,
size: u64, size: HumanByteSize,
max_size: u64, max_size: HumanByteSize,
}, },
} }
@ -875,8 +876,8 @@ impl TreeState {
{ {
return Err(SnapshotError::NewFileTooLarge { return Err(SnapshotError::NewFileTooLarge {
path: entry.path().clone(), path: entry.path().clone(),
size: metadata.len(), size: HumanByteSize(metadata.len()),
max_size: max_new_file_size, max_size: HumanByteSize(max_new_file_size),
}); });
} }
if let Some(new_file_state) = file_state(&metadata) { if let Some(new_file_state) = file_state(&metadata) {