Skip to content

Commit

Permalink
Merge branch 'main' into local-version-semantics
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Nov 5, 2024
2 parents 3f107aa + d238642 commit ad1b4e4
Show file tree
Hide file tree
Showing 15 changed files with 368 additions and 137 deletions.
79 changes: 52 additions & 27 deletions crates/uv-pep440/src/version.rs
Original file line number Diff line number Diff line change
Expand Up @@ -849,12 +849,12 @@ impl FromStr for Version {
/// `min, .devN, aN, bN, rcN, <no suffix>, .postN, max`.
/// Its representation is thus:
/// * The most significant 3 bits of Byte 2 corresponds to a value in
/// the range 0-7 inclusive, corresponding to min, dev, pre-a, pre-b, pre-rc,
/// no-suffix or post releases, respectively. `min` is a special version that
/// does not exist in PEP 440, but is used here to represent the smallest
/// possible version, preceding any `dev`, `pre`, `post` or releases. `max` is
/// an analogous concept for the largest possible version, following any `post`
/// or local releases.
/// the range 0-7 inclusive, corresponding to min, dev, pre-a, pre-b,
/// pre-rc, no-suffix, post or max releases, respectively. `min` is a
/// special version that does not exist in PEP 440, but is used here to
/// represent the smallest possible version, preceding any `dev`, `pre`,
/// `post` or releases. `max` is an analogous concept for the largest
/// possible version, following any `post` or local releases.
/// * The low 5 bits combined with the bits in bytes 1 and 0 correspond
/// to the release number of the suffix, if one exists. If there is no
/// suffix, then these bits are always 0.
Expand Down Expand Up @@ -913,6 +913,20 @@ struct VersionSmall {
}

impl VersionSmall {
// Constants for each suffix kind. They form an enumeration.
//
// The specific values are assigned in a way that provides the suffix kinds
// their ordering. i.e., No suffix should sort after a dev suffix but
// before a post suffix.
//
// The maximum possible suffix value is SUFFIX_KIND_MASK. If you need to
// add another suffix value and you're at the max, then the mask must gain
// another bit. And adding another bit to the mask will require taking it
// from somewhere else. (Usually the suffix version.)
//
// NOTE: If you do change the bit format here, you'll need to bump any
// cache versions in uv that use rkyv with `Version` in them. That includes
// *at least* the "simple" cache.
const SUFFIX_MIN: u64 = 0;
const SUFFIX_DEV: u64 = 1;
const SUFFIX_PRE_ALPHA: u64 = 2;
Expand All @@ -921,12 +935,29 @@ impl VersionSmall {
const SUFFIX_NONE: u64 = 5;
const SUFFIX_POST: u64 = 6;
const SUFFIX_MAX: u64 = 7;
const SUFFIX_MAX_VERSION: u64 = 0x001F_FFFF;

// The mask to get only the release segment bits.
//
// NOTE: If you change the release mask to have more or less bits,
// then you'll also need to change `push_release` below and also
// `Parser::parse_fast`.
const SUFFIX_RELEASE_MASK: u64 = 0xFFFF_FFFF_FF00_0000;
// The mask to get the version suffix.
const SUFFIX_VERSION_MASK: u64 = 0x001F_FFFF;
// The number of bits used by the version suffix. Shifting the `repr`
// right by this number of bits should put the suffix kind in the least
// significant bits.
const SUFFIX_VERSION_BIT_LEN: u64 = 21;
// The mask to get only the suffix kind, after shifting right by the
// version bits. If you need to add a bit here, then you'll probably need
// to take a bit from the suffix version. (Which requires a change to both
// the mask and the bit length above.)
const SUFFIX_KIND_MASK: u64 = 0b111;

#[inline]
fn new() -> Self {
Self {
repr: 0x0000_0000_00A0_0000,
repr: Self::SUFFIX_NONE << Self::SUFFIX_VERSION_BIT_LEN,
release: [0, 0, 0, 0],
len: 0,
}
Expand Down Expand Up @@ -954,7 +985,7 @@ impl VersionSmall {

#[inline]
fn clear_release(&mut self) {
self.repr &= !0xFFFF_FFFF_FF00_0000;
self.repr &= !Self::SUFFIX_RELEASE_MASK;
self.release = [0, 0, 0, 0];
self.len = 0;
}
Expand Down Expand Up @@ -1007,7 +1038,7 @@ impl VersionSmall {
self.set_suffix_kind(Self::SUFFIX_NONE);
}
Some(number) => {
if number > Self::SUFFIX_MAX_VERSION {
if number > Self::SUFFIX_VERSION_MASK {
return false;
}
self.set_suffix_kind(Self::SUFFIX_POST);
Expand Down Expand Up @@ -1054,7 +1085,7 @@ impl VersionSmall {
self.set_suffix_kind(Self::SUFFIX_NONE);
}
Some(Prerelease { kind, number }) => {
if number > Self::SUFFIX_MAX_VERSION {
if number > Self::SUFFIX_VERSION_MASK {
return false;
}
match kind {
Expand Down Expand Up @@ -1097,7 +1128,7 @@ impl VersionSmall {
self.set_suffix_kind(Self::SUFFIX_NONE);
}
Some(number) => {
if number > Self::SUFFIX_MAX_VERSION {
if number > Self::SUFFIX_VERSION_MASK {
return false;
}
self.set_suffix_kind(Self::SUFFIX_DEV);
Expand Down Expand Up @@ -1130,7 +1161,7 @@ impl VersionSmall {
self.set_suffix_kind(Self::SUFFIX_NONE);
}
Some(number) => {
if number > Self::SUFFIX_MAX_VERSION {
if number > Self::SUFFIX_VERSION_MASK {
return false;
}
self.set_suffix_kind(Self::SUFFIX_MIN);
Expand Down Expand Up @@ -1163,7 +1194,7 @@ impl VersionSmall {
self.set_suffix_kind(Self::SUFFIX_NONE);
}
Some(number) => {
if number > Self::SUFFIX_MAX_VERSION {
if number > Self::SUFFIX_VERSION_MASK {
return false;
}
self.set_suffix_kind(Self::SUFFIX_MAX);
Expand All @@ -1183,30 +1214,30 @@ impl VersionSmall {

#[inline]
fn suffix_kind(&self) -> u64 {
let kind = (self.repr >> 21) & 0b111;
let kind = (self.repr >> Self::SUFFIX_VERSION_BIT_LEN) & Self::SUFFIX_KIND_MASK;
debug_assert!(kind <= Self::SUFFIX_MAX);
kind
}

#[inline]
fn set_suffix_kind(&mut self, kind: u64) {
debug_assert!(kind <= Self::SUFFIX_MAX);
self.repr &= !0x00E0_0000;
self.repr |= kind << 21;
self.repr &= !(Self::SUFFIX_KIND_MASK << Self::SUFFIX_VERSION_BIT_LEN);
self.repr |= kind << Self::SUFFIX_VERSION_BIT_LEN;
if kind == Self::SUFFIX_NONE {
self.set_suffix_version(0);
}
}

#[inline]
fn suffix_version(&self) -> u64 {
self.repr & Self::SUFFIX_MAX_VERSION
self.repr & Self::SUFFIX_VERSION_MASK
}

#[inline]
fn set_suffix_version(&mut self, value: u64) {
debug_assert!(value <= Self::SUFFIX_MAX_VERSION);
self.repr &= !Self::SUFFIX_MAX_VERSION;
debug_assert!(value <= Self::SUFFIX_VERSION_MASK);
self.repr &= !Self::SUFFIX_VERSION_MASK;
self.repr |= value;
}
}
Expand Down Expand Up @@ -1675,17 +1706,11 @@ impl<'a> Parser<'a> {
*release.get_mut(usize::from(len))? = cur;
len += 1;
let small = VersionSmall {
// Clippy warns about no-ops like `(0x00 << 16)`, but I
// think it makes the bit logic much clearer, and makes it
// explicit that nothing was forgotten.
#[allow(clippy::identity_op)]
repr: (u64::from(release[0]) << 48)
| (u64::from(release[1]) << 40)
| (u64::from(release[2]) << 32)
| (u64::from(release[3]) << 24)
| (0xA0 << 16)
| (0x00 << 8)
| (0x00 << 0),
| (VersionSmall::SUFFIX_NONE << VersionSmall::SUFFIX_VERSION_BIT_LEN),
release: [
u64::from(release[0]),
u64::from(release[1]),
Expand Down
39 changes: 12 additions & 27 deletions crates/uv-pep508/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -709,18 +709,17 @@ fn parse_url<T: Pep508Url>(
len += c.len_utf8();

// If we see a top-level semicolon or hash followed by whitespace, we're done.
match c {
';' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
'#' if cursor.peek_char().is_some_and(char::is_whitespace) => {
if cursor.peek_char().is_some_and(|c| matches!(c, ';' | '#')) {
let mut cursor = cursor.clone();
cursor.next();
if cursor.peek_char().is_some_and(char::is_whitespace) {
break;
}
_ => {}
}
}
(start, len)
};

let url = cursor.slice(start, len);
if url.is_empty() {
return Err(Pep508Error {
Expand Down Expand Up @@ -927,8 +926,6 @@ fn parse_pep508_requirement<T: Pep508Url>(
}
};

let requirement_end = cursor.pos();

// If the requirement consists solely of a package name, and that name appears to be an archive,
// treat it as a URL requirement, for consistency and security. (E.g., `requests-2.26.0.tar.gz`
// is a valid Python package name, but we should treat it as a reference to a file.)
Expand Down Expand Up @@ -959,25 +956,13 @@ fn parse_pep508_requirement<T: Pep508Url>(

// wsp*
cursor.eat_whitespace();
if let Some((pos, char)) = cursor.next() {
if marker.is_none() {
if let Some(VersionOrUrl::Url(url)) = requirement_kind {
let url = url.to_string();
for c in [';', '#'] {
if url.ends_with(c) {
return Err(Pep508Error {
message: Pep508ErrorSource::String(format!(
"Missing space before '{c}', the end of the URL is ambiguous"
)),
start: requirement_end - c.len_utf8(),
len: c.len_utf8(),
input: cursor.to_string(),
});
}
}
}
}
let message = if marker.is_none() {

if let Some((pos, char)) = cursor.next().filter(|(_, c)| *c != '#') {
let message = if char == '#' {
format!(
r#"Expected end of input or `;`, found `{char}`; comments must be preceded by a leading space"#
)
} else if marker.is_none() {
format!(r#"Expected end of input or `;`, found `{char}`"#)
} else {
format!(r#"Expected end of input, found `{char}`"#)
Expand Down
12 changes: 0 additions & 12 deletions crates/uv-pep508/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -545,18 +545,6 @@ fn error_extras_not_closed() {
);
}

#[test]
fn error_no_space_after_url() {
assert_snapshot!(
parse_pep508_err(r"name @ https://example.com/; extra == 'example'"),
@r#"
Missing space before ';', the end of the URL is ambiguous
name @ https://example.com/; extra == 'example'
^
"#
);
}

#[test]
fn error_name_at_nothing() {
assert_snapshot!(
Expand Down
37 changes: 10 additions & 27 deletions crates/uv-pep508/src/unnamed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,6 @@ fn parse_unnamed_requirement<Url: UnnamedRequirementUrl>(

// Parse the URL itself, along with any extras.
let (url, extras) = parse_unnamed_url::<Url>(cursor, working_dir)?;
let requirement_end = cursor.pos();

// wsp*
cursor.eat_whitespace();
Expand All @@ -175,23 +174,11 @@ fn parse_unnamed_requirement<Url: UnnamedRequirementUrl>(
// wsp*
cursor.eat_whitespace();
if let Some((pos, char)) = cursor.next() {
if marker.is_none() {
if let Some(given) = url.given() {
for c in [';', '#'] {
if given.ends_with(c) {
return Err(Pep508Error {
message: Pep508ErrorSource::String(format!(
"Missing space before '{c}', the end of the URL is ambiguous"
)),
start: requirement_end - c.len_utf8(),
len: c.len_utf8(),
input: cursor.to_string(),
});
}
}
}
}
let message = if marker.is_none() {
let message = if char == '#' {
format!(
r#"Expected end of input or `;`, found `{char}`; comments must be preceded by a leading space"#
)
} else if marker.is_none() {
format!(r#"Expected end of input or `;`, found `{char}`"#)
} else {
format!(r#"Expected end of input, found `{char}`"#)
Expand Down Expand Up @@ -405,15 +392,11 @@ fn parse_unnamed_url<Url: UnnamedRequirementUrl>(
len += c.len_utf8();

// If we see a top-level semicolon or hash followed by whitespace, we're done.
if depth == 0 {
match c {
';' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
'#' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
_ => {}
if depth == 0 && cursor.peek_char().is_some_and(|c| matches!(c, ';' | '#')) {
let mut cursor = cursor.clone();
cursor.next();
if cursor.peek_char().is_some_and(char::is_whitespace) {
break;
}
}
}
Expand Down
28 changes: 7 additions & 21 deletions crates/uv-pep508/src/verbatim_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,20 +413,12 @@ fn split_fragment(path: &Path) -> (Cow<Path>, Option<&str>) {
}

/// A supported URL scheme for PEP 508 direct-URL requirements.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Scheme {
/// `file://...`
File,
/// `git+git://...`
GitGit,
/// `git+http://...`
GitHttp,
/// `git+file://...`
GitFile,
/// `git+ssh://...`
GitSsh,
/// `git+https://...`
GitHttps,
/// `git+{transport}://...` as git supports arbitrary transports through gitremote-helpers
Git(String),
/// `bzr+http://...`
BzrHttp,
/// `bzr+https://...`
Expand Down Expand Up @@ -470,13 +462,11 @@ pub enum Scheme {
impl Scheme {
/// Determine the [`Scheme`] from the given string, if possible.
pub fn parse(s: &str) -> Option<Self> {
if let Some(("git", transport)) = s.split_once('+') {
return Some(Self::Git(transport.into()));
}
match s {
"file" => Some(Self::File),
"git+git" => Some(Self::GitGit),
"git+http" => Some(Self::GitHttp),
"git+file" => Some(Self::GitFile),
"git+ssh" => Some(Self::GitSsh),
"git+https" => Some(Self::GitHttps),
"bzr+http" => Some(Self::BzrHttp),
"bzr+https" => Some(Self::BzrHttps),
"bzr+ssh" => Some(Self::BzrSsh),
Expand Down Expand Up @@ -510,11 +500,7 @@ impl std::fmt::Display for Scheme {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::File => write!(f, "file"),
Self::GitGit => write!(f, "git+git"),
Self::GitHttp => write!(f, "git+http"),
Self::GitFile => write!(f, "git+file"),
Self::GitSsh => write!(f, "git+ssh"),
Self::GitHttps => write!(f, "git+https"),
Self::Git(transport) => write!(f, "git+{transport}"),
Self::BzrHttp => write!(f, "bzr+http"),
Self::BzrHttps => write!(f, "bzr+https"),
Self::BzrSsh => write!(f, "bzr+ssh"),
Expand Down
Loading

0 comments on commit ad1b4e4

Please sign in to comment.