Skip to content

Commit

Permalink
First working version!
Browse files (browse the repository at this point in the history)
  • Loading branch information
tonowak committed Jan 8, 2023
1 parent ee8162c commit cb43a45
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 118 deletions.
2 changes: 2 additions & 0 deletions scripts/run_on_popular_crates/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
crate_data
Cargo.lock
target
crate_current/Cargo.*
crate_baseline/Cargo.*
8 changes: 3 additions & 5 deletions scripts/run_on_popular_crates/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
reqwest = { version = "0.11.13", features = ["blocking"] }
flate2 = "1.0.25"
tar = "0.4.38"
anyhow = "1.0"
csv = "1.1"
serde = { version = "1", features = ["derive"] }
semver = "1"
crates_io_api = "0.8"
cargo_toml = "0.13.0"
toml = "0.5.9"
Empty file.
Empty file.
199 changes: 86 additions & 113 deletions scripts/run_on_popular_crates/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,127 +1,100 @@
// use std::io;
// use std::fs::File;
// use core::error::Error;
use serde::Deserialize;
use std::collections::BTreeMap;
use std::path::Path;
use std::time::Duration;
use std::collections::HashMap;
use std::process::Command;
use std::io::Write;
use crates_io_api::{CratesQuery, Sort};

fn download_crates_data() -> String {
println!("Downloading db-dump.tar.gz (it can take a while)");
let resp = reqwest::blocking::get("https://static.crates.io/db-dump.tar.gz").unwrap();
let body = resp.bytes().unwrap();
/// To get the rustdoc of the baseline, we first create a placeholder project somewhere
/// with the baseline as a dependency, and run `cargo rustdoc` on it.
fn create_rustdoc_manifest_for_crate_version(
name: &str,
version: &str,
features: &HashMap<String, Vec<String>>
) -> cargo_toml::Manifest<()> {
use cargo_toml::*;

println!("Unpacking db-dump.tar.gz");
let tar = flate2::read::GzDecoder::new(body.as_ref());
let mut archive = tar::Archive::new(tar);
let crate_data_dir = "crate_data/";
let _ = std::fs::remove_dir_all(crate_data_dir);
archive.unpack(crate_data_dir).unwrap();
let mut paths = std::fs::read_dir(crate_data_dir).unwrap();
paths
.next()
.unwrap()
.unwrap()
.path()
.to_str()
.unwrap()
.to_string()
Manifest::<()> {
package: {
let mut package = Package::new(name, version);
package.publish = Inheritable::Set(Publish::Flag(false));
Some(package)
},
workspace: Some(Workspace::<()>::default()),
lib: {
let product = Product {
path: Some("lib.rs".to_string()),
..Product::default()
};
Some(product)
},
dependencies: {
let project_with_features = DependencyDetail {
version: Some(version.to_string()),
features: features
.iter()
.map(|(key, _values)| key.clone())
.collect(),
..DependencyDetail::default()
};
let mut deps = DepsSet::new();
deps.insert(
name.to_string(),
Dependency::Detailed(project_with_features),
);
deps
},
..Default::default()
}
}

#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct VersionData {
checksum: String,
crate_id: u64,
crate_size: Option<u64>,
created_at: String,
downloads: u64,
features: String,
id: u64,
license: String,
links: String,
num: String,
published_by: String,
updated_at: String,
yanked: String,
fn save_manifest(manifest: cargo_toml::Manifest<()>, path: &str) -> anyhow::Result<()> {
std::fs::write(path, toml::to_string(&manifest)?)?;
Ok(())
}

#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct RawCrateData {
created_at: String,
description: String,
documentation: String,
downloads: u64,
homepage: String,
id: u64,
max_upload_size: String,
name: String,
readme: String,
repository: String,
updated_at: String,
/// Runs the locally built `cargo-semver-checks` binary on the `crate_current`
/// project, using `crate_baseline/Cargo.toml` as the baseline manifest.
///
/// The child's captured stdout and then its captured stderr are both echoed to
/// this process's stdout, followed by an empty line. The child's exit status
/// is not inspected.
///
/// # Errors
///
/// Returns an error if the child process cannot be run or if writing to
/// stdout fails.
fn run_checks_on_manifest() -> anyhow::Result<()> {
    // NOTE(review): the program path is relative and `current_dir` is also set;
    // how the two combine is platform-specific per the `std::process::Command`
    // documentation — confirm on the platforms this script is run on.
    let mut checker = Command::new("../../../target/debug/cargo-semver-checks");
    checker
        .current_dir("crate_current")
        .arg("semver-checks")
        .arg("check-release")
        .arg("--baseline-root=../crate_baseline/Cargo.toml");
    let captured = checker.output()?;

    // Both streams go to stdout, stdout first.
    let mut sink = std::io::stdout();
    for stream in [&captured.stdout, &captured.stderr] {
        sink.write_all(stream)?;
    }
    println!();
    Ok(())
}

/// One crate kept by `parse_crates`, with its download count and parsed versions.
#[derive(Debug)]
struct CrateData {
/// Crate name, taken from the crate's record.
name: String,
/// Download count of the crate; `parse_crates` orders its result by this
/// value, highest first.
downloads: u64,
/// Versions of the crate that parsed as semver, sorted ascending by
/// `parse_crates`.
versions: Vec<semver::Version>,
}
fn main() -> anyhow::Result<()> {
let client = crates_io_api::SyncClient::new(
"crates_io_api: cargo-semver-checks (obi1kenobi82@gmail.com)",
Duration::from_millis(10),
)?;

/// Reads the versions CSV at `path` and groups version strings by crate id.
///
/// Rows whose `yanked` column equals `"t"` are skipped. The returned map goes
/// from `crate_id` to the `num` strings of the remaining rows, in the order
/// they appear in the file.
///
/// # Panics
///
/// Panics if the file cannot be opened or if a row cannot be deserialized into
/// `VersionData`; the panic message names the file.
fn parse_versions(path: &Path) -> BTreeMap<u64, Vec<String>> {
    let mut versions: BTreeMap<u64, Vec<String>> = BTreeMap::new();
    let mut reader = csv::Reader::from_path(path)
        .unwrap_or_else(|err| panic!("failed to open {}: {err}", path.display()));
    for result in reader.deserialize() {
        let version: VersionData = result
            .unwrap_or_else(|err| panic!("malformed row in {}: {err}", path.display()));
        if version.yanked != "t" {
            // A single `entry` lookup creates the bucket on first sight of the
            // crate; `num` is moved in, since `version` is not used afterwards.
            versions
                .entry(version.crate_id)
                .or_default()
                .push(version.num);
        }
    }
    versions
}
let mut query = CratesQuery::builder()
.page_size(100)
.sort(Sort::Downloads)
.build();
query.set_page(1);

fn parse_crates(path: &Path, versions: &BTreeMap<u64, Vec<String>>) -> Vec<CrateData> {
let mut crates = Vec::new();
for result in csv::Reader::from_path(path).unwrap().deserialize() {
let crate_: RawCrateData = result.unwrap();
let mut crate_versions: Vec<semver::Version> = versions
.get(&crate_.id)
.unwrap_or(&Vec::new())
.iter()
.flat_map(|s| semver::Version::parse(s))
.collect();
crate_versions.sort();
let cutoff_downloads = 100000;
if crate_.downloads >= cutoff_downloads && !crate_versions.is_empty() {
crates.push(CrateData {
name: crate_.name,
downloads: crate_.downloads,
versions: crate_versions,
})
let crate_page = client.crates(query)?;

for crate_info in crate_page.crates.into_iter() {
let versions = client.get_crate(&crate_info.name)?.versions;
for (i, version_i0) in versions.iter().enumerate() {
if i + 1 < versions.len() {
let version_i1 = &versions[i + 1];
if version_i0.yanked && !version_i1.yanked { // it gives more interesting results for testing
println!("{} {} {} {}", crate_info.name, version_i0.num, version_i1.num, version_i0.yanked);
save_manifest(create_rustdoc_manifest_for_crate_version(&crate_info.name, &version_i0.num, &version_i0.features), "crate_current/Cargo.toml")?;
save_manifest(create_rustdoc_manifest_for_crate_version(&crate_info.name, &version_i1.num, &version_i1.features), "crate_baseline/Cargo.toml")?;
run_checks_on_manifest()?;
}
}
}
}
// sort descending by downloads
crates.sort_by(|a, b| b.downloads.partial_cmp(&a.downloads).unwrap());
crates
}

fn main() {
let crate_data_dir = download_crates_data();
let crate_data_dir = &Path::new(&crate_data_dir).join("data");

println!("Parsing crates");
let versions = parse_versions(&crate_data_dir.join("versions.csv"));
let crates = parse_crates(&crate_data_dir.join("crates.csv"), &versions);

println!("Running on each crate");
for crate_data in crates {
let last_version = crate_data.versions.last().unwrap();
println!(
"{} {} {}",
crate_data.name, crate_data.downloads, last_version
);
}
Ok(())
}

0 comments on commit cb43a45

Please sign in to comment.