Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into qmm-fix3
Browse files (browse the repository at this point in the history)
  • Loading branch information
LaurentMazare committed Sep 30, 2024
2 parents 08f18af + dfe9a00 commit 42b0993
Show file tree
Hide file tree
Showing 29 changed files with 2,933 additions and 403 deletions.
18 changes: 9 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ exclude = [
resolver = "2"

[workspace.package]
version = "0.7.1"
version = "0.7.2"
edition = "2021"
description = "Minimalist ML framework."
repository = "https://github.com/huggingface/candle"
Expand All @@ -33,14 +33,14 @@ ab_glyph = "0.2.23"
accelerate-src = { version = "0.3.2" }
anyhow = { version = "1", features = ["backtrace"] }
byteorder = "1.4.3"
candle = { path = "./candle-core", package = "candle-core", version = "0.7.1" }
candle-datasets = { path = "./candle-datasets", version = "0.7.1" }
candle-flash-attn = { path = "./candle-flash-attn", version = "0.7.1" }
candle-kernels = { path = "./candle-kernels", version = "0.7.1" }
candle-metal-kernels = { path = "./candle-metal-kernels", version = "0.7.1" }
candle-nn = { path = "./candle-nn", version = "0.7.1" }
candle-onnx = { path = "./candle-onnx", version = "0.7.1" }
candle-transformers = { path = "./candle-transformers", version = "0.7.1" }
candle = { path = "./candle-core", package = "candle-core", version = "0.7.2" }
candle-datasets = { path = "./candle-datasets", version = "0.7.2" }
candle-flash-attn = { path = "./candle-flash-attn", version = "0.7.2" }
candle-kernels = { path = "./candle-kernels", version = "0.7.2" }
candle-metal-kernels = { path = "./candle-metal-kernels", version = "0.7.2" }
candle-nn = { path = "./candle-nn", version = "0.7.2" }
candle-onnx = { path = "./candle-onnx", version = "0.7.2" }
candle-transformers = { path = "./candle-transformers", version = "0.7.2" }
clap = { version = "4.2.4", features = ["derive"] }
criterion = { version = "0.5.1", default-features=false }
cudarc = { version = "0.12.1", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
Expand Down
44 changes: 3 additions & 41 deletions candle-examples/examples/clip/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ use candle_nn::{ops::softmax, VarBuilder};
use candle_transformers::models::clip;

use tokenizers::Tokenizer;
use tracing::info;

#[derive(Parser)]
struct Args {
Expand Down Expand Up @@ -40,15 +39,12 @@ fn load_image<T: AsRef<std::path::Path>>(path: T, image_size: usize) -> anyhow::
height as u32,
image::imageops::FilterType::Triangle,
);

let img = img.to_rgb8();

let img = img.into_raw();
let img = Tensor::from_vec(img, (height, width, 3), &Device::Cpu)?
.permute((2, 0, 1))?
.to_dtype(DType::F32)?
.affine(2. / 255., -1.)?;
// .unsqueeze(0)?;
Ok(img)
}

Expand All @@ -57,24 +53,16 @@ fn load_images<T: AsRef<std::path::Path>>(
image_size: usize,
) -> anyhow::Result<Tensor> {
let mut images = vec![];

for path in paths {
let tensor = load_image(path, image_size)?;
images.push(tensor);
}

let images = Tensor::stack(&images, 0)?;

Ok(images)
}

pub fn main() -> anyhow::Result<()> {
// std::env::set_var("RUST_BACKTRACE", "full");

let args = Args::parse();

tracing_subscriber::fmt::init();

let model_file = match args.model {
None => {
let api = hf_hub::api::sync::Api::new()?;
Expand All @@ -89,57 +77,39 @@ pub fn main() -> anyhow::Result<()> {
}
Some(model) => model.into(),
};

let tokenizer = get_tokenizer(args.tokenizer)?;

let config = clip::ClipConfig::vit_base_patch32();

let device = candle_examples::device(args.cpu)?;

let vec_imgs = match args.images {
Some(imgs) => imgs,
None => vec![
"candle-examples/examples/stable-diffusion/assets/stable-diffusion-xl.jpg".to_string(),
"candle-examples/examples/yolo-v8/assets/bike.jpg".to_string(),
],
};

// let image = load_image(args.image, config.image_size)?.to_device(&device)?;
let images = load_images(&vec_imgs, config.image_size)?.to_device(&device)?;

let vb =
unsafe { VarBuilder::from_mmaped_safetensors(&[model_file.clone()], DType::F32, &device)? };

let model = clip::ClipModel::new(vb, &config)?;

let (input_ids, vec_seq) = tokenize_sequences(args.sequences, &tokenizer, &device)?;

let (_logits_per_text, logits_per_image) = model.forward(&images, &input_ids)?;

let softmax_image = softmax(&logits_per_image, 1)?;

let softmax_image_vec = softmax_image.flatten_all()?.to_vec1::<f32>()?;

info!("softmax_image_vec: {:?}", softmax_image_vec);

println!("softmax_image_vec: {:?}", softmax_image_vec);
let probability_vec = softmax_image_vec
.iter()
.map(|v| v * 100.0)
.collect::<Vec<f32>>();

let probability_per_image = probability_vec.len() / vec_imgs.len();

for (i, img) in vec_imgs.iter().enumerate() {
let start = i * probability_per_image;
let end = start + probability_per_image;
let prob = &probability_vec[start..end];
info!("\n\nResults for image: {}\n", img);

println!("\n\nResults for image: {}\n", img);
for (i, p) in prob.iter().enumerate() {
info!("Probability: {:.4}% Text: {} ", p, vec_seq[i]);
println!("Probability: {:.4}% Text: {} ", p, vec_seq[i]);
}
}

Ok(())
}

Expand All @@ -156,7 +126,6 @@ pub fn get_tokenizer(tokenizer: Option<String>) -> anyhow::Result<Tokenizer> {
}
Some(file) => file.into(),
};

Tokenizer::from_file(tokenizer).map_err(E::msg)
}

Expand All @@ -169,7 +138,6 @@ pub fn tokenize_sequences(
.get_vocab(true)
.get("<|endoftext|>")
.ok_or(E::msg("No pad token"))?;

let vec_seq = match sequences {
Some(seq) => seq,
None => vec![
Expand All @@ -178,25 +146,19 @@ pub fn tokenize_sequences(
"a robot holding a candle".to_string(),
],
};

let mut tokens = vec![];

for seq in vec_seq.clone() {
let encoding = tokenizer.encode(seq, true).map_err(E::msg)?;
tokens.push(encoding.get_ids().to_vec());
}

let max_len = tokens.iter().map(|v| v.len()).max().unwrap_or(0);

// Pad the sequences to have the same length
for token_vec in tokens.iter_mut() {
let len_diff = max_len - token_vec.len();
if len_diff > 0 {
token_vec.extend(vec![pad_id; len_diff]);
}
}

let input_ids = Tensor::new(tokens, device)?;

Ok((input_ids, vec_seq))
}
22 changes: 0 additions & 22 deletions candle-examples/examples/mobileclip/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ fn load_images<T: AsRef<std::path::Path>>(
image_size: usize,
) -> anyhow::Result<Tensor> {
let mut images = vec![];

for path in paths {
let tensor = candle_examples::imagenet::load_image_with_std_mean(
path,
Expand All @@ -70,67 +69,49 @@ fn load_images<T: AsRef<std::path::Path>>(
)?;
images.push(tensor);
}

let images = Tensor::stack(&images, 0)?;

Ok(images)
}

pub fn main() -> anyhow::Result<()> {
let args = Args::parse();

let model_name = args.which.model_name();

let api = hf_hub::api::sync::Api::new()?;
let api = api.model(model_name);

let model_file = if args.use_pth {
api.get("open_clip_pytorch_model.bin")?
} else {
api.get("open_clip_model.safetensors")?
};

let tokenizer = api.get("tokenizer.json")?;

let tokenizer = Tokenizer::from_file(tokenizer).map_err(E::msg)?;

let config = &args.which.config();

let device = candle_examples::device(args.cpu)?;

let vec_imgs = match args.images {
Some(imgs) => imgs,
None => vec![
"candle-examples/examples/stable-diffusion/assets/stable-diffusion-xl.jpg".to_string(),
"candle-examples/examples/yolo-v8/assets/bike.jpg".to_string(),
],
};

let images = load_images(&vec_imgs, config.image_size)?.to_device(&device)?;

let vb = if args.use_pth {
VarBuilder::from_pth(&model_file, DType::F32, &device)?
} else {
unsafe { VarBuilder::from_mmaped_safetensors(&[model_file.clone()], DType::F32, &device)? }
};

let model = mobileclip::MobileClipModel::new(vb, config)?;

let (input_ids, vec_seq) = tokenize_sequences(args.sequences, &tokenizer, &device)?;

let (_logits_per_text, logits_per_image) = model.forward(&images, &input_ids)?;

let softmax_image = softmax(&logits_per_image, 1)?;

let softmax_image_vec = softmax_image.flatten_all()?.to_vec1::<f32>()?;

println!("softmax_image_vec: {:?}", softmax_image_vec);

let probability_vec = softmax_image_vec
.iter()
.map(|v| v * 100.0)
.collect::<Vec<f32>>();

let probability_per_image = probability_vec.len() / vec_imgs.len();

for (i, img) in vec_imgs.iter().enumerate() {
Expand Down Expand Up @@ -171,7 +152,6 @@ pub fn tokenize_sequences(
};

let mut tokens = vec![];

for seq in vec_seq.clone() {
let encoding = tokenizer.encode(seq, true).map_err(E::msg)?;
tokens.push(encoding.get_ids().to_vec());
Expand All @@ -185,8 +165,6 @@ pub fn tokenize_sequences(
token_vec.extend(vec![pad_id; len_diff]);
}
}

let input_ids = Tensor::new(tokens, device)?;

Ok((input_ids, vec_seq))
}
28 changes: 28 additions & 0 deletions candle-examples/examples/paligemma/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# PaliGemma

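[HuggingFace Model Card](https://huggingface.co/google/paligemma-3b-pt-224) -
[Model Page](https://ai.google.dev/gemma/docs/paligemma)

This example runs PaliGemma on an input image together with a text prompt
(for instance `caption fr` to request a caption in French) and prints the
generated text along with the generation speed.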

```bash
cargo run --features cuda --release --example paligemma -- \
--prompt "caption fr" --image candle-examples/examples/yolo-v8/assets/bike.jpg
```

```
loaded image with shape Tensor[dims 1, 3, 224, 224; bf16, cuda:0]
loaded the model in 1.267744448s
caption fr. Un groupe de cyclistes qui sont dans la rue.
13 tokens generated (56.52 token/s)
```

```bash
cargo run --features cuda --release --example paligemma -- \
--prompt "caption fr" --image candle-examples/examples/flux/assets/flux-robot.jpg
```

```
loaded image with shape Tensor[dims 1, 3, 224, 224; bf16, cuda:0]
loaded the model in 1.271492621s
caption fr une image d' un robot sur la plage avec le mot rouillé
15 tokens generated (62.78 token/s)
```
Loading

0 comments on commit 42b0993

Please sign in to comment.