Skip to content

Commit

Permalink
A macro to implement APIs more easily
Browse files Browse the repository at this point in the history
  • Loading branch information
jaywonchung committed May 27, 2024
1 parent 399b9d5 commit 5d4dd1f
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 216 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/zeusd_fmt_lint_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ jobs:
run: cargo clippy --all -- -D warnings
working-directory: zeusd
- name: Run tests
run: cargo test --features testing
run: cargo test
working-directory: zeusd
1 change: 1 addition & 0 deletions zeusd/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions zeusd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@ authors = ["Jae-Won Chung <jwnchung@umich.edu>"]
edition = "2021"

[lib]
name = "zeusd"
path = "src/lib.rs"
doctest = false

[[bin]]
path = "src/main.rs"
name = "zeusd"

[features]
testing = []

[dependencies]
actix-web = "4"
nvml-wrapper = "0.10"
Expand All @@ -28,6 +26,7 @@ tracing-subscriber = { version = "0.3", features = ["registry", "env-filter"] }
tracing-log = "0.2.0"
tracing-actix-web = "0.7.10"
nix = { version = "0.29", default-features = false, features = ["user"] }
paste = "1"

[dev-dependencies]
once_cell = "1.7.2"
Expand Down
41 changes: 40 additions & 1 deletion zeusd/src/devices/gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ use tracing::Span;

use crate::error::ZeusdError;

pub trait GpuManager: Send + 'static {
/// A trait for structs that manage one GPU.
///
/// This trait can be used to abstract over different GPU management libraries.
/// Currently, this abstraction exists mainly to facilitate testing.
pub trait GpuManager {
fn device_count() -> Result<u32, ZeusdError>
where
Self: Sized;
Expand All @@ -18,11 +22,13 @@ pub trait GpuManager: Send + 'static {
min_clock_mhz: u32,
max_clock_mhz: u32,
) -> Result<(), ZeusdError>;
fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError>;
fn set_mem_locked_clocks(
&mut self,
min_clock_mhz: u32,
max_clock_mhz: u32,
) -> Result<(), ZeusdError>;
fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError>;
}

pub struct NvmlGpu<'n> {
Expand All @@ -34,6 +40,7 @@ impl NvmlGpu<'static> {
pub fn init(index: u32) -> Result<Self, ZeusdError> {
// `Device` needs to hold a reference to `Nvml`, meaning that `Nvml` must outlive `Device`.
// We can achieve this by leaking a `Box` containing `Nvml` and holding a reference to it.
// `Nvml` will actually live until the server terminates inside the GPU management task.
let _nvml = Box::leak(Box::new(Nvml::init()?));
let device = _nvml.device_by_index(index)?;
Ok(Self { _nvml, device })
Expand Down Expand Up @@ -69,6 +76,11 @@ impl GpuManager for NvmlGpu<'static> {
Ok(self.device.set_gpu_locked_clocks(setting)?)
}

#[inline]
fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError> {
Ok(self.device.reset_gpu_locked_clocks()?)
}

#[inline]
fn set_mem_locked_clocks(
&mut self,
Expand All @@ -79,6 +91,11 @@ impl GpuManager for NvmlGpu<'static> {
.device
.set_mem_locked_clocks(min_clock_mhz, max_clock_mhz)?)
}

#[inline]
fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError> {
Ok(self.device.reset_mem_locked_clocks()?)
}
}

/// A request to execute a GPU command.
Expand Down Expand Up @@ -183,11 +200,15 @@ pub enum GpuCommand {
min_clock_mhz: u32,
max_clock_mhz: u32,
},
/// Reset the GPU's locked clocks.
ResetGpuLockedClocks,
/// Set the GPU's memory locked clock range in MHz.
SetMemLockedClocks {
min_clock_mhz: u32,
max_clock_mhz: u32,
},
/// Reset the GPU's memory locked clocks.
ResetMemLockedClocks,
}

impl GpuCommand {
Expand Down Expand Up @@ -242,6 +263,15 @@ impl GpuCommand {
}
result
}
Self::ResetGpuLockedClocks => {
let result = device.reset_gpu_locked_clocks();
if result.is_ok() {
tracing::info!("GPU locked clocks reset");
} else {
tracing::warn!("Cannot reset GPU locked clocks");
}
result
}
Self::SetMemLockedClocks {
min_clock_mhz,
max_clock_mhz,
Expand All @@ -262,6 +292,15 @@ impl GpuCommand {
}
result
}
Self::ResetMemLockedClocks => {
let result = device.reset_mem_locked_clocks();
if result.is_ok() {
tracing::info!("Memory locked clocks reset");
} else {
tracing::warn!("Cannot reset memory locked clocks");
}
result
}
}
}
}
Loading

0 comments on commit 5d4dd1f

Please sign in to comment.