Skip to content

Commit

Permalink
Pass lint
Browse files Browse the repository at this point in the history
  • Loading branch information
jaywonchung committed Jul 10, 2023
1 parent b11b2cb commit 72aecf5
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 11 deletions.
6 changes: 3 additions & 3 deletions examples/imagenet/train_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,9 @@ def main_worker(gpu, ngpus_per_node, args):

if args.local_rank == 0 or args.gpu is not None:
monitor = ZeusMonitor(
gpu_indices=list(range(args.local_world_size)) if args.gpu is None else [args.gpu],
gpu_indices=list(range(args.local_world_size))
if args.gpu is None
else [args.gpu],
)
plo = GlobalPowerLimitOptimizer(
monitor=monitor,
Expand All @@ -545,8 +547,6 @@ def main_worker(gpu, ngpus_per_node, args):
monitor = None
plo = None



for epoch in range(args.start_epoch, args.epochs):
if args.local_rank == 0 or args.gpu is not None:
plo.on_epoch_begin()
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ ignore = [
"B019", # Usage of functools.lru_cache
"PLR0913", # Too many function arguments
"B905", # zip strict argument
"PLR0915", # Too many statements
]
line-length = 120

Expand Down
26 changes: 18 additions & 8 deletions zeus/optimizer/power_limit.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class Done:
@dataclass
class Measurement:
"""POD for GPU energy and time measurements for one power limit."""

power_limit: int # In Watts.
energy: float
time: float
Expand Down Expand Up @@ -187,9 +188,15 @@ def __init__(
else:
measurements = json.load(self.profile_path.open())["measurements"]
self.measurements = [Measurement(**m) for m in measurements]
self.logger.info("Loaded previous profiling results from '%s'.", str(self.profile_path))
self.logger.info(
"Loaded previous profiling results from '%s'.", str(self.profile_path)
)
optimal_power_limit = self._compute_optimal_power_limit()
self.logger.info("Optimal power limit is %d W.", optimal_power_limit // 1000)
self.logger.info(
"Optimal power limit is %d W for eta_knob %f.",
optimal_power_limit // 1000,
self.eta_knob,
)
self.state = Done(optimal_power_limit=optimal_power_limit)
self._set_power_limit(self.state.optimal_power_limit)

Expand Down Expand Up @@ -257,11 +264,13 @@ def on_step_begin(self) -> None:
"Finished profiling for power limit %d W.",
self.state.current_power_limit // 1000,
)
self.measurements.append(Measurement(
power_limit=self.state.current_power_limit // 1000,
energy=measurement.total_energy,
time=measurement.time,
))
self.measurements.append(
Measurement(
power_limit=self.state.current_power_limit // 1000,
energy=measurement.total_energy,
time=measurement.time,
)
)
# If we're done profiling all power limits, compute the optimal
# power limit and transition to the Done state. Otherwise, move
# on to the Warmup phase for the next power limit.
Expand Down Expand Up @@ -317,7 +326,8 @@ def _compute_optimal_power_limit(self) -> int:
"""
max_power = max(self.power_limits) // 1000 * len(self.monitor.gpu_indices)
cost_map = {
measurement.power_limit * 1000: zeus_cost(
measurement.power_limit
* 1000: zeus_cost(
energy=measurement.energy,
time=measurement.time,
eta_knob=self.eta_knob,
Expand Down

0 comments on commit 72aecf5

Please sign in to comment.