Revise shutdown and initial value estimate
pkel committed May 12, 2024
1 parent 1f0fbb4 commit 837a97e
Showing 4 changed files with 77 additions and 31 deletions.
46 changes: 27 additions & 19 deletions mdp/fc16sapirshtein.py
@@ -177,25 +177,33 @@ def honest(self, s: BState) -> list[Action]:
return ADOPT

def shutdown(self, s: BState) -> list[Transition]:
-        # Rewards and progress are calculated on common chain. Terminating with
-        # a no-op is already fair.
-        # return [Transition(state=s, probability=1, reward=0, progress=0)]
-        # NOTE In principle, we could do and award a full release here, but this
-        # would change the model. Maybe evaluate this separately.
-        snew = BState(a=0, h=0, fork=IRRELEVANT)
-        if s.h > s.a:
-            return [Transition(state=snew, probability=1, reward=0, progress=s.h)]
-        if s.a > s.h:
-            return [Transition(state=snew, probability=1, reward=s.a, progress=s.a)]
-        if s.a == s.h:
-            return [
-                Transition(
-                    state=snew, probability=self.gamma, reward=s.a, progress=s.a
-                ),
-                Transition(
-                    state=snew, probability=1 - self.gamma, reward=0, progress=s.h
-                ),
-            ]
+        # Abort attack in favor of attacker; go back to start.
+        ts = []
+        for snew, p in self.start():
+            if s.h > s.a:
+                ts.append(Transition(state=snew, probability=p, reward=0, progress=s.h))
+            elif s.a > s.h:
+                ts.append(
+                    Transition(state=snew, probability=p, reward=s.a, progress=s.a)
+                )
+            elif s.a == s.h:
+                ts.append(
+                    Transition(
+                        state=snew, probability=p * self.gamma, reward=s.a, progress=s.a
+                    )
+                )
+                ts.append(
+                    Transition(
+                        state=snew,
+                        probability=p * (1 - self.gamma),
+                        reward=0,
+                        progress=s.h,
+                    )
+                )
+            else:
+                raise Exception("logic error")
+        assert mdp.sum_to_one([t.probability for t in ts])
+        return ts


mappable_params = dict(alpha=0.125, gamma=0.25)
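For intuition, the revised shutdown rule settles the attack in favor of whichever chain is longer and then restarts from self.start(). Below is a minimal standalone sketch (illustrative only, not part of the commit; plain floats instead of the model's Transition objects) of the expected reward and progress it implies for attacker height a, defender height h, and tie-breaking parameter gamma:

# Illustrative sketch only: expected reward and progress of the revised
# shutdown rule, aggregated over the restart states drawn from self.start().
def expected_shutdown(a: int, h: int, gamma: float) -> tuple[float, float]:
    if h > a:  # defenders ahead: no attacker reward, progress h
        return 0.0, float(h)
    if a > h:  # attacker ahead: reward a, progress a
        return float(a), float(a)
    # equal height: the attacker wins the race with probability gamma
    return gamma * a, gamma * a + (1 - gamma) * h

print(expected_shutdown(a=2, h=2, gamma=0.25))  # (0.5, 2.0)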
8 changes: 4 additions & 4 deletions mdp/measure-rtdp.py
@@ -28,15 +28,15 @@
dict(alpha=1 / 4, gamma=1 / 4, attacker="weak"),
dict(alpha=1 / 3, gamma=1 / 3, attacker="intermediate"),
dict(
-        alpha=0.42, gamma=0.82, attacker="strong"
+        alpha=0.42, gamma=0.84, attacker="strong"
), # TODO double check whether we can do 1/2
]

rows = [
dict(row=1, protocol="bitcoin", model="fc16", trunc=40, algo="aft20", ref=1),
-    # dict(row=2, protocol="bitcoin", model="aft20", trunc=40, algo="aft20", ref=1),
+    dict(row=2, protocol="bitcoin", model="aft20", trunc=40, algo="aft20", ref=1),
dict(row=3, protocol="bitcoin", model="fc16", trunc=40, algo="rtdp", ref=1),
-    # dict(row=4, protocol="bitcoin", model="aft20", trunc=40, algo="rtdp", ref=1),
+    dict(row=4, protocol="bitcoin", model="aft20", trunc=40, algo="rtdp", ref=1),
# dict(row=5, protocol="bitcoin", model="fc16", trunc=0, algo="rtdp", ref=1),
# dict(row=6, protocol="bitcoin", model="aft20", trunc=0, algo="rtdp", ref=1),
# dict(row=7, protocol="bitcoin", model="generic", trunc=10, algo="aft20", ref=1),
@@ -168,7 +168,7 @@ def implicit_mdp(*args, model, protocol, trunc, alpha, gamma, **kwargs):
argp.add_argument("--rtdp_eps", type=float, default=0.2, metavar="FLOAT")
argp.add_argument("--rtdp_es", type=float, default=0.9, metavar="FLOAT")
argp.add_argument("--rtdp_steps", type=int, default=50_000, metavar="INT")
argp.add_argument("--vi_delta", type=float, default=0.001, metavar="FLOAT")
argp.add_argument("--vi_delta", type=float, default=0.01, metavar="FLOAT")
args = argp.parse_args()

# Single measurement
28 changes: 26 additions & 2 deletions mdp/model.py
@@ -92,6 +92,9 @@ def actions(self, state):
else:
return self.unwrapped.actions(state)

+    def continue_probability_of_progress(self, progress):
+        return (1.0 - (1.0 / self.horizon)) ** progress
+
def apply(self, action, state):
assert state is not self.terminal

@@ -101,7 +104,7 @@ def apply(self, action, state):
if t.progress == 0.0:
transitions.append(t)
else:
-                continue_p = (1.0 - (1.0 / self.horizon)) ** t.progress
+                continue_p = self.continue_probability_of_progress(t.progress)
assert 0 < continue_p < 1
# one transition for continuing
continue_t = Transition(
@@ -136,4 +139,25 @@ def shutdown(self, state):
if state is self.terminal:
return []
else:
-            return self.unwrapped.shutdown(state)
+            ts = []
+            for t in self.unwrapped.shutdown(state):
+                continue_p = self.continue_probability_of_progress(t.progress)
+                ts.append(
+                    Transition(
+                        probability=t.probability * continue_p,
+                        state=t.state,
+                        reward=t.reward,
+                        progress=t.progress,
+                        effect=t.effect,
+                    )
+                )
+                ts.append(
+                    Transition(
+                        probability=t.probability * (1 - continue_p),
+                        state=self.terminal,
+                        reward=t.reward,
+                        progress=t.progress,
+                        effect=t.effect,
+                    )
+                )
+            return ts
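With this change, the horizon wrapper treats shutdown like any other step: a transition with progress d keeps probability mass (1 - 1/horizon) ** d on the continue branch and moves the rest to the terminal state, with the reward kept on both branches. A small standalone check with made-up numbers (not taken from the repository) that the split conserves probability:

# Made-up numbers: split one transition (probability p, progress d) the way
# the wrapped shutdown does, and check that no probability mass is lost.
horizon = 100
p, d = 0.6, 3.0

continue_p = (1.0 - 1.0 / horizon) ** d          # ~0.9703, strictly between 0 and 1
branches = [p * continue_p, p * (1.0 - continue_p)]

assert abs(sum(branches) - p) < 1e-12
print(continue_p, branches)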
26 changes: 20 additions & 6 deletions mdp/rtdp.py
@@ -164,8 +164,8 @@ def step(self):
if n_actions < 1:
# no action available, terminal state
self.reset()
-            assert state.value == 0
-            assert state.progress == 0
+            # assert state.value == 0
+            # assert state.progress == 0
return

# value iteration step:
@@ -272,7 +272,7 @@ def state_and_hash_of_full_state(self, full_state):
else:
state = State()
self.states[state_hash] = state
-            state.value = self.initial_value_estimate(full_state)
+            state.value, state.progress = self.initial_value_estimate(full_state)

return state, state_hash

@@ -283,11 +283,25 @@ def initial_value_estimate(self, full_state):
# - guide exploration by evaluating the honest policy
# - do a fair shutdown to get a partial estimate of the states potential

-        value = 0
+        v = 0.0
+        p = 0.0
        for t in self.model.shutdown(full_state):
-            value += t.probability * t.reward
+            immediate_v = t.reward
+            immediate_p = t.progress
+
+            state_hash = collision_resistant_hash(t.state)
+            if state_hash in self.states:
+                state = self.states[state_hash]
+                future_v = state.value
+                future_p = state.progress
+            else:
+                future_v = 0
+                future_p = 0
+
+            v += t.probability * (immediate_v + future_v)
+            p += t.probability * (immediate_p + future_p)

-        return value
+        return v, p

def mdp(self):
# The agent operates on a partially explored MDP,
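The initial value estimate now weights each shutdown transition by its probability and, when the successor state has already been visited, adds that state's cached value and progress estimates. A self-contained sketch of the idea (the helper and its inputs below are hypothetical, not the repository's API):

# Hypothetical sketch: transitions are (probability, reward, progress, key)
# tuples; 'known' caches (value, progress) for successor states seen before.
def initial_estimate(transitions, known):
    v = p = 0.0
    for prob, reward, progress, key in transitions:
        future_v, future_p = known.get(key, (0.0, 0.0))  # unseen successors contribute 0
        v += prob * (reward + future_v)
        p += prob * (progress + future_p)
    return v, p

# one previously visited successor ("s1") and one unseen successor ("s2")
print(initial_estimate(
    [(0.25, 2.0, 2.0, "s1"), (0.75, 0.0, 2.0, "s2")],
    {"s1": (1.0, 4.0)},
))  # (0.75, 3.0)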
