Skip to content

Commit

Permalink
Guide exploration along honest policy
Browse files Browse the repository at this point in the history
  • Loading branch information
pkel committed Apr 21, 2024
1 parent ecce520 commit 7edb89b
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 7 deletions.
6 changes: 6 additions & 0 deletions mdp/aft20barzur.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ def actions(self, s: BState) -> list[Action]:
actions.append(ADOPT)
return actions

def honest(self, s: BState) -> Action:
    """
    Return the action the honest policy takes in state s.

    The result is a single action, not a list: OVERRIDE when s.a is
    strictly greater than s.h, ADOPT otherwise.
    """
    # NOTE(review): s.a / s.h are presumably the attacker's and the
    # honest chain lengths -- confirm against the BState definition.
    if s.a > s.h:
        return OVERRIDE
    else:
        return ADOPT

def apply_wait(self, s: BState) -> list[Transition]:
t = []
if s.fork != ACTIVE:
Expand Down
16 changes: 11 additions & 5 deletions mdp/mcvi_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def mcvi(model, *args, horizon=100, steps=10000, eps=0.1, report_steps=None, **kwargs):
agent = MCVI(model, eps=eps, horizon=horizon)
agent = MCVI(model, eps=eps, horizon=horizon, **kwargs)

j = 0
for i in range(steps):
Expand Down Expand Up @@ -54,8 +54,14 @@ def test_mcvi(*args, **kwargs):


if __name__ == "__main__":
model = SelfishMining(
Bitcoin(), alpha=0.30, gamma=1, maximum_size=20, merge_isomorphic=False
problem = dict(alpha=0.30, gamma=0.8)

model_a = SelfishMining(
Bitcoin(), **problem, maximum_size=20, merge_isomorphic=False
)
# mcvi(model_a, steps=1000000, report_steps=50, horizon=30, eps = 0.1, eps_honest = 0.1)

model_b = aft20barzur.BitcoinSM(**problem, maximum_fork_length=10000)
mcvi(
model_b, steps=1000000, report_steps=10000, horizon=30, eps=0.1, eps_honest=0.1
)
# model = aft20barzur.BitcoinSM(alpha=0.30, gamma=1, maximum_fork_length=10000)
mcvi(model, steps=1000000, report_steps=50, horizon=30)
6 changes: 6 additions & 0 deletions mdp/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,9 @@ def apply(self, a: Action, s: State) -> list[Transition]:
Define state transitions. Action a is applied to state s.
"""
raise NotImplementedError

def honest(self, s: State) -> Action:
    """
    Return the action an honest participant would take in state s.

    This base implementation is only a placeholder: it always raises
    NotImplementedError, so a model has to provide its own version
    before the method can be used.
    """
    raise NotImplementedError()
13 changes: 11 additions & 2 deletions mdp/monte_carlo_value_iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,17 @@ def sample(lst, p: lambda x: x[0]):


class MCVI:
def __init__(self, model: Model, *args, horizon: int, eps: float):
def __init__(
self, model: Model, *args, horizon: int, eps: float, eps_honest: float = 0
):
assert 0 < eps < 1
assert 0 <= eps_honest < 1
assert horizon > 0

self.model = model
self.horizon = horizon
self.eps = eps
self.eps_honest = eps_honest

self.state = None # current model state
self.state_id = None # current integer state
Expand Down Expand Up @@ -114,9 +118,14 @@ def step(self):

# epsilon greedy policy
i = max_i
if random.random() < self.eps:
x = random.random()
if x < self.eps:
# explore randomly
i = random.randrange(n)
elif x < self.eps + self.eps_honest:
# explore along honest policy
a = self.model.honest(state)
i = actions.index(a)

# apply action & transition
to = sample(action_transitions[i], lambda x: x.probability)
Expand Down
11 changes: 11 additions & 0 deletions mdp/sm.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,17 @@ def actions(self, s: State) -> list[Action]:

return actions

def honest(self, s: State) -> Action:
    """
    Pick the honest policy's action for state s.

    The state is loaded into self.editor, then the first matching
    rule wins:

    1. Release(0) if the editor's to_release() is non-empty,
    2. Consider(0) if the editor's to_consider() is non-empty,
    3. Continue() otherwise.
    """
    editor = self.editor
    editor.load(s)

    # Priority order: release, then consider, then continue.
    # to_consider() is only queried when nothing is left to release.
    if len(editor.to_release()) != 0:
        chosen = Release(0)
    elif len(editor.to_consider()) != 0:
        chosen = Consider(0)
    else:
        chosen = Continue()
    return chosen

def apply(self, a: Action, s: State) -> list[Transition]:
if isinstance(a, Release):
return self.apply_release(a.i, s)
Expand Down

0 comments on commit 7edb89b

Please sign in to comment.