Skip to content

Commit

Permalink
mdp. prepare short term revenue and consistency MDPs
Browse files Browse the repository at this point in the history
  • Loading branch information
pkel committed Oct 31, 2023
1 parent 42d6d6f commit fc3dbe3
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 41 deletions.
2 changes: 2 additions & 0 deletions mdp/aft20barzur.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ def ptmdp(old: mdp.MDP, *args, horizon: int):
probability=term_prob * t.probability,
reward=t.reward,
progress=t.progress,
effect=t.effect,
)
)
new_transitions.append(
Expand All @@ -227,6 +228,7 @@ def ptmdp(old: mdp.MDP, *args, horizon: int):
probability=(1 - term_prob) * t.probability,
reward=t.reward,
progress=t.progress,
effect=t.effect,
)
)
n_transitions += 2
Expand Down
5 changes: 1 addition & 4 deletions mdp/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,10 @@ def handle_transition(self, state_id, action_id, to):
probability=to.probability,
reward=to.reward,
progress=to.progress,
effect=to.effect,
)
self._mdp.add_transition(state_id, action_id, t)

def record_transition(self, *args, src, act, dst, prb, rew, prg):
    """Register one transition (src --act--> dst) on the underlying MDP."""
    # Wrap the raw keyword data in a Transition record, then hand it over.
    transition = Transition(
        destination=dst,
        probability=prb,
        reward=rew,
        progress=prg,
    )
    self._mdp.add_transition(src, act, transition)

def mdp(self):
# exploration might be incomplete
while self.queue.qsize() > 0:
Expand Down
11 changes: 7 additions & 4 deletions mdp/mdp.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
from dataclasses import dataclass, field
from model import Effect
from time import time
from typing import Optional
import math
import numpy
import scipy


@dataclass(frozen=True, order=True)
class Transition:
    """One outgoing edge of an MDP state.

    Frozen and ordered so instances can live in sets and sorted containers.
    """

    probability: float  # how likely is the transition?
    destination: int  # where do we transition to?
    reward: float  # MDP reward
    progress: float  # PTO progress
    effect: Optional[Effect] = None  # additional information


action = int
Expand Down
23 changes: 18 additions & 5 deletions mdp/model.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,29 @@
from dataclasses import dataclass
from typing import TypeVar
from typing import Optional, TypeVar

State = TypeVar("State")
Action = TypeVar("Action")


# @dataclass(frozen=True, kw_only=True) # requires Python 3.10
@dataclass(frozen=True)
class Effect:
    """Side information attached to a model transition.

    Records what happened during the step: mining activity, rewards and
    progress accounted on the common chain, and how much of the defender's
    history was rewritten by the step.
    """

    blocks_mined: float  # how many blocks have been mined? (0 or 1)
    common_atk_reward: float  # attacker reward on common chain
    common_def_reward: float  # defender reward on common chain
    common_progress: float  # progress made on common chain
    defender_rewrite_length: float  # number of history entries rewritten
    defender_rewrite_progress: float  # progress rewritten instead of length
    defender_progress: float  # progress made on defender's chain


@dataclass(frozen=True)
class Transition:
    """Transition of the generic model: successor state plus effect data."""

    probability: float  # how likely is the transition?
    state: State  # where do we transition to?
    reward: float  # effect.common_atk_reward
    progress: float  # effect.common_progress
    effect: Optional[Effect] = None  # additional information


class Model:
Expand Down
110 changes: 82 additions & 28 deletions mdp/sm.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from enum import IntEnum
from dataclasses import dataclass, replace
from mdp import MDP
from model import Model, Transition
from model import Effect, Model, Transition
from protocol import Protocol, View
import numpy
import pynauty
Expand Down Expand Up @@ -548,40 +548,67 @@ def __repr__(self):
f"force_consider_own={self.force_consider_own})"
)

def history(self, block):
    """Return the chain of *block*, oldest entry first.

    Walks predecessor links via the protocol until the chain start is
    reached (predecessor returns None); *block* itself is the last entry.
    """
    e = self.editor
    hist = []
    while block is not None:
        hist.insert(0, block)
        block = self.protocol.predecessor(e, block)
    return hist

def common_history(self, hist_a, hist_b):
    """Return the longest common prefix of two histories.

    Both histories must share their first (genesis) entry; raises
    AssertionError otherwise.
    """
    assert hist_a[0] == hist_b[0], "old ca"

    i = 0
    common = []
    max_i = min(len(hist_a), len(hist_b))
    while i < max_i:
        x = hist_a[i]
        if x == hist_b[i]:
            common.append(x)
        else:
            break
        i += 1
    return common

def transition(self, *args, probability):
def transition(self, *args, probability, defender_preferred_before, block_mined):
e = self.editor

# find common history
common_history = self.common_history()
assert len(common_history) > 0
atk_pref = e.attacker_prefers()
def_pref = e.defender_prefers()

# histories
atk_hist = self.history(atk_pref)
def_hist = self.history(def_pref)
def_hist_old = self.history(defender_preferred_before)

# measure rewriting (defender old vs new)
assert len(def_hist) >= len(def_hist_old)
unchanged_history = self.common_history(def_hist_old, def_hist)
assert len(unchanged_history) > 0, "genesis cannot be rewritten"
rewrite_length = len(def_hist_old) - len(unchanged_history)
assert rewrite_length >= 0

rewrite_prg_beg = self.protocol.progress(e, unchanged_history[-1])
rewrite_prg_end = self.protocol.progress(e, defender_preferred_before)
rewrite_prg = rewrite_prg_end - rewrite_prg_beg
assert rewrite_prg >= 0.0
assert rewrite_prg == 0.0 or rewrite_length > 0.0

def_prg_was = rewrite_prg_end
def_prg_now = self.protocol.progress(e, def_pref)
def_prg_delta = def_prg_now - def_prg_was
assert def_prg_delta >= 0.0

# find common history (attacker vs defender)
common_history = self.common_history(atk_hist, def_hist)
assert len(common_history) > 0, "genesis should be agreed upon"

common_ancestor = common_history[-1]

# define which blocks to keep: only reachable blocks
reachable = set()
for entrypoint in [e.attacker_prefers(), e.defender_prefers()]:
for entrypoint in [atk_pref, def_pref]:
reachable.add(entrypoint)
reachable |= e.descendants(entrypoint)
for d in reachable.copy():
Expand Down Expand Up @@ -667,11 +694,23 @@ def reward_view(x):
else:
e.reorder_and_filter(sorted(list(keep)))

effect = Effect(
blocks_mined=1.0 if block_mined else 0.0,
common_atk_reward=rew_atk,
common_def_reward=rew_def,
common_progress=progress,
defender_rewrite_length=rewrite_length,
defender_rewrite_progress=rewrite_prg,
defender_progress=def_prg_delta,
)

return Transition(
state=e.save(),
probability=probability,
progress=progress,
reward=rew_atk,
effect=effect,
# Default to long-term revenue MDP:
reward=effect.common_atk_reward,
progress=effect.common_progress,
)

def start(self) -> list[tuple[State, float]]:
Expand Down Expand Up @@ -732,17 +771,23 @@ def apply(self, a: Action, s: State) -> list[Transition]:
def apply_release(self, i: int, s: State) -> list[Transition]:
    """Release the i-th withheld attacker block in state *s*.

    Returns a single deterministic transition; no block is mined by
    this action.
    """
    e = self.editor
    e.load(s)
    # remember defender's preference before the action, so the
    # transition can account for any rewrite of defender history
    dpb = e.defender_prefers()
    # which block will be released?
    b = e.to_release()[i]
    # mark b as released
    e.set_released(b)
    # this transition is deterministic
    lst = [
        self.transition(
            probability=1, defender_preferred_before=dpb, block_mined=False
        )
    ]
    return lst

def apply_consider(self, i: int, s: State) -> list[Transition]:
e = self.editor
e.load(s)
dpb = e.defender_prefers()
# which block will be considered?
b = e.to_consider()[i]
# mark b as considered
Expand All @@ -755,7 +800,11 @@ def apply_consider(self, i: int, s: State) -> list[Transition]:
)
e.set_attacker_prefers(pref)
# this transition is deterministic
lst = [self.transition(probability=1)]
lst = [
self.transition(
probability=1, defender_preferred_before=dpb, block_mined=False
)
]
return lst

def apply_continue(self, s: State) -> list[Transition]:
Expand Down Expand Up @@ -784,6 +833,7 @@ def _apply_continue(
):
e = self.editor
e.load(s)
dpb = e.defender_prefers()
prob = 1.0
# blocks just released by attacker
from_attacker = e.just_released()
Expand Down Expand Up @@ -830,7 +880,11 @@ def _apply_continue(
e.defender_prefers(),
)
e.append(parents, Miner.Defender)
return self.transition(probability=prob)
return self.transition(
probability=prob,
defender_preferred_before=dpb,
block_mined=not communication_only,
)


mappable_params = dict(alpha=0.125, gamma=0.25)
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ scikit-optimize
seaborn

networkx

--no-binary=pynauty
pynauty

-e .
Expand Down

0 comments on commit fc3dbe3

Please sign in to comment.