-
Notifications
You must be signed in to change notification settings - Fork 0
/
bpInference.py
90 lines (74 loc) · 2.18 KB
/
bpInference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import gym
import math
import random
import time
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from bpNet import bpPNet
from utils import DEVICE
# Set up the environment.
env = gym.make('BipedalWalker-v3')

# Import the IPython display helper only when matplotlib reports an inline
# (notebook) backend.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# Put matplotlib into interactive mode.
plt.ion()

# The network's input/output widths are taken from the first dimension of the
# environment's observation and action spaces.
num_observations = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]

# Build the policy network, load saved parameters and move it to DEVICE.
# NOTE(review): the checkpoint path is relative to the working directory and
# uses backslash separators, so it presumably only resolves on Windows when
# launched from the project root -- confirm before running elsewhere.
net = bpPNet(num_observations, num_actions)
net.load(r"tasks\22.01.24-17.59_continue_training\pNet")
net.to(DEVICE)
# Presumably switches the network to evaluation mode (bpPNet looks like a
# torch.nn.Module) -- TODO confirm against bpNet.
net.eval()

# Playback-control flags: written by key_press() and polled by main().
want_to_exit = False
want_to_restart = False
paused = False
def key_press(key, mod):
    """Window key-press callback that drives the playback flags.

    Every received key code is echoed to stdout. Three codes are acted on:
    32 (space) toggles ``paused``, 65307 (escape) sets ``want_to_exit`` and
    114 (R) sets ``want_to_restart``. Any other code only gets echoed.

    Args:
        key: Integer key code delivered with the event.
        mod: Modifier state delivered with the event; not used.
    """
    global paused, want_to_exit, want_to_restart

    space_code, escape_code, restart_code = 32, 65307, 114

    print(key)
    if key == space_code:
        paused = not paused
        print(
            "space key pressed, game is paused"
            if paused
            else "space key pressed, game is unpaused"
        )
    elif key == escape_code:
        want_to_exit = True
        print("escape key pressed, exiting")
    elif key == restart_code:
        want_to_restart = True
        print("R pressed, restarting")
def main():
    """Play the loaded policy in the rendered environment until the user quits.

    Episodes run back to back: the environment is reset, the start pose is
    shown for about 0.7 s, then the network picks an action every frame until
    the episode reports ``done``. Keyboard control comes from ``key_press``,
    which is installed on the viewer window and sets the module-level flags
    polled here:

    * ``paused``          -- keep rendering but stop stepping the environment.
    * ``want_to_restart`` -- abandon the current episode and start a new one.
    * ``want_to_exit``    -- leave the loop and close the environment.

    The environment is closed on every exit path, including exceptions.
    """
    global paused, want_to_exit, want_to_restart

    try:
        # The first render creates the viewer whose window receives the key
        # handler. Reset beforehand so there is state to draw; some gym
        # versions cannot render an environment that was never reset.
        env.reset()
        env.render()
        env.unwrapped.viewer.window.on_key_press = key_press

        while True:
            want_to_restart = False
            observation = env.reset()

            # Show the start pose for ~0.7 s (7 ticks of 0.1 s). Render on
            # every tick: the flags only change when the window delivers key
            # events, and in gym's classic pyglet viewer that appears to
            # happen inside render() -- without it a key pressed during the
            # wait would go unnoticed until the episode starts.
            for _ in range(7):
                env.render()
                if want_to_exit or want_to_restart:
                    break
                time.sleep(0.1)
            if want_to_exit:
                break
            if want_to_restart:
                continue

            done = False
            while not done:
                env.render()
                if not paused:
                    # Pure inference: no autograd graph is needed.
                    with torch.no_grad():
                        action = net.select_action(
                            torch.as_tensor(observation, device=DEVICE),
                            sigma=0.1,
                        ).cpu().detach()
                    observation, _reward, done, _info = env.step(action)
                # Throttle every frame, paused or not, so a paused viewer
                # does not spin the CPU at full speed.
                time.sleep(0.01)
                if want_to_exit:
                    print("break")
                    break
                if want_to_restart:
                    print("restart")
                    break

            # Leave right away on a quit request instead of resetting and
            # rendering one more episode first.
            if want_to_exit:
                break
    finally:
        # Always release the viewer window and simulation resources.
        env.close()
# Start the interactive viewer only when this file is executed as a script,
# so that importing the module does not launch the playback loop.
if __name__ == "__main__":
    main()