# -*- coding: utf-8 -*-
"""Compute the value function for the Frozen Lake using Bellman iteration """
# !pip install gymnasium
# !pip install pygame
import time

import numpy as np
import matplotlib.pyplot as plt
import gymnasium as gym


def value_iteration(env, gamma=0.95, theta=1e-10):
    """Run value iteration on a tabular Gymnasium environment.

    Args:
        env: environment with discrete observation/action spaces whose base
            (unwrapped) env exposes a transition model
            P[s][a] -> list of (prob, next_state, reward, done) tuples.
        gamma: discount factor in [0, 1).
        theta: convergence threshold on the maximum Bellman error per sweep.

    Returns:
        (V, Q): converged state-value array of shape (num_states,) and
        action-value table of shape (num_states, num_actions).
    """
    # get total states number and actions
    num_states = env.observation_space.n
    num_actions = env.action_space.n
    # gym.make returns a wrapped env (e.g. TimeLimit); wrappers do not
    # reliably forward the custom attribute `P`, so read it from the base env.
    P = env.unwrapped.P
    V = np.zeros(num_states)  # Initialize value function
    while True:
        delta = 0.0
        for s in range(num_states):  # loop over states
            v = V[s]
            q_values = np.zeros(num_actions)  # initialize action-value table
            for a in range(num_actions):  # loop over actions
                # Bellman backup: Q(s,a) = sum_s' p(s'|s,a) [r + gamma V(s')].
                # Terminal states self-loop with reward 0 in FrozenLake, so
                # their value stays 0 without special-casing `done`.
                for prob, next_state, reward, done in P[s][a]:
                    q_values[a] += prob * (reward + gamma * V[next_state])
            V[s] = np.max(q_values)  # greedy improvement: V(s) = max_a Q(s,a)
            delta = max(delta, np.abs(v - V[s]))  # track max error this sweep
        if delta < theta:  # check convergence
            print('arrived at Bellman error=', delta)
            value_iteration.last_delta = delta
            break
    # computes Q table from the converged V
    Q = np.zeros((num_states, num_actions))
    for s in range(num_states):
        for a in range(num_actions):
            for prob, next_state, reward, done in P[s][a]:
                Q[s][a] += prob * (reward + gamma * V[next_state])
    # returns V,Q
    return V, Q


if __name__ == "__main__":
    env = gym.make('FrozenLake-v1', is_slippery=True, render_mode='rgb_array')
    V, Q = value_iteration(env, gamma=0.5)  # Bellman iterations call
    print("Converged value function V:\n", np.round(V.reshape((4, 4)), 2),
          "\n Converged Q table:\n", np.round(Q, 3))

    # Roll out the greedy policy w.r.t. Q and display each rendered frame.
    state = env.reset()[0]
    step = 0
    while True:
        action = np.argmax(Q[state, :])
        # Gymnasium's step() returns separate terminated/truncated flags;
        # the episode is over when either one is set.
        next_state, reward, terminated, truncated, _ = env.step(action)
        screen = env.render()
        plt.imshow(screen)
        plt.title(f"Step : {step}")
        plt.axis('off')
        plt.show()
        if terminated or truncated:
            break
        state = next_state
        step += 1
        time.sleep(0.5)
    env.close()