# -*- coding: utf-8 -*-
"""
Interactive k-armed bandit game played from the console.

Created on Thu Feb 8 18:28:50 2024

@author: turinici
"""

import numpy as np


def initialize_bandit(k):
    """Create the initial state of a k-armed bandit.

    Args:
        k: Number of arms.

    Returns:
        A tuple ``(num_chosen, total_reward, arm_means)`` of three arrays of
        length ``k``: the number of times each arm was chosen (all zeros),
        the total reward collected from each arm (all zeros), and the true
        mean reward of each arm, drawn from a standard normal distribution.
    """
    num_chosen = np.zeros(k)
    total_reward = np.zeros(k)
    arm_means = np.random.normal(loc=0, scale=1, size=k)
    return num_chosen, total_reward, arm_means


def _print_state(num_chosen, total_reward):
    """Print the per-arm play counts and empirical average rewards."""
    print("\nBandit state:")
    print("Arm\t|\tTimes\t|\tAverage")
    print("\t|\tChosen\t|\tReward")
    print("---------------------------")
    for arm, (count, total) in enumerate(zip(num_chosen, total_reward)):
        # An arm that was never played has no average yet; show 0.00 instead
        # of dividing by zero.
        average = total / count if count > 0 else 0.0
        print(f"{arm+1}\t|\t{int(count)}\t\t|\t{average:.2f}")
    print("---------------------------")


def _print_summary(total_choices, arm_means):
    """Print the end-of-game summary revealing the true arm means."""
    print(f"Total number of choices: {total_choices}")
    print("True mean values for each arm:")
    for arm, mean in enumerate(arm_means):
        print(f"Arm {arm+1}: {mean:.2f}")
    best_arm = np.argmax(arm_means) + 1
    print(f"The best arm is: {best_arm}")


def play_bandit(k, reward_std=3):
    """Run an interactive k-armed bandit game on the console.

    Before every prompt the current state (times chosen and average reward
    per arm) is printed. The user then enters an arm number between 1 and
    ``k``, or ``q`` to quit. On quit, the total number of choices, the true
    mean of each arm and the best arm are printed.

    Args:
        k: Number of arms; must be at least 1.
        reward_std: Standard deviation of the normally distributed reward
            drawn around the chosen arm's true mean.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    # With no arms every choice would be rejected and the final argmax over
    # an empty array would fail, so refuse to start.
    if k < 1:
        raise ValueError(f"The number of arms must be at least 1, got {k}.")

    num_chosen, total_reward, arm_means = initialize_bandit(k)
    total_choices = 0

    while True:
        _print_state(num_chosen, total_reward)

        choice = input(f"\nChoose an arm (1-{k}) or press 'q' to quit: ")

        # Accept 'q' regardless of case or surrounding whitespace.
        if choice.strip().lower() == 'q':
            _print_summary(total_choices, arm_means)
            break

        try:
            choice = int(choice)
        except ValueError:
            print("Invalid choice. Please enter an integer or 'q' to quit.")
            continue

        if choice < 1 or choice > k:
            print(f"Invalid choice. Please choose an arm between 1 and {k}.")
            continue

        # Arms are shown 1-based to the user but stored 0-based.
        arm_index = choice - 1
        num_chosen[arm_index] += 1
        total_choices += 1

        # Reward is normally distributed around the chosen arm's true mean.
        reward = np.random.normal(loc=arm_means[arm_index], scale=reward_std)
        print(f"Chosen arm={choice}, reward={reward:.3f}.")

        total_reward[arm_index] += reward


def main():
    """Ask for the number of arms and start the game."""
    raw_k = input("Enter the number of arms (k): ")
    try:
        k = int(raw_k)
    except ValueError:
        raise SystemExit("The number of arms must be an integer.") from None
    if k < 1:
        raise SystemExit("The number of arms must be at least 1.")
    play_bandit(k)


if __name__ == "__main__":
    main()