# -*- coding: utf-8 -*-
"""
Created on Thu Feb  8 18:28:50 2024

@author: turinici
"""

import numpy as np

def initialize_bandit(k):
    """Build the starting state of a k-armed bandit.

    Parameters
    ----------
    k : int
        Number of arms.

    Returns
    -------
    tuple of three numpy arrays, each of length ``k``:
        * per-arm pull counters, all zero;
        * per-arm accumulated rewards, all zero;
        * per-arm true mean rewards, drawn from a standard normal
          distribution (mean 0, standard deviation 1).
    """
    # The hidden "true" mean of every arm: one N(0, 1) draw per arm.
    true_means = np.random.normal(0, 1, k)

    # Nothing has been played yet, so both bookkeeping arrays start at zero.
    pull_counts = np.zeros(k)
    reward_sums = np.zeros_like(pull_counts)

    return pull_counts, reward_sums, true_means

def _print_bandit_state(num_chosen, total_reward):
    """Print a table with, for each arm, its pull count and average reward."""
    print("\nBandit state:")
    print("Arm\t|\tTimes\t|\tAverage")
    print("\t|\tChosen\t|\tReward")
    print("---------------------------")
    for arm, (count, total) in enumerate(zip(num_chosen, total_reward), start=1):
        # An arm that was never played has no observations: show 0.00 instead
        # of dividing by zero (and without biasing played arms with an epsilon).
        average = total / count if count > 0 else 0.0
        print(f"{arm}\t|\t{int(count)}\t|\t{average:.2f}")
    print("---------------------------")


def _print_summary(total_choices, arm_means):
    """Print the number of plays, the true mean of each arm and the best arm."""
    print(f"Total number of choices: {total_choices}")
    print("True mean values for each arm:")
    for arm, mean in enumerate(arm_means, start=1):
        print(f"Arm {arm}: {mean:.2f}")
    # Arms are shown to the user 1-based, hence the +1 on the argmax index.
    best_arm = np.argmax(arm_means) + 1
    print(f"The best arm is: {best_arm}")


def play_bandit(k):
    """Run an interactive k-armed bandit game on the console.

    At every round the current statistics (times chosen and average observed
    reward per arm) are printed and the user is asked to pick an arm between
    1 and ``k``. The reward of the picked arm is drawn from a normal
    distribution centred on that arm's true mean with standard deviation 3.
    Entering ``q`` (case-insensitive, surrounding whitespace ignored) or
    closing standard input ends the game and reveals the true means and the
    best arm.

    Parameters
    ----------
    k : int
        Number of arms; must be at least 1.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    # Fail early with a clear message: with zero arms the game could only
    # crash later, when looking for the best arm of an empty array.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Initialize the bandit
    num_chosen, total_reward, arm_means = initialize_bandit(k)

    total_choices = 0

    # Play the bandit game
    while True:
        _print_bandit_state(num_chosen, total_reward)

        # Prompt user to choose an arm or quit; a closed stdin counts as quit
        # so piped/redirected sessions end cleanly instead of with a traceback.
        try:
            choice = input(f"\nChoose an arm (1-{k}) or press 'q' to quit: ")
        except EOFError:
            choice = 'q'

        # Check if the user wants to quit
        if choice.strip().lower() == 'q':
            _print_summary(total_choices, arm_means)
            break

        # Convert input to integer
        try:
            choice = int(choice)
        except ValueError:
            print("Invalid choice. Please enter an integer or 'q' to quit.")
            continue

        # Check if the choice is valid
        if choice < 1 or choice > k:
            print(f"Invalid choice. Please choose an arm between 1 and {k}.")
            continue

        # Increment the number of times the chosen arm is selected
        arm_index = choice - 1
        num_chosen[arm_index] += 1
        total_choices += 1

        # Simulate reward from chosen arm (normally distributed with mean
        # arm_means[arm_index] and standard deviation 3)
        reward = np.random.normal(loc=arm_means[arm_index], scale=3)
        print(f"Chosen arm={choice}, reward={reward:.3f}.")

        # Update total reward obtained from the chosen arm
        total_reward[arm_index] += reward

if __name__ == "__main__":
    # Get the number of arms from the user. Keep asking until a positive
    # integer is entered: non-numeric text would make int() raise, and a
    # bandit with fewer than one arm cannot be played.
    while True:
        try:
            k = int(input("Enter the number of arms (k): "))
        except ValueError:
            print("Invalid input. Please enter a positive integer.")
            continue
        if k >= 1:
            break
        print("Invalid input. Please enter a positive integer.")
    play_bandit(k)