# -*- coding: utf-8 -*- """ @ Gabriel Turinici 2022 Implementation of the multi-armed bandit """ import numpy as np import matplotlib.pyplot as plt from tqdm import tqdm k,T,M,epsilon=10,1000,2000,0.1 c_ucb=2 rewards= np.zeros((T,M)) qstar_vec=np.zeros((k,M)) for m in tqdm(range(M)): qstar = np.random.randn(k) qstar_vec[:,m]=qstar count_vector=np.zeros(k) Q=np.zeros(k)+5 for t in range(T): x = np.random.rand() if(x