# -*- coding: utf-8 -*-
"""Spyder Editor

Load CAC40 closing prices, compute log-returns at a chosen time scale,
and select a random subset of stocks for portfolio experiments
(mean/covariance estimation for the efficient-frontier plots below).
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Prices indexed by date; one column per CAC40 stock code.
# NOTE(review): assumes 'cac40_30.csv' is ';'-separated with dates in the
# first column — confirm against the data file.
data = pd.read_csv('cac40_30.csv', sep=';', index_col=0)
data.head()
data.keys()

# Plot histograms of the prices, discard the output.
_ = data.hist(bins=30, figsize=(15, 15))

# Compute the returns.
cac40codes = data.keys()

# These would be daily returns:
# returns = np.log(data / data.shift(1))
# These are annual returns (~250 trading days per year):
returns = np.log(data / data.shift(250))

# Plot histogram of returns.
_ = returns.hist(bins=30, figsize=(15, 15))

n = 10
# Draw n stocks at random without repetition.
stocks = np.random.choice(cac40codes, n, replace=False)

# Choose 'day' for day-to-day returns, 'year' for annualized returns, and
# so on.  Note: any difference between these choices is due either to a
# non-temporal stability of the estimations or to fractal / scaling
# properties, i.e., one sees different things at different scales.
# Dictionary mapping each time scale to its shift in trading days.
time_scale_shifts_dict = {'day': 1, 'week': 5, 'year': 250}
time_scale = 'day'
# time_scale = 'year'
time_scale_shift = time_scale_shifts_dict[time_scale]

# Compute M and Sigma: mean return and covariance of the selected stocks'
# returns at the chosen time scale.
returns_an = np.log(data[stocks] / data[stocks].shift(time_scale_shift))

# Perturb the data a little bit: take +/- perturb (fractional, e.g. 0.05
# for 5%).  Validate with an explicit raise rather than assert, which is
# silently stripped under `python -O`.
perturb = 0.0
if not 0.0 <= perturb < 1.0:
    raise ValueError('perturb must be in [0, 1), got %r' % (perturb,))
# Apply the multiplicative +/- perturb noise to the returns.
returns_an = returns_an * np.random.uniform(1. - perturb, 1 + perturb, returns_an.shape)
cov_an = np.array(returns_an.cov())      # Sigma matrix
inverse_cov = np.linalg.inv(cov_an)      # inverse of Sigma
returns_an_mean = returns_an.mean()
returns_an_mean = np.array(returns_an_mean)[:, None]   # column vector M

no_portef = 1000   # number of random portfolios to draw

# Plot with x-axis = std deviation, y-axis = mean return.
return_portef = []
volatility_portef = []
for _ in range(no_portef):
    # Draw a random portfolio without riskless security; recentre the
    # weights so they sum to 1 (short selling allowed).
    weights = np.random.uniform(-1. / n, 2. / n, (n, 1))
    weights = weights - np.mean(weights) + 1.0 / n
    # weights = weights/np.sum(weights)
    # Compute return and volatility of this portfolio.
    creturn = float(weights.T @ returns_an_mean)
    return_portef.append(creturn)
    current_volatility = np.sqrt(float(weights.T @ cov_an @ weights))
    volatility_portef.append(current_volatility)

# Scatter plot of the random portfolios.
plt.figure(3)
plt.plot(volatility_portef, return_portef, '.b')
plt.xlabel('sigma')
plt.ylabel('return')
# str(n): np.str was removed from NumPy (deprecated 1.20, removed 1.24).
plt.title('Return vs. sigma for n=' + str(n))

# Compute and draw the efficient frontier on the same graph.
# Compute '1_M', 'a' and 'b' factors as in the course document.
onesM = np.ones_like(returns_an_mean)
# float(...): np.float was removed from NumPy (deprecated 1.20, removed 1.24).
a = float(onesM.T @ inverse_cov @ onesM)
b = float(onesM.T @ inverse_cov @ returns_an_mean)

# Draw the frontier and its mirror image about the minimum-variance return.
sigmarange = np.linspace(1. / np.sqrt(a) + 1.e-10,
                         1.1 * np.max(volatility_portef), 47)
# Compute the optimal portfolio return for these sigma values.
excess = returns_an_mean - b / a * onesM
factor = float(excess.T @ inverse_cov @ excess)
optimal_return = b / a + np.sqrt(sigmarange**2 - 1. / a) * np.sqrt(factor)

# Factor to rescale the variance to obtain an annual one.
rescale_time_factor = time_scale_shifts_dict['year'] / time_scale_shift
sigma_fact = np.sqrt(rescale_time_factor) * 100   # volatility -> %/year
mean_fact = rescale_time_factor * 100             # return -> %/year

plt.figure(4)
# The lower branch is the reflection of the frontier about the
# minimum-variance return b/a; it is a return, so it must be scaled by
# mean_fact (the original used sigma_fact here, mixing units).
plt.plot(np.array(volatility_portef) * sigma_fact,
         mean_fact * np.array(return_portef), '.b',
         sigmarange * sigma_fact, mean_fact * optimal_return, '.r',
         sigmarange * sigma_fact,
         2 * b / a * mean_fact - mean_fact * optimal_return, '.r')
plt.xlabel('sigma (/year %)')
plt.ylabel('return(%)')
# str(n): np.str was removed from NumPy (deprecated 1.20, removed 1.24).
plt.title('Return vs. sigma for n=' + str(n))
# plt.legend(['Random returns','efficient frontier+','efficient frontier-'])

# When n increases it becomes difficult to find the optimal frontier, but
# a large sample size (no_portef) helps.

# Keep a copy of the annualized frontier so a later run can be compared
# against it (see the commented figure-5 overlay below).
backupsigma = sigmarange * sigma_fact
backup_return = mean_fact * optimal_return
# old_backupsigma = backupsigma
# old_backup_return = backup_return
# plt.figure(5)
# plt.plot(backupsigma, backup_return, '-r', old_backupsigma, old_backup_return, '-b')
# plt.xlabel('sigma (/year %)')
# plt.ylabel('return(%)')