# -*- coding: utf-8 -*-
"""Spyder Editor

Load CAC40 closing prices, compute log-returns at a chosen time scale,
and select a random subset of stocks for portfolio experiments
(mean/covariance estimation for the efficient-frontier plots below).
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Prices indexed by date; one column per CAC40 stock code.
# NOTE(review): assumes 'cac40_30.csv' is ';'-separated with dates in the
# first column — confirm against the data file.
data = pd.read_csv('cac40_30.csv', sep=';', index_col=0)
data.head()
data.keys()

# Plot histograms of the prices, discard the output.
_ = data.hist(bins=30, figsize=(15, 15))

# Compute the returns.
cac40codes = data.keys()

# These would be daily returns:
# returns = np.log(data / data.shift(1))
# These are annual returns (~250 trading days per year):
returns = np.log(data / data.shift(250))

# Plot histogram of returns.
_ = returns.hist(bins=30, figsize=(15, 15))

n = 10
# Draw n stocks at random without repetition.
stocks = np.random.choice(cac40codes, n, replace=False)

# Choose 'day' for day-to-day returns, 'year' for annualized returns, and
# so on.  Note: any difference between these choices is due either to a
# non-temporal stability of the estimations or to fractal / scaling
# properties, i.e., one sees different things at different scales.
# Dictionary mapping each time scale to its shift in trading days.
time_scale_shifts_dict = {'day': 1, 'week': 5, 'year': 250}
time_scale = 'day'
# time_scale = 'year'
time_scale_shift = time_scale_shifts_dict[time_scale]

# Compute M and Sigma: mean return and covariance of the selected stocks'
# returns at the chosen time scale.
returns_an = np.log(data[stocks] / data[stocks].shift(time_scale_shift))

# Perturb the data a little bit: take +/- perturb (fractional, e.g. 0.05
# for 5%).  Validate with an explicit raise rather than assert, which is
# silently stripped under `python -O`.
perturb = 0.0
if not 0.0 <= perturb < 1.0:
    raise ValueError('perturb must be in [0, 1), got %r' % (perturb,))
# Apply the multiplicative +/- perturb noise to the returns.
returns_an = returns_an * np.random.uniform(1. - perturb, 1 + perturb, returns_an.shape)
cov_an = np.array(returns_an.cov())      # Sigma matrix
inverse_cov = np.linalg.inv(cov_an)      # inverse of Sigma
returns_an_mean = returns_an.mean()
returns_an_mean = np.array(returns_an_mean)[:, None]   # column vector M

no_portef = 1000   # number of random portfolios to draw

# Plot with x-axis = std deviation, y-axis = mean return.
return_portef = []
volatility_portef = []
for _ in range(no_portef):
    # Draw a random portfolio without riskless security; recentre the
    # weights so they sum to 1 (short selling allowed).
    weights = np.random.uniform(-1. / n, 2. / n, (n, 1))
    weights = weights - np.mean(weights) + 1.0 / n
    # weights = weights/np.sum(weights)
    # Compute return and volatility of this portfolio.
    creturn = float(weights.T @ returns_an_mean)
    return_portef.append(creturn)
    current_volatility = np.sqrt(float(weights.T @ cov_an @ weights))
    volatility_portef.append(current_volatility)

# Scatter plot of the random portfolios.
plt.figure(3)
plt.plot(volatility_portef, return_portef, '.b')
plt.xlabel('sigma')
plt.ylabel('return')
# str(n): np.str was removed from NumPy (deprecated 1.20, removed 1.24).
plt.title('Return vs. sigma for n=' + str(n))

# Compute and draw the efficient frontier on the same graph.
# Compute '1_M', 'a' and 'b' factors as in the course document.
onesM = np.ones_like(returns_an_mean)
# float(...): np.float was removed from NumPy (deprecated 1.20, removed 1.24).
a = float(onesM.T @ inverse_cov @ onesM)
b = float(onesM.T @ inverse_cov @ returns_an_mean)

# Draw the frontier and its mirror image about the minimum-variance return.
sigmarange = np.linspace(1. / np.sqrt(a) + 1.e-10,
                         1.1 * np.max(volatility_portef), 47)
# Compute the optimal portfolio return for these sigma values.
excess = returns_an_mean - b / a * onesM
factor = float(excess.T @ inverse_cov @ excess)
optimal_return = b / a + np.sqrt(sigmarange**2 - 1. / a) * np.sqrt(factor)

# Factor to rescale the variance to obtain an annual one.
rescale_time_factor = time_scale_shifts_dict['year'] / time_scale_shift
sigma_fact = np.sqrt(rescale_time_factor) * 100   # volatility -> %/year
mean_fact = rescale_time_factor * 100             # return -> %/year

plt.figure(4)
# The lower branch is the reflection of the frontier about the
# minimum-variance return b/a; it is a return, so it must be scaled by
# mean_fact (the original used sigma_fact here, mixing units).
plt.plot(np.array(volatility_portef) * sigma_fact,
         mean_fact * np.array(return_portef), '.b',
         sigmarange * sigma_fact, mean_fact * optimal_return, '.r',
         sigmarange * sigma_fact,
         2 * b / a * mean_fact - mean_fact * optimal_return, '.r')
plt.xlabel('sigma (/year %)')
plt.ylabel('return(%)')
# str(n): np.str was removed from NumPy (deprecated 1.20, removed 1.24).
plt.title('Return vs. sigma for n=' + str(n))
# plt.legend(['Random returns','efficient frontier+','efficient frontier-'])

# When n increases it becomes difficult to find the optimal frontier, but
# a large sample size (no_portef) helps.

# Keep a copy of the annualized frontier so a later run can be compared
# against it (see the commented figure-5 overlay below).
backupsigma = sigmarange * sigma_fact
backup_return = mean_fact * optimal_return
# old_backupsigma = backupsigma
# old_backup_return = backup_return
# plt.figure(5)
# plt.plot(backupsigma, backup_return, '-r', old_backupsigma, old_backup_return, '-b')
# plt.xlabel('sigma (/year %)')
# plt.ylabel('return(%)')