"""
Create an equally weighted index from Yahoo! Finance stock prices
Date: 2018-06-15
"""

from __future__ import print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Global variable. Subdirectory where stock price data are stored
data_dir = 'Data'

###############################################################################
def read_data(fname):
   """Load one stock's price file from ``data_dir`` and clean it.

   Parameters
   ----------
   fname : str
      Name of a CSV file inside ``data_dir``.  The code reads the columns
      'Stock Splits', 'Volume', 'High' and 'Low'.

   Returns
   -------
   pandas.DataFrame
      The file contents, labelled 0..n-1 in file order, minus the rows
      identified as holidays (the labels of the remaining rows are kept,
      so gaps may appear).  An empty frame with the same columns is
      returned when the stock is excluded because of stock splits.
   """
   fn = os.path.join(data_dir, fname)
   data = pd.read_csv(fn)

   # Exclude stocks that have stock splits.  The file is accepted only when
   # the 'Stock Splits' column sums to the number of rows.
   # NOTE(review): presumably the column holds 1 on every ordinary day --
   # confirm against the data source.
   n = len(data)
   data.index = range(n)
   if data['Stock Splits'].sum() != n:
      print('%s Stock split and excluded!' % (fname))
      return data[0:0]

   # Exclude days corresponding to Japanese holidays: no volume traded and
   # a flat price (High == Low).  A single boolean mask replaces the former
   # row-by-row drop, which copied the frame once per dropped row.
   is_holiday = (data['Volume'] == 0) & (data['High'] == data['Low'])
   return data[~is_holiday]

###############################################################################
def is_end_of_quarter(d):
   """Flag the dates after which a quarter-ending month is left.

   Parameters
   ----------
   d : sequence of int
      Dates written as YYYYMMDD integers (the month is read from
      characters 4-5 of the decimal representation).

   Returns
   -------
   list of bool
      One flag per difference between consecutive dates, plus one.
      Position k is True when d[k+1] - d[k] exceeds 31 (a change of month)
      and the month of d[k-1] is a multiple of 3.  Position 0 and the last
      position are never flagged.  A line 'k date month' is printed for
      every flagged position.
   """
   gaps = np.diff(d)

   # One flag per date: len(gaps) + 1 entries, all False to start with
   flags = [False] * (len(gaps) + 1)

   for pos, gap in enumerate(gaps):
      # Position 0 is never examined; a gap of 31 or less is no month change
      if pos == 0 or not gap > 31:
         continue

      # The month is taken from the date just before position pos
      previous = d[pos - 1]
      month = int(str(previous)[4:6])
      if month % 3 != 0:
         continue

      flags[pos] = True
      print('%d %d %d' % (pos, previous, month))

   return flags

###############################################################################

# Obtain the list of stock data in the sub-directory data_dir.
# os.listdir() returns names in arbitrary order, so sort them: the first
# usable stock defines the date index and the column order of the table.
stocks = sorted(os.listdir(data_dir))
n_stocks = len(stocks)

# Collect the closing prices of every stock kept by read_data().
# Stocks with splits come back empty and are skipped; this now also applies
# to the first file, which used to be taken unconditionally.
closes = {}
codes = []
for fname in stocks:
   data = read_data(fname)
   if len(data) == 0:                # stocks with splits are skipped
      continue

   data.index = pd.to_datetime(data.Date, format = '%Y-%m-%d')
   ticker = fname[0:4]
   if ticker not in closes:
      codes.append(ticker)
   closes[ticker] = data.Close

if not codes:
   raise SystemExit('No usable stock data found in %s' % data_dir)

# One column per stock, aligned on the dates of the first usable stock
q = pd.DataFrame(closes, index = closes[codes[0]].index, columns = codes)
n, nc = q.shape

# Replace today's nan by previous day's price.
# NOTE: a nan on the very first date has no previous price and stays nan.
q = q.ffill()

# Find the end of quarter for re-balancing
d = q.index.strftime('%Y%m%d')
d = d.astype(int)
Q = is_end_of_quarter(d)

# Initially 1 million yens on each stock.
# Thus the portfolio value is nc * 1000000, where nc = number of components

money_per_stock = 1000000

# Divisor to make the initial index value 1000.  Derived from the number of
# components; it equals the former hard-coded 102000 when nc is 102.
divisor = nc * money_per_stock / 1000.0

prices = q.values
ew = np.zeros(n, dtype = float)

# Initial number of shares for each stock
shares = money_per_stock / prices[0]

for i in range(n):
   p = prices[i]

   mc = np.sum(shares * p)   # market cap

   if Q[i]:
      # Re-balance: spread the current market cap equally over all stocks
      ds = q.index[i].strftime('%Y-%m-%d')
      money_per_stock = mc/nc
      shares = money_per_stock/p
      print("Rebalancing done on %s. Total shares = %0.2f" % (ds, np.sum(shares)))

   ew[i] = mc/divisor

# Create a time series of index levels
ts = pd.Series(ew, index=q.index)

# These two holidays were not dropped in the first pass in read_data(fname):
# https://www.timeanddate.com/holidays/japan/2005#!hol=1
missed_holidays = pd.to_datetime(['2005-11-03', '2006-07-17'])
ts = ts[~ts.index.isin(missed_holidays)]
ts = ts.dropna()

# Save the time series to a csv file called ew.csv
ts.to_csv('ew.csv')

# Plot the index
ts.plot()
plt.title('Equally Weighted, Re-balanced Quarterly')
plt.show()
   




