import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Root data folder; the loader reads <current_path>/<ticker>/<year>/<daily file>.
current_path = Path('C:/Users/Yoann/Pole Keio FE Dropbox/WRDSdata/')
# Lists, not sets: the loader assigns row 0 of XY to the first ticker and row 1
# to the second, so the iteration order must be the same on every run.
tickers = ["CMS", "ES"]
years = ["2011"]
#days = {"taqAAPL2016-01-04.csv"}
#tickers = os.listdir(current_path)
#tickers = [s for s in tickers if "." not in s]
d = 252 # nb of days in a year
nbyear = 1
T = 1
dailySeconds = 23400 # number of seconds in a working day, i.e. 6.5 hours
nbsecond = 600 # sampling in seconds
# Total number of sampled observations per ticker. The builtin int replaces
# np.int, an alias that recent NumPy releases no longer provide.
n = int(d*nbyear*dailySeconds/nbsecond)

## construction of X and Y from data
# XY[i, k] receives the k-th sampled log-price of the i-th ticker; the samples of
# successive daily files are written one after another along the second axis.
XY = np.zeros((2,n))
for indticker, ticker in enumerate(tickers):
    print(ticker)
    # Path objects cannot be extended with "+=" and a str, so directories are
    # built with the "/" operator instead.
    ticker_dir = Path(current_path) / ticker
    #years = os.listdir(current_file)
    #years = [s for s in years if "20" in s]
    indn = 0  # next column of XY to fill for this ticker
    for year in years:
        print(year)
        year_dir = ticker_dir / year
        # os.listdir returns entries in arbitrary order; sort them so the daily
        # files are appended in a reproducible (name) order.
        days = sorted(s for s in os.listdir(year_dir) if "_" not in s)
        for da in days:
            print(da)
            ########################## Pre data  process #######################################
            data = pd.read_csv(year_dir / da, usecols=["ts", "price"])

            # log-price
            data["price"] = np.log(data["price"])

            # Parse data["ts"] ("HH:MM:SS[.fraction]" strings) into seconds since
            # midnight, using the same character slices for all rows at once.
            ts = data["ts"].str
            data["tsfloat"] = (
                3600 * ts[0:2].astype(int)
                + 60 * ts[3:5].astype(int)
                + ts[6:].astype(float)
            )

            # Walk a regular grid from 34200 s (09:30) up to, but excluding,
            # 57600 s (16:00) and record the first price strictly after each
            # grid time.
            timeCur = 34200
            while timeCur<57600:
                later_prices = data.loc[data["tsfloat"] > timeCur, "price"]
                if not later_prices.empty:
                    XY[indticker, indn] = later_prices.iloc[0]
                elif indn > 0:
                    # No observation after this grid time: carry the previous
                    # sample forward.
                    XY[indticker, indn] = XY[indticker, indn-1]
                else:
                    # With indn == 0 the index indn-1 would wrap around to the
                    # last column of XY, so fail loudly instead.
                    raise ValueError(
                        "no observation after the first sampling time in "
                        + str(da) + ": nothing to carry forward"
                    )
                timeCur += nbsecond
                indn += 1
    if indn < n:
        # The untouched columns keep their initial value 0.
        print("warning: {}: only {} of {} samples filled; remaining entries of XY stay 0".format(ticker, indn, n))

## estimation
# M is the range of the index m used throughout the estimation loop, and
# nb_coint - 1 is the range of the regressor index j.
M = 1
nb_coint = 2
# When truncate0 equals 1 the estimation loop passes the series through
# truncate(); otherwise the sub-sampled series are used unchanged.
truncate0 = 1
# Sub-sampling steps applied to XY (the original comment marks these as the
# paper values).
sampling_list = [1, 3, 6, 12, 39, 78]

# One independent result list per test variant; all four dictionaries share
# the same keys in the same order.
_variant_names = ('DFdemloc2', 'DFdem', 'ADFdem', 'PP_taudem', 'PP_alphadem')
rejectDictH0 = {variant: [] for variant in _variant_names}
c0 = {variant: [] for variant in rejectDictH0}
alpha = {variant: [] for variant in rejectDictH0}
rho = {variant: [] for variant in rejectDictH0}

# Placeholder label, one entry per sampling step.
modelList = ["bla" for _ in sampling_list]
nList = list()

# For every sub-sampling step: regress Y on X, form the residuals and run the
# unit-root tests on them, then store rejection rates and estimates.
# NOTE(review): truncate, toBrownianProcess2, adfuller and PhillipsPerron are used
# below but are neither imported nor defined in the visible part of this file.
for sample_step in sampling_list:
    # Keep every sample_step-th observation. Xnt is indexed [m][j][k] and Ynt
    # [m][k]; row 0 of XY is the regressor, row 1 the regressand.
    Xnt = np.array([np.array([XY[0,::sample_step] for j in range(0,nb_coint-1)]) for m in range(M)])
    Ynt = np.array([XY[1,::sample_step] for m in range(M)])
    nsub = np.shape(Xnt[0])[1]-1
    if truncate0==1:
        X = np.array([np.array([truncate(Xnt[m][j], d*T/nsub, 3, 0.49) for j in range(0,nb_coint-1)]) for m in range(M)]) # truncated X
        Y = np.array([truncate(Ynt[m], d * T / nsub, 3, 0.49) for m in range(M)])
    else:
        X = Xnt
        Y = Ynt
    #### X, Y0 and Y1 demeaned from non truncated observations
    # The mean is computed once per series and subtracted from the whole vector
    # (the element-by-element form recomputed it for every observation).
    Xdet = np.array([np.array([Xnt[m][j] - np.mean(Xnt[m][j]) for j in range(0, nb_coint - 1)]) for m in range(M)])
    Ydet = np.array([Ynt[m] - np.mean(Ynt[m]) for m in range(M)])

    #### deflated from (truncated) returns
    # Initial levels are taken from the m = 0 series and reused for every m.
    X0 = X[0][0][0]
    Y0 = Y[0][0]
    # Increments, with a leading zero so they keep the length of the levels.
    dX = np.array([np.array([np.concatenate((np.zeros(1),np.diff(X[m][j]))) for j in range(0,nb_coint-1)]) for m in range(M)])
    dY = np.array([np.concatenate((np.zeros(1), np.diff(Y[m]))) for m in range(M)])
    # toBrownianProcess2 returns a pair; element 0 is used for X, element 1 for Y.
    dXdef = np.array([np.array([toBrownianProcess2(dX[m][j],dY[m], int(np.sqrt(nsub)),T)[0] for j in range(0,nb_coint-1)]) for m in range(M)])
    Xdef = X0 + np.array([np.array([ dXdef[m][j].cumsum() for j in range(0,nb_coint-1)]) for m in range(M)])
    dYdef = np.array([toBrownianProcess2(dX[m][0],dY[m], int(np.sqrt(nsub)), T)[1] for m in range(M)])
    Ydef = Y0 + np.array([dYdef[m].cumsum()  for m in range(M)])

    ### demeaned after deflating
    n0 = np.shape(Xdef[0][0])[0]-1
    Xdetdef2 = np.array([np.array([Xdef[m][j][:n0 + 1] - np.mean(Xdef[m][j]) for j in range(0, nb_coint - 1)]) for m in range(M)])
    # Only the first n0 + 1 entries of Ydef[m] are kept; the mean is over all of them.
    Ydetdef2 = np.array([Ydef[m][:n0 + 1] - np.mean(Ydef[m]) for m in range(M)])

    #### deflated from observations
    #Xdef2 = np.array([np.array([toBrownianProcessFromObs(X[m][j],Y0[m], 30,T)[0] for j in range(0,nb_coint-1)]) for m in range(M)])
    #Y0def2 = np.array([toBrownianProcessFromObs(X[m][0], Y0[m], 30, T)[1] for m in range(M)])
    #Y1def2 = np.array([toBrownianProcessFromObs(X[m][0], Y1[m], 30, T)[1] for m in range(M)])

    #### estimating alpha
    # Least squares on the demeaned series: (X X')^{-1} X Y.
    alpha_hat_det = np.array([np.dot(np.linalg.inv(np.dot(Xdet[m], Xdet[m].transpose())), np.dot(Xdet[m], Ydet[m])) for m in range(M)])
    alpha_hat_detdef2 = np.array([np.dot(np.linalg.inv(np.dot(Xdetdef2[m], Xdetdef2[m].transpose())), np.dot(Xdetdef2[m], Ydetdef2[m])) for m in range(M)])

    #### estimating c0
    c0_hat_det = np.array([np.mean(Ynt[m]) - alpha_hat_det[m]*np.mean(Xnt[m]) for m in range(M)])
    c0_hat_detdef2 = np.array([np.mean(Ydef[m]) - alpha_hat_detdef2[m] * np.mean(Xdef[m]) for m in range(M)])

    ###residuals: equivalence between model in the paper and the construction here
    epsilon_det = np.array([Ydet[m] - np.dot(alpha_hat_det[m], Xdet[m]) for m in range(M)])
    epsilon_detdef2 = np.array([Ydetdef2[m] - np.dot(alpha_hat_detdef2[m], Xdetdef2[m]) for m in range(M)])

    ### estimating rho
    rho_hat_detdef2 = np.array([np.sum(epsilon_detdef2[m][0:n0 - 1] * np.diff(epsilon_detdef2[m][1:n0 + 1])) / np.sum(epsilon_detdef2[m][0:n0] * np.diff(epsilon_detdef2[m][0:n0 + 1])) for m in range(M)])
    # NOTE(review): epsilon_det has nsub + 1 entries but is sliced with n0, which
    # comes from the deflated series; this is only consistent when n0 == nsub -
    # confirm against toBrownianProcess2.
    rho_hat_det = np.array([np.sum(epsilon_det[m][0:n0] * epsilon_det[m][1:n0 + 1]) / np.sum(epsilon_det[m] ** 2) for m in range(M)])

    ### DF test with demeaned data
    # NOTE(review): regression='nc' / trend='nc' may be spelled 'n' in newer
    # releases of the libraries providing adfuller / PhillipsPerron - confirm
    # against the installed versions.
    DF_det = np.array([adfuller(epsilon_det[m, :], maxlag=1, store=True, regression='nc') for m in range(M)])
    test_DF_det = np.array([DF_det[m][0] for m in range(M)])
    reject_DF_det = test_DF_det < -3.37  # = p_val_DF_H0 < 0.05

    ### DF test with deflating and demeaned data II
    DF_detdef2 = np.array([adfuller(epsilon_detdef2[m, :], maxlag=1, store=True, regression='nc') for m in range(M)])
    test_DF_detdef2 = np.array([DF_detdef2[m][0] for m in range(M)])
    reject_DF_detdef2 = test_DF_detdef2 < -3.37#-3.80  # = p_val_DF_H0 < 0.05

    ### ADF
    #adf_p = 4
    #ADF_H0 =np.array([adfuller(epsilon_H0[m,:], maxlag=adf_p, store=True, regression='nc') for m in range(M)])
    #ADF_H1 = np.array([adfuller(epsilon_H1[m,:], maxlag=adf_p, store=True,regression='nc') for m in range(M)])
    #test_ADF_H0 = np.array([ADF_H0[m][0] for m in range(M)])#np.array([DF_model_ols_H0[m].tvalues[1] for m in range(M)])
    #test_ADF_H1 = np.array([ADF_H1[m][0] for m in range(M)])#np.array([DF_model_ols_H1[m].tvalues[1] for m in range(M)])
    #reject_ADF_H0 = test_ADF_H0 < -2.76
    #reject_ADF_H1 = test_ADF_H1 < -2.76

    ### ADF test with demeaned data
    adf_p = 4
    ADF_det = np.array([adfuller(epsilon_det[m, :], maxlag=adf_p, store=True, regression='nc') for m in range(M)])
    test_ADF_det = np.array([ADF_det[m][0] for m in range(M)])
    reject_ADF_det = test_ADF_det < -3.37

    ## PPtau test with demeaned data
    PP_tau_det = [PhillipsPerron(epsilon_det[m, :], test_type='tau', trend='nc') for m in range(M)]  # Z_t from Phillips and Ouliaris
    test_PP_tau_det = np.array([PP_tau_det[m].stat for m in range(M)])
    reject_PP_tau_det = test_PP_tau_det < -3.37

    ## PPalpha test with demeaned data
    # NOTE(review): unlike the tau variant above, no trend argument is passed
    # here, so the library default applies - confirm this difference is intended.
    PP_alpha_det = [PhillipsPerron(epsilon_det[m, :], test_type='rho') for m in range(M)]
    test_PP_alpha_det = np.array([PP_alpha_det[m].stat for m in range(M)])
    reject_PP_alpha_det = test_PP_alpha_det < -20.49

    ###summary of the tests
    # Rejection frequency over the M series for each test variant.
    rejectDictH0['DFdem'].append(sum(reject_DF_det) / M)
    rejectDictH0['DFdemloc2'].append(sum(reject_DF_detdef2) / M)
    rejectDictH0['ADFdem'].append(sum(reject_ADF_det) / M)
    rejectDictH0['PP_taudem'].append(sum(reject_PP_tau_det) / M)
    rejectDictH0['PP_alphadem'].append(sum(reject_PP_alpha_det) / M)

    # Every variant except 'DFdemloc2' shares the estimates from the demeaned,
    # non-deflated regression.
    c0['DFdem'].append(sum(c0_hat_det) / M)
    c0['DFdemloc2'].append(sum(c0_hat_detdef2) / M)
    c0['ADFdem'].append(sum(c0_hat_det) / M)
    c0['PP_taudem'].append(sum(c0_hat_det) / M)
    c0['PP_alphadem'].append(sum(c0_hat_det) / M)

    alpha['DFdem'].append(sum(alpha_hat_det) / M)
    alpha['DFdemloc2'].append(sum(alpha_hat_detdef2) / M)
    alpha['ADFdem'].append(sum(alpha_hat_det) / M)
    alpha['PP_taudem'].append(sum(alpha_hat_det) / M)
    alpha['PP_alphadem'].append(sum(alpha_hat_det) / M)

    rho['DFdem'].append(sum(rho_hat_det) / M)
    rho['DFdemloc2'].append(sum(rho_hat_detdef2) / M)
    rho['ADFdem'].append(sum(rho_hat_det) / M)
    rho['PP_taudem'].append(sum(rho_hat_det) / M)
    rho['PP_alphadem'].append(sum(rho_hat_det) / M)

    nList.append(int(n/sample_step))

# Gather the results into one table with a row per sampling step. The class is
# reached through the pandas alias because a bare DataFrame name is never
# imported in this file.
out = pd.DataFrame({'n':nList,
                    'model':modelList})

# Four columns per test variant: rejection rate, c0, alpha and rho.
for key in rejectDictH0:
    out['rejectH0'+key] = rejectDictH0[key]
    out['c0' + key] = c0[key]
    out['alpha' + key] = alpha[key]
    out['rho' + key] = rho[key]

print(out)


### plots
# Centred series from the last sampling step. X[0] is 2-D (one row per
# regressor); plt.plot draws one line per column, so it is transposed to get
# one line per regressor rather than one single-point line per observation.
plt.plot((X[0]-np.mean(X[0])).T)
plt.plot(Y[0]-np.mean(Y[0]))
plt.show()

# rho estimates across sampling steps for two of the test variants.
#out["rhoH0DFdemloc2"][0] -= .08
plt.plot(out["rhoDFdemloc2"])
plt.plot(out["rhoPP_alphadem"])
plt.show()