Using the Data Science Pipeline to Save Money Smarter - Jack Wilcom

Introduction

Hello! My name is Jack Wilcom, and I am a sophomore computer science student at the University of Maryland. Welcome to my final project for CMSC320 - Introduction to Data Science.

This project has two purposes:

1. To introduce the data science pipeline and demonstrate how you would use data to answer deep questions.
2. To answer a question I have been asking myself for a long time.

A little background is necessary to understand what question I am asking and why. A few years ago, I had my main savings account transferred entirely into my name, and now I have to make financial decisions with little help from my parents. The stress of this responsibility is significant and is no doubt exacerbated by my lack of financial knowledge. My friends and I discuss financial strategies. One of my friends is obsessed with the gold standard and frequently advertises gold as an investment. This aligns with something I often say regarding the inflation of the US dollar: if your money is sitting in a savings account, you might as well be gradually losing money. Now, I have a savings account in which a lot of money sits dormant for long periods of time. While I was previously content to pull that saying out of thin air, I would now like to answer the question legitimately and recommend a good course of action for myself and others.

So the question is... Is money that sits in savings accounts constantly losing buying power? Furthermore, what is the minimum risk required to prevent yourself from losing money over time? Certificates of deposit? Gold?

Preface/Disclaimer

First and foremost, I absolve myself of all responsibility for actions taken by others based on the insights from this project. If you decide to make financial decisions based on this information, that is your choice. Remember that the economy is enormously complicated and influenced by policies and disasters that are more often than not arbitrary. All insights and predictions generated from this project unrealistically assume that no policy change, economic crisis, or act of God influences the data from recent history onward. You will see examples of the data fluctuating due to this missing information later on.

This is a good opportunity to discuss the types of missing data. These unknown data (such as the interest data before 2009) would be classified as "Missing at Random" because they certainly exist and could have been recorded, but their absence is explained by factors other than the missing values themselves. Note that they are not "Missing Completely at Random," which would mean the missingness has nothing to do with the data at all, nor "Missing Not at Random," which would indicate that the missing values themselves are causing the data to go missing.

Anyway, on with the show!

Data Collection I

The most important dataset in this analysis must quantify inflation somehow, which presents a problem because inflation is not an intuitive quantity to measure. I found an API maintained by the US Bureau of Labor Statistics (BLS) that supplies a value known as the consumer price index (CPI) each month. CPI is based on the average prices of many consumer products at a given time and is commonly used as a metric for the "buying power" of the US dollar domestically. This is perfect for our analysis.

The first step is to import the libraries necessary for connecting to and using a JSON API. The requests library is useful for communicating with web servers, and the json library is used to decode what the API sends back.

Next, we use the provided API URL to request the data in JSON form.

I do this independently of the other steps because you can get a maximum time range of 20 years per request, and I want all available years. Additionally, the API is most functional when using an API key (please don't use my API key). There is a maximum number of requests per day (as is common with most free APIs), so you have to be careful with the number of requests you make.
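Here is a minimal sketch of what that request step could look like. The endpoint, the series ID (CUUR0000SA0, i.e. CPI-U for all items), and the payload layout are assumptions based on the public BLS v2 API documentation, not necessarily the exact code used in this project.

```python
import requests
import json

# Assumed BLS v2 endpoint and series ID; register for your own key.
BLS_URL = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
API_KEY = "YOUR_BLS_API_KEY"

responses = []
# The API caps each request at a 20-year window, so we pull the data in chunks.
for start in range(1913, 2022, 20):
    payload = json.dumps({
        "seriesid": ["CUUR0000SA0"],
        "startyear": str(start),
        "endyear": str(min(start + 19, 2021)),
        "registrationkey": API_KEY,
    })
    responses.append(requests.post(BLS_URL,
                                   data=payload,
                                   headers={"Content-type": "application/json"}))
```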

Pandas is the cornerstone of the data science pipeline we use. It stores data of varying types in tables and provides ways to quickly perform calculations on rows and columns.

Unlike pandas, numpy handles same-type (homogeneous) array data very efficiently. It also provides useful values like np.nan to indicate non-numbers.

This gives us an array of responses, which contain strings that can be read as JSON.

Next, we run through these requests, convert them to JSON, and excavate the meaningful data out of the JSON. It is also important to provide error handling for this because if data is not available, we want to know.
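Something like the following sketch does this, building on the list of responses from the earlier request sketch. The response structure (status, then Results -> series -> data) follows the BLS v2 format, and the column names chosen here are illustrative rather than the exact ones used in the notebook.

```python
import pandas as pd

rows = []
for resp in responses:
    parsed = json.loads(resp.text)
    if parsed.get("status") != "REQUEST_SUCCEEDED":
        # If data is not available, we want to know about it rather than skip silently.
        print("Request failed:", parsed.get("message"))
        continue
    for obs in parsed["Results"]["series"][0]["data"]:
        rows.append({"year": obs["year"],
                     "month": obs["periodName"],   # e.g. "January"
                     "CPI": float(obs["value"])})

cpi_df = pd.DataFrame(rows)
```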

We built a pandas table containing the information for further processing. While this table is far more manageable than JSON, it is still not yet in a useful form from a data science perspective. For this, we need to do the next step.

Data Cleaning/Management

Usually, data received from outside sources has something wrong with it which makes it difficult to manage. For this source, the dates are two columns of strings.

It is important to make the dates machine-readable for further steps. We will use an intermediate mapping step to convert the month string into an integer, then use that integer to create a DateTime object for each row. We can then throw away the original year/month data, as it is no longer useful to us.
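A sketch of that cleaning step, using the illustrative column names from the frame above, might look like this:

```python
# Map month names to integers, assemble a proper datetime for each row,
# then drop the original year/month columns.
month_map = {"January": 1, "February": 2, "March": 3, "April": 4,
             "May": 5, "June": 6, "July": 7, "August": 8,
             "September": 9, "October": 10, "November": 11, "December": 12}

cpi_df["date"] = pd.to_datetime(pd.DataFrame({
    "year": cpi_df["year"].astype(int),
    "month": cpi_df["month"].map(month_map),
    "day": 1,                      # anchor every observation to the first of the month
}))
cpi_df = cpi_df.drop(columns=["year", "month"])
```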

Luckily, there is no missing data in this case, but there often is. If there were missing dates, we would just want to drop the whole row in this situation.

We will come back to this data soon enough, but there is more data to collect.

Data Collection II

The next thing we have to do is get data about interest rates. I searched up and down for an easier way to get this data, but I was completely unable to find a webpage or API version of all interest rate data. Instead, I found a spreadsheet in xlsx format on the FDIC website. I converted this into xls format so Pandas could read it.

First, we define a function for converting string date ranges into DateTimes. This keeps only the start of each range and discards the day-level detail, which simplifies the dates. This results in a loss of accuracy, but it will make merging the tables possible later on.
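A sketch of such a helper is below. The input format assumed here (something like "4/6/2009 - 4/12/2009") and the function name are illustrative; unparseable cells are turned into missing dates so they can be dropped later.

```python
from datetime import datetime
import pandas as pd

def range_to_datetime(date_range):
    try:
        start = str(date_range).split("-")[0].strip()     # keep only the start of the range
        parsed = datetime.strptime(start, "%m/%d/%Y")
        return parsed.replace(day=1)                       # truncate to the first of the month
    except ValueError:
        return pd.NaT                                      # unparseable cells become missing dates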

Now, we can read in the spreadsheet file and get it into a data frame. We have to iterate through each of the sheets, read them each as a data frame, drop the extra columns, properly index and name the columns and rows, and append the result to the table. We do a little bit of cleanup as well, removing missing dates with dropna.
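A rough sketch of that loop is below. The real sheets required manual inspection of columns and headers, so the details here are simplified; only the iterate-parse-concatenate pattern is meant to carry over, and the assumption that the first column holds the date range is mine.

```python
# Read every sheet at once: sheet_name=None returns a dict of sheet name -> DataFrame.
sheets = pd.read_excel("archive.xls", sheet_name=None)

frames = []
for name, sheet in sheets.items():
    sheet = sheet.dropna(how="all")                              # drop fully empty rows
    sheet["date"] = sheet.iloc[:, 0].apply(range_to_datetime)    # assumed: first column is the date range
    frames.append(sheet)

rates_df = pd.concat(frames, ignore_index=True)
rates_df["date"] = pd.to_datetime(rates_df["date"])
rates_df = rates_df.dropna(subset=["date"])                      # remove rows with missing dates
```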

The table is still really messy after this step, so further cleanup is required.

Data Cleaning/Management II

We don't want separate rows for the rate and the rate cap; that makes them completely unmanageable. In this part, we fix that by combining them by date. To do that, we first average the rates that fall into each month. This further simplifies the data for use with the other data we have. We then match each rate cap with its rate and merge the columns together. I fill the missing values with adjacent values because we want to assume that interest rates are not changing if we have no reason to believe they did. They are not continuous values, even though they may look like it.
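One way this reshaping could look is sketched below. It assumes, purely for illustration, that rates_df has a "kind" column marking each row as a "rate" or "rate_cap" and one column per account type (e.g. "Savings", "Checking", "Money Market", "12 Month CD"); the real column names may differ.

```python
# Average each (month, kind) group, then pivot so every account type / kind pair
# becomes its own column, e.g. "Savings_rate" and "Savings_rate_cap".
monthly = (rates_df
           .groupby([rates_df["date"].dt.to_period("M"), "kind"])
           .mean(numeric_only=True)
           .reset_index())
monthly["date"] = monthly["date"].dt.to_timestamp()

rates_wide = monthly.pivot(index="date", columns="kind")
rates_wide.columns = [f"{acct}_{kind}" for acct, kind in rates_wide.columns]

# Assume rates hold steady when a month has no observation: fill from neighbors.
rates_wide = rates_wide.ffill().bfill()
```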

Data Collection/Cleaning III

I promise this is the last dataset we need. This is the price history of gold directly from a webpage of JSON data. The process of converting it to a usable data frame is similar to the CPI data.

Finally, we can merge these tables together by date to make one cohesive table.
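A sketch of both steps, continuing from the frames built above, could look like this. The gold file is a plain JSON list of records; the field names assumed here ("Date", "Price") come from the datahub file and may need checking.

```python
gold_resp = requests.get(
    "https://pkgstore.datahub.io/core/gold-prices/monthly_json/data/"
    "3b036045b9090ddacfaacb2deb5f19de/monthly_json.json")
gold_df = pd.DataFrame(gold_resp.json())
gold_df["date"] = pd.to_datetime(gold_df["Date"])
gold_df = gold_df.rename(columns={"Price": "gold_price"}).drop(columns=["Date"])

# One cohesive table: CPI, interest rates, and gold keyed on the shared date column.
merged = (cpi_df
          .merge(rates_wide.reset_index(), on="date", how="left")
          .merge(gold_df, on="date", how="left"))
```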

This table is finally clean enough for the next step in the data science pipeline.

Data Exploration

This is the step where we graph some of the data and look for patterns and interesting things to explore further. It is useful to look for correlations between variables in most circumstances because we want to ultimately make more general statements about relations in the data.

I'll start by graphing some of the values over time, particularly the average interest rates of the three most common bank accounts.

Immediately we see a problem; this plot looks incomplete. This is where we must recognize the problem with the interest rates dataset, in that it only goes back to 2009.

We need to restrict our dates to 2009-present if we want to see patterns easily. The first interest rate data is from April 2009, so we'll start there.
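For example, the restriction and plot could be done as in the sketch below; the account-type column names follow the illustrative naming from the earlier reshaping sketch.

```python
import matplotlib.pyplot as plt

# Keep only the window where the FDIC data exists (April 2009 onward).
recent = merged[merged["date"] >= "2009-04-01"].reset_index(drop=True)

plt.figure(figsize=(10, 5))
for col in ["Checking_rate", "Savings_rate", "Money Market_rate"]:   # assumed column names
    plt.plot(recent["date"], recent[col], label=col)
plt.xlabel("Date")
plt.ylabel("Average APY (%)")
plt.legend()
plt.show()
```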

I was initially puzzled by how the data doesn't go back further, and I asked around for a reasonable explanation for this. I have a strong suspicion that it is related to the 2008 housing and loan crisis, which is why they keep records of this data now. I also was told that interest rates "never recovered", which is what we see in the data. This is a good example of data that is "missing at random".

It is also obvious that all of the rates rise and fall together. We will show this more clearly in a correlation matrix later on.

What is more interesting is the relationship between inflation rates and interest rates. It seems intuitive that as money becomes less valuable, interest rates would rise to compensate for lost value. This is something we want to find out.

Interest rates don't seem to correlate with inflation rates month-by-month. Incredibly, interest rates appear to have decreased overall in response to low buying power (high CPI).

This is to say that money devalues somewhat independently from the means to passively gain value back. This is a major problem because it suggests that money is not necessarily safe in bank accounts. This is a hint for one of our questions.

What about the value of a commodity like gold? Does it follow CPI?

While there appears to be some positive relationship between gold and CPI, gold does not appear to follow CPI on a small (month-by-month) scale. It is interesting to see how the dollar value of gold is constant until about 1970. This is because the US dollar's link to gold was abandoned around that time (gold convertibility formally ended in 1971), and the values of gold and the dollar diverged.

It is important to remember that even though the plots look similar, the values of gold and APY are very different and should not be compared directly. APY is a percentage RATE of value appreciation, whereas we are looking at PRICES of gold. We'll come back to the PRICE value of a bank account compared to gold in the next part.

We should graph the CPI to see if there is anything interesting going on.

We can see that somewhere around 1970, the shape of the CPI over time changes from one linear slope to another. Perhaps this hints at some kind of policy change? Regardless, it signals to us that we may be able to predict what CPI will be in the future using linear regression.

Before I move on, I wanted to demonstrate an effective way of generating a chart of correlation values between the variables. This may confirm or refute what we have posited from the graphs: CPI change is not correlated with interest rates, and the interest rates for different account types are strongly correlated.

You can make a correlation matrix that looks like this to find these relationships.

Seaborn has a great variety of advanced visualization functions - https://seaborn.pydata.org/tutorial.html

Matplotlib underlies the visualizations generated by many other libraries; we use it here to adjust the size of the matrix (otherwise it would be tiny).
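A minimal sketch of the heatmap is below. CPI_change is assumed here to be the month-over-month percent change in CPI, derived on the fly; everything else follows the illustrative table built earlier.

```python
import seaborn as sns
import matplotlib.pyplot as plt

# Add the derived CPI_change column and drop the date so only numeric columns remain.
corr_df = (recent
           .assign(CPI_change=recent["CPI"].pct_change())
           .drop(columns=["date"]))

plt.figure(figsize=(12, 10))                       # make the matrix large enough to read
sns.heatmap(corr_df.corr(), annot=True, cmap="coolwarm", center=0)
plt.show()
```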

This matrix confirms what we have seen already. Focus particularly on the "CPI_change" row, which has correlation values very close to 0. I suspect that the negative correlation between CPI and interest rates comes from the time period. The economy gets worse, interest rates dive, and inflation skyrockets - this is the unfortunate reality of our modern economy.

We have more than enough insight to move on to the next steps. It may seem like we didn't find anything helpful, but that would be a misconception. We were able to demonstrate a correlation among the interest rates of different account types and rule out any immediate month-by-month connection between the values of investments and inflation.

Data Analysis/Transformation (ML Preprocessing)

This is where the rubber starts to meet the road. By transforming the data and performing analysis steps, we will be able to predict how much commodities will cost in the future, find the minimum interest rate needed to maintain buying power over time, find the minimum time we must keep our money in the bank to make sure it stays safe, and determine whether gold is a good way to avoid the repercussions of inflation.

We start by making some new functions for transforming data. The buying power function takes a starting CPI and cost, then calculates (using an equation from Stack Exchange) the cost of the average product in the current year. While at first the relative value function appears to do the same thing, it calculates a fraction instead of a cost. Its anchor value is the "buying power" you have in a given year, and it is able to determine the buying power in different years.

If you are having trouble understanding this, don't worry. I will interpret the results in a moment.

We use a modified form of the equation from https://money.stackexchange.com/questions/88234/how-to-calculate-worth-of-money-n-year-ago-from-given-inflation-value:
old_price = new_price * (old_CPI/new_CPI)
new_price = old_price / (old_CPI/new_CPI)
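A minimal sketch of the two helpers, directly applying the equation above; the function and argument names here are illustrative rather than the exact ones used in the notebook.

```python
def buying_power(old_price, old_cpi, new_cpi):
    """Cost today of a product that cost old_price when the CPI was old_cpi."""
    return old_price / (old_cpi / new_cpi)

def relative_value(amount, old_cpi, new_cpi):
    """Buying power the amount has now, expressed relative to when the CPI was old_cpi."""
    return amount * (old_cpi / new_cpi)
```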

Now we see that on average, a product that cost 3 cents in 1913 would cost one dollar in 2021.

We also see that the buying power (higher is better) of 100 dollars from 1913 has diminished to a measly 3. This is to say that if you kept 100 dollars in your pocket for 100 years, the amount (value) of actual goods it could buy would fall by about 97 percent. Amazing!

Using this new data, we can perform calculations to determine the buying power of money when left in accounts over time.

We can start by looking at a fixed interest rate of 0.50% APY - approximately the average rate of a 3-year CD today. We use the monthly compound interest equation that is frequently taught in algebra classes to accomplish this. Relative_value_interest returns the relative value calculations from the previous cell, applied to money that is appreciating in value.

APY to monthly rate: https://www.thebalance.com/calculate-monthly-interest-315421
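A sketch of the fixed-rate experiment, building on the illustrative frames and helpers above: convert the APY to a monthly rate, compound $100 month by month, and adjust each month's balance by the CPI to get its buying power relative to the start.

```python
apy = 0.005                                  # 0.50% APY, roughly today's average 3-year CD
monthly_rate = (1 + apy) ** (1 / 12) - 1     # APY -> equivalent monthly rate

start_cpi = recent["CPI"].iloc[0]
fixed_rate_value = [relative_value(100 * (1 + monthly_rate) ** m, start_cpi, cpi)
                    for m, cpi in enumerate(recent["CPI"])]
```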

We can see that when factoring in inflation, the interest rate is negligible, even for today's 3-year CD rate.

While this gives us a unique perspective of what is happening over a long period, it is quite unrealistic: we can't assume that the interest rate stays the same every month. Luckily, we have the data to see what this looks like with actual interest rates instead. Unluckily, each calculation relies on the previous one, so we iterate through each individual row of the table, which is relatively slow.

This function is able to do it - it was a little tricky to write.
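A sketch of such a function is below. Each month's balance compounds at that month's actual rate, so every step depends on the previous one. It assumes the rate columns hold APY as a percentage (hence the division by 100) and reuses relative_value from the earlier sketch; the function name is illustrative.

```python
def value_with_real_rates(df, rate_col, principal=100):
    start_cpi = df["CPI"].iloc[0]
    balance = principal
    values = []
    for apy, cpi in zip(df[rate_col], df["CPI"]):
        monthly_rate = (1 + apy / 100) ** (1 / 12) - 1   # APY (%) -> monthly rate
        balance *= 1 + monthly_rate                      # compound this month's interest
        values.append(relative_value(balance, start_cpi, cpi))
    return values
```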

Now we use the function for the different interest rates.
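For example, under the column names assumed in the earlier sketches:

```python
savings_value = value_with_real_rates(recent, "Savings_rate")
checking_value = value_with_real_rates(recent, "Checking_rate")
cd12_value = value_with_real_rates(recent, "12 Month CD_rate")
```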

This does not look as impressive when adjusted for relative value as a result of monthly inflation.

Remember what "Value" means in this context. It is relative to the starting value of 100 dollars. We are mostly interested in the slope of these graphs, not necessarily the bias. This is how we will generalize the findings to a model allowing us to predict future values.

Finally, we have to add gold into the mix for comparison. The calculation is simpler because we already have the true value.
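A sketch of that comparison: start with $100 worth of gold, track its dollar value as the price moves, then apply the same CPI adjustment so it sits on the same buying-power scale as the accounts above.

```python
start_price = recent["gold_price"].iloc[0]
start_cpi = recent["CPI"].iloc[0]

gold_value = [relative_value(100 * price / start_price, start_cpi, cpi)
              for price, cpi in zip(recent["gold_price"], recent["CPI"])]
```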

Gold looks like a good way to preserve value over time, about on par (at minimum) with a one-year CD. It is best to take this with a grain of salt since gold has been relatively unstable in recent times (the past 10 years).

Machine Learning and Hypothesis Testing

The next step is to create models from the data that allow us to reliably calculate how much money you would lose or gain in the future. I'm going to use linear regression to find the slopes of the models.

Linear regression is perhaps the most straightforward machine learning technique: the computer generates a line of best fit by minimizing the total distance (residual) from the true data. It gives us an estimated slope and bias (y-intercept). This may not be helpful if the data does not actually have a linear relationship, since linearity is assumed. We would expect the data to be more exponential than linear, but the difference between exponential and linear over this short a time period is negligible. If we wanted to better fit something exponential, we could use polyfit or interaction terms, but this won't be necessary.

We will use the linear model from SKLearn - SKLearn docs: https://scikit-learn.org/stable/user_guide.html

We are not necessarily concerned with the biases in this specific use case, as they are not really important to make future predictions. We keep them anyway so we can check the fits later on.
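A sketch of the fitting step: x is months since the start of the window, y is each relative-value series computed above, and each fit's slope and bias go into a dict. The set of series included here is illustrative.

```python
import numpy as np
from sklearn.linear_model import LinearRegression

series = {"savings": savings_value, "checking": checking_value,
          "12-month CD": cd12_value, "gold": gold_value}

X = np.arange(len(recent)).reshape(-1, 1)       # months since the first observation
models = {}
for name, y in series.items():
    reg = LinearRegression().fit(X, np.array(y))
    models[name] = (reg.coef_[0], reg.intercept_)   # (slope per month, bias)
```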

We have this map of slopes and biases.

Now we graph the models against the actual data to see how well they fit.

All but the last regression seem to fit well.

We saw already that gold did not look very linear in the 10-year span, so it is unsurprising that the linear regression is confused.

Let's try again using gold data from a larger range of values - this should help reduce the effect of the large dip in the center of the 10-year span.

This model looks slightly more reasonable.

We can test this using hypothesis testing.

Hypothesis testing determines whether two sets of values are statistically close enough to be considered the same, or whether the difference between them is significant. The value that quantifies this is the P-value. In most applications, the significance threshold is 0.05, so we will use that.

We have two hypotheses

Null Hypothesis - the default assumption, which we reject only if we can support the alternative: the 70-year model is NOT better at predicting gold value.

Alternative Hypothesis - what we are trying to prove through the test: The 70-year model is better at predicting gold value.

Significance threshold (maximum P-value): 0.05 - i.e. 5%

We also need values to test. We will use a method that looks similar to k-fold cross-validation, in which we split the data into a training and testing set, fit the values, then perform a t-test to get the P values. In this case k=8, but k can be any reasonable number that denotes the number of groups the data is split into. We also need to divide by variance because the variances of the smaller and larger sets are quite different.

This is somewhat different from real k-fold cross-validation, which is typically used on a single dataset to compare multiple modeling algorithms. In this circumstance, I am using it to test different subsets of the dataset with the same modeling algorithm. Focus more on the concept of hypothesis testing than on the generation of the normalized residuals, because my process is unorthodox.

Now we have the values, so we can use a T-test to test the hypothesis. SciPy provides a function that performs T-tests in one line.
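A sketch of the whole comparison, under the unorthodox scheme described above and reusing numpy and LinearRegression from the regression sketch: for each of k=8 folds we fit a line on the remaining data, collect the held-out residuals divided by that fold's variance, and then compare the two models with a two-sample t-test. X_10yr/gold_10yr and X_70yr/gold_70yr are hypothetical arrays holding each window's month indices and relative gold values.

```python
from scipy import stats

def normalized_residuals(X, y, k=8):
    scores = []
    indices = np.arange(len(y))
    for test_idx in np.array_split(indices, k):
        train_idx = np.setdiff1d(indices, test_idx)
        reg = LinearRegression().fit(X[train_idx], y[train_idx])
        resid = y[test_idx] - reg.predict(X[test_idx])
        scores.extend(resid / np.var(y[test_idx]))   # normalize by the fold's variance
    return np.array(scores)

t_stat, p_value = stats.ttest_ind(normalized_residuals(X_10yr, gold_10yr),
                                  normalized_residuals(X_70yr, gold_70yr))
print(f"t = {t_stat:.3f}, p = {p_value:.5f}")
```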

The P-value is significantly smaller than 0.05, indicating that the two are significantly different. The associated statistic suggests that the old model has a higher adjusted residual than the new model (because it is positive). A higher residual indicates a worse fit.

We can reject our null hypothesis and accept our alternative hypothesis, at least for this example.

Conclusion - Insight

The ultimate goal of this project was to get the slopes of the linear regressions, which help us answer the questions we asked in the beginning. Perhaps the most important part of data science is communicating the insights we learned from the data.

Let's remind ourselves of the central questions of the project:

"Is money that sits in savings accounts constantly losing buying power? Furthermore, what is the minimum risk required to prevent yourself from losing money over time? Certificate of deposit? Gold?"

Let's look at the slopes we found, including the new slope for gold. This is where we drop the bias because we want to generalize to any principal investment.

These numbers show the estimated percentage of the principal value you will earn back over 1 month and 1 year. NOTE: This will not reflect the USD AMOUNT of money in the account! This is a measure of buying power!

For example, in a 12 month CD, you will be able to buy on average 1.68% more potatoes than you could when you started. In that year span, the price of potatoes has changed, but so has the money in your account. This is a way to keep value currency agnostic.

We can finally answer our questions. (With the stipulation that we are basing our projections on the last 10 years of data and using the perfectly average interest rates; data science always comes with assumptions)

"Is money that sits in savings accounts constantly losing buying power?" -

Yes! Money is losing buying power over time regardless of interest (assuming average interest rate). This is true of average checking, savings, and money market accounts. It is possible to find better-than-average rates for these accounts, so it is possible to outperform the model if we are lucky.

"What is the minimum risk required to prevent your account from losing money over time? Certificate of deposit? Gold?" -

The minimum risk would be giving up our money to a bank for 6 months. The minimum appears to be an average 6-month CD, which earns next to nothing but is able to preserve value. Any longer CD has the potential to earn increasingly higher returns in the same time period, but locks in the money for longer.

Gold is a much riskier investment because it strays far above and below the regression line over time. There is a chance it outperforms a 5-year CD within a year, but there is also a chance that it underperforms a savings account. It is entirely dependent on when we buy and sell. Over a 70-year timescale, it is approximately equivalent to a 1-year CD. Its benefit is that we can trade it at any time and might get lucky by buying it at a good time.

In conclusion, we must decide beforehand how quickly we need to use our money. If the money will be unused for decades, it is best to get a 60 month CD, but if we need absolute flexibility, gold may be a better (albeit more risky) option.

I hope that this walkthrough/project has taught you some of the ways data scientists leverage the data science pipeline to generate insights. I would appreciate your feedback and comments. I can be contacted at:

Jack T. Wilcom
Email: jtwilcom@wilcomweb.net
Twitter: @FlyingPeng11


Data sources:

https://data.bls.gov/cgi-bin/surveymost?cu

https://www.fdic.gov/regulations/resources/rates/historical/archive.xlsx

https://pkgstore.datahub.io/core/gold-prices/monthly_json/data/3b036045b9090ddacfaacb2deb5f19de/monthly_json.json