This notebook gathers stock ticker data and merges data from older and newer sources. Note - the old Yahoo and Google APIs that used to be reliable for getting finance data have become deprecated and/or unusable. This notebook uses pandas_datareader with the Quandl data source for historical data and IEX for more recent data.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas_datareader
## Quandl - a financial data provider; used here for the long daily history.
# Date window requested from Quandl.
start_date, end_date = '1990-01-01', '2018-11-12'
## A personal Quandl API key has to be filled in below
key = "YOURKEYHERE"
## One reader per ticker; further tickers (e.g. "FB") can be requested the
## same way.
q1, q2 = (
    pandas_datareader.quandl.QuandlReader(symbol, start_date, end_date, api_key=key)
    for symbol in ("AMZN", "AAPL")
)
# Actually fetch the data, one DataFrame per ticker.
amzn1 = q1.read()
aapl1 = q2.read()
# todo - figure out how to merge these frames in a meaningful way
# (for example an outer pd.merge).
# Show (rows, columns) for AMZN, then for AAPL.
print(amzn1.shape, aapl1.shape, sep="\n")
# Quick look at what Quandl returned, starting with the column names.
amzn1.columns
# Note - even the latest rows in this source are still quite old.. fwiw
amzn1.head()
aapl1.head()
# Summary statistics, one row per column - fairly full, daily ticker data
amzn1.describe().T
# Adjusted vs. raw close, one figure per ticker (AMZN first, then AAPL).
for _frame in (amzn1, aapl1):
    _frame.loc[:, ['AdjClose', 'Close']].plot()
## IEX provides current data, up to 5 years back
# Only the 2018 window (2018-01-01 through 2018-11-12) is requested here.
start_date, end_date = '2018-01-01', '2018-11-12'
##
aapl2 = pandas_datareader.DataReader("AAPL", 'iex', start_date, end_date)
amzn2 = pandas_datareader.DataReader("AMZN", 'iex', start_date, end_date)
# Note - the IEX index comes back as strings; print it next to the Quandl
# index for comparison ...
print(amzn2.index)
print(amzn1.index)
# ... then convert both IEX indexes to a DatetimeIndex.
for _frame in (aapl2, amzn2):
    _frame.index = pd.to_datetime(_frame.index)
print(amzn2.index)
# Earliest and latest dates returned - expected to begin at the start of the
# requested window.
print(amzn2.index.min(), amzn2.index.max())
amzn2.head()
amzn2.describe().T
# Side-by-side closing prices for the IEX data, drawn with the pandas
# plotting API: AAPL on the left axes, AMZN on the right.
fig, axes = plt.subplots(1, 2, figsize=(16, 4))
for _ax, _ticker, _frame in zip(axes, ('AAPL', 'AMZN'), (aapl2, amzn2)):
    _ax.set_title(_ticker + ' Close')
    _ax.set_ylabel('Price ($)')
    _frame['close'].plot(subplots=True, grid=True, ax=_ax)
# Rotating the tick labels took some experimenting. The two plt.setp lines
# below work, but fig.autofmt_xdate() is the nicer option.
#x = plt.setp( axes[1].xaxis.get_majorticklabels(), rotation=45 )
#x = plt.setp( axes[0].xaxis.get_majorticklabels(), rotation=45 )
fig.autofmt_xdate()
# Another way of doing the same type of plot without relying on the
# DataFrame plotting: plain pyplot calls, one subplot per ticker.
plt.figure(figsize=(16, 4))
# (subplot position, ticker, frame). The ticker drives both the line label and
# the title, so each panel is labelled with the data it actually shows.
for _position, _ticker, _frame in ((1, 'AAPL', aapl2), (2, 'AMZN', amzn2)):
    plt.subplot(1, 2, _position)
    plt.plot(_frame.index, _frame['close'], label=_ticker)
    plt.xlabel('Date')
    plt.ylabel('Price ($)')
    plt.xticks(rotation=45)
    plt.title(_ticker + ' Close')
    plt.grid(True)
## Now, combine the historic and current data.. (let's just keep track of the close and adjclose for the moment)
# Step one is to make the column names of the two sources line up, so print
# both sets for comparison.
print(amzn1.columns)
print(amzn2.columns)
# Lower-case the Quandl column names.
amzn1 = amzn1.rename(columns=str.lower)
aapl1 = aapl1.rename(columns=str.lower)
print(amzn1.columns)
# OK - can plot from the DF, while selecting from the index
# aapl2['close'][aapl2.index>'2018-06-01'].plot()
# Give the current (IEX) frames an 'adjclose' column by simply copying 'close'.
# This only works if there hasn't been a split in the IEX data time scale.
for _frame in (amzn2, aapl2):
    _frame['adjclose'] = _frame['close']
# Name the index of the older frames 'date', to be consistent with the later data.
for _frame in (aapl1, amzn1):
    _frame.index.name = 'date'
print(aapl1.index)
# Where should the older data be sliced off? 2018-01-01 should be fine -
# print the date range of each source to check.
print(aapl1.index.max(), aapl1.index.min())
print(aapl2.index.max(), aapl2.index.min())
# Now merge the data, keeping only the close and adjclose columns.
# concat simply glues the newer frame onto the older one; the older source
# contributes only rows dated before 2018-01-01.
# The glued pieces are not guaranteed to come out in date order, so the result
# is sorted by its date index to give one chronological series.
aapl = pd.concat([
    aapl1.loc[aapl1.index < '2018-01-01', ['close', 'adjclose']],
    aapl2[['close', 'adjclose']],
]).sort_index()
# Oldest and newest rows of the combined frame.
print(aapl.head())
print(aapl.tail())
aapl.plot(grid=True)
# Do the same with Amazon
amzn = pd.concat([
    amzn1.loc[amzn1.index < '2018-01-01', ['close', 'adjclose']],
    amzn2[['close', 'adjclose']],
]).sort_index()
amzn.plot(grid=True)
# Plot the combined close prices again with plain pyplot calls.
# pyplot connects the points in row order, so if a frame is not sorted by date
# the ordering issue shows up here as a stray connecting line.
plt.figure(figsize=(16, 4))
# (subplot position, ticker, frame). The ticker drives both the line label and
# the title, so each panel is labelled with the data it actually shows.
for _position, _ticker, _frame in ((1, 'AAPL', aapl), (2, 'AMZN', amzn)):
    plt.subplot(1, 2, _position)
    plt.plot(_frame.index, _frame['close'], label=_ticker)
    plt.xlabel('Date')
    plt.ylabel('Price ($)')
    plt.xticks(rotation=45)
    plt.title(_ticker + ' Close')
    plt.grid(True)
# Using the DataFrame built-in plotting seems to deal correctly with the
# ordering of the rows.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
axes[0].set_title('AMZN Closing Price')
axes[0].set_ylabel('Price ($)')
axes[1].set_title('AAPL Adj Closing Price')
axes[1].set_ylabel('Price ($)')
# Left: AMZN, both adj and close, since it won't affect the scale.
amzn.plot(grid=True, ax=axes[0])
# Right: AAPL adjusted close only.
aapl['adjclose'].plot(grid=True, ax=axes[1])
fig.autofmt_xdate()
# Just because it may be interesting, let's look at these data sets around the
# Recession (Tech Stock Bubble) of 2000: adjusted close for dates strictly
# between 1998-01-01 and 2002-01-01.
# Using the DataFrame built-in plotting seems to deal correctly with the ordering.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
axes[0].set_title('AMZN Closing Price (Internet Bubble)')
axes[0].set_ylabel('Price ($)')
axes[1].set_title('AAPL Adj Closing Price (Internet Bubble)')
axes[1].set_ylabel('Price ($)')
amzn.loc[(amzn.index < '2002-01-01') & (amzn.index > '1998-01-01'), 'adjclose'].plot(
    subplots=True, grid=True, ax=axes[0])
aapl.loc[(aapl.index < '2002-01-01') & (aapl.index > '1998-01-01'), 'adjclose'].plot(
    subplots=True, grid=True, ax=axes[1])
# The x labels are set AFTER plotting: the pandas plot call applies its own
# x label (by default the index name), which replaces a label set beforehand.
axes[0].set_xlabel('Date')
axes[1].set_xlabel('Date')
fig.autofmt_xdate()
# Now for one more interesting plot - the money plot - normalize share price over time??
# Step one: bring both tickers into a single frame. Both sides have the same
# column names, so a per-ticker suffix is appended to keep them apart.
data = aapl.join(amzn, how='outer', lsuffix='_aapl', rsuffix='_amzn')
# Peek at the first rows
data.head()
# Count missing values per column - are there any missing closes?
# (there did not appear to be any)
data.isna().sum()
# The row(s) at the earliest date:
data.loc[data.index == data.index.min()]
# Any other day can be picked out from the index parts, e.g. 2010-01-04:
data.loc[(data.index.year == 2010) & (data.index.month == 1) & (data.index.day == 4)]
# To normalize the data on closing price, a point in time has to be chosen as
# the scale reference.
# Let's choose 2001, after the main effects of the internet bubble.
# .copy() makes the filtered frame independent of the original, so the new
# columns added below are written to a real frame rather than to a slice
# (which triggers pandas' SettingWithCopyWarning).
data = data[data.index > '2001'].copy()
# The earliest remaining row is the scale point (in the original run it showed
# AAPL at 0.95 and AMZN at 13.88).
data.head()
# Row(s) at the earliest date; .iloc[0] below picks the first one by position,
# which does not rely on integer keys being treated as positions on a
# date-indexed Series.
_baseline = data.loc[data.index == data.index.min()]
# Scale AAPL
data['adjclose_aapl_scaled'] = data['adjclose_aapl'] / _baseline['adjclose_aapl'].iloc[0]
# Scale AMZN
data['adjclose_amzn_scaled'] = data['adjclose_amzn'] / _baseline['adjclose_amzn'].iloc[0]
data.head()
# Plot these two together on the same plot - Note, AAPL crushes since 2001
ax = data[['adjclose_aapl_scaled', 'adjclose_amzn_scaled']].plot(
    grid=True, title="AMZN and AAPL since 2001", figsize=(12, 6))
ax.set_ylabel("Normalized Share Price")
ax.set_xlabel("Year")
# Can also look at how they've scaled since 2010.
# Unfortunately, the first day of that data is the 4th, so the reference date
# has to be spelled out explicitly:
data[data.index > '2010'].sort_index().head()
# Adding a second scaled comparison, this one relative to 2010-01-04.
# .loc selects the row(s) for that date and .iloc[0] takes the first by
# position, which does not rely on integer keys being treated as positions on
# a date-indexed Series.
aapl_scale = data.loc[data.index == '2010-01-04', 'adjclose_aapl'].iloc[0]
amzn_scale = data.loc[data.index == '2010-01-04', 'adjclose_amzn'].iloc[0]
data['adjclose_aapl_2010scaled'] = data['adjclose_aapl'] / aapl_scale
data['adjclose_amzn_2010scaled'] = data['adjclose_amzn'] / amzn_scale
data[data.index > '2010'].sort_index().head()
# Plot the 2010 scaled closes together on the same plot
ax = data.loc[data.index > '2010', ['adjclose_aapl_2010scaled', 'adjclose_amzn_2010scaled']].plot(
    grid=True, title="AMZN and AAPL since 2010", figsize=(12, 6))
ax.set_ylabel("Normalized Share Price")
ax.set_xlabel("Year")
## Ok - note, with timeseries, can also use resample to adjust and scale data in various ways
## Here, resample by month: this groups the data and applies an aggregate
## function (the mean of each 'BM' bin).
# NOTE(review): recent pandas releases deprecate the 'BM' alias in favour of
# 'BME' - confirm against the installed pandas version before changing it.
data2 = data[['adjclose_aapl', 'adjclose_amzn']].resample('BM').mean().rename(
    columns={'adjclose_aapl': 'adjclose_aapl_smooth', 'adjclose_amzn': 'adjclose_amzn_smooth'})
data2.head()
# When overlaying the smoothed data - FIRST plot the full data, to get the
# full time series on the axes.
# The smoothed data is then drawn on top of the full data.
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
axes[0].set_title('AMZN Adj Close')
axes[0].set_ylabel('Price ($)')
axes[1].set_title('AAPL Adj Close')
axes[1].set_ylabel('Price ($)')
# Left panel: AMZN daily values (translucent blue) with the monthly mean (magenta).
data[data.index > '2015'].adjclose_amzn.plot(color='blue', alpha=0.5, grid=True, ax=axes[0], legend=True)
data2[data2.index > '2015'].adjclose_amzn_smooth.plot(color='m', grid=True, legend=True, ax=axes[0])
# Right panel: the same pair of lines for AAPL.
data[data.index > '2015'].adjclose_aapl.plot(color='blue', alpha=0.5, grid=True, legend=True, ax=axes[1])
data2[data2.index > '2015'].adjclose_aapl_smooth.plot(color='m', grid=True, legend=True, ax=axes[1])
# x labels are set after the plot calls so they are the last ones applied.
axes[0].set_xlabel('Year')
axes[1].set_xlabel('Year')