Stock Ticker Data¶

This notebook gathers stock ticker data, and merges data from older and newer sources. Note - the old yahoo and google api's that used to be reliable for getting finance data have become deprecated and/or non-usable. This dataset utilizes the pandas_datareader and the Quanld data source and IEX to get more recent data.

Goals¶

Gather and join financial data from different sources
Compare normalized financial data (AMZN and AAPL done here)
Resample data to perform basic smoothing

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import pandas_datareader

## Quandl - a financial data provider.    

start_date = '1990-01-01'
end_date = '2018-11-12'
## Need a Quandl Key below
key="YOURKEYHERE"
## 
q1 = pandas_datareader.quandl.QuandlReader("AMZN",  start_date, end_date, api_key=key)
q2 = pandas_datareader.quandl.QuandlReader("AAPL",  start_date, end_date, api_key=key)
#q3 = pandas_datareader.quandl.QuandlReader("AMZN",  start_date, end_date, api_key=key)
#q4 = pandas_datareader.quandl.QuandlReader("FB",  start_date, end_date, api_key=key)
amzn1=q1.read()
aapl1=q2.read()

# todo - figure out how to merge this in a meaningful way..
# df=pd.merge(df1,df2,how="outer")

print(amzn1.shape)
print(aapl1.shape)

(5248, 12)
(7113, 12)

# Actually fetch the data into a dataframe
amzn1.columns

Index([u'Open', u'High', u'Low', u'Close', u'Volume', u'ExDividend',
       u'SplitRatio', u'AdjOpen', u'AdjHigh', u'AdjLow', u'AdjClose',
       u'AdjVolume'],
      dtype='object')

# Note - the latest data is still quite old.. fwiw
amzn1.head()

aapl1.head()

# Fairly full, daily ticker data
amzn1.describe().transpose()

amzn1[['AdjClose','Close']].plot()

<matplotlib.axes._subplots.AxesSubplot at 0x108367090>

aapl1[['AdjClose','Close']].plot()

<matplotlib.axes._subplots.AxesSubplot at 0x10837ddd0>

## IEX provides current data, up to 5 years

# We would like all available data from 01/01/2000 until 12/31/2016.
start_date = '2018-01-01'
end_date = '2018-11-12'

## 
aapl2 = pandas_datareader.DataReader("AAPL", 'iex', start_date, end_date)
amzn2 = pandas_datareader.DataReader("AMZN", 'iex', start_date, end_date)

# Note - the index is a String, convert it to datetimeindex
print(amzn2.index)
print(amzn1.index)

Index([u'2018-01-02', u'2018-01-03', u'2018-01-04', u'2018-01-05',
       u'2018-01-08', u'2018-01-09', u'2018-01-10', u'2018-01-11',
       u'2018-01-12', u'2018-01-16',
       ...
       u'2018-10-30', u'2018-10-31', u'2018-11-01', u'2018-11-02',
       u'2018-11-05', u'2018-11-06', u'2018-11-07', u'2018-11-08',
       u'2018-11-09', u'2018-11-12'],
      dtype='object', name=u'date', length=219)
DatetimeIndex(['2018-03-27', '2018-03-26', '2018-03-23', '2018-03-22',
               '2018-03-21', '2018-03-20', '2018-03-19', '2018-03-16',
               '2018-03-15', '2018-03-14',
               ...
               '1997-05-30', '1997-05-29', '1997-05-28', '1997-05-27',
               '1997-05-23', '1997-05-22', '1997-05-21', '1997-05-20',
               '1997-05-19', '1997-05-16'],
              dtype='datetime64[ns]', name=u'Date', length=5248, freq=None)

aapl2.index=pd.to_datetime(aapl2.index)
amzn2.index=pd.to_datetime(amzn2.index)
print(amzn2.index)

DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
               '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
               '2018-01-12', '2018-01-16',
               ...
               '2018-10-30', '2018-10-31', '2018-11-01', '2018-11-02',
               '2018-11-05', '2018-11-06', '2018-11-07', '2018-11-08',
               '2018-11-09', '2018-11-12'],
              dtype='datetime64[ns]', name=u'date', length=219, freq=None)

# OP - first data is as expected, 2018-01-01
print(amzn2.index.min(),amzn2.index.max())
amzn2.head()

(Timestamp('2018-01-02 00:00:00'), Timestamp('2018-11-12 00:00:00'))

amzn2.describe().transpose()

fig, axes = plt.subplots(1, 2, figsize=(16, 4))
axes[0].set_title('AAPL Close')
axes[0].set_ylabel('Price ($)')

axes[1].set_title('AMZN Close')
axes[1].set_ylabel('Price ($)')

amzn2['close'].plot(subplots=True, grid=True, ax=axes[1])
aapl2['close'].plot(subplots=True, grid=True, ax=axes[0])


# I had trouble figuring out how best to rotate axes
# The followinig two things work, fig.autofmt_xdate() is good
#x = plt.setp( axes[1].xaxis.get_majorticklabels(), rotation=45 )
#x = plt.setp( axes[0].xaxis.get_majorticklabels(), rotation=45 )

fig.autofmt_xdate()

# Another way of doing the same type of plot with out relying on the DataFrame plotting 
plt.figure(figsize=(16,4))
plt.subplot(1, 2, 1)
plt.plot(aapl2.index, aapl2.close,label='AAPL')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.xticks(rotation=45)
plt.title('APPL Close')
plt.grid(True)

plt.subplot(1, 2, 2)
plt.plot(amzn2.index, amzn2.close,label='AAPL')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.xticks(rotation=45)
plt.title('AMZN Close')
plt.grid(True)

## Now, combine the historic and current data.. (let's just keep track of the close and adjclose for the moment)
# First, let's make the columns names similar:
print(amzn1.columns)
print(amzn2.columns)

Index([u'Open', u'High', u'Low', u'Close', u'Volume', u'ExDividend',
       u'SplitRatio', u'AdjOpen', u'AdjHigh', u'AdjLow', u'AdjClose',
       u'AdjVolume'],
      dtype='object')
Index([u'open', u'high', u'low', u'close', u'volume'], dtype='object')

# First I'll rename
amzn1=amzn1.rename(str.lower, axis='columns')
aapl1=aapl1.rename(str.lower, axis='columns')
print(amzn1.columns)

# OK - can plot from the DF, while selecting from from the index
# aapl2['close'][aapl2.index>'2018-06-01'].plot()

Index([u'open', u'high', u'low', u'close', u'volume', u'exdividend',
       u'splitratio', u'adjopen', u'adjhigh', u'adjlow', u'adjclose',
       u'adjvolume'],
      dtype='object')

# add adjclose to the current data..  simply by copying close  
# This only works if there hasn't been a split in the IEX data time scale
amzn2['adjclose']=amzn2['close']
aapl2['adjclose']=aapl2['close']

# Rename the first dataframe index to be consistent with the later data
aapl1.index.name='date'
amzn1.index.name='date'
print(aapl1.index)

DatetimeIndex(['2018-03-27', '2018-03-26', '2018-03-23', '2018-03-22',
               '2018-03-21', '2018-03-20', '2018-03-19', '2018-03-16',
               '2018-03-15', '2018-03-14',
               ...
               '1990-01-15', '1990-01-12', '1990-01-11', '1990-01-10',
               '1990-01-09', '1990-01-08', '1990-01-05', '1990-01-04',
               '1990-01-03', '1990-01-02'],
              dtype='datetime64[ns]', name=u'date', length=7113, freq=None)

# Ok - slice off the data, but where?  on 2018-01-01, should be fine
print(aapl1.index.max(), aapl1.index.min())
print(aapl2.index.max(), aapl2.index.min())

(Timestamp('2018-03-27 00:00:00'), Timestamp('1990-01-02 00:00:00'))
(Timestamp('2018-11-12 00:00:00'), Timestamp('2018-01-02 00:00:00'))

# Now merge the data, we'll only keep the close and adjclose columns
# concat should work fine, simply glue the newer data dataframe to the older one
aapl=pd.concat([aapl1[['close','adjclose']][aapl1.index<'2018-01-01'], aapl2[['close','adjclose']]])

# Note, the data isn't ordered, but that's not really important for our purposes
print(aapl.head())
print(aapl.tail())

             close  adjclose
date                        
2017-12-29  169.23    169.23
2017-12-28  171.08    171.08
2017-12-27  170.60    170.60
2017-12-26  170.57    170.57
2017-12-22  175.01    175.01
               close  adjclose
date                          
2018-11-06  203.0615  203.0615
2018-11-07  209.2200  209.2200
2018-11-08  208.4900  208.4900
2018-11-09  204.4700  204.4700
2018-11-12  194.1700  194.1700

# Cool - that worked ok.
aapl.plot(grid=True)

<matplotlib.axes._subplots.AxesSubplot at 0x109183e10>

# To the same with Amazon
amzn=pd.concat([amzn1[['close','adjclose']][amzn1.index<'2018-01-01'], amzn2[['close','adjclose']]])
amzn.plot(grid=True)

<matplotlib.axes._subplots.AxesSubplot at 0x109ae1e50>

# Plotting these two again, now you can see the ordering issue in the data..
plt.figure(figsize=(16,4))
plt.subplot(1, 2, 1)
plt.plot(aapl.index, aapl.close,label='AAPL')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.xticks(rotation=45)
plt.title('APPL Close')
plt.grid(True)

plt.subplot(1, 2, 2)
plt.plot(amzn.index, amzn.close,label='AAPL')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.xticks(rotation=45)
plt.title('AMZN Close')
plt.grid(True)

# Using the DataFrame built in plotting seems to correctly deal with the ordering
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
axes[0].set_title('AMZN Closing Price')
axes[0].set_ylabel('Price ($)')
axes[1].set_title('APPL Adj Closing Price')
axes[1].set_ylabel('Price ($)')
amzn.plot(grid=True, ax=axes[0])  # Plot both adj and close, since it won't effect the scale
aapl['adjclose'].plot( grid=True, ax=axes[1])
fig.autofmt_xdate()

# Just because it may be interesting, let's look at these data sets around the Recession (Tech Stock Bubble) of 2000
# Using the DataFrame built in plotting seems to correctly deal with the ordering
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
axes[0].set_title('AMZN Closing Price (Internet Bubble)')
axes[0].set_ylabel('Price ($)')
axes[0].set_xlabel('Date')  ##  <- for some reason this isn't setting the xlabel as a subplot
axes[1].set_title('APPL Adj Closing Price (Internet Bubble)')
axes[1].set_ylabel('Price ($)')
amzn[(amzn.index<'2002-01-01') & (amzn.index>'1998-01-01')].adjclose.plot(subplots=True, grid=True, ax=axes[0])
aapl[(aapl.index<'2002-01-01') & (aapl.index>'1998-01-01')].adjclose.plot(subplots=True, grid=True, ax=axes[1])
fig.autofmt_xdate()

# Now for one more interesting plot - the money plot - normalize share price over time??  
# First, join the data 
# Rename columns by appending a suffix to maintain clarity
data = aapl.join(amzn, lsuffix='_aapl', rsuffix='_amzn', how='outer')

# Peak
data.head()

# Are there any missing closes?  doesn't appear to be
data.isna().sum()

close_aapl          0
adjclose_aapl       0
close_amzn       1865
adjclose_amzn    1865
dtype: int64

# Can look at the first data point:
data[data.index == data.index.min()]

# Can also look at different point
data[(data.index.year==2010) & (data.index.month==1) & (data.index.day==4) ]

# To normalizatoin the data on closing price, have to choose a point in time to scale the data 
# Let's choose 2001, after the main effects of the internet bubble:
data = data[data.index>'2001']

# At the head, we see AAPL at 0.95 and AMZN at 13.88, this will be our scale point
data.head()

# Scale AAPL
data['adjclose_aapl_scaled']=data['adjclose_aapl']/(data['adjclose_aapl'][data.index == data.index.min()][0])

# Scale AMZN
data['adjclose_amzn_scaled']=data['adjclose_amzn']/(data['adjclose_amzn'][data.index == data.index.min()][0])

data.head()

# Plot these two together on the same plot - Note, AAPL crushes since 2001
ax = data[['adjclose_aapl_scaled','adjclose_amzn_scaled']].plot(
    grid=True,title="AMZN and AAPL since 2001", figsize=(12,6))
ax.set_ylabel("Normalized Share Price")
ax.set_xlabel("Year")

Text(0.5,0,u'Year')

# Can also look at hose they've scaled since 2010
# Unfortunately, since the first day of that data is the 4th, a bit yucky to select that value:
data[data.index > '2010'].sort_index().head()

# Adding a second scaled comparison, this from 2010
aapl_scale=data['adjclose_aapl'][data.index == '2010-01-04'][0]
amzn_scale=data['adjclose_amzn'][data.index == '2010-01-04'][0]
data['adjclose_aapl_2010scaled']=data['adjclose_aapl']/(aapl_scale)
data['adjclose_amzn_2010scaled']=data['adjclose_amzn']/(amzn_scale)
data[data.index > '2010'].sort_index().head()

# Plot the 2010 scaled closes together on the same plot 
ax = data[['adjclose_aapl_2010scaled','adjclose_amzn_2010scaled']][data.index>'2010'].plot(
    grid=True,title="AMZN and AAPL since 2010", figsize=(12,6))
ax.set_ylabel("Normalized Share Price")
ax.set_xlabel("Year")

Text(0.5,0,u'Year')

## Ok - note, with timeseries, can also use resample to adjust and scale data in various ways
## Here, resample by month, groups the data and applies an aggregate funciton
data2 = data[['adjclose_aapl','adjclose_amzn']].resample('BM').mean().rename(
    columns={'adjclose_aapl':'adjclose_aapl_smooth','adjclose_amzn':'adjclose_amzn_smooth'})

data2.head()

# When overlaying the smoothed data - I want to FIRST plot the full data, to get the full time series.
# The smoothed data will basically be interpolated on top of the full data.
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
axes[0].set_title('AMZN Adj Close')
axes[0].set_ylabel('Price ($)')

axes[1].set_title('APPL Adj Close')
axes[1].set_ylabel('Price ($)')

data[data.index>'2015'].adjclose_amzn.plot(color='blue',alpha=0.5,grid=True,ax=axes[0],legend=True)
data2[data2.index>'2015'].adjclose_amzn_smooth.plot(color='m',grid=True,legend=True,ax=axes[0])
data[data.index>'2015'].adjclose_aapl.plot(color='blue',alpha=0.5,grid=True,legend=True,ax=axes[1])
data2[data2.index>'2015'].adjclose_aapl_smooth.plot(color='m',grid=True,legend=True,ax=axes[1])
axes[0].set_xlabel('Year')
axes[1].set_xlabel('Year')

Text(0.5,0,u'Year')

	Open	High	Low	Close	Volume	ExDividend	SplitRatio	AdjOpen	AdjHigh	AdjLow	AdjClose	AdjVolume
Date
2018-03-27	1572.40	1575.96	1482.32	1497.05	6793279.0	0.0	1.0	1572.40	1575.96	1482.32	1497.05	6793279.0
2018-03-26	1530.00	1556.99	1499.25	1555.86	5547618.0	0.0	1.0	1530.00	1556.99	1499.25	1555.86	5547618.0
2018-03-23	1539.01	1549.02	1495.36	1495.56	7843966.0	0.0	1.0	1539.01	1549.02	1495.36	1495.56	7843966.0
2018-03-22	1565.47	1573.85	1542.40	1544.10	6177737.0	0.0	1.0	1565.47	1573.85	1542.40	1544.10	6177737.0
2018-03-21	1586.45	1590.00	1563.17	1581.86	4667291.0	0.0	1.0	1586.45	1590.00	1563.17	1581.86	4667291.0

	Open	High	Low	Close	Volume	ExDividend	SplitRatio	AdjOpen	AdjHigh	AdjLow	AdjClose	AdjVolume
Date
2018-03-27	173.68	175.15	166.92	168.340	38962839.0	0.0	1.0	173.68	175.15	166.92	168.340	38962839.0
2018-03-26	168.07	173.10	166.44	172.770	36272617.0	0.0	1.0	168.07	173.10	166.44	172.770	36272617.0
2018-03-23	168.39	169.92	164.94	164.940	40248954.0	0.0	1.0	168.39	169.92	164.94	164.940	40248954.0
2018-03-22	170.00	172.68	168.60	168.845	41051076.0	0.0	1.0	170.00	172.68	168.60	168.845	41051076.0
2018-03-21	175.04	175.09	171.26	171.270	35247358.0	0.0	1.0	175.04	175.09	171.26	171.270	35247358.0

	count	mean	std	min	25%	50%	75%	max
Open	5248.0	2.091237e+02	2.783994e+02	5.910000	3.997750e+01	8.410500e+01	2.590250e+02	1.615960e+03
High	5248.0	2.117708e+02	2.804962e+02	6.100000	4.063000e+01	8.584500e+01	2.619075e+02	1.617540e+03
Low	5248.0	2.063162e+02	2.757918e+02	5.510000	3.927250e+01	8.247500e+01	2.567600e+02	1.590890e+03
Close	5248.0	2.091731e+02	2.783123e+02	5.970000	4.001000e+01	8.426000e+01	2.591425e+02	1.598390e+03
Volume	5248.0	6.545339e+06	5.522924e+06	40600.000000	3.499848e+06	5.472500e+06	8.002825e+06	1.043292e+08
ExDividend	5248.0	0.000000e+00	0.000000e+00	0.000000	0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00
SplitRatio	5248.0	1.000762e+00	3.380724e-02	1.000000	1.000000e+00	1.000000e+00	1.000000e+00	3.000000e+00
AdjOpen	5248.0	2.014841e+02	2.818628e+02	1.406667	3.573750e+01	7.291000e+01	2.578775e+02	1.615960e+03
AdjHigh	5248.0	2.038178e+02	2.840473e+02	1.448333	3.650000e+01	7.451000e+01	2.600400e+02	1.617540e+03
AdjLow	5248.0	1.989602e+02	2.791735e+02	1.312500	3.516750e+01	7.079915e+01	2.555775e+02	1.590890e+03
AdjClose	5248.0	2.015044e+02	2.817811e+02	1.395833	3.575000e+01	7.266000e+01	2.580125e+02	1.598390e+03
AdjVolume	5248.0	7.803634e+06	7.494447e+06	487200.000000	3.820240e+06	5.882000e+06	8.851525e+06	1.043292e+08

	open	high	low	close	volume
date
2018-01-02	1172.00	1190.00	1170.51	1189.01	2694494
2018-01-03	1188.30	1205.49	1188.30	1204.20	3108793
2018-01-04	1205.00	1215.87	1204.66	1209.59	3022089
2018-01-05	1217.51	1229.14	1210.00	1229.14	3544743
2018-01-08	1236.00	1253.08	1232.03	1246.87	4279475

	count	mean	std	min	25%	50%	75%	max
open	219.0	1.653629e+03	2.081205e+02	1172.00	1495.350	1648.90	1826.570	2038.11
high	219.0	1.670756e+03	2.074435e+02	1190.00	1524.110	1665.68	1841.000	2050.50
low	219.0	1.631394e+03	2.071662e+02	1170.51	1478.710	1614.55	1805.515	2013.00
close	219.0	1.651918e+03	2.059742e+02	1189.01	1500.125	1641.54	1818.615	2039.51
volume	219.0	5.371445e+06	2.503063e+06	2115639.00	3651673.000	4581568.00	6511478.000	14963783.00

	close_aapl	adjclose_aapl	close_amzn	adjclose_amzn
date
1990-01-02	37.25	1.118093	NaN	NaN
1990-01-03	37.50	1.125597	NaN	NaN
1990-01-04	37.63	1.129499	NaN	NaN
1990-01-05	37.75	1.133101	NaN	NaN
1990-01-08	38.00	1.140605	NaN	NaN

	close_aapl	adjclose_aapl	close_amzn	adjclose_amzn
date
2001-01-02	14.88	0.956144	13.88	13.88
2001-01-03	16.37	1.051887	17.56	17.56
2001-01-04	17.06	1.096224	15.50	15.50
2001-01-05	16.37	1.051887	14.56	14.56
2001-01-08	16.56	1.064095	14.94	14.94

	close_aapl	adjclose_aapl	close_amzn	adjclose_amzn	adjclose_aapl_scaled	adjclose_amzn_scaled
date
2010-01-04	214.01	27.503268	133.90	133.90	28.764785	9.646974
2010-01-05	214.38	27.550818	134.69	134.69	28.814516	9.703890
2010-01-06	210.97	27.112585	132.25	132.25	28.356183	9.528098
2010-01-07	210.58	27.062465	130.00	130.00	28.303763	9.365994
2010-01-08	211.98	27.242385	133.52	133.52	28.491935	9.619597

	adjclose_aapl_smooth	adjclose_amzn_smooth
date
2001-01-31	1.184501	17.814286
2001-02-28	1.261330	13.504737
2001-03-30	1.333391	10.910000
2001-04-30	1.459790	13.316500
2001-05-31	1.512054	15.811818