'''TAQ data analysis module.
The functions in the module analyze the data from the NASDAQ stock market,
computing the self- and cross-response functions.
This script requires the following modules:
* itertools
* multiprocessing
* numpy
* os
* pandas
* pickle
* taq_data_tools_responses_trade
The module contains the following functions:
* taq_midpoint_trade_data - obtains the midpoint price in trade time scale.
* taq_trade_signs_trade_data - computes the trade signs of every trade.
* taq_self_response_day_responses_trade_data - computes the self response
of a day.
* taq_self_response_year_responses_trade_data - computes the self response
of a year.
* taq_cross_response_day_responses_trade_data - computes the cross
response of a day.
* taq_cross_response_year_responses_trade_data - computes the cross
response of a year.
* main - the main function of the script.
.. moduleauthor:: Juan Camilo Henao Londono <www.github.com/juanhenao21>
'''
# ----------------------------------------------------------------------------
# Modules
from itertools import product as iprod
import multiprocessing as mp
import numpy as np
import os
import pandas as pd
import pickle
import taq_data_tools_responses_trade
__tau__ = 1000
# ----------------------------------------------------------------------------
[docs]def taq_trade_signs_trade_data(ticker, date):
"""Computes the trade signs of every trade.
Using the dayly TAQ data computes the trade signs of every trade in a day.
The trade signs are computed using Eq. 1 of the
`paper
<https://link.springer.com/content/pdf/10.1140/epjb/e2016-60818-y.pdf>`_.
As the trades signs are not directly given by the TAQ data, they must be
infered by the trades prices.
For further calculations, the function returns the values for the time
range from 9h40 to 15h50.
:param ticker: string of the abbreviation of the stock to be analyzed
(i.e. 'AAPL').
:param date: string with the date of the data to be extracted
(i.e. '2008-01-02').
:return: tuple -- The function returns a tuple with numpy arrays.
"""
date_sep = date.split('-')
year = date_sep[0]
month = date_sep[1]
day = date_sep[2]
function_name = taq_trade_signs_trade_data.__name__
try:
# Load data
# The module is used in other folders, so it is necessary to use
# absolute paths instead of relative paths
# Obtain the absolute path of the current file and split it
abs_path = os.path.abspath(__file__).split('/')
# Take the path from the start to the project folder
root_path = '/'.join(abs_path[:abs_path.index('project') + 1])
data_trades_trade = pd.read_hdf(root_path
+ f'/taq_data/hdf5_daily_data_{year}/'
+ f'taq_{ticker}_trades_{date}.h5',
key='/trades')
time_t = data_trades_trade['Time'].to_numpy()
ask_t = data_trades_trade['Ask'].to_numpy()
# All the trades must have a price different to zero
assert not np.sum(ask_t == 0)
# Trades identified using equation (1)
identified_trades = np.zeros(len(time_t))
identified_trades[-1] = 1
# Implementation of equation (1). Sign of the price change between
# consecutive trades
for t_idx in range(len(time_t)):
diff = ask_t[t_idx] - ask_t[t_idx - 1]
if (diff):
identified_trades[t_idx] = np.sign(diff)
else:
identified_trades[t_idx] = identified_trades[t_idx - 1]
# All the identified trades must be different to zero
assert not np.sum(identified_trades == 0)
# Saving data
taq_data_tools_responses_trade \
.taq_save_data(function_name, (time_t, ask_t, identified_trades),
ticker, ticker, year, month, day)
return (time_t, ask_t, identified_trades)
except FileNotFoundError as e:
print('No data')
print(e)
print()
return None
# ----------------------------------------------------------------------------
[docs]def taq_self_response_day_responses_trade_data(ticker, date):
"""Computes the self-response of a day.
Using the midpoint price and trade signs of a ticker computes the self-
response during different time lags (:math:`\\tau`) for a day.
:param ticker: string of the abbreviation of the stock to be analyzed
(i.e. 'AAPL').
:param date: string with the date of the data to be extracted
(i.e. '2008-01-02').
:return: tuple -- The function returns a tuple with numpy arrays.
"""
date_sep = date.split('-')
year = date_sep[0]
month = date_sep[1]
day = date_sep[2]
try:
# Load data
midpoint = pickle.load(open(
f'../../taq_data/responses_physical_data_{year}/taq'
+ f'_midpoint_physical_data/taq_midpoint_physical_data'
+ f'_midpoint_{year}{month}{day}_{ticker}.pickle', 'rb'))
time_t, _, trade_sign = pickle.load(open(
f'../../taq_data/responses_trade_data_{year}/taq_trade_signs_trade'
+ f'_data/taq_trade_signs_trade_data_{year}{month}{day}_{ticker}'
+ f'.pickle', 'rb'))
# As the midpoint price values are loaded from the responses physical
# module and their time is [34800, 56999] and the trade signs values
# are loaded from the responses trade module and their time is
# [34200, 57599], I set the time with reference to the midpoint price
time_m = np.array(range(34800, 57000))
cond_1 = (time_t >= 34801) * (time_t < 57001)
time_t = time_t[cond_1]
trade_sign = trade_sign[cond_1]
# Array of the average of each tau. 10^3 s is used in the paper
self_response_tau = np.zeros(__tau__)
num = np.zeros(__tau__)
# Calculating the midpoint price return and the self response function
# Depending on the tau value
for tau_idx in range(__tau__):
# midpoint price returns
# Obtain the midpoint price return. Displace the numerator tau
# values to the right and compute the return
log_return_sec = (midpoint[tau_idx + 1:]
- midpoint[:-tau_idx - 1]) \
/ midpoint[:-tau_idx - 1]
# Filter the trade sign values according with the values that can
# be taken by the midpoint price based on the time
trade_sign_tau = trade_sign[time_t < time_m[-tau_idx - 1]]
time_t_tau = time_t[time_t < time_m[-tau_idx - 1]]
trade_sign_no_0_len = len(trade_sign_tau[trade_sign_tau != 0])
num[tau_idx] = trade_sign_no_0_len
# Reduce the time to the corresponding length of returns
time_m_short = time_m[:-tau_idx - 1]
# The return of one second is multiplied with all the trade signs
# of the next second and added to obtain the response
for t_idx, t_val in enumerate(time_m_short):
# Obtain the self response value
# Multiply the return of tau with all the trade signs in one
# second and add for all the seconds
product = log_return_sec[t_idx] \
* trade_sign_tau[time_t_tau == t_val]
self_response_tau[tau_idx] += np.sum(product)
return (self_response_tau, num)
except FileNotFoundError as e:
print('No data')
print(e)
print()
zeros = np.zeros(__tau__)
return (zeros, zeros)
# ----------------------------------------------------------------------------
[docs]def taq_self_response_year_responses_trade_data(ticker, year):
"""Computes the self-response of a year.
Using the taq_self_response_day_responses_trade_data function computes the
self-response function for a year.
:param ticker: string of the abbreviation of stock to be analyzed
(i.e. 'AAPL').
:param year: string of the year to be analyzed (i.e '2016').
:return: tuple -- The function returns a tuple with numpy arrays.
"""
function_name = taq_self_response_year_responses_trade_data.__name__
taq_data_tools_responses_trade \
.taq_function_header_print_data(function_name, ticker, ticker, year,
'', '')
dates = taq_data_tools_responses_trade.taq_bussiness_days(year)
self_values = []
args_prod = iprod([ticker], dates)
# Parallel computation of the self-responses. Every result is appended to
# a list
with mp.Pool(processes=mp.cpu_count()) as pool:
self_values.append(pool.starmap(
taq_self_response_day_responses_trade_data, args_prod))
# To obtain the total self-response, I sum over all the self-response
# values and all the amount of trades (averaging values)
self_v_final = np.sum(self_values[0], axis=0)
self_response_val = self_v_final[0] / self_v_final[1]
self_response_avg = self_v_final[1]
# Saving data
taq_data_tools_responses_trade \
.taq_save_data(function_name, self_response_val, ticker, ticker, year,
'', '')
return (self_response_val, self_response_avg)
# ----------------------------------------------------------------------------
[docs]def taq_cross_response_day_responses_trade_data(ticker_i, ticker_j, date):
"""Computes the cross-response of a day.
Using the midpoint price of ticker i and trade signs of ticker j computes
the cross-response during different time lags (:math:`\\tau`) for a day.
:param ticker_i: string of the abbreviation of the stock to be analyzed
(i.e. 'AAPL').
:param ticker_j: string of the abbreviation of the stock to be analyzed
(i.e. 'AAPL').
:param date: string with the date of the data to be extracted
(i.e. '2008-01-02').
:return: tuple -- The function returns a tuple with numpy arrays.
"""
date_sep = date.split('-')
year = date_sep[0]
month = date_sep[1]
day = date_sep[2]
if (ticker_i == ticker_j):
# Self-response
return None
else:
try:
# Load data
midpoint_i = pickle.load(open(
f'../../taq_data/responses_physical_data_{year}/taq'
+ f'_midpoint_physical_data/taq_midpoint_physical_data'
+ f'_midpoint_{year}{month}{day}_{ticker_i}.pickle', 'rb'))
time_t, _, trade_sign_j = pickle.load(open(
f'../../taq_data/responses_trade_data_{year}/taq_trade'
+ f'_signs_trade_data/taq_trade_signs_trade_data'
+ f'_{year}{month}{day}_{ticker_j}.pickle', 'rb'))
# As the midpoint price values are loaded from the responses
# physical # module and their time is [34800, 56999] and the trade
# signs values # are loaded from the responses trade module and
# their time is [34200, 57599], I set the time equal to the
# midpoint price
time_m = np.array(range(34800, 57000))
cond_1 = (time_t >= 34801) * (time_t < 57001)
time_t = time_t[cond_1]
trade_sign_j = trade_sign_j[cond_1]
# Array of the average of each tau. 10^3 s is used in the paper
cross_response_tau = np.zeros(__tau__)
num = np.zeros(__tau__)
# Calculating the midpoint return and the cross response function
# Depending on the tau value
for tau_idx in range(__tau__):
# midpoint price returns
# Obtain the midpoint price return. Displace the numerator tau
# values to the right and compute the return
log_return_i_sec = (midpoint_i[tau_idx + 1:]
- midpoint_i[:-tau_idx - 1]) \
/ midpoint_i[:-tau_idx - 1]
# Filter the trade sign values according with the values that
# can be taken by the midpoint price based on the time
trade_sign_tau = trade_sign_j[time_t < time_m[-tau_idx - 1]]
time_t_tau = time_t[time_t < time_m[-tau_idx - 1]]
trade_sign_no_0_len = len(trade_sign_tau[trade_sign_tau != 0])
num[tau_idx] = trade_sign_no_0_len
# Reduce the time to the corresponding length of returns
time_m_short = time_m[:-tau_idx - 1]
# The return of one second is multiplied with all the trade
# signs of the next second and added to obtain the response
for t_idx, t_val in enumerate(time_m_short):
# Obtain the self response value
# Multiply the return of tau with all the trade signs in
# one second and add for all the seconds
product = log_return_i_sec[t_idx] \
* trade_sign_tau[time_t_tau == t_val]
cross_response_tau[tau_idx] += np.sum(product)
return (cross_response_tau, num)
except FileNotFoundError as e:
print('No data')
print(e)
print()
zeros = np.zeros(__tau__)
return (zeros, zeros)
# ----------------------------------------------------------------------------
[docs]def taq_cross_response_year_responses_trade_data(ticker_i, ticker_j, year):
"""Computes the cross-response of a year.
Using the taq_cross_response_day_responses_trade_data function computes the
cross-response function for a year.
:param ticker_i: string of the abbreviation of the stock to be analyzed
(i.e. 'AAPL').
:param ticker_j: string of the abbreviation of the stock to be analyzed
(i.e. 'AAPL').
:param year: string of the year to be analyzed (i.e '2016').
:return: tuple -- The function returns a tuple with numpy arrays.
"""
if (ticker_i == ticker_j):
# Self-response
return None
else:
function_name = taq_cross_response_year_responses_trade_data.__name__
taq_data_tools_responses_trade \
.taq_function_header_print_data(function_name, ticker_i, ticker_j,
year, '', '')
dates = taq_data_tools_responses_trade.taq_bussiness_days(year)
cross_values = []
args_prod = iprod([ticker_i], [ticker_j], dates)
# Parallel computation of the cross-responses. Every result is appended
# to a list
with mp.Pool(processes=mp.cpu_count()) as pool:
cross_values.append(pool.starmap(
taq_cross_response_day_responses_trade_data, args_prod))
# To obtain the total cross-response, I sum over all the cross-response
# values and all the amount of trades (averaging values)
cross_v_final = np.sum(cross_values[0], axis=0)
cross_response_val = cross_v_final[0] / cross_v_final[1]
cross_response_avg = cross_v_final[1]
# Saving data
taq_data_tools_responses_trade \
.taq_save_data(function_name, cross_response_val, ticker_i,
ticker_j, year, '', '')
return (cross_response_val, cross_response_avg)
# ----------------------------------------------------------------------------
[docs]def main():
"""The main function of the script.
The main function is used to test the functions in the script.
:return: None.
"""
pass
return None
# ----------------------------------------------------------------------------
if __name__ == "__main__":
main()