# Source code for taq_data_analysis_trade_shift

'''TAQ data analysis module.

The functions in the module analyze the data from the NASDAQ stock market,
computing the self- and cross-response functions.

This script requires the following modules:
    * itertools
    * multiprocessing
    * numpy
    * pandas
    * pickle
    * taq_data_tools_trade_shift

The module contains the following functions:
    * taq_self_response_day_trade_shift_data - computes the self response of a
      day.
    * taq_self_response_year_trade_shift_data - computes the self response of
      a year.
    * taq_cross_response_day_trade_shift_data - computes the cross response of
      a day.
    * taq_cross_response_year_trade_shift_data - computes the cross response
      of a year.

.. moduleauthor:: Juan Camilo Henao Londono <www.github.com/juanhenao21>
'''

# -----------------------------------------------------------------------------
# Modules

from itertools import product as iprod
import multiprocessing as mp
import numpy as np
import pandas as pd
import pickle

import taq_data_tools_trade_shift

# ----------------------------------------------------------------------------


def taq_self_response_day_trade_shift_data(ticker, date, tau):
    """Computes the self-response of a day.

    Using the midpoint price and trade signs of a ticker computes the self-
    response during different trade shifts for a day. There is a constant
    :math:`\\tau` that must be set in the parameters.

    :param ticker: string of the abbreviation of the stock to be analyzed
     (i.e. 'AAPL').
    :param date: string with the date of the data to be extracted
     (i.e. '2008-01-02').
    :param tau: integer greater than zero (i.e. 50).
    :return: tuple -- The function returns a tuple with two numpy arrays of
     length ``20 * tau`` (one entry per trade shift in
     ``range(-10 * tau, 10 * tau)``): the sum of return times trade sign for
     each shift, and the number of non-zero trade signs used for each shift.
     If a data file is missing, both arrays are filled with zeros.
    """

    date_sep = date.split('-')

    year = date_sep[0]
    month = date_sep[1]
    day = date_sep[2]

    # Trade shifts to evaluate. 10^3 s is used in the paper
    shift_val = range(- 10 * tau, 10 * tau, 1)

    midpoint_path = (
        f'../../taq_data/responses_physical_data_{year}/taq_midpoint'
        + f'_physical_data/taq_midpoint_physical_data_midpoint_{year}'
        + f'{month}{day}_{ticker}.pickle')
    trade_signs_path = (
        f'../../taq_data/responses_trade_data_{year}/taq_trade_signs_trade'
        + f'_data/taq_trade_signs_trade_data_{year}{month}{day}_{ticker}'
        + f'.pickle')

    # Only the file opening can raise FileNotFoundError, so the try block is
    # limited to the loading step. The context managers guarantee the file
    # handles are closed (the function runs once per day in every worker).
    # NOTE: pickle must only be used with trusted files; these are expected
    # to be files generated by the project itself.
    try:
        with open(midpoint_path, 'rb') as midpoint_file:
            midpoint = pickle.load(midpoint_file)
        with open(trade_signs_path, 'rb') as trade_signs_file:
            time_t, _, trade_sign = pickle.load(trade_signs_file)

    except FileNotFoundError as e:
        print('No data')
        print(e)
        print()
        # Two independent arrays, so a caller modifying one in place does not
        # silently modify the other
        return (np.zeros(len(shift_val)), np.zeros(len(shift_val)))

    # As the midpoint price values are loaded from the responses physical
    # module and their time is [34800, 56999] and the trade signs values
    # are loaded from the responses trade module and their time is
    # [34200, 57599], I set the time equal to the midpoint price
    time_m = np.arange(34800, 57000)
    cond_1 = (time_t >= 34800) & (time_t < 57000)

    time_t = time_t[cond_1]
    trade_sign = trade_sign[cond_1]

    assert not np.any(trade_sign == 0), 'trade signs contain zeros'
    assert not np.any(midpoint == 0), 'midpoint prices contain zeros'

    # Accumulators: one entry per trade shift
    self_response_shift = np.zeros(len(shift_val))
    num = np.zeros(len(shift_val))

    # It is needed to associate each trade sign with a midpoint price: every
    # trade takes the midpoint price of the second in which it happened
    midpoint_t = 0. * trade_sign

    for t_idx, t_val in enumerate(time_m):
        midpoint_t[time_t == t_val] = midpoint[t_idx]

    assert not np.any(midpoint_t == 0), 'trades without a midpoint price'

    # Depending on the trade shift value
    for s_idx, s_val in enumerate(shift_val):

        if s_val < 0:
            midpoint_shift = midpoint_t[-s_val:]
            trade_sign_shift = trade_sign[:s_val]

        elif s_val > 0:
            midpoint_shift = midpoint_t[:-s_val]
            trade_sign_shift = trade_sign[s_val:]

        else:
            midpoint_shift = midpoint_t
            trade_sign_shift = trade_sign

        trade_sign_tau = trade_sign_shift[:-tau - 1]
        trade_sign_no_0_len = np.count_nonzero(trade_sign_tau)
        num[s_idx] = trade_sign_no_0_len

        # Obtain the midpoint price return. Displace the numerator tau
        # values to the right and compute the return
        midpoint_base = midpoint_shift[:-tau - 1]
        return_sec = (midpoint_shift[tau + 1:] - midpoint_base) \
            / midpoint_base

        # Obtain the self response value
        if trade_sign_no_0_len != 0:
            self_response_shift[s_idx] = np.sum(return_sec * trade_sign_tau)

    return (self_response_shift, num)
# ----------------------------------------------------------------------------
def taq_self_response_year_trade_shift_data(ticker, year, tau):
    """Computes the self response of a year.

    Runs taq_self_response_day_trade_shift_data in parallel for every date
    returned by taq_bussiness_days, adds up the daily results and saves the
    resulting self-response with taq_save_data.

    :param ticker: string of the abbreviation of stock to be analyzed
     (i.e. 'AAPL').
    :param year: string of the year to be analyzed (i.e '2016').
    :param tau: integer greater than zero (i.e. 50).
    :return: tuple -- The function returns a tuple with numpy arrays: the
     summed daily responses divided by the summed daily counts, and the
     summed daily counts.
    """

    function_name = taq_self_response_year_trade_shift_data.__name__
    taq_data_tools_trade_shift.taq_function_header_print_data(
        function_name, ticker, ticker, year, '', '')

    business_days = taq_data_tools_trade_shift.taq_bussiness_days(year)

    # One (ticker, date, tau) argument tuple per day of the year
    day_args = iprod([ticker], business_days, [tau])

    # Parallel computation of the daily self-responses
    with mp.Pool(processes=mp.cpu_count()) as pool:
        day_results = pool.starmap(
            taq_self_response_day_trade_shift_data, day_args)

    # Summing over the days gives, per trade shift, the total response
    # (row 0) and the total amount of trades (row 1); their ratio is the
    # average self-response
    year_totals = np.sum(day_results, axis=0)
    trades_total = year_totals[1]
    self_response_val = year_totals[0] / trades_total

    # Saving data
    taq_data_tools_trade_shift.taq_save_data(
        f'{function_name}_tau_{tau}', self_response_val, ticker, ticker,
        year, '', '')

    return (self_response_val, trades_total)
# ----------------------------------------------------------------------------
def taq_cross_response_day_trade_shift_data(ticker_i, ticker_j, date, tau):
    """Computes the cross-response of a day.

    Using the midpoint price of ticker i and trade signs of ticker j computes
    the cross-response during different trade shifts for a day. There is a
    constant :math:`\\tau` that must be set in the parameters.

    :param ticker_i: string of the abbreviation of the stock whose midpoint
     price is used (i.e. 'AAPL').
    :param ticker_j: string of the abbreviation of the stock whose trade
     signs are used (i.e. 'MSFT').
    :param date: string with the date of the data to be extracted
     (i.e. '2008-01-02').
    :param tau: integer greater than zero (i.e. 50).
    :return: tuple -- The function returns a tuple with two numpy arrays of
     length ``20 * tau`` (one entry per trade shift in
     ``range(-10 * tau, 10 * tau)``): the sum of return times trade sign for
     each shift, and the number of non-zero trade signs used for each shift.
     If a data file is missing, both arrays are filled with zeros. If both
     tickers are the same (self-response), None is returned.
    """

    date_sep = date.split('-')

    year = date_sep[0]
    month = date_sep[1]
    day = date_sep[2]

    if ticker_i == ticker_j:
        # Self-response
        return None

    # Trade shifts to evaluate. 10^3 s is used in the paper
    shift_val = range(- 10 * tau, 10 * tau, 1)

    midpoint_path = (
        f'../../taq_data/responses_physical_data_{year}/taq'
        + f'_midpoint_physical_data/taq_midpoint_physical_data'
        + f'_midpoint_{year}{month}{day}_{ticker_i}.pickle')
    trade_signs_path = (
        f'../../taq_data/responses_trade_data_{year}/taq_trade'
        + f'_signs_trade_data/taq_trade_signs_trade_data'
        + f'_{year}{month}{day}_{ticker_j}.pickle')

    # Only the file opening can raise FileNotFoundError, so the try block is
    # limited to the loading step. The context managers guarantee the file
    # handles are closed (the function runs once per day in every worker).
    # NOTE: pickle must only be used with trusted files; these are expected
    # to be files generated by the project itself.
    try:
        with open(midpoint_path, 'rb') as midpoint_file:
            midpoint_i = pickle.load(midpoint_file)
        with open(trade_signs_path, 'rb') as trade_signs_file:
            time_t, _, trade_sign_j = pickle.load(trade_signs_file)

    except FileNotFoundError as e:
        print('No data')
        print(e)
        print()
        # Two independent arrays, so a caller modifying one in place does not
        # silently modify the other
        return (np.zeros(len(shift_val)), np.zeros(len(shift_val)))

    # As the midpoint price values are loaded from the responses physical
    # module and their time is [34800, 56999] and the trade signs values
    # are loaded from the responses trade module and their time is
    # [34200, 57599], I set the time equal to the midpoint price
    time_m = np.arange(34800, 57000)
    cond_1 = (time_t >= 34800) & (time_t < 57000)

    time_t = time_t[cond_1]
    trade_sign_j = trade_sign_j[cond_1]

    assert not np.any(trade_sign_j == 0), 'trade signs contain zeros'
    assert not np.any(midpoint_i == 0), 'midpoint prices contain zeros'

    # Accumulators: one entry per trade shift
    cross_response_shift = np.zeros(len(shift_val))
    num = np.zeros(len(shift_val))

    # It is needed to associate each trade sign with a midpoint price: every
    # trade of ticker j takes the midpoint price of ticker i in the second
    # in which the trade happened
    midpoint_t = 0. * trade_sign_j

    for t_idx, t_val in enumerate(time_m):
        midpoint_t[time_t == t_val] = midpoint_i[t_idx]

    assert not np.any(midpoint_t == 0), 'trades without a midpoint price'

    # Depending on the trade shift value
    for s_idx, s_val in enumerate(shift_val):

        if s_val < 0:
            midpoint_shift = midpoint_t[-s_val:]
            trade_sign_shift = trade_sign_j[:s_val]

        elif s_val > 0:
            midpoint_shift = midpoint_t[:-s_val]
            trade_sign_shift = trade_sign_j[s_val:]

        else:
            midpoint_shift = midpoint_t
            trade_sign_shift = trade_sign_j

        trade_sign_tau = trade_sign_shift[:-tau - 1]
        trade_sign_no_0_len = np.count_nonzero(trade_sign_tau)
        num[s_idx] = trade_sign_no_0_len

        # Obtain the midpoint return. Displace the numerator tau
        # values to the right and compute the return
        midpoint_base = midpoint_shift[:-tau - 1]
        return_i_sec = (midpoint_shift[tau + 1:] - midpoint_base) \
            / midpoint_base

        # Obtain the cross response value
        if trade_sign_no_0_len != 0:
            cross_response_shift[s_idx] = np.sum(
                return_i_sec * trade_sign_tau)

    return (cross_response_shift, num)
# ----------------------------------------------------------------------------
def taq_cross_response_year_trade_shift_data(ticker_i, ticker_j, year, tau):
    """Computes the cross response of a year.

    Runs taq_cross_response_day_trade_shift_data in parallel for every date
    returned by taq_bussiness_days, adds up the daily results and saves the
    resulting cross-response with taq_save_data.

    :param ticker_i: string of the abbreviation of the stock to be analyzed
     (i.e. 'AAPL').
    :param ticker_j: string of the abbreviation of the stock to be analyzed
     (i.e. 'AAPL').
    :param year: string of the year to be analyzed (i.e '2016').
    :param tau: integer greater than zero (i.e. 50).
    :return: tuple -- The function returns a tuple with numpy arrays: the
     summed daily responses divided by the summed daily counts, and the
     summed daily counts. If both tickers are the same, None is returned.
    """

    # Equal tickers would be a self-response, which is not computed here
    if ticker_i == ticker_j:
        return None

    function_name = taq_cross_response_year_trade_shift_data.__name__
    taq_data_tools_trade_shift.taq_function_header_print_data(
        function_name, ticker_i, ticker_j, year, '', '')

    business_days = taq_data_tools_trade_shift.taq_bussiness_days(year)

    # One (ticker_i, ticker_j, date, tau) argument tuple per day of the year
    day_args = iprod([ticker_i], [ticker_j], business_days, [tau])

    # Parallel computation of the daily cross-responses
    with mp.Pool(processes=mp.cpu_count()) as pool:
        day_results = pool.starmap(
            taq_cross_response_day_trade_shift_data, day_args)

    # Summing over the days gives, per trade shift, the total response
    # (row 0) and the total amount of trades (row 1); their ratio is the
    # average cross-response
    year_totals = np.sum(day_results, axis=0)
    trades_total = year_totals[1]
    cross_response_val = year_totals[0] / trades_total

    # Saving data
    taq_data_tools_trade_shift.taq_save_data(
        f'{function_name}_tau_{tau}', cross_response_val, ticker_i, ticker_j,
        year, '', '')

    return (cross_response_val, trades_total)
# ----------------------------------------------------------------------------
def main():
    """The main function of the script.

    Placeholder used to test the functions in the script; it currently
    performs no work.

    :return: None.
    """

    return None
# ----------------------------------------------------------------------------

# Run main only when the file is executed as a script, not when imported
if __name__ == "__main__":
    main()