Source code for taq_data_analysis_responses_physical_short_long

''' TAQ data analysis module.

The functions in the module analyze the data from the NASDAQ stock market,
computing the self- and cross-response functions.

This script requires the following modules:
    * itertools
    * multiprocessing
    * numpy
    * pandas
    * pickle
    * taq_data_tools_responses_physical_short_long

The module contains the following functions:
    * taq_self_response_day_responses_physical_short_long_data - computes the
      self response of a day.
    * taq_self_response_year_responses_physical_short_long_data - computes the
      self response of a year.
    * taq_cross_response_day_responses_physical_short_long_data - computes the
      cross response of a day.
    * taq_cross_response_year_responses_physical_short_long_data - computes the
      cross response of a year.
    * main - the main function of the script.

.. moduleauthor:: Juan Camilo Henao Londono <www.github.com/juanhenao21>
'''

# ----------------------------------------------------------------------------
# Modules

from itertools import product as iprod
import multiprocessing as mp
import numpy as np
import pandas as pd
import pickle

import taq_data_tools_responses_physical_short_long

# ----------------------------------------------------------------------------


def taq_self_response_day_responses_physical_short_long_data(ticker, date,
                                                             tau, tau_p):
    """Computes the self-response of a day.

    Using the midpoint price and trade signs of a ticker computes the
    short, long, normal and shuffled self-response sums for a day. There is
    a constant :math:`\\tau` and :math:`\\tau'` that must be set in the
    parameters.

    For every index ``tau_idx`` in ``range(tau)`` the return is taken over
    a lag of ``tau_idx + 1`` samples (the loaded data carry a shift of 1
    second). Up to and including ``tau_p`` the short, long and normal
    responses are identical. After ``tau_p`` the short response keeps the
    value reached at ``tau_p`` and the long response uses the return between
    the offsets ``tau_p`` and ``tau_idx``.

    The shuffled response uses ``np.random.shuffle``, so it depends on the
    state of the global NumPy random generator.

    :param ticker: string of the abbreviation of the stock to be analyzed
     (i.e. 'AAPL').
    :param date: string with the date of the data to be extracted
     (i.e. '2008-01-02').
    :param tau: integer greater than zero (i.e. 50).
    :param tau_p: integer greater than zero and smaller than tau (i.e. 10).
    :return: tuple -- eight numpy arrays of length tau, in the order
     (self_short, num_short, self_long, num_long, self_response,
     num_response, self_shuffle, num_shuffle). The ``self_*`` arrays are
     sums of return times trade sign and the ``num_*`` arrays are the
     number of non-zero trade signs used for each sum. If a data file is
     missing, a message is printed and eight arrays of zeros are returned.
    :raises AssertionError: if the midpoint and trade sign series do not
     have the same length.
    """

    date_sep = date.split('-')
    year = date_sep[0]
    month = date_sep[1]
    day = date_sep[2]

    data_dir = f'../../taq_data/responses_physical_data_{year}'
    midpoint_path = (f'{data_dir}/taq_midpoint_physical_data'
                     + f'/taq_midpoint_physical_data_midpoint'
                     + f'_{year}{month}{day}_{ticker}.pickle')
    trade_signs_path = (f'{data_dir}/taq_trade_signs_physical_data'
                        + f'/taq_trade_signs_physical_data'
                        + f'_{year}{month}{day}_{ticker}.pickle')

    # Only the file access can raise FileNotFoundError, so only the loading
    # is guarded. The context managers close the files even on failure.
    # NOTE: pickle must only be used with trusted, locally generated files.
    try:
        with open(midpoint_path, 'rb') as midpoint_file:
            midpoint = pickle.load(midpoint_file)
        with open(trade_signs_path, 'rb') as trade_signs_file:
            _, _, trade_sign = pickle.load(trade_signs_file)
    except FileNotFoundError as e:
        print('No data')
        print(e)
        print()
        # Independent arrays, so changing one result never alters another.
        return tuple(np.zeros(tau) for _ in range(8))

    # As the data is loaded from the responses physical module results,
    # the data have a shift of 1 second.
    assert len(midpoint) == len(trade_sign)

    def lagged_window(lag):
        """Trade signs, non-zero sign count and returns for a given lag."""
        signs = trade_sign[:-lag]
        # Displace the numerator `lag` values to the right to get the
        # relative midpoint price change.
        returns = (midpoint[lag:] - midpoint[:-lag]) / midpoint[:-lag]
        return signs, np.count_nonzero(signs), returns

    # Arrays for the sums (self_*) and the trade counts (num_*) of each tau
    self_short = np.zeros(tau)
    self_long = np.zeros(tau)
    self_response = np.zeros(tau)
    self_shuffle = np.zeros(tau)

    num_short = np.zeros(tau)
    num_long = np.zeros(tau)
    num_response = np.zeros(tau)
    num_shuffle = np.zeros(tau)

    # Short response after tau_p: it stays at the value reached at tau_p
    signs, n_trades, returns = lagged_window(tau_p + 1)
    num_short[tau_p:] = n_trades
    if n_trades:
        self_short[tau_p:] = np.sum(returns * signs)

    for tau_idx in range(tau):

        # Normal and shuffled responses, needed for every tau
        signs, n_trades, returns = lagged_window(tau_idx + 1)
        num_response[tau_idx] = n_trades
        num_shuffle[tau_idx] = n_trades

        if n_trades:
            self_response[tau_idx] = np.sum(returns * signs)
            # Shuffle a copy so the loaded trade signs are left untouched
            signs_shuffle = signs.copy()
            np.random.shuffle(signs_shuffle)
            self_shuffle[tau_idx] = np.sum(returns * signs_shuffle)

        if tau_idx <= tau_p:
            # Up to tau_p the short and long responses are the normal one
            num_short[tau_idx] = n_trades
            num_long[tau_idx] = n_trades
            self_short[tau_idx] = self_response[tau_idx]
            self_long[tau_idx] = self_response[tau_idx]
        else:
            # Long response: return between the offsets tau_p and tau_idx
            signs_long = trade_sign[:-(tau_idx + tau_p)]
            n_trades_long = np.count_nonzero(signs_long)
            num_long[tau_idx] = n_trades_long

            returns_long = (midpoint[tau_idx:-tau_p]
                            - midpoint[tau_p:-tau_idx]) \
                / midpoint[tau_p:-tau_idx]

            if n_trades_long:
                self_long[tau_idx] = np.sum(returns_long * signs_long)

    return (self_short, num_short, self_long, num_long,
            self_response, num_response, self_shuffle, num_shuffle)
# ----------------------------------------------------------------------------
def taq_self_response_year_responses_physical_short_long_data(ticker, year,
                                                              tau, tau_p):
    """Computes the self-response of a year.

    Using the taq_self_response_day_responses_physical_short_long_data
    function computes the self-response function for a year. The daily
    sums and trade counts are added over all the dates and every summed
    response is divided by its summed number of trades.

    Entries with a zero trade count are divided by zero, so NumPy yields
    nan or inf for them (with a RuntimeWarning).

    :param ticker: string of the abbreviation of stock to be analyzed
     (i.e. 'AAPL').
    :param year: string of the year to be analyzed (i.e '2016').
    :param tau: integer greater than zero (i.e. 50).
    :param tau_p: integer greater than zero and smaller than tau (i.e. 10).
    :return: tuple -- The function saves the data in a file through
     taq_save_data and returns the same four numpy arrays: the short, long,
     normal and shuffled averaged self-responses.
    """

    function_name = \
        taq_self_response_year_responses_physical_short_long_data.__name__
    taq_data_tools_responses_physical_short_long \
        .taq_function_header_print_data(function_name, ticker, ticker, year,
                                        '', '')

    # presumably the list of business days of the year -- confirm in the
    # tools module
    dates = taq_data_tools_responses_physical_short_long \
        .taq_bussiness_days(year)

    args_prod = iprod([ticker], dates, [tau], [tau_p])

    # Parallel computation of the self-responses, one task per date
    with mp.Pool(processes=mp.cpu_count()) as pool:
        day_results = pool.starmap(
            taq_self_response_day_responses_physical_short_long_data,
            args_prod)

    # To obtain the total self-response, sum over all the days. Every day
    # contributes eight arrays: (sum, number of trades) for the short, long,
    # normal and shuffled responses.
    totals = np.sum(day_results, axis=0)

    # Averaging: summed response over summed number of trades
    self_response_short_val = totals[0] / totals[1]
    self_response_long_val = totals[2] / totals[3]
    self_response_resp_val = totals[4] / totals[5]
    self_response_shuffle_val = totals[6] / totals[7]

    responses = (self_response_short_val, self_response_long_val,
                 self_response_resp_val, self_response_shuffle_val)

    # Saving data
    taq_data_tools_responses_physical_short_long \
        .taq_save_data(f'{function_name}_tau_{tau}_tau_p_{tau_p}',
                       responses, ticker, ticker, year, '', '')

    return responses
# ----------------------------------------------------------------------------
def taq_cross_response_day_responses_physical_short_long_data(ticker_i,
                                                              ticker_j, date,
                                                              tau, tau_p):
    """Computes the cross-response of a day.

    Using the midpoint price of ticker i and trade signs of ticker j
    computes the short, long, normal and shuffled cross-response sums for a
    day. There is a constant :math:`\\tau` and :math:`\\tau'` that must be
    set in the parameters.

    For every index ``tau_idx`` in ``range(tau)`` the return is taken over
    a lag of ``tau_idx + 1`` samples (the loaded data carry a shift of 1
    second). Up to and including ``tau_p`` the short, long and normal
    responses are identical. After ``tau_p`` the short response keeps the
    value reached at ``tau_p`` and the long response uses the return between
    the offsets ``tau_p`` and ``tau_idx``.

    The shuffled response uses ``np.random.shuffle``, so it depends on the
    state of the global NumPy random generator.

    :param ticker_i: string of the abbreviation of the stock whose midpoint
     price is analyzed (i.e. 'AAPL').
    :param ticker_j: string of the abbreviation of the stock whose trade
     signs are analyzed (i.e. 'MSFT').
    :param date: string with the date of the data to be extracted
     (i.e. '2008-01-02').
    :param tau: integer greater than zero (i.e. 50).
    :param tau_p: integer greater than zero and smaller than tau (i.e. 10).
    :return: tuple -- eight numpy arrays of length tau, in the order
     (cross_short, num_short, cross_long, num_long, cross_response,
     num_response, cross_shuffle, num_shuffle). The ``cross_*`` arrays are
     sums of return times trade sign and the ``num_*`` arrays are the
     number of non-zero trade signs used for each sum. If a data file is
     missing, a message is printed and eight arrays of zeros are returned.
     If both tickers are equal (self-response) the function returns None.
    :raises AssertionError: if the midpoint and trade sign series do not
     have the same length.
    """

    date_sep = date.split('-')
    year = date_sep[0]
    month = date_sep[1]
    day = date_sep[2]

    if ticker_i == ticker_j:
        # Self-response
        return None

    data_dir = f'../../taq_data/responses_physical_data_{year}'
    midpoint_path = (f'{data_dir}/taq_midpoint_physical_data'
                     + f'/taq_midpoint_physical_data_midpoint'
                     + f'_{year}{month}{day}_{ticker_i}.pickle')
    trade_signs_path = (f'{data_dir}/taq_trade_signs_physical_data'
                        + f'/taq_trade_signs_physical_data'
                        + f'_{year}{month}{day}_{ticker_j}.pickle')

    # Only the file access can raise FileNotFoundError, so only the loading
    # is guarded. The context managers close the files even on failure.
    # NOTE: pickle must only be used with trusted, locally generated files.
    try:
        with open(midpoint_path, 'rb') as midpoint_file:
            midpoint_i = pickle.load(midpoint_file)
        with open(trade_signs_path, 'rb') as trade_signs_file:
            _, _, trade_sign_j = pickle.load(trade_signs_file)
    except FileNotFoundError as e:
        print('No data')
        print(e)
        print()
        # Independent arrays, so changing one result never alters another.
        return tuple(np.zeros(tau) for _ in range(8))

    # As the data is loaded from the article reproduction module results,
    # the data have a shift of 1 second.
    assert len(midpoint_i) == len(trade_sign_j)

    def lagged_window(lag):
        """Trade signs, non-zero sign count and returns for a given lag."""
        signs = trade_sign_j[:-lag]
        # Displace the numerator `lag` values to the right to get the
        # relative midpoint price change.
        returns = (midpoint_i[lag:] - midpoint_i[:-lag]) / midpoint_i[:-lag]
        return signs, np.count_nonzero(signs), returns

    # Arrays for the sums (cross_*) and the trade counts (num_*) of each tau
    cross_short = np.zeros(tau)
    cross_long = np.zeros(tau)
    cross_response = np.zeros(tau)
    cross_shuffle = np.zeros(tau)

    num_short = np.zeros(tau)
    num_long = np.zeros(tau)
    num_response = np.zeros(tau)
    num_shuffle = np.zeros(tau)

    # Short response after tau_p: it stays at the value reached at tau_p
    signs, n_trades, returns = lagged_window(tau_p + 1)
    num_short[tau_p:] = n_trades
    if n_trades:
        cross_short[tau_p:] = np.sum(returns * signs)

    for tau_idx in range(tau):

        # Normal and shuffled responses, needed for every tau
        signs, n_trades, returns = lagged_window(tau_idx + 1)
        num_response[tau_idx] = n_trades
        num_shuffle[tau_idx] = n_trades

        if n_trades:
            cross_response[tau_idx] = np.sum(returns * signs)
            # Shuffle a copy so the loaded trade signs are left untouched
            signs_shuffle = signs.copy()
            np.random.shuffle(signs_shuffle)
            cross_shuffle[tau_idx] = np.sum(returns * signs_shuffle)

        if tau_idx <= tau_p:
            # Up to tau_p the short and long responses are the normal one
            num_short[tau_idx] = n_trades
            num_long[tau_idx] = n_trades
            cross_short[tau_idx] = cross_response[tau_idx]
            cross_long[tau_idx] = cross_response[tau_idx]
        else:
            # Long response: return between the offsets tau_p and tau_idx
            signs_long = trade_sign_j[:-(tau_idx + tau_p)]
            n_trades_long = np.count_nonzero(signs_long)
            num_long[tau_idx] = n_trades_long

            returns_long = (midpoint_i[tau_idx:-tau_p]
                            - midpoint_i[tau_p:-tau_idx]) \
                / midpoint_i[tau_p:-tau_idx]

            if n_trades_long:
                cross_long[tau_idx] = np.sum(returns_long * signs_long)

    return (cross_short, num_short, cross_long, num_long,
            cross_response, num_response, cross_shuffle, num_shuffle)
# ----------------------------------------------------------------------------
def taq_cross_response_year_responses_physical_short_long_data(ticker_i,
                                                               ticker_j,
                                                               year, tau,
                                                               tau_p):
    """Computes the cross-response of a year.

    Using the taq_cross_response_day_responses_physical_short_long_data
    function computes the cross-response function for a year. The daily
    sums and trade counts are added over all the dates and every summed
    response is divided by its summed number of trades.

    Entries with a zero trade count are divided by zero, so NumPy yields
    nan or inf for them (with a RuntimeWarning).

    :param ticker_i: string of the abbreviation of the stock whose midpoint
     price is analyzed (i.e. 'AAPL').
    :param ticker_j: string of the abbreviation of the stock whose trade
     signs are analyzed (i.e. 'MSFT').
    :param year: string of the year to be analyzed (i.e '2016').
    :param tau: integer greater than zero (i.e. 50).
    :param tau_p: integer greater than zero and smaller than tau (i.e. 10).
    :return: tuple -- The function saves the data in a file through
     taq_save_data and returns the same four numpy arrays: the short, long,
     normal and shuffled averaged cross-responses. If both tickers are
     equal (self-response) nothing is computed and None is returned.
    """

    if ticker_i == ticker_j:
        # Self-response
        return None

    function_name = \
        taq_cross_response_year_responses_physical_short_long_data.__name__
    taq_data_tools_responses_physical_short_long \
        .taq_function_header_print_data(function_name, ticker_i, ticker_j,
                                        year, '', '')

    # presumably the list of business days of the year -- confirm in the
    # tools module
    dates = taq_data_tools_responses_physical_short_long \
        .taq_bussiness_days(year)

    args_prod = iprod([ticker_i], [ticker_j], dates, [tau], [tau_p])

    # Parallel computation of the cross-responses, one task per date
    with mp.Pool(processes=mp.cpu_count()) as pool:
        day_results = pool.starmap(
            taq_cross_response_day_responses_physical_short_long_data,
            args_prod)

    # To obtain the total cross-response, sum over all the days. Every day
    # contributes eight arrays: (sum, number of trades) for the short, long,
    # normal and shuffled responses.
    totals = np.sum(day_results, axis=0)

    # Averaging: summed response over summed number of trades
    cross_response_short_val = totals[0] / totals[1]
    cross_response_long_val = totals[2] / totals[3]
    cross_response_resp_val = totals[4] / totals[5]
    cross_response_shuffle_val = totals[6] / totals[7]

    responses = (cross_response_short_val, cross_response_long_val,
                 cross_response_resp_val, cross_response_shuffle_val)

    # Saving data
    taq_data_tools_responses_physical_short_long \
        .taq_save_data(f'{function_name}_tau_{tau}_tau_p_{tau_p}',
                       responses, ticker_i, ticker_j, year, '', '')

    return responses
# ----------------------------------------------------------------------------
def main():
    """Entry point of the script.

    Placeholder used to test the functions in the script; it currently
    performs no work.

    :return: None.
    """

    return None
# ---------------------------------------------------------------------------- if __name__ == "__main__": main()