# Source code for taq_data_analysis_responses_physical_short_long

''' TAQ data analysis module.

The functions in the module analyze the data from the NASDAQ stock market,
computing the self- and cross-response functions.

This script requires the following modules:
    * itertools
    * multiprocessing
    * numpy
    * pandas
    * pickle
    * taq_data_tools_responses_physical_short_long

The module contains the following functions:
    * taq_self_response_day_responses_physical_short_long_data - computes the
      self response of a day.
    * taq_self_response_year_responses_physical_short_long_data - computes the
      self response of a year.
    * taq_cross_response_day_responses_physical_short_long_data - computes the
      cross response of a day.
    * taq_cross_response_year_responses_physical_short_long_data - computes the
      cross response of a year.
    * main - the main function of the script.

.. moduleauthor:: Juan Camilo Henao Londono <www.github.com/juanhenao21>
'''

# ----------------------------------------------------------------------------
# Modules

from itertools import product as iprod
import multiprocessing as mp
import numpy as np
import pandas as pd
import pickle

import taq_data_tools_responses_physical_short_long

# ----------------------------------------------------------------------------


def taq_self_response_day_responses_physical_short_long_data(ticker, date, tau,
                                                             tau_p):
    """Computes the self-response of a day.

    Using the midpoint price and trade signs of a ticker computes, for every
    time lag between 0 and tau - 1, the sum over the day of the products
    between the midpoint price returns and the trade signs, together with the
    number of non-zero trade signs used. The values are sums, not averages;
    the averaging is left to the caller. There is a constant :math:`\\tau`
    and :math:`\\tau'` that must be set in the parameters.

    Four variants are computed:

    * short: equal to the normal response for time lags up to
      :math:`\\tau'` and constant (the value at :math:`\\tau'`) afterwards.
    * long: equal to the normal response for time lags up to
      :math:`\\tau'`; for larger time lags the return is measured between
      the time lags :math:`\\tau'` and :math:`\\tau`.
    * response: the normal response, with the return measured over the
      time lag plus the one second shift of the loaded data.
    * shuffle: the normal response computed with randomly shuffled trade
      signs (uses the global numpy random state).

    :param ticker: string of the abbreviation of the stock to be analyzed
     (i.e. 'AAPL').
    :param date: string with the date of the data to be extracted
     (i.e. '2008-01-02').
    :param tau: integer greater than zero (i.e. 50).
    :param tau_p: integer greater than zero and smaller than tau (i.e. 10).
    :return: tuple -- The function returns a tuple with eight numpy arrays
     of length tau: (self_short, num_short, self_long, num_long,
     self_response, num_response, self_shuffle, num_shuffle). If the data
     files of the day are not found, all the arrays are filled with zeros.
    :raises AssertionError: if the midpoint price and trade signs arrays do
     not have the same length.
    """

    date_sep = date.split('-')

    year = date_sep[0]
    month = date_sep[1]
    day = date_sep[2]

    data_dir = f'../../taq_data/responses_physical_data_{year}'
    midpoint_path = (f'{data_dir}/taq_midpoint_physical_data'
                     + f'/taq_midpoint_physical_data_midpoint'
                     + f'_{year}{month}{day}_{ticker}.pickle')
    trade_signs_path = (f'{data_dir}/taq_trade_signs_physical_data'
                        + f'/taq_trade_signs_physical_data'
                        + f'_{year}{month}{day}_{ticker}.pickle')

    # Load data. Only the loading can raise FileNotFoundError, so the try
    # block is limited to it. The files are closed as soon as they are read.
    # NOTE: pickle must only be used with files generated by this project.
    try:
        with open(midpoint_path, 'rb') as midpoint_file:
            midpoint = pickle.load(midpoint_file)
        with open(trade_signs_path, 'rb') as trade_signs_file:
            _, _, trade_sign = pickle.load(trade_signs_file)

    except FileNotFoundError as e:
        print('No data')
        print(e)
        print()
        # Independent arrays, so a caller that modifies one of them does not
        # modify the others
        return tuple(np.zeros(tau) for _ in range(8))

    # As the data is loaded from the responses physical module results,
    # the data have a shift of 1 second.
    assert len(midpoint) == len(trade_sign)

    # Arrays for the sums and the number of trades of each tau
    self_short = np.zeros(tau)
    self_long = np.zeros(tau)
    self_response = np.zeros(tau)
    self_shuffle = np.zeros(tau)
    num_short = np.zeros(tau)
    num_long = np.zeros(tau)
    num_response = np.zeros(tau)
    num_shuffle = np.zeros(tau)

    # Depending on the tau value
    for tau_idx in range(tau):

        # Normal response. It is needed for every tau value
        shift = tau_idx + 1
        signs = trade_sign[:-shift]
        signs_no_0_len = np.count_nonzero(signs)
        num_response[tau_idx] = signs_no_0_len
        num_shuffle[tau_idx] = signs_no_0_len

        # Obtain the midpoint price return. Displace the numerator shift
        # values to the right and compute the return
        returns = (midpoint[shift:] - midpoint[:-shift]) / midpoint[:-shift]

        # Obtain the self response value. Without trades the sums stay in 0
        if signs_no_0_len:
            self_response[tau_idx] = np.sum(returns * signs)

            # Shuffle a copy to keep the loaded trade signs untouched
            signs_shuffle = signs.copy()
            np.random.shuffle(signs_shuffle)
            self_shuffle[tau_idx] = np.sum(returns * signs_shuffle)

        if tau_idx <= tau_p:
            # Up to tau_p the short and long responses are the normal
            # response
            self_short[tau_idx] = self_response[tau_idx]
            self_long[tau_idx] = self_response[tau_idx]
            num_short[tau_idx] = signs_no_0_len
            num_long[tau_idx] = signs_no_0_len

        else:
            # Short response. After tau_p it keeps the value reached in
            # tau_p
            self_short[tau_idx] = self_short[tau_p]
            num_short[tau_idx] = num_short[tau_p]

            # Long response. The return is measured between tau_p and tau
            signs_long = trade_sign[:-(tau_idx + tau_p)]
            signs_long_no_0_len = np.count_nonzero(signs_long)
            num_long[tau_idx] = signs_long_no_0_len

            returns_long = (midpoint[tau_idx:-tau_p]
                            - midpoint[tau_p:-tau_idx]) \
                / midpoint[tau_p:-tau_idx]

            if signs_long_no_0_len:
                self_long[tau_idx] = np.sum(returns_long * signs_long)

    return (self_short, num_short,
            self_long, num_long,
            self_response, num_response,
            self_shuffle, num_shuffle)

# ----------------------------------------------------------------------------


def taq_self_response_year_responses_physical_short_long_data(ticker, year,
                                                              tau, tau_p):
    """Computes the self-response of a year.

    Using the taq_self_response_day_responses_physical_short_long_data function
    computes the self-response function for a year. The days are computed in
    parallel, the daily sums are added and then divided by the total number
    of trades to obtain the average values. The result is saved with
    taq_save_data and returned.

    :param ticker: string of the abbreviation of stock to be analyzed
     (i.e. 'AAPL').
    :param year: string of the year to be analyzed (i.e '2016').
    :param tau: integer greater than zero (i.e. 50).
    :param tau_p: integer greater than zero and smaller than tau (i.e. 10).
    :return: tuple -- The function saves the data in a file and returns a
     tuple with four numpy arrays: the short, long, normal and shuffled
     self-responses.
    """

    function_name = taq_self_response_year_responses_physical_short_long_data \
        .__name__
    taq_data_tools_responses_physical_short_long \
        .taq_function_header_print_data(function_name, ticker, ticker, year,
                                        '', '')

    dates = taq_data_tools_responses_physical_short_long \
        .taq_bussiness_days(year)

    args_prod = iprod([ticker], dates, [tau], [tau_p])

    # Parallel computation of the self-responses. There is one result (a
    # tuple with eight arrays) for each day
    with mp.Pool(processes=mp.cpu_count()) as pool:
        day_values = pool.starmap(
            taq_self_response_day_responses_physical_short_long_data,
            args_prod)

    # To obtain the total self-response, I sum over all the self-response
    # values and all the amount of trades (averaging values). The even rows
    # have the sums and the odd rows the corresponding number of trades
    self_v_final = np.sum(day_values, axis=0)

    self_responses = (self_v_final[0] / self_v_final[1],
                      self_v_final[2] / self_v_final[3],
                      self_v_final[4] / self_v_final[5],
                      self_v_final[6] / self_v_final[7])

    # Saving data
    taq_data_tools_responses_physical_short_long \
        .taq_save_data(f'{function_name}_tau_{tau}_tau_p_{tau_p}',
                       self_responses, ticker, ticker, year, '', '')

    return self_responses

# ----------------------------------------------------------------------------


def taq_cross_response_day_responses_physical_short_long_data(ticker_i,
                                                              ticker_j, date,
                                                              tau, tau_p):
    """Computes the cross-response of a day.

    Using the midpoint price of ticker i and trade signs of ticker j computes,
    for every time lag between 0 and tau - 1, the sum over the day of the
    products between the midpoint price returns and the trade signs, together
    with the number of non-zero trade signs used. The values are sums, not
    averages; the averaging is left to the caller. There is a constant
    :math:`\\tau` and :math:`\\tau'` that must be set in the parameters.

    Four variants are computed:

    * short: equal to the normal response for time lags up to
      :math:`\\tau'` and constant (the value at :math:`\\tau'`) afterwards.
    * long: equal to the normal response for time lags up to
      :math:`\\tau'`; for larger time lags the return is measured between
      the time lags :math:`\\tau'` and :math:`\\tau`.
    * response: the normal response, with the return measured over the
      time lag plus the one second shift of the loaded data.
    * shuffle: the normal response computed with randomly shuffled trade
      signs (uses the global numpy random state).

    :param ticker_i: string of the abbreviation of the stock to be analyzed
     (i.e. 'AAPL').
    :param ticker_j: string of the abbreviation of the stock to be analyzed
     (i.e. 'AAPL').
    :param date: string with the date of the data to be extracted
     (i.e. '2008-01-02').
    :param tau: integer greater than zero (i.e. 50).
    :param tau_p: integer greater than zero and smaller than tau (i.e. 10).
    :return: tuple -- The function returns a tuple with eight numpy arrays
     of length tau: (cross_short, num_short, cross_long, num_long,
     cross_response, num_response, cross_shuffle, num_shuffle). If the data
     files of the day are not found, all the arrays are filled with zeros.
     If both tickers are the same (self-response) the function returns
     None.
    :raises AssertionError: if the midpoint price and trade signs arrays do
     not have the same length.
    """

    if (ticker_i == ticker_j):

        # Self-response
        return None

    date_sep = date.split('-')

    year = date_sep[0]
    month = date_sep[1]
    day = date_sep[2]

    data_dir = f'../../taq_data/responses_physical_data_{year}'
    midpoint_path = (f'{data_dir}/taq_midpoint_physical_data'
                     + f'/taq_midpoint_physical_data_midpoint'
                     + f'_{year}{month}{day}_{ticker_i}.pickle')
    trade_signs_path = (f'{data_dir}/taq_trade_signs_physical_data'
                        + f'/taq_trade_signs_physical_data'
                        + f'_{year}{month}{day}_{ticker_j}.pickle')

    # Load data. Only the loading can raise FileNotFoundError, so the try
    # block is limited to it. The files are closed as soon as they are read.
    # NOTE: pickle must only be used with files generated by this project.
    try:
        with open(midpoint_path, 'rb') as midpoint_file:
            midpoint_i = pickle.load(midpoint_file)
        with open(trade_signs_path, 'rb') as trade_signs_file:
            _, _, trade_sign_j = pickle.load(trade_signs_file)

    except FileNotFoundError as e:
        print('No data')
        print(e)
        print()
        # Independent arrays, so a caller that modifies one of them does not
        # modify the others
        return tuple(np.zeros(tau) for _ in range(8))

    # As the data is loaded from the responses physical module results,
    # the data have a shift of 1 second.
    assert len(midpoint_i) == len(trade_sign_j)

    # Arrays for the sums and the number of trades of each tau
    cross_short = np.zeros(tau)
    cross_long = np.zeros(tau)
    cross_response = np.zeros(tau)
    cross_shuffle = np.zeros(tau)
    num_short = np.zeros(tau)
    num_long = np.zeros(tau)
    num_response = np.zeros(tau)
    num_shuffle = np.zeros(tau)

    # Depending on the tau value
    for tau_idx in range(tau):

        # Normal response. It is needed for every tau value
        shift = tau_idx + 1
        signs = trade_sign_j[:-shift]
        signs_no_0_len = np.count_nonzero(signs)
        num_response[tau_idx] = signs_no_0_len
        num_shuffle[tau_idx] = signs_no_0_len

        # Obtain the midpoint price return. Displace the numerator shift
        # values to the right and compute the return
        returns = (midpoint_i[shift:] - midpoint_i[:-shift]) \
            / midpoint_i[:-shift]

        # Obtain the cross response value. Without trades the sums stay in 0
        if signs_no_0_len:
            cross_response[tau_idx] = np.sum(returns * signs)

            # Shuffle a copy to keep the loaded trade signs untouched
            signs_shuffle = signs.copy()
            np.random.shuffle(signs_shuffle)
            cross_shuffle[tau_idx] = np.sum(returns * signs_shuffle)

        if tau_idx <= tau_p:
            # Up to tau_p the short and long responses are the normal
            # response
            cross_short[tau_idx] = cross_response[tau_idx]
            cross_long[tau_idx] = cross_response[tau_idx]
            num_short[tau_idx] = signs_no_0_len
            num_long[tau_idx] = signs_no_0_len

        else:
            # Short response. After tau_p it keeps the value reached in
            # tau_p
            cross_short[tau_idx] = cross_short[tau_p]
            num_short[tau_idx] = num_short[tau_p]

            # Long response. The return is measured between tau_p and tau
            signs_long = trade_sign_j[:-(tau_idx + tau_p)]
            signs_long_no_0_len = np.count_nonzero(signs_long)
            num_long[tau_idx] = signs_long_no_0_len

            returns_long = (midpoint_i[tau_idx:-tau_p]
                            - midpoint_i[tau_p:-tau_idx]) \
                / midpoint_i[tau_p:-tau_idx]

            if signs_long_no_0_len:
                cross_long[tau_idx] = np.sum(returns_long * signs_long)

    return (cross_short, num_short,
            cross_long, num_long,
            cross_response, num_response,
            cross_shuffle, num_shuffle)

# ----------------------------------------------------------------------------


def taq_cross_response_year_responses_physical_short_long_data(ticker_i,
                                                               ticker_j, year,
                                                               tau, tau_p):
    """Computes the cross-response of a year.

    Using the taq_cross_response_day_responses_physical_short_long_data
    function computes the cross-response function for a year. The days are
    computed in parallel, the daily sums are added and then divided by the
    total number of trades to obtain the average values. The result is saved
    with taq_save_data and returned.

    :param ticker_i: string of the abbreviation of the stock to be analyzed
     (i.e. 'AAPL').
    :param ticker_j: string of the abbreviation of the stock to be analyzed
     (i.e. 'AAPL').
    :param year: string of the year to be analyzed (i.e '2016').
    :param tau: integer greater than zero (i.e. 50).
    :param tau_p: integer greater than zero and smaller than tau (i.e. 10).
    :return: tuple -- The function saves the data in a file and returns a
     tuple with four numpy arrays: the short, long, normal and shuffled
     cross-responses. If both tickers are the same (self-response) nothing
     is computed or saved and the function returns None.
    """

    if (ticker_i == ticker_j):

        # Self-response
        return None

    function_name = \
        taq_cross_response_year_responses_physical_short_long_data \
        .__name__
    taq_data_tools_responses_physical_short_long \
        .taq_function_header_print_data(function_name, ticker_i, ticker_j,
                                        year, '', '')

    dates = taq_data_tools_responses_physical_short_long \
        .taq_bussiness_days(year)

    args_prod = iprod([ticker_i], [ticker_j], dates, [tau], [tau_p])

    # Parallel computation of the cross-responses. There is one result (a
    # tuple with eight arrays) for each day
    with mp.Pool(processes=mp.cpu_count()) as pool:
        day_values = pool.starmap(
            taq_cross_response_day_responses_physical_short_long_data,
            args_prod)

    # To obtain the total cross-response, I sum over all the cross-response
    # values and all the amount of trades (averaging values). The even rows
    # have the sums and the odd rows the corresponding number of trades
    cross_v_final = np.sum(day_values, axis=0)

    cross_responses = (cross_v_final[0] / cross_v_final[1],
                       cross_v_final[2] / cross_v_final[3],
                       cross_v_final[4] / cross_v_final[5],
                       cross_v_final[6] / cross_v_final[7])

    # Saving data
    taq_data_tools_responses_physical_short_long \
        .taq_save_data(f'{function_name}_tau_{tau}_tau_p_{tau_p}',
                       cross_responses, ticker_i, ticker_j, year, '', '')

    return cross_responses

# ----------------------------------------------------------------------------


def main():
    """The main function of the script.

    The main function is used to test the functions in the script. At the
    moment it does not run anything.

    :return: None.
    """

    return None

# ----------------------------------------------------------------------------


# Run main only when the module is executed as a script, not when it is
# imported.
if __name__ == "__main__":
    main()