dark pool levels
import os
import requests
import time
import polars as pl
from pathlib import Path
from datetime import datetime, timedelta
from lets_plot import *
LetsPlot.setup_html()
Before reaching out to the dark pool endpoint(s), let's begin with an interesting ticker. Institutional ownership is always a hot topic, so let's see what the Point72 13F filing gives us. We can access 13F data like this:
https://api.unusualwhales.com/docs#/operations/PublicApi.InstitutionController.holdings
We will sort the results by value descending to get a look at the largest positions in the portfolio at the top:
name = 'POINT72 ASSET MANAGEMENT LP'
uw_token = os.environ['UW_TOKEN']
headers = {'Accept': 'application/json, text/plain', 'Authorization': uw_token}
url = f'https://api.unusualwhales.com/api/institution/{name}/holdings'
rsp = requests.get(url, headers=headers)
p72_raw_df = pl.DataFrame(rsp.json()['data'])
p72_df = (
p72_raw_df
.sort('value', descending=True)
)
p72_df.head(10)
avg_price | close | date | first_buy | full_name | historical_units | perc_of_share_value | perc_of_total | price_first_buy | put_call | sector | security_type | shares_outstanding | ticker | units | units_change | value |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
str | str | str | str | str | list[i64] | str | str | str | str | str | str | str | str | i64 | i64 | i64 |
null | "607.81" | "2024-09-30" | "2014-06-30" | "SPDR S&P 500 E… | [42087, 42307, … 12546] | "0" | "0.061377313145… | "195.72" | "put" | null | "Option" | null | "SPY" | 42087 | -220 | 2414783712 |
null | "526.48" | "2024-09-30" | "2021-12-31" | "INVESCO QQQ " | [34476, 1614, … 400] | "0" | "0.042768953218… | "397.85" | "put" | null | "Option" | null | "QQQ" | 34476 | 32862 | 1682670132 |
null | "607.81" | "2024-09-30" | "2022-03-31" | "SPDR S&P 500 E… | [21626, 14638, … 6960] | "0" | "0.031538141803… | "451.64" | "call" | null | "Option" | null | "SPY" | 21626 | 6988 | 1240813376 |
"64.22" | "142.44" | "2024-09-30" | "2023-03-31" | "NVIDIA" | [3689814, 2115018, … 0] | "0.015955424146… | "0.011389268586… | "27.78" | null | "Technology" | "Share" | "24530000000.0" | "NVDA" | 3689814 | 1574796 | 448090978 |
"68.52" | "63.25" | "2024-09-30" | "2023-12-31" | "SHELL" | [5474461, 1859576, … 527400] | "0.012855776691… | "0.009176684510… | "65.80" | null | "Energy" | "Share" | null | "SHEL" | 5474461 | 3614885 | 361040703 |
null | "238.93" | "2024-09-30" | "2021-06-30" | "ISHARES RUSSEL… | [15600, 9330, … 150] | "0" | "0.008758511177… | "229.37" | "put" | null | "Option" | null | "IWM" | 15600 | 6270 | 344588400 |
"205.02" | "108.25" | "2024-09-30" | "2021-03-31" | "ARISTA NETWORK… | [878122, 666299, … 1812261] | "0.012001196759… | "0.008566669992… | "18.86812500000… | null | "Technology" | "Share" | "314939883" | "ANET" | 878122 | 211823 | 337040742 |
"39.21" | "43.15" | "2024-09-30" | "2024-06-30" | "COMCAST" | [7543394, 5740384, … 0] | "0.011219497553… | "0.008008679046… | "39.16" | null | "Communication … | "Share" | "3817095628" | "CMCSA" | 7543394 | 1803010 | 315087558 |
"98.65" | "139.07" | "2024-09-30" | "2024-03-31" | "MACOM TECHNOLO… | [2555357, 933044, … 0] | "0.010123548676… | "0.007226371036… | "95.64" | null | "Technology" | "Share" | "72399645" | "MTSI" | 2555357 | 1622313 | 284309009 |
"55.9" | "90.07" | "2024-09-30" | "2021-09-30" | "BOSTON SCIENTI… | [3381819, 3125022, … 197869] | "0.010091055500… | "0.007203176823… | "43.39" | null | "Healthcare" | "Share" | "1473827485" | "BSX" | 3381819 | 256797 | 283396473 |
Ticker SHEL jumps out as an Energy name amongst technology and communication firms, so let's single that one out.
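(If you want to surface that kind of sector outlier programmatically rather than by eyeball, a quick sketch using the sector column from the holdings response above might look like this; note that sector is null for the option positions:)
# Sector mix of the ten largest positions in the 13F data
(
    p72_df
    .head(10)
    .group_by('sector')
    .agg(pl.len().alias('n_positions'))
    .sort('n_positions', descending=True)
)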
There are two dark pool endpoints available from UW: one for recent dark pool trades, and one for ticker-specific dark pool trades. Let's start with the ticker-specific endpoint in this example:
https://api.unusualwhales.com/docs#/operations/PublicApi.DarkpoolController.darkpool_ticker
The first thing we will do is hit the endpoint for some data to see if anything jumps out, using Shell's ticker SHEL:
ticker = 'SHEL'
uw_token = os.environ['UW_TOKEN']
headers = {'Accept': 'application/json, text/plain', 'Authorization': uw_token}
params = {
'ticker': ticker
}
url = f'https://api.unusualwhales.com/api/darkpool/{ticker}'
rsp = requests.get(url, headers=headers, params=params)
Nice! rsp contains the API response in JSON format, so rsp.json()['data'] will give us a list of dictionaries like this:
[{'canceled': False,
'executed_at': '2024-12-06T22:06:46Z',
'ext_hour_sold_codes': 'extended_hours_trade',
'market_center': 'L',
'nbbo_ask': '68.71',
'nbbo_ask_quantity': 100,
'nbbo_bid': '63.21',
'nbbo_bid_quantity': 18,
'premium': '1511485.25',
'price': '63.25',
'sale_cond_codes': 'prio_reference_price',
'size': 23897,
'ticker': 'SHEL',
'tracking_id': 61606319088292,
'trade_code': None,
'trade_settlement': None,
'volume': 5010543},
...
{'canceled': False,
'executed_at': '2024-12-06T14:45:12Z',
'ext_hour_sold_codes': None,
'market_center': 'L',
'nbbo_ask': '63.62',
'nbbo_ask_quantity': 100,
'nbbo_bid': '63.61',
'nbbo_bid_quantity': 1185,
'premium': '292652.00',
'price': '63.62',
'sale_cond_codes': None,
'size': 4600,
'ticker': 'SHEL',
'tracking_id': 35112369931490,
'trade_code': None,
'trade_settlement': None,
'volume': 571115}]
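That list of raw trade dictionaries is a good starting point. If you are scripting this, a small defensive check before parsing can save headaches; a minimal sketch, where darkpool_trades and the print are purely illustrative:
# Optional defensive check before parsing (assumes the JSON payload has a
# 'data' key on success, as in the sample above)
rsp.raise_for_status()
darkpool_trades = rsp.json().get('data', [])
print(f'{len(darkpool_trades)} dark pool prints returned for {ticker}')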
With that raw list in hand, let's compile the data into a polars DataFrame to make it easier to manage and analyze:
raw_shel_df = pl.DataFrame(rsp.json()['data'])
raw_shel_df
canceled | executed_at | ext_hour_sold_codes | market_center | nbbo_ask | nbbo_ask_quantity | nbbo_bid | nbbo_bid_quantity | premium | price | sale_cond_codes | size | ticker | tracking_id | trade_code | trade_settlement | volume |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bool | str | str | str | str | i64 | str | i64 | str | str | str | i64 | str | i64 | str | null | i64 |
false | "2024-12-06T22:… | "extended_hours… | "L" | "68.71" | 100 | "63.21" | 18 | "1511485.25" | "63.25" | "prio_reference… | 23897 | "SHEL" | 61606319088292 | null | null | 5010543 |
false | "2024-12-06T21:… | "extended_hours… | "L" | "68.71" | 100 | "63.27" | 444 | "152432.50" | "63.25" | "prio_reference… | 2410 | "SHEL" | 59074438301536 | null | null | 4986432 |
false | "2024-12-06T21:… | "extended_hours… | "L" | "70" | 20 | "0" | 0 | "443192.75" | "63.25" | "prio_reference… | 7007 | "SHEL" | 57614901371848 | null | null | 4979244 |
false | "2024-12-06T21:… | "extended_hours… | "L" | "70" | 20 | "0" | 0 | "252177.75" | "63.25" | "prio_reference… | 3987 | "SHEL" | 57614897296981 | null | null | 4972237 |
false | "2024-12-06T20:… | null | "L" | "63.20" | 470 | "63.19" | 2074 | "113764.8600" | "63.2027" | "average_price_… | 1800 | "SHEL" | 57369413277600 | "derivative_pri… | null | 4408772 |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
false | "2024-12-06T15:… | null | "L" | "63.52" | 2350 | "63.51" | 813 | "270212.8038" | "63.5197" | null | 4254 | "SHEL" | 36567328416450 | null | null | 855944 |
false | "2024-12-06T15:… | null | "L" | "63.47" | 670 | "63.46" | 341 | "196727.5500" | "63.4605" | null | 3100 | "SHEL" | 36352129973098 | null | null | 804619 |
false | "2024-12-06T14:… | null | "L" | "63.51" | 365 | "63.50" | 3137 | "546143.000" | "63.505" | null | 8600 | "SHEL" | 35706861790196 | null | null | 700235 |
false | "2024-12-06T14:… | null | "L" | "63.54" | 420 | "63.53" | 301 | "908550.500" | "63.535" | null | 14300 | "SHEL" | 35527676888686 | null | null | 663859 |
false | "2024-12-06T14:… | null | "L" | "63.62" | 100 | "63.61" | 1185 | "292652.00" | "63.62" | null | 4600 | "SHEL" | 35112369931490 | null | null | 571115 |
Let's do some basic data cleanup, converting:
- executed_at from a string to a datetime
- nbbo_ask from a string to a float64
- nbbo_bid from a string to a float64
- price from a string to a float64
- premium (which is the dollar amount transacted) from a string to a float64
Then let's add a new column, executed_at_tz, which we will set to Eastern time (since the source data is based on UTC):
shel_df = (
raw_shel_df
.with_columns(
pl.col('executed_at').str.strptime(pl.Datetime, '%Y-%m-%dT%H:%M:%S%.fZ'),
pl.col('nbbo_ask').cast(pl.Float64),
pl.col('nbbo_bid').cast(pl.Float64),
pl.col('price').cast(pl.Float64),
pl.col('premium').cast(pl.Float64)
)
.with_columns(
pl.col('executed_at').dt.convert_time_zone('America/New_York').alias('executed_at_tz')
)
)
shel_df
canceled | executed_at | ext_hour_sold_codes | market_center | nbbo_ask | nbbo_ask_quantity | nbbo_bid | nbbo_bid_quantity | premium | price | sale_cond_codes | size | ticker | tracking_id | trade_code | trade_settlement | volume | executed_at_tz |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bool | datetime[ns] | str | str | f64 | i64 | f64 | i64 | f64 | f64 | str | i64 | str | i64 | str | null | i64 | datetime[ns, America/New_York] |
false | 2024-12-06 22:06:46 | "extended_hours… | "L" | 68.71 | 100 | 63.21 | 18 | 1.5115e6 | 63.25 | "prio_reference… | 23897 | "SHEL" | 61606319088292 | null | null | 5010543 | 2024-12-06 17:06:46 EST |
false | 2024-12-06 21:24:34 | "extended_hours… | "L" | 68.71 | 100 | 63.27 | 444 | 152432.5 | 63.25 | "prio_reference… | 2410 | "SHEL" | 59074438301536 | null | null | 4986432 | 2024-12-06 16:24:34 EST |
false | 2024-12-06 21:00:14 | "extended_hours… | "L" | 70.0 | 20 | 0.0 | 0 | 443192.75 | 63.25 | "prio_reference… | 7007 | "SHEL" | 57614901371848 | null | null | 4979244 | 2024-12-06 16:00:14 EST |
false | 2024-12-06 21:00:14 | "extended_hours… | "L" | 70.0 | 20 | 0.0 | 0 | 252177.75 | 63.25 | "prio_reference… | 3987 | "SHEL" | 57614897296981 | null | null | 4972237 | 2024-12-06 16:00:14 EST |
false | 2024-12-06 20:56:09 | null | "L" | 63.2 | 470 | 63.19 | 2074 | 113764.86 | 63.2027 | "average_price_… | 1800 | "SHEL" | 57369413277600 | "derivative_pri… | null | 4408772 | 2024-12-06 15:56:09 EST |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
false | 2024-12-06 15:09:27 | null | "L" | 63.52 | 2350 | 63.51 | 813 | 270212.8038 | 63.5197 | null | 4254 | "SHEL" | 36567328416450 | null | null | 855944 | 2024-12-06 10:09:27 EST |
false | 2024-12-06 15:05:52 | null | "L" | 63.47 | 670 | 63.46 | 341 | 196727.55 | 63.4605 | null | 3100 | "SHEL" | 36352129973098 | null | null | 804619 | 2024-12-06 10:05:52 EST |
false | 2024-12-06 14:55:06 | null | "L" | 63.51 | 365 | 63.5 | 3137 | 546143.0 | 63.505 | null | 8600 | "SHEL" | 35706861790196 | null | null | 700235 | 2024-12-06 09:55:06 EST |
false | 2024-12-06 14:52:07 | null | "L" | 63.54 | 420 | 63.53 | 301 | 908550.5 | 63.535 | null | 14300 | "SHEL" | 35527676888686 | null | null | 663859 | 2024-12-06 09:52:07 EST |
false | 2024-12-06 14:45:12 | null | "L" | 63.62 | 100 | 63.61 | 1185 | 292652.0 | 63.62 | null | 4600 | "SHEL" | 35112369931490 | null | null | 571115 | 2024-12-06 09:45:12 EST |
OK great, let's get a quick describe() going here on the most interesting columns, namely:
- size
- price
- premium (a combination, since it is size * price)
shel_df.select(['size', 'price', 'premium']).describe()
statistic | size | price | premium |
---|---|---|---|
str | f64 | f64 | f64 |
"count" | 73.0 | 73.0 | 73.0 |
"null_count" | 0.0 | 0.0 | 0.0 |
"mean" | 8825.315068 | 63.32087 | 559915.466995 |
"std" | 18457.824461 | 0.158873 | 1.1730e6 |
"min" | 1595.0 | 63.025 | 100835.9 |
"25%" | 1760.0 | 63.1957 | 110966.24 |
"50%" | 2988.0 | 63.2802 | 189738.2988 |
"75%" | 5000.0 | 63.4505 | 315125.0 |
"max" | 100000.0 | 63.62 | 6.35637e6 |
Interesting: seeing 100_000 as the max size makes me wonder whether there were lots of transactions at a round 100K share size:
(
shel_df
.filter(
pl.col('size') == 100_000
)
)
canceled | executed_at | ext_hour_sold_codes | market_center | nbbo_ask | nbbo_ask_quantity | nbbo_bid | nbbo_bid_quantity | premium | price | sale_cond_codes | size | ticker | tracking_id | trade_code | trade_settlement | volume | executed_at_tz |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bool | datetime[ns] | str | str | f64 | i64 | f64 | i64 | f64 | f64 | str | i64 | str | i64 | str | null | i64 | datetime[ns, America/New_York] |
false | 2024-12-06 17:32:51 | null | "L" | 63.26 | 225 | 63.25 | 707 | 6.35637e6 | 63.5637 | "average_price_… | 100000 | "SHEL" | 45171397784258 | "derivative_pri… | null | 2619290 | 2024-12-06 12:32:51 EST |
false | 2024-12-06 17:10:35 | null | "L" | 63.33 | 238 | 63.32 | 500 | 6.35637e6 | 63.5637 | "average_price_… | 100000 | "SHEL" | 43835701796092 | "derivative_pri… | null | 2225542 | 2024-12-06 12:10:35 EST |
Hmm, so these transactions have a Sale Condition Code of average_price_trade (which means the transactions may or may not have actually occurred at this price, since it reflects an average price over a period of time) and a Trade Code of derivative_priced (which casts further doubt, since it indicates the material terms of the transaction were not determinable at execution)... Let's cut these out of our dataframe, since we cannot say for sure that a trade actually occurred at those prices.
(Note: in polars I specifically need to allow null values to stick around when filtering this way, which is why you're seeing that is_null() check below.)
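Here is a minimal toy illustration of that behavior (not the SHEL data): filter() drops rows where the predicate evaluates to null, so a plain != comparison silently removes the null rows too.
# Toy example: comparisons against null evaluate to null, and filter() drops those rows
demo_df = pl.DataFrame({'sale_cond_codes': ['average_price_trade', None, 'prio_reference_price']})
demo_df.filter(pl.col('sale_cond_codes') != 'average_price_trade')  # 1 row: the null row is dropped
demo_df.filter(
    pl.col('sale_cond_codes').is_null() |
    (pl.col('sale_cond_codes') != 'average_price_trade')
)  # 2 rows: the null row survives
And here is the actual filter on our SHEL data: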
clean_shel_df = (
shel_df
.filter(
(pl.col('sale_cond_codes').is_null()) |
(pl.col('sale_cond_codes') != 'average_price_trade')
)
)
clean_shel_df
canceled | executed_at | ext_hour_sold_codes | market_center | nbbo_ask | nbbo_ask_quantity | nbbo_bid | nbbo_bid_quantity | premium | price | sale_cond_codes | size | ticker | tracking_id | trade_code | trade_settlement | volume | executed_at_tz |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bool | datetime[ns] | str | str | f64 | i64 | f64 | i64 | f64 | f64 | str | i64 | str | i64 | str | null | i64 | datetime[ns, America/New_York] |
false | 2024-12-06 22:06:46 | "extended_hours… | "L" | 68.71 | 100 | 63.21 | 18 | 1.5115e6 | 63.25 | "prio_reference… | 23897 | "SHEL" | 61606319088292 | null | null | 5010543 | 2024-12-06 17:06:46 EST |
false | 2024-12-06 21:24:34 | "extended_hours… | "L" | 68.71 | 100 | 63.27 | 444 | 152432.5 | 63.25 | "prio_reference… | 2410 | "SHEL" | 59074438301536 | null | null | 4986432 | 2024-12-06 16:24:34 EST |
false | 2024-12-06 21:00:14 | "extended_hours… | "L" | 70.0 | 20 | 0.0 | 0 | 443192.75 | 63.25 | "prio_reference… | 7007 | "SHEL" | 57614901371848 | null | null | 4979244 | 2024-12-06 16:00:14 EST |
false | 2024-12-06 21:00:14 | "extended_hours… | "L" | 70.0 | 20 | 0.0 | 0 | 252177.75 | 63.25 | "prio_reference… | 3987 | "SHEL" | 57614897296981 | null | null | 4972237 | 2024-12-06 16:00:14 EST |
false | 2024-12-06 20:42:25 | null | "L" | 63.22 | 1300 | 63.21 | 2100 | 126425.4 | 63.2127 | null | 2000 | "SHEL" | 56545446769025 | null | null | 4100074 | 2024-12-06 15:42:25 EST |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
false | 2024-12-06 15:09:27 | null | "L" | 63.52 | 2350 | 63.51 | 813 | 270212.8038 | 63.5197 | null | 4254 | "SHEL" | 36567328416450 | null | null | 855944 | 2024-12-06 10:09:27 EST |
false | 2024-12-06 15:05:52 | null | "L" | 63.47 | 670 | 63.46 | 341 | 196727.55 | 63.4605 | null | 3100 | "SHEL" | 36352129973098 | null | null | 804619 | 2024-12-06 10:05:52 EST |
false | 2024-12-06 14:55:06 | null | "L" | 63.51 | 365 | 63.5 | 3137 | 546143.0 | 63.505 | null | 8600 | "SHEL" | 35706861790196 | null | null | 700235 | 2024-12-06 09:55:06 EST |
false | 2024-12-06 14:52:07 | null | "L" | 63.54 | 420 | 63.53 | 301 | 908550.5 | 63.535 | null | 14300 | "SHEL" | 35527676888686 | null | null | 663859 | 2024-12-06 09:52:07 EST |
false | 2024-12-06 14:45:12 | null | "L" | 63.62 | 100 | 63.61 | 1185 | 292652.0 | 63.62 | null | 4600 | "SHEL" | 35112369931490 | null | null | 571115 | 2024-12-06 09:45:12 EST |
Now that our dataframe has been scrubbed, let's repeat these steps and collect dark pool data for lots of trading days, not just today.
Note: the sale_cond_codes, trade_code, and trade_settlement columns will intermittently have string values mixed with nulls, and at other times will be entirely null, which can cause issues when combining the daily DataFrames. We will preemptively cast those columns to strings to avoid this hassle.
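To illustrate the issue with a toy example (not the API data): a column that is entirely null arrives with the Null dtype, and vertically concatenating it with a DataFrame where the same column is a string may fail with a schema error. Casting both to String up front sidesteps that:
# Toy example of the dtype mismatch the cast avoids
day_1 = pl.DataFrame({'trade_code': [None, None]})                  # dtype: Null
day_2 = pl.DataFrame({'trade_code': ['derivative_priced', None]})   # dtype: String
# pl.concat([day_1, day_2])  # may raise a schema error: Null vs String dtypes
day_1 = day_1.with_columns(pl.col('trade_code').cast(pl.String))
pl.concat([day_1, day_2])    # works once both columns share the String dtype
With that sorted, here is the full collection loop over the 2024 business days: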
biz_dates = []
current_dt = datetime(2024, 1, 1)
while current_dt <= datetime.today():
if current_dt.weekday() < 5: # Mon = 0, Sun = 6
biz_dates.append(current_dt.date())
current_dt += timedelta(days=1)
biz_date_strs = [d.strftime('%Y-%m-%d') for d in biz_dates]
raw_dfs_by_date = {}
ticker = 'SHEL'
uw_token = os.environ['UW_TOKEN']
headers = {'Accept': 'application/json, text/plain', 'Authorization': uw_token}
url = f'https://api.unusualwhales.com/api/darkpool/{ticker}'
for dt in biz_date_strs:
params = {
'ticker': ticker,
'date': dt
}
rsp = requests.get(url, headers=headers, params=params)
df = pl.DataFrame(rsp.json()['data'])
if df.height > 0:
raw_df = (
df
.with_columns(
pl.col('sale_cond_codes').cast(pl.String).fill_null(''),
pl.col('trade_code').cast(pl.String).fill_null(''),
pl.col('trade_settlement').cast(pl.String).fill_null('')
)
)
raw_dfs_by_date[dt] = raw_df
time.sleep(1) # standard API access capped at 120 requests per minute
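One practical note: on a much longer backfill you may still bump into the 120 requests/minute cap. A small retry helper along these lines (my own sketch, not anything from the UW docs) can smooth that over:
def get_with_retry(url, headers, params, max_tries=3, pause=2.0):
    """Retry a GET a few times if the API responds with HTTP 429 (rate limited)."""
    for attempt in range(max_tries):
        rsp = requests.get(url, headers=headers, params=params)
        if rsp.status_code != 429:
            return rsp
        time.sleep(pause * (attempt + 1))  # simple linear backoff
    return rsp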
Awesome! Now that we have a dictionary with dates (strings) as keys and polars DataFrames as values, let's repeat the data scrubbing steps:
raw_shel_dfs = list(raw_dfs_by_date.values())
raw_shel_df = pl.concat(raw_shel_dfs)
shel_df = (
raw_shel_df
.with_columns(
pl.col('executed_at').str.strptime(pl.Datetime, '%Y-%m-%dT%H:%M:%S%.fZ'),
pl.col('nbbo_ask').cast(pl.Float64),
pl.col('nbbo_bid').cast(pl.Float64),
pl.col('price').cast(pl.Float64),
pl.col('premium').cast(pl.Float64)
)
.with_columns(
pl.col('executed_at').dt.convert_time_zone('America/New_York').alias('executed_at_tz')
)
)
clean_shel_df = (
shel_df
.filter(
pl.col('sale_cond_codes') != 'average_price_trade'
)
)
clean_shel_df
canceled | executed_at | ext_hour_sold_codes | market_center | nbbo_ask | nbbo_ask_quantity | nbbo_bid | nbbo_bid_quantity | premium | price | sale_cond_codes | size | ticker | tracking_id | trade_code | trade_settlement | volume | executed_at_tz |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bool | datetime[ns] | str | str | f64 | i64 | f64 | i64 | f64 | f64 | str | i64 | str | i64 | str | str | i64 | datetime[ns, America/New_York] |
false | 2024-01-02 21:00:15 | "extended_hours… | "L" | 66.5 | 1 | 65.77 | 4 | 384914.4 | 65.91 | "prio_reference… | 5840 | "SHEL" | 57615752945643 | "" | "regular_settle… | 4932055 | 2024-01-02 16:00:15 EST |
false | 2024-01-02 21:00:15 | "extended_hours… | "L" | 66.5 | 1 | 65.77 | 4 | 112047.0 | 65.91 | "prio_reference… | 1700 | "SHEL" | 57615751123282 | "" | "regular_settle… | 4926215 | 2024-01-02 16:00:15 EST |
false | 2024-01-02 21:00:15 | "extended_hours… | "L" | 66.5 | 1 | 65.77 | 4 | 107631.03 | 65.91 | "prio_reference… | 1633 | "SHEL" | 57615266129779 | "" | "regular_settle… | 4924387 | 2024-01-02 16:00:15 EST |
false | 2024-01-02 20:59:01 | null | "L" | 65.9 | 1946 | 65.89 | 2700 | 164725.25 | 65.8901 | "" | 2500 | "SHEL" | 57541156161904 | "" | "regular_settle… | 4795442 | 2024-01-02 15:59:01 EST |
false | 2024-01-02 20:57:58 | null | "L" | 65.88 | 2311 | 65.87 | 1205 | 243719.0 | 65.87 | "" | 3700 | "SHEL" | 57478512916606 | "" | "regular_settle… | 4750663 | 2024-01-02 15:57:58 EST |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
false | 2024-12-06 15:09:27 | null | "L" | 63.52 | 2350 | 63.51 | 813 | 270212.8038 | 63.5197 | "" | 4254 | "SHEL" | 36567328416450 | "" | "" | 855944 | 2024-12-06 10:09:27 EST |
false | 2024-12-06 15:05:52 | null | "L" | 63.47 | 670 | 63.46 | 341 | 196727.55 | 63.4605 | "" | 3100 | "SHEL" | 36352129973098 | "" | "" | 804619 | 2024-12-06 10:05:52 EST |
false | 2024-12-06 14:55:06 | null | "L" | 63.51 | 365 | 63.5 | 3137 | 546143.0 | 63.505 | "" | 8600 | "SHEL" | 35706861790196 | "" | "" | 700235 | 2024-12-06 09:55:06 EST |
false | 2024-12-06 14:52:07 | null | "L" | 63.54 | 420 | 63.53 | 301 | 908550.5 | 63.535 | "" | 14300 | "SHEL" | 35527676888686 | "" | "" | 663859 | 2024-12-06 09:52:07 EST |
false | 2024-12-06 14:45:12 | null | "L" | 63.62 | 100 | 63.61 | 1185 | 292652.0 | 63.62 | "" | 4600 | "SHEL" | 35112369931490 | "" | "" | 571115 | 2024-12-06 09:45:12 EST |
Finally, let's aggregate this "2024 dark pool" polars DataFrame by price to identify the levels at which the most money changed hands (as these might be interesting inflection points):
clean_shel_agg_df = (
clean_shel_df
.group_by('price')
.agg(
[
pl.col('premium').sum().alias('total_premium'),
pl.len().alias('count_of_trades'),
pl.col('executed_at_tz').min().alias('first_trade')
]
)
.sort('total_premium', descending=True)
)
clean_shel_agg_df.head(10)
price | total_premium | count_of_trades | first_trade |
---|---|---|---|
f64 | f64 | u32 | datetime[ns, America/New_York] |
73.27 | 1.4078e8 | 7 | 2024-04-25 12:50:42 EDT |
72.18 | 1.1261e8 | 9 | 2024-04-22 11:44:05 EDT |
69.72 | 1.1244e8 | 4 | 2024-08-06 16:00:11 EDT |
63.22 | 1.08766849e8 | 10 | 2024-01-11 15:57:47 EST |
73.55 | 1.0597e8 | 7 | 2024-05-09 16:00:14 EDT |
73.13 | 1.0432e8 | 11 | 2024-04-12 12:47:30 EDT |
71.66 | 8.8137e7 | 18 | 2024-04-08 12:19:09 EDT |
71.94 | 8.7975e7 | 19 | 2024-04-08 15:12:48 EDT |
70.95 | 8.7561e7 | 10 | 2024-04-16 10:44:34 EDT |
72.37 | 8.7533e7 | 13 | 2024-04-23 10:12:56 EDT |
Wow! I don't know the stats offhand, but suffice it to say I would not have expected the first prints at 7 of the 10 biggest dark pool price levels in SHEL to all land in the month of April! If I were going to do some more research into SHEL, I would definitely start in that time period...
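(A quick programmatic check of that eyeball count, using the aggregated DataFrame from above:)
# Count how many of the ten largest price levels saw their first trade in April
(
    clean_shel_agg_df
    .head(10)
    .filter(pl.col('first_trade').dt.month() == 4)
    .height
)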
Let's take a quick look.
A little "inside baseball" regarding the UW API -- when I need quick closing prices for a security I actually reach for the Realized Volatility endpoint instead of the OHLC endpoint, since the response gives me the daily closing prices ONLY as well as the 30 calendar day implied volatility and 20 trading day realized volatility, two data points I find myself using almsot all the time.
https://api.unusualwhales.com/docs#/operations/PublicApi.TickerController.realized_volatility
ticker = 'SHEL'
uw_token = os.environ['UW_TOKEN']
headers = {'Accept': 'application/json, text/plain', 'Authorization': uw_token}
params = {
'timeframe': 'YTD'
}
url = f'https://api.unusualwhales.com/api/stock/{ticker}/volatility/realized'
rsp = requests.get(url, headers=headers, params=params)
raw_shel_close_df = pl.DataFrame(rsp.json()['data'])
shel_close_df = (
raw_shel_close_df
.with_columns(
pl.col('price').cast(pl.Float64)
)
.select(['date', 'price'])
)
p = ggplot(shel_close_df, aes(x='date', y='price')) + \
geom_line() + \
ggsize(800, 500) + \
ggtitle(f'Shell (${ticker}) YTD Close Prices') + \
theme(
plot_title=element_text(hjust=0.5)
)
p.show()
Very interesting to see that SHEL had such a steep rally into mid-April before market participants changed their stance. Even though modestly higher highs were observed in May, it is clear that the steep price appreciation from March to mid-April ran out of gas there, and the stock has since come all the way back to price levels last observed in March...
Perhaps clusters of large dark pool transactions can signal inflection points in momentum or price?
Let's plot those 10 largest dark pool transactions onto our price chart to confirm or deny this suspicion:
price_df = (
shel_close_df
.with_columns(
pl.lit('normal').alias('signal')
)
)
top_10_darkpool_df = (
clean_shel_agg_df
.head(10)
.with_columns(
pl.col('first_trade').dt.strftime('%Y-%m-%d').alias('date'),
)
.with_columns(
pl.lit('darkpool').alias('signal')
)
.select(['date', 'price', 'signal'])
)
plot_df = pl.concat([price_df, top_10_darkpool_df])
color_mapping = {
'normal': 'black',
'darkpool': 'red'
}
p = ggplot(data=plot_df, mapping=aes(x='date', y='price', color='signal')) + \
geom_line(data=plot_df.filter(pl.col('signal') == 'normal')) + \
geom_point(data=plot_df.filter(pl.col('signal') == 'darkpool'), size=3) + \
ggtitle(f'Shell (${ticker}) YTD Close Prices') + \
scale_color_manual(values=color_mapping) + \
ggsize(800, 500) + \
theme(
plot_title=element_text(hjust=0.5),
legend_position='right'
)
p.show()
Sure looks like confirmation. Perhaps on a larger (multi-year) scale these "Top 10" trades are not really that big, but in the context of 2024 it is visually clear that these trades established a price "level" in April that became a turning point.
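One way to put a rough number on "big in the context of 2024" is the share of total 2024 dark pool premium captured by those ten price levels (a quick ratio sketch; I make no claim here about the exact figure it returns):
# Fraction of 2024 SHEL dark pool premium accounted for by the ten largest price levels
(
    clean_shel_agg_df.head(10)['total_premium'].sum()
    / clean_shel_agg_df['total_premium'].sum()
)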