dark pool levels
import os
import requests
import time
import polars as pl
from pathlib import Path
from datetime import datetime, timedelta
from lets_plot import *
LetsPlot.setup_html()
Before reaching out to the dark pool endpoint(s), let's begin with an interesting ticker. Institutional ownership is always a hot topic, so let's see what the Point72 13F filing gives us. We can access 13F data like this:
https://api.unusualwhales.com/docs#/operations/PublicApi.InstitutionController.holdings
We will sort the results by value descending to get a look at the largest positions in the portfolio at the top:
name = 'POINT72 ASSET MANAGEMENT LP'
uw_token = os.environ['UW_TOKEN']
headers = {'Accept': 'application/json, text/plain', 'Authorization': uw_token}
url = f'https://api.unusualwhales.com/api/institution/{name}/holdings'
rsp = requests.get(url, headers=headers)
p72_raw_df = pl.DataFrame(rsp.json()['data'])
p72_df = (
p72_raw_df
.sort('value', descending=True)
)
p72_df.head(10)
avg_price | close | date | first_buy | full_name | historical_units | perc_of_share_value | perc_of_total | price_first_buy | put_call | sector | security_type | shares_outstanding | ticker | units | units_change | value |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
str | str | str | str | str | list[i64] | str | str | str | str | str | str | str | str | i64 | i64 | i64 |
null | "607.81" | "2024-09-30" | "2014-06-30" | "SPDR S&P 500 E… | [42087, 42307, … 12546] | "0" | "0.061377313145… | "195.72" | "put" | null | "Option" | null | "SPY" | 42087 | -220 | 2414783712 |
null | "526.48" | "2024-09-30" | "2021-12-31" | "INVESCO QQQ " | [34476, 1614, … 400] | "0" | "0.042768953218… | "397.85" | "put" | null | "Option" | null | "QQQ" | 34476 | 32862 | 1682670132 |
null | "607.81" | "2024-09-30" | "2022-03-31" | "SPDR S&P 500 E… | [21626, 14638, … 6960] | "0" | "0.031538141803… | "451.64" | "call" | null | "Option" | null | "SPY" | 21626 | 6988 | 1240813376 |
"64.22" | "142.44" | "2024-09-30" | "2023-03-31" | "NVIDIA" | [3689814, 2115018, … 0] | "0.015955424146… | "0.011389268586… | "27.78" | null | "Technology" | "Share" | "24530000000.0" | "NVDA" | 3689814 | 1574796 | 448090978 |
"68.52" | "63.25" | "2024-09-30" | "2023-12-31" | "SHELL" | [5474461, 1859576, … 527400] | "0.012855776691… | "0.009176684510… | "65.80" | null | "Energy" | "Share" | null | "SHEL" | 5474461 | 3614885 | 361040703 |
null | "238.93" | "2024-09-30" | "2021-06-30" | "ISHARES RUSSEL… | [15600, 9330, … 150] | "0" | "0.008758511177… | "229.37" | "put" | null | "Option" | null | "IWM" | 15600 | 6270 | 344588400 |
"205.02" | "108.25" | "2024-09-30" | "2021-03-31" | "ARISTA NETWORK… | [878122, 666299, … 1812261] | "0.012001196759… | "0.008566669992… | "18.86812500000… | null | "Technology" | "Share" | "314939883" | "ANET" | 878122 | 211823 | 337040742 |
"39.21" | "43.15" | "2024-09-30" | "2024-06-30" | "COMCAST" | [7543394, 5740384, … 0] | "0.011219497553… | "0.008008679046… | "39.16" | null | "Communication … | "Share" | "3817095628" | "CMCSA" | 7543394 | 1803010 | 315087558 |
"98.65" | "139.07" | "2024-09-30" | "2024-03-31" | "MACOM TECHNOLO… | [2555357, 933044, … 0] | "0.010123548676… | "0.007226371036… | "95.64" | null | "Technology" | "Share" | "72399645" | "MTSI" | 2555357 | 1622313 | 284309009 |
"55.9" | "90.07" | "2024-09-30" | "2021-09-30" | "BOSTON SCIENTI… | [3381819, 3125022, … 197869] | "0.010091055500… | "0.007203176823… | "43.39" | null | "Healthcare" | "Share" | "1473827485" | "BSX" | 3381819 | 256797 | 283396473 |
Ticker SHEL jumps out as an Energy name amongst technology and communication firms, so let's single that one out.
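(If you want to surface that kind of sector outlier programmatically rather than by eyeball, a quick sketch using the sector column from the holdings response above might look like this; note that sector is null for the option positions:)
# Sector mix of the ten largest positions in the 13F data
(
    p72_df
    .head(10)
    .group_by('sector')
    .agg(pl.len().alias('n_positions'))
    .sort('n_positions', descending=True)
)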
There are two dark pool endpoints available from UW: one for recent dark pool trades, and one for ticker-specific dark pool trades. Let's start with the ticker-specific endpoint in this example:
https://api.unusualwhales.com/docs#/operations/PublicApi.DarkpoolController.darkpool_ticker
The first thing we will do is hit the endpoint for some data to see if anything jumps out, using Shell's ticker SHEL:
ticker = 'SHEL'
uw_token = os.environ['UW_TOKEN']
headers = {'Accept': 'application/json, text/plain', 'Authorization': uw_token}
params = {
'ticker': ticker
}
url = f'https://api.unusualwhales.com/api/darkpool/{ticker}'
rsp = requests.get(url, headers=headers, params=params)
Nice! rsp contains the API response in JSON format, so rsp.json()['data'] will give us a list of dictionaries like this:
[{'canceled': False,
'executed_at': '2024-12-06T22:06:46Z',
'ext_hour_sold_codes': 'extended_hours_trade',
'market_center': 'L',
'nbbo_ask': '68.71',
'nbbo_ask_quantity': 100,
'nbbo_bid': '63.21',
'nbbo_bid_quantity': 18,
'premium': '1511485.25',
'price': '63.25',
'sale_cond_codes': 'prio_reference_price',
'size': 23897,
'ticker': 'SHEL',
'tracking_id': 61606319088292,
'trade_code': None,
'trade_settlement': None,
'volume': 5010543},
...
{'canceled': False,
'executed_at': '2024-12-06T14:45:12Z',
'ext_hour_sold_codes': None,
'market_center': 'L',
'nbbo_ask': '63.62',
'nbbo_ask_quantity': 100,
'nbbo_bid': '63.61',
'nbbo_bid_quantity': 1185,
'premium': '292652.00',
'price': '63.62',
'sale_cond_codes': None,
'size': 4600,
'ticker': 'SHEL',
'tracking_id': 35112369931490,
'trade_code': None,
'trade_settlement': None,
'volume': 571115}]
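That list of raw trade dictionaries is a good starting point. If you are scripting this, a small defensive check before parsing can save headaches; a minimal sketch, where darkpool_trades and the print are purely illustrative:
# Optional defensive check before parsing (assumes the JSON payload has a
# 'data' key on success, as in the sample above)
rsp.raise_for_status()
darkpool_trades = rsp.json().get('data', [])
print(f'{len(darkpool_trades)} dark pool prints returned for {ticker}')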
With that raw list in hand, let's compile the data into a polars DataFrame to make it easier to manage and analyze:
raw_shel_df = pl.DataFrame(rsp.json()['data'])
raw_shel_df
canceled | executed_at | ext_hour_sold_codes | market_center | nbbo_ask | nbbo_ask_quantity | nbbo_bid | nbbo_bid_quantity | premium | price | sale_cond_codes | size | ticker | tracking_id | trade_code | trade_settlement | volume |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bool | str | str | str | str | i64 | str | i64 | str | str | str | i64 | str | i64 | str | null | i64 |
false | "2024-12-06T22:… | "extended_hours… | "L" | "68.71" | 100 | "63.21" | 18 | "1511485.25" | "63.25" | "prio_reference… | 23897 | "SHEL" | 61606319088292 | null | null | 5010543 |
false | "2024-12-06T21:… | "extended_hours… | "L" | "68.71" | 100 | "63.27" | 444 | "152432.50" | "63.25" | "prio_reference… | 2410 | "SHEL" | 59074438301536 | null | null | 4986432 |
false | "2024-12-06T21:… | "extended_hours… | "L" | "70" | 20 | "0" | 0 | "443192.75" | "63.25" | "prio_reference… | 7007 | "SHEL" | 57614901371848 | null | null | 4979244 |
false | "2024-12-06T21:… | "extended_hours… | "L" | "70" | 20 | "0" | 0 | "252177.75" | "63.25" | "prio_reference… | 3987 | "SHEL" | 57614897296981 | null | null | 4972237 |
false | "2024-12-06T20:… | null | "L" | "63.20" | 470 | "63.19" | 2074 | "113764.8600" | "63.2027" | "average_price_… | 1800 | "SHEL" | 57369413277600 | "derivative_pri… | null | 4408772 |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
false | "2024-12-06T15:… | null | "L" | "63.52" | 2350 | "63.51" | 813 | "270212.8038" | "63.5197" | null | 4254 | "SHEL" | 36567328416450 | null | null | 855944 |
false | "2024-12-06T15:… | null | "L" | "63.47" | 670 | "63.46" | 341 | "196727.5500" | "63.4605" | null | 3100 | "SHEL" | 36352129973098 | null | null | 804619 |
false | "2024-12-06T14:… | null | "L" | "63.51" | 365 | "63.50" | 3137 | "546143.000" | "63.505" | null | 8600 | "SHEL" | 35706861790196 | null | null | 700235 |
false | "2024-12-06T14:… | null | "L" | "63.54" | 420 | "63.53" | 301 | "908550.500" | "63.535" | null | 14300 | "SHEL" | 35527676888686 | null | null | 663859 |
false | "2024-12-06T14:… | null | "L" | "63.62" | 100 | "63.61" | 1185 | "292652.00" | "63.62" | null | 4600 | "SHEL" | 35112369931490 | null | null | 571115 |
Let's do some basic data cleanup, converting:
- executed_at from a string to a datetime
- nbbo_ask from a string to a float64
- nbbo_bid from a string to a float64
- price from a string to a float64
- premium (which is the dollar amount transacted) from a string to a float64
Then let's add a new column, executed_at_tz, which we will set to Eastern time (since the source data is based on UTC):
shel_df = (
raw_shel_df
.with_columns(
pl.col('executed_at').str.strptime(pl.Datetime, '%Y-%m-%dT%H:%M:%S%.fZ'),
pl.col('nbbo_ask').cast(pl.Float64),
pl.col('nbbo_bid').cast(pl.Float64),
pl.col('price').cast(pl.Float64),
pl.col('premium').cast(pl.Float64)
)
.with_columns(
pl.col('executed_at').dt.convert_time_zone('America/New_York').alias('executed_at_tz')
)
)
shel_df
canceled | executed_at | ext_hour_sold_codes | market_center | nbbo_ask | nbbo_ask_quantity | nbbo_bid | nbbo_bid_quantity | premium | price | sale_cond_codes | size | ticker | tracking_id | trade_code | trade_settlement | volume | executed_at_tz |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bool | datetime[ns] | str | str | f64 | i64 | f64 | i64 | f64 | f64 | str | i64 | str | i64 | str | null | i64 | datetime[ns, America/New_York] |
false | 2024-12-06 22:06:46 | "extended_hours… | "L" | 68.71 | 100 | 63.21 | 18 | 1.5115e6 | 63.25 | "prio_reference… | 23897 | "SHEL" | 61606319088292 | null | null | 5010543 | 2024-12-06 17:06:46 EST |
false | 2024-12-06 21:24:34 | "extended_hours… | "L" | 68.71 | 100 | 63.27 | 444 | 152432.5 | 63.25 | "prio_reference… | 2410 | "SHEL" | 59074438301536 | null | null | 4986432 | 2024-12-06 16:24:34 EST |
false | 2024-12-06 21:00:14 | "extended_hours… | "L" | 70.0 | 20 | 0.0 | 0 | 443192.75 | 63.25 | "prio_reference… | 7007 | "SHEL" | 57614901371848 | null | null | 4979244 | 2024-12-06 16:00:14 EST |
false | 2024-12-06 21:00:14 | "extended_hours… | "L" | 70.0 | 20 | 0.0 | 0 | 252177.75 | 63.25 | "prio_reference… | 3987 | "SHEL" | 57614897296981 | null | null | 4972237 | 2024-12-06 16:00:14 EST |
false | 2024-12-06 20:56:09 | null | "L" | 63.2 | 470 | 63.19 | 2074 | 113764.86 | 63.2027 | "average_price_… | 1800 | "SHEL" | 57369413277600 | "derivative_pri… | null | 4408772 | 2024-12-06 15:56:09 EST |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
false | 2024-12-06 15:09:27 | null | "L" | 63.52 | 2350 | 63.51 | 813 | 270212.8038 | 63.5197 | null | 4254 | "SHEL" | 36567328416450 | null | null | 855944 | 2024-12-06 10:09:27 EST |
false | 2024-12-06 15:05:52 | null | "L" | 63.47 | 670 | 63.46 | 341 | 196727.55 | 63.4605 | null | 3100 | "SHEL" | 36352129973098 | null | null | 804619 | 2024-12-06 10:05:52 EST |
false | 2024-12-06 14:55:06 | null | "L" | 63.51 | 365 | 63.5 | 3137 | 546143.0 | 63.505 | null | 8600 | "SHEL" | 35706861790196 | null | null | 700235 | 2024-12-06 09:55:06 EST |
false | 2024-12-06 14:52:07 | null | "L" | 63.54 | 420 | 63.53 | 301 | 908550.5 | 63.535 | null | 14300 | "SHEL" | 35527676888686 | null | null | 663859 | 2024-12-06 09:52:07 EST |
false | 2024-12-06 14:45:12 | null | "L" | 63.62 | 100 | 63.61 | 1185 | 292652.0 | 63.62 | null | 4600 | "SHEL" | 35112369931490 | null | null | 571115 | 2024-12-06 09:45:12 EST |
OK great, let's get a quick describe() going here on the most interesting columns, namely:
- size
- price
- premium (a combination, since it is size * price)
shel_df.select(['size', 'price', 'premium']).describe()
statistic | size | price | premium |
---|---|---|---|
str | f64 | f64 | f64 |
"count" | 73.0 | 73.0 | 73.0 |
"null_count" | 0.0 | 0.0 | 0.0 |
"mean" | 8825.315068 | 63.32087 | 559915.466995 |
"std" | 18457.824461 | 0.158873 | 1.1730e6 |
"min" | 1595.0 | 63.025 | 100835.9 |
"25%" | 1760.0 | 63.1957 | 110966.24 |
"50%" | 2988.0 | 63.2802 | 189738.2988 |
"75%" | 5000.0 | 63.4505 | 315125.0 |
"max" | 100000.0 | 63.62 | 6.35637e6 |
Interesting: seeing 100_000 as the max size makes me wonder whether there were lots of transactions at a round 100K share size:
(
shel_df
.filter(
pl.col('size') == 100_000
)
)
canceled | executed_at | ext_hour_sold_codes | market_center | nbbo_ask | nbbo_ask_quantity | nbbo_bid | nbbo_bid_quantity | premium | price | sale_cond_codes | size | ticker | tracking_id | trade_code | trade_settlement | volume | executed_at_tz |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bool | datetime[ns] | str | str | f64 | i64 | f64 | i64 | f64 | f64 | str | i64 | str | i64 | str | null | i64 | datetime[ns, America/New_York] |
false | 2024-12-06 17:32:51 | null | "L" | 63.26 | 225 | 63.25 | 707 | 6.35637e6 | 63.5637 | "average_price_… | 100000 | "SHEL" | 45171397784258 | "derivative_pri… | null | 2619290 | 2024-12-06 12:32:51 EST |
false | 2024-12-06 17:10:35 | null | "L" | 63.33 | 238 | 63.32 | 500 | 6.35637e6 | 63.5637 | "average_price_… | 100000 | "SHEL" | 43835701796092 | "derivative_pri… | null | 2225542 | 2024-12-06 12:10:35 EST |
Hmm, so these transactions have a Sale Condition Code of average_price_trade (which means the transactions may or may not have actually occurred at this price, since it reflects an average price over a period of time) and a Trade Code of derivative_priced (which casts further doubt, since it indicates the material terms of the transaction were not determinable at execution)... Let's cut these out of our dataframe, since we cannot say for sure that a trade actually occurred at those prices.
(Note: in polars I specifically need to allow null values to stick around when filtering this way, which is why you're seeing that is_null() check below.)
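Here is a minimal toy illustration of that behavior (not the SHEL data): filter() drops rows where the predicate evaluates to null, so a plain != comparison silently removes the null rows too.
# Toy example: comparisons against null evaluate to null, and filter() drops those rows
demo_df = pl.DataFrame({'sale_cond_codes': ['average_price_trade', None, 'prio_reference_price']})
demo_df.filter(pl.col('sale_cond_codes') != 'average_price_trade')  # 1 row: the null row is dropped
demo_df.filter(
    pl.col('sale_cond_codes').is_null() |
    (pl.col('sale_cond_codes') != 'average_price_trade')
)  # 2 rows: the null row survives
And here is the actual filter on our SHEL data: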
clean_shel_df = (
shel_df
.filter(
(pl.col('sale_cond_codes').is_null()) |
(pl.col('sale_cond_codes') != 'average_price_trade')
)
)
clean_shel_df
canceled | executed_at | ext_hour_sold_codes | market_center | nbbo_ask | nbbo_ask_quantity | nbbo_bid | nbbo_bid_quantity | premium | price | sale_cond_codes | size | ticker | tracking_id | trade_code | trade_settlement | volume | executed_at_tz |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bool | datetime[ns] | str | str | f64 | i64 | f64 | i64 | f64 | f64 | str | i64 | str | i64 | str | null | i64 | datetime[ns, America/New_York] |
false | 2024-12-06 22:06:46 | "extended_hours… | "L" | 68.71 | 100 | 63.21 | 18 | 1.5115e6 | 63.25 | "prio_reference… | 23897 | "SHEL" | 61606319088292 | null | null | 5010543 | 2024-12-06 17:06:46 EST |
false | 2024-12-06 21:24:34 | "extended_hours… | "L" | 68.71 | 100 | 63.27 | 444 | 152432.5 | 63.25 | "prio_reference… | 2410 | "SHEL" | 59074438301536 | null | null | 4986432 | 2024-12-06 16:24:34 EST |
false | 2024-12-06 21:00:14 | "extended_hours… | "L" | 70.0 | 20 | 0.0 | 0 | 443192.75 | 63.25 | "prio_reference… | 7007 | "SHEL" | 57614901371848 | null | null | 4979244 | 2024-12-06 16:00:14 EST |
false | 2024-12-06 21:00:14 | "extended_hours… | "L" | 70.0 | 20 | 0.0 | 0 | 252177.75 | 63.25 | "prio_reference… | 3987 | "SHEL" | 57614897296981 | null | null | 4972237 | 2024-12-06 16:00:14 EST |
false | 2024-12-06 20:42:25 | null | "L" | 63.22 | 1300 | 63.21 | 2100 | 126425.4 | 63.2127 | null | 2000 | "SHEL" | 56545446769025 | null | null | 4100074 | 2024-12-06 15:42:25 EST |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
false | 2024-12-06 15:09:27 | null | "L" | 63.52 | 2350 | 63.51 | 813 | 270212.8038 | 63.5197 | null | 4254 | "SHEL" | 36567328416450 | null | null | 855944 | 2024-12-06 10:09:27 EST |
false | 2024-12-06 15:05:52 | null | "L" | 63.47 | 670 | 63.46 | 341 | 196727.55 | 63.4605 | null | 3100 | "SHEL" | 36352129973098 | null | null | 804619 | 2024-12-06 10:05:52 EST |
false | 2024-12-06 14:55:06 | null | "L" | 63.51 | 365 | 63.5 | 3137 | 546143.0 | 63.505 | null | 8600 | "SHEL" | 35706861790196 | null | null | 700235 | 2024-12-06 09:55:06 EST |
false | 2024-12-06 14:52:07 | null | "L" | 63.54 | 420 | 63.53 | 301 | 908550.5 | 63.535 | null | 14300 | "SHEL" | 35527676888686 | null | null | 663859 | 2024-12-06 09:52:07 EST |
false | 2024-12-06 14:45:12 | null | "L" | 63.62 | 100 | 63.61 | 1185 | 292652.0 | 63.62 | null | 4600 | "SHEL" | 35112369931490 | null | null | 571115 | 2024-12-06 09:45:12 EST |
Now that our dataframe has been scrubbed, let's repeat these steps and collect dark pool data for lots of trading days, not just today.
Note: the sale_cond_codes, trade_code, and trade_settlement columns will intermittently have string values mixed with nulls, and at other times will be entirely null, which can cause issues when combining the daily DataFrames. We will preemptively cast those columns to strings to avoid this hassle.
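To illustrate the issue with a toy example (not the API data): a column that is entirely null arrives with the Null dtype, and vertically concatenating it with a DataFrame where the same column is a string may fail with a schema error. Casting both to String up front sidesteps that:
# Toy example of the dtype mismatch the cast avoids
day_1 = pl.DataFrame({'trade_code': [None, None]})                  # dtype: Null
day_2 = pl.DataFrame({'trade_code': ['derivative_priced', None]})   # dtype: String
# pl.concat([day_1, day_2])  # may raise a schema error: Null vs String dtypes
day_1 = day_1.with_columns(pl.col('trade_code').cast(pl.String))
pl.concat([day_1, day_2])    # works once both columns share the String dtype
With that sorted, here is the full collection loop over the 2024 business days: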
biz_dates = []
current_dt = datetime(2024, 1, 1)
while current_dt <= datetime.today():
if current_dt.weekday() < 5: # Mon = 0, Sun = 6
biz_dates.append(current_dt.date())
current_dt += timedelta(days=1)
biz_date_strs = [d.strftime('%Y-%m-%d') for d in biz_dates]
raw_dfs_by_date = {}
ticker = 'SHEL'
uw_token = os.environ['UW_TOKEN']
headers = {'Accept': 'application/json, text/plain', 'Authorization': uw_token}
url = f'https://api.unusualwhales.com/api/darkpool/{ticker}'
for dt in biz_date_strs:
params = {
'ticker': ticker,
'date': dt
}
rsp = requests.get(url, headers=headers, params=params)
df = pl.DataFrame(rsp.json()['data'])
if df.height > 0:
raw_df = (
df
.with_columns(
pl.col('sale_cond_codes').cast(pl.String).fill_null(''),
pl.col('trade_code').cast(pl.String).fill_null(''),
pl.col('trade_settlement').cast(pl.String).fill_null('')
)
)
raw_dfs_by_date[dt] = raw_df
time.sleep(1) # standard API access capped at 120 requests per minute
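One practical note: on a much longer backfill you may still bump into the 120 requests/minute cap. A small retry helper along these lines (my own sketch, not anything from the UW docs) can smooth that over:
def get_with_retry(url, headers, params, max_tries=3, pause=2.0):
    """Retry a GET a few times if the API responds with HTTP 429 (rate limited)."""
    for attempt in range(max_tries):
        rsp = requests.get(url, headers=headers, params=params)
        if rsp.status_code != 429:
            return rsp
        time.sleep(pause * (attempt + 1))  # simple linear backoff
    return rsp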
Awesome! Now that we have a dictionary with dates (strings) as keys and polars DataFrames as values, let's repeat the data scrubbing steps:
raw_shel_dfs = list(raw_dfs_by_date.values())
raw_shel_df = pl.concat(raw_shel_dfs)
shel_df = (
raw_shel_df
.with_columns(
pl.col('executed_at').str.strptime(pl.Datetime, '%Y-%m-%dT%H:%M:%S%.fZ'),
pl.col('nbbo_ask').cast(pl.Float64),
pl.col('nbbo_bid').cast(pl.Float64),
pl.col('price').cast(pl.Float64),
pl.col('premium').cast(pl.Float64)
)
.with_columns(
pl.col('executed_at').dt.convert_time_zone('America/New_York').alias('executed_at_tz')
)
)
clean_shel_df = (
shel_df
.filter(
pl.col('sale_cond_codes') != 'average_price_trade'
)
)
clean_shel_df
canceled | executed_at | ext_hour_sold_codes | market_center | nbbo_ask | nbbo_ask_quantity | nbbo_bid | nbbo_bid_quantity | premium | price | sale_cond_codes | size | ticker | tracking_id | trade_code | trade_settlement | volume | executed_at_tz |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bool | datetime[ns] | str | str | f64 | i64 | f64 | i64 | f64 | f64 | str | i64 | str | i64 | str | str | i64 | datetime[ns, America/New_York] |
false | 2024-01-02 21:00:15 | "extended_hours… | "L" | 66.5 | 1 | 65.77 | 4 | 384914.4 | 65.91 | "prio_reference… | 5840 | "SHEL" | 57615752945643 | "" | "regular_settle… | 4932055 | 2024-01-02 16:00:15 EST |
false | 2024-01-02 21:00:15 | "extended_hours… | "L" | 66.5 | 1 | 65.77 | 4 | 112047.0 | 65.91 | "prio_reference… | 1700 | "SHEL" | 57615751123282 | "" | "regular_settle… | 4926215 | 2024-01-02 16:00:15 EST |
false | 2024-01-02 21:00:15 | "extended_hours… | "L" | 66.5 | 1 | 65.77 | 4 | 107631.03 | 65.91 | "prio_reference… | 1633 | "SHEL" | 57615266129779 | "" | "regular_settle… | 4924387 | 2024-01-02 16:00:15 EST |
false | 2024-01-02 20:59:01 | null | "L" | 65.9 | 1946 | 65.89 | 2700 | 164725.25 | 65.8901 | "" | 2500 | "SHEL" | 57541156161904 | "" | "regular_settle… | 4795442 | 2024-01-02 15:59:01 EST |
false | 2024-01-02 20:57:58 | null | "L" | 65.88 | 2311 | 65.87 | 1205 | 243719.0 | 65.87 | "" | 3700 | "SHEL" | 57478512916606 | "" | "regular_settle… | 4750663 | 2024-01-02 15:57:58 EST |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
false | 2024-12-06 15:09:27 | null | "L" | 63.52 | 2350 | 63.51 | 813 | 270212.8038 | 63.5197 | "" | 4254 | "SHEL" | 36567328416450 | "" | "" | 855944 | 2024-12-06 10:09:27 EST |
false | 2024-12-06 15:05:52 | null | "L" | 63.47 | 670 | 63.46 | 341 | 196727.55 | 63.4605 | "" | 3100 | "SHEL" | 36352129973098 | "" | "" | 804619 | 2024-12-06 10:05:52 EST |
false | 2024-12-06 14:55:06 | null | "L" | 63.51 | 365 | 63.5 | 3137 | 546143.0 | 63.505 | "" | 8600 | "SHEL" | 35706861790196 | "" | "" | 700235 | 2024-12-06 09:55:06 EST |
false | 2024-12-06 14:52:07 | null | "L" | 63.54 | 420 | 63.53 | 301 | 908550.5 | 63.535 | "" | 14300 | "SHEL" | 35527676888686 | "" | "" | 663859 | 2024-12-06 09:52:07 EST |
false | 2024-12-06 14:45:12 | null | "L" | 63.62 | 100 | 63.61 | 1185 | 292652.0 | 63.62 | "" | 4600 | "SHEL" | 35112369931490 | "" | "" | 571115 | 2024-12-06 09:45:12 EST |
Finally, let's aggregate this "2024 dark pool" polars DataFrame by price to identify the levels at which the most money changed hands (as these might be interesting inflection points):
clean_shel_agg_df = (
clean_shel_df
.group_by('price')
.agg(
[
pl.col('premium').sum().alias('total_premium'),
pl.len().alias('count_of_trades'),
pl.col('executed_at_tz').min().alias('first_trade')
]
)
.sort('total_premium', descending=True)
)
clean_shel_agg_df.head(10)
price | total_premium | count_of_trades | first_trade |
---|---|---|---|
f64 | f64 | u32 | datetime[ns, America/New_York] |
73.27 | 1.4078e8 | 7 | 2024-04-25 12:50:42 EDT |
72.18 | 1.1261e8 | 9 | 2024-04-22 11:44:05 EDT |
69.72 | 1.1244e8 | 4 | 2024-08-06 16:00:11 EDT |
63.22 | 1.08766849e8 | 10 | 2024-01-11 15:57:47 EST |
73.55 | 1.0597e8 | 7 | 2024-05-09 16:00:14 EDT |
73.13 | 1.0432e8 | 11 | 2024-04-12 12:47:30 EDT |
71.66 | 8.8137e7 | 18 | 2024-04-08 12:19:09 EDT |
71.94 | 8.7975e7 | 19 | 2024-04-08 15:12:48 EDT |
70.95 | 8.7561e7 | 10 | 2024-04-16 10:44:34 EDT |
72.37 | 8.7533e7 | 13 | 2024-04-23 10:12:56 EDT |
Wow! I don't know the stats offhand, but suffice it to say I would not have expected the first prints at 7 of the 10 biggest dark pool price levels in SHEL to all land in the month of April! If I were going to do some more research into SHEL, I would definitely start in that time period...
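(A quick programmatic check of that eyeball count, using the aggregated DataFrame from above:)
# Count how many of the ten largest price levels saw their first trade in April
(
    clean_shel_agg_df
    .head(10)
    .filter(pl.col('first_trade').dt.month() == 4)
    .height
)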
Let's take a quick look.
A little "inside baseball" regarding the UW API -- when I need quick closing prices for a security I actually reach for the Realized Volatility endpoint instead of the OHLC endpoint, since the response gives me the daily closing prices ONLY as well as the 30 calendar day implied volatility and 20 trading day realized volatility, two data points I find myself using almsot all the time.
https://api.unusualwhales.com/docs#/operations/PublicApi.TickerController.realized_volatility
ticker = 'SHEL'
uw_token = os.environ['UW_TOKEN']
headers = {'Accept': 'application/json, text/plain', 'Authorization': uw_token}
params = {
'timeframe': 'YTD'
}
url = f'https://api.unusualwhales.com/api/stock/{ticker}/volatility/realized'
rsp = requests.get(url, headers=headers, params=params)
raw_shel_close_df = pl.DataFrame(rsp.json()['data'])
shel_close_df = (
raw_shel_close_df
.with_columns(
pl.col('price').cast(pl.Float64)
)
.select(['date', 'price'])
)
p = ggplot(shel_close_df, aes(x='date', y='price')) + \
geom_line() + \
ggsize(800, 500) + \
ggtitle(f'Shell (${ticker}) YTD Close Prices') + \
theme(
plot_title=element_text(hjust=0.5)
)
p.show()
Very interesting to see that SHEL had such a steep rally into mid-April before market participants changed their stance. Even though modestly higher highs were observed in May, it is clear that the steep price appreciation from March to mid-April ran out of gas there, and the stock has since come all the way back to price levels last observed in March...
Perhaps clusters of large dark pool transactions can signal inflection points in momentum or price?
Let's plot those 10 largest dark pool transactions onto our price chart to confirm or deny this suspicion:
price_df = (
shel_close_df
.with_columns(
pl.lit('normal').alias('signal')
)
)
top_10_darkpool_df = (
clean_shel_agg_df
.head(10)
.with_columns(
pl.col('first_trade').dt.strftime('%Y-%m-%d').alias('date'),
)
.with_columns(
pl.lit('darkpool').alias('signal')
)
.select(['date', 'price', 'signal'])
)
plot_df = pl.concat([price_df, top_10_darkpool_df])
color_mapping = {
'normal': 'black',
'darkpool': 'red'
}
p = ggplot(data=plot_df, mapping=aes(x='date', y='price', color='signal')) + \
geom_line(data=plot_df.filter(pl.col('signal') == 'normal')) + \
geom_point(data=plot_df.filter(pl.col('signal') == 'darkpool'), size=3) + \
ggtitle(f'Shell (${ticker}) YTD Close Prices') + \
scale_color_manual(values=color_mapping) + \
ggsize(800, 500) + \
theme(
plot_title=element_text(hjust=0.5),
legend_position='right'
)
p.show()
Sure looks like confirmation. Perhaps on a larger (multi-year) scale these "Top 10" trades are not really that big, but in the context of 2024 it is visually clear that these trades established a price "level" in April that became a turning point.
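One way to put a rough number on "big in the context of 2024" is the share of total 2024 dark pool premium captured by those ten price levels (a quick ratio sketch; I make no claim here about the exact figure it returns):
# Fraction of 2024 SHEL dark pool premium accounted for by the ten largest price levels
(
    clean_shel_agg_df.head(10)['total_premium'].sum()
    / clean_shel_agg_df['total_premium'].sum()
)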