Medication Analysis Tutorial¶
This tutorial covers medication lookup, treatment identification, episode detection, and adherence calculation.
Overview¶
The medications module helps you:
- Look up NDC codes and J-codes by generic name
- Get treatment claims for a cohort
- Identify treatment episodes and switches
- Calculate adherence (PDC)
- Classify payer types
from alx_heor.medications import (
lookup_medications,
get_treatment_claims,
assign_medication_category,
identify_treatment_episodes,
calculate_pdc,
classify_payer_type,
)
Step 1: Medication Lookup¶
Look up NDC codes and J-codes by generic name:
from alx_heor.database import RedshiftConnection
from alx_heor.medications import lookup_medications
conn = RedshiftConnection().connect()
# Look up C5 inhibitors
result = lookup_medications(
conn,
schema="iqvia_pharmetrics_2024q3",
generic_names=["eculizumab", "ravulizumab"],
)
print(f"NDC codes: {len(result.ndc_codes)}")
print(f"J-codes: {result.procedure_codes}")
Output:
Access Lookup Results¶
# NDC codes for pharmacy claims
ndc_list = result.ndc_codes
# J-codes for medical claims
jcode_list = result.procedure_codes
# Full lookup table
df_lookup = result.df_lookup
# Columns: ndc, generic_name, brand_name, strength, route
Step 2: Get Treatment Claims¶
from alx_heor.medications import get_treatment_claims
df_rx = get_treatment_claims(
conn,
source="iqvia",
schema="iqvia_pharmetrics_2024q3",
patient_ids=df_cohort["pat_id"].tolist(),
generic_names=["eculizumab", "ravulizumab", "efgartigimod"],
start_year=2015,
end_year=2024,
)
Output columns:
| Column | Description |
|---|---|
pat_id |
Patient identifier |
from_dt |
Service date |
ndc |
NDC code (if pharmacy claim) |
proc1 |
Procedure code (if J-code claim) |
generic_name |
Matched generic name |
days_supply |
Days supply (pharmacy claims) |
Step 3: Categorize Medications¶
Group medications into therapeutic classes:
from alx_heor.medications import assign_medication_category
# Define categories
categories = {
"C5_INHIBITOR": ["eculizumab", "ravulizumab"],
"FCRN_INHIBITOR": ["efgartigimod", "rozanolixizumab"],
"CHOLINESTERASE": ["pyridostigmine", "neostigmine"],
"IMMUNOSUPPRESSANT": ["azathioprine", "mycophenolate"],
}
df_rx = assign_medication_category(
df_rx,
category_mapping=categories,
drug_col="generic_name",
output_col="drug_class",
)
# Check distribution
print(df_rx["drug_class"].value_counts())
Output:
Step 4: Identify Treatment Episodes¶
Episodes are periods of continuous treatment, separated by gaps:
from alx_heor.medications import identify_treatment_episodes
df_episodes = identify_treatment_episodes(
df_rx,
patient_id_col="pat_id",
date_col="from_dt",
drug_col="drug_class",
gap_days=60, # New episode if gap > 60 days
)
Output:
| pat_id | drug_class | episode_num | episode_start | episode_end | claim_count |
|---|---|---|---|---|---|
| 123456 | C5_INHIBITOR | 1 | 2020-03-15 | 2022-08-10 | 45 |
| 123456 | C5_INHIBITOR | 2 | 2023-01-05 | 2024-02-28 | 22 |
Episode-Level Analysis¶
# Time on treatment
df_episodes["duration_days"] = (
df_episodes["episode_end"] - df_episodes["episode_start"]
).dt.days
# First episode per patient
df_first_episode = df_episodes[df_episodes["episode_num"] == 1]
# Patients with multiple episodes (restarters)
restarters = df_episodes.groupby("pat_id")["episode_num"].max()
df_restarters = restarters[restarters > 1]
print(f"Patients with multiple episodes: {len(df_restarters)}")
Step 5: Calculate Adherence (PDC)¶
Proportion of Days Covered (PDC) measures adherence:
from alx_heor.medications import calculate_pdc
df_pdc = calculate_pdc(
df_rx,
patient_id_col="pat_id",
date_col="from_dt",
days_supply_col="days_supply",
observation_start=df_cohort.set_index("pat_id")["index_date"],
observation_end="2024-03-31",
)
Output:
| pat_id | total_days_covered | observation_days | pdc |
|---|---|---|---|
| 123456 | 342 | 365 | 0.94 |
| 234567 | 180 | 365 | 0.49 |
Adherence Categories¶
# Categorize adherence
df_pdc["adherence_category"] = pd.cut(
df_pdc["pdc"],
bins=[0, 0.5, 0.8, 1.0],
labels=["Low (<50%)", "Medium (50-80%)", "High (≥80%)"],
)
print(df_pdc["adherence_category"].value_counts())
Step 6: Treatment Patterns¶
First-Line Treatment¶
# Get first treatment per patient
df_first_tx = (
df_rx
.sort_values(["pat_id", "from_dt"])
.groupby("pat_id")
.first()
.reset_index()
)
# Distribution of first-line treatments
print(df_first_tx["drug_class"].value_counts(normalize=True))
Treatment Sequences¶
# Get treatment sequence per patient
df_sequence = (
df_rx
.sort_values(["pat_id", "from_dt"])
.groupby("pat_id")["drug_class"]
.apply(lambda x: " → ".join(x.unique()))
.reset_index(name="treatment_sequence")
)
# Common sequences
print(df_sequence["treatment_sequence"].value_counts().head(10))
Output:
C5_INHIBITOR 2,345
CHOLINESTERASE → C5_INHIBITOR 1,234
C5_INHIBITOR → FCRN_INHIBITOR 567
IMMUNOSUPPRESSANT → C5_INHIBITOR 456
Step 7: Payer Classification¶
from alx_heor.medications import classify_payer_type
df_payer = classify_payer_type(
conn,
source="iqvia",
schema="iqvia_pharmetrics_2024q3",
patient_ids=df_cohort["pat_id"].tolist(),
)
Output:
| pat_id | pay_type | payer_type |
|---|---|---|
| 123456 | 1 | COMMERCIAL |
| 234567 | 2 | MEDICARE |
| 345678 | 3 | MEDICAID |
Complete Example¶
from alx_heor.database import RedshiftConnection
from alx_heor.cohort import get_cohort, CohortCriteria, DiagnosisCriteria
from alx_heor.medications import (
get_treatment_claims,
assign_medication_category,
identify_treatment_episodes,
calculate_pdc,
)
# Connect and build cohort
conn = RedshiftConnection().connect()
result = get_cohort(
conn, source="iqvia", schema="iqvia_pharmetrics_2024q3",
criteria=CohortCriteria(
primary_diagnosis=DiagnosisCriteria(codes=["G700", "G7000", "G7001"]),
min_age=18,
),
start_year=2015, end_year=2024,
)
# Get treatment claims
df_rx = get_treatment_claims(
conn, source="iqvia", schema="iqvia_pharmetrics_2024q3",
patient_ids=result.df_cohort["pat_id"].tolist(),
generic_names=["eculizumab", "ravulizumab", "efgartigimod", "pyridostigmine"],
start_year=2015, end_year=2024,
)
# Categorize
categories = {
"BIOLOGIC": ["eculizumab", "ravulizumab", "efgartigimod"],
"SYMPTOMATIC": ["pyridostigmine"],
}
df_rx = assign_medication_category(df_rx, categories)
# Episodes
df_episodes = identify_treatment_episodes(df_rx, gap_days=60)
# Summary
print(f"Cohort: {len(result.df_cohort)} patients")
print(f"Treatment claims: {len(df_rx)}")
print(f"Treatment episodes: {len(df_episodes)}")
print(f"\nTreatment distribution:")
print(df_rx["drug_class"].value_counts())
Next Steps¶
- Enrollment Analysis Tutorial - Censoring for survival
- API Reference: Medications - Complete documentation