Medication Analysis Tutorial¶

This tutorial covers medication lookup, treatment identification, episode detection, and adherence calculation.

Overview¶

The medications module helps you:

Look up NDC codes and J-codes by generic name
Get treatment claims for a cohort
Identify treatment episodes and switches
Calculate adherence (PDC)
Classify payer types

from alx_heor.medications import (
    lookup_medications,
    get_treatment_claims,
    assign_medication_category,
    identify_treatment_episodes,
    calculate_pdc,
    classify_payer_type,
)

Step 1: Medication Lookup¶

Look up NDC codes and J-codes by generic name:

from alx_heor.database import RedshiftConnection
from alx_heor.medications import lookup_medications

conn = RedshiftConnection().connect()

# Look up C5 inhibitors
result = lookup_medications(
    conn,
    schema="iqvia_pharmetrics_2024q3",
    generic_names=["eculizumab", "ravulizumab"],
)

print(f"NDC codes: {len(result.ndc_codes)}")
print(f"J-codes: {result.procedure_codes}")

Output:

NDC codes: 12
J-codes: ['J1300', 'J1303', 'J9332', 'J9334']

Access Lookup Results¶

# NDC codes for pharmacy claims
ndc_list = result.ndc_codes

# J-codes for medical claims
jcode_list = result.procedure_codes

# Full lookup table
df_lookup = result.df_lookup
# Columns: ndc, generic_name, brand_name, strength, route

Step 2: Get Treatment Claims¶

from alx_heor.medications import get_treatment_claims

df_rx = get_treatment_claims(
    conn,
    source="iqvia",
    schema="iqvia_pharmetrics_2024q3",
    patient_ids=df_cohort["pat_id"].tolist(),
    generic_names=["eculizumab", "ravulizumab", "efgartigimod"],
    start_year=2015,
    end_year=2024,
)

Output columns:

Column	Description
`pat_id`	Patient identifier
`from_dt`	Service date
`ndc`	NDC code (if pharmacy claim)
`proc1`	Procedure code (if J-code claim)
`generic_name`	Matched generic name
`days_supply`	Days supply (pharmacy claims)

Step 3: Categorize Medications¶

Group medications into therapeutic classes:

from alx_heor.medications import assign_medication_category

# Define categories
categories = {
    "C5_INHIBITOR": ["eculizumab", "ravulizumab"],
    "FCRN_INHIBITOR": ["efgartigimod", "rozanolixizumab"],
    "CHOLINESTERASE": ["pyridostigmine", "neostigmine"],
    "IMMUNOSUPPRESSANT": ["azathioprine", "mycophenolate"],
}

df_rx = assign_medication_category(
    df_rx,
    category_mapping=categories,
    drug_col="generic_name",
    output_col="drug_class",
)

# Check distribution
print(df_rx["drug_class"].value_counts())

Output:

C5_INHIBITOR       15,234
FCRN_INHIBITOR      8,921
CHOLINESTERASE     45,123
IMMUNOSUPPRESSANT  12,456

Step 4: Identify Treatment Episodes¶

Episodes are periods of continuous treatment, separated by gaps:

from alx_heor.medications import identify_treatment_episodes

df_episodes = identify_treatment_episodes(
    df_rx,
    patient_id_col="pat_id",
    date_col="from_dt",
    drug_col="drug_class",
    gap_days=60,  # New episode if gap > 60 days
)

Output:

pat_id	drug_class	episode_num	episode_start	episode_end	claim_count
123456	C5_INHIBITOR	1	2020-03-15	2022-08-10	45
123456	C5_INHIBITOR	2	2023-01-05	2024-02-28	22

Episode-Level Analysis¶

# Time on treatment
df_episodes["duration_days"] = (
    df_episodes["episode_end"] - df_episodes["episode_start"]
).dt.days

# First episode per patient
df_first_episode = df_episodes[df_episodes["episode_num"] == 1]

# Patients with multiple episodes (restarters)
restarters = df_episodes.groupby("pat_id")["episode_num"].max()
df_restarters = restarters[restarters > 1]
print(f"Patients with multiple episodes: {len(df_restarters)}")

Step 5: Calculate Adherence (PDC)¶

Proportion of Days Covered (PDC) measures adherence:

from alx_heor.medications import calculate_pdc

df_pdc = calculate_pdc(
    df_rx,
    patient_id_col="pat_id",
    date_col="from_dt",
    days_supply_col="days_supply",
    observation_start=df_cohort.set_index("pat_id")["index_date"],
    observation_end="2024-03-31",
)

Output:

pat_id	total_days_covered	observation_days	pdc
123456	342	365	0.94
234567	180	365	0.49

Adherence Categories¶

# Categorize adherence
df_pdc["adherence_category"] = pd.cut(
    df_pdc["pdc"],
    bins=[0, 0.5, 0.8, 1.0],
    labels=["Low (<50%)", "Medium (50-80%)", "High (≥80%)"],
)

print(df_pdc["adherence_category"].value_counts())

Step 6: Treatment Patterns¶

First-Line Treatment¶

# Get first treatment per patient
df_first_tx = (
    df_rx
    .sort_values(["pat_id", "from_dt"])
    .groupby("pat_id")
    .first()
    .reset_index()
)

# Distribution of first-line treatments
print(df_first_tx["drug_class"].value_counts(normalize=True))

Treatment Sequences¶

# Get treatment sequence per patient
df_sequence = (
    df_rx
    .sort_values(["pat_id", "from_dt"])
    .groupby("pat_id")["drug_class"]
    .apply(lambda x: " → ".join(x.unique()))
    .reset_index(name="treatment_sequence")
)

# Common sequences
print(df_sequence["treatment_sequence"].value_counts().head(10))

Output:

C5_INHIBITOR                          2,345
CHOLINESTERASE → C5_INHIBITOR         1,234
C5_INHIBITOR → FCRN_INHIBITOR           567
IMMUNOSUPPRESSANT → C5_INHIBITOR        456

Step 7: Payer Classification¶

from alx_heor.medications import classify_payer_type

df_payer = classify_payer_type(
    conn,
    source="iqvia",
    schema="iqvia_pharmetrics_2024q3",
    patient_ids=df_cohort["pat_id"].tolist(),
)

Output:

pat_id	pay_type	payer_type
123456	1	COMMERCIAL
234567	2	MEDICARE
345678	3	MEDICAID

# Distribution
print(df_payer["payer_type"].value_counts(normalize=True))

Complete Example¶

from alx_heor.database import RedshiftConnection
from alx_heor.cohort import get_cohort, CohortCriteria, DiagnosisCriteria
from alx_heor.medications import (
    get_treatment_claims,
    assign_medication_category,
    identify_treatment_episodes,
    calculate_pdc,
)

# Connect and build cohort
conn = RedshiftConnection().connect()
result = get_cohort(
    conn, source="iqvia", schema="iqvia_pharmetrics_2024q3",
    criteria=CohortCriteria(
        primary_diagnosis=DiagnosisCriteria(codes=["G700", "G7000", "G7001"]),
        min_age=18,
    ),
    start_year=2015, end_year=2024,
)

# Get treatment claims
df_rx = get_treatment_claims(
    conn, source="iqvia", schema="iqvia_pharmetrics_2024q3",
    patient_ids=result.df_cohort["pat_id"].tolist(),
    generic_names=["eculizumab", "ravulizumab", "efgartigimod", "pyridostigmine"],
    start_year=2015, end_year=2024,
)

# Categorize
categories = {
    "BIOLOGIC": ["eculizumab", "ravulizumab", "efgartigimod"],
    "SYMPTOMATIC": ["pyridostigmine"],
}
df_rx = assign_medication_category(df_rx, categories)

# Episodes
df_episodes = identify_treatment_episodes(df_rx, gap_days=60)

# Summary
print(f"Cohort: {len(result.df_cohort)} patients")
print(f"Treatment claims: {len(df_rx)}")
print(f"Treatment episodes: {len(df_episodes)}")
print(f"\nTreatment distribution:")
print(df_rx["drug_class"].value_counts())

Next Steps¶

Enrollment Analysis Tutorial - Censoring for survival
API Reference: Medications - Complete documentation