Skip to content

Medication Analysis Tutorial

This tutorial covers medication lookup, treatment identification, episode detection, and adherence calculation.

Overview

The medications module helps you:

  1. Look up NDC codes and J-codes by generic name
  2. Get treatment claims for a cohort
  3. Identify treatment episodes and switches
  4. Calculate adherence (PDC)
  5. Classify payer types
from alx_heor.medications import (
    lookup_medications,
    get_treatment_claims,
    assign_medication_category,
    identify_treatment_episodes,
    calculate_pdc,
    classify_payer_type,
)

Step 1: Medication Lookup

Look up NDC codes and J-codes by generic name:

from alx_heor.database import RedshiftConnection
from alx_heor.medications import lookup_medications

conn = RedshiftConnection().connect()

# Look up C5 inhibitors
result = lookup_medications(
    conn,
    schema="iqvia_pharmetrics_2024q3",
    generic_names=["eculizumab", "ravulizumab"],
)

print(f"NDC codes: {len(result.ndc_codes)}")
print(f"J-codes: {result.procedure_codes}")

Output:

NDC codes: 12
J-codes: ['J1300', 'J1303', 'J9332', 'J9334']

Access Lookup Results

# NDC codes for pharmacy claims
ndc_list = result.ndc_codes

# J-codes for medical claims
jcode_list = result.procedure_codes

# Full lookup table
df_lookup = result.df_lookup
# Columns: ndc, generic_name, brand_name, strength, route

Step 2: Get Treatment Claims

from alx_heor.medications import get_treatment_claims

df_rx = get_treatment_claims(
    conn,
    source="iqvia",
    schema="iqvia_pharmetrics_2024q3",
    patient_ids=df_cohort["pat_id"].tolist(),
    generic_names=["eculizumab", "ravulizumab", "efgartigimod"],
    start_year=2015,
    end_year=2024,
)

Output columns:

Column Description
pat_id Patient identifier
from_dt Service date
ndc NDC code (if pharmacy claim)
proc1 Procedure code (if J-code claim)
generic_name Matched generic name
days_supply Days supply (pharmacy claims)

Step 3: Categorize Medications

Group medications into therapeutic classes:

from alx_heor.medications import assign_medication_category

# Define categories
categories = {
    "C5_INHIBITOR": ["eculizumab", "ravulizumab"],
    "FCRN_INHIBITOR": ["efgartigimod", "rozanolixizumab"],
    "CHOLINESTERASE": ["pyridostigmine", "neostigmine"],
    "IMMUNOSUPPRESSANT": ["azathioprine", "mycophenolate"],
}

df_rx = assign_medication_category(
    df_rx,
    category_mapping=categories,
    drug_col="generic_name",
    output_col="drug_class",
)

# Check distribution
print(df_rx["drug_class"].value_counts())

Output:

C5_INHIBITOR       15,234
FCRN_INHIBITOR      8,921
CHOLINESTERASE     45,123
IMMUNOSUPPRESSANT  12,456

Step 4: Identify Treatment Episodes

Episodes are periods of continuous treatment, separated by gaps:

from alx_heor.medications import identify_treatment_episodes

df_episodes = identify_treatment_episodes(
    df_rx,
    patient_id_col="pat_id",
    date_col="from_dt",
    drug_col="drug_class",
    gap_days=60,  # New episode if gap > 60 days
)

Output:

pat_id drug_class episode_num episode_start episode_end claim_count
123456 C5_INHIBITOR 1 2020-03-15 2022-08-10 45
123456 C5_INHIBITOR 2 2023-01-05 2024-02-28 22

Episode-Level Analysis

# Time on treatment
df_episodes["duration_days"] = (
    df_episodes["episode_end"] - df_episodes["episode_start"]
).dt.days

# First episode per patient
df_first_episode = df_episodes[df_episodes["episode_num"] == 1]

# Patients with multiple episodes (restarters)
restarters = df_episodes.groupby("pat_id")["episode_num"].max()
df_restarters = restarters[restarters > 1]
print(f"Patients with multiple episodes: {len(df_restarters)}")

Step 5: Calculate Adherence (PDC)

Proportion of Days Covered (PDC) measures adherence:

from alx_heor.medications import calculate_pdc

df_pdc = calculate_pdc(
    df_rx,
    patient_id_col="pat_id",
    date_col="from_dt",
    days_supply_col="days_supply",
    observation_start=df_cohort.set_index("pat_id")["index_date"],
    observation_end="2024-03-31",
)

Output:

pat_id total_days_covered observation_days pdc
123456 342 365 0.94
234567 180 365 0.49

Adherence Categories

# Categorize adherence
df_pdc["adherence_category"] = pd.cut(
    df_pdc["pdc"],
    bins=[0, 0.5, 0.8, 1.0],
    labels=["Low (<50%)", "Medium (50-80%)", "High (≥80%)"],
)

print(df_pdc["adherence_category"].value_counts())

Step 6: Treatment Patterns

First-Line Treatment

# Get first treatment per patient
df_first_tx = (
    df_rx
    .sort_values(["pat_id", "from_dt"])
    .groupby("pat_id")
    .first()
    .reset_index()
)

# Distribution of first-line treatments
print(df_first_tx["drug_class"].value_counts(normalize=True))

Treatment Sequences

# Get treatment sequence per patient
df_sequence = (
    df_rx
    .sort_values(["pat_id", "from_dt"])
    .groupby("pat_id")["drug_class"]
    .apply(lambda x: " → ".join(x.unique()))
    .reset_index(name="treatment_sequence")
)

# Common sequences
print(df_sequence["treatment_sequence"].value_counts().head(10))

Output:

C5_INHIBITOR                          2,345
CHOLINESTERASE → C5_INHIBITOR         1,234
C5_INHIBITOR → FCRN_INHIBITOR           567
IMMUNOSUPPRESSANT → C5_INHIBITOR        456

Step 7: Payer Classification

from alx_heor.medications import classify_payer_type

df_payer = classify_payer_type(
    conn,
    source="iqvia",
    schema="iqvia_pharmetrics_2024q3",
    patient_ids=df_cohort["pat_id"].tolist(),
)

Output:

pat_id pay_type payer_type
123456 1 COMMERCIAL
234567 2 MEDICARE
345678 3 MEDICAID
# Distribution
print(df_payer["payer_type"].value_counts(normalize=True))

Complete Example

from alx_heor.database import RedshiftConnection
from alx_heor.cohort import get_cohort, CohortCriteria, DiagnosisCriteria
from alx_heor.medications import (
    get_treatment_claims,
    assign_medication_category,
    identify_treatment_episodes,
    calculate_pdc,
)

# Connect and build cohort
conn = RedshiftConnection().connect()
result = get_cohort(
    conn, source="iqvia", schema="iqvia_pharmetrics_2024q3",
    criteria=CohortCriteria(
        primary_diagnosis=DiagnosisCriteria(codes=["G700", "G7000", "G7001"]),
        min_age=18,
    ),
    start_year=2015, end_year=2024,
)

# Get treatment claims
df_rx = get_treatment_claims(
    conn, source="iqvia", schema="iqvia_pharmetrics_2024q3",
    patient_ids=result.df_cohort["pat_id"].tolist(),
    generic_names=["eculizumab", "ravulizumab", "efgartigimod", "pyridostigmine"],
    start_year=2015, end_year=2024,
)

# Categorize
categories = {
    "BIOLOGIC": ["eculizumab", "ravulizumab", "efgartigimod"],
    "SYMPTOMATIC": ["pyridostigmine"],
}
df_rx = assign_medication_category(df_rx, categories)

# Episodes
df_episodes = identify_treatment_episodes(df_rx, gap_days=60)

# Summary
print(f"Cohort: {len(result.df_cohort)} patients")
print(f"Treatment claims: {len(df_rx)}")
print(f"Treatment episodes: {len(df_episodes)}")
print(f"\nTreatment distribution:")
print(df_rx["drug_class"].value_counts())

Next Steps