πŸ›‘οΈ Known Exploited Vulnerabilities

CISA KEV Catalog Analysis

Author: Jim Wyatt

Contact: jim@wyatt.ltd

data refreshed weekly on sunday at midnight gmt

InΒ [1]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Configuration Settings</h2>"))


# Configuration Settings
import warnings
warnings.filterwarnings('ignore')

# Chart settings
CHART_STYLE = 'seaborn-v0_8'
FIGURE_SIZE = (14, 8)
DPI = 100

# Analysis parameters
TOP_N_VENDORS = 20
TOP_N_PRODUCTS = 20
TOP_N_CWES = 20
RISK_THRESHOLD = 0.25  # 25% ransomware threshold for high-risk classification
CUMULATIVE_THRESHOLD = 0.80  # 80% threshold for Pareto analysis

# Data caching
CACHE_FILE = 'cisa_kev_cache.csv'
CACHE_AGE_HOURS = 24  # Refresh cache after 24 hours

# Export settings
EXPORT_EXCEL = True
EXPORT_JSON = True

print("βœ“ Configuration loaded successfully")

Configuration Settings

βœ“ Configuration loaded successfully
InΒ [2]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Load CISA KEV Data</h2>"))


# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import os
from datetime import datetime
from IPython.display import HTML

# Configure Plotly for better HTML export compatibility
# This ensures Plotly charts are embedded as HTML in nbconvert output
pio.renderers.default = "notebook_connected"

# Apply matplotlib style
plt.style.use(CHART_STYLE)

# Data source URL
url = 'https://www.cisa.gov/sites/default/files/csv/known_exploited_vulnerabilities.csv'

# Load the data with caching for performance
if os.path.exists(CACHE_FILE):
    cache_age = (datetime.now() - datetime.fromtimestamp(
        os.path.getmtime(CACHE_FILE))).total_seconds() / 3600
    if cache_age < CACHE_AGE_HOURS:
        df = pd.read_csv(CACHE_FILE)
        print(f"βœ“ Loaded from cache (age: {cache_age:.1f} hours)")
    else:
        print(f"Fetching fresh data from CISA...")
        df = pd.read_csv(url)
        df.to_csv(CACHE_FILE, index=False)
        print(f"βœ“ Successfully loaded {len(df):,} vulnerabilities from CISA KEV catalog")
else:
    print(f"Fetching data from CISA...")
    df = pd.read_csv(url)
    df.to_csv(CACHE_FILE, index=False)
    print(f"βœ“ Successfully loaded {len(df):,} vulnerabilities from CISA KEV catalog")

Load CISA KEV Data

Fetching data from CISA...
βœ“ Successfully loaded 1,536 vulnerabilities from CISA KEV catalog
InΒ [3]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Plotly Helper Function</h2>"))

# Set default Plotly template to dark
import plotly.io as pio
pio.templates.default = "plotly_dark"

# Helper function to display Plotly figures with responsive width
def show_plotly(fig, height=600):
    """Display Plotly figure with responsive width for slides"""
    from IPython.display import display, HTML
    # Configure for full-width responsive sizing - use reasonable default width
    fig.update_layout(
        autosize=False,  # Disable autosize to force our width
        width=1000,  # Reasonable width that CSS can override
        height=height,
        margin=dict(l=40, r=40, t=60, b=40),
        template='plotly_dark'  # Apply dark theme
    )
    # Export with div style that forces 100% width via CSS
    html_str = fig.to_html(
        include_plotlyjs='cdn',
        full_html=False,
        config={'responsive': True, 'displayModeBar': True}
    )
    # Wrap in div with explicit width styling - this overrides Plotly's width
    wrapped_html = f'<div style="width: 100% !important; max-width: 95%; height: auto; margin: 0 auto;">{html_str}</div>'
    display(HTML(wrapped_html))

Plotly Helper Function

InΒ [4]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Data Quality Report</h2>"))


# Data Quality Checks
print("=" * 60)
print("DATA QUALITY REPORT")
print("=" * 60)

print(f"\nπŸ“Š Dataset Info:")
print(f"   Total Records: {len(df):,}")
print(f"   Columns: {len(df.columns)}")
print(f"   Memory Usage: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB")

print(f"\nπŸ” Missing Values:")
missing = df.isnull().sum()
if missing.sum() > 0:
    for col in missing[missing > 0].index:
        print(f"   {col}: {missing[col]} ({missing[col]/len(df)*100:.1f}%)")
else:
    print("   None - Dataset is complete!")

print(f"\nπŸ”„ Duplicate Checks:")
print(f"   Duplicate CVE IDs: {df['cveID'].duplicated().sum()}")
print(f"   Duplicate Rows: {df.duplicated().sum()}")

print(f"\nπŸ“… Date Range Validation:")
try:
    temp_dates = pd.to_datetime(df['dueDate'], errors='coerce')
    invalid_dates = temp_dates.isnull().sum()
    if invalid_dates > 0:
        print(f"   ⚠️  Invalid dates found: {invalid_dates}")
    else:
        print(f"   βœ“ All dates valid")
        print(f"   Range: {temp_dates.min()} to {temp_dates.max()}")
except:
    print(f"   ⚠️  Date validation error")

print("\n" + "=" * 60)

Data Quality Report

============================================================
DATA QUALITY REPORT
============================================================

πŸ“Š Dataset Info:
   Total Records: 1,536
   Columns: 11
   Memory Usage: 1.55 MB

πŸ” Missing Values:
   cwes: 167 (10.9%)

πŸ”„ Duplicate Checks:
   Duplicate CVE IDs: 0
   Duplicate Rows: 0

πŸ“… Date Range Validation:
   βœ“ All dates valid
   Range: 2021-11-17 00:00:00 to 2026-03-26 00:00:00

============================================================
InΒ [5]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Data Processing & Initial Analysis</h2>"))


# Data Processing and Initial Analysis
df['dateAdded'] = pd.to_datetime(df['dateAdded'])
df['dueDate'] = pd.to_datetime(df['dueDate'])

# Create analysis datasets
df_vendor_count = df.groupby('vendorProject')['cveID'].count().reset_index(name='count').sort_values('count', ascending=False)
df_product_count = df.groupby(['vendorProject', 'product'])['cveID'].count().reset_index(name='count').sort_values('count', ascending=False)
df_cwe_count = df.groupby('cwes')['cveID'].count().reset_index(name='count').sort_values('count', ascending=False)

# Ransomware analysis
df['is_ransomware'] = (df['knownRansomwareCampaignUse'] == 'Known').astype(int)
df_ransomware = df[df['is_ransomware'] == 1]

# Vendor risk analysis
df_vendor_analysis = df.groupby('vendorProject').agg(
    total_cves=('cveID', 'count'),
    ransomware_cves=('is_ransomware', 'sum'),
    unique_products=('product', 'nunique')
).reset_index()

df_vendor_analysis['ransomware_pct'] = (df_vendor_analysis['ransomware_cves'] /
                                         df_vendor_analysis['total_cves'] * 100)

df_vendor_analysis['risk_score'] = (
    df_vendor_analysis['total_cves'] * 0.4 +
    df_vendor_analysis['ransomware_pct'] * 0.6
)

df_vendor_analysis = df_vendor_analysis.sort_values('risk_score', ascending=False)

# Due date analysis
df['days_to_patch'] = (df['dueDate'] - df['dateAdded']).dt.days
df_due_date_analysis = df.groupby('vendorProject').agg(
    avg_days_to_patch=('days_to_patch', 'mean'),
    median_days_to_patch=('days_to_patch', 'median'),
    total_cves=('cveID', 'count')
).reset_index().sort_values('avg_days_to_patch', ascending=False)

# Notes analysis
df_notes_summary = df.groupby('notes')['cveID'].count().reset_index(name='count').sort_values('count', ascending=False)

# Export to CSV
df_vendor_analysis.to_csv('vendor_analysis_enhanced_' + datetime.now().strftime('%Y-%m-%d_%H%M%S') + '.csv', index=False)
df_vendor_count.to_csv('vendor_vulnerabilities.csv', index=False)
df_product_count.to_csv('product_vulnerabilities.csv', index=False)
df_cwe_count.to_csv('cwe_vulnerabilities.csv', index=False)
df_due_date_analysis.to_csv('due_date_analysis.csv', index=False)
df_notes_summary.to_csv('notes_summary.csv', index=False)

print("βœ“ Data processing complete")

Data Processing & Initial Analysis

βœ“ Data processing complete
InΒ [6]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Interactive Filtering Controls</h2>"))


# Interactive Filtering Options
# Modify these variables to filter your analysis

# Date range filtering
filter_start_date = None  # e.g., '2023-01-01' or None for no filter
filter_end_date = None     # e.g., '2024-12-31' or None for no filter

# Vendor filtering
filter_vendors = None      # e.g., ['Microsoft', 'Apple'] or None for all

# Ransomware filtering
filter_ransomware_only = False  # True to show only ransomware-related vulnerabilities

# Apply filters to create filtered dataset
df_filtered = df.copy()

if filter_start_date:
    df_filtered = df_filtered[df_filtered['dateAdded'] >= pd.to_datetime(filter_start_date)]

if filter_end_date:
    df_filtered = df_filtered[df_filtered['dateAdded'] <= pd.to_datetime(filter_end_date)]

if filter_vendors:
    df_filtered = df_filtered[df_filtered['vendorProject'].isin(filter_vendors)]

if filter_ransomware_only:
    df_filtered = df_filtered[df_filtered['knownRansomwareCampaignUse'] == 'Known']

print(f"Filtered dataset: {len(df_filtered)} vulnerabilities (from {len(df)} total)")
print(f"Date range: {df_filtered['dateAdded'].min()} to {df_filtered['dateAdded'].max()}")
print(f"Unique vendors: {df_filtered['vendorProject'].nunique()}")

Interactive Filtering Controls

Filtered dataset: 1536 vulnerabilities (from 1536 total)
Date range: 2021-11-03 00:00:00 to 2026-03-05 00:00:00
Unique vendors: 253
InΒ [7]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Year-over-Year Trend Analysis</h2>"))


# Year-over-Year Analysis
df['year'] = df['dateAdded'].dt.year
df['month'] = df['dateAdded'].dt.month

# Annual summary
annual_summary = df.groupby('year').agg({
    'cveID': 'count',
    'vendorProject': 'nunique',
    'product': 'nunique'
}).rename(columns={'cveID': 'total_cves', 'vendorProject': 'unique_vendors', 'product': 'unique_products'})

# Calculate year-over-year growth
annual_summary['yoy_growth'] = annual_summary['total_cves'].pct_change() * 100
annual_summary['yoy_growth_vendors'] = annual_summary['unique_vendors'].pct_change() * 100

# Ransomware trends by year
ransomware_by_year = df[df['knownRansomwareCampaignUse'] == 'Known'].groupby('year').size()
annual_summary['ransomware_cves'] = ransomware_by_year
annual_summary['ransomware_pct'] = (annual_summary['ransomware_cves'] / annual_summary['total_cves'] * 100).fillna(0)

print("Year-over-Year Summary:")
print(annual_summary.to_string())
print("\n" + "="*80)

# Create interactive YoY visualization
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Total CVEs by Year', 'Year-over-Year Growth Rate',
                   'Ransomware CVEs by Year', 'Monthly Distribution (All Years)'),
    specs=[[{'type': 'bar'}, {'type': 'bar'}],
           [{'type': 'bar'}, {'type': 'box'}]]
)

# Total CVEs by year
fig.add_trace(
    go.Bar(x=annual_summary.index, y=annual_summary['total_cves'],
           name='Total CVEs', marker_color='steelblue'),
    row=1, col=1
)

# YoY Growth rate
fig.add_trace(
    go.Bar(x=annual_summary.index[1:], y=annual_summary['yoy_growth'].iloc[1:],
           name='YoY Growth %', marker_color='coral'),
    row=1, col=2
)

# Ransomware CVEs
fig.add_trace(
    go.Bar(x=annual_summary.index, y=annual_summary['ransomware_cves'].fillna(0),
           name='Ransomware CVEs', marker_color='crimson'),
    row=2, col=1
)

# Monthly distribution
monthly_data = df.groupby('month')['cveID'].count()
fig.add_trace(
    go.Box(y=df['month'], name='Monthly Distribution', marker_color='lightseagreen'),
    row=2, col=2
)

fig.update_layout(height=700, showlegend=False, hovermode='x unified')
show_plotly(fig, height=700)

Year-over-Year Trend Analysis

Year-over-Year Summary:
      total_cves  unique_vendors  unique_products  yoy_growth  yoy_growth_vendors  ransomware_cves  ransomware_pct
year                                                                                                              
2021         311              73              155         NaN                 NaN               76       24.437299
2022         555              98              239   78.456592           34.246575              123       22.162162
2023         187              67              112  -66.306306          -31.632653               43       22.994652
2024         186              64              109   -0.534759           -4.477612               42       22.580645
2025         245              99              146   31.720430           54.687500               25       10.204082
2026          52              29               35  -78.775510          -70.707071                3        5.769231

================================================================================
InΒ [8]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Key Statistics Summary</h2>"))


# Statistical Overview
df_vendor_count_sorted = df_vendor_count.head(TOP_N_VENDORS).rename(columns={'count': 'Count', 'vendorProject': 'vendorProject'})
df_product_count_sorted = df_product_count.head(TOP_N_PRODUCTS).rename(columns={'count': 'Count'})

print("=" * 60)
print("πŸ“Š KEY STATISTICS")
print("=" * 60)

print(f"\nπŸ“¦ Dataset Overview:")
print(f"   Total Vulnerabilities: {len(df):,}")
print(f"   Unique Vendors: {df['vendorProject'].nunique():,}")
print(f"   Unique Products: {df['product'].nunique():,}")
print(f"   Date Range: {df['dateAdded'].min().strftime('%Y-%m-%d')} to {df['dateAdded'].max().strftime('%Y-%m-%d')}")

print(f"\nπŸ”΄ Ransomware Threats:")
ransomware_known = df[df['knownRansomwareCampaignUse'] == 'Known'].shape[0]
ransomware_pct = (ransomware_known / len(df)) * 100
print(f"   Known Ransomware Use: {ransomware_known:,} ({ransomware_pct:.1f}%)")
print(f"   Unknown Ransomware Use: {len(df) - ransomware_known:,} ({100-ransomware_pct:.1f}%)")

print(f"\nπŸ“… Timeline:")
print(f"   Earliest Due Date: {df['dueDate'].min().strftime('%Y-%m-%d')}")
print(f"   Latest Due Date: {df['dueDate'].max().strftime('%Y-%m-%d')}")
print(f"   Median Due Date: {df['dueDate'].median().strftime('%Y-%m-%d')}")

print(f"\nπŸ† Top 3 Vendors:")
for idx, row in df_vendor_count_sorted.head(3).iterrows():
    pct = (row['Count'] / len(df)) * 100
    print(f"   {row['vendorProject']}: {row['Count']} ({pct:.1f}%)")

print(f"\n🎯 Top 3 Products:")
for idx, row in df_product_count_sorted.head(3).iterrows():
    pct = (row['Count'] / len(df)) * 100
    print(f"   {row['product']}: {row['Count']} ({pct:.1f}%)")

print("\n" + "=" * 60)

Key Statistics Summary

============================================================
πŸ“Š KEY STATISTICS
============================================================

πŸ“¦ Dataset Overview:
   Total Vulnerabilities: 1,536
   Unique Vendors: 253
   Unique Products: 626
   Date Range: 2021-11-03 to 2026-03-05

πŸ”΄ Ransomware Threats:
   Known Ransomware Use: 312 (20.3%)
   Unknown Ransomware Use: 1,224 (79.7%)

πŸ“… Timeline:
   Earliest Due Date: 2021-11-17
   Latest Due Date: 2026-03-26
   Median Due Date: 2022-08-10

πŸ† Top 3 Vendors:
   Microsoft: 361 (23.5%)
   Apple: 90 (5.9%)
   Cisco: 85 (5.5%)

🎯 Top 3 Products:
   Windows: 166 (10.8%)
   Multiple Products: 50 (3.3%)
   Chromium V8: 37 (2.4%)

============================================================
InΒ [9]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Vulnerability Trends Over Time</h2>"))


# Vulnerability Trends Over Time - Interactive
df_timeline = df.groupby(df['dueDate'].dt.to_period('M')).size().reset_index(name='Count')
df_timeline['dueDate'] = df_timeline['dueDate'].dt.to_timestamp()
df_timeline['Cumulative'] = df_timeline['Count'].cumsum()

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Vulnerability Due Dates by Month', 'Cumulative Vulnerabilities Over Time')
)

# Timeline plot
fig.add_trace(
    go.Scatter(x=df_timeline['dueDate'], y=df_timeline['Count'],
               mode='lines+markers', name='Monthly Count',
               line=dict(width=2), marker=dict(size=6),
               hovertemplate='<b>%{x|%Y-%m}</b><br>Count: %{y}<extra></extra>'),
    row=1, col=1
)

# Cumulative plot
fig.add_trace(
    go.Scatter(x=df_timeline['dueDate'], y=df_timeline['Cumulative'],
               mode='lines', name='Cumulative',
               fill='tozeroy', line=dict(width=2),
               hovertemplate='<b>%{x|%Y-%m}</b><br>Total: %{y}<extra></extra>'),
    row=1, col=2
)

fig.update_xaxes(title_text="Due Date", row=1, col=1)
fig.update_xaxes(title_text="Due Date", row=1, col=2)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_yaxes(title_text="Cumulative Count", row=1, col=2)

fig.update_layout(height=500, showlegend=False, hovermode='x unified')
show_plotly(fig)

Vulnerability Trends Over Time

InΒ [10]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Ransomware Analysis by Vendor</h2>"))


# Ransomware Analysis - Interactive
df_ransomware_vendors = df[df['knownRansomwareCampaignUse'] == 'Known'].groupby('vendorProject').size().reset_index(name='Count')
df_ransomware_vendors = df_ransomware_vendors.sort_values('Count', ascending=False).head(10)

# Ransomware percentage by top vendors
top_10_vendors = df_vendor_count_sorted.head(10)['vendorProject'].tolist()
df_vendor_ransomware = df[df['vendorProject'].isin(top_10_vendors)].groupby(['vendorProject', 'knownRansomwareCampaignUse']).size().unstack(fill_value=0)
df_vendor_ransomware['Total'] = df_vendor_ransomware.sum(axis=1)
df_vendor_ransomware['Ransomware_Pct'] = (df_vendor_ransomware.get('Known', 0) / df_vendor_ransomware['Total'] * 100)
df_vendor_ransomware = df_vendor_ransomware.sort_values('Ransomware_Pct', ascending=False)

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Top 10 Vendors with Known Ransomware Exploits',
                   'Ransomware Percentage in Top 10 Vendors')
)

# Top ransomware vendors
fig.add_trace(
    go.Bar(y=df_ransomware_vendors['vendorProject'], x=df_ransomware_vendors['Count'],
           orientation='h', marker_color='crimson',
           hovertemplate='<b>%{y}</b><br>Count: %{x}<extra></extra>'),
    row=1, col=1
)

# Ransomware percentage
fig.add_trace(
    go.Bar(y=df_vendor_ransomware.index, x=df_vendor_ransomware['Ransomware_Pct'],
           orientation='h', marker_color='orange',
           hovertemplate='<b>%{y}</b><br>Ransomware: %{x:.1f}%<extra></extra>'),
    row=1, col=2
)

fig.update_xaxes(title_text="Count", row=1, col=1)
fig.update_xaxes(title_text="Percentage (%)", row=1, col=2)
fig.update_yaxes(title_text="", row=1, col=1)
fig.update_yaxes(title_text="", row=1, col=2)

fig.update_layout(height=500, showlegend=False)
show_plotly(fig)

Ransomware Analysis by Vendor

InΒ [11]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Top Vulnerability Types (CWE Categories)</h2>"))


# CWE Category Insights - Interactive
cwe_descriptions = {
    'CWE-20': 'Improper Input Validation',
    'CWE-78': 'OS Command Injection',
    'CWE-416': 'Use After Free',
    'CWE-787': 'Out-of-bounds Write',
    'CWE-119': 'Buffer Errors',
    'CWE-22': 'Path Traversal',
    'CWE-94': 'Code Injection',
    'CWE-502': 'Deserialization of Untrusted Data',
    'CWE-843': 'Type Confusion',
    'CWE-264': 'Permissions/Privileges',
    'CWE-284': 'Improper Access Control',
    'CWE-287': 'Improper Authentication',
    'CWE-306': 'Missing Authentication',
    'CWE-79': 'Cross-site Scripting (XSS)',
    'CWE-200': 'Information Exposure',
    'CWE-89': 'SQL Injection'
}

# Extract CWE ID from cwes field
df['CWE'] = df['cwes'].str.extract(r'(CWE-\d+)', expand=False)
significant_cwes = df['CWE'].value_counts().head(15).reset_index()
significant_cwes.columns = ['CWE', 'Count']

# Add descriptions to significant CWEs
significant_cwes_labeled = significant_cwes.copy()
significant_cwes_labeled['Description'] = significant_cwes_labeled['CWE'].map(cwe_descriptions).fillna('Other')
significant_cwes_labeled['Label'] = significant_cwes_labeled.apply(lambda x: f"{x['CWE']}: {x['Description']}", axis=1)
significant_cwes_labeled = significant_cwes_labeled.sort_values('Count', ascending=False)

# Color code by count
colors_map = {'high': '#d62728', 'medium': '#ff7f0e', 'low': '#2ca02c'}
significant_cwes_labeled['color'] = significant_cwes_labeled['Count'].apply(
    lambda c: colors_map['high'] if c > 70 else colors_map['medium'] if c > 40 else colors_map['low']
)

# Create interactive bar chart
fig = go.Figure(data=[
    go.Bar(
        y=significant_cwes_labeled['Label'],
        x=significant_cwes_labeled['Count'],
        orientation='h',
        marker_color=significant_cwes_labeled['color'],
        hovertemplate='<b>%{y}</b><br>Count: %{x}<extra></extra>'
    )
])

fig.update_layout(
    title='Top Vulnerability Types (CWE Categories)',
    xaxis_title='Count',
    yaxis_title='',
    height=600,
    yaxis={'categoryorder':'total ascending'}
)

show_plotly(fig, height=600)

print("\nπŸ” CWE Category Analysis:")
print(f"   Most Common: {significant_cwes_labeled.iloc[0]['CWE']} - {significant_cwes_labeled.iloc[0]['Description']} ({significant_cwes_labeled.iloc[0]['Count']} vulnerabilities)")
print(f"   These top {len(significant_cwes)} CWE categories account for {significant_cwes['Count'].sum():,} vulnerabilities")

Top Vulnerability Types (CWE Categories)

πŸ” CWE Category Analysis:
   Most Common: CWE-20 - Improper Input Validation (113 vulnerabilities)
   These top 15 CWE categories account for 851 vulnerabilities
InΒ [12]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Vendor-Product Vulnerability Heatmap</h2>"))


# Vendor-Product Heatmap - Interactive
# Get top 10 vendors and their top products
top_vendors_list = df_vendor_count_sorted.head(10)['vendorProject'].tolist()
df_vendor_product = df[df['vendorProject'].isin(top_vendors_list)].groupby(['vendorProject', 'product']).size().reset_index(name='Count')

# For each vendor, get top 5 products
top_vendor_products = []
for vendor in top_vendors_list:
    vendor_data = df_vendor_product[df_vendor_product['vendorProject'] == vendor].nlargest(5, 'Count')
    top_vendor_products.append(vendor_data)

df_heatmap = pd.concat(top_vendor_products)

# Create a pivot table
pivot_data = df_heatmap.pivot_table(index='product', columns='vendorProject', values='Count', fill_value=0)

# Create interactive heatmap
fig = go.Figure(data=go.Heatmap(
    z=pivot_data.values,
    x=pivot_data.columns,
    y=pivot_data.index,
    colorscale='YlOrRd',
    hovertemplate='<b>Vendor:</b> %{x}<br><b>Product:</b> %{y}<br><b>Count:</b> %{z}<extra></extra>',
    colorbar=dict(title='Vulnerabilities')
))

fig.update_layout(
    title='Vendor-Product Vulnerability Heatmap (Top 5 Products per Top 10 Vendors)',
    xaxis_title='Vendor',
    yaxis_title='Product',
    height=700,
    xaxis={'tickangle': 45}
)

show_plotly(fig, height=700)

Vendor-Product Vulnerability Heatmap

InΒ [13]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Vendor Risk Profile: Volume vs Ransomware</h2>"))


# Market Share vs Vulnerability Count - Interactive Bubble Chart
# Compare vendor vulnerability counts with ransomware risk

top_20_vendors = df_vendor_count_sorted.head(20)

# Calculate ransomware percentage for each vendor
vendor_risk_data = []
for idx, row in top_20_vendors.iterrows():
    vendor = row['vendorProject']
    count = row['Count']
    ransomware_count = df[(df['vendorProject'] == vendor) & (df['knownRansomwareCampaignUse'] == 'Known')].shape[0]
    ransomware_pct = (ransomware_count / count) * 100 if count > 0 else 0
    vendor_risk_data.append({
        'vendor': vendor,
        'count': count,
        'ransomware_pct': ransomware_pct,
        'ransomware_count': ransomware_count
    })

df_bubble = pd.DataFrame(vendor_risk_data)

# Create interactive bubble chart
fig = px.scatter(df_bubble,
                 x='count',
                 y='ransomware_pct',
                 size='count',
                 color='vendor',
                 hover_name='vendor',
                 hover_data={
                     'count': True,
                     'ransomware_pct': ':.1f',
                     'ransomware_count': True,
                     'vendor': False
                 },
                 labels={
                     'count': 'Total Vulnerabilities',
                     'ransomware_pct': 'Ransomware Usage (%)',
                     'ransomware_count': 'Ransomware CVEs'
                 },
                 title='Vendor Vulnerability Profile: Volume vs Ransomware Risk')

fig.update_traces(marker=dict(line=dict(width=2, color='DarkSlateGrey')))
fig.update_layout(height=600, showlegend=True)
show_plotly(fig)

print("\nπŸ’‘ Insight: Bubble size represents total vulnerabilities. Position shows ransomware exploitation risk.")

Vendor Risk Profile: Volume vs Ransomware

πŸ’‘ Insight: Bubble size represents total vulnerabilities. Position shows ransomware exploitation risk.
InΒ [14]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Security Recommendations & Action Items</h2>"))


# Priority Action Items
print("=" * 70)
print("🎯 PRIORITY ACTION ITEMS FOR SECURITY TEAMS")
print("=" * 70)

# 1. High-risk vendors with ransomware
print("\n1. πŸ”΄ HIGH-RISK VENDORS (High Volume + Ransomware Exploitation):")
high_risk = df_vendor_ransomware[df_vendor_ransomware['Ransomware_Pct'] > 25].sort_values('Ransomware_Pct', ascending=False)
for vendor in high_risk.index:
    vuln_count = high_risk.loc[vendor, 'Total']
    ransomware_pct = high_risk.loc[vendor, 'Ransomware_Pct']
    print(f"   β€’ {vendor}: {int(vuln_count)} vulnerabilities ({ransomware_pct:.1f}% ransomware)")

# 2. Most critical CWE categories to address
print("\n2. 🎯 CRITICAL VULNERABILITY TYPES TO PATCH:")
for idx, row in significant_cwes_labeled.tail(5).iterrows():
    print(f"   β€’ {row['CWE']}: {row['Description']} ({row['Count']} instances)")

# 3. Products requiring immediate attention
print("\n3. ⚠️  PRODUCTS NEEDING IMMEDIATE ATTENTION:")
critical_products = df_product_count_sorted.head(5)
for idx, row in critical_products.iterrows():
    vendor = df[df['product'] == row['product']]['vendorProject'].mode()[0] if len(df[df['product'] == row['product']]) > 0 else 'Unknown'
    print(f"   β€’ {row['product']} ({vendor}): {row['Count']} vulnerabilities")

# 4. Due date urgency
print("\n4. πŸ“… REMEDIATION URGENCY:")
current_date = pd.Timestamp.now()
overdue = df[df['dueDate'] < current_date]
upcoming_30 = df[(df['dueDate'] >= current_date) & (df['dueDate'] <= current_date + pd.Timedelta(days=30))]
print(f"   β€’ Overdue patches: {len(overdue)} vulnerabilities")
print(f"   β€’ Due within 30 days: {len(upcoming_30)} vulnerabilities")
print(f"   β€’ Oldest overdue: {(current_date - df['dueDate'].min()).days} days")

# 5. Ransomware-specific recommendations
print("\n5. πŸ›‘οΈ  RANSOMWARE PROTECTION PRIORITIES:")
ransomware_vulns = df[df['knownRansomwareCampaignUse'] == 'Known']
top_ransomware_products = ransomware_vulns['product'].value_counts().head(3)
for product, count in top_ransomware_products.items():
    print(f"   β€’ Patch {product}: {count} known ransomware exploits")

print("\n" + "=" * 70)
print("πŸ’‘ TIP: Focus on vendors with both high volume AND high ransomware %")
print("=" * 70)

Security Recommendations & Action Items

======================================================================
🎯 PRIORITY ACTION ITEMS FOR SECURITY TEAMS
======================================================================

1. πŸ”΄ HIGH-RISK VENDORS (High Volume + Ransomware Exploitation):
   β€’ Ivanti: 31 vulnerabilities (38.7% ransomware)
   β€’ VMware: 26 vulnerabilities (34.6% ransomware)
   β€’ Microsoft: 361 vulnerabilities (28.3% ransomware)
   β€’ Oracle: 42 vulnerabilities (26.2% ransomware)

2. 🎯 CRITICAL VULNERABILITY TYPES TO PATCH:
   β€’ CWE-264: Permissions/Privileges (31 instances)
   β€’ CWE-306: Missing Authentication (30 instances)
   β€’ CWE-79: Cross-site Scripting (XSS) (29 instances)
   β€’ CWE-284: Improper Access Control (29 instances)
   β€’ CWE-122: Other (26 instances)

3. ⚠️  PRODUCTS NEEDING IMMEDIATE ATTENTION:
   β€’ Windows (Microsoft): 166 vulnerabilities
   β€’ Multiple Products (Apple): 50 vulnerabilities
   β€’ Chromium V8 (Google): 37 vulnerabilities
   β€’ Internet Explorer (Microsoft): 34 vulnerabilities
   β€’ Flash Player (Adobe): 33 vulnerabilities

4. πŸ“… REMEDIATION URGENCY:
   β€’ Overdue patches: 1521 vulnerabilities
   β€’ Due within 30 days: 15 vulnerabilities
   β€’ Oldest overdue: 1572 days

5. πŸ›‘οΈ  RANSOMWARE PROTECTION PRIORITIES:
   β€’ Patch Windows: 45 known ransomware exploits
   β€’ Patch Exchange Server: 12 known ransomware exploits
   β€’ Patch Multiple Products: 8 known ransomware exploits

======================================================================
πŸ’‘ TIP: Focus on vendors with both high volume AND high ransomware %
======================================================================
InΒ [15]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Risk Score Analysis for Critical Vulnerabilities</h2>"))


# Create interactive summary table for top vulnerabilities
print("\nπŸ“‹ TOP 10 MOST CRITICAL VULNERABILITY COMBINATIONS\n")
print("Ranking by: Vendor impact Γ— Ransomware risk Γ— CWE severity\n")

# Calculate risk scores
df_risk = df.copy()
df_risk['vendor_count'] = df_risk['vendorProject'].map(df_vendor_count.set_index('vendorProject')['count'])
df_risk['cwe_count'] = df_risk['cwes'].map(df_cwe_count.set_index('cwes')['count'])
df_risk['ransomware_score'] = df_risk['is_ransomware'] * 100

# Calculate composite risk score
df_risk['risk_score'] = (
    (df_risk['vendor_count'] / df_risk['vendor_count'].max()) * 30 +
    (df_risk['cwe_count'] / df_risk['cwe_count'].max()) * 30 +
    df_risk['ransomware_score'] * 0.4
)

# Get top 10 highest risk
top_risk = df_risk.nlargest(10, 'risk_score')[['cveID', 'vendorProject', 'product',
                                                  'cwes', 'knownRansomwareCampaignUse',
                                                  'risk_score']].reset_index(drop=True)

print(top_risk.to_string(index=False))
print(f"\n⚠️  These {len(top_risk)} CVEs represent the highest combined threat level")
print("   based on vendor prevalence, vulnerability type frequency, and ransomware usage.")

Risk Score Analysis for Critical Vulnerabilities

πŸ“‹ TOP 10 MOST CRITICAL VULNERABILITY COMBINATIONS

Ranking by: Vendor impact Γ— Ransomware risk Γ— CWE severity

         cveID vendorProject           product    cwes knownRansomwareCampaignUse  risk_score
 CVE-2016-0034     Microsoft       Silverlight  CWE-20                      Known   99.662921
CVE-2021-42278     Microsoft  Active Directory  CWE-20                      Known   99.662921
 CVE-2017-0148     Microsoft      SMBv1 server  CWE-20                      Known   99.662921
 CVE-2017-0146     Microsoft           Windows  CWE-20                      Known   99.662921
 CVE-2017-0145     Microsoft             SMBv1  CWE-20                      Known   99.662921
 CVE-2017-0144     Microsoft             SMBv1  CWE-20                      Known   99.662921
 CVE-2017-0143     Microsoft           Windows  CWE-20                      Known   99.662921
 CVE-2019-0604     Microsoft        SharePoint  CWE-20                      Known   99.662921
CVE-2025-29824     Microsoft           Windows CWE-416                      Known   98.988764
 CVE-2013-2551     Microsoft Internet Explorer CWE-416                      Known   98.988764

⚠️  These 10 CVEs represent the highest combined threat level
   based on vendor prevalence, vulnerability type frequency, and ransomware usage.
InΒ [16]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Export Analysis Results</h2>"))


# Export Summary and Data
export_timestamp = datetime.now().strftime('%Y-%m-%d_%H%M%S')

print("\n" + "=" * 70)
print("πŸ“ EXPORTING ANALYSIS RESULTS")
print("=" * 70)

# Create summary report
summary_report = {
    'Analysis_Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'Total_CVEs': len(df),
    'Unique_Vendors': df['vendorProject'].nunique(),
    'Unique_Products': df['product'].nunique(),
    'Ransomware_CVEs': df[df['knownRansomwareCampaignUse'] == 'Known'].shape[0],
    'Ransomware_Percentage': round((df[df['knownRansomwareCampaignUse'] == 'Known'].shape[0] / len(df)) * 100, 2),
    'Top_Vendor': df_vendor_count_sorted.iloc[0]['vendorProject'],
    'Top_Product': df_product_count_sorted.iloc[0]['product'],
    'Most_Common_CWE': f"{significant_cwes.iloc[0]['CWE']}: {cwe_descriptions.get(significant_cwes.iloc[0]['CWE'], 'N/A')}",
    'Date_Range': f"{df['dueDate'].min().strftime('%Y-%m-%d')} to {df['dueDate'].max().strftime('%Y-%m-%d')}"
}

# Enhanced vendor analysis
df_vendor_enhanced = df_vendor_count_sorted.copy()
df_vendor_enhanced['Ransomware_Count'] = df_vendor_enhanced['vendorProject'].apply(
    lambda x: len(df[(df['vendorProject'] == x) & (df['knownRansomwareCampaignUse'] == 'Known')])
)
df_vendor_enhanced['Ransomware_Pct'] = (df_vendor_enhanced['Ransomware_Count'] / df_vendor_enhanced['Count'] * 100).round(1)

# Save CSV exports
summary_df = pd.DataFrame([summary_report])
summary_df.to_csv(f'analysis_summary_{export_timestamp}.csv', index=False)
df_vendor_enhanced.to_csv(f'vendor_analysis_enhanced_{export_timestamp}.csv', index=False)

# Excel multi-sheet export (if enabled)
if EXPORT_EXCEL:
    with pd.ExcelWriter(f'kev_analysis_{export_timestamp}.xlsx', engine='openpyxl') as writer:
        summary_df.to_excel(writer, sheet_name='Summary', index=False)
        df_vendor_enhanced.to_excel(writer, sheet_name='Vendors', index=False)
        df_product_count_sorted.to_excel(writer, sheet_name='Products', index=False)
        significant_cwes_labeled.to_excel(writer, sheet_name='CWE Categories', index=False)
        annual_summary.to_excel(writer, sheet_name='Year-over-Year', index=True)
    print(f"   βœ… Excel workbook: kev_analysis_{export_timestamp}.xlsx")

# JSON export (if enabled)
if EXPORT_JSON:
    import json
    export_json = {
        'metadata': summary_report,
        'vendors': df_vendor_enhanced.to_dict('records'),
        'products': df_product_count_sorted.head(20).to_dict('records'),
        'cwe_categories': significant_cwes_labeled.to_dict('records'),
        'annual_trends': annual_summary.to_dict('index')
    }

    with open(f'kev_analysis_{export_timestamp}.json', 'w') as f:
        json.dump(export_json, f, indent=2)
    print(f"   βœ… JSON export: kev_analysis_{export_timestamp}.json")

print(f"\nβœ… CSV exports saved")
print(f"   β€’ analysis_summary_{export_timestamp}.csv")
print(f"   β€’ vendor_analysis_enhanced_{export_timestamp}.csv")

print("\n" + "=" * 70)
print("πŸ“Š ANALYSIS COMPLETE")
print("=" * 70)
print(f"\nπŸ“Œ Summary: {len(df):,} vulnerabilities analyzed across {df['vendorProject'].nunique()} vendors")
print(f"   Cache file: {CACHE_FILE}")
print(f"   To refresh: Re-run cell 2 to fetch latest data from CISA")
print("\n" + "=" * 70)

Export Analysis Results

======================================================================
πŸ“ EXPORTING ANALYSIS RESULTS
======================================================================
   βœ… Excel workbook: kev_analysis_2026-03-08_001611.xlsx
   βœ… JSON export: kev_analysis_2026-03-08_001611.json

βœ… CSV exports saved
   β€’ analysis_summary_2026-03-08_001611.csv
   β€’ vendor_analysis_enhanced_2026-03-08_001611.csv

======================================================================
πŸ“Š ANALYSIS COMPLETE
======================================================================

πŸ“Œ Summary: 1,536 vulnerabilities analyzed across 253 vendors
   Cache file: cisa_kev_cache.csv
   To refresh: Re-run cell 2 to fetch latest data from CISA

======================================================================