π‘οΈ Known Exploited Vulnerabilities
CISA KEV Catalog Analysis
Author: Jim Wyatt
Contact: jim@wyatt.ltd
data refreshed weekly on sunday at midnight gmt
InΒ [1]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Configuration Settings</h2>"))
# Configuration Settings
import warnings
warnings.filterwarnings('ignore')
# Chart settings
CHART_STYLE = 'seaborn-v0_8'
FIGURE_SIZE = (14, 8)
DPI = 100
# Analysis parameters
TOP_N_VENDORS = 20
TOP_N_PRODUCTS = 20
TOP_N_CWES = 20
RISK_THRESHOLD = 0.25 # 25% ransomware threshold for high-risk classification
CUMULATIVE_THRESHOLD = 0.80 # 80% threshold for Pareto analysis
# Data caching
CACHE_FILE = 'cisa_kev_cache.csv'
CACHE_AGE_HOURS = 24 # Refresh cache after 24 hours
# Export settings
EXPORT_EXCEL = True
EXPORT_JSON = True
print("β Configuration loaded successfully")
Configuration Settings
β Configuration loaded successfully
InΒ [2]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Load CISA KEV Data</h2>"))
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import os
from datetime import datetime
from IPython.display import HTML
# Configure Plotly for better HTML export compatibility
# This ensures Plotly charts are embedded as HTML in nbconvert output
pio.renderers.default = "notebook_connected"
# Apply matplotlib style
plt.style.use(CHART_STYLE)
# Data source URL
url = 'https://www.cisa.gov/sites/default/files/csv/known_exploited_vulnerabilities.csv'
# Load the data with caching for performance
if os.path.exists(CACHE_FILE):
cache_age = (datetime.now() - datetime.fromtimestamp(
os.path.getmtime(CACHE_FILE))).total_seconds() / 3600
if cache_age < CACHE_AGE_HOURS:
df = pd.read_csv(CACHE_FILE)
print(f"β Loaded from cache (age: {cache_age:.1f} hours)")
else:
print(f"Fetching fresh data from CISA...")
df = pd.read_csv(url)
df.to_csv(CACHE_FILE, index=False)
print(f"β Successfully loaded {len(df):,} vulnerabilities from CISA KEV catalog")
else:
print(f"Fetching data from CISA...")
df = pd.read_csv(url)
df.to_csv(CACHE_FILE, index=False)
print(f"β Successfully loaded {len(df):,} vulnerabilities from CISA KEV catalog")
Load CISA KEV Data
Fetching data from CISA...
β Successfully loaded 1,536 vulnerabilities from CISA KEV catalog
InΒ [3]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Plotly Helper Function</h2>"))
# Set default Plotly template to dark
import plotly.io as pio
pio.templates.default = "plotly_dark"
# Helper function to display Plotly figures with responsive width
def show_plotly(fig, height=600):
"""Display Plotly figure with responsive width for slides"""
from IPython.display import display, HTML
# Configure for full-width responsive sizing - use reasonable default width
fig.update_layout(
autosize=False, # Disable autosize to force our width
width=1000, # Reasonable width that CSS can override
height=height,
margin=dict(l=40, r=40, t=60, b=40),
template='plotly_dark' # Apply dark theme
)
# Export with div style that forces 100% width via CSS
html_str = fig.to_html(
include_plotlyjs='cdn',
full_html=False,
config={'responsive': True, 'displayModeBar': True}
)
# Wrap in div with explicit width styling - this overrides Plotly's width
wrapped_html = f'<div style="width: 100% !important; max-width: 95%; height: auto; margin: 0 auto;">{html_str}</div>'
display(HTML(wrapped_html))
Plotly Helper Function
InΒ [4]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Data Quality Report</h2>"))
# Data Quality Checks
print("=" * 60)
print("DATA QUALITY REPORT")
print("=" * 60)
print(f"\nπ Dataset Info:")
print(f" Total Records: {len(df):,}")
print(f" Columns: {len(df.columns)}")
print(f" Memory Usage: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB")
print(f"\nπ Missing Values:")
missing = df.isnull().sum()
if missing.sum() > 0:
for col in missing[missing > 0].index:
print(f" {col}: {missing[col]} ({missing[col]/len(df)*100:.1f}%)")
else:
print(" None - Dataset is complete!")
print(f"\nπ Duplicate Checks:")
print(f" Duplicate CVE IDs: {df['cveID'].duplicated().sum()}")
print(f" Duplicate Rows: {df.duplicated().sum()}")
print(f"\nπ
Date Range Validation:")
try:
temp_dates = pd.to_datetime(df['dueDate'], errors='coerce')
invalid_dates = temp_dates.isnull().sum()
if invalid_dates > 0:
print(f" β οΈ Invalid dates found: {invalid_dates}")
else:
print(f" β All dates valid")
print(f" Range: {temp_dates.min()} to {temp_dates.max()}")
except:
print(f" β οΈ Date validation error")
print("\n" + "=" * 60)
Data Quality Report
============================================================ DATA QUALITY REPORT ============================================================ π Dataset Info: Total Records: 1,536 Columns: 11 Memory Usage: 1.55 MB π Missing Values: cwes: 167 (10.9%) π Duplicate Checks: Duplicate CVE IDs: 0 Duplicate Rows: 0 π Date Range Validation: β All dates valid Range: 2021-11-17 00:00:00 to 2026-03-26 00:00:00 ============================================================
InΒ [5]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Data Processing & Initial Analysis</h2>"))
# Data Processing and Initial Analysis
df['dateAdded'] = pd.to_datetime(df['dateAdded'])
df['dueDate'] = pd.to_datetime(df['dueDate'])
# Create analysis datasets
df_vendor_count = df.groupby('vendorProject')['cveID'].count().reset_index(name='count').sort_values('count', ascending=False)
df_product_count = df.groupby(['vendorProject', 'product'])['cveID'].count().reset_index(name='count').sort_values('count', ascending=False)
df_cwe_count = df.groupby('cwes')['cveID'].count().reset_index(name='count').sort_values('count', ascending=False)
# Ransomware analysis
df['is_ransomware'] = (df['knownRansomwareCampaignUse'] == 'Known').astype(int)
df_ransomware = df[df['is_ransomware'] == 1]
# Vendor risk analysis
df_vendor_analysis = df.groupby('vendorProject').agg(
total_cves=('cveID', 'count'),
ransomware_cves=('is_ransomware', 'sum'),
unique_products=('product', 'nunique')
).reset_index()
df_vendor_analysis['ransomware_pct'] = (df_vendor_analysis['ransomware_cves'] /
df_vendor_analysis['total_cves'] * 100)
df_vendor_analysis['risk_score'] = (
df_vendor_analysis['total_cves'] * 0.4 +
df_vendor_analysis['ransomware_pct'] * 0.6
)
df_vendor_analysis = df_vendor_analysis.sort_values('risk_score', ascending=False)
# Due date analysis
df['days_to_patch'] = (df['dueDate'] - df['dateAdded']).dt.days
df_due_date_analysis = df.groupby('vendorProject').agg(
avg_days_to_patch=('days_to_patch', 'mean'),
median_days_to_patch=('days_to_patch', 'median'),
total_cves=('cveID', 'count')
).reset_index().sort_values('avg_days_to_patch', ascending=False)
# Notes analysis
df_notes_summary = df.groupby('notes')['cveID'].count().reset_index(name='count').sort_values('count', ascending=False)
# Export to CSV
df_vendor_analysis.to_csv('vendor_analysis_enhanced_' + datetime.now().strftime('%Y-%m-%d_%H%M%S') + '.csv', index=False)
df_vendor_count.to_csv('vendor_vulnerabilities.csv', index=False)
df_product_count.to_csv('product_vulnerabilities.csv', index=False)
df_cwe_count.to_csv('cwe_vulnerabilities.csv', index=False)
df_due_date_analysis.to_csv('due_date_analysis.csv', index=False)
df_notes_summary.to_csv('notes_summary.csv', index=False)
print("β Data processing complete")
Data Processing & Initial Analysis
β Data processing complete
InΒ [6]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Interactive Filtering Controls</h2>"))
# Interactive Filtering Options
# Modify these variables to filter your analysis
# Date range filtering
filter_start_date = None # e.g., '2023-01-01' or None for no filter
filter_end_date = None # e.g., '2024-12-31' or None for no filter
# Vendor filtering
filter_vendors = None # e.g., ['Microsoft', 'Apple'] or None for all
# Ransomware filtering
filter_ransomware_only = False # True to show only ransomware-related vulnerabilities
# Apply filters to create filtered dataset
df_filtered = df.copy()
if filter_start_date:
df_filtered = df_filtered[df_filtered['dateAdded'] >= pd.to_datetime(filter_start_date)]
if filter_end_date:
df_filtered = df_filtered[df_filtered['dateAdded'] <= pd.to_datetime(filter_end_date)]
if filter_vendors:
df_filtered = df_filtered[df_filtered['vendorProject'].isin(filter_vendors)]
if filter_ransomware_only:
df_filtered = df_filtered[df_filtered['knownRansomwareCampaignUse'] == 'Known']
print(f"Filtered dataset: {len(df_filtered)} vulnerabilities (from {len(df)} total)")
print(f"Date range: {df_filtered['dateAdded'].min()} to {df_filtered['dateAdded'].max()}")
print(f"Unique vendors: {df_filtered['vendorProject'].nunique()}")
Interactive Filtering Controls
Filtered dataset: 1536 vulnerabilities (from 1536 total) Date range: 2021-11-03 00:00:00 to 2026-03-05 00:00:00 Unique vendors: 253
InΒ [7]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Year-over-Year Trend Analysis</h2>"))
# Year-over-Year Analysis
df['year'] = df['dateAdded'].dt.year
df['month'] = df['dateAdded'].dt.month
# Annual summary
annual_summary = df.groupby('year').agg({
'cveID': 'count',
'vendorProject': 'nunique',
'product': 'nunique'
}).rename(columns={'cveID': 'total_cves', 'vendorProject': 'unique_vendors', 'product': 'unique_products'})
# Calculate year-over-year growth
annual_summary['yoy_growth'] = annual_summary['total_cves'].pct_change() * 100
annual_summary['yoy_growth_vendors'] = annual_summary['unique_vendors'].pct_change() * 100
# Ransomware trends by year
ransomware_by_year = df[df['knownRansomwareCampaignUse'] == 'Known'].groupby('year').size()
annual_summary['ransomware_cves'] = ransomware_by_year
annual_summary['ransomware_pct'] = (annual_summary['ransomware_cves'] / annual_summary['total_cves'] * 100).fillna(0)
print("Year-over-Year Summary:")
print(annual_summary.to_string())
print("\n" + "="*80)
# Create interactive YoY visualization
fig = make_subplots(
rows=2, cols=2,
subplot_titles=('Total CVEs by Year', 'Year-over-Year Growth Rate',
'Ransomware CVEs by Year', 'Monthly Distribution (All Years)'),
specs=[[{'type': 'bar'}, {'type': 'bar'}],
[{'type': 'bar'}, {'type': 'box'}]]
)
# Total CVEs by year
fig.add_trace(
go.Bar(x=annual_summary.index, y=annual_summary['total_cves'],
name='Total CVEs', marker_color='steelblue'),
row=1, col=1
)
# YoY Growth rate
fig.add_trace(
go.Bar(x=annual_summary.index[1:], y=annual_summary['yoy_growth'].iloc[1:],
name='YoY Growth %', marker_color='coral'),
row=1, col=2
)
# Ransomware CVEs
fig.add_trace(
go.Bar(x=annual_summary.index, y=annual_summary['ransomware_cves'].fillna(0),
name='Ransomware CVEs', marker_color='crimson'),
row=2, col=1
)
# Monthly distribution
monthly_data = df.groupby('month')['cveID'].count()
fig.add_trace(
go.Box(y=df['month'], name='Monthly Distribution', marker_color='lightseagreen'),
row=2, col=2
)
fig.update_layout(height=700, showlegend=False, hovermode='x unified')
show_plotly(fig, height=700)
Year-over-Year Trend Analysis
Year-over-Year Summary:
total_cves unique_vendors unique_products yoy_growth yoy_growth_vendors ransomware_cves ransomware_pct
year
2021 311 73 155 NaN NaN 76 24.437299
2022 555 98 239 78.456592 34.246575 123 22.162162
2023 187 67 112 -66.306306 -31.632653 43 22.994652
2024 186 64 109 -0.534759 -4.477612 42 22.580645
2025 245 99 146 31.720430 54.687500 25 10.204082
2026 52 29 35 -78.775510 -70.707071 3 5.769231
================================================================================
InΒ [8]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Key Statistics Summary</h2>"))
# Statistical Overview
df_vendor_count_sorted = df_vendor_count.head(TOP_N_VENDORS).rename(columns={'count': 'Count', 'vendorProject': 'vendorProject'})
df_product_count_sorted = df_product_count.head(TOP_N_PRODUCTS).rename(columns={'count': 'Count'})
print("=" * 60)
print("π KEY STATISTICS")
print("=" * 60)
print(f"\nπ¦ Dataset Overview:")
print(f" Total Vulnerabilities: {len(df):,}")
print(f" Unique Vendors: {df['vendorProject'].nunique():,}")
print(f" Unique Products: {df['product'].nunique():,}")
print(f" Date Range: {df['dateAdded'].min().strftime('%Y-%m-%d')} to {df['dateAdded'].max().strftime('%Y-%m-%d')}")
print(f"\nπ΄ Ransomware Threats:")
ransomware_known = df[df['knownRansomwareCampaignUse'] == 'Known'].shape[0]
ransomware_pct = (ransomware_known / len(df)) * 100
print(f" Known Ransomware Use: {ransomware_known:,} ({ransomware_pct:.1f}%)")
print(f" Unknown Ransomware Use: {len(df) - ransomware_known:,} ({100-ransomware_pct:.1f}%)")
print(f"\nπ
Timeline:")
print(f" Earliest Due Date: {df['dueDate'].min().strftime('%Y-%m-%d')}")
print(f" Latest Due Date: {df['dueDate'].max().strftime('%Y-%m-%d')}")
print(f" Median Due Date: {df['dueDate'].median().strftime('%Y-%m-%d')}")
print(f"\nπ Top 3 Vendors:")
for idx, row in df_vendor_count_sorted.head(3).iterrows():
pct = (row['Count'] / len(df)) * 100
print(f" {row['vendorProject']}: {row['Count']} ({pct:.1f}%)")
print(f"\nπ― Top 3 Products:")
for idx, row in df_product_count_sorted.head(3).iterrows():
pct = (row['Count'] / len(df)) * 100
print(f" {row['product']}: {row['Count']} ({pct:.1f}%)")
print("\n" + "=" * 60)
Key Statistics Summary
============================================================ π KEY STATISTICS ============================================================ π¦ Dataset Overview: Total Vulnerabilities: 1,536 Unique Vendors: 253 Unique Products: 626 Date Range: 2021-11-03 to 2026-03-05 π΄ Ransomware Threats: Known Ransomware Use: 312 (20.3%) Unknown Ransomware Use: 1,224 (79.7%) π Timeline: Earliest Due Date: 2021-11-17 Latest Due Date: 2026-03-26 Median Due Date: 2022-08-10 π Top 3 Vendors: Microsoft: 361 (23.5%) Apple: 90 (5.9%) Cisco: 85 (5.5%) π― Top 3 Products: Windows: 166 (10.8%) Multiple Products: 50 (3.3%) Chromium V8: 37 (2.4%) ============================================================
InΒ [9]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Vulnerability Trends Over Time</h2>"))
# Vulnerability Trends Over Time - Interactive
df_timeline = df.groupby(df['dueDate'].dt.to_period('M')).size().reset_index(name='Count')
df_timeline['dueDate'] = df_timeline['dueDate'].dt.to_timestamp()
df_timeline['Cumulative'] = df_timeline['Count'].cumsum()
fig = make_subplots(
rows=1, cols=2,
subplot_titles=('Vulnerability Due Dates by Month', 'Cumulative Vulnerabilities Over Time')
)
# Timeline plot
fig.add_trace(
go.Scatter(x=df_timeline['dueDate'], y=df_timeline['Count'],
mode='lines+markers', name='Monthly Count',
line=dict(width=2), marker=dict(size=6),
hovertemplate='<b>%{x|%Y-%m}</b><br>Count: %{y}<extra></extra>'),
row=1, col=1
)
# Cumulative plot
fig.add_trace(
go.Scatter(x=df_timeline['dueDate'], y=df_timeline['Cumulative'],
mode='lines', name='Cumulative',
fill='tozeroy', line=dict(width=2),
hovertemplate='<b>%{x|%Y-%m}</b><br>Total: %{y}<extra></extra>'),
row=1, col=2
)
fig.update_xaxes(title_text="Due Date", row=1, col=1)
fig.update_xaxes(title_text="Due Date", row=1, col=2)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_yaxes(title_text="Cumulative Count", row=1, col=2)
fig.update_layout(height=500, showlegend=False, hovermode='x unified')
show_plotly(fig)
Vulnerability Trends Over Time
InΒ [10]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Ransomware Analysis by Vendor</h2>"))
# Ransomware Analysis - Interactive
df_ransomware_vendors = df[df['knownRansomwareCampaignUse'] == 'Known'].groupby('vendorProject').size().reset_index(name='Count')
df_ransomware_vendors = df_ransomware_vendors.sort_values('Count', ascending=False).head(10)
# Ransomware percentage by top vendors
top_10_vendors = df_vendor_count_sorted.head(10)['vendorProject'].tolist()
df_vendor_ransomware = df[df['vendorProject'].isin(top_10_vendors)].groupby(['vendorProject', 'knownRansomwareCampaignUse']).size().unstack(fill_value=0)
df_vendor_ransomware['Total'] = df_vendor_ransomware.sum(axis=1)
df_vendor_ransomware['Ransomware_Pct'] = (df_vendor_ransomware.get('Known', 0) / df_vendor_ransomware['Total'] * 100)
df_vendor_ransomware = df_vendor_ransomware.sort_values('Ransomware_Pct', ascending=False)
fig = make_subplots(
rows=1, cols=2,
subplot_titles=('Top 10 Vendors with Known Ransomware Exploits',
'Ransomware Percentage in Top 10 Vendors')
)
# Top ransomware vendors
fig.add_trace(
go.Bar(y=df_ransomware_vendors['vendorProject'], x=df_ransomware_vendors['Count'],
orientation='h', marker_color='crimson',
hovertemplate='<b>%{y}</b><br>Count: %{x}<extra></extra>'),
row=1, col=1
)
# Ransomware percentage
fig.add_trace(
go.Bar(y=df_vendor_ransomware.index, x=df_vendor_ransomware['Ransomware_Pct'],
orientation='h', marker_color='orange',
hovertemplate='<b>%{y}</b><br>Ransomware: %{x:.1f}%<extra></extra>'),
row=1, col=2
)
fig.update_xaxes(title_text="Count", row=1, col=1)
fig.update_xaxes(title_text="Percentage (%)", row=1, col=2)
fig.update_yaxes(title_text="", row=1, col=1)
fig.update_yaxes(title_text="", row=1, col=2)
fig.update_layout(height=500, showlegend=False)
show_plotly(fig)
Ransomware Analysis by Vendor
InΒ [11]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Top Vulnerability Types (CWE Categories)</h2>"))
# CWE Category Insights - Interactive
cwe_descriptions = {
'CWE-20': 'Improper Input Validation',
'CWE-78': 'OS Command Injection',
'CWE-416': 'Use After Free',
'CWE-787': 'Out-of-bounds Write',
'CWE-119': 'Buffer Errors',
'CWE-22': 'Path Traversal',
'CWE-94': 'Code Injection',
'CWE-502': 'Deserialization of Untrusted Data',
'CWE-843': 'Type Confusion',
'CWE-264': 'Permissions/Privileges',
'CWE-284': 'Improper Access Control',
'CWE-287': 'Improper Authentication',
'CWE-306': 'Missing Authentication',
'CWE-79': 'Cross-site Scripting (XSS)',
'CWE-200': 'Information Exposure',
'CWE-89': 'SQL Injection'
}
# Extract CWE ID from cwes field
df['CWE'] = df['cwes'].str.extract(r'(CWE-\d+)', expand=False)
significant_cwes = df['CWE'].value_counts().head(15).reset_index()
significant_cwes.columns = ['CWE', 'Count']
# Add descriptions to significant CWEs
significant_cwes_labeled = significant_cwes.copy()
significant_cwes_labeled['Description'] = significant_cwes_labeled['CWE'].map(cwe_descriptions).fillna('Other')
significant_cwes_labeled['Label'] = significant_cwes_labeled.apply(lambda x: f"{x['CWE']}: {x['Description']}", axis=1)
significant_cwes_labeled = significant_cwes_labeled.sort_values('Count', ascending=False)
# Color code by count
colors_map = {'high': '#d62728', 'medium': '#ff7f0e', 'low': '#2ca02c'}
significant_cwes_labeled['color'] = significant_cwes_labeled['Count'].apply(
lambda c: colors_map['high'] if c > 70 else colors_map['medium'] if c > 40 else colors_map['low']
)
# Create interactive bar chart
fig = go.Figure(data=[
go.Bar(
y=significant_cwes_labeled['Label'],
x=significant_cwes_labeled['Count'],
orientation='h',
marker_color=significant_cwes_labeled['color'],
hovertemplate='<b>%{y}</b><br>Count: %{x}<extra></extra>'
)
])
fig.update_layout(
title='Top Vulnerability Types (CWE Categories)',
xaxis_title='Count',
yaxis_title='',
height=600,
yaxis={'categoryorder':'total ascending'}
)
show_plotly(fig, height=600)
print("\nπ CWE Category Analysis:")
print(f" Most Common: {significant_cwes_labeled.iloc[0]['CWE']} - {significant_cwes_labeled.iloc[0]['Description']} ({significant_cwes_labeled.iloc[0]['Count']} vulnerabilities)")
print(f" These top {len(significant_cwes)} CWE categories account for {significant_cwes['Count'].sum():,} vulnerabilities")
Top Vulnerability Types (CWE Categories)
π CWE Category Analysis: Most Common: CWE-20 - Improper Input Validation (113 vulnerabilities) These top 15 CWE categories account for 851 vulnerabilities
InΒ [12]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Vendor-Product Vulnerability Heatmap</h2>"))
# Vendor-Product Heatmap - Interactive
# Get top 10 vendors and their top products
top_vendors_list = df_vendor_count_sorted.head(10)['vendorProject'].tolist()
df_vendor_product = df[df['vendorProject'].isin(top_vendors_list)].groupby(['vendorProject', 'product']).size().reset_index(name='Count')
# For each vendor, get top 5 products
top_vendor_products = []
for vendor in top_vendors_list:
vendor_data = df_vendor_product[df_vendor_product['vendorProject'] == vendor].nlargest(5, 'Count')
top_vendor_products.append(vendor_data)
df_heatmap = pd.concat(top_vendor_products)
# Create a pivot table
pivot_data = df_heatmap.pivot_table(index='product', columns='vendorProject', values='Count', fill_value=0)
# Create interactive heatmap
fig = go.Figure(data=go.Heatmap(
z=pivot_data.values,
x=pivot_data.columns,
y=pivot_data.index,
colorscale='YlOrRd',
hovertemplate='<b>Vendor:</b> %{x}<br><b>Product:</b> %{y}<br><b>Count:</b> %{z}<extra></extra>',
colorbar=dict(title='Vulnerabilities')
))
fig.update_layout(
title='Vendor-Product Vulnerability Heatmap (Top 5 Products per Top 10 Vendors)',
xaxis_title='Vendor',
yaxis_title='Product',
height=700,
xaxis={'tickangle': 45}
)
show_plotly(fig, height=700)
Vendor-Product Vulnerability Heatmap
InΒ [13]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Vendor Risk Profile: Volume vs Ransomware</h2>"))
# Market Share vs Vulnerability Count - Interactive Bubble Chart
# Compare vendor vulnerability counts with ransomware risk
top_20_vendors = df_vendor_count_sorted.head(20)
# Calculate ransomware percentage for each vendor
vendor_risk_data = []
for idx, row in top_20_vendors.iterrows():
vendor = row['vendorProject']
count = row['Count']
ransomware_count = df[(df['vendorProject'] == vendor) & (df['knownRansomwareCampaignUse'] == 'Known')].shape[0]
ransomware_pct = (ransomware_count / count) * 100 if count > 0 else 0
vendor_risk_data.append({
'vendor': vendor,
'count': count,
'ransomware_pct': ransomware_pct,
'ransomware_count': ransomware_count
})
df_bubble = pd.DataFrame(vendor_risk_data)
# Create interactive bubble chart
fig = px.scatter(df_bubble,
x='count',
y='ransomware_pct',
size='count',
color='vendor',
hover_name='vendor',
hover_data={
'count': True,
'ransomware_pct': ':.1f',
'ransomware_count': True,
'vendor': False
},
labels={
'count': 'Total Vulnerabilities',
'ransomware_pct': 'Ransomware Usage (%)',
'ransomware_count': 'Ransomware CVEs'
},
title='Vendor Vulnerability Profile: Volume vs Ransomware Risk')
fig.update_traces(marker=dict(line=dict(width=2, color='DarkSlateGrey')))
fig.update_layout(height=600, showlegend=True)
show_plotly(fig)
print("\nπ‘ Insight: Bubble size represents total vulnerabilities. Position shows ransomware exploitation risk.")
Vendor Risk Profile: Volume vs Ransomware
π‘ Insight: Bubble size represents total vulnerabilities. Position shows ransomware exploitation risk.
InΒ [14]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Security Recommendations & Action Items</h2>"))
# Priority Action Items
print("=" * 70)
print("π― PRIORITY ACTION ITEMS FOR SECURITY TEAMS")
print("=" * 70)
# 1. High-risk vendors with ransomware
print("\n1. π΄ HIGH-RISK VENDORS (High Volume + Ransomware Exploitation):")
high_risk = df_vendor_ransomware[df_vendor_ransomware['Ransomware_Pct'] > 25].sort_values('Ransomware_Pct', ascending=False)
for vendor in high_risk.index:
vuln_count = high_risk.loc[vendor, 'Total']
ransomware_pct = high_risk.loc[vendor, 'Ransomware_Pct']
print(f" β’ {vendor}: {int(vuln_count)} vulnerabilities ({ransomware_pct:.1f}% ransomware)")
# 2. Most critical CWE categories to address
print("\n2. π― CRITICAL VULNERABILITY TYPES TO PATCH:")
for idx, row in significant_cwes_labeled.tail(5).iterrows():
print(f" β’ {row['CWE']}: {row['Description']} ({row['Count']} instances)")
# 3. Products requiring immediate attention
print("\n3. β οΈ PRODUCTS NEEDING IMMEDIATE ATTENTION:")
critical_products = df_product_count_sorted.head(5)
for idx, row in critical_products.iterrows():
vendor = df[df['product'] == row['product']]['vendorProject'].mode()[0] if len(df[df['product'] == row['product']]) > 0 else 'Unknown'
print(f" β’ {row['product']} ({vendor}): {row['Count']} vulnerabilities")
# 4. Due date urgency
print("\n4. π
REMEDIATION URGENCY:")
current_date = pd.Timestamp.now()
overdue = df[df['dueDate'] < current_date]
upcoming_30 = df[(df['dueDate'] >= current_date) & (df['dueDate'] <= current_date + pd.Timedelta(days=30))]
print(f" β’ Overdue patches: {len(overdue)} vulnerabilities")
print(f" β’ Due within 30 days: {len(upcoming_30)} vulnerabilities")
print(f" β’ Oldest overdue: {(current_date - df['dueDate'].min()).days} days")
# 5. Ransomware-specific recommendations
print("\n5. π‘οΈ RANSOMWARE PROTECTION PRIORITIES:")
ransomware_vulns = df[df['knownRansomwareCampaignUse'] == 'Known']
top_ransomware_products = ransomware_vulns['product'].value_counts().head(3)
for product, count in top_ransomware_products.items():
print(f" β’ Patch {product}: {count} known ransomware exploits")
print("\n" + "=" * 70)
print("π‘ TIP: Focus on vendors with both high volume AND high ransomware %")
print("=" * 70)
Security Recommendations & Action Items
====================================================================== π― PRIORITY ACTION ITEMS FOR SECURITY TEAMS ====================================================================== 1. π΄ HIGH-RISK VENDORS (High Volume + Ransomware Exploitation): β’ Ivanti: 31 vulnerabilities (38.7% ransomware) β’ VMware: 26 vulnerabilities (34.6% ransomware) β’ Microsoft: 361 vulnerabilities (28.3% ransomware) β’ Oracle: 42 vulnerabilities (26.2% ransomware) 2. π― CRITICAL VULNERABILITY TYPES TO PATCH: β’ CWE-264: Permissions/Privileges (31 instances) β’ CWE-306: Missing Authentication (30 instances) β’ CWE-79: Cross-site Scripting (XSS) (29 instances) β’ CWE-284: Improper Access Control (29 instances) β’ CWE-122: Other (26 instances) 3. β οΈ PRODUCTS NEEDING IMMEDIATE ATTENTION: β’ Windows (Microsoft): 166 vulnerabilities β’ Multiple Products (Apple): 50 vulnerabilities β’ Chromium V8 (Google): 37 vulnerabilities β’ Internet Explorer (Microsoft): 34 vulnerabilities β’ Flash Player (Adobe): 33 vulnerabilities 4. π REMEDIATION URGENCY: β’ Overdue patches: 1521 vulnerabilities β’ Due within 30 days: 15 vulnerabilities β’ Oldest overdue: 1572 days 5. π‘οΈ RANSOMWARE PROTECTION PRIORITIES: β’ Patch Windows: 45 known ransomware exploits β’ Patch Exchange Server: 12 known ransomware exploits β’ Patch Multiple Products: 8 known ransomware exploits ====================================================================== π‘ TIP: Focus on vendors with both high volume AND high ransomware % ======================================================================
InΒ [15]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Risk Score Analysis for Critical Vulnerabilities</h2>"))
# Create interactive summary table for top vulnerabilities
print("\nπ TOP 10 MOST CRITICAL VULNERABILITY COMBINATIONS\n")
print("Ranking by: Vendor impact Γ Ransomware risk Γ CWE severity\n")
# Calculate risk scores
df_risk = df.copy()
df_risk['vendor_count'] = df_risk['vendorProject'].map(df_vendor_count.set_index('vendorProject')['count'])
df_risk['cwe_count'] = df_risk['cwes'].map(df_cwe_count.set_index('cwes')['count'])
df_risk['ransomware_score'] = df_risk['is_ransomware'] * 100
# Calculate composite risk score
df_risk['risk_score'] = (
(df_risk['vendor_count'] / df_risk['vendor_count'].max()) * 30 +
(df_risk['cwe_count'] / df_risk['cwe_count'].max()) * 30 +
df_risk['ransomware_score'] * 0.4
)
# Get top 10 highest risk
top_risk = df_risk.nlargest(10, 'risk_score')[['cveID', 'vendorProject', 'product',
'cwes', 'knownRansomwareCampaignUse',
'risk_score']].reset_index(drop=True)
print(top_risk.to_string(index=False))
print(f"\nβ οΈ These {len(top_risk)} CVEs represent the highest combined threat level")
print(" based on vendor prevalence, vulnerability type frequency, and ransomware usage.")
Risk Score Analysis for Critical Vulnerabilities
π TOP 10 MOST CRITICAL VULNERABILITY COMBINATIONS
Ranking by: Vendor impact Γ Ransomware risk Γ CWE severity
cveID vendorProject product cwes knownRansomwareCampaignUse risk_score
CVE-2016-0034 Microsoft Silverlight CWE-20 Known 99.662921
CVE-2021-42278 Microsoft Active Directory CWE-20 Known 99.662921
CVE-2017-0148 Microsoft SMBv1 server CWE-20 Known 99.662921
CVE-2017-0146 Microsoft Windows CWE-20 Known 99.662921
CVE-2017-0145 Microsoft SMBv1 CWE-20 Known 99.662921
CVE-2017-0144 Microsoft SMBv1 CWE-20 Known 99.662921
CVE-2017-0143 Microsoft Windows CWE-20 Known 99.662921
CVE-2019-0604 Microsoft SharePoint CWE-20 Known 99.662921
CVE-2025-29824 Microsoft Windows CWE-416 Known 98.988764
CVE-2013-2551 Microsoft Internet Explorer CWE-416 Known 98.988764
β οΈ These 10 CVEs represent the highest combined threat level
based on vendor prevalence, vulnerability type frequency, and ransomware usage.
InΒ [16]:
from IPython.display import display, HTML
display(HTML("<h2 style='color: #f5f5f5; margin-top: 0; padding-top: 0;'>Export Analysis Results</h2>"))
# Export Summary and Data
export_timestamp = datetime.now().strftime('%Y-%m-%d_%H%M%S')
print("\n" + "=" * 70)
print("π EXPORTING ANALYSIS RESULTS")
print("=" * 70)
# Create summary report
summary_report = {
'Analysis_Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
'Total_CVEs': len(df),
'Unique_Vendors': df['vendorProject'].nunique(),
'Unique_Products': df['product'].nunique(),
'Ransomware_CVEs': df[df['knownRansomwareCampaignUse'] == 'Known'].shape[0],
'Ransomware_Percentage': round((df[df['knownRansomwareCampaignUse'] == 'Known'].shape[0] / len(df)) * 100, 2),
'Top_Vendor': df_vendor_count_sorted.iloc[0]['vendorProject'],
'Top_Product': df_product_count_sorted.iloc[0]['product'],
'Most_Common_CWE': f"{significant_cwes.iloc[0]['CWE']}: {cwe_descriptions.get(significant_cwes.iloc[0]['CWE'], 'N/A')}",
'Date_Range': f"{df['dueDate'].min().strftime('%Y-%m-%d')} to {df['dueDate'].max().strftime('%Y-%m-%d')}"
}
# Enhanced vendor analysis
df_vendor_enhanced = df_vendor_count_sorted.copy()
df_vendor_enhanced['Ransomware_Count'] = df_vendor_enhanced['vendorProject'].apply(
lambda x: len(df[(df['vendorProject'] == x) & (df['knownRansomwareCampaignUse'] == 'Known')])
)
df_vendor_enhanced['Ransomware_Pct'] = (df_vendor_enhanced['Ransomware_Count'] / df_vendor_enhanced['Count'] * 100).round(1)
# Save CSV exports
summary_df = pd.DataFrame([summary_report])
summary_df.to_csv(f'analysis_summary_{export_timestamp}.csv', index=False)
df_vendor_enhanced.to_csv(f'vendor_analysis_enhanced_{export_timestamp}.csv', index=False)
# Excel multi-sheet export (if enabled)
if EXPORT_EXCEL:
with pd.ExcelWriter(f'kev_analysis_{export_timestamp}.xlsx', engine='openpyxl') as writer:
summary_df.to_excel(writer, sheet_name='Summary', index=False)
df_vendor_enhanced.to_excel(writer, sheet_name='Vendors', index=False)
df_product_count_sorted.to_excel(writer, sheet_name='Products', index=False)
significant_cwes_labeled.to_excel(writer, sheet_name='CWE Categories', index=False)
annual_summary.to_excel(writer, sheet_name='Year-over-Year', index=True)
print(f" β
Excel workbook: kev_analysis_{export_timestamp}.xlsx")
# JSON export (if enabled)
if EXPORT_JSON:
import json
export_json = {
'metadata': summary_report,
'vendors': df_vendor_enhanced.to_dict('records'),
'products': df_product_count_sorted.head(20).to_dict('records'),
'cwe_categories': significant_cwes_labeled.to_dict('records'),
'annual_trends': annual_summary.to_dict('index')
}
with open(f'kev_analysis_{export_timestamp}.json', 'w') as f:
json.dump(export_json, f, indent=2)
print(f" β
JSON export: kev_analysis_{export_timestamp}.json")
print(f"\nβ
CSV exports saved")
print(f" β’ analysis_summary_{export_timestamp}.csv")
print(f" β’ vendor_analysis_enhanced_{export_timestamp}.csv")
print("\n" + "=" * 70)
print("π ANALYSIS COMPLETE")
print("=" * 70)
print(f"\nπ Summary: {len(df):,} vulnerabilities analyzed across {df['vendorProject'].nunique()} vendors")
print(f" Cache file: {CACHE_FILE}")
print(f" To refresh: Re-run cell 2 to fetch latest data from CISA")
print("\n" + "=" * 70)
Export Analysis Results
====================================================================== π EXPORTING ANALYSIS RESULTS ====================================================================== β Excel workbook: kev_analysis_2026-03-08_001611.xlsx β JSON export: kev_analysis_2026-03-08_001611.json β CSV exports saved β’ analysis_summary_2026-03-08_001611.csv β’ vendor_analysis_enhanced_2026-03-08_001611.csv ====================================================================== π ANALYSIS COMPLETE ====================================================================== π Summary: 1,536 vulnerabilities analyzed across 253 vendors Cache file: cisa_kev_cache.csv To refresh: Re-run cell 2 to fetch latest data from CISA ======================================================================