Preamble
import numpy as np # for multi-dimensional containers
import pandas as pd # for DataFrames
import plotly.graph_objects as go # for data visualisation
import matplotlib.pyplot as plt
from wordcloud import WordCloud # visualising word clouds
Dataset
# Load the ENISA vulnerability dataset, using its "id" column as the row index.
data = pd.read_csv(
    "https://datacrayon.com/datasets/enisa_vuln.csv",
    index_col="id",
    low_memory=False,
)
# Keep only the calendar date (datetime.date objects) of each publication timestamp.
published_timestamps = pd.to_datetime(data["date_published"])
data["date_published"] = published_timestamps.dt.date
data.tail()
source_db | source_db_id | cna | cvss3_bscore | cvss3_severity | cvss3_impact | cvss3_exploitability | cvss3_attack | cvss3_complexity | cvss3_priveleges | ... | EOS_product | EOS_version | EOS_date | Type | 0day | Today | 0day_low_y | 0day_upper | Today_low | Today_upper | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | |||||||||||||||||||||
27466 | nvd.nist.gov | CVE-2018-20877 | MITRE Corporation | 5.4 | MEDIUM | 2.7 | 2.3 | Network | Low | Low | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
27467 | nvd.nist.gov | CVE-2018-20876 | MITRE Corporation | 5.4 | MEDIUM | 2.7 | 2.3 | Network | Low | Low | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
27468 | nvd.nist.gov | CVE-2018-20875 | MITRE Corporation | 5.4 | MEDIUM | 2.7 | 2.3 | Network | Low | Low | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
27469 | nvd.nist.gov | CVE-2018-20874 | MITRE Corporation | 5.4 | MEDIUM | 2.7 | 2.3 | Network | Low | Low | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
27470 | nvd.nist.gov | CVE-2018-20873 | MITRE Corporation | 3.3 | LOW | 1.4 | 1.8 | Local | Low | Low | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 57 columns
Introduction
# Print a concise summary of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage.
data.info()
Int64Index: 27471 entries, 0 to 27470 Data columns (total 57 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 source_db 27471 non-null object 1 source_db_id 27471 non-null object 2 cna 27471 non-null object 3 cvss3_bscore 27471 non-null float64 4 cvss3_severity 27471 non-null object 5 cvss3_impact 27471 non-null float64 6 cvss3_exploitability 27471 non-null float64 7 cvss3_attack 27471 non-null object 8 cvss3_complexity 27471 non-null object 9 cvss3_priveleges 27471 non-null object 10 cvss3_user_interaction 27471 non-null object 11 cvss3_scope 27471 non-null object 12 cvss3_confidentiality 27471 non-null object 13 cvss3_integrity 27471 non-null object 14 cvss3_availability 27471 non-null object 15 cvss2_bscore 27471 non-null float64 16 cvss2_severity 27471 non-null object 17 cvss2_impact 27471 non-null float64 18 cvss2_exploitability 27471 non-null float64 19 cvss2_access 27471 non-null object 20 cvss2_complexity 27471 non-null object 21 cvss2_authentication 27471 non-null object 22 cvss2_confidentiality 27471 non-null object 23 cvss2_integrity 27471 non-null object 24 cvss2_availability 27471 non-null object 25 cwe 27471 non-null object 26 capec 21335 non-null object 27 cpe 27462 non-null object 28 description 27471 non-null object 29 n_exploits 27471 non-null int64 30 technique_id 8077 non-null object 31 tactic 8067 non-null object 32 date_published 27471 non-null object 33 date_modified 27471 non-null object 34 history_summary 308 non-null object 35 date_exploit 2371 non-null object 36 0day_low_x 3390 non-null object 37 0day_high 3353 non-null float64 38 today_low 3389 non-null float64 39 today_high 3389 non-null float64 40 exp_type 2371 non-null object 41 platform 2371 non-null object 42 exp_verified 2371 non-null object 43 vendor 23110 non-null object 44 product 23108 non-null object 45 sector 136 non-null object 46 incident 2169 non-null float64 47 EOS_product 381 non-null object 48 EOS_version 381 non-null object 49 EOS_date 381 
non-null object 50 Type 3369 non-null object 51 0day 3369 non-null object 52 Today 3369 non-null object 53 0day_low_y 3369 non-null float64 54 0day_upper 3369 non-null float64 55 Today_low 3369 non-null float64 56 Today_upper 3369 non-null float64 dtypes: float64(14), int64(1), object(42) memory usage: 12.2+ MB
# Report the span of publication dates covered by the dataset.
earliest_published = data.date_published.min()
latest_published = data.date_published.max()
print(f"Earliest date {earliest_published}")
print(f"Latest date {latest_published}")
print(f"Over {(latest_published - earliest_published).days} days")
Earliest date 2018-01-01 Latest date 2019-08-30 Over 606 days
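Two numerical features of particular interest are `cvss3_bscore` and `cvss2_bscore`, the CVSS v3 and CVSS v2 base scores. There is a difference in severity classification and base score ranges between the two metrics. E.g. a score of 9.0 or above is rated Critical under CVSS v3, whereas CVSS v2 has no Critical level and rates the same score as High.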
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()
cvss3_bscore | cvss3_impact | cvss3_exploitability | cvss2_bscore | cvss2_impact | cvss2_exploitability | n_exploits | 0day_high | today_low | today_high | incident | 0day_low_y | 0day_upper | Today_low | Today_upper | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 27471.000000 | 27471.000000 | 27471.000000 | 27471.000000 | 27471.000000 | 27471.000000 | 27471.000000 | 3353.000000 | 3389.000000 | 3389.000000 | 2169.000000 | 3369.000000 | 3369.0 | 3369.000000 | 3369.000000 |
mean | 7.339012 | 4.416301 | 2.783615 | 5.749459 | 5.229668 | 8.122318 | 0.131666 | 24634.059052 | 3375.627029 | 8068.161700 | 1.946519 | 13223.211636 | inf | 3538.438706 | 8420.302760 |
std | 1.612830 | 1.479722 | 0.913636 | 1.888787 | 2.552895 | 2.005292 | 1.286410 | 26930.853685 | 4372.583724 | 9670.596732 | 2.327816 | 18106.114904 | NaN | 4605.882592 | 10109.008754 |
min | 1.800000 | 1.400000 | 0.100000 | 1.200000 | 2.900000 | 1.200000 | 0.000000 | 1000.000000 | 0.000000 | 1000.000000 | 0.000000 | 0.000000 | 1000.0 | 0.000000 | 1000.000000 |
25% | 6.100000 | 3.600000 | 1.800000 | 4.300000 | 2.900000 | 8.000000 | 0.000000 | 5000.000000 | 0.000000 | 1000.000000 | 1.000000 | 2000.000000 | 5000.0 | 0.000000 | 1000.000000 |
50% | 7.500000 | 3.600000 | 2.800000 | 5.000000 | 4.900000 | 8.600000 | 0.000000 | 25000.000000 | 2000.000000 | 5000.000000 | 2.000000 | 10000.000000 | 25000.0 | 2000.000000 | 5000.000000 |
75% | 8.800000 | 5.900000 | 3.900000 | 7.200000 | 6.400000 | 10.000000 | 0.000000 | 25000.000000 | 5000.000000 | 10000.000000 | 2.000000 | 10000.000000 | 25000.0 | 5000.000000 | 10000.000000 |
max | 10.000000 | 6.000000 | 3.900000 | 10.000000 | 10.000000 | 10.000000 | 123.000000 | 100000.000000 | 25000.000000 | 50000.000000 | 57.000000 | 100000.000000 | inf | 25000.000000 | 50000.000000 |
How many missing vulnerability scores?
# Count the missing (NaN) values in each column.
data.isna().sum()
source_db 0 source_db_id 0 cna 0 cvss3_bscore 0 cvss3_severity 0 cvss3_impact 0 cvss3_exploitability 0 cvss3_attack 0 cvss3_complexity 0 cvss3_priveleges 0 cvss3_user_interaction 0 cvss3_scope 0 cvss3_confidentiality 0 cvss3_integrity 0 cvss3_availability 0 cvss2_bscore 0 cvss2_severity 0 cvss2_impact 0 cvss2_exploitability 0 cvss2_access 0 cvss2_complexity 0 cvss2_authentication 0 cvss2_confidentiality 0 cvss2_integrity 0 cvss2_availability 0 cwe 0 capec 6136 cpe 9 description 0 n_exploits 0 technique_id 19394 tactic 19404 date_published 0 date_modified 0 history_summary 27163 date_exploit 25100 0day_low_x 24081 0day_high 24118 today_low 24082 today_high 24082 exp_type 25100 platform 25100 exp_verified 25100 vendor 4361 product 4363 sector 27335 incident 25302 EOS_product 27090 EOS_version 27090 EOS_date 27090 Type 24102 0day 24102 Today 24102 0day_low_y 24102 0day_upper 24102 Today_low 24102 Today_upper 24102 dtype: int64
# Side-by-side box plots of the CVSS v3 and CVSS v2 base scores.
fig = go.Figure(
    data=[
        go.Box(y=data.cvss3_bscore, name="CVSS3"),
        go.Box(y=data.cvss2_bscore, name="CVSS2"),
    ]
)
fig.show()
CVSS distributions
# Overlaid histograms of the CVSS v3 and CVSS v2 base scores.
fig = go.Figure()
fig.add_trace(go.Histogram(x=data.cvss3_bscore, name="CVSS3"))
fig.add_trace(go.Histogram(x=data.cvss2_bscore, name="CVSS2"))
# Draw both histograms on top of each other; without this the bars are placed
# side by side (the default "group" mode) and the reduced opacity below has
# no purpose.
fig.update_layout(barmode="overlay")
# Reduce opacity so that both overlaid distributions remain visible.
fig.update_traces(opacity=0.75)
fig.show()
The `description` field has some description of the event relating to the vulnerability detected. This text could potentially be quantified and used for auxiliary analysis.
# Build the word cloud from the full text of every vulnerability description.
# NOTE: do not pass str(data.description.values) here. For arrays with more
# than 1000 elements NumPy abbreviates the printed form with "...", so only
# the first and last few descriptions would end up in the word cloud.
description_text = " ".join(data.description.dropna().astype(str))
wordcloud = WordCloud(
    width=600, height=600, background_color="white"
).generate(description_text)

# Render the word cloud as an image without axes.
plt.figure(figsize=(10, 10), dpi=80)
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
Timeseries
# Index the rows by publication date (keeping the column as well) and sort
# them by that index.
data = data.set_index("date_published", drop=False).sort_index()
data.tail()
source_db | source_db_id | cna | cvss3_bscore | cvss3_severity | cvss3_impact | cvss3_exploitability | cvss3_attack | cvss3_complexity | cvss3_priveleges | ... | EOS_product | EOS_version | EOS_date | Type | 0day | Today | 0day_low_y | 0day_upper | Today_low | Today_upper | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
date_published | |||||||||||||||||||||
2019-08-30 | nvd.nist.gov | CVE-2019-15817 | MITRE Corporation | 6.1 | MEDIUM | 2.7 | 2.8 | Network | Low | None | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2019-08-30 | nvd.nist.gov | CVE-2019-15818 | MITRE Corporation | 6.1 | MEDIUM | 2.7 | 2.8 | Network | Low | None | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2019-08-30 | nvd.nist.gov | CVE-2019-15819 | MITRE Corporation | 9.8 | CRITICAL | 5.9 | 3.9 | Network | Low | None | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2019-08-30 | nvd.nist.gov | CVE-2019-1968 | Cisco Systems, Inc. | 7.5 | HIGH | 3.6 | 3.9 | Network | Low | None | ... | NaN | NaN | NaN | Router Operating System |
|
|
10000.0 | 25000.0 | 2000.0 | 5000.0 |
2019-08-30 | nvd.nist.gov | CVE-2019-5608 | FreeBSD | 9.8 | CRITICAL | 5.9 | 3.9 | Network | Low | None | ... | NaN | NaN | NaN | Operating System |
|
|
5000.0 | 10000.0 | 1000.0 | 2000.0 |
5 rows × 57 columns
Vulnerabilities published daily
# Count how many vulnerabilities were published on each date, ordered by date.
daily_frequency = data.date_published.value_counts().sort_index()

# Line plot of the daily publication counts.
fig = go.Figure(
    data=go.Scatter(x=daily_frequency.index.values, y=daily_frequency.values)
)
fig.show()
Cumulative mean
# Plot the running (expanding) mean of each CVSS base score over the rows of
# `data`, in their current row order, against the publication date.
# (The daily frequency counts are not needed for this figure, so they are not
# recomputed here.)
fig = go.Figure()
for score_column, trace_name in (
    ("cvss3_bscore", "CVSS3"),
    ("cvss2_bscore", "CVSS2"),
):
    fig.add_trace(
        go.Scatter(
            x=data.date_published,
            y=data[score_column].expanding().mean(),
            name=trace_name,
        )
    )
fig.show()