Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bayes net #85

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 158 additions & 0 deletions notebooks/bayes-net-editor.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "bc83eb2c-32c1-4d5a-9e4f-9e0e0378c8a6",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "660a13e6881c4d69b018e42528d36b35",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Box(children=(FloatSlider(value=0.2, continuous_update=False, description='answer_is_bogon', max=1.0, step=0.0…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import ipywidgets as widgets\n",
"from ipywidgets import VBox, Box, Layout\n",
"from IPython.display import display\n",
"\n",
"slider_map = {}\n",
"\n",
"values = [\n",
"\"answer_is_bogon\", \n",
"\"answer_matches_probe_asn\", \n",
"\"answer_matches_probe_cc\",\n",
"\"answer_is_cloud_provider\",\n",
"\"answer_other\"\n",
"]\n",
"\n",
"class Changer:\n",
"    \"\"\"Keep the sliders in ``slider_map`` summing to 1 when one of them is moved.\"\"\"\n",
"\n",
"    # Set while callback() is rescaling the other sliders, because assigning\n",
"    # ``s.value`` fires this same observer again for each of them.\n",
"    change_in_progress = False\n",
"\n",
"    def callback(self, value):\n",
"        \"\"\"Rescale every slider except the changed one so the total stays 1.\n",
"\n",
"        ``value`` is the change notification passed by ``slider.observe``; its\n",
"        ``'owner'`` is the slider that moved and ``'new'`` is its new value.\n",
"        The other sliders keep their relative proportions. If they are all at\n",
"        0 the remainder is split evenly between them.\n",
"        \"\"\"\n",
"        if self.change_in_progress:\n",
"            return\n",
"        self.change_in_progress = True\n",
"        try:\n",
"            changed_slider = value['owner'].description\n",
"            others = [s for k, s in slider_map.items() if k != changed_slider]\n",
"            if not others:\n",
"                return\n",
"            other_slider_sum = sum(s.value for s in others)\n",
"            new_other_slider_sum = 1 - value['new']\n",
"            if other_slider_sum > 0:\n",
"                mult_factor = new_other_slider_sum / other_slider_sum\n",
"                for s in others:\n",
"                    s.value = mult_factor * s.value\n",
"            else:\n",
"                # No proportions to preserve (and dividing by 0 would raise),\n",
"                # so hand every other slider an equal share of what is left.\n",
"                equal_share = new_other_slider_sum / len(others)\n",
"                for s in others:\n",
"                    s.value = equal_share\n",
"        finally:\n",
"            # Always release the guard, otherwise one failure would silently\n",
"            # disable every later update.\n",
"            self.change_in_progress = False\n",
" \n",
"# A single Changer is shared so its in-progress flag covers every slider.\n",
"changer = Changer()\n",
"# Every state starts with the same share, so the sliders initially sum to 1.\n",
"initial_share = 1.0 / len(values)\n",
"slider_values = dict.fromkeys(values, initial_share)\n",
"\n",
"for name in values:\n",
"    slider_map[name] = widgets.FloatSlider(\n",
"        value=initial_share,\n",
"        min=0.0,\n",
"        max=1.0,\n",
"        step=0.001,\n",
"        description=name,\n",
"        continuous_update=False,\n",
"    )\n",
"    slider_map[name].observe(changer.callback, names='value')\n",
"\n",
"# Stack the sliders vertically, each stretched to the full width.\n",
"column_layout = Layout(display='flex', flex_flow='column', align_items='stretch')\n",
"display(Box(children=list(slider_map.values()), layout=column_layout))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "97cc09d2-c71b-49a5-9a1c-3f6ace5e6b1f",
"metadata": {},
"outputs": [],
"source": [
"def validate_matrix(matrix):\n",
"    \"\"\"Print whether every column of ``matrix`` sums to 1.\n",
"\n",
"    ``matrix`` is a list of rows; every row must be at least as long as the\n",
"    first one. Each column whose sum differs from 1 by more than float\n",
"    rounding is printed with its sum and its values. If no column fails,\n",
"    ``ALL OK`` is printed. Nothing is returned.\n",
"    \"\"\"\n",
"    import math  # local import keeps this cell runnable on its own\n",
"\n",
"    valid = True\n",
"    for col_idx in range(len(matrix[0])):\n",
"        col_data = [row[col_idx] for row in matrix]\n",
"        total = math.fsum(col_data)\n",
"        # An exact `!= 1` test misfires on rounding: 0.1 + 0.7 + 0.2 gives\n",
"        # 0.9999999999999999 with plain float addition.\n",
"        if not math.isclose(total, 1.0, abs_tol=1e-9):\n",
"            print(f\"col #{col_idx} INVALID ({total}!=1)\")\n",
"            print(col_data)\n",
"            valid = False\n",
"    if valid:\n",
"        print(\"ALL OK\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "fee25b03-eec8-4db4-9021-ce92eee94dbf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ALL OK\n"
]
}
],
"source": [
"validate_matrix([\n",
" # consistent inconsistent\n",
" # \"none\", \"bad_certificate\", \"connection_reset_after_ch\", \"timeout_after_ch\", \"timeout\" \"connection_reset\" \"other\" \"down\"\n",
" [0.02, 0.95, 0.90, 0.85, 0.55, 0.65, 0.07, 0.07, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02], # blocked\n",
" [0.95, 0.01, 0.01, 0.01, 0.02, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01], # ok\n",
" [0.02, 0.03, 0.07, 0.09, 0.40, 0.25, 0.07, 0.07, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05], # down\n",
" [0.01, 0.01, 0.02, 0.05, 0.03, 0.09, 0.85, 0.85, 0.92, 0.92, 0.92, 0.92, 0.92, 0.92, 0.92, 0.92], # unknown\n",
" ])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f8a25ac-80fe-4124-af4e-d5455faf71d2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
7 changes: 7 additions & 0 deletions notebooks/bn-notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Useful links:

- https://maucher.pages.mi.hdm-stuttgart.de/artificial-intelligence/05aBayesNetGarage.html
- https://github.com/paulgovan/BayesianNetwork?tab=readme-ov-file
- https://www.cs.washington.edu/dm/vfml/appendixes/bif.htm
- https://pgmpy.org/models/bayesiannetwork.html
- https://erdogant.github.io/bnlearn/pages/html/index.html
48 changes: 48 additions & 0 deletions notebooks/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "oonidata-notebooks"
dynamic = ["version"]
description = ''
readme = "README.md"
requires-python = ">=3.8"
license = "MIT"
keywords = []
authors = [
{ name = "Arturo Filastò", email = "arturo@filasto.net" },
]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"jupyterlab",
"pandas",
"numpy",
"bnlearn",
"d3blocks",
"pygraphviz",
"bokeh",
"ipywidgets"
]

[tool.hatch.envs.types]
extra-dependencies = [
"mypy>=1.0.0",
]

[tool.coverage.report]
exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]
1 change: 1 addition & 0 deletions notebooks/web-analysis-bn.ipynb
151 changes: 151 additions & 0 deletions notebooks/web-analysis-full.bif
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
network unknown {
}
variable DNS Answer {
type discrete [ 5 ] { answer_is_bogon, answer_matches_probe_asn, answer_matches_probe_cc, answer_is_cloud_provider, answer_other };
}
variable DNS Answer Consistency {
type discrete [ 2 ] { consistent, inconsistent };
}
variable DNS Answer TLS Consistency {
type discrete [ 3 ] { tls_consistent, tls_inconsistent, unknown };
}
variable DNS Ctrl Failure {
type discrete [ 3 ] { none, nxdomain, server_failure };
}
variable DNS Failure Consistency {
type discrete [ 3 ] { ok, expected_not_ok, unexpected_not_ok };
}
variable DNS Outcome {
type discrete [ 3 ] { blocked, ok, down };
}
variable DNS VP Failure {
type discrete [ 3 ] { none, nxdomain, server_failure };
}
variable TCP Ctrl Failure {
type discrete [ 4 ] { none, connection_reset, timeout, other };
}
variable TCP Outcome {
type discrete [ 4 ] { blocked, ok, down, unknown };
}
variable TCP VP Result {
type discrete [ 5 ] { none, connection_reset, timeout, other, down };
}
variable TLS Ctrl Failure {
type discrete [ 3 ] { none, bad_certificate, other };
}
variable TLS Outcome {
type discrete [ 4 ] { blocked, ok, down, unknown };
}
variable TLS VP Result {
type discrete [ 8 ] { none, bad_certificate, connection_reset_after_ch, timeout_after_ch, timeout, connection_reset, other, down };
}
probability ( DNS Answer ) {
table 0.004, 0.006, 0.06, 0.73, 0.2 ;
}
probability ( DNS Answer Consistency | DNS Answer, DNS Answer TLS Consistency ) {
( answer_is_bogon, tls_consistent ) 0.99, 0.01;
( answer_is_bogon, tls_inconsistent ) 0.001, 0.999;
( answer_is_bogon, unknown ) 0.001, 0.999;
( answer_matches_probe_asn, tls_consistent ) 0.999, 0.001;
( answer_matches_probe_asn, tls_inconsistent ) 0.01, 0.99;
( answer_matches_probe_asn, unknown ) 0.3, 0.7;
( answer_matches_probe_cc, tls_consistent ) 0.999, 0.001;
( answer_matches_probe_cc, tls_inconsistent ) 0.001, 0.999;
( answer_matches_probe_cc, unknown ) 0.3, 0.7;
( answer_is_cloud_provider, tls_consistent ) 0.999, 0.001;
( answer_is_cloud_provider, tls_inconsistent ) 0.01, 0.99;
( answer_is_cloud_provider, unknown ) 0.8, 0.2;
( answer_other, tls_consistent ) 0.999, 0.001;
( answer_other, tls_inconsistent ) 0.001, 0.999;
( answer_other, unknown ) 0.2, 0.8;

}
probability ( DNS Answer TLS Consistency | DNS Answer ) {
( answer_is_bogon ) 0.01, 0.05, 0.94;
( answer_matches_probe_asn ) 0.8, 0.15, 0.05;
( answer_matches_probe_cc ) 0.75, 0.2, 0.05;
( answer_is_cloud_provider ) 0.9, 0.08, 0.02;
( answer_other ) 0.94, 0.04, 0.02;

}
probability ( DNS Ctrl Failure ) {
table 0.994, 0.004, 0.002 ;
}
probability ( DNS Failure Consistency | DNS Ctrl Failure, DNS VP Failure ) {
( none, none ) 0.98, 0.01, 0.01;
( none, nxdomain ) 0.01, 0.05, 0.94;
( none, server_failure ) 0.01, 0.05, 0.94;
( nxdomain, none ) 0.94, 0.01, 0.05;
( nxdomain, nxdomain ) 0.01, 0.98, 0.01;
( nxdomain, server_failure ) 0.01, 0.92, 0.07;
( server_failure, none ) 0.94, 0.01, 0.05;
( server_failure, nxdomain ) 0.01, 0.92, 0.07;
( server_failure, server_failure ) 0.01, 0.98, 0.01;

}
probability ( DNS Outcome | DNS Answer Consistency, DNS Failure Consistency ) {
( consistent, ok ) 0.01, 0.99, 0.0;
( consistent, expected_not_ok ) 0.01, 0.01, 0.98;
( consistent, unexpected_not_ok ) 0.4, 0.4, 0.2;
( inconsistent, ok ) 0.8, 0.2, 0.0;
( inconsistent, expected_not_ok ) 0.2, 0.05, 0.75;
( inconsistent, unexpected_not_ok ) 0.9, 0.01, 0.09;

}
probability ( DNS VP Failure | DNS Ctrl Failure ) {
( none ) 0.994, 0.004, 0.002;
( nxdomain ) 0.02, 0.97, 0.01;
( server_failure ) 0.02, 0.01, 0.97;

}
probability ( TCP Ctrl Failure ) {
table 0.98, 0.001, 0.014, 0.005 ;
}
probability ( TCP Outcome | DNS Answer Consistency, TCP VP Result ) {
( consistent, none ) 0.02, 0.95, 0.02, 0.01;
( consistent, connection_reset ) 0.95, 0.01, 0.03, 0.01;
( consistent, timeout ) 0.9, 0.01, 0.08, 0.01;
( consistent, other ) 0.85, 0.01, 0.09, 0.05;
( consistent, down ) 0.01, 0.08, 0.01, 0.9;
( inconsistent, none ) 0.07, 0.01, 0.07, 0.85;
( inconsistent, connection_reset ) 0.07, 0.01, 0.07, 0.85;
( inconsistent, timeout ) 0.07, 0.01, 0.07, 0.85;
( inconsistent, other ) 0.07, 0.01, 0.07, 0.85;
( inconsistent, down ) 0.05, 0.01, 0.09, 0.85;

}
probability ( TCP VP Result | TCP Ctrl Failure ) {
( none ) 0.971, 0.001, 0.01, 0.009, 0.009;
( connection_reset ) 0.02, 0.14, 0.05, 0.07, 0.72;
( timeout ) 0.01, 0.02, 0.1, 0.02, 0.85;
( other ) 0.01, 0.06, 0.06, 0.12, 0.75;

}
probability ( TLS Ctrl Failure ) {
table 0.98, 0.005, 0.015 ;
}
probability ( TLS Outcome | DNS Answer Consistency, TLS VP Result ) {
( consistent, none ) 0.02, 0.95, 0.02, 0.01;
( consistent, bad_certificate ) 0.95, 0.01, 0.03, 0.01;
( consistent, connection_reset_after_ch ) 0.9, 0.01, 0.07, 0.02;
( consistent, timeout_after_ch ) 0.85, 0.01, 0.09, 0.05;
( consistent, timeout ) 0.55, 0.02, 0.4, 0.03;
( consistent, connection_reset ) 0.65, 0.01, 0.25, 0.09;
( consistent, other ) 0.07, 0.01, 0.07, 0.85;
( consistent, down ) 0.07, 0.01, 0.07, 0.85;
( inconsistent, none ) 0.02, 0.01, 0.05, 0.92;
( inconsistent, bad_certificate ) 0.02, 0.01, 0.05, 0.92;
( inconsistent, connection_reset_after_ch ) 0.02, 0.01, 0.05, 0.92;
( inconsistent, timeout_after_ch ) 0.02, 0.01, 0.05, 0.92;
( inconsistent, timeout ) 0.02, 0.01, 0.05, 0.92;
( inconsistent, connection_reset ) 0.02, 0.01, 0.05, 0.92;
( inconsistent, other ) 0.02, 0.01, 0.05, 0.92;
( inconsistent, down ) 0.02, 0.01, 0.05, 0.92;

}
probability ( TLS VP Result | TLS Ctrl Failure ) {
( none ) 0.97, 0.005, 0.005, 0.005, 0.005, 0.005, 0.004, 0.001;
( bad_certificate ) 0.02, 0.1, 0.01, 0.01, 0.01, 0.01, 0.02, 0.82;
( other ) 0.01, 0.01, 0.05, 0.05, 0.05, 0.05, 0.12, 0.66;

}
Loading
Loading