Skip to content

Commit

Permalink
Merge pull request #224 from mldiego/master
Browse files Browse the repository at this point in the history
DSN 2024 - malware examples
  • Loading branch information
mldiego authored Jun 12, 2024
2 parents 05faccb + b811936 commit 318e743
Show file tree
Hide file tree
Showing 20 changed files with 3,336 additions and 26 deletions.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function t = reach()
% function t = reach()

%% Reachability analysis of the Unicycle (benchmark 10)

Expand Down Expand Up @@ -43,6 +43,7 @@
% Compute plant reachable set
init_set = plantReach(plant,init_set,input_set,'lin');
reachAll = [reachAll init_set];
disp(i);
end
t = toc(t);

Expand Down Expand Up @@ -79,7 +80,7 @@
exportgraphics(f1,'unicycle_3v4.pdf', 'ContentType', 'vector');
end

end
% end

%% Helper function
function init_set = plantReach(plant,init_set,input_set,algoC)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@

% Load components and set reachability parameters
net = load_NN_from_mat('controllerTora.mat');
reachStep = 0.1;
reachStep = 0.05;
controlPeriod = 1;
plant = NonLinearODE(4,1,@dynamics9, reachStep, controlPeriod, eye(4));
tFinal = 4;
tFinal = 9;
% Initial set
lb = [0.6; -0.7; -0.4; 0.5];
% ub = [0.7; -0.6; -0.3; 0.6];
ub = [0.6001; -0.7; -0.4; 0.5];
plant.options.tensorOrder = 2;
plant.options.tensorOrder = 3;
plant.options.taylorTerms = 4;
plant.options.zonotopeOrder = 200;
plant.options.errorOrder = 100;
plant.options.intermediateOrder = 200;
plant.options.errorOrder = 10;
plant.options.intermediateOrder = 50;


%% Reachability analysis
Expand All @@ -39,7 +39,7 @@
% init_set = init_set.toStar();
reachAll = [reachAll init_set];
end
t = toc(t);
t = toc(t); % split after 8 steps again? 9?

% Save results
if is_codeocean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@

% Initial set
% lb = [1.0; 1.0;1.0;1.0];
lb = [1.29; 1.29;1.29;1.29];
ub = [1.3; 1.3;1.3;1.3];
lb = [1.29; 1.29; 1.29;1.29];
ub = [1.3 ; 1.3 ; 1.3 ;1.3 ];
% ub = [1.01; 1.01;1.01;1.01];
init_set = Star(lb,ub);
% Input set
Expand All @@ -30,7 +30,7 @@
reachAll = init_set;
% Execute reachabilty analysis
% for i =1:steps
num_steps = 4;
num_steps = 10; % 20 % begin split at 8 steps or so, we may be able to get something done but it would take forever
reachOptions.reachMethod = 'approx-star';
t = tic;
for i=1:num_steps
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function t = reach_more()
% function t = reach_more()

%% Reachability analysis of Double Pendulum Benchmark

Expand Down Expand Up @@ -55,17 +55,17 @@
plant.get_interval_sets;

f = figure;
Star.plotBoxes_2D_noFill(plant.intermediate_reachSet,1,2,'b');
hold on;
rectangle('Position',[-1.5,-1.5,3,3],'FaceColor',[0 0.2 0 0.5],'EdgeColor','y', 'LineWidth',0.1)
hold on;
Star.plotBoxes_2D_noFill(plant.intermediate_reachSet,1,2,'b');
grid;
xlabel('x1');
ylabel('x2');

f1 = figure;
Star.plotBoxes_2D_noFill(plant.intermediate_reachSet,3,4,'b');
rectangle('Position',[-1.5,-1.5,3,3],'FaceColor',[0 0.2 0 0.5],'EdgeColor','y', 'LineWidth',0.1);
hold on;
rectangle('Position',[-1.5,-1.5,3,3],'FaceColor',[0 0.2 0 0.5],'EdgeColor','y', 'LineWidth',0.1)
Star.plotBoxes_2D_noFill(plant.intermediate_reachSet,3,4,'b');
grid;
xlabel('x3');
ylabel('x4');
Expand All @@ -79,7 +79,7 @@
exportgraphics(f1,'double_pendulum_more_3v4.pdf','ContentType', 'vector');
end

end
% end

%% Helper function
function init_set = plantReach(plant,init_set,input_set,algoC)
Expand Down
24 changes: 24 additions & 0 deletions code/nnv/examples/Submission/FORMALISE2024/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Case Study: Neural Network Malware Detection Verification for Feature and Image Datasets

Publication: https://doi.org/10.1145/3644033.3644372

Code: https://github.com/pkrobinette/verify_malware

#### Citation
```
@inproceedings{10.1145/3644033.3644372,
author = {Robinette, Preston K. and Manzanas Lopez, Diego and Serbinowska, Serena and Leach, Kevin and Johnson, Taylor T},
title = {Case Study: Neural Network Malware Detection Verification for Feature and Image Datasets},
year = {2024},
isbn = {9798400705892},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3644033.3644372},
doi = {10.1145/3644033.3644372},
booktitle = {Proceedings of the 2024 IEEE/ACM 12th International Conference on Formal Methods in Software Engineering (FormaliSE)},
pages = {127–137},
numpages = {11},
location = {, Lisbon, Portugal, },
series = {FormaliSE '24}
}
```
25 changes: 25 additions & 0 deletions code/nnv/examples/Submission/VNN_COMP2024/processResults.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
res_dir = "results_approx_acasxu/";

ss = dir(res_dir + "*.txt");

nP = length(ss); % number of properties

vRes(nP) = string;
vTime(nP) = string;

for i=1:nP
fileName = res_dir + ss(i).name;
iN = split(fileName, "_");
iN = split(iN{end}, '.');
iN = str2double(iN{1});
fid = fopen(fileName, "r");
vRes(iN) = fgetl(fid);
vTime(iN) = fgetl(fid);
end

resTable = table(vRes', vTime');

unsat = sum(count(vRes, "unsat"));
sat = sum(count(vRes, "sat", "IgnoreCase",true));
unknown = sum(count(vRes, "unknown"));
sat = sat - unsat;
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

notSupported = {'test'; 'traffic_signs_recognition'; 'cctsdb_yolo'}; % skip for now

for i=4:length(benchmarks)
% for i=4:length(benchmarks)
for i=4 % only do acasxu

if ~contains(notSupported, benchmarks(i).name) % skip evaluation of benchmarks not supported

Expand All @@ -17,7 +18,7 @@
results_dir = "results_approx_" + benchmarks(i).name;
mkdir(results_dir);

for k=1:size(instances,1)
for k=1:size(instances, 1)

onnx = benchpath + filesep + instances(k,1);
vnnlib = benchpath + filesep + instances(k,2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@
reachOptions = struct;
reachOptions.reachMethod = 'approx-star';
% reachOptions.reachMethod = 'exact-star';
% reachOptions.device = 'cpu';
% reachOptions.numCores = 24; % logical cores


% Check if property was violated earlier
Expand Down Expand Up @@ -185,6 +187,10 @@

tTime = toc(t); % save total computation time

if status == 2 && strcmp(reachOptions.reachMethod, 'exact-star')
status = 0;
end

disp("Verification result: " + string(status));
disp("Counterexample search time: " + string(cEX_time));
disp("Reachability time: " + string(vT));
Expand Down Expand Up @@ -238,13 +244,7 @@
end

% Create input set
% IS = ImageStar(lb, ub); % too many constraints, slower
i_diff = ub - lb;
V(:,:,:,1) = lb; % assume lb is center of set (instead of img)
V(:,:,:,2) = i_diff ; % basis vectors
C = [1; -1]; % constraints
d = [1; -1];
IS = ImageStar(V, C, d, 0, 1); % input set
IS = ImageStar(lb, ub); %

end

Expand Down Expand Up @@ -502,6 +502,7 @@ function write_counterexample(outputfile, counterEx)
end
% check if property violated
yPred = reshape(yPred, [], 1); % convert to column vector (if needed)
% disp([x;yPred']);
for h=1:length(Hs)
if Hs(h).contains(double(yPred)) % property violated
counterEx = {x; yPred}; % save input/output of countex-example
Expand Down
33 changes: 33 additions & 0 deletions code/nnv/examples/Tutorial/NN/malware/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
## BODMAS: An Open Dataset for Learning based Temporal Analysis of PE Malware

Dataset info: https://github.com/whyisyoung/BODMAS

Publication: https://liminyang.web.illinois.edu/data/DLS21_BODMAS.pdf


#### Summary
```
BODMAS dataset includes 57,293 malware samples and 77,142 benign samples (134,435 in total).
The malware samples are randomly sampled each month from a security company’s internal malware database.
- The data collection was performed from August 29, 2019, to September 30, 2020.
- The benign samples were collected from January 1, 2007, to September 30, 2020.
- The dataset covers 581 malware families.
- These malware samples are from a diverse set of malware categories (14 categories in total).
- The most prevalent categories are Trojan (29,972 samples), Worm (16,697 samples), Backdoor (7,331 samples), Downloader (1,031 samples), and Ransomware (821 samples).
```
Due to large data size, we only provide a subset of 500 samples for this tutorial.


#### Tutorial
Perform local robustness verification for a malware verifier trained on the BODMAS dataset.

Verification examples:
```
1. Adversarial perturbations of continuous variables.
2. Adversarial perturbations of discrete variables.
3. Adversarial perturbations of continuous & discrete variables.
4. Adversarial perturbations of all variables.
```

Verification examples from [Formalise 2024](https://doi.org/10.1145/3644033.3644372) and [AiSOLA 2023](https://doi.org/10.1007/978-3-031-46002-9_17)

Loading

0 comments on commit 318e743

Please sign in to comment.