Skip to content

Commit

Permalink
added content and architecture
Browse files Browse the repository at this point in the history
  • Loading branch information
Geraldhub committed Jul 12, 2024
1 parent a531e77 commit 49852f3
Show file tree
Hide file tree
Showing 11 changed files with 320 additions and 1 deletion.
36 changes: 36 additions & 0 deletions docs/architecture/.$containers.drawio.bkp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<mxfile host="Electron" modified="2024-07-11T11:59:30.793Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/24.6.4 Chrome/124.0.6367.207 Electron/30.0.6 Safari/537.36" etag="xYbDNBQ39VEejD0xDNPD" version="24.6.4" type="device">
<diagram name="Page-1" id="822b0af5-4adb-64df-f703-e8dfc1f81529">
<mxGraphModel dx="1434" dy="974" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1100" pageHeight="850" background="none" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-10" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="GgpGNgUZ1yCXAx5dYiFa-2" target="GgpGNgUZ1yCXAx5dYiFa-3">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-11" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="1" source="GgpGNgUZ1yCXAx5dYiFa-2" target="GgpGNgUZ1yCXAx5dYiFa-4">
<mxGeometry relative="1" as="geometry">
<Array as="points">
<mxPoint x="140" y="200" />
<mxPoint x="340" y="200" />
</Array>
</mxGeometry>
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-2" value="PHP-FPM" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="80" y="40" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-3" value="NGINX" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="280" y="40" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-4" value="Mariadb" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="280" y="240" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-8" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="GgpGNgUZ1yCXAx5dYiFa-5" target="GgpGNgUZ1yCXAx5dYiFa-4">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-5" value="PHP-CRON" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="80" y="240" width="120" height="120" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>
Binary file added docs/architecture/Thumbs.db
Binary file not shown.
Binary file added docs/architecture/basic-containers.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/architecture/containers-Performance.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
90 changes: 90 additions & 0 deletions docs/architecture/containers.drawio
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
<mxfile host="Electron" modified="2024-07-11T12:13:09.909Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/24.6.4 Chrome/124.0.6367.207 Electron/30.0.6 Safari/537.36" etag="5k_CzV7KKPTt4BgkEYlJ" version="24.6.4" type="device" pages="2">
<diagram name="Basic" id="822b0af5-4adb-64df-f703-e8dfc1f81529">
<mxGraphModel dx="1434" dy="974" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1100" pageHeight="850" background="none" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-10" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="GgpGNgUZ1yCXAx5dYiFa-2" target="GgpGNgUZ1yCXAx5dYiFa-3">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-11" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="1" source="GgpGNgUZ1yCXAx5dYiFa-2" target="GgpGNgUZ1yCXAx5dYiFa-4">
<mxGeometry relative="1" as="geometry">
<Array as="points">
<mxPoint x="140" y="200" />
<mxPoint x="340" y="200" />
</Array>
</mxGeometry>
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-2" value="PHP-FPM" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="80" y="40" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-3" value="NGINX" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="280" y="40" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-4" value="Mariadb" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="280" y="240" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-8" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="GgpGNgUZ1yCXAx5dYiFa-5" target="GgpGNgUZ1yCXAx5dYiFa-4">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="GgpGNgUZ1yCXAx5dYiFa-5" value="PHP-CRON" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="80" y="240" width="120" height="120" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
<diagram id="1YhsbRrlCuokWMPNEC6Z" name="Performance">
<mxGraphModel dx="2206" dy="1498" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1654" pageHeight="2336" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="s28P5wAXleQJ63n-5oN3-1" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="s28P5wAXleQJ63n-5oN3-3" target="s28P5wAXleQJ63n-5oN3-4">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="s28P5wAXleQJ63n-5oN3-2" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="1" source="s28P5wAXleQJ63n-5oN3-3" target="s28P5wAXleQJ63n-5oN3-5">
<mxGeometry relative="1" as="geometry">
<Array as="points">
<mxPoint x="270" y="220" />
<mxPoint x="360" y="220" />
</Array>
</mxGeometry>
</mxCell>
<mxCell id="s28P5wAXleQJ63n-5oN3-3" value="PHP-FPM" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="210" y="60" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="s28P5wAXleQJ63n-5oN3-4" value="NGINX" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="410" y="60" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="s28P5wAXleQJ63n-5oN3-5" value="Mariadb" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="300" y="260" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="s28P5wAXleQJ63n-5oN3-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="s28P5wAXleQJ63n-5oN3-8" target="s28P5wAXleQJ63n-5oN3-5">
<mxGeometry relative="1" as="geometry">
<Array as="points">
<mxPoint x="200" y="320" />
</Array>
</mxGeometry>
</mxCell>
<mxCell id="s28P5wAXleQJ63n-5oN3-8" value="ImportExport" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="140" y="460" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="s28P5wAXleQJ63n-5oN3-13" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="1" source="s28P5wAXleQJ63n-5oN3-9" target="s28P5wAXleQJ63n-5oN3-5">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="s28P5wAXleQJ63n-5oN3-9" value="CheckNTD" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="300" y="460" width="120" height="120" as="geometry" />
</mxCell>
<mxCell id="s28P5wAXleQJ63n-5oN3-12" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="s28P5wAXleQJ63n-5oN3-10" target="s28P5wAXleQJ63n-5oN3-5">
<mxGeometry relative="1" as="geometry">
<Array as="points">
<mxPoint x="550" y="320" />
</Array>
</mxGeometry>
</mxCell>
<mxCell id="s28P5wAXleQJ63n-5oN3-10" value="CRON" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="470" y="460" width="120" height="120" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>
Binary file added docs/architecture/img.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/architecture/img_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
131 changes: 131 additions & 0 deletions docs/architecture/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Architecture

---

## Application

The application is based on the following Model-View-Controller frameworks:

- Laravel version 9.1 or higher
- WinterCMS version 1.2.5 or higher

The Model-View-Controller (MVC) framework is an architectural/design pattern that separates
an application into three main logical components: Model, View, and Controller.
Each architectural component is built to handle specific development aspects of an application.
It isolates the business logic and presentation layer from each other. It was traditionally
used for desktop graphical user interfaces (GUIs). Nowadays, MVC is one of the most frequently
used industry-standard web development frameworks to create scalable and extensible
applications.

![img.png](img.png)

Furthermore, Object-Oriented Programming (OOP) is used for the development of the software.
OOP is a programming paradigm based on the concept of objects, which can contain data
and code: data in the form of fields (often known as attributes or properties), and
code in the form of procedures (often known as methods). In OOP, computer programs
are designed by making them out of objects that interact with one another.

Important is the use of the WinterCMS framework. SCARt uses as much as possible the
standard functionality of this framework. See here for the [WinterCMS Development Guide](https://wintercms.com/docs/v1.2/docs/architecture/developer-guide#html-element-naming).

SCARt also adopts the version numbering of "major.minor.point". For example v1.0.1 or v5.3.2.

- **major**; should be increased for substantial changes made to the plugin, such as complete
rewrites or pivoting of the purpose of the plugin. These changes are assumed to be
backwards-incompatible and will require manual intervention by the users of the plugin.
- **minor**; should be increased for smaller changes or new features that may still be
backwards-incompatible with adequate justification. This can include changes to the
database schema or changes to component settings.
- **point**; should be increased for minor fixes, translation updates or very minor new
features that maintain backwards compatibility.

Release notes refer to a version and contain (if applicable) the following
elements:

- Breaking changes
- New features
- Other changes
- Bug fixes
- Translation updates

See here for more information about the versioning: [WinterCMS plugin version history](https://wintercms.com/docs/v1.2/docs/plugin/updates).

## Containers

The MVC is further enhanced in SCARt by containerizing the different system components. This
makes them scalable and provides failover functionality. With containers the application
components are also placed in a separate (local) network.

A basic container setup:

![basic-containers.png](basic-containers.png)

You see the basic system components. The PHP-FPM is (only) used for the frontend processing
and serves the UI. The PHP-CRON is responsible for the background processes.

## Background processes

Within the PHP-CRON the background work consists of the following jobs:

| Name | Description |
|:--------------|:--------------------------------------------------------------------------------------------------------------------------------------|
| ImportExport | Import the reports by email or ICCAM |
| AnalyzeInput | Read the imported reports, scrape them and get the WhoIs information |
| CheckNTD | Checks if the illegal reports are still online and whether the WhoIs information is unchanged (*) |
| SendNTD | Send NTD by email (or API) to the hoster, registrar, site owner or LEA |
| SendAlert | Send alerts to the info mailbox about the actions done by the background jobs |
| UpdateWHoIs | Update the WhoIs every 12 hours |
| CreateReports | Create the user reports (export CSV files) |
| Cleanup | Every night this background job runs to cleanup the SCARt environment (*) |
| Archive | In some SCARt environments the number of reports is so large that archiving is needed to keep the runtime performance optimized |

(*) see separate chapters for more information

It's easy within Docker to create a separate container for each of these jobs. In this
way the performance can be optimized.

The following container setup is an example for an optimized setup:

![containers-Performance.png](containers-Performance.png)

In this setup the ImportExport and CheckNTD jobs are placed in separate containers, each with
its own work and resource environment.

## Realtime online check

The CheckNTD job is responsible for checking whether a URL is still online.

The standard CheckNTD job is a single PHP job (thread) which starts a headless browser and
checks each URL.

For bigger hotlines with a lot of illegal URLs to check, there is also the realtime version
of the CheckNTD job. This realtime version uses pooling to dynamically start and stop threads
for checking the online status. The configuration consists of:

- maximum time within which a URL has to be checked again (default 4 hours)
- minimum time after which a URL has to be checked (default 1 hour)
- minimum time after which a worker is spun down (default 15 min)

There is no limit on the number of concurrent threads other than the resources on the
hosting server(s).

Note that the thread job not only checks the online status of an image but also the
WhoIs information. The hosting (country) information can change; at that moment the
report is placed in the status "CHANGED".

A report is set "offline" when, 3 times in a row with a delay of 3 minutes, the
image (hash) is not found online.

### Illegal content browsing

With the docker and the threading setup, the following secure browsing environment exists:
![img_1.png](img_1.png)<br />
Within the browser container (docker), the website is analyzed and media is downloaded for
classification. The browser context (website with illegal content) is reset after the
scraping of the website. The headless browser environment has no direct contact with the
other SCARt components and/or server and/or SCARt client user.

The setup of this realtime version is done by the S3group. They have the knowledge to configure
and maintain this environment. Please ask your SCARt contact for more information.


60 changes: 60 additions & 0 deletions docs/details/cleanup.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Cleanup

---

## Automatic background process

Every night the cleanup background job runs to do a checkup of the SCARt environment.
The following actions are done:

1. Recycle of the SCARt application logfile
2. Reset for re-scraping of inputs that are open for classification and have not been looked at in the past 24 hours
3. Remove cached images that have finished being analyzed
4. Cleanup of the WhoIs cache; removal of inactive domain and/or IP records
5. Rewind the ICCAM import one day to be sure every ICCAM report is imported
6. Anonymize records if the retention time has been reached
7. Cleanup "deleted marked records" in the database

## Anonymous

SCARt can be configured (not standard) to anonymize privacy-related fields. These fields
include:

- URL
- URL_host
- URL_base
- URL_referer
- URL_IP
- URL_HASH

After the retention time (for example 1 year) the values in the database of these fields
are overwritten with "anonymouse-[record-id]".

Other fields, such as the status or the classification, are kept in the database, so general
reports without the privacy information can still be reported (exported).

The specific retention time has to be configured. Please contact your SCARt contact for
more information.























2 changes: 2 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ nav:
- Custom Webforms: details/custom_webforms.md
- Email import whitelist: details/whitelist.md
- ICCAM: details/iccam.md
- Cleanup: details/cleanup.md
- Architecture: architecture/index.md



2 changes: 1 addition & 1 deletion themes/mkdocs/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@
{%- if config.copyright %}
<p>{{ config.copyright }}</p>
{%- endif %}
<p>SCARt.io - version 0.9 <strong>PRE-RELEASE</strong> documentation</p>
<p>SCARt.io - version 0.9.2 <strong>PRE-RELEASE</strong> documentation</p>
{%- endblock %}
</footer>

Expand Down

0 comments on commit 49852f3

Please sign in to comment.