Skip to content

Commit

Permalink
Refactor: Project structure refactoring (#42)
Browse files Browse the repository at this point in the history
* refactor(project): Save point of project and project structure refactoring

* fix(tests): Fixed tests after all changes

* fix(tests): Fixed tests after all changes

* chore(docs): Updated README.md file
  • Loading branch information
breadrock1 authored Oct 26, 2024
1 parent a28553e commit 7016fe9
Show file tree
Hide file tree
Showing 52 changed files with 2,298 additions and 2,602 deletions.
24 changes: 12 additions & 12 deletions .env
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
SERVICE_RUN_MODE: 'production'
SERVICE_WORKERS_NUM: 6
SERVICE_ENABLE_TLS: 'false'
SERVICE_ALLOWED_CORS: '*'
SERVICE_RUN_MODE=production
SERVICE_WORKERS_NUM=6
SERVICE_ENABLE_TLS=false
SERVICE_ALLOWED_CORS=*

EMBEDDINGS_ENABLE_TLS: 'false'
EMBEDDINGS_ENABLE_TLS=false

ELASTIC_USERNAME: 'elastic'
ELASTIC_PASSWORD: 'elastic'
ELASTIC_ENABLE_TLS: 'false'
ELASTIC_USERNAME=elastic
ELASTIC_PASSWORD=elastic
ELASTIC_ENABLE_TLS=false

REDIS_ROOT_PASSWORD: 'cacher'
REDIS_CLIENT_USERNAME: 'cacher'
REDIS_CLIENT_PASSWORD: 'cacher'
REDIS_DATA_EXPIRED: 3600
REDIS_ROOT_PASSWORD=redis
REDIS_CLIENT_USERNAME=redis
REDIS_CLIENT_PASSWORD=redis
REDIS_DATA_EXPIRED=3600
7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
workspace = { members = ["crates/elschema"] }
[package]
name = "doc-search"
version = "0.2.0"
version = "0.2.1"
edition = "2021"

[badges]
maintenance = { status = "actively-developed" }

[features]
enable-cacher = ["dep:redis"]
enable-semantic = []
default = []

[dependencies]
Expand Down Expand Up @@ -64,6 +66,9 @@ path = "crates/datetime"
[dependencies.elquery]
path = "crates/elquery"

[dependencies.elschema]
path = "crates/elschema"

[[bin]]
name = "doc-searcher-run"
path = "src/bin/doc-searcher-run.rs"
Expand Down
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
ARG FEATURES='--features default'

FROM rust:1.75 AS chef

WORKDIR /app
Expand Down Expand Up @@ -27,7 +29,7 @@ RUN cargo chef cook --release --recipe-path recipe.json

COPY . .

RUN cargo install --bins --path .
RUN cargo install ${FEATURES} --bins --path .


# Target layer based on tiny official ubuntu image with necessary binaries and data to run.
Expand Down
18 changes: 10 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ The main goal - implement simple but powerful system of storing and indexing doc
I decided to use Elasticsearch as the default search engine, but you may plug in your own solution by implementing several async traits
for Tantivy, Qdrant, or any other backend:

- FolderService - API (CRUD) of indexed folders to store documents;
- DocumentService - API (CRUD) of documents stored into folders;
- WatcherService - API of doc-notifier service interactions;
- SearcherService - API of searcher functionalities (fulltext, vector, similar);
- PaginatorService - API of searcher results pagination.

- CacherService - API of the caching service (e.g. Redis);
- EmbeddingsService - API of the embeddings service used for semantic searching;
- MetricsService - API of metrics for monitoring;
- StorageService - API (CRUD) of indexed folders and documents;
- SearcherService - API of searcher functionalities (fulltext, vector, similar).


## Features

Expand All @@ -43,6 +45,7 @@ These instructions will get you a copy of the project up and running on your loc

- Rust
- Docker & docker-compose
- Cache (Redis)
- Elasticsearch

### Installation
Expand All @@ -56,9 +59,8 @@ These instructions will get you a copy of the project up and running on your loc
### Features of project

Features to parse and store documents locally from the current service (not stable):
- enable-cacher : enable cacher service like redis or other custom implementation;

default = []
- enable-cacher - enable cacher service like redis or other custom implementation;
- enable-semantic - enable llm service for semantic searching.

[![Bread White - doc-searcher](https://img.shields.io/static/v1?label=Bread%20White&message=author&color=blue&logo=github)](https://github.com/breadrock1/doc-searcher)

Expand Down
31 changes: 0 additions & 31 deletions config/default.toml

This file was deleted.

1 change: 1 addition & 0 deletions config/development.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ expired = 3600
address = "localhost:8085"
is_truncate = "false"
is_normalize = "false"
enabled_tls = "false"
1 change: 1 addition & 0 deletions config/production.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ expired = 3600
address = "embeddings:8085"
is_truncate = "false"
is_normalize = "false"
enabled_tls = "false"
9 changes: 3 additions & 6 deletions crates/datetime/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,11 @@ where
D: Deserializer<'de>,
{
String::deserialize(deserializer)
.and_then(|value| Ok(format_datetime(value.as_str())))
.and_then(|value| Ok(value.ok()))
.map(|value| format_datetime(value.as_str()))
.map(|value| value.ok())
}

fn format_datetime(value: &str) -> ParseResult<DateTime<Utc>> {
#[allow(deprecated)]
match Utc.datetime_from_str(value, DATE_TIME_FORMAT) {
Ok(datetime) => Ok(datetime),
Err(err) => Err(err),
}
Utc.datetime_from_str(value, DATE_TIME_FORMAT)
}
30 changes: 20 additions & 10 deletions crates/elquery/src/filter/must_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,41 @@ pub struct BoolMustFilter {
}

impl BoolMustFilter {
pub fn with_term<T>(mut self, key: &str, value: T) -> Self
pub fn with_term<T>(mut self, key: &str, value: Option<T>) -> Self
where
T: serde::Serialize,
{
let term_item = TermFilterItem::term_value(key, value);
let term_item_val = serde_json::to_value(term_item).unwrap();
self.must.push(term_item_val);
if value.is_some() {
let term_item = TermFilterItem::term_value(key, value);
let term_item_val = serde_json::to_value(term_item).unwrap();
self.must.push(term_item_val);
}

self
}

pub fn with_range<T, U>(mut self, key: &str, gte: T, lte: Option<U>) -> Self
pub fn with_range<T, U>(mut self, key: &str, gte: Option<T>, lte: Option<U>) -> Self
where
T: serde::Serialize,
U: serde::Serialize,
{
if gte.is_none() && lte.is_none() {
return self;
}

let range_item = RangeFilterItem::range_value(gte, lte);
let range_value = json!({"range": { key: range_item }});
self.must.push(range_value);
self
}

pub fn with_exists(mut self, field: &str) -> Self {
let exists_query = ExistsFilterItem::exists_value(field);
let exists_query_val = serde_json::to_value(exists_query).unwrap();
self.must.push(exists_query_val);
pub fn with_exists(mut self, field: Option<&str>) -> Self {
if let Some(value) = field {
let exists_query = ExistsFilterItem::exists_value(value);
let exists_query_val = serde_json::to_value(exists_query).unwrap();
self.must.push(exists_query_val);
}

self
}

Expand Down Expand Up @@ -67,7 +77,7 @@ struct RangeFilterItem {
}

impl RangeFilterItem {
pub fn range_value<T, U>(gte: T, lte: Option<U>) -> Self
pub fn range_value<T, U>(gte: Option<T>, lte: Option<U>) -> Self
where
T: serde::Serialize,
U: serde::Serialize,
Expand Down
12 changes: 12 additions & 0 deletions crates/elschema/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "elschema"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
derive_builder = "^0.20"
serde = "^1.0"
serde_derive = "^1.0"
serde_json = "^1.0"
92 changes: 92 additions & 0 deletions crates/elschema/src/base.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
use serde::Serializer;
use serde_derive::Serialize;

/// Boolean switch that serializes as `{"enabled": <bool>}`.
///
/// NOTE(review): presumably toggles the `enabled` option of a mapping entry —
/// confirm against callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub struct EnabledFlag {
    /// Value written under the `enabled` key.
    enabled: bool,
}

#[allow(dead_code)]
impl EnabledFlag {
pub fn new(is_enabled: bool) -> Self {
EnabledFlag {
enabled: is_enabled,
}
}
}

/// Settings section serialized with the keys `number_of_shards` and
/// `number_of_replicas`.
///
/// The [`Default`] implementation sets both values to `1`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct SettingsSchema {
    /// Value written under the `number_of_shards` key.
    number_of_shards: i32,
    /// Value written under the `number_of_replicas` key.
    number_of_replicas: i32,
}

impl Default for SettingsSchema {
fn default() -> Self {
SettingsSchema {
number_of_shards: 1,
number_of_replicas: 1,
}
}
}

/// Field data types that a schema entry can declare.
///
/// The default variant is [`FieldType::Keyword`]. The enum carries no data, so
/// it is `Copy` and can be compared, hashed and debug-printed directly.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum FieldType {
    Date,
    DenseVector,
    Integer,
    Object,
    Nested,
    #[default]
    Keyword,
    Text,
    Boolean,
}

impl serde::Serialize for FieldType {
    /// Serializes the variant as its lowercase snake_case string name
    /// (`"date"`, `"text"`, `"dense_vector"`, ...).
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying serializer reports while writing
    /// the string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let type_name = match self {
            FieldType::Date => "date",
            FieldType::Text => "text",
            FieldType::Object => "object",
            FieldType::Nested => "nested",
            FieldType::Boolean => "boolean",
            FieldType::Integer => "integer",
            FieldType::Keyword => "keyword",
            FieldType::DenseVector => "dense_vector",
        };

        // The name is already a `&'static str`, so write it directly instead of
        // going through `collect_str`, which formats via `Display` and by
        // default allocates an intermediate `String`.
        serializer.serialize_str(type_name)
    }
}

/// Schema entry that carries only a field type; serializes as
/// `{"type": <field type>}`.
#[derive(Serialize)]
pub struct SchemaFieldType {
    /// Field type, written under the `type` key when serialized.
    #[serde(rename(serialize = "type"))]
    field_type: FieldType,
}

impl SchemaFieldType {
pub fn new(field_type: FieldType) -> Self {
SchemaFieldType { field_type }
}
}

/// Schema entry serialized with the keys `type` and `ignore_malformed`.
///
/// The [`Default`] implementation uses `FieldType::Date` with
/// `ignore_malformed` set to `true`.
#[derive(Serialize)]
pub struct AsDateField {
    /// Field type, written under the `type` key when serialized.
    #[serde(rename(serialize = "type"))]
    field_type: FieldType,
    /// Value written under the `ignore_malformed` key.
    ignore_malformed: bool,
}

impl Default for AsDateField {
    /// Returns a date-typed entry with `ignore_malformed` switched on.
    fn default() -> Self {
        let field_type = FieldType::Date;
        Self {
            field_type,
            ignore_malformed: true,
        }
    }
}
48 changes: 48 additions & 0 deletions crates/elschema/src/embeddings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use crate::base::{FieldType, SchemaFieldType};

use serde_derive::Serialize;

/// Schema entry for embeddings; serializes as
/// `{"type": <field type>, "properties": {...}}`.
///
/// The [`Default`] implementation uses `FieldType::Nested` with a `text_chunk`
/// text property and a `vector` property.
#[derive(Serialize)]
pub struct EmbeddingsSchema {
    /// Field type, written under the `type` key when serialized.
    #[serde(rename(serialize = "type"))]
    field_type: FieldType,
    /// Sub-fields of the entry, written under the `properties` key.
    properties: EmbeddingProperties,
}

impl Default for EmbeddingsSchema {
    /// Returns a nested-typed entry whose properties are a text `text_chunk`
    /// and the default `vector` schema.
    fn default() -> Self {
        let properties = EmbeddingProperties {
            text_chunk: SchemaFieldType::new(FieldType::Text),
            vector: VectorSchema::default(),
        };

        Self {
            field_type: FieldType::Nested,
            properties,
        }
    }
}

/// Sub-fields of [`EmbeddingsSchema`], serialized with the keys `text_chunk`
/// and `vector`.
#[derive(Serialize)]
struct EmbeddingProperties {
    /// Schema of the `text_chunk` sub-field.
    text_chunk: SchemaFieldType,
    /// Schema of the `vector` sub-field.
    vector: VectorSchema,
}

/// Schema of the `vector` sub-field, serialized with the keys `type`,
/// `similarity`, `index` and `dims`.
///
/// The [`Default`] implementation uses `FieldType::DenseVector`, `"cosine"`
/// similarity, `index = true` and `dims = 1024`.
#[derive(Serialize)]
struct VectorSchema {
    /// Field type, written under the `type` key when serialized.
    #[serde(rename(serialize = "type"))]
    field_type: FieldType,
    /// Value written under the `similarity` key.
    similarity: String,
    /// Value written under the `index` key.
    index: bool,
    /// Value written under the `dims` key.
    // NOTE(review): presumably must match the embedding model's output size — confirm.
    dims: u32,
}

impl Default for VectorSchema {
    /// Returns an indexed dense-vector schema with cosine similarity and
    /// 1024 dimensions.
    fn default() -> Self {
        let similarity = String::from("cosine");
        Self {
            field_type: FieldType::DenseVector,
            similarity,
            index: true,
            dims: 1024,
        }
    }
}
6 changes: 6 additions & 0 deletions crates/elschema/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pub mod base;
pub mod embeddings;

/// Contract for schema types that can construct themselves without arguments.
pub trait ElasticSchema {
    /// Builds and returns an instance of the implementing type.
    fn build() -> Self;
}
Loading

0 comments on commit 7016fe9

Please sign in to comment.