-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
72 lines (57 loc) · 2.23 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Standard library
import re

# Third-party: web framework and model loading
import joblib
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Third-party: message preprocessing
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
# Application instance that the route decorators in this file register against.
app = FastAPI(
    title="Ham or Spam API",
    description="API to predict if SMS is spam",
)

# Both artifacts are loaded once, at import time, and then shared by every
# request: `vectorizer` turns cleaned text into model input and `model`
# produces the label.
model = joblib.load("model/finalized_model.sav")
vectorizer = joblib.load("model/vectorizer.sav")
# Request payload accepted by the prediction endpoint.
class request_body(BaseModel):
    # Text to classify (string), for example:
    #   "A free service for you ONLY!! Please click on the link now!"
    message: str
# Ordered (pattern, replacement) pairs applied to the raw message.
# The patterns are raw strings on purpose: in a plain string literal "\b" is
# the backspace character, not a regex word boundary, so the email and phone
# patterns would never match ordinary text.
# Order matters: emails are replaced before URLs (the URL pattern would
# otherwise consume the domain part), and phone numbers before generic numbers.
_MSG_SUBSTITUTIONS = (
    (r'\b[\w\-.]+?@\w+?\.\w{2,4}\b', 'email'),
    (r'(http[s]?\S+)|(\w+\.[A-Za-z]{2,4}\S*)', 'https'),
    (r'£|\$', 'moneysymb'),
    (r'\b(?:\+?(\d{1,3})\s?)?[\-(.]?\d{3}[\s.-]?\d{3}[\s.-]?\d{4}\b', 'phonenumb'),
    (r'\d+(\.\d+)?', 'numb'),
    # Anything that is not a word character or whitespace becomes a space.
    (r'[^\w\d\s]', ' '),
)


def process_msg(msg):
    """Clean a raw message and vectorise it for the classifier.

    The text goes through these steps, in order:

    1. Replace email addresses with 'email'.
    2. Replace URLs with 'https'.
    3. Replace the currency symbols £ and $ with 'moneysymb'.
    4. Replace phone numbers with 'phonenumb'.
    5. Replace remaining numbers with 'numb'.
    6. Replace every other non-word, non-whitespace character with a space.
    7. Lower-case the text, split it on whitespace, drop English stop words
       and stem each remaining token with the Porter stemmer.

    Args:
        msg: The raw message text.

    Returns:
        The result of ``vectorizer.transform`` applied to a one-element list
        holding the cleaned, space-joined message.
    """
    # NOTE(review): if the saved vectorizer/model were fitted on text cleaned
    # with the earlier non-raw patterns, re-check predictions on messages
    # containing emails or phone numbers after this fix.
    cleaned_msg = msg
    for pattern, replacement in _MSG_SUBSTITUTIONS:
        cleaned_msg = re.sub(pattern, replacement, cleaned_msg)

    # Build the stop-word set once per call instead of once per token.
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    stemmed_tokens = [
        stemmer.stem(word)
        for word in cleaned_msg.lower().split()
        if word not in stop_words
    ]

    # The vectorizer is given a list of documents; here, exactly one.
    return vectorizer.transform([' '.join(stemmed_tokens)])
# Root endpoint: answers GET / with a fixed greeting payload.
@app.get('/')
def Welcome():
    greeting = {"message": "Welcome to the Spam classifier API!"}
    return greeting
@app.post('/api_predict')
def classify_msg(msg: request_body):
    """Classify the submitted message as ham or spam.

    Args:
        msg: Request body carrying the text to classify in ``message``.

    Returns:
        A dict with a single ``'Answer'`` key holding a human-readable verdict.

    Raises:
        HTTPException: With status code 400 when ``message`` is empty or
            contains only whitespace.
    """
    # Reject empty and whitespace-only input up front; there is nothing to
    # classify in either case.
    if not msg.message or not msg.message.strip():
        raise HTTPException(status_code=400, detail="Please provide a valid message")

    # Clean and vectorise the text so it matches what the model expects.
    features = process_msg(msg.message)

    # predict() returns one label per input row; a single row was passed.
    label = model.predict(features)[0]
    # TODO: consider returning model.predict_proba(features) once it has been tested.

    # Label 0 is reported as ham; any other label is reported as spam.
    if label == 0:
        return {'Answer': "This is a Ham email!"}
    return {'Answer': "This is a Spam email!"}