-
Notifications
You must be signed in to change notification settings - Fork 426
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add some working metrics for log analyser project (#1983)
* adds folder for log analyser project * adds code for input and log segregation * Adds open handles and read pattern metrics * Adds usage info, .txt support and minor changes * Adds stat/list call metric * Adds closed handles metric * Adds structured code * Adds output module * minor fix * Adds csv generation function * Adds some structural changes * Deletes extra_log_info.go * Minor changes * Changed output to gsheets * Minor changes * Adds write patterns, GCS read patterns, GCS call stats for top files * Adds JSON support for GKE logs * Minor fixes * Adds support for files which contain logs with decreasing timestamp * Adds some documentation --------- Co-authored-by: Ankita Luthra <lankita@google.com>
- Loading branch information
1 parent
3525748
commit 2a07dc3
Showing
14 changed files
with
1,205 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
This log analyzer takes log files and a few filters and outputs the analysis. | ||
|
||
Install python if not installed already. | ||
|
||
Create a python environment using the following steps- | ||
|
||
|
||
(Replace python3 with python if you don't have python3) | ||
|
||
1. Run this command to install python3-venv, `sudo apt install python3-venv` | ||
|
||
2. Run `python3 -m venv /path`, replace path with the location you want to create virtual environment (preferably outside the repo, to avoid creating unwanted files) | ||
3. Activate the environment using the command `source /path/bin/activate`, replacing /path with the location where you created the environment in step 2 | ||
|
||
(Deactivate the environment using the command `deactivate`, once you finish running the code) | ||
|
||
|
||
|
||
Install numpy using command- `pip install numpy` | ||
|
||
Install gspread using command - `pip install gspread` | ||
|
||
|
||
Run the code using the command | ||
`python3 main.py` if python3 is installed, | ||
otherwise use `python main.py` | ||
|
||
Enter the name of the directory that contains log files (with absolute paths) | ||
for example - `/usr/local/google/home/patelvishvesh/tmp/test_dir` | ||
|
||
Make sure that directory contains only files and not folders. | ||
|
||
You can also give a zip file inside the directory. This zip should contain files only and not folders. | ||
|
||
|
||
Choose if you want a time window filter (by pressing y/n) | ||
|
||
If yes, enter the start and end time (epoch) | ||
|
||
Enter the type of logs (gke/gcsfuse) | ||
|
||
If chosen gke, enter the format in which logs are (CSV/JSON) | ||
|
||
Enter your LDAP username (to grant access to the created sheet) | ||
|
||
Example - `patelvishvesh` | ||
|
||
|
||
Enter the name and location of the credential file | ||
|
||
Example - `/usr/local/google/home/patelvishvesh/Downloads/credentials.json` | ||
|
||
After this, a Google Sheets link will be generated. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
import zipfile | ||
import json | ||
import csv | ||
import datetime | ||
|
||
|
||
class GetLogs:
    """Reads raw log files (gcsfuse text/JSON, GKE CSV/JSON) and normalizes
    them into time-ordered {"timestamp": {...}, "message": ...} dicts."""

    def iso_to_epoch(self, timestamp_str):
        """
        converts iso to epoch time (seconds and nanoseconds)
        :param timestamp_str: string with iso time
        :return: epoch time json object, or None if parsing fails
        """
        try:
            datetime_obj = datetime.datetime.fromisoformat(timestamp_str)
            seconds = int(datetime_obj.timestamp())
            nanos = datetime_obj.microsecond * 1000
            return {"seconds": seconds, "nanos": nanos}
        except ValueError as e:
            print(f"Error parsing timestamp: {e}")
            return None

    def get_sorted_files(self, files, log_type, log_format):
        """
        for each file it reads the first log with a valid timestamp and then
        arranges the files in order of those timestamps
        :param files: list of file names; zip archives are extracted next to
            themselves and the list is mutated to include the extracted members
        :param log_type: gke/gcsfuse (anything else falls back to the
            time="..." plain-text format)
        :param log_format: JSON/CSV, only consulted when log_type == "gke"
        :return: ordered list of files
        """
        unordered_list = []
        for file in files:
            if file.find(".zip") != -1:
                destination_dir = file[0:file.rfind("/") + 1]
                # FIX: context manager so the archive handle is always closed
                with zipfile.ZipFile(file, "r") as zip_ref:
                    zip_list = zip_ref.namelist()
                    zip_ref.extractall(destination_dir)
                # extracted members are appended to `files` so this same loop
                # visits them on later iterations
                for zipf in zip_list:
                    files.append(destination_dir + zipf)
            else:
                unordered_list.append(file)
        # pair each file's first timestamp with its index in unordered_list so
        # the files can be sorted chronologically
        file_tuple = []
        pos = 0
        if log_type == "gcsfuse":
            for file in unordered_list:
                with open(file, "r") as handle:
                    for line in handle:
                        data = line.strip()
                        try:
                            json_object = json.loads(data)
                            sec = json_object["timestamp"]["seconds"]
                            nano = json_object["timestamp"]["nanos"]
                            file_tuple.append([[sec, nano], pos])
                            break
                        except json.JSONDecodeError:
                            print(f"Error parsing line: {line}")
                pos += 1
        elif log_type == "gke":
            for file in unordered_list:
                if log_format == "JSON":
                    with open(file, "r") as handle:
                        data = json.load(handle)
                        for obj in data:
                            timestamp = self.iso_to_epoch(obj["timestamp"])
                            if timestamp is not None:
                                file_tuple.append([[timestamp["seconds"], timestamp["nanos"]], pos])
                                break
                else:
                    with open(file, 'r') as csvfile:
                        reader = csv.reader(csvfile)
                        header_row = next(reader)
                        fields_to_extract = ["timestamp", "textPayload"]
                        field_indices = {field: header_row.index(field) for field in fields_to_extract if field in header_row}

                        for row in reader:
                            timestamp = self.iso_to_epoch(row[field_indices["timestamp"]])
                            if timestamp is not None:
                                file_tuple.append([[timestamp["seconds"], timestamp["nanos"]], pos])
                                break
                pos += 1
        else:
            # FIX: iterate unordered_list (the original iterated `files`, which
            # still contains the .zip archives themselves and desynchronized
            # `pos` from the unordered_list indices used below)
            for file in unordered_list:
                with open(file, "r") as handle:
                    for line in handle:
                        # FIX: test find() BEFORE adding the prefix length; the
                        # old `start_ind != -1` check was always true because
                        # len('time="') had already been added to -1
                        time_pos = line.find("time=\"")
                        if time_pos == -1:
                            continue
                        start_ind = time_pos + len("time=\"")
                        end_ind = line.find("\"", start_ind)
                        if end_ind == -1:
                            continue
                        time = line[start_ind:end_ind]
                        datetime_obj = datetime.datetime.strptime(time, "%d/%m/%Y %H:%M:%S.%f")
                        # FIX: int() for consistency with the other branches
                        epoch_time = int(datetime_obj.timestamp())
                        microseconds_str = time.split('.')[-1]
                        microseconds = int(microseconds_str)
                        nanoseconds = microseconds * 1000
                        file_tuple.append([[epoch_time, nanoseconds], pos])
                        break
                pos += 1
        file_tuple.sort()
        ordered_list = []
        for file_tup in file_tuple:
            ordered_list.append(unordered_list[file_tup[1]])
        return ordered_list

    def append_logs(self, logs, temp_logs):
        """
        extracts timestamp and message (textPayload) and appends to the list of
        logs; if the file's timestamps decrease, it is appended in reverse so
        `logs` stays chronologically increasing
        :param logs: list of logs (mutated in place)
        :param temp_logs: logs of a single file with more fields than we need
        """
        # FIX: guard against an empty file instead of raising IndexError
        if not temp_logs:
            return
        first_log = self.iso_to_epoch(temp_logs[0]["timestamp"])
        last_log = self.iso_to_epoch(temp_logs[len(temp_logs) - 1]["timestamp"])
        first_log_time = first_log["seconds"] + 1e-9 * first_log["nanos"]
        last_log_time = last_log["seconds"] + 1e-9 * last_log["nanos"]
        if first_log_time < last_log_time:
            for obj in temp_logs:
                if "timestamp" in obj.keys() and "textPayload" in obj.keys():
                    json_log = {"timestamp": self.iso_to_epoch(obj["timestamp"]), "message": obj["textPayload"]}
                    logs.append(json_log)
        else:
            # decreasing timestamps: walk the file back-to-front
            file_len = len(temp_logs) - 1
            for i in range(len(temp_logs)):
                obj = temp_logs[file_len - i]
                if "timestamp" in obj.keys() and "textPayload" in obj.keys():
                    json_log = {"timestamp": self.iso_to_epoch(obj["timestamp"]), "message": obj["textPayload"]}
                    logs.append(json_log)

    def get_json_logs(self, files, log_type, interval, log_format):
        """
        calls get_sorted_files to get sorted files and then, depending on the
        format, opens each file and calls append_logs or directly appends logs
        (for json gcsfuse logs)
        :param files: list of file names
        :param log_type: gke/gcsfuse
        :param interval: [start, end] epoch seconds; only applied to gcsfuse
            logs in this flow
        :param log_format: JSON/CSV, only consulted when log_type == "gke"
        :return: a list of json logs with two fields message and timestamp(epoch)
        """
        ordered_files = self.get_sorted_files(files, log_type, log_format)
        logs = []
        for file in ordered_files:
            if log_type == "gcsfuse":
                with open(file, "r") as handle:
                    for line in handle:
                        data = line.strip()
                        try:
                            json_object = json.loads(data)
                            if json_object["timestamp"]["seconds"] < interval[0]:
                                continue
                            elif json_object["timestamp"]["seconds"] > interval[1]:
                                break
                            logs.append(json_object)
                        except json.JSONDecodeError:
                            print(f"Error parsing line: {line}")

            elif log_type == "gke":
                if log_format == "JSON":
                    with open(file, "r") as handle:
                        data = json.load(handle)
                        if not isinstance(data, list):
                            raise ValueError("Expected a JSON list in the file")
                        self.append_logs(logs, data)
                else:
                    temp_logs = []
                    with open(file, 'r') as csvfile:
                        reader = csv.reader(csvfile)
                        header_row = next(reader)
                        fields_to_extract = ["timestamp", "textPayload"]
                        field_indices = {field: header_row.index(field) for field in fields_to_extract if field in header_row}
                        for row in reader:
                            json_log = {"timestamp": row[field_indices["timestamp"]], "textPayload": row[field_indices["textPayload"]]}
                            temp_logs.append(json_log)
                    self.append_logs(logs, temp_logs)
            else:
                with open(file, 'r') as handle:
                    for line in handle:
                        # FIX: validate find() results before slicing; the old
                        # `!= -1` checks were always true because the prefix
                        # lengths had already been added to the find() result
                        time_pos = line.find("time=\"")
                        msg_pos = line.find("message=\"")
                        if time_pos == -1 or msg_pos == -1:
                            continue
                        start_ind = time_pos + len("time=\"")
                        end_ind = line.find("\"", start_ind)
                        start_ind1 = msg_pos + len("message=\"")
                        end_ind1 = line.rfind("\"")
                        if end_ind == -1 or end_ind1 == -1:
                            continue
                        time = line[start_ind:end_ind]
                        message = line[start_ind1:end_ind1]
                        message = message.replace(r"\"", "\"")
                        datetime_obj = datetime.datetime.strptime(time, "%d/%m/%Y %H:%M:%S.%f")
                        epoch_time = int(datetime_obj.timestamp())
                        microseconds_str = time.split('.')[-1]
                        microseconds = int(microseconds_str)
                        nanoseconds = microseconds * 1000
                        log_data = {"timestamp": {"seconds": epoch_time, "nanos": nanoseconds}, "message": message}
                        logs.append(log_data)

        return logs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from inputreader.get_logs import GetLogs | ||
import os | ||
|
||
|
||
class UserInput:
    """Interactive prompt flow that collects log location/type/filter choices
    and returns the parsed, time-ordered logs."""

    def get_input(self):
        """
        takes the directory and log information from user
        and calls appropriate functions to get sorted logs
        :return: list of sorted logs
        """
        files = []
        directory_path = input("Enter the path to the directory containing log files: ")
        for filename in os.listdir(directory_path):
            # Construct the full path to the file
            file_path = os.path.join(directory_path, filename)

            # Keep regular files only; directories are skipped.
            # NOTE: hidden files are NOT filtered out here (the original
            # comment claimed they were, but isfile() does not check that).
            if os.path.isfile(file_path):
                files.append(file_path)

        add_time_filter = input("Do you want the time filter(y/n):")
        if add_time_filter in ("y", "Y"):
            start_time = int(input("start time(epoch): "))
            end_time = int(input("end time(epoch): "))
        else:
            # FIX: use an int sentinel so both interval bounds share the same
            # type as the user-supplied epochs (was the float 1e18)
            start_time = 0
            end_time = 10**18
        get_logs_obj = GetLogs()
        log_type = input("Enter the type of logs (gcsfuse/gke): ")
        log_format = ""
        if log_type == "gke":
            log_format = input("Enter the format of the GKE logs (CSV/JSON): ")
        logs = get_logs_obj.get_json_logs(files, log_type, [start_time, end_time], log_format)
        return logs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from parser import log_parser | ||
from outputgenerator import generate_gsheet | ||
from inputreader.user_input import UserInput | ||
def main():
    """Entry point: prompt for log files, parse them, and publish the
    aggregated metrics to a Google Sheet."""
    user_input_obj = UserInput()
    logs = user_input_obj.get_input()
    global_data = log_parser.general_parser(logs)
    generate_gsheet.main_gsheet_generator(global_data)


# FIX: guard the entry point so importing main.py no longer triggers the
# interactive prompts and sheet generation as an import side effect
if __name__ == "__main__":
    main()
Oops, something went wrong.