From 54120e1b3351a4b6adb8c3f8fc98a60cb0ad3836 Mon Sep 17 00:00:00 2001
From: chylex <contact@chylex.com>
Date: Fri, 13 Jan 2023 16:26:03 +0100
Subject: [PATCH] Add access log parser

---
 src/log_parser.rs | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs       |  5 ++--
 2 files changed, 61 insertions(+), 2 deletions(-)
 create mode 100644 src/log_parser.rs

diff --git a/src/log_parser.rs b/src/log_parser.rs
new file mode 100644
index 0000000..0e860ac
--- /dev/null
+++ b/src/log_parser.rs
@@ -0,0 +1,139 @@
+use std::fmt::{Display, Error, Formatter};
+
+/// Fields of one access log line, borrowed from the line they were parsed from.
+///
+/// The layout understood by [`AccessLogLineParts::parse`] is
+/// `[time] remote_host "request" status bytes time_ms "referer" "user_agent"`.
+/// Every field holds the raw text found in the line: nothing is converted to a number and
+/// backslash escapes inside the quoted fields are kept as written.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct AccessLogLineParts<'a> {
+	/// Text between the leading `[` and `]`.
+	pub time: &'a str,
+	/// First space-delimited token after the time.
+	pub remote_host: &'a str,
+	/// Contents of the first quoted section.
+	pub request: &'a str,
+	/// First space-delimited token after the request.
+	pub response_status: &'a str,
+	/// Space-delimited token after the response status.
+	pub response_bytes: &'a str,
+	/// Space-delimited token after the response size.
+	pub response_time_ms: &'a str,
+	/// Contents of the second quoted section.
+	pub referer: &'a str,
+	/// Contents of the third quoted section.
+	pub user_agent: &'a str,
+}
+
+impl Display for AccessLogLineParts<'_> {
+	/// Writes the fields in the layout accepted by [`AccessLogLineParts::parse`], separated by single spaces.
+	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
+		write!(
+			f,
+			"[{}] {} \"{}\" {} {} {} \"{}\" \"{}\"",
+			self.time,
+			self.remote_host,
+			self.request,
+			self.response_status,
+			self.response_bytes,
+			self.response_time_ms,
+			self.referer,
+			self.user_agent,
+		)
+	}
+}
+
+impl<'a> AccessLogLineParts<'a> {
+	/// Splits `line` into its fields without copying any text.
+	///
+	/// Fields may be separated by any number of spaces. Anything after the user agent is ignored.
+	///
+	/// # Errors
+	///
+	/// Returns the [`ParseError`] variant naming the first field that could not be found.
+	pub fn parse(line: &'a str) -> Result<AccessLogLineParts<'a>, ParseError> {
+		let (time, line) = extract_between_chars(line, '[', ']').ok_or(ParseError::TimeBracketsNotFound)?;
+		let (remote_host, line) = next_space_delimited_part(line).ok_or(ParseError::RemoteHostNotFound)?;
+		let (request, line) = extract_quoted(line).ok_or(ParseError::RequestNotFound)?;
+		let (response_status, line) = next_space_delimited_part(line).ok_or(ParseError::ResponseStatusNotFound)?;
+		let (response_bytes, line) = next_space_delimited_part(line).ok_or(ParseError::ResponseBytesNotFound)?;
+		let (response_time_ms, line) = next_space_delimited_part(line).ok_or(ParseError::ResponseTimeNotFound)?;
+		let (referer, line) = extract_quoted(line).ok_or(ParseError::RefererNotFound)?;
+		let (user_agent, _) = extract_quoted(line).ok_or(ParseError::UserAgentNotFound)?;
+		Ok(AccessLogLineParts { time, remote_host, request, response_status, response_bytes, response_time_ms, referer, user_agent })
+	}
+}
+
+/// Returns the next run of non-space characters and the text that follows it.
+///
+/// Leading spaces are skipped. Returns `None` when only spaces (or nothing) are left.
+fn next_space_delimited_part(input: &str) -> Option<(&str, &str)> {
+	let input = input.trim_start_matches(' ');
+	if input.is_empty() {
+		return None;
+	}
+	// A token at the very end of the input has no trailing space but is still a token.
+	Some(input.split_once(' ').unwrap_or((input, "")))
+}
+
+/// Returns the text between `left_side` and the first `right_side` after it, and the text after that `right_side`.
+///
+/// Leading spaces are skipped. Returns `None` if the first remaining character is not
+/// `left_side`, or if `right_side` does not occur after it.
+fn extract_between_chars(input: &str, left_side: char, right_side: char) -> Option<(&str, &str)> {
+	// `strip_prefix` steps over the whole delimiter, so delimiters wider than one byte work too.
+	input.trim_start_matches(' ').strip_prefix(left_side)?.split_once(right_side)
+}
+
+/// Returns the contents of the double-quoted section at the start of `input`, and the text after the closing quote.
+///
+/// Leading spaces are skipped. A backslash escapes the character that follows it, so `\"` does
+/// not end the section; Apache httpd writes quotes inside logged strings that way. The contents
+/// are returned as written, escapes included. Returns `None` if `input` does not start with a
+/// quote or the closing quote is missing.
+fn extract_quoted(input: &str) -> Option<(&str, &str)> {
+	let body = input.trim_start_matches(' ').strip_prefix('"')?;
+	let mut escaped = false;
+	for (index, c) in body.char_indices() {
+		if escaped {
+			escaped = false;
+		} else if c == '\\' {
+			escaped = true;
+		} else if c == '"' {
+			// `"` is one byte long, so `index + 1` is the start of the next character.
+			return Some((&body[..index], &body[index + 1..]));
+		}
+	}
+	None
+}
+
+/// Names the first field that [`AccessLogLineParts::parse`] could not find.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum ParseError {
+	TimeBracketsNotFound,
+	RemoteHostNotFound,
+	RequestNotFound,
+	ResponseStatusNotFound,
+	ResponseBytesNotFound,
+	ResponseTimeNotFound,
+	RefererNotFound,
+	UserAgentNotFound,
+}
+
+impl Display for ParseError {
+	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
+		f.write_str(match self {
+			ParseError::TimeBracketsNotFound => "time brackets not found",
+			ParseError::RemoteHostNotFound => "remote host not found",
+			ParseError::RequestNotFound => "request not found",
+			ParseError::ResponseStatusNotFound => "response status not found",
+			ParseError::ResponseBytesNotFound => "response bytes not found",
+			ParseError::ResponseTimeNotFound => "response time not found",
+			ParseError::RefererNotFound => "referer not found",
+			ParseError::UserAgentNotFound => "user agent not found",
+		})
+	}
+}
+
+impl std::error::Error for ParseError {}
diff --git a/src/main.rs b/src/main.rs
index 1b769e0..ba10a8c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -10,9 +10,10 @@ use crate::log_file_pattern::{LogFilePath, parse_log_file_pattern_from_env};
 use crate::log_watcher::watch_logs_task;
 use crate::web_server::{create_web_server, run_web_server};
 
-mod log_file_pattern;
-mod log_watcher;
 mod apache_metrics;
+mod log_file_pattern;
+mod log_parser;
+mod log_watcher;
 mod web_server;
 
 const ACCESS_LOG_FILE_PATTERN: &'static str = "ACCESS_LOG_FILE_PATTERN";