// Access log line parser (`src/log_parser.rs`).
//
// Recovered from the patch "Add access log parser" (commit 54120e1b, by chylex,
// Fri, 13 Jan 2023). The same patch also changes `src/main.rs`: it adds
// `mod log_parser;` and moves the `mod` declarations into alphabetical order
// (apache_metrics, log_file_pattern, log_parser, log_watcher, web_server).

use std::fmt::{Display, Error, Formatter};

/// The fields of a single access log line, borrowed from the line itself.
///
/// The line layout handled by [`AccessLogLineParts::parse`] and produced by the
/// [`Display`] impl is:
///
/// ```text
/// [time] remote_host "request" response_status response_bytes response_time_ms "referer" "user_agent"
/// ```
///
/// Every field is kept as an unparsed string slice; no numeric conversion or
/// validation is performed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AccessLogLineParts<'a> {
    /// Text found between the leading `[` and the first `]`.
    pub time: &'a str,
    /// First space-delimited token after the time.
    pub remote_host: &'a str,
    /// Text between the first pair of double quotes.
    pub request: &'a str,
    /// First space-delimited token after the request.
    pub response_status: &'a str,
    /// Second space-delimited token after the request.
    pub response_bytes: &'a str,
    /// Third space-delimited token after the request.
    pub response_time_ms: &'a str,
    /// Text between the second pair of double quotes.
    pub referer: &'a str,
    /// Text between the third pair of double quotes.
    pub user_agent: &'a str,
}

impl Display for AccessLogLineParts<'_> {
    /// Writes the fields back in the same layout that `parse` accepts, with
    /// exactly one space between fields.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
        write!(
            f,
            "[{}] {} \"{}\" {} {} {} \"{}\" \"{}\"",
            self.time,
            self.remote_host,
            self.request,
            self.response_status,
            self.response_bytes,
            self.response_time_ms,
            self.referer,
            self.user_agent
        )
    }
}

impl<'a> AccessLogLineParts<'a> {
    /// Splits one access log line into its fields without copying any text.
    ///
    /// Fields are consumed left to right; any number of spaces may precede each
    /// field. Whatever follows the closing quote of the user agent is ignored.
    ///
    /// Quoted fields end at the first `"` that follows the opening quote; there
    /// is no escape handling.
    ///
    /// # Errors
    ///
    /// Returns the [`ParseError`] variant that names the first field which could
    /// not be located.
    pub fn parse(line: &'a str) -> Result<AccessLogLineParts<'a>, ParseError> {
        // Both helpers skip leading spaces themselves, so the remainder of the
        // previous step can be passed on as-is.
        let (time, line) =
            extract_between_chars(line, '[', ']').ok_or(ParseError::TimeBracketsNotFound)?;
        let (remote_host, line) =
            next_space_delimited_part(line).ok_or(ParseError::RemoteHostNotFound)?;
        let (request, line) =
            extract_between_chars(line, '"', '"').ok_or(ParseError::RequestNotFound)?;
        let (response_status, line) =
            next_space_delimited_part(line).ok_or(ParseError::ResponseStatusNotFound)?;
        let (response_bytes, line) =
            next_space_delimited_part(line).ok_or(ParseError::ResponseBytesNotFound)?;
        let (response_time_ms, line) =
            next_space_delimited_part(line).ok_or(ParseError::ResponseTimeNotFound)?;
        let (referer, line) =
            extract_between_chars(line, '"', '"').ok_or(ParseError::RefererNotFound)?;
        let (user_agent, _) =
            extract_between_chars(line, '"', '"').ok_or(ParseError::UserAgentNotFound)?;

        Ok(AccessLogLineParts {
            time,
            remote_host,
            request,
            response_status,
            response_bytes,
            response_time_ms,
            referer,
            user_agent,
        })
    }
}

/// Skips leading spaces, then splits at the next space.
///
/// Returns `(token, rest)` where `rest` starts right after the separating
/// space, or `None` when no space follows the token.
fn next_space_delimited_part(input: &str) -> Option<(&str, &str)> {
    input.trim_start_matches(' ').split_once(' ')
}

/// Skips leading spaces, then extracts the text enclosed by `left_side` and the
/// first following `right_side`.
///
/// Returns `(enclosed, rest)` where `rest` starts right after `right_side`.
/// Returns `None` when the first non-space character is not `left_side`, or
/// when `right_side` does not occur after it.
fn extract_between_chars(input: &str, left_side: char, right_side: char) -> Option<(&str, &str)> {
    input
        .trim_start_matches(' ')
        // `strip_prefix` removes the delimiter by its real UTF-8 length, so
        // multi-byte delimiters work too (a fixed one-byte slice would not).
        .strip_prefix(left_side)?
        .split_once(right_side)
}

/// Identifies which field of an access log line could not be found.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ParseError {
    TimeBracketsNotFound,
    RemoteHostNotFound,
    RequestNotFound,
    ResponseStatusNotFound,
    ResponseBytesNotFound,
    ResponseTimeNotFound,
    RefererNotFound,
    UserAgentNotFound,
}

impl Display for ParseError {
    /// Writes a short human-readable description of the missing field.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
        let message = match self {
            ParseError::TimeBracketsNotFound => "time in square brackets not found",
            ParseError::RemoteHostNotFound => "remote host not found",
            ParseError::RequestNotFound => "quoted request not found",
            ParseError::ResponseStatusNotFound => "response status not found",
            ParseError::ResponseBytesNotFound => "response bytes not found",
            ParseError::ResponseTimeNotFound => "response time not found",
            ParseError::RefererNotFound => "quoted referer not found",
            ParseError::UserAgentNotFound => "quoted user agent not found",
        };
        f.write_str(message)
    }
}

impl std::error::Error for ParseError {}