diff --git a/Cargo.lock b/Cargo.lock
index 89c4c88..2ab56cc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6,6 +6,7 @@ version = 3
 name = "apache_prometheus_exporter"
 version = "0.1.0"
 dependencies = [
+ "path-slash",
  "prometheus-client",
 ]
 
@@ -78,6 +79,12 @@ dependencies = [
  "windows-sys",
 ]
 
+[[package]]
+name = "path-slash"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42"
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.43"
diff --git a/Cargo.toml b/Cargo.toml
index 20eaf6e..84bc3e0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,3 +7,4 @@ edition = "2021"
 
 [dependencies]
 prometheus-client = "0.18.0"
+path-slash = "0.2.1"
diff --git a/src/log_file_pattern.rs b/src/log_file_pattern.rs
new file mode 100644
index 0000000..54e05fd
--- /dev/null
+++ b/src/log_file_pattern.rs
@@ -0,0 +1,279 @@
+use std::{env, io};
+use std::env::VarError;
+use std::fs::DirEntry;
+use std::io::ErrorKind;
+use std::path::{Path, PathBuf};
+
+use path_slash::PathExt;
+
+/// Environment variable that determines the path and file name pattern of log files.
+///
+/// Supports 3 pattern types:
+///
+/// 1. A simple path to a file.
+/// 2. A path with a wildcard anywhere in the file name.
+/// 3. A path with a standalone wildcard component (i.e. no prefix or suffix in the folder name).
+pub const LOG_FILE_PATTERN: &str = "LOG_FILE_PATTERN";
+
+/// Reads [`LOG_FILE_PATTERN`] from the environment and parses it into a [`LogFilePattern`].
+///
+/// The value is first converted to a slash-separated path with `to_slash`.
+///
+/// # Errors
+///
+/// Returns a message if the variable is not set, is not valid Unicode, cannot be converted
+/// to a slash-separated path, or is not a supported pattern.
+pub fn parse_log_file_pattern_from_env() -> Result<LogFilePattern, String> {
+    match env::var(LOG_FILE_PATTERN) {
+        Ok(value) => {
+            // `ok_or_else` only formats the message when the conversion actually fails.
+            let pattern_str = Path::new(&value).to_slash().ok_or_else(|| format!("Environment variable {} contains an invalid path.", LOG_FILE_PATTERN))?;
+            parse_log_file_pattern_from_str(&pattern_str)
+        }
+        Err(VarError::NotPresent) => Err(format!("Environment variable {} must be set.", LOG_FILE_PATTERN)),
+        Err(VarError::NotUnicode(_)) => Err(format!("Environment variable {} contains invalid characters.", LOG_FILE_PATTERN)),
+    }
+}
+
+/// Parses a slash-separated pattern string.
+///
+/// A string without `*` becomes [`LogFilePattern::WithoutWildcard`]; otherwise it is split at
+/// the first `*` and classified by `parse_log_file_pattern_split_on_wildcard`.
+fn parse_log_file_pattern_from_str(pattern_str: &str) -> Result<LogFilePattern, String> {
+    if pattern_str.trim().is_empty() {
+        return Err(String::from("Path is empty."));
+    }
+
+    match pattern_str.split_once('*') {
+        Some((left, right)) => parse_log_file_pattern_split_on_wildcard(left, right),
+        None => Ok(LogFilePattern::WithoutWildcard(pattern_str.to_string())),
+    }
+}
+
+/// Classifies a pattern from the text before (`left`) and after (`right`) its wildcard.
+fn parse_log_file_pattern_split_on_wildcard(left: &str, right: &str) -> Result<LogFilePattern, String> {
+    if left.contains('*') || right.contains('*') {
+        return Err(String::from("Path has too many wildcards."));
+    }
+
+    // `<prefix>/*/<suffix>`: the wildcard stands for one whole folder name.
+    if left.ends_with('/') && right.starts_with('/') {
+        return Ok(LogFilePattern::WithFolderNameWildcard(PatternWithFolderNameWildcard {
+            path_prefix: left.to_string(),
+            path_suffix: right[1..].to_string(),
+        }));
+    }
+
+    if right.contains('/') {
+        return Err(String::from("Path has a folder wildcard with a prefix or suffix."));
+    }
+
+    // An empty folder string cannot be listed with `read_dir`, so keep the root as "/" and
+    // use the current folder when the pattern has no folder part at all.
+    let (folder_path, file_name_prefix) = match left.rsplit_once('/') {
+        Some(("", file_name_prefix)) => ("/", file_name_prefix),
+        Some(split) => split,
+        None => (".", left),
+    };
+
+    Ok(LogFilePattern::WithFileNameWildcard(PatternWithFileNameWildcard {
+        path: folder_path.to_string(),
+        file_name_prefix: file_name_prefix.to_string(),
+        file_name_suffix: right.to_string(),
+    }))
+}
+
+/// Pattern whose wildcard is part of the file name, e.g. `/var/log/access_*.log`.
+#[derive(Debug)]
+pub struct PatternWithFileNameWildcard {
+    /// Folder that is listed when searching.
+    path: String,
+    /// Text a file name must start with.
+    file_name_prefix: String,
+    /// Text a file name must end with.
+    file_name_suffix: String,
+}
+
+impl PatternWithFileNameWildcard {
+    /// Returns the part of `file_name` between prefix and suffix, or `None` if either does not match.
+    fn match_wildcard<'a>(&self, file_name: &'a str) -> Option<&'a str> {
+        file_name.strip_prefix(&self.file_name_prefix).and_then(|r| r.strip_suffix(&self.file_name_suffix))
+    }
+
+    /// Applies `match_wildcard` to the entry's file name; names that are not valid Unicode never match.
+    fn match_wildcard_on_dir_entry(&self, dir_entry: &DirEntry) -> Option<String> {
+        dir_entry.file_name().to_str().and_then(|file_name| self.match_wildcard(file_name)).map(String::from)
+    }
+}
+
+/// Pattern whose wildcard replaces one whole folder name, e.g. `/var/www/*/logs/access.log`.
+#[derive(Debug)]
+pub struct PatternWithFolderNameWildcard {
+    /// Folder whose sub-folders are enumerated (the text before the wildcard).
+    path_prefix: String,
+    /// Path of the log file relative to each matched sub-folder.
+    path_suffix: String,
+}
+
+impl PatternWithFolderNameWildcard {
+    /// Returns the entry's name if the entry is a directory and its name is valid Unicode.
+    fn match_wildcard_on_dir_entry(dir_entry: &DirEntry) -> Option<String> {
+        if matches!(dir_entry.file_type(), Ok(entry_type) if entry_type.is_dir()) {
+            dir_entry.file_name().to_str().map(|s| s.into())
+        } else {
+            None
+        }
+    }
+}
+
+/// A parsed log file pattern; see [`LOG_FILE_PATTERN`] for the supported forms.
+#[derive(Debug)]
+pub enum LogFilePattern {
+    WithoutWildcard(String),
+    WithFileNameWildcard(PatternWithFileNameWildcard),
+    WithFolderNameWildcard(PatternWithFolderNameWildcard),
+}
+
+impl LogFilePattern {
+    /// Resolves the pattern against the file system and returns the log files it describes.
+    ///
+    /// # Errors
+    ///
+    /// Returns an [`io::Error`] if a folder cannot be listed, or if a path without wildcard is
+    /// neither an existing file nor located in an existing folder.
+    pub fn search(&self) -> Result<Vec<LogFilePath>, io::Error> { // TODO error message
+        match self {
+            Self::WithoutWildcard(path) => Self::search_without_wildcard(path),
+            Self::WithFileNameWildcard(pattern) => Self::search_with_file_name_wildcard(pattern),
+            Self::WithFolderNameWildcard(pattern) => Self::search_with_folder_name_wildcard(pattern),
+        }
+    }
+
+    /// Accepts the path if it is an existing file or if its parent is an existing folder.
+    fn search_without_wildcard(path_str: &str) -> Result<Vec<LogFilePath>, io::Error> {
+        let path = Path::new(path_str);
+        let is_valid = path.is_file() || matches!(path.parent(), Some(parent) if parent.is_dir());
+
+        if is_valid {
+            Ok(vec![LogFilePath::with_empty_label(path_str)])
+        } else {
+            Err(io::Error::from(ErrorKind::NotFound))
+        }
+    }
+
+    /// Lists `pattern.path` and keeps the files whose name matches; the wildcard text becomes the label.
+    fn search_with_file_name_wildcard(pattern: &PatternWithFileNameWildcard) -> Result<Vec<LogFilePath>, io::Error> {
+        let mut result = Vec::new();
+
+        for dir_entry in Path::new(&pattern.path).read_dir()? {
+            let dir_entry = dir_entry?;
+            if let Some(wildcard_match) = pattern.match_wildcard_on_dir_entry(&dir_entry) {
+                let path = dir_entry.path();
+                // Same check as the folder wildcard search: a matching sub-folder is not a log file.
+                if path.is_file() {
+                    result.push(LogFilePath { path, label: wildcard_match });
+                }
+            }
+        }
+
+        Ok(result)
+    }
+
+    /// Lists the sub-folders of `pattern.path_prefix` and keeps those containing the file at
+    /// `pattern.path_suffix`; the folder name becomes the label.
+    fn search_with_folder_name_wildcard(pattern: &PatternWithFolderNameWildcard) -> Result<Vec<LogFilePath>, io::Error> {
+        let mut result = Vec::new();
+
+        for dir_entry in Path::new(&pattern.path_prefix).read_dir()? {
+            let dir_entry = dir_entry?;
+            if let Some(wildcard_match) = PatternWithFolderNameWildcard::match_wildcard_on_dir_entry(&dir_entry) {
+                let full_path = dir_entry.path().join(&pattern.path_suffix);
+                if full_path.is_file() {
+                    result.push(LogFilePath { path: full_path, label: wildcard_match });
+                }
+            }
+        }
+
+        Ok(result)
+    }
+}
+
+/// A log file found by [`LogFilePattern::search`].
+#[derive(Debug)]
+pub struct LogFilePath {
+    /// Location of the log file.
+    pub path: PathBuf,
+    /// Text matched by the wildcard; empty for patterns without a wildcard.
+    pub label: String,
+}
+
+impl LogFilePath {
+    /// Wraps a plain path that has no wildcard match to use as a label.
+    fn with_empty_label(s: &str) -> LogFilePath {
+        LogFilePath {
+            path: PathBuf::from(s),
+            label: String::default(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::log_file_pattern::{LogFilePattern, parse_log_file_pattern_from_str};
+
+    #[test]
+    fn empty_path() {
+        assert!(matches!(parse_log_file_pattern_from_str(""), Err(err) if err == "Path is empty."));
+        assert!(matches!(parse_log_file_pattern_from_str(" "), Err(err) if err == "Path is empty."));
+    }
+
+    #[test]
+    fn too_many_wildcards() {
+        assert!(matches!(parse_log_file_pattern_from_str("/path/*/to/files/*.log"), Err(err) if err == "Path has too many wildcards."));
+    }
+
+    #[test]
+    fn folder_wildcard_with_prefix_not_supported() {
+        assert!(matches!(parse_log_file_pattern_from_str("/path/*abc/to/files/access.log"), Err(err) if err == "Path has a folder wildcard with a prefix or suffix."));
+    }
+
+    #[test]
+    fn folder_wildcard_with_suffix_not_supported() {
+        assert!(matches!(parse_log_file_pattern_from_str("/path/abc*/to/files/access.log"), Err(err) if err == "Path has a folder wildcard with a prefix or suffix."));
+    }
+
+    #[test]
+    fn valid_without_wildcard() {
+        assert!(matches!(parse_log_file_pattern_from_str("/path/to/file/access.log"), Ok(LogFilePattern::WithoutWildcard(path)) if path == "/path/to/file/access.log"));
+    }
+
+    #[test]
+    fn valid_with_file_name_wildcard_prefix() {
+        assert!(matches!(parse_log_file_pattern_from_str("/path/to/files/access_*"), Ok(LogFilePattern::WithFileNameWildcard(pattern)) if pattern.path == "/path/to/files" && pattern.file_name_prefix == "access_" && pattern.file_name_suffix == ""));
+    }
+
+    #[test]
+    fn valid_with_file_name_wildcard_suffix() {
+        assert!(matches!(parse_log_file_pattern_from_str("/path/to/files/*_access.log"), Ok(LogFilePattern::WithFileNameWildcard(pattern)) if pattern.path == "/path/to/files" && pattern.file_name_prefix == "" && pattern.file_name_suffix == "_access.log"));
+    }
+
+    #[test]
+    fn valid_with_file_name_wildcard_both() {
+        assert!(matches!(parse_log_file_pattern_from_str("/path/to/files/access_*.log"), Ok(LogFilePattern::WithFileNameWildcard(pattern)) if pattern.path == "/path/to/files" && pattern.file_name_prefix == "access_" && pattern.file_name_suffix == ".log"));
+    }
+
+    #[test]
+    fn valid_with_file_name_wildcard_in_root_folder() {
+        assert!(matches!(parse_log_file_pattern_from_str("/access_*.log"), Ok(LogFilePattern::WithFileNameWildcard(pattern)) if pattern.path == "/" && pattern.file_name_prefix == "access_" && pattern.file_name_suffix == ".log"));
+    }
+
+    #[test]
+    fn valid_with_file_name_wildcard_in_current_folder() {
+        assert!(matches!(parse_log_file_pattern_from_str("access_*.log"), Ok(LogFilePattern::WithFileNameWildcard(pattern)) if pattern.path == "." && pattern.file_name_prefix == "access_" && pattern.file_name_suffix == ".log"));
+    }
+
+    #[test]
+    fn valid_with_folder_wildcard() {
+        assert!(matches!(parse_log_file_pattern_from_str("/path/to/*/files/access.log"), Ok(LogFilePattern::WithFolderNameWildcard(pattern)) if pattern.path_prefix == "/path/to/" && pattern.path_suffix == "files/access.log"));
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index f328e4d..46124c7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1 +1,28 @@
-fn main() {}
+use crate::log_file_pattern::parse_log_file_pattern_from_env;
+
+mod log_file_pattern;
+
+/// Resolves the configured log file pattern and prints every log file it matches.
+///
+/// Failures are reported on stderr and end the process with exit code 1.
+fn main() {
+    let log_file_pattern = match parse_log_file_pattern_from_env() {
+        Ok(pattern) => pattern,
+        Err(error) => {
+            eprintln!("Error: {}", error);
+            std::process::exit(1);
+        }
+    };
+
+    let log_files = match log_file_pattern.search() {
+        Ok(files) => files,
+        Err(error) => {
+            eprintln!("Error searching log files: {}", error);
+            std::process::exit(1);
+        }
+    };
+
+    for log_file in log_files {
+        println!("Found log file: {} (label \"{}\")", log_file.path.display(), log_file.label);
+    }
+}