rgrep
Cargo.toml
Cargo.toml
[package]
name = "rgrep"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1"
clap = { version = "3", features = ["derive"] }
colored = "2"
glob = "0.3"
itertools = "0.10"
rayon = "1"
regex = "1"
thiserror = "1"
src/error.rs: thiserror会自动转换
glob::PatternError、regex::Error 和 std::io::Error 都是需要进行转换的错误。thiserror 能够通过宏(#[from] 属性)帮我们完成错误类型的转换。
use thiserror::Error;

/// Errors that can occur while running a grep.
///
/// Each variant wraps an underlying error type; the `#[from]` attribute lets
/// `?` convert that underlying error into a `GrepError`.
#[derive(Error, Debug)]
pub enum GrepError {
    /// The file glob pattern was rejected by the `glob` crate.
    #[error("Glob pattern error")]
    GlobPatternError(#[from] glob::PatternError),
    /// The regular expression was rejected by the `regex` crate.
    #[error("Regex pattern error")]
    RegexPatternError(#[from] regex::Error),
    /// An I/O operation failed.
    #[error("I/O error")]
    IoError(#[from] std::io::Error),
}
src/lib.rs:定义结构体+实现方法+单元测试
定义结构体: 专门简化复杂类型
这里其实就是定义一个具有指定签名的函数指针类型,策略函数以这种类型传入。
/// Function-pointer type shared by all matching strategies; giving it a name
/// keeps this otherwise long type short wherever it is used.
///
/// A strategy takes a file path, a buffered reader, a compiled regex and a
/// writer, and returns `Ok(())` or a `GrepError`.
pub type StrategyFn = fn(&Path, &mut dyn BufRead, &Regex, &mut dyn Write) -> Result<(), GrepError>;
专门的结合版本grep结构体
// Command-line configuration for a simplified grep: a regular expression plus
// a file glob.
// NOTE(review): the `///` doc comments below are kept verbatim because clap's
// derive presumably surfaces them in the generated help output — confirm
// before translating them.
/// 简化版本的 grep,支持正则表达式和文件通配符
#[derive(Parser, Debug)]
#[clap(version = "1.0", author = "Tyr Chen <tyr@chen.com>")]
pub struct GrepConfig {
    /// 用于查找的正则表达式
    // The regular expression to search for.
    pattern: String,
    /// 文件通配符
    // The glob pattern selecting which files to search.
    glob: String,
}
lib.rs: 给结构体实现方法
impl GrepConfig { /// 使用缺省策略来查找匹配 pub fn match_with_default_strategy(&self) -> Result<(), GrepError> { self.match_with(default_strategy) } /// 使用某个策略函数来查找匹配 pub fn match_with(&self, strategy: StrategyFn) -> Result<(), GrepError> { let regex = Regex::new(&self.pattern)?; // 生成所有符合通配符的文件列表 let files: Vec<_> = glob::glob(&self.glob)?.collect(); // 并行处理所有文件 files.into_par_iter().for_each(|v| { if let Ok(filename) = v { if let Ok(file) = File::open(&filename) { let mut reader = BufReader::new(file); let mut stdout = io::stdout(); if let Err(e) = strategy(filename.as_path(), &mut reader, ®ex, &mut stdout) { println!("Internal error: {:?}", e); } } } }); Ok(()) } }
主要实现两种解析策略:
- 默认策略:match_with_default_strategy, 使用default_strategy
- 指定策略:match_with, 使用传入的strategy: StrategyFn
默认策略: default_strategy
/// 缺省策略,从头到尾串行查找,最后输出到 writer pub fn default_strategy( path: &Path, reader: &mut dyn BufRead, pattern: &Regex, writer: &mut dyn Write, ) -> Result<(), GrepError> { let matches: String = reader .lines() .enumerate() .map(|(lineno, line)| { line.ok() .map(|line| { pattern .find(&line) .map(|m| format_line(&line, lineno + 1, m.range())) }) .flatten() }) .filter_map(|v| v.ok_or(()).ok()) .join("\n"); if !matches.is_empty() { writer.write_all(path.display().to_string().green().as_bytes())?; writer.write_all(b"\n")?; writer.write_all(matches.as_bytes())?; writer.write_all(b"\n")?; } Ok(()) }
格式化输出
/// 格式化输出匹配的行,包含行号,列号和带有高亮的第一个匹配项 pub fn format_line(line: &str, lineno: usize, range: Range<usize>) -> String { let Range { start, end } = range; let prefix = &line[..start]; format!( "{0: >6}:{1: <3} {2}{3}{4}", lineno.to_string().blue(), // 找到匹配项的起始位置,注意对汉字等非 ascii 字符,我们不能使用 prefix.len() // 这是一个 O(n) 的操作,会拖累效率,这里只是为了演示的效果 (prefix.chars().count() + 1).to_string().cyan(), prefix, &line[start..end].red(), &line[end..] ) }
单元测试
#[cfg(test)]
mod tests {
    use super::*;

    /// `format_line` pads the line number and column and highlights the match.
    #[test]
    fn format_line_should_work() {
        let expected = format!(
            "{0: >6}:{1: <3} Hello, {2}~",
            "1000".blue(),
            "8".cyan(),
            "Tyr".red()
        );
        let actual = format_line("Hello, Tyr~", 1000, 7..10);
        assert_eq!(actual, expected);
    }

    /// `default_strategy` writes the path followed by one entry per matching line.
    #[test]
    fn default_strategy_should_work() {
        let input: &[u8] = b"hello world!\nhey Tyr!";
        let pattern = Regex::new(r"he\w+").unwrap();
        let mut reader = BufReader::new(input);
        let mut output: Vec<u8> = Vec::new();

        default_strategy(Path::new("src/main.rs"), &mut reader, &pattern, &mut output).unwrap();
        let actual = String::from_utf8(output).unwrap();

        // The "\n" appended to the last input line stands in for the trailing
        // newline that `default_strategy` writes after the final entry.
        let expected = [
            String::from("src/main.rs"),
            format_line("hello world!", 1, 0..5),
            format_line("hey Tyr!\n", 2, 0..3),
        ]
        .join("\n");

        assert_eq!(actual, expected);
    }
}
src/main.rs
主函数:main()(注:下面这段 use 语句对应的是 src/lib.rs 开头的导入,main() 本身的代码未在此列出)
use regex::Regex; use std::{ fs::File, io::{self, BufRead, BufReader, Write}, ops::Range, path::Path, };
使用
示例:cargo run --quiet -- "正则表达式" "src/*.rs"
cargo run --quiet -- "Re[^\\s]+" "src/*.rs"
src/main.rs
1:13 use anyhow::Result;
5:14 fn main() -> Result<()> {
src/error.rs
7:14 #[error("Regex pattern error")]
8:5 RegexPatternError(#[from] regex::Error),
src/lib.rs
5:12 use regex::Regex;
8:19 io::{self, BufRead, BufReader, Write},
17:45 pub type StrategyFn = fn(&Path, &mut dyn BufRead, &Regex, &mut dyn Write) -> Result<(), GrepError>;
31:50 pub fn match_with_default_strategy(&self) -> Result<(), GrepError> {
36:55 pub fn match_with(&self, strategy: StrategyFn) -> Result<(), GrepError> {
37:21 let regex = Regex::new(&self.pattern)?;
44:41 let mut reader = BufReader::new(file);
60:25 reader: &mut dyn BufRead,
61:15 pattern: &Regex,
63:6 ) -> Result<(), GrepError> {
126:29 let mut reader = BufReader::new(&input[..]);
127:23 let pattern = Regex::new(r"he\w+").unwrap();