diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml
index 9172b7d..e1cafaf 100644
--- a/.trunk/trunk.yaml
+++ b/.trunk/trunk.yaml
@@ -11,7 +11,7 @@ cli:
 lint:
   linters:
     - name: horton
-      type: lsp_json
+      type: sarif
       files: [ALL]
       command: ["${workspace}/target/debug/horton", "--file", "${path}"]
       success_codes: [0, 1]
diff --git a/Cargo.toml b/Cargo.toml
index 8e8b114..c00440a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,13 +12,22 @@ name = "horton"
 path = "src/lib.rs"
 
 [dependencies]
+anyhow = "1.0.64"
 clap = { version = "4.0.8", features = ["derive"] }
+env_logger = "0.9.1"
+git2 = { version = "0.15", default-features = false }
+lazy_static = "1.4.0"
+log = "0.4.17"
 regex = "1.6.0"
 serde = { version = "1.0.145", features = ["derive"] }
 serde_json = "1.0.85"
-anyhow = "1.0.64"
-log = "0.4.17"
-env_logger = "0.9.1"
+serde-sarif = "0.3.4"
+
+[dev-dependencies]
+assert_cmd = "2.0"
+function_name = "0.2.0"
+predicates = "2.1"
+tempfile = "3.3.0"
 
 [profile.release]
 codegen-units = 1
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
new file mode 100644
index 0000000..716c1fa
--- /dev/null
+++ b/rust-toolchain.toml
@@ -0,0 +1,3 @@
+[toolchain]
+channel = "1.64.0"
+components = ["cargo-watch"]
diff --git a/src/lsp_json.rs b/src/diagnostic.rs
similarity index 90%
rename from src/lsp_json.rs
rename to src/diagnostic.rs
index c84cef6..9cc582a 100644
--- a/src/lsp_json.rs
+++ b/src/diagnostic.rs
@@ -16,6 +16,7 @@ pub struct Position {
 
 #[derive(Serialize)]
 pub struct Range {
+    pub path: String,
     pub start: Position,
     pub end: Position,
 }
@@ -29,11 +30,11 @@ pub struct Diagnostic {
 }
 
 #[derive(Serialize, Default)]
-pub struct LspJson {
+pub struct Diagnostics {
     pub diagnostics: Vec<Diagnostic>,
 }
 
-impl LspJson {
+impl Diagnostics {
     pub fn to_string(&self) -> anyhow::Result<String> {
         let as_string = serde_json::to_string(&self)?;
         Ok(as_string)
diff --git a/src/git.rs b/src/git.rs
new file mode 100644
index 0000000..bf73af9
--- /dev/null
+++ b/src/git.rs
@@ -0,0 +1,68 @@
+use git2::{Delta, DiffOptions, Repository};
+use std::collections::HashSet;
+use std::path::PathBuf;
+
+#[derive(Debug)]
+pub struct Hunk {
+    pub path: PathBuf,
+    pub begin: i64,
+    pub end: i64,
+}
+
+#[derive(Debug, Default)]
+pub struct NewOrModified {
+    /// Set of modified line ranges in new/existing files
+    pub hunks: Vec<Hunk>,
+
+    /// Set of new/modified files
+    pub paths: HashSet<PathBuf>,
+}
+
+pub fn modified_since(upstream: &str) -> anyhow::Result<NewOrModified> {
+    let repo = Repository::open(".")?;
+
+    let upstream_tree = repo.find_reference(upstream)?.peel_to_tree()?;
+
+    let diff = {
+        let mut diff_opts = DiffOptions::new();
+        diff_opts.include_untracked(true);
+
+        repo.diff_tree_to_workdir_with_index(Some(&upstream_tree), Some(&mut diff_opts))?
+    };
+
+    let mut ret = NewOrModified::default();
+    diff.foreach(
+        &mut |_, _| true,
+        None,
+        Some(&mut |delta, hunk| {
+            match delta.status() {
+                Delta::Unmodified
+                | Delta::Added
+                | Delta::Modified
+                | Delta::Renamed
+                | Delta::Copied
+                | Delta::Untracked => {
+                    if let Some(path) = delta.new_file().path() {
+                        let path = path.to_path_buf();
+
+                        ret.paths.insert(path.clone());
+                        ret.hunks.push(Hunk {
+                            path,
+                            begin: hunk.new_start() as i64,
+                            end: (hunk.new_start() + hunk.new_lines()) as i64,
+                        });
+                    } else {
+                        // TODO(sam): accumulate errors and return them
+                        // See https://doc.rust-lang.org/rust-by-example/error/iter_result.html
+                        log::error!("Found git delta where new_file had no path");
+                    }
+                }
+                _ => (),
+            }
+            true
+        }),
+        None,
+    )?;
+
+    Ok(ret)
+}
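For orientation, a minimal sketch of how the new git module is meant to be consumed (src/main.rs below does essentially this; the "HEAD" argument and the println! are illustrative only):

    let modified = horton::git::modified_since("HEAD")?;
    for hunk in &modified.hunks {
        // begin/end form the half-open line range [begin, end) touched in the new file
        println!("{}: [{}, {})", hunk.path.display(), hunk.begin, hunk.end);
    }
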
diff --git a/src/lib.rs b/src/lib.rs
index 278b3bd..1e0ab4f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1 +1,3 @@
-pub mod lsp_json;
+pub mod diagnostic;
+pub mod git;
+pub mod rules;
diff --git a/src/main.rs b/src/main.rs
index ccaa741..7436641 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,70 +1,91 @@
-use std::fs::File;
-use std::io::{BufRead, BufReader};
-
-use anyhow::Context;
 use clap::Parser;
-use horton::lsp_json;
-use regex::Regex;
+use horton::diagnostic;
+use horton::git;
+use horton::rules::if_change_then_change::ictc;
+use horton::rules::pls_no_land::pls_no_land;
+use serde_sarif::sarif;
 
 #[derive(Parser, Debug)]
 #[clap(version = "0.1", author = "Trunk Technologies Inc.")]
 struct Opts {
-    #[clap(short, long)]
-    file: String,
-}
-
-type LinesView = Vec<String>;
-
-fn lines_view<R: BufRead>(reader: R) -> anyhow::Result<LinesView> {
-    let mut ret: LinesView = LinesView::default();
-    for line in reader.lines() {
-        let line = line?;
-        ret.push(line);
-    }
-    Ok(ret)
+    #[clap(long)]
+    // #[arg(default_value_t = String::from("refs/heads/main"))]
+    #[arg(default_value_t = String::from("HEAD"))]
+    upstream: String,
 }
 
 fn run() -> anyhow::Result<()> {
     let opts: Opts = Opts::parse();
 
-    let re = Regex::new(r"(?i)(DO[\s_-]+NOT[\s_-]+LAND)").unwrap();
-    let in_file =
-        File::open(&opts.file).with_context(|| format!("failed to open: {}", opts.file))?;
-    let in_buff = BufReader::new(in_file);
-    let lines_view = lines_view(in_buff).context("failed to build lines view")?;
-    let mut ret = lsp_json::LspJson::default();
+    let mut ret = diagnostic::Diagnostics::default();
+    let modified = git::modified_since(&opts.upstream)?;
 
-    for (i, line) in lines_view.iter().enumerate() {
-        // trunk-ignore(horton/do-not-land)
-        if line.contains("trunk-ignore(horton/do-not-land)") {
-            continue;
-        }
-        let m = if let Some(m) = re.find(line) {
-            m
-        } else {
-            continue;
-        };
+    ret.diagnostics.extend(pls_no_land(&modified.paths)?);
+    ret.diagnostics.extend(ictc(&modified.hunks)?);
 
-        ret.diagnostics.push(lsp_json::Diagnostic {
-            range: lsp_json::Range {
-                start: lsp_json::Position {
-                    line: i as u64,
-                    character: m.start() as u64,
-                },
-                end: lsp_json::Position {
-                    line: i as u64,
-                    character: m.end() as u64,
-                },
-            },
-            severity: lsp_json::Severity::Error,
-            // trunk-ignore(horton/do-not-land)
-            code: "do-not-land".to_string(),
-            message: format!("Found '{}'", m.as_str()),
-        });
-    }
+    // TODO(sam): figure out how to stop using unwrap() inside the map() calls below
+    let results: Vec<sarif::Result> = ret
+        .diagnostics
+        .iter()
+        .map(|d| {
+            sarif::ResultBuilder::default()
+                .level("error")
+                .locations([sarif::LocationBuilder::default()
+                    .physical_location(
+                        sarif::PhysicalLocationBuilder::default()
+                            .artifact_location(
+                                sarif::ArtifactLocationBuilder::default()
+                                    .uri(d.range.path.clone())
+                                    .build()
+                                    .unwrap(),
+                            )
+                            .region(
+                                sarif::RegionBuilder::default()
+                                    .start_line(d.range.start.line as i64 + 1)
+                                    .start_column(d.range.start.character as i64 + 1)
+                                    .end_line(d.range.end.line as i64 + 1)
+                                    .end_column(d.range.end.character as i64 + 1)
+                                    .build()
+                                    .unwrap(),
+                            )
+                            .build()
+                            .unwrap(),
+                    )
+                    .build()
+                    .unwrap()])
+                .message(
+                    sarif::MessageBuilder::default()
+                        .text(d.message.clone())
+                        .build()
+                        .unwrap(),
+                )
+                .rule_id(d.code.clone())
+                .build()
+                .unwrap()
+        })
+        .collect();
+
+    let run = sarif::RunBuilder::default()
+        .tool(
+            sarif::ToolBuilder::default()
+                .driver(
+                    sarif::ToolComponentBuilder::default()
+                        .name("horton")
+                        .build()
+                        .unwrap(),
+                )
+                .build()
+                .unwrap(),
+        )
+        .results(results)
+        .build()?;
+    let sarif_built = sarif::SarifBuilder::default()
+        .version("2.1.0")
+        .runs([run])
+        .build()?;
 
-    let diagnostics_str = ret.to_string()?;
-    println!("{}", diagnostics_str);
+    let sarif = serde_json::to_string_pretty(&sarif_built)?;
+    println!("{}", sarif);
 
     Ok(())
 }
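Since .trunk/trunk.yaml now declares type: sarif, the binary prints a SARIF 2.1.0 document instead of the old lsp_json payload. The output has roughly this shape (the values and file path below are illustrative; the exact JSON comes from serde-sarif):

    {
      "version": "2.1.0",
      "runs": [{
        "tool": { "driver": { "name": "horton" } },
        "results": [{
          "ruleId": "do-not-land",
          "level": "error",
          "message": { "text": "Found 'DO NOT LAND'" },
          "locations": [{
            "physicalLocation": {
              "artifactLocation": { "uri": "src/example.rs" },
              "region": { "startLine": 7, "startColumn": 4, "endLine": 7, "endColumn": 15 }
            }
          }]
        }]
      }]
    }
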
diff --git a/src/rules/bin-bash.rs b/src/rules/bin-bash.rs
deleted file mode 100644
index e69de29..0000000
diff --git a/src/rules/if_change_then_change.rs b/src/rules/if_change_then_change.rs
new file mode 100644
index 0000000..e2d3f15
--- /dev/null
+++ b/src/rules/if_change_then_change.rs
@@ -0,0 +1,123 @@
+use anyhow::Context;
+use log::debug;
+use std::collections::{HashMap, HashSet};
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+use std::path::PathBuf;
+
+use regex::Regex;
+
+use crate::diagnostic;
+use crate::git::Hunk;
+
+#[derive(Debug)]
+pub struct IctcBlock {
+    pub path: PathBuf,
+    pub begin: i64,
+    pub end: i64,
+    pub thenchange: PathBuf,
+}
+
+lazy_static::lazy_static! {
+    static ref RE_BEGIN: Regex = Regex::new(r" *(//|#) *ifchange").unwrap();
+    static ref RE_END: Regex = Regex::new(r" *(//|#) *thenchange (.*)").unwrap();
+
+}
+
+pub fn ictc(hunks: &Vec<Hunk>) -> anyhow::Result<Vec<diagnostic::Diagnostic>> {
+    // TODO(sam): this _should_ be a iter-map-collect, but unclear how to apply a reducer
+    // between the map and collect (there can be multiple hunks with the same path)
+    let mut modified_lines_by_path: HashMap<PathBuf, HashSet<i64>> = HashMap::new();
+    for h in hunks {
+        modified_lines_by_path
+            .entry(h.path.clone())
+            .or_insert_with(HashSet::new)
+            .extend(h.begin..h.end);
+    }
+    let modified_lines_by_path = modified_lines_by_path;
+
+    let mut blocks: Vec<IctcBlock> = Vec::new();
+    for h in hunks {
+        let mut ifttt_begin: i64 = -1;
+
+        let in_file =
+            File::open(&h.path).with_context(|| format!("failed to open: {:#?}", h.path))?;
+        let in_buf = BufReader::new(in_file);
+        for (i, line) in lines_view(in_buf)
+            .context("failed to build lines view")?
+            .iter()
+            .enumerate()
+            .map(|(i, line)| (i + 1, line))
+        {
+            if RE_BEGIN.find(line).is_some() {
+                ifttt_begin = i as i64;
+            } else if let Some(end) = RE_END.captures(line) {
+                if ifttt_begin != -1 {
+                    let block = IctcBlock {
+                        path: h.path.clone(),
+                        begin: ifttt_begin,
+                        end: i as i64,
+                        thenchange: PathBuf::from(end.get(2).unwrap().as_str()),
+                    };
+
+                    let block_lines = HashSet::from_iter(block.begin..block.end);
+
+                    if !block_lines.is_disjoint(
+                        modified_lines_by_path
+                            .get(&block.path)
+                            .unwrap_or(&HashSet::new()),
+                    ) {
+                        blocks.push(block);
+                    }
+
+                    ifttt_begin = -1;
+                }
+            }
+        }
+    }
+
+    let blocks_by_path: HashMap<&PathBuf, &IctcBlock> =
+        blocks.iter().map(|b| (&b.path, b)).collect();
+
+    let ret: Vec<diagnostic::Diagnostic> = blocks
+        .iter()
+        .filter(|b| blocks_by_path.get(&b.thenchange).is_none())
+        .map(|b| diagnostic::Diagnostic {
+            range: diagnostic::Range {
+                path: b.path.to_str().unwrap().to_string(),
+                start: diagnostic::Position {
+                    line: b.begin as u64,
+                    character: 0,
+                },
+                end: diagnostic::Position {
+                    line: b.end as u64,
+                    character: 0,
+                },
+            },
+            severity: diagnostic::Severity::Error,
+            code: "if-change-then-change".to_string(),
+            message: format!(
+                "{}[{}, {}) was modified, but no if-change-then-change block in {} was modified",
+                b.path.display(),
+                b.begin,
+                b.end,
+                b.thenchange.display()
+            ),
+        })
+        .collect();
+
+    debug!("ICTC blocks are:\n{:?}", blocks);
+
+    Ok(ret)
+}
+
+type LinesView = Vec<String>;
+
+fn lines_view<R: BufRead>(reader: R) -> anyhow::Result<LinesView> {
+    let mut ret: LinesView = LinesView::default();
+    for line in reader.lines() {
+        let line = line?;
+        ret.push(line);
+    }
+    Ok(ret)
+}
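Based on RE_BEGIN and RE_END above, a guarded block looks roughly like this ('//' and '#' comment styles both match; the constant and the target path are purely illustrative):

    // ifchange
    pub const MAX_RETRIES: u32 = 3;
    // thenchange src/retry.rs

If the diff touches lines inside such a block but no ifchange/thenchange block in the named target file was also modified, the rule emits an "if-change-then-change" diagnostic for the block.
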
diff --git a/src/rules/mod.rs b/src/rules/mod.rs
new file mode 100644
index 0000000..a0163b0
--- /dev/null
+++ b/src/rules/mod.rs
@@ -0,0 +1,2 @@
+pub mod if_change_then_change;
+pub mod pls_no_land;
diff --git a/src/rules/pls_no_land.rs b/src/rules/pls_no_land.rs
new file mode 100644
index 0000000..0a21645
--- /dev/null
+++ b/src/rules/pls_no_land.rs
@@ -0,0 +1,75 @@
+extern crate regex;
+
+use crate::diagnostic;
+use anyhow::Context;
+use regex::Regex;
+use std::collections::HashSet;
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+use std::path::PathBuf;
+
+lazy_static::lazy_static! {
+    static ref RE: Regex = Regex::new(r"(?i)(DO[\s_-]+NOT[\s_-]+LAND)").unwrap();
+    static ref IGNORE_RE: Regex =
+        Regex::new(r"trunk-ignore(-begin|-end|-all)?\(horton/do-not-land\)").unwrap();
+}
+
+// Checks for $re and other forms thereof in source code
+//
+// Note that this is named "pls_no_land" to avoid causing DNL matches everywhere in horton.
+pub fn pls_no_land(paths: &HashSet<PathBuf>) -> anyhow::Result<Vec<diagnostic::Diagnostic>> {
+    let mut ret = Vec::new();
+
+    for path in paths {
+        ret.splice(0..0, pls_no_land_impl(path)?);
+    }
+
+    Ok(ret)
+}
+
+type LinesView = Vec<String>;
+
+fn lines_view<R: BufRead>(reader: R) -> anyhow::Result<LinesView> {
+    let mut ret: LinesView = LinesView::default();
+    for line in reader.lines() {
+        let line = line?;
+        ret.push(line);
+    }
+    Ok(ret)
+}
+
+fn pls_no_land_impl(path: &PathBuf) -> anyhow::Result<Vec<diagnostic::Diagnostic>> {
+    let in_file = File::open(path).with_context(|| format!("failed to open: {:#?}", path))?;
+    let in_buf = BufReader::new(in_file);
+    let lines_view = lines_view(in_buf).context("failed to build lines view")?;
+
+    let mut ret = Vec::new();
+
+    for (i, line) in lines_view.iter().enumerate() {
+        // Skip lines that carry an explicit trunk-ignore marker for this rule.
+        if IGNORE_RE.is_match(line) {
+            continue;
+        }
+
+        if let Some(m) = RE.find(line) {
+            ret.push(diagnostic::Diagnostic {
+                range: diagnostic::Range {
+                    path: path.to_str().unwrap().to_string(),
+                    start: diagnostic::Position {
+                        line: i as u64,
+                        character: m.start() as u64,
+                    },
+                    end: diagnostic::Position {
+                        line: i as u64,
+                        character: m.end() as u64,
+                    },
+                },
+                severity: diagnostic::Severity::Error,
+                code: "do-not-land".to_string(),
+                message: format!("Found '{}'", m.as_str()),
+            });
+        }
+    }
+
+    Ok(ret)
+}
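The DNL regex is case-insensitive and accepts whitespace, '_' or '-' between the words, and a line is skipped when it carries its own ignore marker. Illustrative only (the surrounding code is made up):

    // Any of these in a new or modified file would be flagged:
    //     DO NOT LAND, do_not_land, Do-Not-Land
    dbg!(config); // trunk-ignore(horton/do-not-land) -- this line is skipped
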
diff --git a/tests/do_not_land_test.rs b/tests/do_not_land_test.rs
new file mode 100644
index 0000000..4e7391d
--- /dev/null
+++ b/tests/do_not_land_test.rs
@@ -0,0 +1,16 @@
+use predicates::prelude::*; // Used for writing assertions
+
+mod integration_testing;
+use integration_testing::run_horton;
+
+#[test]
+fn do_not_land() {
+    let mut tmpfile = tempfile::tempfile().unwrap();
+
+    let horton = run_horton("src/main.rs").unwrap();
+
+    assert_eq!(
+        true,
+        predicates::str::contains("Found 'do-not-land'").eval(&horton)
+    );
+}
diff --git a/tests/integration_testing.rs b/tests/integration_testing.rs
new file mode 100644
index 0000000..705346e
--- /dev/null
+++ b/tests/integration_testing.rs
@@ -0,0 +1,15 @@
+use assert_cmd::prelude::*; // Add methods on commands
+
+use std::error::Error;
+use std::process::Command;
+use std::result::Result;
+
+pub fn run_horton(file: &str) -> Result<String, Box<dyn Error>> {
+    let mut cmd = Command::cargo_bin("horton")?;
+
+    cmd.arg("--file").arg(file);
+
+    let output = cmd.output()?;
+
+    return Ok(String::from_utf8(output.stdout)?);
+}
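To exercise the new flow locally, something along these lines should work (the ref to diff against is up to you; HEAD is the default, and refs/heads/main appears as the commented-out alternative in Opts):

    cargo build
    ./target/debug/horton --upstream refs/heads/main

The SARIF report is printed to stdout, which is what the sarif-type linter definition in .trunk/trunk.yaml is expected to consume.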