diff --git a/examples/client-app/src/main.rs b/examples/client-app/src/main.rs index c4d624bc..a57f2c11 100644 --- a/examples/client-app/src/main.rs +++ b/examples/client-app/src/main.rs @@ -26,6 +26,7 @@ async fn main() { let language: Language = Language { name: "json".to_owned(), query: TopiaryQuery::new(&grammar, query).unwrap(), + comment_query: None, grammar, indent: None, }; diff --git a/topiary-cli/src/io.rs b/topiary-cli/src/io.rs index a01cdd7d..bee0f6e6 100644 --- a/topiary-cli/src/io.rs +++ b/topiary-cli/src/io.rs @@ -113,6 +113,7 @@ pub struct InputFile<'cfg> { source: InputSource, language: &'cfg topiary_config::language::Language, query: QuerySource, + comment_query: Option, } impl<'cfg> InputFile<'cfg> { @@ -125,9 +126,21 @@ impl<'cfg> InputFile<'cfg> { }; let query = TopiaryQuery::new(&grammar, &contents)?; + // Can't use `map` because of closures, async, and Result. + let comment_contents = match &self.comment_query { + Some(QuerySource::Path(query)) => Some(tokio::fs::read_to_string(query).await?), + Some(QuerySource::BuiltIn(contents)) => Some(contents.to_owned()), + None => None, + }; + let comment_query = match comment_contents { + Some(c) => Some(TopiaryQuery::new(&grammar, &c)?), + None => None, + }; + Ok(Language { name: self.language.name.clone(), query, + comment_query, grammar, indent: self.language().config.indent.clone(), }) @@ -178,9 +191,9 @@ impl<'cfg, 'i> Inputs<'cfg> { InputFrom::Stdin(language_name, query) => { vec![(|| { let language = config.get_language(&language_name)?; - let query_source: QuerySource = match query { + let (query, comment_query) = match query { // The user specified a query file - Some(p) => p, + Some(p) => (p, None), // The user did not specify a file, try the default locations None => to_query_from_language(language)?, }; @@ -188,7 +201,8 @@ impl<'cfg, 'i> Inputs<'cfg> { Ok(InputFile { source: InputSource::Stdin, language, - query: query_source, + query, + comment_query, }) })()] } @@ -197,12 +211,13 @@ 
impl<'cfg, 'i> Inputs<'cfg> { .into_iter() .map(|path| { let language = config.detect(&path)?; - let query: QuerySource = to_query_from_language(language)?; + let (query, comment_query) = to_query_from_language(language)?; Ok(InputFile { source: InputSource::Disk(path, None), language, query, + comment_query, }) }) .collect(), @@ -212,16 +227,21 @@ impl<'cfg, 'i> Inputs<'cfg> { } } -fn to_query_from_language(language: &topiary_config::language::Language) -> CLIResult { - let query: QuerySource = match language.find_query_file() { - Ok(p) => p.into(), +fn to_query_from_language( + language: &topiary_config::language::Language, +) -> CLIResult<(QuerySource, Option)> { + let query: (QuerySource, Option) = match language.find_query_file() { + Ok((path, comment_path)) => (path.into(), comment_path.map(|p| p.into())), // For some reason, Topiary could not find any // matching file in a default location. As a final attempt, try the // builtin ones. Store the error, return that if we // fail to find anything, because the builtin error might be unexpected. Err(e) => { log::warn!("No query files found in any of the expected locations. Falling back to compile-time included files."); - to_query(&language.name).map_err(|_| e)? 
+ ( + to_query(&language.name).map_err(|_| e)?, + to_comment_query(&language.name)?, + ) } }; Ok(query) @@ -364,3 +384,45 @@ where )), } } + +fn to_comment_query(name: T) -> CLIResult> +where + T: AsRef + fmt::Display, +{ + match name.as_ref() { + #[cfg(feature = "bash")] + "bash" => Ok(Some(topiary_queries::bash_comment().into())), + + #[cfg(feature = "css")] + "css" => Ok(Some(topiary_queries::css_comment().into())), + + #[cfg(feature = "json")] + "json" => Ok(None), + + #[cfg(feature = "nickel")] + "nickel" => Ok(Some(topiary_queries::nickel_comment().into())), + + #[cfg(feature = "ocaml")] + "ocaml" => Ok(Some(topiary_queries::ocaml_comment().into())), + + #[cfg(feature = "ocaml_interface")] + "ocaml_interface" => Ok(Some(topiary_queries::ocaml_interface_comment().into())), + + #[cfg(feature = "ocamllex")] + "ocamllex" => Ok(Some(topiary_queries::ocamllex_comment().into())), + + #[cfg(feature = "rust")] + "rust" => Ok(Some(topiary_queries::rust_comment().into())), + + #[cfg(feature = "toml")] + "toml" => Ok(Some(topiary_queries::toml_comment().into())), + + #[cfg(feature = "tree_sitter_query")] + "tree_sitter_query" => Ok(Some(topiary_queries::tree_sitter_query_comment().into())), + + name => Err(TopiaryError::Bin( + format!("The specified language is unsupported: {}", name), + Some(CLIError::UnsupportedLanguage(name.to_string())), + )), + } +} diff --git a/topiary-cli/tests/samples/expected/nickel.ncl b/topiary-cli/tests/samples/expected/nickel.ncl index 125c9187..8276aad3 100644 --- a/topiary-cli/tests/samples/expected/nickel.ncl +++ b/topiary-cli/tests/samples/expected/nickel.ncl @@ -182,6 +182,7 @@ x, # let blocks + let x = 1, y = 2 in x + y, let x = 1, diff --git a/topiary-cli/tests/samples/expected/ocaml-interface.mli b/topiary-cli/tests/samples/expected/ocaml-interface.mli index 86a80c48..e641a3d9 100644 --- a/topiary-cli/tests/samples/expected/ocaml-interface.mli +++ b/topiary-cli/tests/samples/expected/ocaml-interface.mli @@ -437,8 +437,7 @@ module Gas 
: sig val pp_cost_as_gas : Format.formatter -> cost -> unit - type error += Operation_quota_exceeded - (* `Temporary *) + type error += Operation_quota_exceeded (* `Temporary *) (** [consume ctxt cost] subtracts [cost] to the current operation gas level in [ctxt]. This operation may fail with @@ -453,11 +452,9 @@ module Gas : sig would fall below [0]. *) val consume_from : Arith.fp -> cost -> Arith.fp tzresult - type error += Block_quota_exceeded - (* `Temporary *) + type error += Block_quota_exceeded (* `Temporary *) - type error += Gas_limit_too_high - (* `Permanent *) + type error += Gas_limit_too_high (* `Permanent *) (** See {!Raw_context.consume_gas_limit_in_block}. *) val consume_limit_in_block : context -> 'a Arith.t -> context tzresult @@ -1398,11 +1395,9 @@ module Sapling : sig val rpc_arg : t RPC_arg.arg - val parse_z : Z.t -> t - (* To be used in parse_data only *) + val parse_z : Z.t -> t (* To be used in parse_data only *) - val unparse_to_z : t -> Z.t - (* To be used in unparse_data only *) + val unparse_to_z : t -> Z.t (* To be used in unparse_data only *) end (** Create a fresh sapling state in the context. 
*) @@ -4925,11 +4920,9 @@ module Operation : sig val compare_by_passes : packed_operation -> packed_operation -> int - type error += Missing_signature - (* `Permanent *) + type error += Missing_signature (* `Permanent *) - type error += Invalid_signature - (* `Permanent *) + type error += Invalid_signature (* `Permanent *) val check_signature : public_key -> Chain_id.t -> _ operation -> unit tzresult @@ -5458,14 +5451,11 @@ module Fees : sig Z.t -> (context * Z.t * Receipt.balance_updates) tzresult Lwt.t - type error += Cannot_pay_storage_fee - (* `Temporary *) + type error += Cannot_pay_storage_fee (* `Temporary *) - type error += Operation_quota_exceeded - (* `Temporary *) + type error += Operation_quota_exceeded (* `Temporary *) - type error += Storage_limit_too_high - (* `Permanent *) + type error += Storage_limit_too_high (* `Permanent *) val check_storage_limit : context -> storage_limit: Z.t -> unit tzresult end diff --git a/topiary-cli/tests/samples/expected/ocaml.ml b/topiary-cli/tests/samples/expected/ocaml.ml index 706e0cba..28dbe63c 100644 --- a/topiary-cli/tests/samples/expected/ocaml.ml +++ b/topiary-cli/tests/samples/expected/ocaml.ml @@ -51,11 +51,10 @@ let id6 = function x -> x (* Extensible buffers *) -type t = (* Multi- - * line comment with - * too much padding. - *) -{ +type t = { (* Multi- + * line comment with + * too much padding. + *) mutable buffer: bytes; mutable position: int; (* End-of-line comment *) mutable length: int; @@ -838,12 +837,12 @@ let _ = [@@@deprecated "writing code is deprecated, use ai-generated code instead"] type t = { - verbose: int; (** Verbosity level. *) - loggers: string; + verbose: int; (** Loggers enabled. *) + loggers: string; bflags: bool StrMap.t - (** Boolean flags. *) +(** Boolean flags. 
*) } let _ = { diff --git a/topiary-cli/tests/samples/expected/rust.rs b/topiary-cli/tests/samples/expected/rust.rs index 3f449888..16c6b8e1 100644 --- a/topiary-cli/tests/samples/expected/rust.rs +++ b/topiary-cli/tests/samples/expected/rust.rs @@ -14,10 +14,10 @@ pub fn node_kind_for_id(&self, id: u16) -> &'static str { // More comments. -enum OneLine { Leaf { content: String, /* comment */ id: usize /* another comment */, size: usize, }, Hardline { content: String, id: usize, }, Space, } // End of line comment +enum OneLine { Leaf { content: String, id: usize, size: usize, }, Hardline { content: String, id: usize, }, Space, } /* comment */ /* another comment */ // End of line comment enum ExpandEnum { - Leaf { content: String, /* Comment between fields. */ id: usize, size: usize, }, + Leaf { content: String, id: usize, size: usize, }, /* Comment between fields. */ Hardline { content: String, id: usize, }, Space, } @@ -91,21 +91,25 @@ enum Mode6 { fn inline_let() { let hi = 1; } // While loop spacing -while i == true { - let i = 42; +fn my_while() { + while i == true { + let i = 42; + } } // Scoped blocks -{ - let i = 42; -} -{ - let i = 43; +fn foo() { + { + let i = 42; + } + { + let i = 43; + } } // Empty block inside of impl function impl MyTrait for MyStruct { fn foo() { - // ... logic ... + // ... logic ... } } diff --git a/topiary-cli/tests/samples/expected/tree_sitter_query.scm b/topiary-cli/tests/samples/expected/tree_sitter_query.scm index e31588ab..a139165d 100644 --- a/topiary-cli/tests/samples/expected/tree_sitter_query.scm +++ b/topiary-cli/tests/samples/expected/tree_sitter_query.scm @@ -1023,8 +1023,8 @@ ":" @append_indent_start (_) @append_indent_end . - ; just doing _ above doesn't work, because it matches the final named node as - ; well as the final non-named node, causing double indentation. +; just doing _ above doesn't work, because it matches the final named node as +; well as the final non-named node, causing double indentation. 
) (value_specification diff --git a/topiary-cli/tests/samples/input/rust.rs b/topiary-cli/tests/samples/input/rust.rs index 3ef36584..200aa83e 100644 --- a/topiary-cli/tests/samples/input/rust.rs +++ b/topiary-cli/tests/samples/input/rust.rs @@ -91,17 +91,21 @@ enum Mode6 { fn inline_let() { let hi = 1; } // While loop spacing -while i == true { - let i = 42; +fn my_while() { + while i == true { + let i = 42; + } } // Scoped blocks -{ - let i = 42; -} -{ - let i = 43; +fn foo() { + { + let i = 42; + } + { + let i = 43; + } } // Empty block inside of impl function diff --git a/topiary-config/src/language.rs b/topiary-config/src/language.rs index 297f62c5..daa4b80f 100644 --- a/topiary-config/src/language.rs +++ b/topiary-config/src/language.rs @@ -81,8 +81,10 @@ impl Language { } #[cfg(not(target_arch = "wasm32"))] - pub fn find_query_file(&self) -> TopiaryConfigResult { - let basename = PathBuf::from(self.name.as_str()).with_extension("scm"); + pub fn find_query_file(&self) -> TopiaryConfigResult<(PathBuf, Option)> { + let name = self.name.clone(); + let basename = PathBuf::from(&name).with_extension("scm"); // "clang.scm" + let comment_basename = PathBuf::from(&name).with_extension("comment.scm"); // "clang.comment.scm" #[rustfmt::skip] let potentials: [Option; 4] = [ @@ -92,12 +94,20 @@ impl Language { Some(PathBuf::from("../topiary-queries/queries")), ]; - potentials + let query_file = potentials .into_iter() .flatten() .map(|path| path.join(&basename)) .find(|path| path.exists()) - .ok_or_else(|| TopiaryConfigError::QueryFileNotFound(basename)) + .ok_or_else(|| TopiaryConfigError::QueryFileNotFound(basename))?; + + let comment_query_file = query_file.parent().unwrap().join(comment_basename); + + if comment_query_file.exists() { + Ok((query_file, Some(comment_query_file))) + } else { + Ok((query_file, None)) + } } #[cfg(not(target_arch = "wasm32"))] diff --git a/topiary-core/benches/benchmark.rs b/topiary-core/benches/benchmark.rs index f25b9b53..49fe55b8 100644 
--- a/topiary-core/benches/benchmark.rs +++ b/topiary-core/benches/benchmark.rs @@ -7,6 +7,8 @@ use topiary_core::{formatter, Language, Operation, TopiaryQuery}; async fn format() { let input = fs::read_to_string("../topiary-cli/tests/samples/input/ocaml.ml").unwrap(); let query_content = fs::read_to_string("../topiary-queries/queries/ocaml.scm").unwrap(); + let comment_query_content = + fs::read_to_string("../topiary-queries/queries/ocaml.comment.scm").unwrap(); let ocaml = tree_sitter_ocaml::LANGUAGE_OCAML; let mut input = input.as_bytes(); @@ -15,6 +17,9 @@ async fn format() { let language: Language = Language { name: "ocaml".to_owned(), query: TopiaryQuery::new(&ocaml.clone().into(), &query_content).unwrap(), + comment_query: Some( + TopiaryQuery::new(&ocaml.clone().into(), &comment_query_content).unwrap(), + ), grammar: ocaml.into(), indent: None, }; diff --git a/topiary-core/src/atom_collection.rs b/topiary-core/src/atom_collection.rs index 574cd05d..872da2fd 100644 --- a/topiary-core/src/atom_collection.rs +++ b/topiary-core/src/atom_collection.rs @@ -1,6 +1,6 @@ use std::{ borrow::Cow, - collections::{HashMap, HashSet}, + collections::{HashMap, HashSet, VecDeque}, mem, ops::Deref, }; @@ -8,7 +8,10 @@ use std::{ use topiary_tree_sitter_facade::Node; use crate::{ - tree_sitter::NodeExt, Atom, FormatterError, FormatterResult, ScopeCondition, ScopeInformation, + comments::{AnchoredComment, Comment, Commented}, + common::InputSection, + tree_sitter::NodeExt, + Atom, FormatterError, FormatterResult, ScopeCondition, ScopeInformation, }; /// A struct that holds sets of node IDs that have line breaks before or after them. @@ -37,6 +40,10 @@ pub struct AtomCollection { /// something to a node, a new Atom is added to this HashMap. /// The key of the hashmap is the identifier of the node. append: HashMap>, + /// Maps node IDs to comments before that node. Comments are stored in reading order. + comments_before: HashMap>, + /// Maps node IDs to comments after that node. 
Comments are stored in reading order. + comments_after: HashMap>, /// A query file can define custom leaf nodes (nodes that Topiary should not /// touch during formatting). When such a node is encountered, its id is stored in /// this HashSet. @@ -72,6 +79,8 @@ impl AtomCollection { atoms, prepend: HashMap::new(), append: HashMap::new(), + comments_before: HashMap::new(), + comments_after: HashMap::new(), specified_leaf_nodes: HashSet::new(), parent_leaf_nodes: HashMap::new(), multi_line_nodes: HashSet::new(), @@ -87,6 +96,7 @@ impl AtomCollection { root: &Node, source: &[u8], specified_leaf_nodes: HashSet, + mut comments: Vec, ) -> FormatterResult { // Flatten the tree, from the root node, in a depth-first traversal let dfs_nodes = dfs_flatten(root); @@ -100,6 +110,8 @@ impl AtomCollection { atoms: Vec::new(), prepend: HashMap::new(), append: HashMap::new(), + comments_before: HashMap::new(), + comments_after: HashMap::new(), specified_leaf_nodes, parent_leaf_nodes: HashMap::new(), multi_line_nodes, @@ -109,9 +121,18 @@ impl AtomCollection { counter: 0, }; - atoms.collect_leafs_inner(root, source, &Vec::new(), 0)?; + atoms.collect_leafs_inner(root, source, &mut comments, &Vec::new(), 0)?; - Ok(atoms) + if let Some(comment) = comments.pop() { + // Some anchored couldn't be attached back to the code: + // raise an error with the first of them + Err(FormatterError::CommentAbandoned( + comment.comment_text, + format!("{:?}", comment.commented), + )) + } else { + Ok(atoms) + } } // wrap inside a conditional atom if #single/multi_line_scope_only! is set @@ -495,11 +516,14 @@ impl AtomCollection { /// A leaf node is either a node with no children or a node that is specified as a leaf node by the formatter. /// A leaf parent is the closest ancestor of a leaf node. /// + /// This function also attach comments to the leaf node that contains their anchor. + /// /// # Arguments /// /// * `node` - The current node to process. 
/// * `source` - The full source code as a byte slice. /// * `parent_ids` - A vector of node ids that are the ancestors of the current node. + /// * `comments` - A vector that stores unattached comments. /// * `level` - The depth of the current node in the CST tree. /// /// # Errors @@ -509,6 +533,7 @@ impl AtomCollection { &mut self, node: &Node, source: &[u8], + comments: &mut Vec, parent_ids: &[usize], level: usize, ) -> FormatterResult<()> { @@ -535,15 +560,38 @@ impl AtomCollection { self.atoms.push(Atom::Leaf { content: String::from(node.utf8_text(source)?), id, - original_position: node.start_position().into(), + original_column: node.start_position().column() as i32, single_line_no_indent: false, multi_line_indent_all: false, }); // Mark all sub-nodes as having this node as a "leaf parent" self.mark_leaf_parent(node, node.id()); + // Test the node for comments + // `comments` is sorted in reverse order, so `pop()` gets the first one. + while let Some(comment) = comments.pop() { + let node_section: InputSection = node.into(); + // REVIEW: the double borrow is sort of weird, is this idiomatic? + if node_section.contains(&(&comment).into()) { + match comment.commented { + Commented::CommentedAfter { .. } => self + .comments_before + .entry(id) + .or_default() + .push_back((&comment).into()), + Commented::CommentedBefore(_) => self + .comments_after + .entry(id) + .or_default() + .push_back((&comment).into()), + } + } else { + comments.push(comment); + break; + } + } } else { for child in node.children(&mut node.walk()) { - self.collect_leafs_inner(&child, source, &parent_ids, level + 1)?; + self.collect_leafs_inner(&child, source, comments, &parent_ids, level + 1)?; } } @@ -883,14 +931,138 @@ impl AtomCollection { } } + /// Create atoms out of comments, and put them at the correct place in the atom stream. 
+ fn post_process_comments(&mut self) { + struct CommentWithContext { + comment: Atom, + blank_line_after: bool, + blank_line_before: bool, + } + let mut comments_queue_with_context: VecDeque = VecDeque::new(); + + // First pass: get atoms in reverse order, put comments that come before atoms at the correct position + let mut atoms_with_comments_before: VecDeque = VecDeque::new(); + + while let Some(atom) = self.atoms.pop() { + if let Atom::Leaf { id, .. } = atom { + while let Some(Comment { + content, + original_column, + blank_line_after, + blank_line_before, + }) = self.comments_before.entry(id).or_default().pop_back() + { + let comment_atom = Atom::Leaf { + content, + id: self.next_id(), + original_column, + single_line_no_indent: false, + multi_line_indent_all: true, + }; + comments_queue_with_context.push_front(CommentWithContext { + comment: comment_atom, + blank_line_after, + blank_line_before, + }); + } + atoms_with_comments_before.push_front(atom); + } else if Atom::Hardline == atom || Atom::Blankline == atom { + let mut blank_line_before_first_comment = false; + // Prepend the comments, each one followed by a newline + while let Some(CommentWithContext { + comment, + blank_line_after, + blank_line_before, + }) = comments_queue_with_context.pop_back() + { + if blank_line_after { + atoms_with_comments_before.push_front(Atom::Blankline) + } else { + atoms_with_comments_before.push_front(Atom::Hardline); + } + atoms_with_comments_before.push_front(comment); + blank_line_before_first_comment = blank_line_before; + } + if blank_line_before_first_comment && atom == Atom::Hardline { + atoms_with_comments_before.push_front(Atom::Blankline); + } else { + atoms_with_comments_before.push_front(atom); + } + } else { + atoms_with_comments_before.push_front(atom); + } + } + let mut blank_line_before_first_comment = false; + // If we still have comments left, add them at the beginning of the file + while let Some(CommentWithContext { + comment, + blank_line_after, + 
blank_line_before, + }) = comments_queue_with_context.pop_back() + { + if blank_line_after { + atoms_with_comments_before.push_front(Atom::Blankline) + } else { + atoms_with_comments_before.push_front(Atom::Hardline); + } + atoms_with_comments_before.push_front(comment); + blank_line_before_first_comment = blank_line_before; + } + if blank_line_before_first_comment { + atoms_with_comments_before.push_front(Atom::Blankline); + } + + // Second pass: get atoms in reverse order, put comments that come after atoms at the correct position + let mut atoms_with_all_comments: VecDeque = VecDeque::new(); + let mut comments_queue: VecDeque = VecDeque::new(); + while let Some(atom) = atoms_with_comments_before.pop_front() { + if let Atom::Leaf { id, .. } = atom { + while let Some(Comment { + content, + original_column, + // We don't care about blank lines here: they only matter for comments that + // come before atoms. + .. + }) = self.comments_after.entry(id).or_default().pop_front() + { + let comment_atom = Atom::Leaf { + content, + id: self.next_id(), + original_column, + single_line_no_indent: false, + multi_line_indent_all: true, + }; + comments_queue.push_front(comment_atom); + } + } + if Atom::Hardline == atom || Atom::Blankline == atom { + // Append the comments, each one preceded by a space + while let Some(comment) = comments_queue.pop_back() { + atoms_with_all_comments.push_back(Atom::Space); + atoms_with_all_comments.push_back(comment); + } + } + atoms_with_all_comments.push_back(atom); + } + // If we still have comments left, add them at the end of the file + while let Some(comment) = comments_queue.pop_back() { + atoms_with_all_comments.push_back(Atom::Space); + atoms_with_all_comments.push_back(comment); + } + + self.atoms = atoms_with_all_comments.into() + } + /// This function merges the spaces, new lines and blank lines. 
/// If there are several tokens of different kind one after the other, /// the blank line is kept over the new line which itself is kept over the space. /// Furthermore, this function put the indentation delimiters before any space/line atom. pub fn post_process(&mut self) { self.post_process_scopes(); - self.post_process_deletes(); self.post_process_inner(); + // Comments must be processed before deletes, because comment anchors might be deleted. + self.post_process_comments(); + self.post_process_deletes(); // We have taken care of spaces following an antispace. Now fix the // preceding spaces. @@ -900,7 +1072,7 @@ impl AtomCollection { // antispaces may have produced more empty atoms. self.post_process_inner(); - log::debug!("List of atoms after post-processing: {:?}", self.atoms); + log::debug!("List of atoms after post-processing: {:#?}", self.atoms); } /// This function post-processes the atoms in the collection. @@ -950,12 +1122,13 @@ impl AtomCollection { ) => { if head.dominates(moved_prev) { *moved_prev = Atom::Empty; + prev = head; + remaining = tail; } else { *head = Atom::Empty; + prev = moved_prev; + remaining = tail; } - - prev = moved_prev; - remaining = tail; } // If a whitespace atom is followed by an indent atom, swap their positions. ( diff --git a/topiary-core/src/comments.rs b/topiary-core/src/comments.rs new file mode 100644 index 00000000..b733c84e --- /dev/null +++ b/topiary-core/src/comments.rs @@ -0,0 +1,518 @@ +use std::collections::HashSet; + +use topiary_tree_sitter_facade::{InputEdit, Language, Node, Tree}; + +use crate::{ + common::{parse, Diff, InputSection, Position}, + error::FormatterError, + FormatterResult, +}; + +/// When you remove a block of text from the input, it changes the positions of every subsequent character. +/// This is what this Diff instance does. 
+impl Diff for Position { + type ErrorType = FormatterError; + + fn subtract(&mut self, other: InputSection) -> FormatterResult<()> { + if *self <= other.start { + // The point is before the removed block: nothing happens. + Ok(()) + } else if other.end <= *self { + // The point is after the removed block: its new coordinates depend on whether it was + // on the same row as the last point of the removed block. + // + // See in the following example how the positions of characters `a` and `b` + // change when the bracketed block is removed: + // + // Before: + // .......... + // ...[--------- + // --------- + // -------]...a.. + // ...b...... + // ............. + // + // After: + // .......... + // ......a.. + // ...b...... + // ............. + let mut row = self.row; + let mut column = self.column; + if row == other.end.row { + column = column + other.start.column - other.end.column + } + row = row + other.start.row - other.end.row; + *self = Position { row, column }; + Ok(()) + } else { + // The point is within the removed block: + // fail, because the point can't be accessed after the subtraction + Err(FormatterError::Internal( + "Tried to remove a section from a point it contains".into(), + None, + )) + } + } +} + +impl Diff for InputSection { + type ErrorType = FormatterError; + + fn subtract(&mut self, other: Self) -> FormatterResult<()> { + self.start.subtract(other)?; + self.end.subtract(other) + } +} + +fn into_edit(node: &Node<'_>) -> InputEdit { + InputEdit::new( + node.start_byte(), + node.end_byte(), + node.start_byte(), + &node.start_position(), + &node.end_position(), + &node.start_position(), + ) +} + +fn find_comments( + node: Node, + input: &str, + comment_ids: &HashSet, + comments: &mut Vec<(InputEdit, AnchoredComment)>, +) -> FormatterResult<()> { + if comment_ids.contains(&node.id()) { + let commented = find_anchor(&node, input, comment_ids)?; + // Build the corresponding InputEdit: + // - If the comment is not alone on its line, return its 
bounds + // - If the comment is alone on its line, return the bounds of all its line + // (we don't want to create undue blank lines) + let prev = previous_disjoint_node(&node); + let next = next_disjoint_node(&node); + let is_alone_before = prev + .as_ref() + .map(|n| n.start_position().row() < node.start_position().row()); + let is_alone_after = next + .as_ref() + .map(|n| node.end_position().row() < n.start_position().row()); + + // The logic is a bit complex here. Each case gives an example of a comment it would match + let edit: InputEdit = match (is_alone_before, is_alone_after) { + // /* define a foo */ let _ = foo + (Some(false), _) | + // let _ = foo /* we defined a foo */ + (_, Some(false)) | + // /* this file has a comment and nothing else */ + (None, None) => { + into_edit(&node) + }, + // let _ = foo + // /* This comment is alone on its line, but has stuff before and after */ + // let _ = bar + (Some(true), Some(true)) | + // /* This is the first line of the file */ + // let _ = foo + (None, Some(true)) => { + InputEdit::new( + node.start_byte(), + next.as_ref().unwrap().start_byte(), + node.start_byte(), + &node.start_position(), + &next.as_ref().unwrap().start_position(), + &node.start_position() + ) + }, + // let _ = foo + // /* This is the last line of the file */ + (Some(true), None) => { + InputEdit::new( + prev.as_ref().unwrap().end_byte(), + node.end_byte(), + prev.as_ref().unwrap().end_byte(), + &prev.as_ref().unwrap().end_position(), + &node.end_position(), + &prev.as_ref().unwrap().end_position(), + ) + } + }; + comments.push(( + edit, + AnchoredComment { + comment_text: node.utf8_text(input.as_bytes())?.to_string(), + original_column: node.start_position().column() as i32, + commented, + }, + )); + Ok(()) + } else { + let mut walker = node.walk(); + for child in node.children(&mut walker) { + find_comments(child, input, comment_ids, comments)?; + } + Ok(()) + } +} + +/// The section of code to which a comment refers. 
We also remember whether the comment +/// is positioned before or after the section. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Commented { + /// The code section is before the comment, as in: + /// ``` + /// struct Foo { + /// baz: usize, // this is baz + /// quz: usize, // this is qux + /// } + /// ``` + CommentedBefore(InputSection), + /// The code section is after the comment, as in: + /// ``` + /// struct Foo { + /// // let's have a baz + /// baz: usize, + /// // and a qux + /// qux: usize, + /// } + /// ``` + CommentedAfter { + section: InputSection, + /// Whether or not there is a blank line before/after the comment, as in: + /// ``` + /// // The previous section was tiring, let's rest a little + /// + /// // Alright, back to the code: + /// let foo = 1; + /// ``` + blank_line_after: bool, + blank_line_before: bool, + }, +} + +impl Diff for Commented { + type ErrorType = FormatterError; + + fn subtract(&mut self, other: InputSection) -> FormatterResult<()> { + match self { + Commented::CommentedBefore(section) => section.subtract(other), + Commented::CommentedAfter { section, .. } => section.subtract(other), + } + } +} + +/// A comment, as part of Topiary's output. +/// We forget node information here, because the struct +/// is supposed to be attached to the node it comments. +#[derive(Debug)] +pub struct Comment { + pub content: String, + pub original_column: i32, + pub blank_line_after: bool, + pub blank_line_before: bool, +} + +impl From<&AnchoredComment> for Comment { + fn from(value: &AnchoredComment) -> Self { + Comment { + content: value.comment_text.clone(), + original_column: value.original_column, + blank_line_after: match value.commented { + Commented::CommentedBefore(_) => false, + Commented::CommentedAfter { + blank_line_after, .. + } => blank_line_after, + }, + blank_line_before: match value.commented { + Commented::CommentedBefore(_) => false, + Commented::CommentedAfter { + blank_line_before, .. 
+ } => blank_line_before, + }, + } + } +} + +fn next_disjoint_node<'tree>(starting_node: &'tree Node<'tree>) -> Option> { + let mut node: Node<'tree> = starting_node.clone(); + // move up until we find a next sibling + while node.next_sibling().is_none() { + match node.parent() { + None => return None, + Some(parent) => node = parent, + } + } + node.next_sibling() +} + +fn previous_disjoint_node<'tree>(starting_node: &'tree Node<'tree>) -> Option> { + let mut node: Node<'tree> = starting_node.clone(); + // move up until we find a previous sibling + while node.prev_sibling().is_none() { + match node.parent() { + None => return None, + Some(parent) => node = parent, + } + } + node.prev_sibling() +} + +// TODO: if performance is an issue, use TreeCursor to navigate the tree +fn next_non_comment_leaf<'tree>( + starting_node: Node<'tree>, + comment_ids: &HashSet, +) -> Option> { + let mut node: Node<'tree> = starting_node; + loop { + // get the next leaf: + // 1) move up until we find a next sibling + loop { + match node.next_sibling() { + None => { + if let Some(parent) = node.parent() { + node = parent + } else { + return None; + } // we've reached the root and found nothing + } + Some(sibling) => { + node = sibling; + if comment_ids.contains(&node.id()) { + // get the following sibling + continue; + } else { + break; + } + } + } + } + // 2) get the leftmost leaf of the sibling. + // If we encounter a comment, we stop. We'll get back to 1) after the loop + while let Some(child) = node.child(0) { + if comment_ids.contains(&child.id()) { + break; + } else { + node = child + } + } + // check if the leaf is a comment. If it is not, start over again. 
+ if comment_ids.contains(&node.id()) { + continue; + } else { + return Some(node); + } + } +} + +// TODO: if performance is an issue, use TreeCursor to navigate the tree +fn previous_non_comment_leaf<'tree>( + starting_node: Node<'tree>, + comment_ids: &HashSet, +) -> Option> { + let mut node: Node<'tree> = starting_node; + loop { + // get the previous leaf: + // 1) move up until we find a previous sibling + loop { + match node.prev_sibling() { + None => { + if let Some(parent) = node.parent() { + node = parent + } else { + // we've reached the root and found nothing + return None; + } + } + Some(sibling) => { + node = sibling; + if comment_ids.contains(&node.id()) { + // get the previous sibling + continue; + } else { + break; + } + } + } + } + // 2) get the rightmost leaf of the sibling. + // If we encounter a comment, we stop. We'll get back to 1) after the loop + while let Some(child) = { + if node.child_count() == 0 { + None + } else { + node.child(node.child_count() - 1) + } + } { + if comment_ids.contains(&child.id()) { + break; + } else { + node = child + } + } + // check if the leaf is a comment. If it is not, start over again. + if comment_ids.contains(&node.id()) { + continue; + } else { + return Some(node); + } + } +} + +// Use the following heuristics to find a comment's anchor: +// If the comment is only prefixed by blank symbols on its line, then the anchor is the +// next non-comment sibling node. +// Otherwise, the anchor is the previous non-comment sibling node. +// If there is no such node, we anchor to the first non-comment sibling node +// in the other direction. 
+#[allow(clippy::collapsible_else_if)] +fn find_anchor<'tree>( + node: &'tree Node<'tree>, + input: &str, + comment_ids: &HashSet, +) -> FormatterResult { + let point = node.start_position(); + let mut lines = input.lines(); + let prefix = lines + .nth(point.row() as usize) + .map(|line| &line[..point.column() as usize]) + .ok_or_else(|| { + FormatterError::Internal( + format!( + "Trying to access nonexistent line {} in text:\n{}", + point.row(), + input, + ), + None, + ) + })?; + if prefix.trim_start() == "" { + if let Some(anchor) = next_non_comment_leaf(node.clone(), comment_ids) { + let prev = previous_disjoint_node(node); + let next = next_disjoint_node(node); + Ok(Commented::CommentedAfter { + section: (&anchor).into(), + blank_line_after: next + .map(|next| next.start_position().row() > node.end_position().row() + 1) + .unwrap_or(false), + blank_line_before: prev + .map(|prev| prev.end_position().row() + 1 < node.start_position().row()) + .unwrap_or(false), + }) + } else if let Some(anchor) = previous_non_comment_leaf(node.clone(), comment_ids) { + Ok(Commented::CommentedBefore((&anchor).into())) + } else { + Err(FormatterError::CommentOrphaned( + node.utf8_text(input.as_bytes())?.to_string(), + )) + } + } else { + if let Some(anchor) = previous_non_comment_leaf(node.clone(), comment_ids) { + Ok(Commented::CommentedBefore((&anchor).into())) + } else if let Some(anchor) = next_non_comment_leaf(node.clone(), comment_ids) { + let prev = previous_disjoint_node(node); + let next = next_disjoint_node(node); + Ok(Commented::CommentedAfter { + section: (&anchor).into(), + blank_line_after: next + .map(|next| next.start_position().row() > node.end_position().row() + 1) + .unwrap_or(false), + blank_line_before: prev + .map(|prev| prev.end_position().row() + 1 < node.start_position().row()) + .unwrap_or(false), + }) + } else { + Err(FormatterError::CommentOrphaned( + node.utf8_text(input.as_bytes())?.to_string(), + )) + } + } +} + +#[derive(Clone, Debug, Eq, 
PartialEq)] +pub struct AnchoredComment { + pub comment_text: String, + // We need to keep track of the column for indentation purposes + pub original_column: i32, + pub commented: Commented, +} + +impl From<&AnchoredComment> for InputSection { + fn from(value: &AnchoredComment) -> Self { + match value.commented { + Commented::CommentedBefore(section) => section, + Commented::CommentedAfter { section, .. } => section, + } + } +} + +pub struct SeparatedInput { + pub input_tree: Tree, + pub input_string: String, + pub comments: Vec, +} + +pub fn extract_comments<'a>( + tree: &'a Tree, + input: &'a str, + comment_ids: HashSet, + grammar: &Language, + tolerate_parsing_errors: bool, +) -> FormatterResult { + let mut anchors: Vec<(InputEdit, AnchoredComment)> = Vec::new(); + let mut anchored_comments: Vec = Vec::new(); + let mut new_input: String = input.to_string(); + let mut new_tree: Tree = tree.clone(); + find_comments(tree.root_node(), input, &comment_ids, &mut anchors)?; + anchors.sort_by_key(|(node, _)| node.start_byte()); + let mut edits: Vec = Vec::new(); + // for each (comment, anchor) pair in reverse order, we: + // 1) remove the comment from the input, + // 2) register an InputEdit to modify the tree, + // 3) edit all anchors to account for the removed comment. + // + // The order is reversed so that all InputEdits can be applied in succession: + // one will not affect the others. 
+ while let Some((edit, anchored_comment)) = anchors.pop() { + // 1) + new_input.replace_range( + (edit.start_byte() as usize)..(edit.old_end_byte() as usize), + "", + ); + // 2) + edits.push(edit); + anchored_comments.push(anchored_comment); + } + for edit in edits { + new_tree.edit(&edit); + // 3) + let section: InputSection = (&edit).into(); + anchored_comments = anchored_comments + .iter() + .map( + |AnchoredComment { + mut commented, + original_column, + comment_text, + }| + -> FormatterResult { + commented.subtract(section)?; + Ok(AnchoredComment { + commented, + original_column: *original_column, + comment_text: comment_text.to_string(), + }) + }, + ) + .collect::>>()?; + } + new_tree = parse( + new_input.as_str(), + grammar, + tolerate_parsing_errors, + Some(&new_tree), + )?; + Ok(SeparatedInput { + input_tree: new_tree, + input_string: new_input, + comments: anchored_comments, + }) +} diff --git a/topiary-core/src/common.rs b/topiary-core/src/common.rs new file mode 100644 index 00000000..c9b52389 --- /dev/null +++ b/topiary-core/src/common.rs @@ -0,0 +1,119 @@ +use std::{cmp::Ord, fmt::Display}; + +use serde::Serialize; +use topiary_tree_sitter_facade::{InputEdit, Node, Parser, Point, Tree}; + +use crate::{error::FormatterError, FormatterResult}; + +/// A module for common, low-level types and functions in the topiary-core crate + +/// Refers to a position within the code. Used for error reporting, and for +/// comparing input with formatted output. The numbers are 1-based, because that +/// is how editors usually refer to a position. Derived from tree_sitter::Point. +/// Note that the order is the standard western reading order. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)] +pub struct Position { + pub row: u32, + pub column: u32, +} + +impl Display for Position { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "({},{})", self.row, self.column) + } +} + +impl From for Position { + fn from(point: Point) -> Self { + Self { + row: point.row() + 1, + column: point.column() + 1, + } + } +} + +/// Some section of contiguous characters in the input. +/// It is assumed that `start <= end`, according to the order on `Position`. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize)] +pub struct InputSection { + pub start: Position, + pub end: Position, +} + +impl InputSection { + pub fn contains(self, other: &Self) -> bool { + self.start <= other.start && other.end <= self.end + } +} + +impl From<&Node<'_>> for InputSection { + fn from(value: &Node) -> Self { + InputSection { + start: value.start_position().into(), + end: value.end_position().into(), + } + } +} + +impl From<&InputEdit> for InputSection { + fn from(value: &InputEdit) -> Self { + InputSection { + start: value.start_position().into(), + end: value.old_end_position().into(), + } + } +} + +/// A generic trait to subtract stuff from other stuff. The function can be partial. +/// In practice, it will be used to update text positions within the input, +/// when removing parts of it. +pub trait Diff { + type ErrorType; + + fn subtract(&mut self, other: T) -> Result<(), Self::ErrorType>; +} + +/// Parses some string into a syntax tree, given a tree-sitter grammar. +pub fn parse( + content: &str, + grammar: &topiary_tree_sitter_facade::Language, + tolerate_parsing_errors: bool, + old_tree: Option<&Tree>, +) -> FormatterResult { + let mut parser = Parser::new()?; + parser.set_language(grammar).map_err(|_| { + FormatterError::Internal("Could not apply Tree-sitter grammar".into(), None) + })?; + + let tree = parser + .parse(content, old_tree)? 
+ .ok_or_else(|| FormatterError::Internal("Could not parse input".into(), None))?; + + // Fail parsing if we don't get a complete syntax tree. + if !tolerate_parsing_errors { + check_for_error_nodes(&tree.root_node())?; + } + + Ok(tree) +} + +fn check_for_error_nodes(node: &Node) -> FormatterResult<()> { + if node.kind() == "ERROR" { + let start = node.start_position(); + let end = node.end_position(); + + // Report 1-based lines and columns. + return Err(FormatterError::Parsing { + start_line: start.row() + 1, + start_column: start.column() + 1, + end_line: end.row() + 1, + end_column: end.column() + 1, + }); + } + + for child in node.children(&mut node.walk()) { + check_for_error_nodes(&child)?; + } + + Ok(()) +} diff --git a/topiary-core/src/error.rs b/topiary-core/src/error.rs index 4c93c8fb..18e1344c 100644 --- a/topiary-core/src/error.rs +++ b/topiary-core/src/error.rs @@ -6,6 +6,14 @@ use std::{error::Error, fmt, io, ops::Deref, str, string}; /// The various errors the formatter may return. #[derive(Debug)] pub enum FormatterError { + /// Found an anchored comment that couldn't be re-attached to its anchor. + /// The second argument should be an InputSection, but cyclic dependencies + /// make it difficult. + CommentAbandoned(String, String), + + /// Found a comment for which no anchor could be found. + CommentOrphaned(String), + /// The input produced output that isn't idempotent, i.e. formatting the /// output again made further changes. If this happened using our provided /// query files, it is a bug. Please log an issue. @@ -57,6 +65,20 @@ impl fmt::Display for FormatterError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let please_log_message = "If this happened with the built-in query files, it is a bug. 
It would be\nhelpful if you logged this error at\nhttps://github.com/tweag/topiary/issues/new?assignees=&labels=type%3A+bug&template=bug_report.md"; match self { + Self::CommentAbandoned(comment, anchor) => { + write!( + f, + "Found an anchored comment, but could not attach it back to its anchor\n{comment}\nThe anchor was {anchor}", + ) + } + + Self::CommentOrphaned(comment) => { + write!( + f, + "Found a comment for which no anchor could be found:\n{comment}", + ) + } + Self::Idempotence => { write!( f, @@ -100,6 +122,8 @@ impl Error for FormatterError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { Self::Idempotence + | Self::CommentAbandoned(..) + | Self::CommentOrphaned(_) | Self::Parsing { .. } | Self::PatternDoesNotMatch | Self::Io(IoError::Generic(_, None)) => None, diff --git a/topiary-core/src/language.rs b/topiary-core/src/language.rs index 1c903a47..bfacd81b 100644 --- a/topiary-core/src/language.rs +++ b/topiary-core/src/language.rs @@ -13,6 +13,9 @@ pub struct Language { /// The Query Topiary will use to get the formating captures, must be /// present. The topiary engine does not include any formatting queries. pub query: TopiaryQuery, + /// The Query Topiary will use to determine which nodes are comments. + /// When missing, there will be no separate comment processing. + pub comment_query: Option, /// The tree-sitter Language. Topiary will use this Language for parsing. pub grammar: topiary_tree_sitter_facade::Language, /// The indentation string used for that particular language.
Defaults to " " diff --git a/topiary-core/src/lib.rs b/topiary-core/src/lib.rs index 7f9968e9..d4d28d12 100644 --- a/topiary-core/src/lib.rs +++ b/topiary-core/src/lib.rs @@ -14,20 +14,22 @@ use std::io; use itertools::Itertools; use pretty_assertions::StrComparison; -use tree_sitter::Position; pub use crate::{ + common::{parse, Position}, error::{FormatterError, IoError}, language::Language, tree_sitter::{apply_query, CoverageData, SyntaxNode, TopiaryQuery, Visualisation}, }; mod atom_collection; +pub mod comments; +pub mod common; mod error; mod graphviz; mod language; mod pretty; -mod tree_sitter; +pub mod tree_sitter; #[doc(hidden)] pub mod test_utils; @@ -63,7 +65,7 @@ pub enum Atom { Leaf { content: String, id: usize, - original_position: Position, + original_column: i32, // marks the leaf to be printed on a single line, with no indentation single_line_no_indent: bool, // if the leaf is multi-line, each line will be indented, not just the first @@ -197,6 +199,7 @@ pub enum Operation { /// let language: Language = Language { /// name: "json".to_owned(), /// query: TopiaryQuery::new(&json.clone().into(), &query_content).unwrap(), +/// comment_query: None, /// grammar: json.into(), /// indent: None, /// }; @@ -238,6 +241,7 @@ pub fn formatter( let mut atoms = tree_sitter::apply_query( &content, &language.query, + &language.comment_query, &language.grammar, tolerate_parsing_errors, )?; @@ -262,7 +266,7 @@ pub fn formatter( } Operation::Visualise { output_format } => { - let tree = tree_sitter::parse(&content, &language.grammar, false)?; + let tree = parse(&content, &language.grammar, false, None)?; let root: SyntaxNode = tree.root_node().into(); match output_format { @@ -393,6 +397,7 @@ mod tests { let language = Language { name: "json".to_owned(), query: TopiaryQuery::new(&grammar, query_content).unwrap(), + comment_query: None, grammar, indent: None, }; @@ -429,6 +434,7 @@ mod tests { let language = Language { name: "json".to_owned(), query: 
TopiaryQuery::new(&grammar, &query_content).unwrap(), + comment_query: None, grammar, indent: None, }; diff --git a/topiary-core/src/pretty.rs b/topiary-core/src/pretty.rs index 1625850d..aca930c8 100644 --- a/topiary-core/src/pretty.rs +++ b/topiary-core/src/pretty.rs @@ -41,7 +41,7 @@ pub fn render(atoms: &[Atom], indent: &str) -> FormatterResult { Atom::Leaf { content, - original_position, + original_column, single_line_no_indent, multi_line_indent_all, .. @@ -57,9 +57,6 @@ pub fn render(atoms: &[Atom], indent: &str) -> FormatterResult { let content = if *multi_line_indent_all { let cursor = current_column(&buffer) as i32; - // original_position is 1-based - let original_column = original_position.column as i32 - 1; - let indenting = cursor - original_column; // The following assumes spaces are used for indenting diff --git a/topiary-core/src/tree_sitter.rs b/topiary-core/src/tree_sitter.rs index 6b47f58d..6d000704 100644 --- a/topiary-core/src/tree_sitter.rs +++ b/topiary-core/src/tree_sitter.rs @@ -7,13 +7,15 @@ use std::{collections::HashSet, fmt::Display}; use serde::Serialize; use topiary_tree_sitter_facade::{ - Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, QueryPredicate, Tree, + Node, Point, Query, QueryCapture, QueryCursor, QueryMatch, QueryPredicate, Tree, }; use streaming_iterator::StreamingIterator; use crate::{ atom_collection::{AtomCollection, QueryPredicates}, + comments::{extract_comments, AnchoredComment, SeparatedInput}, + common::{parse, Position}, error::FormatterError, FormatterResult, }; @@ -25,21 +27,6 @@ pub enum Visualisation { Json, } -/// Refers to a position within the code. Used for error reporting, and for -/// comparing input with formatted output. The numbers are 1-based, because that -/// is how editors usually refer to a position. Derived from tree_sitter::Point. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize)] -pub struct Position { - pub row: u32, - pub column: u32, -} - -impl Display for Position { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "({},{})", self.row, self.column) - } -} - /// Topiary often needs both the tree-sitter `Query` and the original content /// beloging to the file from which the query was parsed. This struct is a simple /// convenience wrapper that combines the `Query` with its original string. @@ -97,15 +84,6 @@ impl TopiaryQuery { } } -impl From for Position { - fn from(point: Point) -> Self { - Self { - row: point.row() + 1, - column: point.column() + 1, - } - } -} - // Simplified syntactic node struct, for the sake of serialisation. #[derive(Serialize)] pub struct SyntaxNode { @@ -217,6 +195,30 @@ pub struct CoverageData { pub missing_patterns: Vec, } +/// Run a tree-sitter query to identify comments in the tree, then return their IDs +pub fn collect_comment_ids( + tree: &Tree, + input_content: &str, + query: &TopiaryQuery, +) -> HashSet { + let mut cursor = QueryCursor::new(); + let mut query_matches = + query + .query + .matches(&tree.root_node(), input_content.as_bytes(), &mut cursor); + let capture_names = query.query.capture_names(); + let mut ids = HashSet::new(); + #[allow(clippy::while_let_on_iterator)] // This is not a normal iterator + while let Some(query_match) = query_matches.next() { + for capture in query_match.captures() { + if capture.name(capture_names.as_slice()) == "comment" { + ids.insert(capture.node().id()); + } + } + } + ids +} + /// Applies a query to an input content and returns a collection of atoms. 
/// /// # Errors @@ -230,12 +232,45 @@ pub struct CoverageData { pub fn apply_query( input_content: &str, query: &TopiaryQuery, + comment_query: &Option, grammar: &topiary_tree_sitter_facade::Language, tolerate_parsing_errors: bool, ) -> FormatterResult { - let tree = parse(input_content, grammar, tolerate_parsing_errors)?; - let root = tree.root_node(); - let source = input_content.as_bytes(); + let tree = parse(input_content, grammar, tolerate_parsing_errors, None)?; + + // Remove comments in a separate stream before applying queries, if applicable + let SeparatedInput { + input_string, + input_tree, + comments, + } = match comment_query { + Some(comment_query) => { + let comment_ids = collect_comment_ids(&tree, input_content, comment_query); + extract_comments( + &tree, + input_content, + comment_ids, + grammar, + tolerate_parsing_errors, + )? + } + None => SeparatedInput { + input_string: input_content.to_string(), + input_tree: tree, + comments: Vec::new(), + }, + }; + let root = input_tree.root_node(); + let source = input_string.as_bytes(); + + for AnchoredComment { + comment_text, + commented, + .. 
+ } in comments.iter() + { + log::debug!("Found comment \"{comment_text}\" with anchor {commented:?}"); + } // Match queries let mut cursor = QueryCursor::new(); @@ -258,9 +293,9 @@ pub fn apply_query( let specified_leaf_nodes: HashSet = collect_leaf_ids(&matches, capture_names.clone()); // The Flattening: collects all terminal nodes of the tree-sitter tree in a Vec - let mut atoms = AtomCollection::collect_leafs(&root, source, specified_leaf_nodes)?; + let mut atoms = AtomCollection::collect_leafs(&root, source, specified_leaf_nodes, comments)?; - log::debug!("List of atoms before formatting: {atoms:?}"); + log::debug!("List of atoms before formatting: {atoms:#?}"); // Memoization of the pattern positions let mut pattern_positions: Vec> = Vec::new(); @@ -332,50 +367,6 @@ pub fn apply_query( Ok(atoms) } -/// Parses some string into a syntax tree, given a tree-sitter grammar. -pub fn parse( - content: &str, - grammar: &topiary_tree_sitter_facade::Language, - tolerate_parsing_errors: bool, -) -> FormatterResult { - let mut parser = Parser::new()?; - parser.set_language(grammar).map_err(|_| { - FormatterError::Internal("Could not apply Tree-sitter grammar".into(), None) - })?; - - let tree = parser - .parse(content, None)? - .ok_or_else(|| FormatterError::Internal("Could not parse input".into(), None))?; - - // Fail parsing if we don't get a complete syntax tree. - if !tolerate_parsing_errors { - check_for_error_nodes(&tree.root_node())?; - } - - Ok(tree) -} - -fn check_for_error_nodes(node: &Node) -> FormatterResult<()> { - if node.kind() == "ERROR" { - let start = node.start_position(); - let end = node.end_position(); - - // Report 1-based lines and columns. 
- return Err(FormatterError::Parsing { - start_line: start.row() + 1, - start_column: start.column() + 1, - end_line: end.row() + 1, - end_column: end.column() + 1, - }); - } - - for child in node.children(&mut node.walk()) { - check_for_error_nodes(&child)?; - } - - Ok(()) -} - /// Collects the IDs of all leaf nodes in a set of query matches. /// /// This function takes a slice of `LocalQueryMatch` and a slice of capture names, @@ -526,7 +517,7 @@ pub fn check_query_coverage( original_query: &TopiaryQuery, grammar: &topiary_tree_sitter_facade::Language, ) -> FormatterResult { - let tree = parse(input_content, grammar, false)?; + let tree = parse(input_content, grammar, false, None)?; let root = tree.root_node(); let source = input_content.as_bytes(); let mut missing_patterns = Vec::new(); diff --git a/topiary-core/tests/comment_tests.rs b/topiary-core/tests/comment_tests.rs new file mode 100644 index 00000000..c7945507 --- /dev/null +++ b/topiary-core/tests/comment_tests.rs @@ -0,0 +1,106 @@ +use topiary_core::{ + comments::{ + extract_comments, AnchoredComment, + Commented::{CommentedAfter, CommentedBefore}, + SeparatedInput, + }, + common::{parse, InputSection}, + tree_sitter::collect_comment_ids, + Position, TopiaryQuery, +}; + +const OCAML_WITH_COMMENTS: &str = r#"(* starting comment *) +fun (* fun comment *) x (* var comment *) -> + (** multi-lined + * body comment + *) + body +(* final comment *) +"#; + +const OCAML_WITHOUT_COMMENTS: &str = r#"fun x -> + body +"#; + +const OCAML_COMMENTS_QUERY: &str = "(comment) @comment"; + +// The section corresponding to `fun` in the curated code +const FUN_SECTION: InputSection = InputSection { + start: Position { row: 1, column: 1 }, + end: Position { row: 1, column: 4 }, +}; + +// The section corresponding to `x` in the curated code +const VAR_SECTION: InputSection = InputSection { + start: Position { row: 1, column: 6 }, + end: Position { row: 1, column: 7 }, +}; + +// The section corresponding to `body` in the 
curated code +const BODY_SECTION: InputSection = InputSection { + start: Position { row: 2, column: 3 }, + end: Position { row: 2, column: 7 }, +}; + +#[test] +fn test_extract_comments() { + let input = OCAML_WITH_COMMENTS; + let ocaml = tree_sitter_ocaml::LANGUAGE_OCAML; + + let tree = parse(input, &ocaml.into(), false, None).unwrap(); + let comment_query = TopiaryQuery::new(&ocaml.into(), OCAML_COMMENTS_QUERY).unwrap(); + let comment_ids = collect_comment_ids(&tree, input, &comment_query); + + let SeparatedInput { + input_tree: _, + input_string: new_input_string, + mut comments, + } = extract_comments(&tree, input, comment_ids, &ocaml.into(), false).unwrap(); + + let mut expected_comments: Vec = vec![ + AnchoredComment { + comment_text: "(* starting comment *)".into(), + commented: CommentedAfter { + section: FUN_SECTION, + blank_line_after: false, + blank_line_before: false, + }, + original_column: 0, + }, + AnchoredComment { + comment_text: "(* fun comment *)".into(), + commented: CommentedBefore(FUN_SECTION), + original_column: 4, + }, + AnchoredComment { + comment_text: "(* var comment *)".into(), + commented: CommentedBefore(VAR_SECTION), + original_column: 24, + }, + AnchoredComment { + comment_text: "(** multi-lined\n * body comment\n *)".into(), + commented: CommentedAfter { + section: BODY_SECTION, + blank_line_after: false, + blank_line_before: false, + }, + original_column: 2, + }, + AnchoredComment { + comment_text: "(* final comment *)".into(), + commented: CommentedBefore(BODY_SECTION), + original_column: 0, + }, + ]; + + // sort the comments so that we're order-independent + comments.sort_by_key(|comment| comment.comment_text.clone()); + expected_comments.sort_by_key(|comment| comment.comment_text.clone()); + + assert_eq!(new_input_string, OCAML_WITHOUT_COMMENTS); + + assert_eq!(comments.len(), 5); + for (comment, expected_comment) in comments.iter().zip(expected_comments.iter()) { + assert_eq!(comment, expected_comment) + } +} diff --git 
a/topiary-playground/src/lib.rs b/topiary-playground/src/lib.rs index b74fcdff..7ab7547e 100644 --- a/topiary-playground/src/lib.rs +++ b/topiary-playground/src/lib.rs @@ -44,6 +44,7 @@ mod wasm_mod { let language = Language { name: language.name, query, + comment_query: None, grammar, indent: language.config.indent, }; diff --git a/topiary-queries/queries/bash.comment.scm b/topiary-queries/queries/bash.comment.scm new file mode 100644 index 00000000..b733adac --- /dev/null +++ b/topiary-queries/queries/bash.comment.scm @@ -0,0 +1,2 @@ +; Identify nodes for comment processing +(comment) @comment diff --git a/topiary-queries/queries/bash.scm b/topiary-queries/queries/bash.scm index 1c1874c2..9cdde807 100644 --- a/topiary-queries/queries/bash.scm +++ b/topiary-queries/queries/bash.scm @@ -6,7 +6,6 @@ ; variable expansions (simple or otherwise) ; FIXME The first line of heredocs are affected by the indent level [ - (comment) (expansion) (heredoc_redirect) (string) @@ -39,7 +38,6 @@ (case_item) (case_statement) (command) - (comment) (compound_statement) (declaration_command) (for_statement) @@ -145,53 +143,6 @@ "in" ] @prepend_space -;; Comments - -; Comments come in two flavours: standalone (i.e., it's the only thing -; on a line, starting at the current indent level); and trailing (i.e., -; following some other statement on the same line, with a space -; interposed). Bash does not have multi-line comments; they are all -; single-line. -; -; The grammar parses all comments as the (comment) node, which are -; siblings under a common parent. -; -; Formatting Rules: -; -; 1. A comment's contents must not be touched; some (namely the shebang) -; have a syntactic purpose. -; 2. All comments must end with a new line. -; 3. Comments can be interposed by blank lines, if they exist in the -; input (i.e., blank lines shouldn't be engineered elsewhere). -; 4. A comment can never change flavour (i.e., standalone to trailing, -; or vice versa). -; 5. 
Trailing comments should be interposed by a space. - -; Rule 1: See @leaf rule, above - -; Rule 2 -(comment) @append_hardline - -; Rule 3: See @allow_blank_line_before rule, above. -; FIXME This doesn't quite get us what we want. It's close, but blank -; lines between comments can get consumed. - -; Rule 4: We only have to protect against the case of a standalone -; comment, after a statement, being slurped on to that statement's line -; and becoming a trailing comment. That case is satisfied by Rule 5. - -; Rule 5 -( - (comment) @prepend_begin_scope @append_begin_measuring_scope - . - _ @prepend_end_measuring_scope @prepend_end_scope - (#scope_id! "line_break_after_comment") -) -( - (comment) @prepend_space - (#multi_line_scope_only! "line_break_after_comment") -) - ;; Compound Statements and Subshells ; Compound statements and subshells are formatted in exactly the same diff --git a/topiary-queries/queries/css.comment.scm b/topiary-queries/queries/css.comment.scm new file mode 100644 index 00000000..b733adac --- /dev/null +++ b/topiary-queries/queries/css.comment.scm @@ -0,0 +1,2 @@ +; Identify nodes for comment processing +(comment) @comment diff --git a/topiary-queries/queries/css.scm b/topiary-queries/queries/css.scm index a98992bd..7888c58c 100644 --- a/topiary-queries/queries/css.scm +++ b/topiary-queries/queries/css.scm @@ -50,9 +50,6 @@ ; Spacing before and after a rule_set (rule_set) @allow_blank_line_before @prepend_hardline -; Allow comments to have a blank line before them -(comment) @allow_blank_line_before - ; Allow blank lines before any declaration in a block except the first one (block . 
(declaration) (declaration) @allow_blank_line_before) diff --git a/topiary-queries/queries/nickel.comment.scm b/topiary-queries/queries/nickel.comment.scm new file mode 100644 index 00000000..b733adac --- /dev/null +++ b/topiary-queries/queries/nickel.comment.scm @@ -0,0 +1,2 @@ +; Identify nodes for comment processing +(comment) @comment diff --git a/topiary-queries/queries/nickel.scm b/topiary-queries/queries/nickel.scm index 8409ceaf..b0fab53e 100644 --- a/topiary-queries/queries/nickel.scm +++ b/topiary-queries/queries/nickel.scm @@ -11,7 +11,6 @@ ; Allow a blank line before the following nodes [ - (comment) (record_field) (record_last_field) ] @allow_blank_line_before @@ -208,10 +207,6 @@ (pattern_fun) ) -;; Comments - -(comment) @prepend_input_softline @append_hardline - ;; Bound Expressions ; i.e., Let expressions and record fields @@ -281,16 +276,6 @@ ) @append_indent_end ) -; If the RHS starts with a comment, which itself is followed by a hard -; line, then we apply the normal indent block formatting in a multi-line -; context (i.e., no exceptions) -(_ - "=" @append_indent_start - . - (comment) - (term) @append_indent_end -) - ; A let expression looks like: ; ; let [rec] IDENT = EXPR in EXPR @@ -654,8 +639,6 @@ ] . ["," ";"] @append_spaced_scoped_softline - . - (comment)? @do_nothing ) ; Enums and records can have a `;` at the very beginning; allow spaces after @@ -664,6 +647,4 @@ (#scope_id! "container") . ";" @append_spaced_scoped_softline - . - (comment)? 
@do_nothing ) diff --git a/topiary-queries/queries/ocaml.comment.scm b/topiary-queries/queries/ocaml.comment.scm new file mode 100644 index 00000000..b733adac --- /dev/null +++ b/topiary-queries/queries/ocaml.comment.scm @@ -0,0 +1,2 @@ +; Identify nodes for comment processing +(comment) @comment diff --git a/topiary-queries/queries/ocaml.scm b/topiary-queries/queries/ocaml.scm index 7f6721d2..fc7a6fff 100644 --- a/topiary-queries/queries/ocaml.scm +++ b/topiary-queries/queries/ocaml.scm @@ -31,8 +31,6 @@ ] ) @leaf -(comment) @multi_line_indent_all - ; line number directives must be alone on their line, and can't be indented (line_number_directive) @single_line_no_indent @@ -41,7 +39,6 @@ (class_definition) (class_initializer) (class_type_definition) - (comment) (exception_definition) (external) (floating_attribute) @@ -84,9 +81,7 @@ "and" @allow_blank_line_before ) -; Append line breaks. If there is a comment following, we don't add anything, -; because the input softlines and spaces above will already have sorted out the -; formatting. +; Append line breaks. ( [ (exception_definition) @@ -97,8 +92,6 @@ ] @append_spaced_softline . "in"? @do_nothing - . - (comment)* @do_nothing ) ; Also append line breaks after open_module, except when it's ; preceded by "let", because in this case it's in a let_open_expression. @@ -106,17 +99,14 @@ "let"? @do_nothing . (open_module) @append_hardline - . - (comment)* @do_nothing ) ; Append line break after module include, except if it's alone in a single-lined struct ( [ - ; start equivalence class + ; both elements are in an equivalence class (include_module) (include_module_type) - ; end equivalence class ] @append_hardline . "end"? @do_nothing @@ -125,10 +115,9 @@ "struct" . [ - ; start equivalence class + ; both elements are in an equivalence class (include_module) (include_module_type) - ; end equivalence class ] @append_spaced_softline . "end" @@ -432,7 +421,7 @@ ) ; Softlines. 
These become either a space or a newline, depending on whether we -; format their node as single-line or multi-line. If there is a comment +; format their node as single-line or multi-line. If there is an attribute ; following, we don't add anything, because they will have their own line break ; processing applied to them. ; @@ -452,7 +441,6 @@ . [ (attribute) - (comment) "%" ]* @do_nothing ) @@ -472,18 +460,13 @@ "%" . (attribute_id) @append_spaced_softline - . - (comment)* @do_nothing ) ; only add softlines after "else" if it's not part of an "else if" construction ( "else" @append_spaced_softline . - [ - (comment) - (if_expression) - ]? @do_nothing + (if_expression)? @do_nothing ) ; ":" must not always be followed by a softline, we explicitly enumerate @@ -740,10 +723,9 @@ (module_definition) (value_specification) (type_definition) - ; start equivalence class + ; the following two elements are in an equivalence class (include_module) (include_module_type) - ; end equivalence class ] @append_spaced_softline ) @@ -794,10 +776,7 @@ (record_declaration (field_declaration) @append_delimiter . - [ - (comment) - (attribute) - ]* + (attribute)* . ";" @delete (#delimiter! ";") @@ -821,7 +800,6 @@ [ (field_declaration) (attribute) - (comment) ]? @append_end_scope . (field_declaration) @prepend_begin_scope @@ -831,7 +809,6 @@ [ (field_declaration) (attribute) - (comment) ] @append_end_scope . "}" @@ -845,10 +822,7 @@ (record_expression (field_expression) @append_delimiter . - [ - (comment) - (attribute) - ]* + (attribute)* . ";" @delete (#delimiter! ";") @@ -863,7 +837,6 @@ [ (field_expression) (attribute) - (comment) ]? @append_end_scope . (field_expression) @prepend_begin_scope @@ -873,7 +846,6 @@ [ (field_expression) (attribute) - (comment) ] @append_end_scope . "}" @@ -937,8 +909,8 @@ ":" @append_indent_start (_) @append_indent_end . 
- ; just doing _ above doesn't work, because it matches the final named node as - ; well as the final non-named node, causing double indentation. +; just doing _ above doesn't work, because it matches the final named node as +; well as the final non-named node, causing double indentation. ) (value_specification @@ -1807,15 +1779,3 @@ ")" @prepend_indent_end @prepend_empty_softline . ) - -; Input softlines before and after all comments. This means that the input -; decides if a comment should have line breaks before or after. But don't put a -; softline directly in front of commas or semicolons. - -(comment) @prepend_input_softline - -( - (comment) @append_input_softline - . - ["," ";"]* @do_nothing -) diff --git a/topiary-queries/queries/ocaml_interface.comment.scm b/topiary-queries/queries/ocaml_interface.comment.scm new file mode 120000 index 00000000..8867762c --- /dev/null +++ b/topiary-queries/queries/ocaml_interface.comment.scm @@ -0,0 +1 @@ +ocaml.comment.scm \ No newline at end of file diff --git a/topiary-queries/queries/ocamllex.comment.scm b/topiary-queries/queries/ocamllex.comment.scm new file mode 100644 index 00000000..b733adac --- /dev/null +++ b/topiary-queries/queries/ocamllex.comment.scm @@ -0,0 +1,2 @@ +; Identify nodes for comment processing +(comment) @comment diff --git a/topiary-queries/queries/ocamllex.scm b/topiary-queries/queries/ocamllex.scm index 27cb7ab9..88aae330 100644 --- a/topiary-queries/queries/ocamllex.scm +++ b/topiary-queries/queries/ocamllex.scm @@ -1,8 +1,6 @@ ; NOTE[regexp] regexp is a unnamed node without a field name, so we typically ; account for places it can be instead of formatting it directly. -(comment) @multi_line_indent_all @allow_blank_line_before @prepend_input_softline @append_input_softline @multi_line_indent_all - ( (#scope_id! 
"action") (action) @prepend_begin_scope @append_end_scope @@ -14,17 +12,17 @@ "{" @append_spaced_scoped_softline @append_indent_start (ocaml) @multi_line_indent_all "}" @prepend_spaced_scoped_softline @prepend_indent_end - ; This last capture name is a bit unfortunate, but it resolves an issue where - ; the @append_input_softline of the comment is not resolved because the \n is - ; after it. +; This last capture name is a bit unfortunate, but it resolves an issue where +; the @append_input_softline of the comment is not resolved because the \n is +; after it. ) @prepend_input_softline ; Regular expression related rules (named_regexp "let" @append_space "=" @prepend_space @append_space - ; If the regexp spand multiple lines, we may want it to be indented - ; regexp: (_) @prepend_indent_start @append_indent_end +; If the regexp spand multiple lines, we may want it to be indented +; regexp: (_) @prepend_indent_start @append_indent_end ) @allow_blank_line_before @append_hardline ; Actual regex rules @@ -91,7 +89,7 @@ (#scope_id! "lexer_entry") ; TODO: Is this allowed to be a space? Use hardline if not "|" @prepend_hardline @allow_blank_line_before - ; @prepend_space is for NOTE[regexp] +; @prepend_space is for NOTE[regexp] ) (lexer_entry diff --git a/topiary-queries/queries/rust.comment.scm b/topiary-queries/queries/rust.comment.scm new file mode 100644 index 00000000..142d9f68 --- /dev/null +++ b/topiary-queries/queries/rust.comment.scm @@ -0,0 +1,5 @@ +; Identify nodes for comment processing +[ + (block_comment) + (line_comment) +] @comment diff --git a/topiary-queries/queries/rust.scm b/topiary-queries/queries/rust.scm index 5d205ec8..116eef36 100644 --- a/topiary-queries/queries/rust.scm +++ b/topiary-queries/queries/rust.scm @@ -2,15 +2,12 @@ ; not be formatted, but taken as is. We use the leaf capture name to inform the ; tool of this. 
[ - (block_comment) - (line_comment) (string_literal) ] @leaf ; Allow blank line before [ (attribute_item) - (block_comment) (call_expression) (enum_item) (enum_variant) @@ -18,7 +15,6 @@ (function_item) (impl_item) (let_declaration) - (line_comment) (mod_item) (struct_item) (type_item) @@ -54,25 +50,7 @@ ":" ] @append_space -; Input softlines before and after all comments. This means that the input -; decides if a comment should have line breaks before or after. A line comment -; always ends with a line break. -[ - (block_comment) - (line_comment) -] @prepend_input_softline - -; Input softline after block comments unless followed by comma or semicolon, as -; they are always put directly after. -( - (block_comment) @append_input_softline - . - ["," ";"]* @do_nothing -) - -; Append line breaks. If there is a comment following, we don't add anything, -; because the input softlines and spaces above will already have sorted out the -; formatting. +; Append line breaks. ( [ (attribute_item) @@ -87,32 +65,14 @@ (type_item) (use_declaration) ] @append_spaced_softline - . - [ - (block_comment) - (line_comment) - ]* @do_nothing ) -(line_comment) @append_hardline - -(block_comment) @multi_line_indent_all - -; Allow line break after block comments -( - (block_comment) - . - _ @prepend_input_softline -) - -; Append softlines, unless followed by comments. +; Append softlines ( [ "," ";" ] @append_spaced_softline - . 
- [(block_comment) (line_comment)]* @do_nothing ) ; Prepend softlines before dots diff --git a/topiary-queries/queries/toml.comment.scm b/topiary-queries/queries/toml.comment.scm new file mode 100644 index 00000000..b733adac --- /dev/null +++ b/topiary-queries/queries/toml.comment.scm @@ -0,0 +1,2 @@ +; Identify nodes for comment processing +(comment) @comment diff --git a/topiary-queries/queries/toml.scm b/topiary-queries/queries/toml.scm index 945472f9..70ef5d5f 100644 --- a/topiary-queries/queries/toml.scm +++ b/topiary-queries/queries/toml.scm @@ -8,16 +8,12 @@ ; Allow blank line before [ - (comment) (table) (table_array_element) (pair) ] @allow_blank_line_before ; Append line breaks -[ - (comment) -] @append_hardline (document (pair) @append_hardline @@ -50,19 +46,12 @@ "}" ] @prepend_space @append_space -; Input softlines before all comments. This means that the input decides if a -; comment should have line breaks in front of it. -(comment) @prepend_input_softline - ; Softlines. These become either a space or a newline, depending on whether we ; format their node as single-line or multi-line. ( "," @append_spaced_softline . - [ - (comment) - "]" - ]? @do_nothing + "]"? @do_nothing ) ; remove trailing comma from last element of single line array @@ -72,7 +61,7 @@ ; add trailing comma if absent to last string of multiline array (array - (((string) @append_delimiter) . ","* @do_nothing . (comment)? . "]")(#delimiter! ",") + (((string) @append_delimiter) . ","* @do_nothing . "]")(#delimiter! ",") (#multi_line_only!) 
) diff --git a/topiary-queries/queries/tree_sitter_query.comment.scm b/topiary-queries/queries/tree_sitter_query.comment.scm new file mode 100644 index 00000000..b733adac --- /dev/null +++ b/topiary-queries/queries/tree_sitter_query.comment.scm @@ -0,0 +1,2 @@ +; Identify nodes for comment processing +(comment) @comment diff --git a/topiary-queries/queries/tree_sitter_query.scm b/topiary-queries/queries/tree_sitter_query.scm index 2d58f617..9afd9062 100644 --- a/topiary-queries/queries/tree_sitter_query.scm +++ b/topiary-queries/queries/tree_sitter_query.scm @@ -7,8 +7,6 @@ (string) ] @leaf -(comment) @prepend_input_softline @append_hardline @allow_blank_line_before - ; Elements at top-level must be alone on their line. Blank lines are allowed (program _ @allow_blank_line_before @prepend_hardline diff --git a/topiary-queries/src/lib.rs b/topiary-queries/src/lib.rs index 20c886e6..cfb6f8a9 100644 --- a/topiary-queries/src/lib.rs +++ b/topiary-queries/src/lib.rs @@ -58,3 +58,58 @@ pub fn toml() -> &'static str { pub fn tree_sitter_query() -> &'static str { include_str!("../queries/tree_sitter_query.scm") } + +/// Returns the Topiary-compatible comment query file for Bash. +#[cfg(feature = "bash")] +pub fn bash_comment() -> &'static str { + include_str!("../queries/bash.comment.scm") +} + +/// Returns the Topiary-compatible comment query file for CSS. +#[cfg(feature = "css")] +pub fn css_comment() -> &'static str { + include_str!("../queries/css.comment.scm") +} + +/// Returns the Topiary-compatible comment query file for Nickel. +#[cfg(feature = "nickel")] +pub fn nickel_comment() -> &'static str { + include_str!("../queries/nickel.comment.scm") +} + +/// Returns the Topiary-compatible comment query file for Ocaml. +#[cfg(feature = "ocaml")] +pub fn ocaml_comment() -> &'static str { + include_str!("../queries/ocaml.comment.scm") +} + +/// Returns the Topiary-compatible comment query file for Ocaml Interface. 
+#[cfg(feature = "ocaml_interface")] +pub fn ocaml_interface_comment() -> &'static str { + include_str!("../queries/ocaml_interface.comment.scm") +} + +/// Returns the Topiary-compatible comment query file for Ocamllex. +#[cfg(feature = "ocamllex")] +pub fn ocamllex_comment() -> &'static str { + include_str!("../queries/ocamllex.comment.scm") +} + +/// Returns the Topiary-compatible comment query file for Rust. +#[cfg(feature = "rust")] +pub fn rust_comment() -> &'static str { + include_str!("../queries/rust.comment.scm") +} + +/// Returns the Topiary-compatible comment query file for Toml. +#[cfg(feature = "toml")] +pub fn toml_comment() -> &'static str { + include_str!("../queries/toml.comment.scm") +} + +/// Returns the Topiary-compatible comment query file for the +/// Tree-sitter query language. +#[cfg(feature = "tree_sitter_query")] +pub fn tree_sitter_query_comment() -> &'static str { + include_str!("../queries/tree_sitter_query.comment.scm") +}