From a63df9646018f940526bfa2a4cee186e912b6ef7 Mon Sep 17 00:00:00 2001 From: Will Eccles Date: Tue, 2 Jan 2024 17:32:00 -0500 Subject: [PATCH 1/4] add pioasm support --- languages.json | 7 ++++++ src/language/embedding.rs | 27 +++++++++++++++++++++++ src/language/language_type.rs | 6 ++++++ src/language/syntax.rs | 40 +++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+) diff --git a/languages.json b/languages.json index efdd6de45..400ab8e10 100644 --- a/languages.json +++ b/languages.json @@ -1062,6 +1062,13 @@ "quotes": [["\\\"", "\\\""], ["'", "'"]], "extensions": ["php"] }, + "PioAsm": { + "name": "RP2040 PIO ASM", + "line_comment": [";", "//"], + "multi_line_comments": [["/*", "*/"]], + "important_syntax": ["% c-sdk {"], + "extensions": ["pio"] + }, "Poke": { "multi_line_comments": [["/*", "*/"]], "extensions": ["pk"] diff --git a/src/language/embedding.rs b/src/language/embedding.rs index 4a649ba14..00cc2ed1e 100644 --- a/src/language/embedding.rs +++ b/src/language/embedding.rs @@ -19,6 +19,11 @@ pub static END_TEMPLATE: Lazy = Lazy::new(|| Regex::new(r#""#) pub static STARTING_MARKDOWN_REGEX: Lazy = Lazy::new(|| Regex::new(r#"```\S+\s"#).unwrap()); pub static ENDING_MARKDOWN_REGEX: Lazy = Lazy::new(|| Regex::new(r#"```\s?"#).unwrap()); +pub static START_PIO_CSDK: Lazy = + Lazy::new(|| Regex::new(r#"% c-sdk \{"#).unwrap()); +pub static END_PIO_CSDK: Lazy = + Lazy::new(|| Regex::new(r#"%}"#).unwrap()); + /// A memory of a regex matched. /// The values provided by `Self::start` and `Self::end` are in the same space as the /// start value supplied to `RegexCache::build` @@ -62,6 +67,7 @@ pub(crate) struct RegexCache<'a> { pub(crate) enum RegexFamily<'a> { HtmlLike(HtmlLike<'a>), Markdown(Markdown<'a>), + PioAsm(PioAsm<'a>), Rust, } @@ -75,6 +81,10 @@ pub(crate) struct Markdown<'a> { starts: Option]>>, } +pub(crate) struct PioAsm<'a> { + starts: Option]>>, +} + impl<'a> HtmlLike<'a> { pub fn start_script_in_range( &'a self, @@ -107,6 +117,12 @@ impl<'a> Markdown<'a> { } } +impl<'a> PioAsm<'a> { + pub fn starts_in_range(&'a self, start: usize, end: usize) -> Option<&Capture<'a>> { + filter_range(self.starts.as_ref()?, start, end).and_then(|mut it| it.next()) + } +} + fn filter_range<'a>( dataset: &'a [Capture<'a>], start: usize, @@ -169,6 +185,17 @@ impl<'a> RegexCache<'a> { None } } + LanguageType::PioAsm => { + let pioasm = PioAsm { + starts: save_captures(&START_PIO_CSDK, lines, start, end), + }; + + if pioasm.starts.is_some() { + Some(RegexFamily::PioAsm(pioasm)) + } else { + None + } + } _ => None, }; Self { inner } diff --git a/src/language/language_type.rs b/src/language/language_type.rs index 2cb949fb7..56c37b3a9 100644 --- a/src/language/language_type.rs +++ b/src/language/language_type.rs @@ -183,6 +183,12 @@ impl LanguageType { // Add all the markdown blobs. *stats.blobs.entry(language).or_default() += blob; } + LanguageContext::PioAsm { balanced, language } => { + // Add the lines for the code fences. + stats.comments += if balanced { 2 } else { 1 }; + // Add the code inside the fence to the stats. + *stats.blobs.entry(language).or_default() += blob; + } } // Advance to after the language code and the delimiter.. diff --git a/src/language/syntax.rs b/src/language/syntax.rs index 5e1bc4a39..b55bf84b8 100644 --- a/src/language/syntax.rs +++ b/src/language/syntax.rs @@ -8,6 +8,7 @@ use once_cell::sync::Lazy; use super::embedding::{ RegexCache, RegexFamily, ENDING_MARKDOWN_REGEX, END_SCRIPT, END_STYLE, END_TEMPLATE, + END_PIO_CSDK, }; use crate::{stats::CodeStats, utils::ext::SliceExt, Config, LanguageType}; @@ -57,6 +58,10 @@ pub(crate) enum LanguageContext { balanced: bool, language: LanguageType, }, + PioAsm { + balanced: bool, + language: LanguageType, + }, Rust, } @@ -517,6 +522,41 @@ impl SyntaxCounter { None } } + RegexFamily::PioAsm(pioasm) => { + // TODO: This logic is copied from Markdown. That is, it allows for unbalanced code + // blocks, which I don't think is valid. Not sure what the proper way to handle + // this is, though. + if !lines[start..end].contains_slice(b"% c-sdk {") { + return None; + } + + let opening_fence = pioasm.starts_in_range(start, end)?; + let start_of_code = opening_fence.end(); + let closing_fence = END_PIO_CSDK.find(&lines[start_of_code..]); + if let Some(m) = &closing_fence { + trace!("{:?}", String::from_utf8_lossy(m.as_bytes())); + } + let end_of_code = closing_fence + .map_or_else(|| lines.len(), |fence| start_of_code + fence.start()); + let end_of_code_block = + closing_fence.map_or_else(|| lines.len(), |fence| start_of_code + fence.end()); + let balanced = closing_fence.is_some(); + + let language = LanguageType::C; + trace!( + "{} BLOCK: {:?}", + language, + String::from_utf8_lossy(&lines[start_of_code..end_of_code]) + ); + let stats = + language.parse_from_slice(&lines[start_of_code..end_of_code].trim(), config); + + Some(FileContext::new( + LanguageContext::PioAsm { balanced, language }, + end_of_code_block, + stats, + )) + } } } From 38ad5265f86b29f734b9fa6e9b0d982b99c76e03 Mon Sep 17 00:00:00 2001 From: Will Eccles Date: Tue, 2 Jan 2024 17:48:46 -0500 Subject: [PATCH 2/4] pioasm: add test --- tests/data/pioasm.pio | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/data/pioasm.pio diff --git a/tests/data/pioasm.pio b/tests/data/pioasm.pio new file mode 100644 index 000000000..98bdacff9 --- /dev/null +++ b/tests/data/pioasm.pio @@ -0,0 +1,29 @@ +; 30 lines 9 code 14 comments 7 blanks +; test program +.program hello + +; A comment. +// A C++-style comment +/* A C-style comment */ + +/* + * + * a multiline comment + */ + +loop: + pull ; pull word from TX FIFO + out pins, 1 /* set pins */ + jmp loop // jump + +% c-sdk { +/* A dummy function to test pioasm parsing. */ +static inline uint my_cool_function(uint a) { + uint ret = a >> 1; + + // a comment in C + return ret; +} +%} + +; a comment after the C code From fae41057bd27dec238fa82086c9027a58501cb0d Mon Sep 17 00:00:00 2001 From: Will Eccles Date: Tue, 2 Jan 2024 17:49:03 -0500 Subject: [PATCH 3/4] README: add pioasm to list of languages --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4ddafe99d..f430b7ade 100644 --- a/README.md +++ b/README.md @@ -467,6 +467,7 @@ Perl Perl6 Pest Php +PioAsm Poke Polly Pony From 22f5408e334c3d447bd9e902a655a6e6e128b104 Mon Sep 17 00:00:00 2001 From: Will Eccles Date: Tue, 2 Jan 2024 17:50:20 -0500 Subject: [PATCH 4/4] style: single-line declarations when possible --- src/language/embedding.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/language/embedding.rs b/src/language/embedding.rs index 00cc2ed1e..39fed1af4 100644 --- a/src/language/embedding.rs +++ b/src/language/embedding.rs @@ -19,10 +19,8 @@ pub static END_TEMPLATE: Lazy = Lazy::new(|| Regex::new(r#""#) pub static STARTING_MARKDOWN_REGEX: Lazy = Lazy::new(|| Regex::new(r#"```\S+\s"#).unwrap()); pub static ENDING_MARKDOWN_REGEX: Lazy = Lazy::new(|| Regex::new(r#"```\s?"#).unwrap()); -pub static START_PIO_CSDK: Lazy = - Lazy::new(|| Regex::new(r#"% c-sdk \{"#).unwrap()); -pub static END_PIO_CSDK: Lazy = - Lazy::new(|| Regex::new(r#"%}"#).unwrap()); +pub static START_PIO_CSDK: Lazy = Lazy::new(|| Regex::new(r#"% c-sdk \{"#).unwrap()); +pub static END_PIO_CSDK: Lazy = Lazy::new(|| Regex::new(r#"%}"#).unwrap()); /// A memory of a regex matched. /// The values provided by `Self::start` and `Self::end` are in the same space as the