Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Quoted symbols and printing for smtlib-related languages #198

Merged
merged 6 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,20 @@ next

### Parsing

- Treat quoted symbols from the stdlib as symbols, regardless of
their contents. Previously a `|assert|` would be understood as the
reserved work `assert`, allowing e.g. `(|assert| false)`. From now on
these are understood as symbols, so one can `(declare |assert| () Bool)`
(PR#198)
- Add parsing extensions for the smtlib2 language (PR#190, PR#194)
- Better split elements of clauses in `cnf` TPTP statements (PR#190)
- Ensure illegal chars raise the correct error during lexing
(Issue#191, PR#192)

### Printing

- Add printers for smtlib identifiers (PR#198)

### Typing

- Enforce some missing constraints on bitvectors
Expand Down
2 changes: 2 additions & 0 deletions src/languages/smtlib2/poly/dolmen_smtlib2_poly.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ module type Term = Ast.Term
module type Statement = Ast.Statement
module type Extension = Ast.Extension

module Print = Print

module Make
(L : Dolmen_intf.Location.S)
(I : Id)
Expand Down
3 changes: 3 additions & 0 deletions src/languages/smtlib2/poly/dolmen_smtlib2_poly.mli
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ module type Statement = Ast.Statement
module type Extension = Ast.Extension
(** Implementation requirement for the Smtlib format. *)

module Print = Print
(** Printing functions. *)

module Make
(L : Dolmen_intf.Location.S)
(I : Id)
Expand Down
2 changes: 1 addition & 1 deletion src/languages/smtlib2/poly/dune
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
(public_name dolmen.smtlib2.poly)
(instrumentation (backend bisect_ppx))
(libraries dolmen_std dolmen_intf menhirLib)
(modules Dolmen_smtlib2_poly Tokens Lexer Parser Ast Syntax_messages)
(modules Dolmen_smtlib2_poly Tokens Lexer Parser Ast Syntax_messages Print)
)

; Common include
Expand Down
18 changes: 13 additions & 5 deletions src/languages/smtlib2/poly/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,19 @@
| '\n' -> newline lexbuf
| _ -> ()
done;
(* Check whetehr the symbol is a reserved word. *)
(* Check whether the symbol is a reserved word. *)
try M.find s reserved_words
with Not_found -> SYMBOL s

let quoted_symbol newline lexbuf s =
(* register the newlines in quoted symbols to maintain correct locations.*)
Gbury marked this conversation as resolved.
Show resolved Hide resolved
for i = 0 to (String.length s - 1) do
match s.[i] with
| '\n' -> newline lexbuf
| _ -> ()
done;
(* Quoted symbols allow to use reserved words as symbols *)
SYMBOL s
}

let white_space_char = ['\t' '\n' '\r' ' ']
Expand Down Expand Up @@ -185,10 +194,9 @@ rule token newline = parse
| binary as s { BIN s }
| '"' { string newline (Buffer.create 42) lexbuf }
| keyword as s { KEYWORD s }
| simple_symbol as s
| '|' (quoted_symbol_char* as s) '|'
{ symbol newline lexbuf s }
| _ { raise Error }
| simple_symbol as s { symbol newline lexbuf s }
| '|' (quoted_symbol_char* as s) '|' { quoted_symbol newline lexbuf s }
| _ { raise Error }

and string newline b = parse
| '"' '"' { Buffer.add_char b '"'; string newline b lexbuf }
Expand Down
117 changes: 117 additions & 0 deletions src/languages/smtlib2/poly/print.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@

(* This file is free software, part of dolmen. See file "LICENSE" for more information *)

(* Printing of identifiers *)
(* ************************************************************************* *)

module L = Lexer

exception Cannot_print of string

let _cannot_print format =
Format.kasprintf (fun msg -> raise (Cannot_print msg)) format

(* lexical definitions taken from the smtlib specification *)

let[@inline] is_whitespace c =
let c = Char.code c in
c = 9 (* tab *) || c = 10 (* line feed *) ||
c = 13 (* cariage return *) || c = 32 (* space *)

let[@inline] is_printable c =
let c = Char.code c in
(32 <= c && c <= 126) || 128 <= c

let is_quoted_symbol_char c =
(is_whitespace c || is_printable c) &&
(c <> '|' && c <> '\\')

let[@inline] is_letter = function
| 'a'..'z' | 'A'..'Z' -> true
| _ -> false

let[@inline] is_digit = function
| '0'..'9' -> true
| _ -> false

let[@inline] is_other_simple_symbol_chars = function
| '~' | '!' | '@' | '$' | '%' | '^' | '&' | '*' | '_'
| '-' | '+' | '=' | '<' | '>' | '.' | '?' | '/' -> true
| _ -> false

let is_simple_symbol_char c =
is_letter c || is_digit c || is_other_simple_symbol_chars c

(* symbol categorization *)

type symbol =
| Simple
| Quoted
| Unprintable

let categorize_symbol s =
match s with
| "" -> Unprintable
| "_" | "!" | "as" | "let"
| "exists" | "forall"
| "match" | "par"
| "assert"
| "check-sat"
| "check-sat-assuming"
| "declare-const"
| "declare-datatype"
| "declare-datatypes"
| "declare-fun"
| "declare-sort"
| "define-fun"
| "define-fun-rec"
| "define-funs-rec"
| "define-sort"
| "echo"
| "exit"
| "get-assertions"
| "get-assignment"
| "get-info"
| "get-model"
| "get-option"
| "get-proof"
| "get-unsat-assumptions"
| "get-unsat-core"
| "get-value"
| "pop"
| "push"
| "reset"
| "reset-assertions"
| "set-info"
| "set-logic"
| "set-option" -> Quoted
| _ ->
(* we are guaranteed that `s` is not the empty string *)
if not (is_digit s.[0]) &&
(Dolmen_std.Misc.string_for_all is_simple_symbol_char s) then
Simple
else if Dolmen_std.Misc.string_for_all is_quoted_symbol_char s then
Quoted
else
Unprintable

let id fmt name =
let aux fmt s =
(* TODO: expose/add a cache to not redo the `categorize_symbol` computation each time *)
match categorize_symbol s with
| Simple -> Format.pp_print_string fmt s
| Quoted -> Format.fprintf fmt "|%s|" s
| Unprintable ->
_cannot_print "symbol \"%s\" cannot be printed due to lexical constraints" s
in
match (name : Dolmen_std.Name.t) with
| Simple s -> aux fmt s
| Indexed { basename = _; indexes = [] } ->
_cannot_print "indexed id with no indexes: %a" Dolmen_std.Name.print name
| Indexed { basename; indexes; } ->
let pp_sep fmt () = Format.fprintf fmt " " in
Format.fprintf fmt "(_ %a %a)"
aux basename (Format.pp_print_list ~pp_sep aux) indexes
| Qualified _ ->
_cannot_print "qualified identifier: %a" Dolmen_std.Name.print name

16 changes: 12 additions & 4 deletions src/languages/smtlib2/v2.6/response/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@
try M.find s reserved_words
with Not_found -> SYMBOL s

let quoted_symbol newline lexbuf s =
(* register the newlines in quoted symbols to maintain correct locations.*)
for i = 0 to (String.length s - 1) do
match s.[i] with
| '\n' -> newline lexbuf
| _ -> ()
done;
(* Quoted symbols allow to use reserved words as symbols *)
SYMBOL s
}

let white_space_char = ['\t' '\n' '\r' ' ']
Expand Down Expand Up @@ -135,10 +144,9 @@ rule token newline = parse
| binary as s { BIN s }
| '"' { string newline (Buffer.create 42) lexbuf }
| keyword as s { KEYWORD s }
| simple_symbol as s
| '|' (quoted_symbol_char* as s) '|'
{ symbol newline lexbuf s }
| _ { raise Error }
| simple_symbol as s { symbol newline lexbuf s }
| '|' (quoted_symbol_char* as s) '|' { quoted_symbol newline lexbuf s }
| _ { raise Error }

and string newline b = parse
| '"' '"' { Buffer.add_char b '"'; string newline b lexbuf }
Expand Down
2 changes: 2 additions & 0 deletions src/languages/smtlib2/v2.6/script/dolmen_smtlib2_v6_script.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ module type Term = Ast.Term
module type Statement = Ast.Statement
module type Extension = Ast.Extension

module Print = Print

module Make
(L : Dolmen_intf.Location.S)
(I : Id)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ module type Statement = Ast.Statement
module type Extension = Ast.Extension
(** Implementation requirement for the Smtlib format. *)

module Print = Print
(** Printing functions. *)

module Make
(L : Dolmen_intf.Location.S)
(I : Id)
Expand Down
2 changes: 1 addition & 1 deletion src/languages/smtlib2/v2.6/script/dune
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
(public_name dolmen.smtlib2.v6_script)
(instrumentation (backend bisect_ppx))
(libraries dolmen_std dolmen_intf menhirLib)
(modules Dolmen_smtlib2_v6_script Tokens Lexer Parser Ast Syntax_messages)
(modules Dolmen_smtlib2_v6_script Tokens Lexer Parser Ast Syntax_messages Print)
)

; Common include
Expand Down
17 changes: 13 additions & 4 deletions src/languages/smtlib2/v2.6/script/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,16 @@
try M.find s reserved_words
with Not_found -> SYMBOL s

let quoted_symbol newline lexbuf s =
(* register the newlines in quoted symbols to maintain correct locations.*)
for i = 0 to (String.length s - 1) do
match s.[i] with
| '\n' -> newline lexbuf
| _ -> ()
done;
(* Quoted symbols allow to use reserved words as symbols *)
SYMBOL s

}

let white_space_char = ['\t' '\n' '\r' ' ']
Expand Down Expand Up @@ -182,10 +192,9 @@ rule token newline = parse
| binary as s { BIN s }
| '"' { string newline (Buffer.create 42) lexbuf }
| keyword as s { KEYWORD s }
| simple_symbol as s
| '|' (quoted_symbol_char* as s) '|'
{ symbol newline lexbuf s }
| _ { raise Error }
| simple_symbol as s { symbol newline lexbuf s}
| '|' (quoted_symbol_char* as s) '|' { quoted_symbol newline lexbuf s }
| _ { raise Error }

and string newline b = parse
| '"' '"' { Buffer.add_char b '"'; string newline b lexbuf }
Expand Down
Loading