Skip to content

Commit

Permalink
Fix the parsing of filenames coming from GNU/git diff
Browse files Browse the repository at this point in the history
Co-authored-by: Marek Kubica <[email protected]>
  • Loading branch information
kit-ty-kate and Leonidas-from-XIV committed Oct 4, 2024
1 parent 6a754d0 commit ae28123
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 65 deletions.
5 changes: 3 additions & 2 deletions src/dune
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
(name patch)
(synopsis "Patch purely in OCaml")
(public_name patch)
(modules patch)
(wrapped false))
(modules patch patch_lib fname))

(ocamllex fname)

(executable
(name patch_command)
Expand Down
5 changes: 5 additions & 0 deletions src/fname.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
val parse : string -> (string option, string) result
(** [parse s] parses [s] and returns a filename or [None] if the filename
is equivalent to [/dev/null].
Returns [Error msg] in case of error. *)
69 changes: 69 additions & 0 deletions src/fname.mll
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{
module String = Patch_lib.String

type lexer_output =
| Quoted of string
| Unquoted
| Error of string

exception Cant_parse_octal

let ascii_zero = Char.code '0'
let octal_to_char c1 c2 c3 =
let char_to_digit c = Char.code c - ascii_zero in
try
Char.chr (
(char_to_digit c1 lsl 6) lor
(char_to_digit c2 lsl 3) lor
char_to_digit c3
)
with Invalid_argument _ -> raise Cant_parse_octal
}

let octal = ['0'-'7']

rule lex_quoted_filename buf = parse
| "\\a" { Buffer.add_char buf '\007'; lex_quoted_filename buf lexbuf }
| "\\b" { Buffer.add_char buf '\b'; lex_quoted_filename buf lexbuf }
| "\\f" { Buffer.add_char buf '\012'; lex_quoted_filename buf lexbuf }
| "\\n" { Buffer.add_char buf '\n'; lex_quoted_filename buf lexbuf }
| "\\r" { Buffer.add_char buf '\r'; lex_quoted_filename buf lexbuf }
| "\\t" { Buffer.add_char buf '\t'; lex_quoted_filename buf lexbuf }
| "\\v" { Buffer.add_char buf '\011'; lex_quoted_filename buf lexbuf }
| "\\\\" { Buffer.add_char buf '\\'; lex_quoted_filename buf lexbuf }
| "\\\"" { Buffer.add_char buf '"'; lex_quoted_filename buf lexbuf }
| '\\' (['0'-'3'] as c1) (octal as c2) (octal as c3)
{
match octal_to_char c1 c2 c3 with
| octal ->
Buffer.add_char buf octal;
lex_quoted_filename buf lexbuf
| exception Cant_parse_octal -> Unquoted
}
| '\\' _ { Unquoted }
| '"' eof { Quoted (Buffer.contents buf) }
| '"' _ { Unquoted }
| _ as c { Buffer.add_char buf c; lex_quoted_filename buf lexbuf }
| eof { Unquoted }

and lex_filename buf = parse
| '"' { lex_quoted_filename buf lexbuf }
| _ { Unquoted }
| eof { Error "empty filename" }

{
let parse s =
let filename, _date =
match String.cut '\t' s with
| None -> (s, "")
| Some x -> x
in
if filename = "/dev/null" then
Ok None
else
let lexbuf = Lexing.from_string filename in
match lex_filename (Buffer.create 128) lexbuf with
| Quoted x -> Ok (Some x)
| Unquoted -> Ok (Some filename)
| Error msg -> Error msg
}
91 changes: 33 additions & 58 deletions src/patch.ml
Original file line number Diff line number Diff line change
@@ -1,47 +1,4 @@
module String = struct
let is_prefix ~prefix str =
let pl = String.length prefix in
if String.length str < pl then
false
else
String.sub str 0 (String.length prefix) = prefix

let cut sep str =
try
let idx = String.index str sep
and l = String.length str
in
let sidx = succ idx in
Some (String.sub str 0 idx, String.sub str sidx (l - sidx))
with
Not_found -> None

let cuts sep str =
let rec doit acc s =
match cut sep s with
| None -> List.rev (s :: acc)
| Some (a, b) -> doit (a :: acc) b
in
doit [] str

let slice ?(start = 0) ?stop str =
let stop = match stop with
| None -> String.length str
| Some x -> x
in
let len = stop - start in
String.sub str start len

let trim = String.trim

let get = String.get

let concat = String.concat

let length = String.length

let equal = String.equal
end
module String = Patch_lib.String

type hunk = {
mine_start : int ;
Expand All @@ -52,6 +9,14 @@ type hunk = {
their : string list ;
}

type parse_error = {
msg : string;
lines : string list;
(* TODO: add the start position of the error *)
}

exception Parse_error of parse_error

let unified_diff ~mine_no_nl ~their_no_nl hunk =
let no_nl_str = ["\\ No newline at end of file"] in
(* TODO *)
Expand Down Expand Up @@ -247,19 +212,30 @@ let pp ~git ppf {operation; hunks; mine_no_nl; their_no_nl} =
let pp_list ~git ppf diffs =
List.iter (Format.fprintf ppf "%a" (pp ~git)) diffs

(* TODO: remove this and let users decide the prefix level they want *)
let process_git_prefix ~git ~prefix s =
if git && String.is_prefix ~prefix s then
String.slice ~start:(String.length prefix) s
else
s

let operation_of_strings git mine their =
let get_filename_opt n =
let s = match String.cut '\t' n with None -> n | Some (x, _) -> x in
if s = no_file then None else
if git && (String.is_prefix ~prefix:"a/" s || String.is_prefix ~prefix:"b/" s) then
Some (String.slice ~start:2 s)
else Some s
in
match get_filename_opt mine, get_filename_opt their with
| None, Some n -> Create n
| Some n, None -> Delete n
| Some a, Some b -> Edit (a, b)
| None, None -> assert false (* ??!?? *)
let mine_fn = String.slice ~start:4 mine
and their_fn = String.slice ~start:4 their in
match Fname.parse mine_fn, Fname.parse their_fn with
| Ok None, Ok (Some b) ->
let b = process_git_prefix ~git ~prefix:"b/" b in
Create b
| Ok (Some a), Ok None ->
let a = process_git_prefix ~git ~prefix:"a/" a in
Delete a
| Ok (Some a), Ok (Some b) ->
let a = process_git_prefix ~git ~prefix:"a/" a in
let b = process_git_prefix ~git ~prefix:"b/" b in
Edit (a, b)
| Ok None, Ok None -> assert false (* ??!?? *)
| Error msg, _ -> raise (Parse_error {msg; lines = [mine]})
| _, Error msg -> raise (Parse_error {msg; lines = [their]})

let parse_one data =
(* first locate --- and +++ lines *)
Expand All @@ -271,8 +247,7 @@ let parse_one data =
let hdr = Rename_only (String.slice ~start:12 x, String.slice ~start:10 y) in
find_start git ~hdr xs
| x::y::xs when String.is_prefix ~prefix:"--- " x ->
let mine = String.slice ~start:4 x and their = String.slice ~start:4 y in
Some (operation_of_strings git mine their), xs
Some (operation_of_strings git x y), xs
| _::xs -> find_start git ?hdr xs
in
match find_start false data with
Expand Down
11 changes: 10 additions & 1 deletion src/patch.mli
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ type hunk = {
(** A hunk contains some difference between two files: each with a start line
and length, and then the content as lists of string. *)

type parse_error = {
msg : string;
lines : string list;
}

exception Parse_error of parse_error

val pp_hunk : mine_no_nl:bool -> their_no_nl:bool -> Format.formatter -> hunk -> unit
(** [pp_hunk ppf hunk] pretty-prints the [hunk] on [ppf], the printing is in the
same format as [diff] does. *)
Expand Down Expand Up @@ -49,7 +56,9 @@ val pp_list : git:bool -> Format.formatter -> t list -> unit
"git diff" style will be printed. *)

val parse : string -> t list
(** [parse data] decodes [data] as a list of diffs. *)
(** [parse data] decodes [data] as a list of diffs.
@raise Parse_error if a filename was unable to be parsed *)

val patch : string option -> t -> string option
(** [patch file_contents diff] applies [diff] on [file_contents], resulting in
Expand Down
51 changes: 51 additions & 0 deletions src/patch_lib.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
module String = struct
let is_prefix ~prefix str =
let pl = String.length prefix in
if String.length str < pl then
false
else
String.sub str 0 (String.length prefix) = prefix

let is_suffix ~suffix str =
let pl = String.length suffix in
if String.length str < pl then
false
else
String.sub str (String.length str - pl) pl = suffix

let cut sep str =
try
let idx = String.index str sep
and l = String.length str
in
let sidx = succ idx in
Some (String.sub str 0 idx, String.sub str sidx (l - sidx))
with
Not_found -> None

let cuts sep str =
let rec doit acc s =
match cut sep s with
| None -> List.rev (s :: acc)
| Some (a, b) -> doit (a :: acc) b
in
doit [] str

let slice ?(start = 0) ?stop str =
let stop = match stop with
| None -> String.length str
| Some x -> x
in
let len = stop - start in
String.sub str start len

let trim = String.trim

let get = String.get

let concat = String.concat

let length = String.length

let equal = String.equal
end
12 changes: 12 additions & 0 deletions src/patch_lib.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
module String : sig
val is_prefix : prefix:string -> string -> bool
val is_suffix : suffix:string -> string -> bool
val cut : char -> string -> (string * string) option
val cuts : char -> string -> string list
val slice : ?start:int -> ?stop:int -> string -> string
val trim : string -> string
val get : string -> int -> char
val concat : string -> string list -> string
val length : string -> int
val equal : string -> string -> bool
end
8 changes: 4 additions & 4 deletions test/test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ let unified_diff_spaces = {|\
|}

let unified_diff_spaces =
operations [Patch.Edit ("\"a/foo bar\"", "\"b/foo bar\"")] unified_diff_spaces
operations [Patch.Edit ("a/foo bar", "b/foo bar")] unified_diff_spaces

let git_diff_spaces = {|\
diff --git a/foo bar b/foo bar
Expand Down Expand Up @@ -854,7 +854,7 @@ let unified_diff_quotes = {|\
|}

let unified_diff_quotes =
operations [Patch.Edit ({|"foo bar \"baz\""|}, {|"\"foo\" bar baz"|})] unified_diff_quotes
operations [Patch.Edit ({|foo bar "baz"|}, {|"foo" bar baz|})] unified_diff_quotes

let git_diff_quotes = {|\
diff --git "a/foo bar \"baz\"" "b/\"foo\" bar baz"
Expand All @@ -867,7 +867,7 @@ index 88adca3..ef00db3 100644
|}

let git_diff_quotes =
operations [Patch.Edit ({|"a/foo bar \"baz\""|}, {|"b/\"foo\" bar baz"|})] git_diff_quotes
operations [Patch.Edit ({|foo bar "baz"|}, {|"foo" bar baz|})] git_diff_quotes

let busybox_diff_quotes = {|\
--- foo bar "baz"
Expand Down Expand Up @@ -941,7 +941,7 @@ let quoted_filename = {|\
|}

let quoted_filename =
operations [Patch.Create {|"\a\b\f\n\r\t\v\\\"\001\177\046"|}] quoted_filename
operations [Patch.Create "\007\b\012\n\r\t\011\\\"\001\127&"] quoted_filename

let unquoted_filename = {|\
--- /dev/null
Expand Down

0 comments on commit ae28123

Please sign in to comment.