Skip to content

Commit

Permalink
Add a marshalled input/output format for search indexes
Browse files Browse the repository at this point in the history
Signed-off-by: Paul-Elliot <[email protected]>
  • Loading branch information
panglesd committed Feb 20, 2024
1 parent 8668acf commit 2861f69
Show file tree
Hide file tree
Showing 10 changed files with 207 additions and 43 deletions.
4 changes: 4 additions & 0 deletions src/model/paths.ml
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,10 @@ module Identifier = struct
mk_parent LocalName.to_string "sli" (fun (p, n) ->
`SourceLocationInternal (p, n))
end

module Hashtbl = struct
module Any = Hashtbl.Make (Any)
end
end

module Path = struct
Expand Down
4 changes: 4 additions & 0 deletions src/model/paths.mli
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,10 @@ module Identifier : sig
end
end

module Hashtbl : sig
module Any : Hashtbl.S with type key = Any.t
end

module Mk : sig
open Names

Expand Down
36 changes: 23 additions & 13 deletions src/odoc/bin/main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -403,46 +403,56 @@ module Source_tree = struct
end

module Indexing = struct
let output_file ~dst =
match dst with
| Some file -> Fs.File.of_string file
| None -> Fs.File.of_string "index.json"

let index dst warnings_options inputs_in_file inputs =
let output = output_file ~dst in
let output_file ~dst marshall =
match (dst, marshall) with
| Some file, _ -> Fs.File.of_string file
| None, `JSON -> Fs.File.of_string "index.json"
| None, `Marshall -> Fs.File.of_string "index-index.odoc"

let index dst marshall warnings_options inputs_in_file inputs =
let marshall = if marshall then `Marshall else `JSON in
let output = output_file ~dst marshall in
match (inputs_in_file, inputs) with
| [], [] ->
Result.Error
(`Msg
"At least one of --file-list or an .odocl file must be passed to \
odoc compile-index")
| _ -> Indexing.compile ~output ~warnings_options inputs_in_file inputs
| _ ->
Indexing.compile marshall ~output ~warnings_options inputs_in_file
inputs

let cmd =
let dst =
let doc =
"Output file path. Non-existing intermediate directories are created. \
Defaults to index.json"
Defaults to index.json, or index-index.odoc if --marshall is passed \
(in which case, the $(i,index-) prefix is mandatory)."
in
Arg.(
value & opt (some string) None & info ~docs ~docv:"PATH" ~doc [ "o" ])
in
let inputs_in_file =
let doc =
"Input text file containing a line-separated list of paths to .odocl \
files to index."
"Input text file containing a line-separated list of paths to \
.odocl/.json files to index."
in
Arg.(
value & opt_all convert_fpath []
& info ~doc ~docv:"FILE" [ "file-list" ])
in
let marshall =
let doc = "whether to output a json file, or an .odoc file" in
Arg.(value & flag & info ~doc [ "marshall" ])
in
let inputs =
let doc = ".odocl file to index" in
let doc = ".odocl/.json file to index" in
Arg.(value & pos_all convert_fpath [] & info ~doc ~docv:"FILE" [])
in
Term.(
const handle_error
$ (const index $ dst $ warnings_options $ inputs_in_file $ inputs))
$ (const index $ dst $ marshall $ warnings_options $ inputs_in_file
$ inputs))

let info ~docs =
let doc =
Expand Down
88 changes: 75 additions & 13 deletions src/odoc/indexing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,26 @@ open Odoc_json_index
open Or_error
open Odoc_model

let handle_file file ~unit ~page =
Odoc_file.load file >>= fun unit' ->
match unit' with
| { Odoc_file.content = Unit_content unit'; _ } when unit'.hidden ->
Error (`Msg "Hidden units are ignored when generating an index")
| { Odoc_file.content = Unit_content unit'; _ } (* when not unit'.hidden *) ->
Ok (unit unit')
| { Odoc_file.content = Page_content page'; _ } -> Ok (page page')
| _ ->
Error
(`Msg
"Only pages and unit are allowed as input when generating an index")
module H = Odoc_model.Paths.Identifier.Hashtbl.Any

let handle_file file ~unit ~page ~occ =
match Fpath.basename file with
| s when String.is_prefix ~affix:"index-" s ->
Odoc_file.load_index file >>= fun index -> Ok (occ index)
| _ -> (
Odoc_file.load file >>= fun unit' ->
match unit' with
| { Odoc_file.content = Unit_content unit'; _ } when unit'.hidden ->
Error (`Msg "Hidden units are ignored when generating an index")
| { Odoc_file.content = Unit_content unit'; _ }
(* when not unit'.hidden *) ->
Ok (unit unit')
| { Odoc_file.content = Page_content page'; _ } -> Ok (page page')
| _ ->
Error
(`Msg
"Only pages and unit are allowed as input when generating an \
index"))

let parse_input_file input =
let is_sep = function '\n' | '\r' -> true | _ -> false in
Expand All @@ -32,7 +40,7 @@ let parse_input_files input =
(Ok []) input
>>= fun files -> Ok (List.concat files)

let compile ~output ~warnings_options inputs_in_file inputs =
let compile_to_json ~output ~warnings_options inputs_in_file inputs =
parse_input_files inputs_in_file >>= fun files ->
let files = List.rev_append inputs files in
let output_channel =
Expand All @@ -53,6 +61,7 @@ let compile ~output ~warnings_options inputs_in_file inputs =
handle_file
~unit:(print Json_search.unit acc)
~page:(print Json_search.page acc)
~occ:(print Json_search.index acc)
file
with
| Ok acc -> acc
Expand All @@ -66,3 +75,56 @@ let compile ~output ~warnings_options inputs_in_file inputs =
result |> Error.handle_warnings ~warnings_options >>= fun (_ : bool) ->
Format.fprintf output "]";
Ok ()

let compile_to_marshall ~output ~warnings_options inputs_in_file inputs =
parse_input_files inputs_in_file >>= fun files ->
let files = List.rev_append inputs files in
let final_index = H.create 10 in
let unit u =
Odoc_model.Fold.unit
~f:(fun () item ->
let entries =
Odoc_search.Entry.entries_of_item
(* (u.Odoc_model.Lang.Compilation_unit.id *)
(* :> Odoc_model.Paths.Identifier.t) *)
item
in
List.iter
(fun entry -> H.add final_index entry.Odoc_search.Entry.id entry)
entries)
() u
in
let page p =
Odoc_model.Fold.page
~f:(fun () item ->
let entries =
Odoc_search.Entry.entries_of_item
(* (p.Odoc_model.Lang.Page.name :> Odoc_model.Paths.Identifier.t) *)
item
in
List.iter
(fun entry -> H.add final_index entry.Odoc_search.Entry.id entry)
entries)
() p
in
let index i = H.iter (H.add final_index) i in
let index () =
List.fold_left
(fun acc file ->
match handle_file ~unit ~page ~occ:index file with
| Ok acc -> acc
| Error (`Msg m) ->
Error.raise_warning ~non_fatal:true
(Error.filename_only "%s" m (Fs.File.to_string file));
acc)
() files
in
let result = Error.catch_warnings index in
result |> Error.handle_warnings ~warnings_options >>= fun () ->
Ok (Odoc_file.save_index output final_index)

let compile out_format ~output ~warnings_options inputs_in_file inputs =
match out_format with
| `JSON -> compile_to_json ~output ~warnings_options inputs_in_file inputs
| `Marshall ->
compile_to_marshall ~output ~warnings_options inputs_in_file inputs
2 changes: 2 additions & 0 deletions src/odoc/indexing.mli
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ val handle_file :
Fpath.t ->
unit:(Odoc_model.Lang.Compilation_unit.t -> 'a) ->
page:(Odoc_model.Lang.Page.t -> 'a) ->
occ:(Odoc_search.Entry.t Odoc_model.Paths.Identifier.Hashtbl.Any.t -> 'a) ->
('a, [> msg ]) result
(** This function is exposed for custom indexers that uses [odoc] as a library
to generate their search index *)

val compile :
[ `JSON | `Marshall ] ->
output:Fs.file ->
warnings_options:Odoc_model.Error.warnings_options ->
Fs.file list ->
Expand Down
28 changes: 20 additions & 8 deletions src/odoc/odoc_file.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,18 @@ type t = { content : content; warnings : Odoc_model.Error.t list }
let magic = "odoc-%%VERSION%%"

(** Exceptions while saving are allowed to leak. *)
let save_unit file (root : Root.t) (t : t) =
let save_ file f =
Fs.Directory.mkdir_p (Fs.File.dirname file);
let oc = open_out_bin (Fs.File.to_string file) in
output_string oc magic;
Marshal.to_channel oc root [];
Marshal.to_channel oc t [];
f oc;
close_out oc

let save_unit file (root : Root.t) (t : t) =
save_ file (fun oc ->
Marshal.to_channel oc root [];
Marshal.to_channel oc t [])

let save_page file ~warnings page =
let dir = Fs.File.dirname file in
let base = Fs.File.(to_string @@ basename file) in
Expand Down Expand Up @@ -70,9 +74,7 @@ let load_ file f =
let res =
try
let actual_magic = really_input_string ic (String.length magic) in
if actual_magic = magic then
let root = Marshal.from_channel ic in
f ic root
if actual_magic = magic then f ic
else
let msg =
Printf.sprintf "%s: invalid magic number %S, expected %S\n%!" file
Expand All @@ -89,7 +91,17 @@ let load_ file f =
close_in ic;
res

let load file = load_ file (fun ic _ -> Ok (Marshal.from_channel ic))
let load file =
load_ file (fun ic ->
let _root = Marshal.from_channel ic in
Ok (Marshal.from_channel ic))

(** The root is saved separately in the files to support this function. *)
let load_root file = load_ file (fun _ root -> Ok root)
let load_root file =
load_ file (fun ic ->
let root = Marshal.from_channel ic in
Ok root)

let save_index dst idx = save_ dst (fun oc -> Marshal.to_channel oc idx [])

let load_index file = load_ file (fun ic -> Ok (Marshal.from_channel ic))
8 changes: 8 additions & 0 deletions src/odoc/odoc_file.mli
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,11 @@ val load : Fs.File.t -> (t, [> msg ]) result

val load_root : Fs.File.t -> (Root.t, [> msg ]) result
(** Only load the root. Faster than {!load}, used for looking up imports. *)

val save_index :
Fs.File.t -> Odoc_search.Entry.t Paths.Identifier.Hashtbl.Any.t -> unit

val load_index :
Fs.File.t ->
(Odoc_search.Entry.t Paths.Identifier.Hashtbl.Any.t, [> msg ]) result
(** Load an [.odoc] file. *)
10 changes: 10 additions & 0 deletions src/search/json_index/json_search.ml
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,13 @@ let page ppf (page : Odoc_model.Lang.Page.t) =
in
let _first = Odoc_model.Fold.page ~f true page in
()

let index ppf (index : Entry.t Odoc_model.Paths.Identifier.Hashtbl.Any.t) =
let _first =
Odoc_model.Paths.Identifier.Hashtbl.Any.fold
(fun _id entry first ->
let entry = (entry, Html.of_entry entry) in
output_json ppf first [ entry ])
index true
in
()
4 changes: 4 additions & 0 deletions src/search/json_index/json_search.mli
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@

val unit : Format.formatter -> Odoc_model.Lang.Compilation_unit.t -> unit
val page : Format.formatter -> Odoc_model.Lang.Page.t -> unit
val index :
Format.formatter ->
Odoc_search.Entry.t Odoc_model.Paths.Identifier.Hashtbl.Any.t ->
unit
66 changes: 57 additions & 9 deletions test/search/html_search.t/run.t
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,21 @@ we will generate.
$ odoc html-generate --search-uri fuse.js.js --search-uri index.js -o html page-page.odocl
$ odoc support-files -o html

We now focus on how to generate the index.js file. There are mainly two ways: by
using odoc as a library, or by using the the `compile-index` command. This
command generates a json index containing all .odocl given as input, to be
consumed later by a search engine. If -o is not provided, the file is saved as
index.json.
Odocl files can be given either in a list (using --file-list,
passing a file with the list of line-separated files), or by passing directly
the name of the files.
We now focus on how to generate the index.js file.

For this, we compute an index of all the values contained in a given list of
odoc files, using the `compile-index` command.

This command generates has two output format: a json output for consumption by
external search engine, and an `odoc` specific extension. The odoc file is
meant to be consumed either by search engine written in OCaml, which would
depend on `odoc` as a library, or by `odoc` itself to build a global index
incrementally: the `compile-index` command can take indexes as input!

If -o is not provided, the file is saved as index.json, or index-index.odoc if
the --marshall flag is passed. Odocl files can be given either in a list (using
--file-list, passing a file with the list of line-separated files), or by
passing directly the name of the files.

$ printf "main.odocl\npage-page.odocl\nj.odocl\n" > index_map
$ odoc compile-index -o index1.json --file-list index_map
Expand All @@ -74,7 +81,48 @@ Let's check that the previous commands are indeed independent:
$ diff index.json index1.json
$ diff index.json index2.json
The index file contains a json array, each element of the array corresponding to
Let's now test the --marshall flag.
We compare:
- the result of outputing as a marshalled file, and then use that to output a json file.
- Directly outputing a json file

$ odoc compile-index -o index-main.odoc --marshall main.odocl
$ odoc compile-index -o main.json index-main.odoc
$ cat main.json | jq sort | jq '.[]' -c | sort > main1.json

$ odoc compile-index -o main.json main.odocl
$ cat main.json | jq sort | jq '.[]' -c | sort > main2.json

$ diff main1.json main2.json

$ odoc compile-index -o index-j.odoc --marshall j.odocl
$ odoc compile-index -o j.json index-j.odoc
$ cat j.json | jq sort | jq '.[]' -c | sort > j1.json

$ odoc compile-index -o j.json j.odocl
$ cat j.json | jq sort | jq '.[]' -c | sort > j2.json

$ diff j1.json j2.json

$ odoc compile-index -o index-page.odoc --marshall page-page.odocl
$ odoc compile-index -o page.json index-page.odoc
$ cat page.json | jq sort | jq '.[]' -c | sort > page1.json

$ odoc compile-index -o page.json page-page.odocl
$ cat page.json | jq sort | jq '.[]' -c | sort > page2.json

$ diff page1.json page2.json

Now, we compare the combination of the three marshalled files (index-main.odoc,
index-page.odoc, index-j.odoc).

$ odoc compile-index -o all.json index-page.odoc index-j.odoc index-main.odoc
$ cat all.json | jq sort | jq '.[]' -c | sort > all1.json

$ cat index.json | jq sort | jq '.[]' -c | sort > all2.json
$ diff all1.json all2.json

The json index file contains a json array, each element of the array corresponding to
a search entry.
An index entry contains:
- an ID,
Expand Down

0 comments on commit 2861f69

Please sign in to comment.