sig
(* Signature declaring types and functions for sketching documents,
   scoring sketches and clustering the scores.
   NOTE(review): every type below is written with a [Wshiml.] qualifier,
   which suggests this text was printed by the compiler/toplevel for a
   module named [Wshiml] rather than hand-written -- confirm.
   NOTE(review): no default values for the optional arguments are visible
   in this signature; check the implementation before relying on any. *)

(** Abstract sketch type; its representation is hidden by this signature.
    NOTE(review): presumably a compact fingerprint of one document's
    contents -- confirm against the implementation. *)
type sketch

(** A list of [(sketch, int)] pairs.
    NOTE(review): the [int] is presumably an index or identifier of the
    document the sketch was built from -- confirm. *)
type sketchdocs = (Wshiml.sketch * int) list

(** A function from one [bytes] value to a list of [bytes] values.
    NOTE(review): presumably splits document text into tokens -- confirm. *)
type tokeniser = bytes -> bytes list

(** [compare_docs ?tokenise ?n a b] takes two [bytes] values and returns a
    [float].
    NOTE(review): presumably [a] and [b] are document contents and the
    result is a similarity score; the meaning of [n] and the range of the
    result are not visible here -- confirm. *)
val compare_docs :
?tokenise:Wshiml.tokeniser -> ?n:int -> bytes -> bytes -> float

(** [sketch_of_doc ?tokenise ?n doc] builds a {!sketch} from a single
    [bytes] value.
    NOTE(review): [doc] is presumably the document's contents (not a file
    name) -- confirm. *)
val sketch_of_doc :
?tokenise:Wshiml.tokeniser -> ?n:int -> bytes -> Wshiml.sketch

(** [sketch_docs ?tokenise ?n ?slurp_file docs] takes a list of [bytes]
    values and returns a {!sketchdocs} list.
    NOTE(review): [slurp_file] has type [bytes -> bytes]; presumably it maps
    a file name to that file's contents, in which case [docs] would be a
    list of file names -- confirm. *)
val sketch_docs :
?tokenise:Wshiml.tokeniser ->
?n:int -> ?slurp_file:(bytes -> bytes) -> bytes list -> Wshiml.sketchdocs

(** [supersketches ?n sketches] maps one {!sketchdocs} list to another.
    NOTE(review): presumably combines or condenses each sketch into a
    coarser one, with [n] controlling the grouping -- confirm. *)
val supersketches : ?n:int -> Wshiml.sketchdocs -> Wshiml.sketchdocs

(** A list of [((int, int), int)] entries.
    NOTE(review): presumably the inner pair identifies two documents and
    the trailing [int] is their score or match count -- confirm. *)
type scoreddocs = ((int * int) * int) list

(** [score_sketches ?threshold sketches] produces a {!scoreddocs} list from
    a {!sketchdocs} list.
    NOTE(review): [threshold] is a [float]; presumably entries scoring
    below it are dropped -- confirm, including how it relates to the [int]
    scores in the result. *)
val score_sketches :
?threshold:float -> Wshiml.sketchdocs -> Wshiml.scoreddocs

(** A list of lists of [int].
    NOTE(review): presumably each inner list is one cluster of document
    identifiers -- confirm. *)
type clusters = int list list

(** [cluster_scores ?ndocs scores] produces {!clusters} from a
    {!scoreddocs} list.
    NOTE(review): [ndocs] is presumably the total number of documents
    being clustered -- confirm. *)
val cluster_scores : ?ndocs:int -> Wshiml.scoreddocs -> Wshiml.clusters
end