Module Melange_compiler_libs.Misc

Miscellaneous useful types and functions

Warning: this module is unstable and part of compiler-libs.

let array_of_list_rev: list('a) => array('a);

Reporting fatal errors

let fatal_error: string => 'a;

Raise the Fatal_error exception with the given string.

let fatal_errorf: Stdlib.format4('a, Stdlib.Format.formatter, unit, 'b) => 'a;

Format the arguments according to the given format string and raise Fatal_error with the resulting string.

exception Fatal_error;

Exceptions and finalization

let try_finally: ?always:(unit => unit) => ?exceptionally:(unit => unit) => (unit => 'a) => 'a;

try_finally work ~always ~exceptionally is designed to run code in work that may fail with an exception, and has two kinds of cleanup routines: always, that must be run after any execution of the function (typically, freeing system resources), and exceptionally, that should be run only if work or always failed with an exception (typically, undoing user-visible state changes that would only make sense if the function completes correctly). For example:

let objfile = outputprefix ^ ".cmo" in
let oc = open_out_bin objfile in
Misc.try_finally
  (fun () ->
     bytecode
     ++ Timings.(accumulate_time (Generate sourcefile))
         (Emitcode.to_file oc modulename objfile);
     Warnings.check_fatal ())
  ~always:(fun () -> close_out oc)
  ~exceptionally:(fun _exn -> remove_file objfile);

If exceptionally fails with an exception, it is propagated as usual.

If always or exceptionally use exceptions internally for control-flow but do not raise, then try_finally is careful to preserve any exception backtrace coming from work or always for easier debugging.

let reraise_preserving_backtrace: exn => (unit => unit) => 'a;

reraise_preserving_backtrace e f is (f (); raise e) except that the current backtrace is preserved, even if f uses exceptions internally.

List operations

let map_end: ('a => 'b) => list('a) => list('b) => list('b);

map_end f l t is map f l @ t, just more efficient.

let map_left_right: ('a => 'b) => list('a) => list('b);

Like List.map, with guaranteed left-to-right evaluation order

let for_all2: ('a => 'b => bool) => list('a) => list('b) => bool;

Same as List.for_all but for a binary predicate. In addition, this for_all2 never fails: given two lists with different lengths, it returns false.

let replicate_list: 'a => int => list('a);

replicate_list elem n is the list with n elements all identical to elem.

let list_remove: 'a => list('a) => list('a);

list_remove x l returns a copy of l with the first element equal to x removed.

let split_last: list('a) => (list('a), 'a);

Split the given list into all of its elements but the last, and its last element, returned as a pair in that order.

Hash table operations

let create_hashtable: int => list(('a, 'b)) => Stdlib.Hashtbl.t('a, 'b);

Create a hashtable with the given initial size and fill it with the given bindings.

Extensions to the standard library

module Stdlib: { ... };

Operations on files and file paths

let find_in_path: list(string) => string => string;

Search a file in a list of directories.

let find_in_path_rel: list(string) => string => string;

Search a relative file in a list of directories.

let find_in_path_uncap: list(string) => string => string;

Same, but search also for uncapitalized name, i.e. if name is Foo.ml, allow /path/Foo.ml and /path/foo.ml to match.

let remove_file: string => unit;

Delete the given file if it exists and is a regular file. Does nothing for other kinds of files. Never raises an error.

let expand_directory: string => string => string;

expand_directory alt file expands a + at the beginning of file, if there is one, into alt (an alternate root directory).

let split_path_contents: ?sep:char => string => list(string);

split_path_contents ?sep s interprets s as the value of a "PATH"-like variable and returns the corresponding list of directories. s is split using the platform-specific delimiter, or ~sep if it is passed.

Returns the empty list if s is empty.

let copy_file: Stdlib.in_channel => Stdlib.out_channel => unit;

copy_file ic oc reads the contents of file ic and copies them to oc. It stops when encountering EOF on ic.

let copy_file_chunk: Stdlib.in_channel => Stdlib.out_channel => int => unit;

copy_file_chunk ic oc n reads n bytes from ic and copies them to oc. It raises End_of_file when encountering EOF on ic.

let string_of_file: Stdlib.in_channel => string;

string_of_file ic reads the contents of file ic and copies them to a string. It stops when encountering EOF on ic.

let output_to_bin_file_directly: string => (string => Stdlib.out_channel => 'a) => 'a;
let output_to_file_via_temporary: ?mode:list(Stdlib.open_flag) => string => (string => Stdlib.out_channel => 'a) => 'a;

Produce output in temporary file, then rename it (as atomically as possible) to the desired output file name. output_to_file_via_temporary filename fn opens a temporary file which is passed to fn (name + output channel). When fn returns, the channel is closed and the temporary file is renamed to filename.

let protect_writing_to_file: filename:string => f:(Stdlib.out_channel => 'a) => 'a;

Open the given filename for writing (in binary mode), pass the out_channel to the given function, then close the channel. If the function raises an exception then filename will be removed.

let concat_null_terminated: list(string) => string;

concat_null_terminated [x1;x2; ... xn] is x1 ^ "\000" ^ x2 ^ "\000" ^ ... ^ xn ^ "\000"

let split_null_terminated: string => list(string);

split_null_terminated s is similar to String.split_on_char '\000' but ignores the trailing separator, if any.

let chop_extensions: string => string;

Return the given file name without its extensions. The extensions are the longest suffix starting with a period and not including a directory separator, .xyz.uvw for instance.

Return the given name if it does not contain an extension.

Integer operations

let log2: int => int;

log2 n returns s such that n = 1 lsl s if n is a power of 2

let align: int => int => int;

align n a rounds n upwards to a multiple of a (a power of 2).

let no_overflow_add: int => int => bool;

no_overflow_add n1 n2 returns true if the computation of n1 + n2 does not overflow.

let no_overflow_sub: int => int => bool;

no_overflow_sub n1 n2 returns true if the computation of n1 - n2 does not overflow.

let no_overflow_mul: int => int => bool;

no_overflow_mul n1 n2 returns true if the computation of n1 * n2 does not overflow.

let no_overflow_lsl: int => int => bool;

no_overflow_lsl n k returns true if the computation of n lsl k does not overflow.

module Int_literal_converter: { ... };
let find_first_mono: (int => bool) => int;

find_first_mono p takes an integer predicate p : int -> bool that we assume: 1. is monotonic on natural numbers: if a <= b then p a implies p b, 2. is satisfied for some natural numbers in range [0; max_int] (this is equivalent to: p max_int = true).

find_first_mono p is the smallest natural number N that satisfies p, computed in O(log(N)) calls to p.

Our implementation supports two cases where the preconditions on p are not respected:

  • If p is always false, we silently return max_int instead of looping or crashing.
  • If p is non-monotonic but eventually true, we return some satisfying value.

String operations

let search_substring: string => string => int => int;

search_substring pat str start returns the position of the first occurrence of string pat in string str. Search starts at offset start in str. Raise Not_found if pat does not occur.

let replace_substring: before:string => after:string => string => string;

replace_substring ~before ~after str replaces all occurrences of before with after in str and returns the resulting string.

let rev_split_words: string => list(string);

rev_split_words s splits s in blank-separated words, and returns the list of words in reverse order.

let cut_at: string => char => (string, string);

cut_at s c returns a pair containing the sub-string before the first occurrence of c in s, and the sub-string after the first occurrence of c in s. let (before, after) = cut_at s c in before ^ String.make 1 c ^ after is the identity if s contains c.

Raise Not_found if the character does not appear in the string

  • since 4.01
let ordinal_suffix: int => string;

ordinal_suffix n is the appropriate suffix to append to the numeral n as an ordinal number: 1 -> "st", 2 -> "nd", 3 -> "rd", 4 -> "th", and so on. Handles larger numbers (e.g., 42 -> "nd") and the numbers 11--13 (which all get "th") correctly.

let normalise_eol: string => string;

normalise_eol s returns a fresh copy of s with any '\r' characters removed. Intended for pre-processing text which will subsequently be printed on a channel which performs EOL transformations (i.e. Windows)

let delete_eol_spaces: string => string;

delete_eol_spaces s returns a fresh copy of s with any end of line spaces removed. Intended to normalize the output of the toplevel for tests.

Operations on references

type ref_and_value =
  1. | R(Stdlib.ref('a), 'a) : ref_and_value
;
let protect_refs: list(ref_and_value) => (unit => 'a) => 'a;

protect_refs l f temporarily sets r to v for each R (r, v) in l while executing f. The previous contents of the references are restored even if f raises an exception, without altering the exception backtrace.

let get_ref: Stdlib.ref(list('a)) => list('a);

get_ref lr returns the content of the list reference lr and resets its content to the empty list.

let set_or_ignore: ('a => option('b)) => Stdlib.ref(option('b)) => 'a => unit;

set_or_ignore f opt x sets opt to f x if it returns Some _, or leaves it unmodified if it returns None.

Operations on triples and quadruples

let fst3: ('a, 'b, 'c) => 'a;
let snd3: ('a, 'b, 'c) => 'b;
let thd3: ('a, 'b, 'c) => 'c;
let fst4: ('a, 'b, 'c, 'd) => 'a;
let snd4: ('a, 'b, 'c, 'd) => 'b;
let thd4: ('a, 'b, 'c, 'd) => 'c;
let for4: ('a, 'b, 'c, 'd) => 'd;

Long strings

"Long strings" are mutable arrays of characters that are not limited in length to Sys.max_string_length.

module LongString: { ... };

Spell checking and "did you mean" suggestions

let edit_distance: string => string => int => option(int);

edit_distance a b cutoff computes the edit distance between strings a and b. To help efficiency, it uses a cutoff: if the distance d is smaller than cutoff, it returns Some d, else None.

The distance algorithm currently used is Damerau-Levenshtein: it computes the number of insertions, deletions, substitutions of letters, or swaps of adjacent letters needed to go from one word to the other. The particular algorithm may change in the future.

let spellcheck: list(string) => string => list(string);

spellcheck env name takes a list of names env that exist in the current environment and an erroneous name, and returns a list of suggestions taken from env, that are close enough to name that it may be a typo for one of them.

let did_you_mean: Stdlib.Format.formatter => (unit => list(string)) => unit;

did_you_mean ppf get_choices hints that the user may have meant one of the options returned by calling get_choices. It does nothing if the returned list is empty.

The unit -> ... thunking is meant to delay any potentially-slow computation (typically computing edit-distance with many things from the current environment) to when the hint message is to be printed. You should print an understandable error message before calling did_you_mean, so that users get a clear notification of the failure even if producing the hint is slow.

Colored terminal output

module Color: { ... };
module Error_style: { ... };

Formatted output

let print_if: Stdlib.Format.formatter => Stdlib.ref(bool) => (Stdlib.Format.formatter => 'a => unit) => 'a => 'a;

print_if ppf flag fmt x prints x with fmt on ppf if the contents of flag are true.

let pp_two_columns: ?sep:string => ?max_lines:int => Stdlib.Format.formatter => list((string, string)) => unit;

pp_two_columns ?sep ?max_lines ppf l prints the lines in l as two columns separated by sep ("|" by default). max_lines can be used to indicate a maximum number of lines to print -- an ellipsis gets inserted at the middle if the input has too many lines.

Example:

pp_two_columns ~max_lines:3 Format.std_formatter [
      "abc", "hello";
      "def", "zzz";
      "a"  , "bllbl";
      "bb" , "dddddd";
    ]

prints

    abc | hello
    ...
    bb  | dddddd
let print_see_manual: Stdlib.Format.formatter => list(int) => unit;

See manual section

Displaying configuration variables

let show_config_and_exit: unit => unit;

Display the values of all compiler configuration variables from module Config, then exit the program with code 0.

let show_config_variable_and_exit: string => unit;

Display the value of the given configuration variable, then exit the program with code 0.

Handling of build maps

Build maps cause the compiler to normalize file names embedded in object files, thus leading to more reproducible builds.

let get_build_path_prefix_map: unit => option(Build_path_prefix_map.map);

Returns the map encoded in the BUILD_PATH_PREFIX_MAP environment variable.

let debug_prefix_map_flags: unit => list(string);

Returns the list of --debug-prefix-map flags to be passed to the assembler, built from the BUILD_PATH_PREFIX_MAP environment variable.

Handling of magic numbers

module Magic_number: { ... };

A typical magic number is "Caml1999I011"; it is formed of an alphanumeric prefix, here Caml1999I, followed by a version, here 011. The prefix identifies the kind of the versioned data: here the I indicates that it is the magic number for .cmi files.

Miscellaneous type aliases

type filepath = string;
type modname = string;
type crcs = list((modname, option(Stdlib.Digest.t)));
type alerts = Stdlib.String.Map.t(string);