(** * Programming Pearls Column 2 Exercise A * * Given a file of positive integers that can be represented in * 'int_size_in_bits' bits, this program will find an integer in the range [0 * .. 2 ** (int_size_in_bits) - 1] that is missing from the file. * * The assumption is that you have little memory to work with, but you are * allowed to write to scratch files with impunity. We thus read from files * using streams, so that the input is read a little bit at a time (although I'm * not quite sure how channels are implemented in OCaml, so it may be that * they're buffering a heck of a lot of data at once). * * The algorithm works by treating each number as a sequence of bits, where the * least significant bit is assumed to be at index 0. * * Starting at index i = 'int_size_in_bits' - 1, we push all of the integers * that have a 0 at index i into one file, and all the integers with a 1 at * index i into another. The first time around, the 0-bit file can have at most * 2 * 'int_size_in_bits' / 2 in it. If it has fewer than this, we know there's * an integer missing in that range, so we recurse on the 0 file and look at the * next bit index. Otherwise, we recurse on the 1 file. * * We construct the missing number as we go by adding 2 ** i to the number so * far every time we recurse on the 1 file, since the missing number must * naturally have the ith bit set. * * Author: Michael DiBernardo *) (* How large we 'allow' integers to be. *) let int_size_in_bits = 16;; (** * Emulate 'for line in file' behaviour from python. Returns a stream that will * produce the next line in the file until it is empty. * * Thanks to pango from #ocaml for pointing me to the Stream.from_ functions. *) let make_linestream filename = let line_channel = open_in filename in Stream.from (fun stream_element_index -> try (Some (input_line line_channel)) with End_of_file -> None ) (** * I'm sure there's a good reason that the 'Stream.empty' function does not * return a boolean value, but the fact that it returns unit or throws an * exception baffles the heck out of me. * * So, I wrote my own 'is_empty' function for streams that peeks on the stream, * and if it finds anything, it will return 'false'. *) let stream_is_empty stream = match (Stream.peek stream) with | Some _ -> false | None -> true (** * Finds the missing integer in the given stream of numbers, given that the * missing integer thus far is 'num_so_far' and the index that we're examining * is 'bit_index'. * * 'number_stream' is a stream that generates stringified positive integers. * 'num_so_far' is the value of the missing integer from the most significant * bit to the bit before 'bit_index'. * 'bit_index' is the bit that we're examining right now. *) let rec find_missing_integer number_stream num_so_far bit_index = (* Base case: If the stream is empty, the number we've constructed so far is * certainly missing. *) if (stream_is_empty number_stream) then num_so_far (* Recursive case: Push numbers into files based on the value of the bit at * bit_index, and recurse on the file that has fewer numbers than it could * possibly have at this stage. *) else ( let out0_filename = ("0-" ^ (string_of_int bit_index)) and out1_filename = ("1-" ^ (string_of_int bit_index)) and (* The value of the number that has only the 2^ith bit set. I use lsl * because there's no obvious way to use integer exponentiation, and * float_of_int (2 ** i) seems like overkill. *) value_of_bin = (1 lsl bit_index) in let out0 = (open_out out0_filename) and out1 = (open_out out1_filename) in (* We want to count how many integers in the stream have a 0-bit at index i. * The intuitive way to do this is to map a predicate that checks for the * 0-bit across the stream and then accumulate all the 'true's, but that * would consume a fair amount of memory to store the mapped list, and we're * assuming memory constraints here. Also, there's no 'map' for streams, * just 'iter', which relies on side-effects. So we use a ref to accumulate * the count. *) let size_of_bin_0 = ref 0 in let count_and_write number_as_string = let number = int_of_string number_as_string in let number_to_output = number_as_string ^ "\n" in (* Is the bit at index i a 0? *) if number land value_of_bin = 0 then ( incr size_of_bin_0; (output_string out0 number_to_output) ) else (output_string out1 number_to_output) in ( Stream.iter count_and_write number_stream; close_out out0; close_out out1; if (!size_of_bin_0 <= (value_of_bin / 2)) then (find_missing_integer (make_linestream out0_filename) num_so_far (bit_index - 1)) else (find_missing_integer (make_linestream out1_filename) (num_so_far + value_of_bin) (bit_index - 1)) ) ) let _ = if (Array.length Sys.argv) = 2 then let input_file_name = (Array.get Sys.argv 1) and num_so_far = 0 and bit_index = (int_size_in_bits - 1) in (Printf.printf "%d\n" (find_missing_integer (make_linestream input_file_name) num_so_far bit_index)) else Printf.printf "USAGE: find_missing_integer \n"