File Checksum Example

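This example demonstrates:

  • opening a file named on the command line with Gio (File.newForCommandlineArg, File.read)
  • reading an input stream in fixed-size chunks (InputStream.read)
  • computing a checksum incrementally with GLib (Checksum.new, Checksum.update, Checksum.getString)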

GC.full () is called after each read so that the read buffers, which are allocated on the C heap, are not kept longer than necessary. Although the garbage collector would eventually run of its own accord, collecting explicitly avoids holding on to memory unnecessarily, which could cause performance problems related to virtual memory, particularly with large files.

Download

Checksum.tar.gz

See Using Make for build instructions.

Library dependencies

  • GLib 2.0 (GLib, GObject, Gio)

File listings

checksum.sml

(* checksumTypeToString t
 *
 * Returns the display name of the checksum algorithm `t`, as printed in
 * front of the computed checksum.  Only the four constructors listed below
 * are handled.
 *)
local
  open GLib.ChecksumType
in
  fun checksumTypeToString MD_5    = "MD5"
    | checksumTypeToString SHA_1   = "SHA-1"
    | checksumTypeToString SHA_256 = "SHA-256"
    | checksumTypeToString SHA_512 = "SHA-512"
end

(* main ()
 *
 * Computes the SHA-256 checksum of the file named by the first command line
 * argument and prints it as "<algorithm>: <checksum>".
 *
 * The file is read in chunks of at most `chunkSize` bytes.  Each chunk is
 * fed to the checksum object and GC.full () is called before the next read.
 *
 * Outcomes:
 *   - no argument, or the file cannot be opened or closed:
 *       Giraffe.error 1 with a specific message;
 *   - any other exception: the input stream is closed on a best-effort
 *       basis if it was being read, then Giraffe.error 1 reports the
 *       exception message;
 *   - otherwise: Giraffe.exit 0 after printing.
 *)
fun main () : unit =
  let
    val () = GObject.typeInit ()
    open GLib Gio

    (* the file to checksum is the first command line argument *)
    val file =
      case CommandLine.arguments () of
        []        => Giraffe.error 1 ["usage: checksum <file>\n"]
      | path :: _ => File.newForCommandlineArg path

    (* checksum object that accumulates the result *)
    val checksumType = ChecksumType.SHA_256
    val checksum = Checksum.new checksumType

    (* number of bytes requested from the stream per read *)
    val chunkSize = 5 * 1024 * 1024

    (* open the file for reading *)
    val istream =
      File.read file NONE
        handle
          Error _ =>
            Giraffe.error 1 ["failed to read file \"", File.getParseName file, "\"\n"]

    (* consume the stream chunk by chunk, updating the checksum, until a
     * read returns no bytes *)
    val () =
      let
        fun consume () =
          (fn (n, chunk) =>
            if n > 0
            then (
              (* only the first `n` elements of `chunk` were filled *)
              Checksum.update checksum (GUInt8CArrayN.subslice (chunk, n));
              GC.full ();  (* ensure memory for `chunk` is released *)
              consume ()
            )
            else ())
            (InputStream.read istream (chunkSize, NONE))
      in
        consume ()
      end
        handle e => (
          (* best effort: close the stream, ignoring any failure to do so,
           * then propagate the original exception *)
          InputStream.close istream NONE handle _ => ();
          raise e
        )

    (* textual form of the finished checksum *)
    val checksumStr = Checksum.getString checksum

    (* finished with the input stream *)
    val () =
      InputStream.close istream NONE
        handle
          GLib.Error _ =>
            Giraffe.error 1 ["failed to close file \"", File.getParseName file, "\"\n"]

    (* report "<algorithm>: <checksum>" *)
    val () =
      app print [checksumTypeToString checksumType, ": ", checksumStr, "\n"]
  in
    Giraffe.exit 0
  end
    handle e => Giraffe.error 1 ["Uncaught exception\n", exnMessage e, "\n"]

mlton-main.sml

(* Program entry for the MLton build: run `main`, which is defined in
 * checksum.sml (listed before this file in mlton.mlb). *)
val () = main ()

mlton.mlb

(* ML Basis file for building the checksum example with MLton.
 *
 * The bases in the `local` part are in scope only for the source files
 * listed after `in`.  The path variable GIRAFFE_SML_LIB is set on the MLton
 * command line with -mlb-path-var (see app.mk).
 *)
local
  (* SML Basis Library and MLton's own basis *)
  $(SML_LIB)/basis/basis.mlb
  $(SML_LIB)/basis/mlton.mlb
  (* Giraffe Library: general support, then one basis per GIR namespace used *)
  $(GIRAFFE_SML_LIB)/general/mlton.mlb
  $(GIRAFFE_SML_LIB)/glib-2.0/mlton.mlb
  $(GIRAFFE_SML_LIB)/gobject-2.0/mlton.mlb
  $(GIRAFFE_SML_LIB)/gio-2.0/mlton.mlb
in
  (* application sources, in order: checksum.sml defines `main`, which
   * mlton-main.sml calls *)
  checksum.sml
  mlton-main.sml
end

polyml-libs.sml

use "$(GIRAFFE_SML_LIB)/general/polyml.sml";
use "$(GIRAFFE_SML_LIB)/ffi/polyml.sml";
use "$(GIRAFFE_SML_LIB)/gir/polyml.sml";
use "$(GIRAFFE_SML_LIB)/glib-2.0/polyml.sml";
use "$(GIRAFFE_SML_LIB)/gobject-2.0/polyml.sml";
use "$(GIRAFFE_SML_LIB)/gio-2.0/polyml.sml";

polyml-app.sml

(* For each line of the form
 *
 *   use "<file>";
 *
 * <file> is taken as a build dependency.
 *)

use "checksum.sml";

app.mk

################################################################################
# Application-specific values

NAME := checksum


# MLton target
#
# Define:
#   SRC_MLTON       - the SML source files for MLton
#   TARGET_MLTON    - the binary to be built with MLton

# Only when the build has detected MLton (MLTON_VERSION is defined).
ifdef MLTON_VERSION

# Ask MLton to list the files that mlton.mlb refers to and stop (`-stop f`),
# with the MLB path variable GIRAFFE_SML_LIB set to $(GIRAFFE_SML_LIBDIR).
# The resulting list is used as the source files for the MLton target.
SRC_MLTON := $(shell $(MLTON_MLTON) -mlb-path-var 'GIRAFFE_SML_LIB $(GIRAFFE_SML_LIBDIR)' -stop f mlton.mlb)

# Binary name is derived from the application name, e.g. checksum-mlton.
TARGET_MLTON := $(NAME)-mlton

endif


# Poly/ML target
#
# Define:
#   SRC_POLYML      - the SML source files for Poly/ML
#   TARGET_POLYML   - the binary to be built with Poly/ML

# Only when the build has detected Poly/ML (POLYML_VERSION is defined).
ifdef POLYML_VERSION

# Every line of polyml-app.sml of the exact form
#   use "<file>";
# contributes <file> to the source list.  `$$` is an escaped `$` so that sed
# receives the end-of-line anchor.
SRC_POLYML := $(shell sed -n 's|^use "\([^"]*\)";$$|\1|p' polyml-app.sml)

# Binary name is derived from the application name, e.g. checksum-polyml.
TARGET_POLYML := $(NAME)-polyml

endif


# Library dependencies
#
# Define:
#   LIB_NAMES       - list of the libraries that the application references

LIB_NAMES := \
	glib-2.0 \
	gobject-2.0 \
	gio-2.0

# Note that LIB_NAMES does _not_ contain pkg-config names but GIR namespace
# names, which are also the directory names in $(GIRAFFEHOME)/lib/sml.
#
# One method to determine the list is as follows: for each instance of
#
#   $(GIRAFFE_SML_LIB)/$(LIB_NAME)/mlton.mlb
#
# in mlton.mlb, the list should include $(LIB_NAME).