// Lossless reachability set storage -*- c++ -*-

/** @file FullSet.C
 * Transient, lossless reachability set storage
 */

/* Copyright © 2002-2003 Marko Mäkelä (msmakela@tcs.hut.fi).

   This file is part of MARIA, a reachability analyzer and model checker
   for high-level Petri nets.

   MARIA is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   MARIA is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   The GNU General Public License is often shipped with GNU software, and
   is generally kept in a file called COPYING or LICENSE.  If you do not
   have a copy of the license, write to the Free Software Foundation,
   59 Temple Place, Suite 330, Boston, MA 02111 USA. */

// GCC-specific set-up.  "#pragma implementation" is GCC's counterpart of
// "#pragma interface"; presumably FullSet.h carries the latter -- verify.
#ifdef __GNUC__
# pragma implementation
# ifdef __sgi
// Feature-test macros requested only for GCC on SGI; presumably they expose
// declarations that are needed further down (TODO confirm which ones).
#  define _XOPEN_SOURCE 1
#  define _XOPEN_SOURCE_EXTENDED 1
# endif // __sgi
#endif // __GNUC__

#include "FullSet.h"
#include "BTree.h"
#include "ByteBuffer.h"

#include <stdlib.h> // abort(3)

// Platform overrides for fileno(), which tempFile() uses to obtain the
// descriptor behind a FILE stream.

// Here fileno() is replaced by direct access to the _file member of the
// FILE structure (presumably no usable fileno() is declared -- TODO confirm).
#if defined __digital__
# define fileno(f) ((f)->_file)
#endif // __digital__

// On Cygwin fileno() is mapped to __sfileno().
#if defined __CYGWIN__
# define fileno(f) __sfileno(f)
#endif // __CYGWIN__

// NULL_F is the initializer of the "unopened file" value (see nofile).
// Its shape depends on how file_t is declared for the selected back end;
// the member order is not visible in this file -- verify against file_t.
#ifdef NO_MMAP
/** an empty file structure (malloc-backed storage): all members zero,
 * so that the addr test in isOpen () fails */
# define NULL_F { 0, 0, 0 }
#else // NO_MMAP
# ifdef USE_MMAP
/** an empty file structure (mmap-backed storage): presumably the leading
 * -1 is the descriptor, so that the fd >= 0 test in isOpen () fails */
#  define NULL_F { -1, 0, 0, 0 }
# else // USE_MMAP
/** an empty file structure (stdio-backed storage): a null handle */
#  define NULL_F 0
# endif // USE_MMAP
#endif // NO_MMAP

/** Determine whether a file handle refers to opened storage
 * @param file	the file handle to examine
 * @return	true if the handle is usable (memory allocated, descriptor
 *		valid, or stream non-null, depending on the back end)
 */
static bool
isOpen (const file_t& file)
{
#if defined USE_MMAP && defined NO_MMAP
  // malloc-backed storage: opened once a buffer has been allocated
  return file.addr != 0;
#elif defined USE_MMAP
  // mmap-backed storage: opened once there is a valid descriptor
  return file.fd >= 0;
#else // stdio-backed storage
  return file != 0;
#endif
}

/** an unopened file: the value tempFile () returns on failure, and the
 * initial value of FullSet::myStates */
static const file_t nofile = NULL_F;

/** Open a temporary file
 *
 * Depending on the back end this is a 16384-byte malloc () buffer, an
 * anonymous temporary file mapped into memory with an initial size of
 * 16384 bytes, or a plain tmpfile () stream.  Failures are reported with
 * perror () before any clean-up call can overwrite errno.
 * @return	an opened file, or something on which isOpen() does not hold
 *		(the malloc-backed variant aborts instead of failing)
 */
static file_t
tempFile (void)
{
#ifdef USE_MMAP
# ifdef NO_MMAP
  file_t f = { 0, 16384, malloc (16384) };
  if (!f.addr) {
    perror ("malloc");
    abort ();
  }
# else // NO_MMAP
  FILE* file = tmpfile ();
  if (!file) {
    perror ("FullSet: tmpfile");
    return nofile;
  }
  file_t f;
  // keep only a duplicate of the descriptor; the stream itself is not needed
  f.fd = dup (fileno (file));
  if (f.fd < 0) {
    // report first: fclose () below may change errno
    perror ("FullSet: dup");
  }
  fclose (file);
  if (f.fd < 0)
    return nofile;
  f.len = 0;
  f.alloc = 16384;
  if (ftruncate (f.fd, f.alloc)) {
    perror ("FullSet: ftruncate");
    close (f.fd);
    return nofile;
  }
  f.addr = mmap (0, f.alloc, PROT_READ | PROT_WRITE, MAP_SHARED, f.fd, 0);
  if (f.addr == reinterpret_cast<void*>(MAP_FAILED)) {
    // report first: close () below may change errno
    perror ("FullSet: mmap");
    close (f.fd);
    return nofile;
  }
# endif // NO_MMAP
  return f;
#else // USE_MMAP
  FILE* file = tmpfile ();
  if (!file) {
    perror ("FullSet: tmpfile");
    return nofile;
  }
  return file;
#endif // USE_MMAP
}

#ifdef USE_MMAP
/** Extend a file so that its allocated size covers its logical length
 *
 * Nothing is done when file.len already fits in file.alloc.  Otherwise
 * file.alloc is doubled until it is at least file.len, and the storage is
 * reallocated (malloc back end) or the file is grown and mapped again
 * (mmap back end).  Any failure terminates the program via abort ().
 * @param file	the file; file.addr may change as a result of the call
 */
static void
extend (file_t& file)
{
  assert (isOpen (file) && file.addr);
  // an exactly full buffer still holds all file.len bytes: no need to grow
  if (file.len <= file.alloc)
    return;
# ifndef NO_MMAP
  // the old mapping is dropped before the file is grown and mapped again
  if (file.addr)
    munmap (file.addr, file.alloc);
# endif // !NO_MMAP
  while (file.alloc < file.len) {
    // doubling until the size wraps around to zero means overflow
    if (!(file.alloc *= 2)) {
      fputs ("FullSet: extend: file size overflow\n", stderr);
      abort ();
    }
  }
# ifdef NO_MMAP
  if (!(file.addr = realloc (file.addr, file.alloc))) {
    perror ("extend: realloc");
    abort ();
  }
# else // NO_MMAP
  if (ftruncate (file.fd, file.alloc)) {
    perror ("FullSet: extend: ftruncate");
    abort ();
  }
  file.addr =
#  ifdef __sun
    (caddr_t)
#  endif // __sun
    mmap (0, file.alloc, PROT_READ | PROT_WRITE, MAP_SHARED, file.fd, 0);
  if (file.addr == reinterpret_cast<void*>(MAP_FAILED)) {
    perror ("FullSet: extend: mmap");
    abort ();
  }
# endif // NO_MMAP
}
#endif // USE_MMAP

/** Release the resources held by a temporary file
 * @param f	the file to be closed; its contents are not reset, so the
 *		caller must not treat it as opened afterwards
 */
static void
closeFile (file_t& f)
{
#if !defined USE_MMAP
  // stdio-backed storage
  fclose (f);
#elif defined NO_MMAP
  // malloc-backed storage
  if (f.addr)
    free (f.addr);
#else
  // mmap-backed storage: drop the mapping, then the descriptor
  munmap (f.addr, f.alloc);
  close (f.fd);
#endif
}

/** Construct an empty set: no hash index yet, and no state storage opened.
 * Both are set up by init ().
 */
FullSet::FullSet ()
  : StateSet (), myHash (0), myStates (nofile)
{}

/** Destroy the set: release the hash index, the state storage (if it was
 * opened) and the pending search entries.
 */
FullSet::~FullSet ()
{
#ifdef USE_MMAP
  // myHash is still null if init () was never called or if it failed
  if (myHash)
    myHash->cleanup ();
#endif // USE_MMAP
  delete myHash;
  if (isOpen (myStates))
    closeFile (myStates);
  mySearch.clear_allocated ();
}

bool
FullSet::init (bool path)
{
#ifndef NO_MMAP
  assert (!isOpen (myStates));
#endif // !NO_MMAP
  if (path && !openFile ())
    return false;
  file_t f;
  if (!isOpen (myStates = tempFile ()) ||
      !isOpen (f = tempFile ())) {
    if (isOpen (myStates))
      closeFile (myStates);
    return false;
  }
  myHash = new class BTree (f);
  return true;
}

/** Empty the set: clear the hash index, discard all stored states and
 * pending search entries, and rewind the path file if there is one.
 * In the stdio build the state file is replaced by a fresh temporary
 * file; failing to create it terminates the program.
 */
void
FullSet::clear ()
{
  assert (isOpen (myStates));
  assert (!!myHash);
  myHash->clear ();
#ifdef USE_MMAP
  // the storage stays allocated; only its logical length is reset
  myStates.len = 0;
#else // USE_MMAP
  fclose (myStates);
  if (!isOpen (myStates = tempFile ())) {
    // tempFile () has already reported the reason; carrying on would
    // hand a null stream to the next fseek ()/fwrite ()
    abort ();
  }
#endif // USE_MMAP
  mySearch.clear_allocated ();
  if (myPathFile) {
    myOffset = 0;
    myPathFileLength = 0;
    fseek (myPathFile, 0, SEEK_SET);
  }
}

/** Compute a hash value for a sequence of bytes
 *
 * The bytes are consumed from the last one to the first; each step mixes
 * a shifted copy of the running value with the next byte.
 * @param buf		the sequence
 * @param bytes		number of bytes in the sequence
 * @return		the hash value (0 for an empty sequence)
 */
inline static size_t
getHashValue (const unsigned char* buf, size_t bytes)
{
  size_t hash = 0;
  for (const unsigned char* p = buf + bytes; p != buf; ) {
    const size_t mixed = hash << 3 | hash >> (CHAR_BIT - 3);
    hash ^= mixed ^ *--p;
  }
  return hash;
}

/** Fetch a copy of an encoded state from the state storage
 * @param f		the state storage file
 * @param offset	offset to the file (bytes from the start of the file)
 * @param len		number of bytes to read
 * @return		a newly allocated array of words holding the len bytes;
 *			allocated with new[] (ownership passes to the caller)
 */
static word_t*
getState (const file_t& f,
	  long offset,
	  unsigned len)
{
  // round the byte count up to a whole number of words
  const size_t words = (len + (sizeof (word_t) - 1)) / sizeof (word_t);
  word_t* const copy = new word_t[words];
#ifdef USE_MMAP
  assert (!len || ssize_t (offset + len) <= f.len);
  const char* const base = static_cast<char*>(f.addr);
  memcpy (copy, base + offset, len);
#else // USE_MMAP
  if (fseek (f, offset, SEEK_SET)) {
    perror ("FullSet: fseek");
    abort ();
  }
  const size_t got = fread (copy, 1, len, f);
  if (got != len) {
    perror ("FullSet: fread");
    abort ();
  }
#endif // USE_MMAP
  return copy;
}

/** Compare an encoded state against one held in the state storage
 * @param f		the state file
 * @param offset	offset to the state file
 * @param buf		compare this state against the one at offset
 * @param tmp		work storage for encoded states (stdio build only;
 *			must hold at least bytes bytes)
 * @param bytes		length of the encoded state in bytes
 * @return		whether buf matches the state
 */
static bool
cmpfetch (const file_t& f,
	  unsigned offset,
	  const void* buf,
#ifndef USE_MMAP
	  void* tmp,
#endif // !USE_MMAP
	  size_t bytes)
{
#ifdef USE_MMAP
  // an empty state matches trivially
  if (!bytes)
    return true;
  // a state reaching beyond the stored data cannot match
  if (ssize_t (offset + bytes) > f.len)
    return false;
  const char* const stored = static_cast<char*>(f.addr) + offset;
  return !memcmp (stored, buf, bytes);
#else // USE_MMAP
  if (fseek (f, offset, SEEK_SET)) {
    perror ("FullSet: fseek");
    abort ();
  }
  // a short read means that the stored state cannot match
  if (fread (tmp, 1, bytes, f) != bytes)
    return false;
  return !memcmp (tmp, buf, bytes);
#endif // USE_MMAP
}

/** Add an encoded state to the set unless it is already there
 *
 * The state is looked up via its hash value; every stored state with the
 * same hash is compared byte by byte.  A new state is appended to the
 * state storage, registered in the hash index, queued in the search list
 * and, if a path file is in use, recorded there as well.  I/O failures
 * terminate the program.
 * @param buf	the encoded state
 * @param size	length of the encoded state in bytes
 * @return	true if the state was new and has been added;
 *		false if an identical state was already stored
 */
bool
FullSet::do_add (const void* buf,
		 size_t size)
{
  size_t h = getHashValue (reinterpret_cast<const unsigned char*>(buf), size);
  // offsets[0] is the number of candidates, offsets[1..] their positions
  // in the state storage
  if (unsigned* offsets = myHash->search (h)) {
#ifndef USE_MMAP
    word_t* tmp =
      new word_t[(size + (sizeof (word_t) - 1)) / sizeof (word_t)];
#endif // !USE_MMAP

    for (unsigned i = *offsets; i; i--) {
      if (::cmpfetch (myStates, offsets[i], buf,
#ifndef USE_MMAP
		      tmp,
#endif // !USE_MMAP
		      size)) {
#ifndef USE_MMAP
	delete[] tmp;
#endif // !USE_MMAP
	delete[] offsets;
	return false;
      }
    }

#ifndef USE_MMAP
    delete[] tmp;
#endif // !USE_MMAP
    delete[] offsets;
  }

  // a new state: add it to disk
#ifdef USE_MMAP
  const long len = myStates.len;
  myStates.len += size;
  extend (myStates);
  memcpy (static_cast<char*>(myStates.addr) + len, buf, size);
#else // USE_MMAP
  if (fseek (myStates, 0, SEEK_END)) {
    perror ("FullSet: fseek");
    abort ();
  }
  const long len = ftell (myStates);
  if (len < 0) {
    // without a valid position the state could never be found again
    perror ("FullSet: ftell");
    abort ();
  }
  if (fwrite (buf, 1, size, myStates) != size) {
    perror ("FullSet: fwrite");
    abort ();
  }
#endif // USE_MMAP

  myHash->insert (h, len);
  newState ();

  assert (!myPathFile || myPathFileLength == ftell (myPathFile));
  assert (!myOffset || myOffset < myPathFileLength);

  // the search list stores the storage offset disguised as a pointer;
  // pop () converts it back
  mySearch.push_allocated (reinterpret_cast<word_t*>(len),
			   size, myPathFileLength);
  if (myPathFile) {
    class BytePacker p;
    p.append (myOffset), p.append (len), p.append (size);
    // a truncated record would make myPathFileLength disagree with the file
    if (fwrite (p.getBuf (), 1, p.getLength (), myPathFile)
	!= p.getLength ()) {
      perror ("FullSet: fwrite");
      abort ();
    }
    myPathFileLength += p.getLength ();
  }
  return true;
}

/** Retrieve the encoded state recorded at a position of the path file
 *
 * The record at pos consists of three packed values; the first is
 * skipped, the second is the offset of the state in the state storage
 * and the third its length in bytes.  I/O failures terminate the program.
 * @param pos	offset of the record in the path file
 * @param size	(output) length of the encoded state in bytes
 * @return	a newly allocated (new[]) copy of the encoded state
 */
word_t*
FullSet::getState (long pos, size_t* size) const
{
  // zero-filled, since the last record may be shorter than the buffer
  // (12 is presumably the maximum length of a record -- see BytePacker)
  unsigned char rbuf[12] = { 0 };
  class ByteUnpacker u (rbuf);
  assert (!!myPathFile);
  assert (pos < myPathFileLength);
  if (fseek (myPathFile, pos, SEEK_SET)) {
    perror ("FullSet: fseek");
    abort ();
  }
  // read byte-wise: a record near the end of the file yields a short read,
  // which leaves a whole 12-byte element indeterminate
  if (!fread (rbuf, 1, sizeof rbuf, myPathFile)) {
    perror ("FullSet: fread");
    abort ();
  }
  u.extract ();
  unsigned offset = u.extract (), len = u.extract ();
  // len is in bytes: round up to a whole number of words
  word_t* state =
    new word_t[(len + (sizeof (word_t) - 1)) / sizeof (word_t)];
#ifdef USE_MMAP
  assert (!len || ssize_t (offset + len) <= myStates.len);
  memcpy (state, static_cast<char*>(myStates.addr) + offset, len);
#else // USE_MMAP
  if (fseek (myStates, offset, SEEK_SET)) {
    perror ("FullSet: fseek");
    abort ();
  }
  if (fread (state, 1, len, myStates) != len) {
    perror ("FullSet: fread");
    abort ();
  }
#endif // USE_MMAP
  *size = len;
  return state;
}

/** Take the next unprocessed state off the search list
 * @param tail	passed on to the search list's pop ()
 * @param size	(output) length of the encoded state in bytes
 * @return	a newly allocated copy of the encoded state,
 *		or 0 if the search list is empty
 */
word_t*
FullSet::pop (bool tail, size_t& size)
{
  if (mySearch.empty ())
    return 0;

  // the search list holds storage offsets disguised as pointers
  // (see do_add ()); turn the entry back into an offset
  const unsigned long stateOffset = reinterpret_cast<unsigned long>
    (mySearch.pop (tail, myOffset, &size));
  return ::getState (myStates, stateOffset, size);
}
