/*-
 * Copyright (c) 2007 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*-
 * Kernel text-dump support: implement a byte stream KPI for the purposes of
 * logging kernel debugger output.  This allows for a more compact
 * representation of critical debugging information than traditional binary
 * dumps, as well as allowing dump information to be used without access to
 * kernel symbols, source code, etc.
 *
 * TODO:
 * - Reformulate around a large fixed-size buffer, as it transpires we can't
 *   call the dumper routine from within DDB without restarting the kernel
 *   and related unhappiness.  Then modify the dumping code in panic() to
 *   call our own dumpsys() rather than the standard one if there's a pending
 *   text dump.
 * - Modify start/finish routines to allow use to set and suspend logging to
 *   the buffer.  This would allow scripts to append sequentially to the
 *   buffer as they run, say on a series of breakpoints.
 * - Provide access to the buffer from sysctl.
 * - Include a copy of the message buffer when writing out the text dump, and
 *   teach savecore to extract them to separate files.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/systm.h>

#include <ddb/ddb.h>

/*
 * Size in bytes of every block handed to textdump_writeblock().  The dump
 * header is written as one such block, so its size must match exactly; the
 * CTASSERT below checks that at compile time.
 */
#define	TEXTDUMP_BLOCKSIZE	512

CTASSERT(sizeof(struct kerneldumpheader) == TEXTDUMP_BLOCKSIZE);

/* XXXRW: DEBUG */
/*
 * Debugging aid: textdump_writeblock() records the offset of each block it
 * accepts, up to blockcountermax entries, and textdump_finish() prints the
 * recorded list.  textdump_start() resets blockcounter to zero.
 */
static int blockcounter;		/* Entries of blocks[] in use. */
static off_t blocks[100];		/* Recorded block offsets. */
static int blockcountermax = 100;	/* Capacity of blocks[]. */

/*
 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
 * is to protect us from metadata and to protect metadata from us.
 * textdump_writeblock() rejects any offset below this value with EIO.
 */
#define	SIZEOF_METADATA		(64*1024)

/*-
 * Crash dumps are aligned to the end of the dump or swap partition in order
 * to minimize the chances of swap during fsck eating into the dump.  However,
 * this requires knowing at the start of writing the dump how large the dump
 * will be, in order to calculate the start address.  We are faced with four
 * options: relocate the dump to the front of the partition, buffer all
 * output, write the blocks "backwards", and set a fixed maximum size and
 * teach savecore(8) how to terminate early.  We pick the last because it
 * represents the minimum change to layout without statically allocating
 * large buffers to DDB(4).  Any attempt to generate more data than the max
 * size will result in truncation in the dump.  To mark the termination point,
 * we will pad out any last block with ASCII nul's, and then write a complete
 * block of nul's.
 *
 * This means that the layout is something like this:
 *
 *   SIZEOF_METADATA ... hdr1 textdump nuls ... hdr2
 *
 * sizeof(textdump) + sizeof(nuls) must be <= TEXTDUMP_MAXSIZE.
 *
 * NOTE(review): nothing visible in this file compares the amount of text
 * written against TEXTDUMP_MAXSIZE; textdump_writeblock() only bounds
 * offsets by SIZEOF_METADATA and di->mediasize.  Confirm where the
 * truncation described above is meant to be enforced.
 */
#define	TEXTDUMP_MAXSIZE	(2 * 1024 * 1024)

/*
 * Text dumps use a single statically sized buffer; we carry various
 * persisting bits of state during a particular dump session, such as the
 * current offset into the buffer, offset on disk, bytes written, and an
 * error, if any.  textdump_start() resets all of these at the beginning of
 * a session.
 */
static char td_buffer[TEXTDUMP_BLOCKSIZE]; /* Statically allocated buffer. */
static u_int td_bufoff;			/* Current data in buffer. */
static off_t td_written;		/* Total bytes written so far. */
static off_t td_offset;			/* Offset of next sequential write. */
static int td_started;			/* Is a text dump in progress? */
static int td_error;			/* Write error, if any. */
/* Dump header; the same contents are written before and after the text. */
static struct kerneldumpheader kdh;

/* Set by textdump_setdumper(); NULL until a dumper has been registered. */
static struct dumperinfo *di;		/* Local cache of dumper pointer. */

/*
 * Build the header that brackets a text dump.  The structure is an ordinary
 * kernel dump header; only the magic string (TEXTDUMPMAGIC) is different,
 * which is what allows a text dump to be uniquely identified.
 *
 *	kdh	header to fill in; its previous contents are zeroed first
 *	archver	value stored in the architectureversion field
 *	dumplen	value stored in the dumplength field
 *	blksz	value stored in the blocksize field
 */
static void
mkdumpheader(struct kerneldumpheader *kdh, uint32_t archver, uint64_t dumplen,
    uint32_t blksz)
{

	bzero(kdh, sizeof(*kdh));

	/*
	 * Fixed-width string fields.  strncpy() is bounded by the field
	 * size and does not add a terminator when the source fills it.
	 */
	strncpy(kdh->magic, TEXTDUMPMAGIC, sizeof(kdh->magic));
	strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
	strncpy(kdh->hostname, hostname, sizeof(kdh->hostname));
	strncpy(kdh->versionstring, version, sizeof(kdh->versionstring));
	if (panicstr != NULL)
		strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));

	/* Integer fields, converted with htod32()/htod64(). */
	kdh->version = htod32(KERNELDUMPVERSION);
	kdh->architectureversion = htod32(archver);
	kdh->blocksize = htod32(blksz);
	kdh->dumplength = htod64(dumplen);
	kdh->dumptime = htod64(time_second);

	/* Computed last, once every other field has its final value. */
	kdh->parity = kerneldump_parity(kdh);
}

/*
 * textdump_writeblock() writes TEXTDUMP_BLOCKSIZE-sized blocks of data to
 * the space between di->mediaoffset and di->mediaoffset + di->mediasize.  It
 * accepts an offset relative to di->mediaoffset.
 */
static int
textdump_writeblock(u_int offset, char *buffer)
{
	int error;

	if (offset + TEXTDUMP_BLOCKSIZE > di->mediasize)
		return (ENOSPC);
	if (offset < SIZEOF_METADATA)
		return (EIO);

#if 0
	/* XXXRW: DEBUG */
	error = di->dumper(di->priv, buffer, 0, offset + di->mediaoffset,
	    TEXTDUMP_BLOCKSIZE);
#endif
	error = 0;
	td_written += TEXTDUMP_BLOCKSIZE;

	/* XXXRW: DEBUG */
	if (blockcounter < blockcountermax) {
		blocks[blockcounter] = offset;
		blockcounter++;
	}

	return (error);
}

/*
 * Write one block at the current sequential position (td_offset) and
 * advance that position by TEXTDUMP_BLOCKSIZE.  The position moves forward
 * whether or not the write succeeds.  Returns the result of
 * textdump_writeblock().
 */
static int
textdump_writenextblock(char *buffer)
{
	off_t where;

	where = td_offset;
	td_offset += TEXTDUMP_BLOCKSIZE;
	return (textdump_writeblock(where, buffer));
}

/*
 * Start a new textdump.  Only one session is allowed at any time.
 *
 * Checks the dumper registered with textdump_setdumper(), resets the
 * per-session state, positions td_offset so that the leading header, up to
 * TEXTDUMP_MAXSIZE bytes of text and the trailing header end at the end of
 * the media, and writes the leading header.
 *
 * Returns 0 on success, EINPROGRESS if a dump is already in progress,
 * ENODEV if no dumper is set or its block size is not TEXTDUMP_BLOCKSIZE,
 * ENOSPC if the media is too small, or the error from writing the header
 * (in which case the session is not left started).
 */
int
textdump_start(void)
{
	off_t required;
	int error;

	if (td_started) {
		db_printf("Textdump already started\n");
		return (EINPROGRESS);
	}

	if (di == NULL) {
		db_printf("No dumper defined\n");
		return (ENODEV);
	}

	/* XXXRW: DEBUG */
	printf("Partition block size %ju offset %ju size %ju\n",
	    (uintmax_t)di->blocksize, (uintmax_t)di->mediaoffset,
	    (uintmax_t)di->mediasize);

	if (di->blocksize != TEXTDUMP_BLOCKSIZE) {
		db_printf("Dump partition block size (%ju) not textdump "
		    "block size (%ju)\n", (uintmax_t)di->blocksize,
		    (uintmax_t)TEXTDUMP_BLOCKSIZE);
		return (ENODEV);
	}

	/*
	 * Determine size and calculate starting offset of dump.  We will
	 * write headers just before and just after the dump space.  The
	 * requirement is held in an off_t so the comparison with
	 * di->mediasize is done in a single signed type.
	 */
	required = SIZEOF_METADATA + TEXTDUMP_MAXSIZE + sizeof(kdh) * 2;
	if (di->mediasize < required) {
		db_printf("Insufficient space on dump partition.\n");
		db_printf("%ju found, %ju required\n",
		    (uintmax_t)di->mediasize, (uintmax_t)required);
		return (ENOSPC);
	}

	blockcounter = 0;
	td_offset = di->mediasize - TEXTDUMP_MAXSIZE - (sizeof(kdh) * 2);

	td_error = 0;
	td_started = 1;
	td_bufoff = 0;
	td_written = 0;

	mkdumpheader(&kdh, KERNELDUMP_TEXT_VERSION, TEXTDUMP_MAXSIZE,
	    TEXTDUMP_BLOCKSIZE);
	error = textdump_writenextblock((char *)&kdh);
	if (error) {
		/* The header could not be written; abandon the session. */
		td_started = 0;
		db_printf("textdump_start: error %d\n", error);
		td_error = error;
	}
	return (error);
}

/*
 * We don't want to call db_printf() during a dump, or it might lead to
 * recursion.  We don't print a message when not in a dump, as the write and
 * writech calls will be made continuously by DDB's output routines.  Save
 * any error until the end, and don't continue once an error has been
 * generated.
 *
 * Appends buflen bytes from buffer to the text dump.  Data is staged in
 * td_buffer and written out each time a full TEXTDUMP_BLOCKSIZE block has
 * accumulated; a trailing partial block stays buffered for a later call or
 * for textdump_finish().
 *
 * Returns 0 on success (including buflen == 0), EBADF if no dump is in
 * progress, the saved error if an earlier write in this session failed, or
 * the error from the block write that failed during this call.
 *
 * XXXRW: Should we have db_textdump_write/writech that don't print if not
 * started, but normal versions that do in case anyone else wants to use the
 * textdump facility than DDB?
 */
int
textdump_write(char *buffer, u_int buflen)
{
	u_int len, sofar;
	int error;

	/*
	 * It is normal for textdump_write() to be called when a textdump is
	 * not started, so no diagnostic printf here.
	 */
	if (td_started == 0 || td_started > 1)
		return (EBADF);

	if (td_error)
		return (td_error);

	error = 0;
	sofar = 0;
	while (sofar < buflen) {
		/* Copy as much as fits in the remainder of the block. */
		len = min(buflen - sofar, TEXTDUMP_BLOCKSIZE - td_bufoff);
		bcopy(buffer + sofar, td_buffer + td_bufoff, len);
		td_bufoff += len;
		sofar += len;
		if (td_bufoff == TEXTDUMP_BLOCKSIZE) {
			error = textdump_writenextblock(td_buffer);
			td_bufoff = 0;
			if (error) {
				/*
				 * Remember the failure so that later calls
				 * stop writing and textdump_finish() can
				 * report it.
				 */
				td_error = error;
				break;
			}
		}
	}

	return (error);
}

/*
 * Append a single character to the text dump.  This is a one-byte call to
 * textdump_write() and returns whatever that call returns.
 */
int
textdump_writech(char ch)
{
	char byte;

	byte = ch;
	return (textdump_write(&byte, 1));
}

/*
 * If there are any stray bytes left in the buffer, pad the remainder of
 * block with 0's and flush.  Then write a complete block of 0's as the
 * termination.
 *
 * savecore(8) can strip all trailing 0's off the end of the file for user
 * presentation.
 *
 * Any write failure is stored in td_error and also returned, so a caller
 * that assigns the return value to td_error does not erase it.  The
 * terminating block is skipped once an error has been recorded.
 */
static int
textdump_flushblock(void)
{

	if (td_bufoff != 0) {
		bzero(td_buffer + td_bufoff, TEXTDUMP_BLOCKSIZE - td_bufoff);
		td_bufoff = 0;
		td_error = textdump_writenextblock(td_buffer);
	}
	if (td_error == 0) {
		bzero(td_buffer, TEXTDUMP_BLOCKSIZE);
		td_error = textdump_writenextblock(td_buffer);
	}
	return (td_error);
}

int
textdump_finish(void)
{
	int i;

	if (!td_started) {
		db_printf("textdump_finish: not started\n");
		return (EBADF);
	}

	/*
	 * Flush any partial blocks at the end of the text stream.
	 */
	if (td_error == 0)
		td_error = textdump_flushblock();

	/*
	 * If no errors so far, write out the trailer.
	 */
	if (td_error == 0)
		td_error = textdump_writenextblock((char *)&kdh);

	/*
	 * XXXRW: It is odd that di->dumper() is called to terminate a dump
	 * only if the dump succeeds, but that is what dumpsys() appears to
	 * do.
	 */
#if 0
	/* XXXRW: DEBUG */
	if (td_error == 0)
		(void)di->dumper(di->priv, NULL, 0, 0, 0);
#endif

	/*
	 * Wrap up with some stats.
	 */
	td_started = 0;
	if (td_error)
		db_printf("textdump_finish: error %d\n", td_error);
	db_printf("text dump summary: offset: %jd written: %jd\n",
	    (intmax_t)td_offset, (intmax_t)td_written);
	/* XXXRW: DEBUG */
	for (i = 0; i < blockcounter; i++)
		db_printf("  block %jd\n", (intmax_t)blocks[i]);
	return (0);
}

/*
 * Record the dumper that subsequent text dumps will use.  The pointer is
 * stored as-is in the file-scope 'di'; textdump_start() fails with ENODEV
 * while it is NULL.
 */
void
textdump_setdumper(struct dumperinfo *di_arg)
{

	di = di_arg;
}
