/** rtfn.c -- RTF (rich text format) file normalizer.
 * @author Eric Laroche
 * @version @(#)$Id: rtfn.c,v 1.7 1999/05/23 08:05:58 laroche Exp $
 */
/* rtfn -- RTF (rich text format) file normalizer.
 * Copyright (C) 1998 Eric Laroche.
 *
 * This program is free software;
 * you can redistribute it and/or modify it.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */

/* Version identification string in RCS "$Id$" keyword form.
 * NOTE(review): not referenced anywhere in the visible code; presumably
 * kept so the version can be recovered from the compiled binary
 * (e.g. with ident(1) or what(1)) -- confirm before removing.
 */
static char const rcsid[] =
	"@(#)$Id: rtfn.c,v 1.7 1999/05/23 08:05:58 laroche Exp $";

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include <errno.h>

#ifdef unix
#include <unistd.h>
#else
#include <io.h>
#endif

/* program name, used in the usage text and as prefix of error messages */
static char const app[] = "rtfn";

/* forward declarations of the file-local functions defined below */
static void	quit( char const* fmt, ... );
static void normalizefile( char const* infile );
static void normalize( FILE* fpin, FILE* fpout );

/** main -- program entry point.
 * With file arguments, each file is normalized into a sibling output file
 * (see normalizefile).  Without arguments the program acts as a filter
 * from standard input to standard output.  If the first argument starts
 * with '-' (this includes "-?"), the usage text is printed instead.
 * @param argc number of command line arguments
 * @param argv command line arguments
 * @return 0 on success; errors terminate the program through quit()
 */
int main( int argc, char** argv )
{
	int i;

	/* usage: any first argument that looks like an option */
	if ( argc > 1 && argv[ 1 ][ 0 ] == '-' )
	{
		printf( "Usage: %s [files]\n", app );
		printf( "\n" );
		printf( "%s normalizes RTF files (rich text format files).\n", app );
		printf( "%s enables the reasonable use of line based file difference tools\n"
			"(diff, RCS, etc.).\n", app );
		printf( "Output files will have the last character of the file name replaced\n"
			"by 'n' (or 'm' if they end on 'n').\n" );
		printf( "\n"
			"This program is free software;\n"
			"you can redistribute it and/or modify it.\n"
			"\n"
			"This program is distributed in the hope that it will be useful,\n"
			"but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
			"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" );
		return 0;
	}

	/* no file arguments: filter stdin to stdout */
	if ( argc <= 1 )
	{
		normalize( stdin, stdout );

		/* normalize() returns silently on i/o errors, so report them
		 * here the same way normalizefile() does for named files;
		 * the explicit flush catches errors on still-buffered output
		 */
		if ( ferror( stdin ) )
			quit( "standard input: %s", strerror( errno ) );
		if ( ferror( stdout ) || fflush( stdout ) == EOF )
			quit( "standard output: %s", strerror( errno ) );

		return 0;
	}

	for ( i = 1; i < argc; i++ )
		normalizefile( argv[ i ] );

	return 0;
}

/** quit -- report a fatal error and leave the program.
 * Writes the program name, the formatted message and a newline to stderr,
 * flushes stderr and exits with status 1; this function does not return.
 * @param fmt printf-style format string of the message
 * @param ... arguments consumed by fmt
 */
static void quit( char const* fmt, ... )
{
	va_list args;

	/* program name prefix */
	fprintf( stderr, "%s: ", app );

	/* the caller's message */
	va_start( args, fmt );
	vfprintf( stderr, fmt, args );
	va_end( args );

	/* finish the line and push it out before terminating */
	fputs( "\n", stderr );
	fflush( stderr );

	exit( 1 );
}

/** normalizefile -- normalize an rtf file.
 * Reads infile and writes the normalized contents to a new file whose
 * name is infile with the last character replaced by 'n' (or by 'm' if
 * it already is 'n').  An existing output file is never overwritten.
 * Any failure terminates the program through quit().
 * @param infile name of the rtf file to read
 */
static void normalizefile( char const* infile )
{
	char* outfile;
	size_t len;
	FILE *fpin, *fpout;

	fpin = fopen( infile, "r" );
	if ( fpin == NULL )
		quit( "can't open %s: %s", infile, strerror( errno ) );

	/* derive the output file name from the input file name */
	len = strlen( infile );
	outfile = malloc( len + 1 );
	if ( outfile == NULL )
		quit( "out of memory: %s", strerror( errno ) );
	memcpy( outfile, infile, len + 1 );
	if ( len > 0 )
		outfile[ len - 1 ] =
			(char)( outfile[ len - 1 ] == 'n' ? 'm' : 'n' );

	/* refuse to clobber an existing file (mode 00: existence check) */
	if ( access( outfile, 00 ) != -1 )
		quit( "output file %s already exists", outfile );

	fpout = fopen( outfile, "w" );
	if ( fpout == NULL )
		quit( "can't write %s: %s", outfile, strerror( errno ) );

	/* normalize() returns silently on i/o errors; detect them here */
	normalize( fpin, fpout );
	if ( ferror( fpin ) )
		quit( "%s: %s", infile, strerror( errno ) );
	if ( ferror( fpout ) )
		quit( "%s: %s", outfile, strerror( errno ) );

	/* closing flushes the remaining buffered output, so a write error
	 * (e.g. a full disk) may only show up now
	 */
	if ( fclose( fpout ) == EOF )
		quit( "%s: %s", outfile, strerror( errno ) );

	free( outfile );
	fclose( fpin );
}

/** normalize -- normalize rtf file contents.
 * Copies the rtf stream fpin to fpout, dropping the line breaks of the
 * input and inserting new ones at fixed places: keywords, the \* symbol,
 * unknown control symbols and braces are put on lines of their own, and
 * plain text is wrapped at column 72.  The escape "backslash followed by
 * a line break" (a paragraph break in rtf) is kept as a backslash on a
 * line of its own.
 * The function returns at end of input or, silently, as soon as a write
 * fails; callers should check ferror() on both streams afterwards.
 * @param fpin stream to read the rtf data from
 * @param fpout stream to write the normalized rtf data to
 */
static void normalize( FILE* fpin, FILE* fpout )
{
	/* length of the magic header copied verbatim, and the wrap column */
	enum { HEADERLEN = 8, WRAPCOL = 72 };

	int curr = EOF;	/* current input character */
	int prev = EOF;	/* input character seen before curr */
	int col = 0;	/* characters written to the current output line */
	int i;

/* helper macros 1:
 * -quit (return from normalize) if error on output
 */
#define get( ) ( getc( fpin ) )
#define put( c ) \
	do { if ( putc( ( c ), fpout ) == EOF ) return; col++; } while ( 0 )

/* helper macros 2:
 * -ignore input line breaks (both CR and LF, on every platform: rtf
 *  ignores either unless it is escaped by a backslash)
 * -quit (return from normalize) if end-of-file
 * advanceraw( ) is the one exception that does not skip line breaks; it
 * is used right after a backslash, where a line break is significant
 */
#define current( ) ( curr )
#define previous( ) ( prev )
#define column( ) ( col )
#define islinebreak( c ) ( ( c ) == '\n' || ( c ) == '\r' )
#define newline( ) do { put( '\n' ); col = 0; } while ( 0 )
#define freshline( ) do { if ( col > 0 ) newline( ); } while ( 0 )
#define save( ) do { prev = current( ); } while ( 0 )
#define skip( ) do { curr = get( ); } while ( islinebreak( curr ) )
#define testeof( ) \
	do { if ( curr == EOF ) { freshline( ); return; } } while ( 0 )
#define advance( ) do { save( ); skip( ); testeof( ); } while ( 0 )
#define advanceraw( ) do { save( ); curr = get( ); } while ( 0 )

	/* initial character */
	advance( );

	/* copy magic header "{\rtfx\x"..} (e.g. "{\rtf1\ansi"..})
	 * that is needed without line breaks
	 */
	for ( i = 0; i < HEADERLEN; i++ )
	{
		put( current( ) );
		advance( );
	}

	/* body */
	for ( ; ; )
	{
		/* wrap at column 72 */
		if ( column( ) >= WRAPCOL )
			freshline( );

		switch ( current( ) )
		{

		/* check for rtf keywords (\[a-z][a-z]*) and control symbols (\[^a-z])
		 * -for simplicity and robustness we could have decided not to scan
		 * for the end of rtf keywords
		 * -we don't embed unknown control symbols for robustness (are there
		 * control symbols even larger than 4 bytes?)
		 */
		case '\\' :

			/* look at the very next character without skipping line
			 * breaks: an escaped line break must not be dropped
			 */
			advanceraw( );

			/* \<CR>, \<LF> (paragraph break, same as \par) */
			if ( islinebreak( current( ) ) )
			{
				/* keep it as a backslash at the end of a line of its
				 * own; advance( ) swallows the rest of a CR LF pair
				 */
				freshline( );
				put( previous( ) );
				newline( );
				advance( );

				break;
			}

			/* note that we will lose this \ if it's the last in a file,
			 * which is not the case in a correct rtf document
			 */
			testeof( );

			/* check for control symbols */
			if ( ! isalpha( current( ) ) )
			{

				switch ( current( ) )
				{

				/* escaped \\, \{, \} */
				case '\\' :
				case '{' :
				case '}' :

				/* \~ (non-breaking space),
				 * \_ (non-breaking hyphen),
				 * \- (not-required hyphen)
				 */
				case '~' :
				case '_' :
				case '-' :

					/* embed in text
					 * note that this can enlarge the lines until column 73
					 */

					put( previous( ) );

					put( current( ) );
					advance( );

					break;

				/* \* */
				case '*' :

					/* isolate on a line of its own */

					freshline( );
					put( previous( ) );

					put( current( ) );
					advance( );

					freshline( );

					break;

				/* \'xx (non-ascii character) */
				case '\'' :

					/* embed in text
					 * note that this can enlarge the lines until column 75
					 */

					put( previous( ) );

					put( current( ) );
					advance( );

					put( current( ) );
					advance( );

					put( current( ) );
					advance( );

					break;

				/* other control symbols */
				default :

					/* put them on a new line to avoid breaking them
					 * by line wrapping
					 */
					freshline( );
					put( previous( ) );

					/* at least one character is part of the control symbol */
					put( current( ) );
					advance( );

					break;
				}
			}
			else
			{
				/* keyword */

				freshline( );
				put( previous( ) );

				while (
					! isspace( current( ) ) &&
					current( ) != '\\' &&
					current( ) != '{' &&
					current( ) != '}' )
				{
					put( current( ) );
					advance( );
				}

				/* blank(s) */
				while ( isspace( current( ) ) )
				{
					put( current( ) );
					advance( );
				}

				freshline( );
			}

			break;

		/* check for begin ({) and end (}) of block */
		case '{' :
		case '}' :

			freshline( );
			put( current( ) );
			advance( );

			freshline( );

			break;

		default :

			put( current( ) );
			advance( );

			break;
		}
	}
}

/* the helper macros are private to normalize( ) */
#undef get
#undef put
#undef current
#undef previous
#undef column
#undef islinebreak
#undef newline
#undef freshline
#undef save
#undef skip
#undef testeof
#undef advance
#undef advanceraw
