/* ccomment.c -- transform C++ programming language comments to C comments
 *
 * Author: Eric Laroche
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * 'ccomment' uses a simple, compact C programming language parser,
 * which is implemented in C (not lex or yacc) as a state engine
 * (10 * 7 matrix and accessor function).  The focus was to keep it compact.
 *
 * Known limitations.
 * - Trigraphs are not handled.
 *
 * Performance.
 * - Most functions (especially the ones that are called for each input
 *   character) have been implemented as macros.
 * - A cache array is used for the transitions.
 */

/* Revision identification string.  It carries the "@(#)" marker and an RCS
 * Id keyword.  Nothing in the visible source reads it; presumably it exists
 * only so that the revision can be found in the compiled binary -- confirm
 * before removing it.
 */
static char const rcsid[] =
	"@(#) $Id: ccomment.c,v 1.4 1997/01/26 14:07:22 laroche Exp $";

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* build the parser variant that also recognizes C++ ('//') comments */
#define CPP_COMMENTS

/* States of the parser.  The enumerators double as row indices into the
 * 'parser' transition table, so their order must match the order of the
 * rows there.
 */
enum states {
	code = 0,		/* outside of comments and literals */
	precomment,		/* a '/' was read while in 'code' */
	comment,		/* inside a C comment */
	postcomment,		/* a '*' was read inside a C comment */
	literal,		/* inside a character literal */
	literalquote,		/* a backslash was read inside a character literal */
	string,			/* inside a string literal */
	stringquote,		/* a backslash was read inside a string literal */
#ifdef CPP_COMMENTS
	cppcomment,		/* inside a C++ comment */
	cppcommentquote,	/* a backslash was read inside a C++ comment */
#endif
};

/* state the parser is in before the first character has been read */
static const enum states startstate = code;

/* Characters that trigger transitions.  The position of a character in
 * this array is its column index in the transition table; every character
 * that is not listed here shares one additional column after the last
 * entry.
 */
static const char transitions[] = {
	'/',	/* column 0 */
	'*',	/* column 1 */
	'\'',	/* column 2 */
	'"',	/* column 3 */
	'\\',	/* column 4 */
	'\n',	/* column 5 */
};

/* Transition table.  parser[ s ][ k ] is the state that follows state 's'
 * when a character with column index 'k' is read.  The rows are in the
 * order of 'enum states'.  The columns are, from left to right: the six
 * characters of 'transitions' (slash, asterisk, single quote, double quote,
 * backslash, newline) and one final column for every other character.
 */
static const enum states parser[][
	sizeof ( transitions ) / sizeof ( transitions[ 0 ] ) + 1 ] = {

	/* row: code */
	{ precomment, code, literal, string, code, code, code, },

	/* row: precomment */
	{
#ifdef CPP_COMMENTS
		cppcomment,	/* a second slash opens a C++ comment */
#else
		precomment,
#endif
		comment, literal, string, code, code, code, },

	/* row: comment */
	{ comment, postcomment, comment, comment, comment, comment, comment, },

	/* row: postcomment */
	{ code, postcomment, comment, comment, comment, comment, comment, },

	/* row: literal */
	{ literal, literal, code, literal, literalquote, literal, literal, },

	/* row: literalquote */
	{ literal, literal, literal, literal, literal, literal, literal, },

	/* row: string */
	{ string, string, string, code, stringquote, string, string, },

	/* row: stringquote */
	{ string, string, string, string, string, string, string, },

#ifdef CPP_COMMENTS
	/* row: cppcomment */
	{ cppcomment, cppcomment, cppcomment, cppcomment,
		cppcommentquote, code, cppcomment, },

	/* row: cppcommentquote */
	{ cppcomment, cppcomment, cppcomment, cppcomment,
		cppcomment, cppcomment, cppcomment, },
#endif
};

/* Cache that maps each possible 'unsigned char' value to its column in
 * 'parser'.  It starts out all zero and is filled by inittransitioncache( )
 * the first time transitionindex( ) is used.
 */
static int transitioncache[ 1 << 8 ] = { 0, };

/* inittransitioncache -- fill 'transitioncache'
 *
 * Every slot first receives the index of the default column (the number of
 * entries in 'transitions'); the slots of the characters listed in
 * 'transitions' are then overwritten with those characters' positions.
 */
static void inittransitioncache( void )
{
	int const ncolumns =
		sizeof ( transitions ) / sizeof ( transitions[ 0 ] );
	int const nslots =
		sizeof ( transitioncache ) / sizeof ( transitioncache[ 0 ] );
	int slot, column;

	for ( slot = 0; slot < nslots; ++slot )
		transitioncache[ slot ] = ncolumns;

	for ( column = 0; column < ncolumns; ++column )
		transitioncache[ (unsigned char)transitions[ column ] ] = column;
}

/* transitionindex -- column of 'parser' that belongs to character 'c'
 *
 * Slot 0 doubles as the "already initialized" flag: it is zero only before
 * inittransitioncache( ) has run, because '\0' is not one of the
 * 'transitions' and so slot 0 ends up holding the non-zero default column.
 * 'c' is evaluated exactly once.
 */
#define transitionindex( c ) ( \
	( transitioncache[ 0 ] == 0 ? inittransitioncache( ) : (void)0 ), \
	transitioncache[ (unsigned char)( c ) ] )

/* nextstate -- state that follows 'state' when character 'c' is read
 *
 * 'state' selects the row of 'parser'; 'c' is translated into a column by
 * transitionindex( ).  The result is an 'enum states' value.
 */
#define nextstate( state, c ) \
	( parser[ state ][ transitionindex( c ) ] )

/* main -- program entry point */
int main( int argc, char** argv )
{
	enum states state, laststate;
	int c, lastc;

	/* usage */
	if ( argc != 1 )
	{
		fprintf( stderr, "usage: %s <c-source\n", argv[ 0 ] );
		exit( 1 );
	}

	state = startstate;
	for ( lastc = EOF; ( c = getchar( ) ) != EOF; lastc = c )
	{
		laststate = state;
		state = nextstate( state, c );

		/* replace starting C++ comments through starting C comments */
		if ( laststate == precomment && state == cppcomment )
		{
			putchar( '*' );
			continue;
		}

		/* change ending C comments inside C++ comment */
		if ( state == cppcomment && lastc == '*' && c == '/' )
			putchar( ' ' );

		/* finish C comment */
		if ( laststate == cppcomment && state == code )
		{
			putchar( ' ' );
			putchar( '*' );
			putchar( '/' );
		}

		putchar( c );
	}

	if ( ferror( stdin ) )
		perror( argv[ 0 ] );

	/* clean up */
	if ( state == cppcomment )
	{
		putchar( ' ' );
		putchar( '*' );
		putchar( '/' );
	}

	return 0;
}

/* Simple C source code parser (able to handle C++ comments)
 * [use a fixed font to view this]
 *
 *          +-------------------+                                      *
 *          |  cppcommentquote  |                                      *
 *          +-------------------+                                      *
 *                         ^  |                                        *
 *            +----+   '\\'|  |           +----+                       *
 *            |    v       |  v           |    v                       *
 *          +-------------------+ '\n'  +-------------------+          *
 *          |    cppcomment     |--+    |      comment      |          *
 *          +-------------------+  |    +-------------------+          *
 *                         ^       |      ^            ^  |            *
 *                      '/'|  +---------- +    +----+  |  |'*'         *
 *                         |  |'*' |        '*'|    v  |  v            *
 *          +-------------------+  |    +-------------------+          *
 *          |    precomment     |  |    |    postcomment    |          *
 *          +-------------------+  |    +-------------------+          *
 *               |  |       ^  |   |        |                          *
 *           '\''|  |'"' '/'|  |   | +----+ |'/'                       *
 *               |  |       |  v   v |    v v                          *
 *               |  |      +-------------------+                       *
 *               |  |      |       code        |                       *
 *               |  |      +-------------------+                       *
 *               |  |       ^  |'\''     ^  |'"'                       *
 *               |  +------ |  | ------- |  | --+                      *
 *               v      '\''|  v      '"'|  v   v                      *
 *          +-------------------+       +-------------------+          *
 *          |      literal      |       |      string       |          *
 *          +-------------------+       +-------------------+          *
 *            ^    |        ^  |         ^  |      ^    |              *
 *            +----+        |  |'\\'     |  |'\\'  +----+              *
 *                          |  v         |  v                          *
 *          +-------------------+       +-------------------+          *
 *          |   literalquote    |       |    stringquote    |          *
 *          +-------------------+       +-------------------+          *
 */

