
          /***********************************************************\
          **                                                         **
          **                                                         **
          **       FILE:        DIFF.C                               **
          **                                                         **
          **       VERSION:     V1.10                                **
          **                                                         **
          **       LAST UPDATE: 13:55:19  21 July 1989               **
          **                                                         **
          **       Author: Heinz Zid,                                **
          **               DIGITAL VIDEO SYSTEMS GROUP               **
          **                                                         **
          **       Copyright (c) Cableshare Inc. 1988, 1989          **
          **       All rights reserved.                              **
          **                                                         **
          ** * CONFIDENTIAL * Company Secret Proprietary Information **
          **                                                         **
          **                                                         **
          \***********************************************************/


 /*****************************************************************************\
 **                                                                           **
 **                         REVISION HISTORY:                                 **
 **                         ================                                  **
 **  DATE         VERSION   WHO                  DESCRIPTION                  **
 **  ----         -------   ---                  -----------                  **
 **                                                                           **
 **                                                                           **
 **                                                                           **
 \*****************************************************************************/

#define VERSION     "V1.10"

#if defined(RMX)
    #include <i286.h>
    #include <rmx.h>
#endif

#include <stdio.h>
#include <io.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>


#if !defined(RMX)
    /************************/
    /* Mainline exit codes. */
    /************************/
    #define E_OK          0
    #define E_PARAM       0x8004
    #define E_FACCESS     0x0026
    #define E_MEM         0x0002
    #define E_STATE       0x0007
#endif


/****************************/
/* Boolean type and values. */
/****************************/
typedef int    BOOLEAN;
#define TRUE   1
#define FALSE  0

/****************************/
/* LOCAL and PUBLIC def'ns. */
/****************************/
#define LOCAL  static
#define PUBLIC

/******************/
/* Minimum macro. */
/******************/
#if !defined(min)
    #define min(x, y)   ( ( (x) <= (y) ) ? (x) : (y) )
#endif

/****************************************************/
/* Value to indicate identical strings with strcmp. */
/****************************************************/
#define ALIKE   0

/***************************************/
/* Define the option leadin character. */
/***************************************/
#if defined(RMX)
    #define SWITCH_CHAR     '-'
    #define ALT_SWITCH_CHAR '-'
#else
    #define SWITCH_CHAR     '-'
    #define ALT_SWITCH_CHAR '/'
#endif

/**************************************************/
/* Result of hashing function for a line of text. */
/**************************************************/
typedef unsigned int  HASH;

/*****************************************************/
/* Mask for number of bits in hash code.  (12 bits). */
/*****************************************************/
#define MASK  (unsigned int) 0x0FFF

/**********************************/
/* Number of possible hash codes. */
/**********************************/
#define HASHSIZ  (MASK + 1)

/*************************************************/
/* Information about an entry in the hash table. */
/*************************************************/
typedef struct tblentry
{
    int  frst;               /* First line # with this hash code. */
    int  last;               /* Last line # with this hash code.  */
} TBLENTRY;

/*************************************/
/* Information about a line of text. */
/*************************************/
typedef struct lineinf
{
    HASH  hash;              /* Hash code value. */
    int   nxtln;             /* Next line with same hash (or 0). */
} LINEINF;

/*****************************/
/* Information about a file. */
/*****************************/
typedef struct fileinf
{
    char      **txt;        /* Array of lines of text. */
    LINEINF    *line;       /* Array of line info structs. */
    TBLENTRY   *hashtbl;    /* Hash table. */
} FILEINF;

/**************************/
/* Function declarations. */
/**************************/
PUBLIC int     main           ( int argc, char *argv[] );
LOCAL  BOOLEAN isblankline    ( register char *line_ptr );
LOCAL  BOOLEAN get_inf        ( char **line_array_ptr, int nr_lines, FILEINF *file_info) ;
LOCAL  HASH    calc_hash      ( char *line_ptr );
LOCAL  void    fnd_seq        ( FILEINF *file1_info, int begin1, int end1,
                                FILEINF *file2_info, int begin2, int end2, int match_length );
LOCAL  BOOLEAN chk_hashes     ( LINEINF *line1_info, LINEINF *line2_info, int count );
LOCAL  int     cnt_matches    ( char **line1_array_ptr, char **line2_array_ptr, int most );
LOCAL  void    report         ( int begin_line1, int begin_line2, int nr_lines_matched );
LOCAL  void    usage          ( void );
LOCAL  void    no_memory      ( void );
LOCAL  void    squeeze_ws     ( char *line );
LOCAL  int     binary_compare ( char *file1_txt, char *file2_txt );
LOCAL  BOOLEAN compare        ( char **a1, int n1, char **a2, int n2, int lngval );
LOCAL  void    print_invocation_line( int main_argc, char *main_argv[] );


/*********************************/
/* Static variable declarations. */
/*********************************/
LOCAL  int   old_e1;            /* Previous match end line for file 1.    */
LOCAL  int   old_e2;            /* Previous match end line for file 2.    */
LOCAL  char *line_buffer_ptr;   /* Pointer to the file line buffer.       */
LOCAL  int   max_line_length;   /* The maximum line length in both files. */
LOCAL  FILE *fptr_1;            /* File pointer for filename_1.           */
LOCAL  FILE *fptr_2;            /* File pointer for filename_2.           */
LOCAL  char *file_1;            /* Name of file 1.                        */
LOCAL  char *file_2;            /* Name of file 2.                        */
LOCAL  int     f1_line_count;   /* Just what the name implies.            */
LOCAL  int     f2_line_count;   /* Just what the name implies.            */

LOCAL  BOOLEAN report_invoked;  /* Set to TRUE upon the first invocation. */
LOCAL  BOOLEAN report_matches;
LOCAL  BOOLEAN skip_blank_lines;


/*============================================================================*/
/*============================================================================*/

/**************************************************************
** The mainline parses the command line and preprocesses the **
** text in the files according to the options specified.     **
**************************************************************/

PUBLIC int  main( int argc, char *argv[] )
{
    BOOLEAN skip_leading_ws;
    BOOLEAN skip_trailing_ws;
    BOOLEAN ignore_case;
    BOOLEAN ignore_multiple_ws;
    BOOLEAN bin_compare;
    int     match_length;       /* The "long enough" value: defaults to 25 */
    int     i;                  /* The ever popular loop counter.          */
    int     j;                  /* Generic counter.                        */
    int     file_count;         /* The number of files specified thus far. */
    char  **f1_lines;           /* Pointer to array of lines for file 1.   */
    char  **f2_lines;           /* Pointer to array of lines for file 2.   */
    char  **c_ptr2;             /* Pointer to the above arrays.            */
    char   *c_ptr;              /* Generic character pointer.              */
    char   *ch_ptr;             /* Generic character pointer.              */


    if ( argc < 2 )
    {
        usage();
        exit (E_PARAM);
    }


    /**************************************/
    /* Initialisation and default values. */
    /**************************************/

    fptr_2             = stdin;
    match_length       = 25;
    file_count         = 0;
    max_line_length    = 132;
    line_buffer_ptr    = NULL;
    file_1             = NULL;
    file_2             = NULL;
    old_e1             = 1;
    old_e2             = 1;
    skip_leading_ws    = FALSE;
    skip_trailing_ws   = FALSE;
    ignore_case        = FALSE;
    ignore_multiple_ws = FALSE;
    report_matches     = FALSE;
    skip_blank_lines   = FALSE;
    bin_compare        = FALSE;
    report_invoked     = FALSE;


    /*****************************************************/
    /* Parse the command line for options and filenames. */
    /*****************************************************/

    for ( i = 1; i < argc; i++ )
    {
        strupr( argv[i] );
        if ( *argv[i] == SWITCH_CHAR || *argv[i] == ALT_SWITCH_CHAR )
        {
            for ( j = 1; argv[i][j] != '\0'; j++ )
            {
                switch ( argv[i][j] )
                {
                    case ( 'A' ) :
                        skip_blank_lines = TRUE;
                        break;


                    case ( 'B' ) :
                        bin_compare = TRUE;
                        break;


                    case ( 'C' ) :
                        ignore_case = TRUE;
                        break;


                    case ( 'E' ) :
                        report_matches = TRUE;
                        break;


                    case ( 'F' ) :
                        j++;
                        switch ( ++file_count )
                        {
                            case ( 1 ) :
                                fptr_1 = fopen( file_1 = &argv[i][j], "r" );
                                if ( fptr_1 == NULL )
                                {
                                    fprintf( stderr, "DIFF ERROR (%d) opening file \'%s\'\n", __LINE__, &argv[i][j] );
                                    exit (E_FACCESS);
                                }
                                break;


                            case ( 2 ) :
                                fptr_2 = fopen( file_2 = &argv[i][j], "r" );
                                if ( fptr_2 == NULL )
                                {
                                    fprintf( stderr, "DIFF ERROR (%d) opening file \'%s\'\n", __LINE__, &argv[i][j] );
                                    exit (E_FACCESS);
                                }
                                break;


                            default :
                                fprintf( stderr, "DIFF ERROR: Too many files specified.\n" );
                                usage();
                                exit (E_PARAM);
                                break;
                        }

                        j += (strlen( &argv[i][j] ) - 1);
                        break;


                    case ( 'H' ) :
                        usage();
                        exit (E_OK);
                        break;


                    case ( 'L' ) :
                        sscanf( &argv[1][++j], "%d", &max_line_length );
                        max_line_length += 2;
                        line_buffer_ptr = malloc( max_line_length );

                        if ( line_buffer_ptr == NULL )
                            no_memory();

                        while ( !isdigit( argv[i][j] ) )
                            j++;

                        j--;
                        break;


                    case ( 'M' ) :
                        sscanf( &argv[i][++j], "%d", &match_length );

                        while ( isdigit( argv[i][j] ) )
                            j++;

                        j--;
                        break;


                    case ( 'S' ) :
                        skip_leading_ws = TRUE;
                        break;


                    case ( 'T' ) :
                        skip_trailing_ws = TRUE;
                        break;


                    case ( 'W' ) :
                        ignore_multiple_ws = TRUE;
                        break;


                    default :
                    {
                        int save, length;

                        save = i;
                        fprintf( stderr, "DIFF ERROR: Invalid syntax:\n" );

                        for ( length = j + save, i = 0; i < argc; i++ )
                        {
                            fprintf( stderr, "%s ", argv[i] );
                            if ( i < save )
                                length += strlen( argv[i] );
                        }

                        fprintf( stderr, "\n" );
                        for ( ; length; length-- )
                            fprintf( stderr, "-" );

                        fprintf( stderr, "^" );
                        usage();
                        exit (E_PARAM);
                    }

                    break;

                }
            }
        }

        else
        {
            /******************************************/
            /* No switch character : Assume filespec. */
            /******************************************/

            switch ( ++file_count )
            {
                case ( 1 ) :
                    fptr_1 = fopen( file_1 = argv[i], "r" );
                    if ( fptr_1 == NULL )
                    {
                        fprintf( stderr, "DIFF ERROR (%d) opening file \'%s\'\n", __LINE__, argv[i] );
                        exit (E_FACCESS);
                    }
                    break;


                case ( 2 ) :
                    fptr_2 = fopen( file_2 = argv[i], "r" );
                    if ( fptr_2 == NULL )
                    {
                        fprintf( stderr, "DIFF ERROR (%d) opening file \'%s\'\n", __LINE__, argv[i] );
                        exit (E_FACCESS);
                    }
                    break;


                default :
                    fprintf( stderr, "DIFF ERROR: Too many files specified or invalid option.\n" );
                    usage();
                    exit (E_PARAM);
                    break;
            }
        }
    }


    /*************************************************/
    /* Make sure that two files have been specified. */
    /*************************************************/

    if ( file_count != 2 )
    {
        fprintf( stderr, "DIFF ERROR: Must specify two files.\n" );
        exit (E_PARAM);
    }


    if ( bin_compare )
    {
        print_invocation_line( argc, argv );
        fclose( fptr_1 );
        fclose( fptr_2 );
        exit ( binary_compare( file_1, file_2 ) );
    }


    /**************************************************************/
    /* Check if the max_line_length parameter has been specified. */
    /* If it has not been specified, then allocate the default.   */
    /**************************************************************/

    if ( line_buffer_ptr == NULL )
    {
        line_buffer_ptr = malloc( max_line_length );

        if ( line_buffer_ptr == NULL )
            no_memory();
    }


    /*****************************************************/
    /* Count the number of lines in each specified file. */
    /*****************************************************/

    for ( f1_line_count = 0; f1_line_count >= 0; f1_line_count++ )
    {
        if ( fgets( line_buffer_ptr, max_line_length, fptr_1 ) == NULL )
            break;
    }

    if ( f1_line_count < 0 )     /* Integer overflow test. */
    {
        fprintf( stderr, "DIFF ERROR: (%d) Integer arithmetic overflow.\n", __LINE__ );
        exit (E_STATE);
    }


    for ( f2_line_count = 0; f2_line_count >= 0; f2_line_count++ )
    {
        if ( fgets( line_buffer_ptr, max_line_length, fptr_2 ) == NULL )
            break;
    }


    if ( f2_line_count < 0 )     /* Integer overflow test. */
    {
        fprintf( stderr, "DIFF ERROR (%d) Integer arithmetic overflow.\n", __LINE__ );
        exit (E_STATE);
    }



    /*********************************************/
    /* Allocate memory for the file line arrays. */
    /*********************************************/

    f1_lines = (char **)malloc( (f1_line_count + 1) * sizeof(char *) );
    f2_lines = (char **)malloc( (f2_line_count + 1) * sizeof(char *) );

    if ( f1_lines == NULL || f2_lines == NULL )
        no_memory();

    if ( fseek( fptr_1, 0L, 0 ) || fseek( fptr_2, 0L, 0 ) )
    {
        fprintf( stderr, "DIFF ERROR (%d) seeking to beginning of file.\n", __LINE__ );
        exit (E_STATE);
    }


    /*******************************/
    /* Build the file line arrays. */
    /*******************************/

    for ( i = 1, c_ptr2 = &f1_lines[1]; i <= f1_line_count; i++ )
    {
        if ( fgets( line_buffer_ptr, max_line_length, fptr_1 ) == NULL )
        {
            fprintf( stderr, "DIFF ERROR (%d) reading file.\n", __LINE__ );
            exit (E_STATE);
        }

        c_ptr = line_buffer_ptr;

        if ( ignore_case )
            strupr( line_buffer_ptr );

        if ( skip_leading_ws )
        {
            while ( isspace( *c_ptr ) )
                c_ptr++;
        }

        if ( skip_trailing_ws )
        {
            j = strlen( line_buffer_ptr ) - 1;
            for ( ch_ptr = line_buffer_ptr + j; j >= 0; j--, ch_ptr-- )
            {
                if ( !isspace( *ch_ptr ) )
                    break;
            }

            if ( j >= 0 )
            {
                *++ch_ptr = '\n';
                *++ch_ptr = '\0';
            }
        }

        if ( ignore_multiple_ws )
            squeeze_ws( c_ptr );


        *c_ptr2 = malloc( strlen( c_ptr ) + 1 );
        if ( *c_ptr2 == NULL )
            no_memory();

        strcpy( *c_ptr2, c_ptr );
        c_ptr2++;
    }


    for ( i = 1, c_ptr2 = &f2_lines[1]; i <= f2_line_count; i++ )
    {
        if ( fgets( line_buffer_ptr, max_line_length, fptr_2 ) == NULL )
        {
            fprintf( stderr, "DIFF ERROR (%d) reading file.\n", __LINE__ );
            exit (E_STATE);
        }

        c_ptr = line_buffer_ptr;

        if ( ignore_case )
            strupr( line_buffer_ptr );

        if ( skip_leading_ws )
        {
            while ( isspace( *c_ptr ) )
                c_ptr++;
        }

        if ( skip_trailing_ws )
        {
            j = strlen( line_buffer_ptr ) - 1;
            for ( ch_ptr = line_buffer_ptr + j; j >= 0; j--, ch_ptr-- )
            {
                if ( !isspace( *ch_ptr ) )
                    break;
            }

            if ( j >= 0 )
            {
                *++ch_ptr = '\n';
                *++ch_ptr = '\0';
            }
        }

        if ( ignore_multiple_ws )
            squeeze_ws( c_ptr );


        *c_ptr2 = malloc( strlen( c_ptr ) + 1 );
        if ( *c_ptr2 == NULL )
            no_memory();

        strcpy( *c_ptr2, c_ptr );
        c_ptr2++;
    }


    /************************************************************************/
    /* Seek to the beginning of both files. (This is required by report()). */
    /************************************************************************/

    if ( fseek( fptr_1, 0L, 0 ) || fseek( fptr_2, 0L, 0 ) )
    {
        fprintf( stderr, "DIFF ERROR (%d) seeking to beginning of file.\n", __LINE__);
        exit (E_STATE);
    }

    print_invocation_line( argc, argv );
    if ( compare( f1_lines, f1_line_count, f2_lines, f2_line_count, match_length ) )
        exit (E_OK);
    else
        no_memory();
    
    return (E_OK);
}


/*============================================================================*/

/**********************************************************************
** no_memory() simply reports a shortage of memory and exits with an **
** error code of E_MEM. Note that exit() closes all open files.      **
**********************************************************************/

LOCAL void no_memory( void )
{
    fprintf( stderr, "DIFF ERROR: (%d) Insufficent memory.\n", __LINE__ );
    exit (E_MEM);
}


/*============================================================================*/

/************************************************
** usage() simply explains the command syntax. **
************************************************/

LOCAL void usage( void )
{
    printf( "\nFile comparison utility, %s\n", VERSION );
    printf( "Authors: Heinz Zid and Tom Steppe.\n" );
    printf( "USAGE: diff [%ca %cb %cc %ce %ch %clx %cmx %cs %ct %cw] <[%cf]FILESPEC1> <[%cf]FILESPEC2>\n\n",
                      SWITCH_CHAR, SWITCH_CHAR, SWITCH_CHAR, SWITCH_CHAR, SWITCH_CHAR, SWITCH_CHAR,
                      SWITCH_CHAR, SWITCH_CHAR, SWITCH_CHAR, SWITCH_CHAR, SWITCH_CHAR, SWITCH_CHAR );
    printf( "       %ca   - ignore blank lines; defaults to FALSE. Ignored with %ce option.\n", SWITCH_CHAR, SWITCH_CHAR );
    printf( "       %cb   - binary compare; defaults to FALSE. All other options are ignored.\n", SWITCH_CHAR );
    printf( "       %cc   - ignore case; defaults to FALSE.\n", SWITCH_CHAR );
    printf( "       %ce   - report matches as opposed to differences.\n", SWITCH_CHAR );
    printf( "       %cfx  - specifies file \"x\".\n", SWITCH_CHAR );
    printf( "       %ch   - help: prints this text.\n", SWITCH_CHAR );
    printf( "       %clx  - x >= maximum line length; defaults to 132.\n", SWITCH_CHAR );
    printf( "       %cmx  - x > line length of longest non-unique block; defaults to 25.\n", SWITCH_CHAR );
    printf( "       %cs   - skip leading  white space; defaults to FALSE.\n", SWITCH_CHAR );
    printf( "       %ct   - skip trailing white space; defaults to FALSE.\n", SWITCH_CHAR );
    printf( "       %cw   - consecutive white space == 1 blank; defaults to FALSE.\n", SWITCH_CHAR );

    if ( SWITCH_CHAR != ALT_SWITCH_CHAR )
        printf( "Valid switch characters are \'%c\' and \'%c\'\n",
                          SWITCH_CHAR, ALT_SWITCH_CHAR );

    printf( "Options may be concatenated without intervening switch characters. e.g. -astw\n" );
    printf( "Options may appear in any order; Filespecs need not appear at the end.\n" );
    printf( "If a filespec begins with a valid switch character,\n" );
    printf( "then preceed the filespec with the \'f\' option.\n" );
}


/*============================================================================*/

/************************************************************\
** Print the invocation line if stdout has been redirected. **
\************************************************************/

LOCAL void print_invocation_line( int argc, char *argv[] )
{
    int  i;

#   if defined(RMX)
        if ( !isatty( stdout->_fd ) )
#   else
        if ( !isatty( stdout->_file ) )
#   endif
    {

        printf( "\n\nDIFF %s\n", VERSION );

        for ( i = 0; i < argc; i++ )
            printf( "%s ", argv[i] );

        printf( "\nAuthors: Heinz Zid and Tom Steppe.\n\n\n" );
    }
}


/*============================================================================*/

/**********************************************************\
** squeeze_ws() compresses white space in a line of text. **
\**********************************************************/

LOCAL void squeeze_ws( char *c_ptr )
{
    char *d_ptr;   /* destination pointer */
    char *s_ptr;   /* source      pointer */

    for ( d_ptr = c_ptr; *d_ptr; d_ptr++ )
    {
        if ( isspace( *d_ptr ) )
        {
            *d_ptr = ' ';

            if ( isspace( d_ptr[1] ) )
            {
                /*************************************************/
                /* We have two or more consecutive white spaces. */
                /* Find the next non-white space before the end. */
                /*************************************************/

                for ( s_ptr = ++d_ptr; isspace( *s_ptr ) && *s_ptr; s_ptr++ )
                    ;

                if ( *s_ptr == '\0' )
                {
                    if ( *(d_ptr + 1) != '\0' )
                        *d_ptr = '\0';

                    return;
                }

                strcpy( d_ptr, s_ptr );
            }
        }
    }
}


/*============================================================================*/

/********************************************************************\
** isblankline() returns TRUE if the line contains only whitespace. **
\********************************************************************/

LOCAL BOOLEAN isblankline( register char *line_ptr )
{
    for (; *line_ptr && isspace( *line_ptr ); line_ptr++ )
        ;

    return ( *line_ptr == '\0' );
}


/*============================================================================*/

/*******************************************************\
** report() prints the result of the file comparision. **
\*******************************************************/

LOCAL void report( m1, m2, cnt )
    int  m1;    /* (I) Location of matching sequence in #1.  */
    int  m2;    /* (I) Location of matching sequence in #2.  */
    int  cnt;   /* (I) Number of lines in matching sequence. */
{
    int  e1;    /* Last line of match for file 1. */
    int  e2;    /* Last line of match for file 2. */
    int  i;     /* The ubiquitous loop counter.   */
    int  print_banner1, print_banner2, print_tail;
    int  print_missing1, print_missing2;

    e1 = m1 + cnt - 1;
    e2 = m2 + cnt - 1;
    print_banner1 = print_banner2 = TRUE;
    print_tail = FALSE;
    print_missing1 = print_missing2 = TRUE;


    if ( report_matches )
    {
        if ( cnt )
            printf( "Matched %5d lines:   (%5d - %5d) and (%5d - %5d)\n",
                     cnt, m1, e1, m2, e2 );
        else
        {
            if ( !report_invoked )
                printf( "Matched 0 lines.\n" );
        }

        report_invoked = TRUE;
        return;
    }




    if ( old_e1 != m1 || old_e2 != m2 || cnt == 0 )
    {
        /************************************************/
        /* We possibly have some differences to report. */
        /************************************************/

        for ( i = old_e1; i < m1; i++ )
        {
            if ( fgets( line_buffer_ptr, max_line_length, fptr_1 ) == NULL )
            {
                fprintf( stderr, "DIFF ERROR (%d) reading file \"%s\".\n", __LINE__, file_1 );
                exit (E_STATE);
            }

            line_buffer_ptr[ strlen( line_buffer_ptr ) - 1 ] = '\0';

            if ( skip_blank_lines && isblankline( line_buffer_ptr ) )
                continue;

            if ( print_banner1 )
            {
                print_banner1 = print_missing1 = FALSE;
                print_tail = TRUE;
                printf( "|==============================================================================\n" );
                printf( "|| FILE: %s\n", file_1 );
            }

            printf( "|| LINE: %5u \"%s\"\n", i, line_buffer_ptr );
        }



        for ( i = old_e2; i < m2; i++ )
        {
            if ( fgets( line_buffer_ptr, max_line_length, fptr_2 ) == NULL )
            {
                fprintf( stderr, "DIFF ERROR (%d) reading file \"%s\".\n", __LINE__, file_2 );
                exit (E_STATE);
            }

            line_buffer_ptr[ strlen( line_buffer_ptr ) - 1 ] = '\0';

            if ( skip_blank_lines && isblankline( line_buffer_ptr ) )
                continue;

            if ( print_banner1 )
            {
                printf( "|==============================================================================\n" );
                printf( "|| FILE: %s\n", file_1 );

                if ( old_e1 == m1 || old_e1 == m1 + 1 )
                    printf( "|| Missing text! LINE: %5u.5\n", old_e1 - 1 );
                else
                    printf( "|| Missing text! LINES: %5u - %5u\n", old_e1, m1 - 1 );

                print_tail = TRUE;
                print_banner1 = print_missing1 = FALSE;
            }

            if ( print_banner2 )
            {
                printf( "|------------------------------------------------------------------------------\n" );
                printf( "|| FILE: %s\n", file_2 );
                print_tail = TRUE;
                print_banner2 = FALSE;
            }

            printf( "|| LINE: %5u \"%s\"\n", i, line_buffer_ptr );
            print_missing2 = FALSE;
        }


        if ( print_banner2 && !print_banner1 )
        {
            printf( "||-----------------------------------------------------------------------------\n" );
            printf( "|| FILE: %s\n", file_2 );

            if ( old_e2 == m2 || old_e2 == m2 + 1 )
                printf( "|| Missing text! LINE: %5u.5\n", old_e2 - 1 );
            else
                printf( "|| Missing text! LINES: %5u - %5u\n", old_e2, m2 - 1 );

            printf( "|==============================================================================\n\n\n" );
            print_tail = FALSE;
        }


        if ( print_tail )
        {
            if ( print_missing2 )
            {
                if ( old_e2 == m2 || old_e2 == m2 + 1 )
                    printf( "|| Missing text! LINE: %5u.5\n", old_e2 - 1 );
                else
                    printf( "|| Missing text! LINES: %5u - %5u\n", old_e2, m2 - 1 );
            }

            printf( "|==============================================================================\n\n\n" );
        }
    }

    /*******************************************/
    /* Now skip over the matched regions of    */
    /* the files and update old_e1 and old_e2. */
    /*******************************************/

    if ( e1 < f1_line_count )
    {
        for ( i = m1; i <= e1 && i < f1_line_count; i++ )
        {
            if ( fgets( line_buffer_ptr, max_line_length, fptr_1 ) == NULL )
            {
                fprintf( stderr, "DIFF ERROR (%d) reading file.\n", __LINE__ );
                exit (E_STATE);
            }
        }
    }
    old_e1 = e1 + 1;


    if ( e2 < f2_line_count )
    {
        for ( i = m2; i <= e2 && i < f2_line_count; i++ )
        {
            if ( fgets( line_buffer_ptr, max_line_length, fptr_2 ) == NULL )
            {
                fprintf( stderr, "DIFF ERROR (%d) reading file.\n", __LINE__ );
                exit (E_STATE);
            }
        }
    }

    old_e2 = e2 + 1;
    report_invoked = TRUE;
}


/*============================================================================*/

/************************************************************************\
** binary_compare() performs a byte-by-byte comparision of two files    **
** and reports the differences. This routine does not attempt to find   **
** matches. i.e. It does not attempt to resynchronize once a difference **
** is found.                                                            **
\************************************************************************/

LOCAL  int  binary_compare( char *file_1, char *file_2 )
{
    FILE *fptr_1;
    FILE *fptr_2;
    int     char_1;             /* Char from file 1; used for binary comp. */
    int     char_2;             /* Char from file 2; used for binary comp. */
    unsigned long int offset;   /* File offset; used for binary compare.   */
    BOOLEAN printed_banner;


    fptr_1 = fopen( file_1, "rb" );
    if ( fptr_1 == NULL )
    {
        fprintf( stderr, "DIFF ERROR (%d) opening file \'%s\'\n", __LINE__, file_1 );
        return (E_FACCESS);
    }

    fptr_2 = fopen( file_2, "rb" );
    if ( fptr_1 == NULL )
    {
        fprintf( stderr, "DIFF ERROR (%d) opening file \'%s\'\n", __LINE__, file_2 );
        return (E_FACCESS);
    }

    printed_banner = FALSE;
    offset         = 0L;

    for ( char_1 = 0, char_2 = 0; char_1 != EOF || char_2 != EOF; offset++ )
    {
        if ( char_1 != EOF )
            char_1 = getc( fptr_1 );

        if ( char_2 != EOF )
            char_2 = getc( fptr_2 );

        if ( char_1 != char_2 )
        {
            if ( !printed_banner )
            {
                printf( "**********************************************************\n" );
                printf( "FILE 1 = %s   FILE 2 = %s\n", file_1, file_2 );
                printf( "OFFSET:   FILE 1  FILE 2\n\n" );
                printed_banner = TRUE;
            }

            printf( "%8.8lu  ", offset );

            if ( char_1 == EOF )
                printf( "EOF     " );
            else
            {
                printf( "%02X ", char_1 );

                if ( isprint( char_1 ) && char_1 != 0x09 && char_1 != 0x0A && char_1 != 0x0D )
                    printf( "\'%c\'  ", char_1 );
                else
                    printf( "     " );
            }


            if ( char_2 == EOF )
                printf( "EOF\n" );
            else
            {
                printf( "%02X", char_2 );
                if ( isprint( char_2 ) && char_2 != 0x09 && char_2 != 0x0A && char_2 != 0x0D )
                    printf( " \'%c\'\n", char_2 );
                else
                    printf( "\n" );
            }
        }
    }


    if ( printed_banner )
    {
        printf( "%8.8lu  EOF     EOF\n", --offset );
        printf( "**********************************************************\n" );
    }

    return (E_OK);
}


/*============================================================================*/
/*============================================================================*/


            /********************************************************/
            /* The following algorithms and source code were taken  */
            /* from the September 1987 issue of Dr. Dobb's Journal. */
            /********************************************************/

/*
** Copyright (c) 1987,  Tom Steppe.  All rights reserved.
**
** This module compares two arrays of lines (representing
** files) and reports the sequences of consecutive matching
** lines bwtween them using the "recursive longest matching
** sequence" algorithm. This is useful for implementing a
** file comparision utility.
**
*/

/*============================================================================*/

/************************************************************
** compare() compares two arrays of lines and reports the  **
** sequences of consecutive matching lines. The zeroth     **
** element of each array is unused so that the index into  **
** the array is identical to the associated line number.   **
**                                                         **
** RETURNS:  TRUE  if comparision succeeded,               **
**           FALSE if not enough memory.                   **
************************************************************/

LOCAL  BOOLEAN compare( char **a1, int n1, char **a2, int n2, int lngval )
{
    FILEINF   f1;   /* File information for #1. */
    FILEINF   f2;   /* File information for #2. */
    BOOLEAN   rtn;  /* Return value.            */


  /***************************************************/
  /* Gather information for each file, then compare. */
  /***************************************************/

    if ( rtn = ( get_inf( a1, n1, &f1 ) && get_inf( a2, n2, &f2 ) ) )
        fnd_seq( &f1, 1, n1, &f2, 1, n2, lngval );

    return (rtn);
}


/*============================================================================*/

/*************************************************************
** get_inf() calculates hash codes and builds a hash table. **
**                                                          **
** RETURNS:  TRUE  if get_inf succeeded,                    **
**           FALSE if not enough memory.                    **
*************************************************************/

LOCAL BOOLEAN get_inf( char **a, int n, FILEINF *f )
{
    size_t        size;   /* Size of the hash table. */
    register int  i;      /* Counter.                */
    TBLENTRY     *_entry; /* Entry in hash table.    */


    /*****************************/
    /* Assign the array of text. */
    /*****************************/

    f->txt = a;


    /*****************************************/
    /* Allocate and initialize a hash table. */
    /*****************************************/

    size = HASHSIZ * sizeof(TBLENTRY);

    if ( f->hashtbl = (TBLENTRY *)malloc( size ) )
        memset( f->hashtbl, 0, size );
    else
        return (FALSE);


    if ( n > 0 )    /* If there are any lines: */
    {

        /*****************************************/
        /* Allocate an array of line structures. */
        /*****************************************/

        if ( f->line = (LINEINF *)malloc( (n + 1 ) * sizeof(LINEINF *) ) )
        {
            /***************************/
            /* Loop through the lines. */
            /***************************/

            for ( i = 1; i <= n; i++ )
            {
                /**********************************/
                /* Calculate the hash code value. */
                /**********************************/

                f->line[i].hash = calc_hash( f->txt[i] );


                /***************************************/
                /* Locate the entry in the hash table. */
                /***************************************/

                _entry = f->hashtbl + f->line[i].hash;


                /************************************************************/
                /* Update the linked list of lines with the same hash code. */
                /************************************************************/

                f->line[_entry->last].nxtln = i;
                f->line[i].nxtln = 0;


                /*****************************************************************/
                /* Update the first and last line information in the hash table. */
                /*****************************************************************/

                if ( _entry->frst == 0 )
                    _entry->frst = i;
                _entry->last = i;
            }
        }
        else
            return (FALSE);

    }
    else
        f->line = NULL;

    return (TRUE);
}


/*============================================================================*/

/***********************************************************
** calc_hash() calculates a hash code for a line of text. **
**                                                        **
** RETURNS:  a hash value code.                           **
***********************************************************/

LOCAL HASH calc_hash( char *buf )
{
    register unsigned int   chksum;   /* Checksum.        */
    char                   *s;        /* Pointer.         */
    HASH                    hash;     /* Hash code value. */


    /******************************************************/
    /* Build up a checksum of the characters in the text. */
    /******************************************************/

    for ( chksum = 0, s = buf; *s; chksum ^= *s++ )
        ;


    /************************************************************************/
    /* Combine the 7-bit checksum and as much of the length as is possible. */
    /************************************************************************/

    hash = ( ( chksum & 0x7F ) | ( ( s - buf ) << 7 ) ) & MASK;

    return (hash);
}


/*============================================================================*/

/*****************************************************************
** Given starting and ending line numbers, fnd_seq() finds a    **
** "good sequence" of lines within those ranges. fnd_seq() then **
** recursively finds "good sequences" in the sections of lines  **
** above the "good sequence" and below it.                      **
*****************************************************************/

LOCAL void fnd_seq( FILEINF *f1, int beg1, int end1,
                    FILEINF *f2, int beg2, int end2, int lngval )
{
    LINEINF      *line1;    /* Line information ptr in #1.  */
    LINEINF      *line2;    /* Line information ptr in #2.  */
    register int  limit;    /* Looping limit.               */
    int           ln1;      /* Line number in #1.           */
    int           ln2;      /* Line number in #2.           */
    register int  ln;       /* Working line number;         */
    BOOLEAN       go;       /* Continue loop?               */
    int           most;     /* Longest possible seq.        */
    int           most1;    /* Longest possible due to #1.  */
    int           most2;    /* Longest possible due to #2.  */
    int           cnt;      /* Length of longest seq.       */
    int           oldcnt;   /* Length of prev. longest seq. */
    int           n;        /* Length of cur. longest seq.  */
    int           m1;       /* Line of longest seq. in #1.  */
    int           m2;       /* Line of longest seq. in #2.  */


    /***************/
    /* Initialise. */
    /***************/

    go    = TRUE;
    line1 = f1->line;
    line2 = f2->line;


    /********************************************/
    /* Initialise longest sequence information. */
    /********************************************/

    cnt    = 0;          /* Length of longest sequence.   */
    m1     = beg1 - 1;   /* Line # of longest seq. in #1. */
    m2     = beg2 - 1;   /* Line # of longest seq. in #2. */
    oldcnt = 0;          /* Length of prev. longest seq.  */


    /****************************************************/
    /* Calculate maximum possible number of consecutive */
    /* lines that can match (based on line # ranges).   */
    /****************************************************/

    end1  = min( end1, f1_line_count );
    end2  = min( end2, f2_line_count );
    most1 = end1 - beg1 + 1;
    most2 = end2 - beg2 + 1;


    /*********************************************************/
    /* Scan lines looking for a "good sequence".             */
    /* Compare lines in the following order of line numbers: */
    /*                                                       */
    /*                        (1, 1)                         */
    /*                (1, 2), (2, 1), (2, 2)                 */
    /*        (1, 3), (2, 3), (3, 1), (3, 2), (3, 3)         */
    /* etc.                                                  */
    /*********************************************************/

    for ( ln1 = beg1, ln2 = beg2; TRUE; ln1++, ln2++ )
    {
        if ( ln2 <= end2 - cnt )
        {

            /**************************************************/
            /* There are enough lines left in #2 such that it */
            /* is possible to find a longer sequence.         */
            /* Determine the limit in #1 that both enforces   */
            /* the order scheme and still makes it possible   */
            /* to find a longer sequence.                     */
            /**************************************************/

            limit = min( (ln1 - 1), (end1 - cnt) );


            /******************************************/
            /* Calculate first potential match in #1. */
            /******************************************/

            for ( ln = f1->hashtbl[line2[ln2].hash].frst; ln && ln < beg1;
                ln = line1[ln].nxtln )
                ;


            /*********************************/
            /* Loop through the lines in #1. */
            /*********************************/

            for ( ; ln && ln <= limit; ln = line1[ln].nxtln )
            {
                if ( line1[ln].hash == line2[ln2].hash &&
                    line1[ln + cnt].hash == line2[ln2 + cnt].hash &&
                    !( ln - m1 == ln2 - m2 && ln < m1 + cnt && m1 != beg1 - 1 ) )
                {

                    /******************************************************************/
                    /* A candidate for a longer sequence hasbeen located. The current */
                    /* lines match, the current lines + cnt match, and this sequence  */
                    /* is not a subset of the longest sequence thus far.              */
                    /******************************************************************/

                    /****************************************/
                    /* Calculate the most possible matches. */
                    /****************************************/

                    if ( ln > end1 || ln2 > end2 )
                        most = 0;
                    else
                        most = min( (end1 - ln + 1), most2 );


                    /**************************************************************/
                    /* First compare hash codes. If the number of matches exceeds */
                    /* the longest sequence so far, then compare the actual text. */
                    /**************************************************************/

                    if ( chk_hashes( line1 + ln, line2 + ln2, cnt ) &&
                        ( n = cnt_matches( f1->txt + ln, f2->txt + ln2, most ) ) > cnt )
                    {
                        /******************************************/
                        /* This is the longest sequence thus far. */
                        /* Update longest sequence information.   */
                        /******************************************/

                        oldcnt = cnt;
                        cnt    = n;
                        m1     = ln;
                        m2     = ln2;


                        /**********************************************/
                        /* If it is long enough, then end the search. */
                        /**********************************************/

                        if ( cnt >= lngval )
                            break;


                        /**********************************/
                        /* Update limit, using new count. */
                        /**********************************/

                        limit = min( (ln1 - 1), (end1 - cnt) );
                    }
                }
            }

            /**********************************************/
            /* If it is long enough, then end the search. */
            /**********************************************/

            if ( cnt >= lngval )
                break;

            if ( --most2 < 0 )
                most2 = 0;
        }

        else
            go = FALSE;


        /******************************************/
        /* Repeat the process for the other file. */
        /******************************************/

        if ( ln1 <= end1 - cnt )
        {

            limit = min( ln2, (end2 - cnt) );

            for ( ln = f2->hashtbl[line1[ln1].hash].frst; ln && ln < beg2;
                ln = line2[ln].nxtln )
                ;

            for ( ; ln && ln <= limit; ln = line2[ln].nxtln )
            {
                if ( line1[ln1].hash == line2[ln].hash &&
                    line1[ln1 + cnt].hash == line2[ln + cnt].hash &&
                    !( ln - m1 == ln - m2 && ln1 < m1 + cnt && m2 != beg2 - 1 ) )
                {
                    if ( ln > end2 || ln1 > end1 )
                        most = 0;
                    else
                        most = min( (end2 - ln + 1), most1 );

                    if ( chk_hashes( line1 + ln1, line2 + ln, cnt ) &&
                        ( n = cnt_matches( f1->txt + ln1, f2->txt + ln, most ) ) > cnt )
                    {
                        oldcnt = cnt;
                        cnt    = n;
                        m1     = ln1;
                        m2     = ln;

                        if ( cnt >= lngval )
                            break;

                        limit = min( ln2, (end2 - cnt) );
                    }
                }
            }


            if ( cnt >= lngval )
                break;

            if ( --most1 < 0 )
                most1 = 0;

        }
        else
            if ( !go )
                break;    /* This file is exhausted, also. */
    }

    /********************************************************************/
    /* If the longest sequence is shorter than the "long enough" value, */
    /* the "long enough" value can be adjusted for the rest of the      */
    /* comparision process.                                             */
    /********************************************************************/

    if ( cnt < lngval )
        lngval = cnt;


    if ( cnt >= 1 )
    {
        /****************************************************/
        /* Longest sequence exceeds minimum necessary size. */
        /****************************************************/

        if ( m1 != beg1 && m2 != beg2 && oldcnt > 0 )
        {
            /*************************************************************/
            /* There is still something worth comparing previous to this */
            /* sequence. Use knowledge of the previous longest sequence. */
            /*************************************************************/

            fnd_seq( f1, beg1, m1 - 1, f2, beg2, m2 - 1, oldcnt );
        }

        /************************/
        /* Report the sequence. */
        /************************/

        report( m1, m2, cnt );

        if ( m1 + cnt - 1 != end1 && m2 + cnt - 1 != end2 )
        {
            /************************************************************************/
            /* There is still something worth comparing subsequent to the sequence. */
            /************************************************************************/

            fnd_seq( f1, m1 + cnt, end1, f2, m2 + cnt, end2, lngval );
        }

        else
        {
            /**********************************************************/
            /* Check if one (and only one) of the files is exhausted. */
            /**********************************************************/
            
            if ( end1 != end2 )
                report( end1, end2, 0 );
        }
    }

    else
    {
        if ( !report_invoked )
            report( f1_line_count + 1, f2_line_count + 1, 0 );
    }
}


/*============================================================================*/

/***********************************************************************
** chk_hash() determines whether this sequence of matching hash codes **
** is longer than cnt. It knows that the first pair of hash codes is  **
** guarenteed to match.                                               **
**                                                                    **
** RETURNS: TRUE  if this sequence is longer than cnt,                **
**          FALSE if this sequence is not longer than cnt.            **
***********************************************************************/

LOCAL BOOLEAN chk_hashes( LINEINF *line1, LINEINF *line2, register int cnt )
{
    register int  n;      /* Count of recursive matches. */

    for ( n = 1; n <= cnt && ( (++line1)->hash == (++line2)->hash ); n++ )
        ;

    return ( n > cnt );
}


/*============================================================================*/

/***************************************************
** cnt_matches counts the number of consecutive   **
** matching lines of text.                        **
**                                                **
** RETURNS: number of consecutive matching lines. **
***************************************************/

LOCAL int cnt_matches( char **s1, char **s2, register int most )
{
    register int   n;       /* Count of consecutive matches.     */


    /**********************************/
    /* Count the consecutive matches. */
    /**********************************/

    for ( n = 0; n < most && strcmp( *s1++, *s2++ ) == ALIKE; n++ )
        ;

    return (n);
}

