/*
 * countalpha-t.c : Routines to find/Count English alphabets in input string.
 *                  Routines in this file take input from stdin. You can
 *                  call desired one in main function at the bottom.
 *
 * Copyright (C) 2006, Sandeep Kumar <shimple0@yahoo.com>
 *                     Homepage : http://sandeepkumar.fortunecity.com/
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */


#include <stdint.h>
#include <stdio.h>


/* Number of letters in the English alphabet (A-Z). */
#define  NUM_ALPHABETS  26


/*
 * count_ascii_english_alpha reads characters from stdin until EOF and
 * prints, for every letter a-z, how many times it occurred. Capital and
 * small letters are clubbed into a single count.
 *
 * It ASSUMES an ASCII character set and that the input consists of only
 * valid english alphabets (A-Z, a-z) and nothing else.
 *
 * It takes advantage of English alphabets' layout in ascii character
 * space [A-Z : (64+1)-(64+26), a-z : (96+1)-(96+26)]: the low five bits
 * of a letter are its 1-based position in the alphabet, whatever its case.
 *
 * Input that breaks the assumption is not rejected; such a character is
 * tallied in whichever slot its low five bits select (e.g. '\n' is counted
 * as 'j'). The table holds one slot for every possible 5-bit value, so
 * stray input can distort the counts but can never write outside the table.
 */
void count_ascii_english_alpha (void)
{
    enum { LETTER_BITS = 0x1F, NUM_SLOTS = LETTER_BITS + 1 };

    unsigned int counts[NUM_SLOTS] = { 0 };
    int ch;      /* int, not char: must hold every byte value AND EOF */
    int letter;

    while ((ch = getchar()) != EOF) {
        /*
         * Index with the raw 5-bit value (1..26 for letters) instead of
         * subtracting one first; slot 0 and slots 27..31 are simply never
         * printed.
         */
        counts[ch & LETTER_BITS] ++;
    }

    printf("Count of English Alphabets in input string :\n\n");

    for (letter = 'a'; letter <= 'z'; letter++)
        printf("%c : %u\n", letter, counts[letter & LETTER_BITS]);

    return;
}


/* Presence mask with bits 1..26 set, i.e. every letter a-z has been seen. */
#define  ALL_ALPHABETS_PRESENT  0x07FFFFFE

/*
 * find_ascii_english_alpha reads characters from stdin until EOF and
 * prints, on one line, each letter a-z that occurred in the input either
 * as capital letter or small letter.
 *
 * It ASSUMES an ASCII character set and that the input consists of only
 * valid english alphabets (A-Z, a-z) and nothing else.
 *
 * It takes advantage of English alphabets' layout in ascii character
 * space [A-Z : (64+1)-(64+26), a-z : (96+1)-(96+26)]: the low five bits
 * of a letter select one bit (1..26) of a 32-bit presence mask.
 *
 * Input that breaks the assumption is not rejected; such a character sets
 * whichever bit its low five bits select, so it may be reported as a letter.
 */
void find_ascii_english_alpha (void)
{
    uint32_t presence_mask = 0;
    int ch;      /* int, not char: must hold every byte value AND EOF */
    int letter;

    while ((ch = getchar()) != EOF) {
        /*
         * We reduce some arithmetic by keeping the presence mask from bit
         * no. 1 instead of bit no. 0, else it would have been
         * ((ch & 0x1F) - 1) below.
         *
         * The shifted one is unsigned so that a shift count of 31 (possible
         * for non-letter input) stays well defined.
         */
        presence_mask |= ((uint32_t)1 << (ch & 0x1F));

#if 0  /* Enable it, if conditions in comment below are acceptable to you. */

        /*
         * There could be situations when input string is very large and
         * all the English alphabets have appeared at least once long way
         * before the string end. In that case there is no point going
         * till the end, as our requirement is already met.
         *
         * However in worst case, this optimisation will cost length-of-string
         * number of additional comparisons.
         *
         * Only the letter bits are compared, so a stray non-letter bit
         * cannot keep the test from ever succeeding.
         */
        if (ALL_ALPHABETS_PRESENT == (presence_mask & ALL_ALPHABETS_PRESENT))
            break;
#endif

    }

    printf("English Alphabets present in the input string :\n\n");

    for (letter = 'a'; letter <= 'z'; letter++)
        if (presence_mask & ((uint32_t)1 << (letter & 0x1F)))
            printf("%c ", letter);

    putchar('\n');

    return;
}


/*
 * count_english_alpha reads characters from stdin until EOF and prints,
 * for every letter A-Z, how many times it occurred. Capital and small
 * letters are clubbed into a single count.
 *
 * It ASSUMES that the input string consists of 8-bit characters
 * (ASCII/EBCDIC), and may/may-not contain characters other than
 * alphabets [A-Za-z]. Characters that are not letters are tallied but
 * never reported.
 */
void count_english_alpha (void)
{
    /* Capital/small pairs, in the order the counts are reported. */
    static const char engalpha[] =
         "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";

    unsigned int counts[256] = { 0 };
    unsigned int i;
    int ch;      /* int, not char: must hold every byte value AND EOF */

    /*
     * getchar() hands back each byte as an unsigned char value (0..255
     * for 8-bit characters), so it indexes the table directly; going
     * through a plain char could produce a negative index.
     */
    while ((ch = getchar()) != EOF)
        counts[ch] ++;

    printf("Count of English Alphabets in input string :\n\n");

    for (i = 0; engalpha[i] != '\0'; i += 2) {
        /*
         * Look letters up as unsigned char as well, so the index stays in
         * range on character sets whose letters have the high bit set
         * (e.g. EBCDIC).
         */
        unsigned char upper = (unsigned char)engalpha[i];
        unsigned char lower = (unsigned char)engalpha[i + 1];

        printf("%c : %u\n", upper, counts[upper] + counts[lower]);
    }

    return;
}


#ifdef  SPACE_SAVING_FIND_ALPHA

/* One word of the presence bitmap; each bit stands for one character code. */
typedef  uint32_t  MaskElement;

/* Words needed to hold one bit for each of the 256 8-bit character codes. */
#define  NUM_MASK_ELEMENTS  (256 / (sizeof(MaskElement) * 8))

/* Right shift by log_base_2(number-of-bits in MaskElement) */
#define  MASK_ELEMENT(ch)  ((unsigned char)(ch) >> 5)
/* Bit number of the character inside its MaskElement. */
#define  POSITION(ch)  ((unsigned char)(ch) & ((sizeof(MaskElement) * 8) - 1))

/*
 * find_english_alpha reads characters from stdin until EOF and prints,
 * one per line, each letter A-Z that occurred in the input either as
 * capital letter or small letter.
 *
 * It ASSUMES that the input string consists of 8-bit characters
 * (ASCII/EBCDIC), and may/may-not contain characters other than
 * alphabets [A-Za-z].
 *
 * This variant records presence in a 256-bit bitmap instead of one array
 * element per character code.
 */
void find_english_alpha (void)
{
    /* Capital/small pairs, in the order the letters are reported. */
    static const char engalpha[] =
         "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";

    MaskElement presence_mask[NUM_MASK_ELEMENTS] = { 0 };
    unsigned int i;
    int ch;      /* int, not char: must hold every byte value AND EOF */

    /*
     * The shifted one has the bitmap's own unsigned type, so that bit 31
     * (reached by codes such as '?' or 0xFF) is set without overflowing a
     * signed int.
     */
    while ((ch = getchar()) != EOF)
        presence_mask[MASK_ELEMENT(ch)] |= ((MaskElement)1 << POSITION(ch));

    printf("English Alphabets present in input string :\n\n");

    for (i = 0; engalpha[i] != '\0'; i += 2) {
        const char upper = engalpha[i];
        const char lower = engalpha[i + 1];

        if ((presence_mask[MASK_ELEMENT(upper)] & ((MaskElement)1 << POSITION(upper))) ||
            (presence_mask[MASK_ELEMENT(lower)] & ((MaskElement)1 << POSITION(lower)))) {

            printf("%c \n", upper);
        }
    }

    return;
}

#else  /* !SPACE_SAVING_FIND_ALPHA */

/*
 * find_english_alpha reads characters from stdin until EOF and prints,
 * one per line, each letter A-Z that occurred in the input either as
 * capital letter or small letter.
 *
 * It ASSUMES that the input string consists of 8-bit characters
 * (ASCII/EBCDIC), and may/may-not contain characters other than
 * alphabets [A-Za-z].
 *
 * This variant keeps one flag byte per character code.
 */
void find_english_alpha (void)
{
    /* Capital/small pairs, in the order the letters are reported. */
    static const char engalpha[] =
         "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";

    unsigned char seen[256] = { 0 };
    unsigned int i;
    int ch;      /* int, not char: must hold every byte value AND EOF */

    /*
     * getchar() hands back each byte as an unsigned char value (0..255
     * for 8-bit characters), so it indexes the table directly; going
     * through a plain char could produce a negative index.
     */
    while ((ch = getchar()) != EOF)
        seen[ch] = 1;

    printf("English Alphabets present in input string :\n\n");

    /*
     * Letters are looked up as unsigned char too, so the index stays in
     * range on character sets whose letters have the high bit set.
     */
    for (i = 0; engalpha[i] != '\0'; i += 2)
        if (seen[(unsigned char)engalpha[i]] || seen[(unsigned char)engalpha[i + 1]])
            printf("%c \n", engalpha[i]);

    return;
}

#endif  /* SPACE_SAVING_FIND_ALPHA */


/*
 * Program entry point: runs the selected routine against stdin and
 * returns 0.
 *
 * Move the routine you want out of the disabled section below. Every
 * routine in this file reads stdin until EOF, so when more than one is
 * enabled only the first one called gets to see any input.
 */
int main (void)
{
    count_english_alpha ();
#if 0
    count_ascii_english_alpha ();
    find_ascii_english_alpha ();
    find_english_alpha ();
#endif

    return 0;
}

