#
# Script for formatting muller dictionary search results
# Author Moskovkin Vitaliy <moskovkin@mtu-net.ru>
# This script was tested on gawk 3.0.3.
# It requires starting gawk with the '--re-interval' or '--posix' option.
# 

BEGIN{
  #
  # Value of every roman digit; used both for roman -> arabic conversion
  # and (by value lookup) for arabic -> roman conversion.
  # 100 is "C" (the table used to map "M" to 100); "D" and "M" complete it.
  #
  RomanDigits["I"] = 1;
  RomanDigits["V"] = 5;
  RomanDigits["X"] = 10;
  RomanDigits["L"] = 50;
  RomanDigits["C"] = 100;
  RomanDigits["D"] = 500;
  RomanDigits["M"] = 1000;

  #
  # Alphabets used to compute the successor of a letter index ("a)" -> "b)").
  # split() with an empty separator (gawk extension) yields one element per
  # character.
  #
  # NOTE(review): the Cyrillic literals were lost from this file (they had
  # become empty strings, which made rxpRussianWord an invalid regexp).
  # They are reconstructed here in UTF-8; confirm that the encoding matches
  # the dictionary data and the locale gawk runs in, and that the letter
  # sequence matches the enumeration really used by the dictionary.
  #
  split("abcdefghijklmnopqrstuvwxyz",  EnglishLetters,"");
  split("абвгдежзийклмнопрстуфхцчшщъыьэюя",RussianLetters,"");
 
  #
  # Define some useful regexps
  #
  # word that opens a '[' which is not closed inside the same word
  rxpOpenSqrBracerWord  = "[\\[][^\\]]*$"
  # word that closes a ']' which was not opened inside the same word
  rxpCloseSqrBracerWord = "^[^\\[]*[\\]]"
  # content of a bracket expression matching any russian letter
  rxpAllRussianLetters  = "абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
  # word that contains at least one russian letter
  rxpRussianWord        = "^.*[" rxpAllRussianLetters "]+.*"
  # used where "no index is valid here" is needed: only matches words
  # containing non-printable characters
  rxpNothing            = "[^[:print:]]+"

  #
  # These requests were taken from tmac.an (the man formatting macros of
  # groff). They are meant to set paragraph parameters as in a man page,
  # without defining page headers/footers or the page size.
  #
#  print".nr IN 7.2n"
  print".nr IN 7n"
#  print".nr LL 6.5i"
  print".PD"  
}

#
# Return the index of an element of Array that is equal to Elem,
# or -1 if Array does not contain Elem.
#
function elemindex(Array, Elem,  Key)
{
  # Walk over every key of Array and hand back the key of an element
  # whose value equals Elem. The traversal order of "for (.. in ..)" is
  # whatever awk chooses, so with duplicate values any matching key may
  # be returned. -1 signals "not found".
  for (Key in Array) {
    if (Array[Key] != Elem) continue
    return Key
  }
  return -1
}

#
#Convert roman number to arabic number (Roman2Arabic("IV") == 4)
#
function Roman2Arabic(RomanNumber,   Digit,L,R,i,MDP,MD,Ln){
                                      #==Local variables==
  #
  # Convert a roman number (string of keys of the global RomanDigits
  # table) to its arabic value.
  # Returns 0 for an empty string and for a string that contains no
  # known roman digit.
  #
  # Method: find the leftmost occurrence of the largest digit; everything
  # to the left of it is subtracted, everything to the right is added,
  # both parts being converted recursively.
  #
  Ln = length(RomanNumber);
  if (Ln == 0) return(0);
  
  #
  # find maximal roman digit MD and its position MDP.
  # The scan must go left to right: picking a later occurrence of the
  # maximal digit would subtract equal digits ("XX" would give 0), and the
  # order of "for (i in array)" is not defined.
  #
  MD  = 0;
  MDP = 0;
  for (i = 1; i <= Ln; i++){
    Digit = substr(RomanNumber, i, 1);
    # the "in" test keeps unknown characters from creating empty entries
    # in RomanDigits
    if ( (Digit in RomanDigits) && MD < RomanDigits[Digit] ){
      MD  = RomanDigits[Digit];
      MDP = i;
    }
  }

  # no roman digit at all: stop here, otherwise the right part would be
  # the whole string again and the recursion would never end
  if (MDP == 0) return(0);
  
  #
  #subtract left number and add right number 
  #
  L = substr(RomanNumber, 1, MDP - 1);
  R = substr(RomanNumber, MDP + 1);
  return(MD - Roman2Arabic(L) + Roman2Arabic(R) );
}

#
#Convert arabic number to roman number (Arabic2Roman(4) == IV)
#
function Arabic2Roman(ArabicNumber, i,n,Values,Symbols,Result)
{                                  #Local variables
  #
  # Convert a positive integer to a roman number in the usual subtractive
  # notation (4 -> "IV", 9 -> "IX", 40 -> "XL", 49 -> "XLIX", 100 -> "C").
  # The fractional part of ArabicNumber is dropped. For values below 1
  # the empty string is returned (there is no roman representation).
  #
  # Method: walk a table of values in descending order, including the
  # subtractive pairs, and repeatedly take the largest value that still
  # fits into the remainder.
  #
  n = split("1000 900 500 400 100 90 50 40 10 9 5 4 1", Values, " ");
  split("M CM D CD C XC L XL X IX V IV I", Symbols, " ");

  ArabicNumber = int(ArabicNumber);
  Result = "";
  for (i = 1; i <= n; i++){
    # "+ 0" forces a numeric comparison with the table entry
    while (ArabicNumber >= Values[i] + 0){
      Result = Result Symbols[i];
      ArabicNumber -= Values[i];
    }
  }
  return(Result);
}

function ItemizeIndexType(Str)
{
  #
  # Classify the word Str as one of the itemize index forms used in the
  # dictionary and return the number of the form:
  #   1  roman number from I to XIII       (I, IV, XII)
  #   2  arabic number followed by '.'     (2.)
  #   3  arabic number followed by ')'     (3))
  #   4  russian small letter and ')'
  #   5  english small letter and ')'      (d))
  #  -1  Str is not an index
  # The interval expressions ({1,3}) are why old gawk versions need the
  # '--re-interval' or '--posix' option.
  #
  if (Str ~ /^(I{1,3}|I{0,1}V|VI{1,3}|I{0,1}X|XI{1,3})$/) return(1); # I
  if (Str ~ /^[[:digit:]]+\.$/)         return(2); # 2.
  # the ')' must be escaped: an unmatched ')' is not a portable regexp
  if (Str ~ /^[[:digit:]]+\)$/)         return(3); # 3)
  # NOTE(review): the cyrillic letters of this bracket expression were lost
  # from the file (leaving an unterminated "[]"); they are reconstructed in
  # UTF-8 - confirm the encoding against the dictionary data and the locale.
  if (Str ~ /^[абвгдежзийклмнопрстуфхцчшщъыьэюя]\)$/) return(4); # russian letter and )
  if (Str ~ /^[a-z]\)$/)                return(5); # d)
  return(-1)
}

function NextItemizeIndex(PredIndex, Ind,Len,IndexType)
{
  #
  # Return the index that follows PredIndex in an itemized list of the
  # same form: "II" -> "III", "2." -> "3.", "3)" -> "4)", "b)" -> "c)".
  # The form is determined with ItemizeIndexType(); the string "error" is
  # returned when PredIndex is not a known index form.
  # IndexType is a local variable, so calling this function does not
  # create or overwrite a global of that name.
  #
  IndexType = ItemizeIndexType( PredIndex );

  #Next roman number
  if ( IndexType == 1 ){
    return( Arabic2Roman(Roman2Arabic(PredIndex) + 1));    
  }
  
  #Next number before '.': drop the last character, add 1, put it back
  if ( IndexType == 2 ){ 
    Len = length(PredIndex);
    return( substr(PredIndex,1,Len - 1) + 1  "." );
  }
  
  #Next number before ')'
  if ( IndexType == 3 ){ 
    Len = length(PredIndex);
    return( substr(PredIndex,1,Len - 1) + 1  ")" );
  }

  #Next russian letter: look up the position of the letter in the
  #alphabet table and take the element after it
  if ( IndexType == 4 ){
    Ind = elemindex(RussianLetters, substr(PredIndex,1,1));
    return ( RussianLetters[ Ind + 1 ] ")")
  }

  #Next english letter
  if ( IndexType == 5 ){
    Ind = elemindex(EnglishLetters, substr(PredIndex,1,1));
    return ( EnglishLetters[ Ind + 1 ] ")")
  }

  return("error");
}


function PrintWord(Str)
{
  #
  # Print one word of the article on its own output line.
  # Str is the word to print. The function works on its parameter, not on
  # the caller's loop variable, so it prints whatever word it is given.
  # Reads the globals rxpRussianWord (regexp for words with russian
  # letters) and BOLD.
  #
  if (Str ~ rxpRussianWord && BOLD == 1)
    # Bold russian words if BOLD=1 given as command argument
    print ".B ", Str  
  else
    # "\&" in front of the word protects a leading . or ' from being
    # taken as the start of a request
    print "\\&" Str
}

function StartIndentedParagraph(   Request)
{
  # Emit the man macro request that opens a block indented by 4.
  Request = ".RS 4"
  print Request
}

function EndIndentedParagraph(   Request)
{
  # Emit the man macro request that closes a block opened with .RS
  Request = ".RE"
  print Request
}

function PrintItemizeIndex(Str,   Request, Width)
{
  # Emit an .IP request that starts a paragraph tagged with the index Str;
  # the trailing 4 is the second argument given to .IP.
  Request = ".IP "
  Width   = " 4"
  print Request, Str, Width
}

function ParsePaper(   Kind)
{
  #
  # Format the current record (one dictionary article) word by word.
  # Words recognised as itemize indexes (I, 1., 1), a) ...) are printed as
  # .IP tags, nested levels are wrapped into .RS/.RE, all other words are
  # printed with PrintWord(). Indexes inside '[...]' are not interpreted.
  #
  # The state variables (State, SaveState, NextIndex, ValidFirstIndex ...)
  # and the loop variables i and j are globals, as before. i is
  # deliberately left global: helper code in this file may refer to the
  # current word as $i.
  #

  #
  # Define states of automat
  #
  stLevel0Paper   = 0
  stLevel1Paper   = 1
  stLevel2Paper   = 2
  stLevel3Paper   = 3
  stSqrBracers    = 4 
  
  #
  # Define hierarchy of indexes: ValidFirstIndexes[T] is the regexp of the
  # first index allowed one level below an index of type T
  # (T as returned by ItemizeIndexType)
  #
  ValidFirstIndexes[1]  = "^(1\\)|1\\.)$"
  ValidFirstIndexes[2]  = "^1\\)$"
  ValidFirstIndexes[3]  = rxpNothing
  ValidFirstIndexes[4]  = "^a\\)$"     #'a' is english
  ValidFirstIndexes[5]  = rxpNothing   
  
  #
  # Initialise ValidFirstIndex by all set of first indexes in dictionary
  # NOTE(review): the russian letter of the fourth alternative was lost
  # from the file (it had become a bare ")"); it is reconstructed here in
  # UTF-8 - confirm the encoding against the dictionary data.
  #
  ValidFirstIndex       = "^(I|1\\.|1\\)|а\\)|a\\))$"

  #
  # Forget the expected indexes of the previous article; otherwise a word
  # of this article could be taken for the continuation of an old list
  # and close paragraphs that were never opened.
  #
  split("", NextIndex)

  State     = stLevel0Paper
  SaveState = stLevel0Paper
  
  #
  # iterate all words in paper
  #
  for (i = 1; i <= NF; i++)
  {
    #
    # Ignore all indexes in '[]' (stSqrBracers) 
    # papers like this '... something ... [. . some word 5) ]'
    #
    if (State == stSqrBracers) 
    { 
      if ($i ~ rxpCloseSqrBracerWord)
        State = SaveState
      PrintWord($i)
      continue  
    }   
     
    #
    # Switch to ignoring all indexes in '[]'
    # do it on all states except stSqrBracers
    #
    if ($i ~ rxpOpenSqrBracerWord)
    {
      SaveState = State
      State     = stSqrBracers
      PrintWord($i)
      continue      
    }  

    #
    # From here on State is one of Level0 - Level3.
    #
    # Max depth, where looking for 
    # first index of next level, is 2
    #
    if (State < stLevel3Paper && $i ~ ValidFirstIndex)
    {
      State++
      NextIndex[State] = NextItemizeIndex($i)
      Kind             = ItemizeIndexType($i)
      # an unknown type must not leave an empty regexp behind:
      # an empty regexp matches every word
      ValidFirstIndex  = (Kind in ValidFirstIndexes) ? ValidFirstIndexes[Kind] : rxpNothing
      if (State > stLevel1Paper) StartIndentedParagraph()
      PrintItemizeIndex($i)
      continue
    }
      
    #
    # Looking for next index on some started level
    # ("+ 0" makes the level a number, elemindex returns an array key)
    #
    NextIndexState = elemindex(NextIndex,$i) + 0
    if (NextIndexState != -1)
    {
      #
      # End started indented paragraphs and
      # delete not used more Next Indexes for
      # sub paragraphs
      #
      for (j = State; j > NextIndexState; j--)
      {
        delete NextIndex[j]
        if (j > stLevel1Paper) EndIndentedParagraph()
      }
      State            = NextIndexState
      NextIndex[State] = NextItemizeIndex($i)
      Kind             = ItemizeIndexType($i)
      ValidFirstIndex  = (Kind in ValidFirstIndexes) ? ValidFirstIndexes[Kind] : rxpNothing
      PrintItemizeIndex($i)
      continue
    } # found next index
      
    #
    # Protect recognizing I in ( I m doing stupid things ) 
    # like first index: once a russian word was seen, "I" is removed
    # from the set of valid first indexes.
    # not in all cases =( 
    # 
    if ($i ~ rxpRussianWord) sub(/I\|/,"",ValidFirstIndex)
      
    PrintWord($i)
  } # for i     

  #
  # If the article ended inside '[...]', go back to the level that was
  # active before the bracket, so that only opened paragraphs are closed
  #
  if (State == stSqrBracers) State = SaveState

  #
  # End all sub paragraphs
  #
  for (j = stLevel2Paper; j <= State; j++) EndIndentedParagraph()
}

# Main rule, executed for every input record (one dictionary article):
# open a section with an .SH request, format the words of the article,
# then finish with an empty line.
{
  print ".SH \t"
  ParsePaper()
  print ""
}
