# Description:
#
#   A script to reformat a plain text file document which contains no
#   particular format. The text file stores the content of a web page which is
#   currently at http://poseidonia.ella-associates.org/ . This script is a
#   direct derivation of 'plaintext2html-forum.sh' This current script is
#   designed specifically for a particular webpage or text document.
#
#   The script will work in conjunction with the cgi script 'edit-collab' to
#   allow the visitor to the web page to edit the page.
#
# Special Text Structures:
#
#   The script also generates an HTML form which allows the reader to edit the text
#   of the document. The script recognises some 'cues' within the plain text document.
#   I refer to these cues or 'structures' as 'Invisible Markup Language' (IML) or Mas
#   o Menos Markup Language (MMML). The basic idea is to have as little actual
#   'markup' in the text document as possible, and the markup which is present should
#   'look good' in the plain text file. So,
#   instead of using, say, 
#      %^* Section Heading                   
#    which is valid markup but looks ugly in the text file, we use all capitals
#    which looks better in the text file
#   
#   A line beginning with = is a page title.
#   A line beginning with '*' will be hyperlinked.
#   URLs get automatically hyperlinked in some non-determinate way.  All
#   Capitals lines are section headings. These section headings may then be
#   used as a table of contents and hyperlinked in various ways
#
#   This script, like the linkdoc2html.sh script also accepts the format
#     * Document Title|Html-Url-Or-Path|Text-Url-Or-Path|
#   The script will render this into an emphasised 'document title' with
#   hyper-links to the different formats for the document.
#
#   Blocks of text surrounded by '-->>' and '--<<' are not 'formatted' in any way
#
#   The script also formats lines starting in 'added by:' to make those lines
#   stand out from the rest of the text. This is a 'courtesy' to the
#   '/cgi-bin/add-comment' script which added this line to a text file when it
#   inserts a user provided comment in the text file.
#
#   Certain codes can also be placed after the document title in the 
#   source text file to influence the appearance of the HTML.
#     {x}   means number all section headings (including in the Table of Contents)
#     {~}   means make the section heading into 'capital case'
#           These can be combined as in {~x}
#     []   means display the table of contents
#     [~]  means display the TOC with items in 'capital case'
#     (+)  means display automatic google translation links in the HTML
#
#   At the moment these codes usually override the equivalent parameters, but this
#   may change
# Examples:
#   If the scripts are on the system 'path' then the leading './' characters
#   below are not necessary
#
#   ./text2html-collab.sh concert-details.txt notran > concert-details.html
#     This command line, executed in some kind of a bash shell, will transform
#     a plain text file which isn't in any particular format, into an HTML file
#     (that is it will create a new HTML file and leave the original text file
#     unchanged) and will not display the automatic translation links to
#     Google. Also an HTML table of contents (with one entry for each heading,
#     if there are headings) will be inserted in the HTML document.
#
#   ./text2html-collab.sh mjb-work.txt notran notoc > mjb-work.html
#     The text file will be transformed into HTML but no table of contents will
#     be inserted nor any translation links.
#
#   ./text2html-collab.sh mjb-work.txt tran notoc > mjb-work.html
#     If translation links are desired but no table of contents, use a command
#     line similar to above. The string 'tran' could be anything as long as it's
#     not 'notran'. This slightly dodgy 'feature' is owing to the fact that I am
#     not using any 'getopt' style option parsing.
#
#   ./text2html-collab.sh stuff.txt notran toc "http://63.105.73.195/cgi-bin/some-weird-script"
#     This transforms the file stuff.txt omitting translation links, inserting
#     a hyperlinked table of contents, and setting the target for the 'edit
#     document' form to the URL specified in the last parameter.
#
#
# Parameters:
#   textFileName  [required]
#     The name of the text file which is to be transformed from text into html
#   notran        [optional]
#     If the second parameter is the string 'notran' then the javascript links
#     to the google automatic language translation engine will NOT be inserted
#     into the HTML page. This is useful, for example, when the HTML page is
#     going to be located within a 'password-protected' directory, because the
#     Google translation engine will not be able to access the page, and
#     therefore the translation links will not work.
#   notoc         [optional]
#     If the third parameter is the string "notoc", then no HTML table of
#     contents will be generated.
#   forumProcessorUrl           [optional]
#     This parameter indicates where the processing script is located.  If it
#     is omitted, currently the url will default to
#     http://63.105.73.195/cgi-bin/edit-collab
#   output-language [optional] {Not implemented}
#     This is the language in which the message on the generated HTML page
#     will appear. For example messages next to the comment boxes and the 
#     translation links.
#   noforum {Not implemented}
#     If this parameter is present no HTML form will be produced in the output
#     and therefore the web-visitor will not be able to add comments to the 
#     pages.
#   path-to-style-sheet [optional] {Not implemented}
#     Still to implement
#     This is the full path (relative to the Web Server Document Root)
#     to the style sheet which is to be used by the generated HTML page
#     
#
# Notes:
#   The only difference between this script and the 'poseidontext2html-wiki.sh'
#   is that that script does not have the 'editing form or box' on the same
#   HTML page as the rendered text
#
#   This script should also transform quotes into &quot; & into &amp; etc The
#   script appears to be working reasonably well in conjunction with the
#   'edit-collab' cgi script.
#
#   The translation links wont work from within the 'output' generated 
#   by the 'add-comment' script
#
#   This script has had problems with 'gawk' and different versions of awk. For
#   this reason the 'gawk' or 'awk' code has been removed and replaced with
#   code using the 'nl' program. This program, when used with the -bp option
#   double spaces the object file with lines containing only spaces. Therefore
#   some extra 'sed' lines are necessary to remove these blank lines
#
# See Also:
#   edit-collab
#     This is the cgi script which can work in conjunction with the current script
#   diary2html.sh, 
#     Turns a 'diary' style text file into HTML
#   linkdoc2html.sh,
#     Turns a text file which has a list of URL links and descriptions into HTML
#   linkdoc2html-index.sh
#     As above but also adds an HTML 'table of contents' for possible 'section headings'
#   linkdoc2html-forum.sh
#     Turns a text file with a URL list into an HTML file which has the capability
#     to be contributed to by a web-visitor (using cgi-scripts)
#   plaintext2pdf.sh,
#     Turns a text file into a pdf file with an optional table of contents
#   plaintext2html-forum.sh
#     Renders a text file as HTML and displays a form which allows the web-visitor
#     to add 'comments' to the page (text file)
#   plaintext2html.sh
#     Turns a text file with possible section headings and urls into an HTML file
#   glossary2xml.sh
#     Turn a text file which is a sort of 'glossary' into a dodgy xml file
#   alphabetize-glossary.sh
#     Re-arranges a text file which contains a series of definitions of 'items' or 'terms'
#     so that the items are ordered alphabetically.
#   add-comment
#     a cgi-script which can be used in conjunction with some of the 
#     scripts above to add content specified by web-visitors to a web page
#   script-summary.txt
#     contains more short descriptions of scripts and what they do.
# Author:
#   m.j.bishop
#
# Bugs and Ideas
#   In Internet explorer the 'capital case' function does not work properly. The 
#   entire text is lower-cased and the first letter is NOT upper-cased. This is
#   probably another 'text-area' line ending problem
#
#   When you hit 'refresh' in Netscape and IE the contents of the HTML textarea
#   are not 'refreshed'. That is, the contents do not reflect the true contents
#   as dictated by the HTML source code. Rather the editings of the user are
#   preserved. I presume this is customizable.
#
#   In IE when you hit refresh the page does not refresh at all, which means that
#   the user is unable to see the changes which she has made.
#
#   This script needs more internationalization
#
#   The script could also check if there are translations of the current 
#   HTML or text file, using the standard naming convention of name.file-type.language-code
#   An example of this naming convention is  stuff.html.es  which should
#   be an HTML file which contains Spanish language content. This present
#   script could check for files which have the same name as the source
#   file but which have a different language code extension, and could 
#   therefore automatically add a link to the translated file (in addition,
#   perhaps to the Google translation links). The script would only
#   check in the current directory for these 'translated' files.
#
#   When there are no numbers for section headings, capital case is not working (6 June)
#
# Dependencies:
#   iso2html.sed
#     A script which turns accented characters into HTML entities. This is
#     a bit tricky since the American Server uses UTF-8 rather than ISO-8859
#   capital-case.sed
#     A script which 'capital cases' words. That is the first letter is upper
#     case and all the rest lower case
#   capital-case-headings.sed
#     This does the same as above but only for lines which are 'section headings'
#     which means all capital letters
#   edit-collab
#     This is not a total dependency, but the HTML form generated by the 
#     current script will not do anything useful without this script
#   procgi
#     A bash shell cgi HTML form value extractor used by 'edit-collab'
#   The 'uncle-sam.gif' image 
#     which is used next to the text box message
#     The images used on the 'poseidon site'
#     various Unix tools, a Bash shell
#
# History:
#
#   june 3, 2003
#     Adapted this script from 'poseidon-text2html-forum.sh'
#   june 6, 2003
#     Improved the handling of accented characters. All section headings are
#     now governed by the variable 'sHeadingPattern'
 
 #-- Without a text file name there is nothing to convert: show a usage line
 #-- followed by every comment line of this script (the documentation header)
 #-- in a pager, then stop with a non-zero exit status.
 if [ "$1" = "" ]
 then
   #-- The '#' filter is applied to the script file only. Sending the usage
   #-- lines through the same filter would discard them, because they do not
   #-- start with '#'.
   {
     echo "usage: $0  textFileName [notran] [notoc] [forum-processor-url] [noforum]"
     echo "PRESS q TO EXIT THIS HELP. PRESS [space-bar] TO SCROLL DOWN, b to SCROLL UP"
     sed -n "/^[ ]*#/p" "$0"
   } | less
   exit 1
 fi


 #-- This is the pattern which determines what sort of lines will
 #-- be interpreted as 'section headings'. I cannot use the for the 'awk' line
 #-- because awk does not seem to accept the notation \{n,\}
 
 #-- Contents of a bracket expression: capital letters, plain and accented
 sAccentString='A-ZÁÉÍÓÚÀÈÌÒÙÄËÏÖÜÂÊÎÔÛ·ÇÑ'

 #-- A section heading is built up in three steps:
 #--   1. any run of capitals, blanks, digits and a little punctuation
 #--   2. at least three consecutive capital letters
 #--   3. the same kind of run as in step 1
 sHeadingPattern="[$sAccentString 0-9.\/\\:_\&@]*"
 sHeadingPattern="${sHeadingPattern}[$sAccentString][$sAccentString][$sAccentString][$sAccentString]*"
 sHeadingPattern="${sHeadingPattern}[$sAccentString 0-9.\/\\:_\&@]*"

 #-- Start values for the page settings
 bTranslationLinks=""
 bTableOfContents="true"
 sPageTitle=""
 sRawPageTitle=""
 sOutputLanguage="english"
 
#-- strip_title_codes rawTitle
#--   Removes the short layout codes from a raw page title, that is {..},
#--   [..] and (..) with at most four characters between the brackets, and
#--   encodes '<' and '>' as HTML entities. The result is written to
#--   standard output. The title is passed on quoted, so consecutive blanks
#--   survive and characters such as '*' or '[~]' are never expanded by the
#--   shell into file names.
strip_title_codes()
{
  printf '%s\n' "$1" | \
    sed -e "s/{.\{0,4\}}//g" -e "s/\[.\{0,4\}\]//g" -e "s/(.\{0,4\})//g"  | \
    sed -e "s/</\&lt;/g" -e "s/>/\&gt;/g"
}

#-- The line which follows the title line, without a leading '..' marker
#-- and without trailing blanks
sTitleDecoration=$(expand "$1" | sed -n "/^[ ]*=[^=]/{N;s/^.*\n//g;s/\.\.[ ]*//g;s/[ ]*$//g;p;q;}")
#-- The first line which begins with a single '=', without the '=' and
#-- without surrounding blanks. The layout codes are still present here.
sRawPageTitle=$(expand "$1" | sed -n "/^[ ]*=[^=]/{s/^[ ]*=[ ]*//g;s/[ ]*$//g;p;q;}")
#-- The title as it is shown in the HTML: codes removed, '<' and '>'
#-- encoded, converted from UTF-8 to ISO-8859-1 and passed through
#-- iso2html.sed
sPageTitle=$(\
  strip_title_codes "$sRawPageTitle" | \
  iconv --to-code=ISO-8859-1 --from-code=UTF-8 | \
  sed -f /var/www/utils/iso2html.sed)

#-- If a 'title image' is specified extract it and create the necessary HTML

#-- deal with title images which have the image size specified like this (50x60)
#-- build_title_image_html textFileName
#--   Looks for the first 'Title Image:' line of the text file and writes the
#--   HTML for it (an image which links to 'index.html') to standard output.
#--   Nothing is written when the file has no such line.
#--   Two notations are understood:
#--     Title Image: (50x60) picture.jpg     image with width and height
#--     Title Image: picture.jpg             image without a size
build_title_image_html()
{
  local sImageHtml

  #-- first try the notation which carries an image size like (50x60)
  sImageHtml=$( \
    expand "$1" | \
    sed -n "/^[ ]*Title[ ]*Image:[ ]*([0-9]\+x[0-9]\+)/ {s/^[ ]*//g; s/[ ]*$//g; s/^Title[ ]*Image:[ ]*(\([0-9]\+\)x\([0-9]\+\))[ ]*\(.*\)/<a href='index.html'><img src='\3' width='\1' height='\2' align='middle' border='0'><\/a>/g;p;q;}")

  if [ "$sImageHtml" = "" ]
  then
    #-- no size was specified. The blanks after the colon are skipped so
    #-- that they do not become part of the 'src' attribute
    sImageHtml=$( \
      expand "$1" | \
      sed -n "/^[ ]*Title[ ]*Image:/ {s/^[ ]*//g; s/[ ]*$//g; s/^Title[ ]*Image:[ ]*\(.*\)/<a href='index.html'><img src='\1'     align='middle' border='0'><\/a>/g;p;q;}")
  fi

  printf '%s\n' "$sImageHtml"
}

sTitleImageHtml=$(build_title_image_html "$1")


#-- The code below allows a wiki user to specify whether a page should have 
#-- the section headings numbered by inserting '{x}' after the page title

#-- parse_title_codes rawTitle notranParameter notocParameter
#--   Reads the layout codes which may follow the page title and sets the
#--   global settings of the script accordingly:
#--     bNumberSections       "true" when the title contains {x}
#--     bCapitalCaseHeadings  "true" when the title contains {~}
#--     bTableOfContents      "true" when the title contains [] or [.]
#--     bCapitalCaseTOC       "true" when the title contains [~]
#--     bTranslationLinks     "true" when the title contains (+)
#--     sOutputLanguageCode   two capitals between round brackets, or "EN"
#--     sOutputLanguage       language name for a known code; an unknown
#--                           code leaves the variable as it was
#--   Each code is detected by deleting it from the title and comparing the
#--   result with the original. The title is always passed on quoted: an
#--   unquoted title has its consecutive blanks collapsed by the shell, which
#--   makes the comparison fail and reports every code as present.
#--
#--   NOTE: the 'notoc' and 'notran' parameters are looked at first, but the
#--   title codes are evaluated afterwards and always decide the final value
#--   of bTableOfContents and bTranslationLinks.
parse_title_codes()
{
  #-- {x} asks for numbered section headings
  sSectionNumberFlag=$(printf '%s\n' "$1" | sed "s/{.\?x.\?}//g")
  if [ "$1" = "$sSectionNumberFlag" ]
  then
    bNumberSections="false"
  else
    bNumberSections="true"
  fi

  #-- {~} asks for section headings in 'capital case'
  sCapitalCaseSectionFlag=$(printf '%s\n' "$1" | sed "s/{.\?[~].\?}//g")
  if [ "$1" = "$sCapitalCaseSectionFlag" ]
  then
    bCapitalCaseHeadings="false"
  else
    bCapitalCaseHeadings="true"
  fi

  #-- table of contents: the script parameter first, then the '[]' code
  if [ "$3" = "notoc" ]
  then
    bTableOfContents="false"
  else
    bTableOfContents="true"
  fi

  sTableOfContentsFlag=$(printf '%s\n' "$1" | sed "s/\[.\?\]//g")
  if [ "$1" = "$sTableOfContentsFlag" ]
  then
    bTableOfContents="false"
  else
    bTableOfContents="true"
  fi

  if [ "$2" = "notran" ]
  then
    bTranslationLinks="false"
  fi

  #-- [~] asks for a table of contents in 'capital case'
  sCapitalCaseTOCFlag=$(printf '%s\n' "$1" | sed "s/\[[~]\]//g")
  if [ "$1" = "$sCapitalCaseTOCFlag" ]
  then
    bCapitalCaseTOC="false"
  else
    bCapitalCaseTOC="true"
  fi

  #-- (+) asks for the automatic translation links
  sTranslationLinksFlag=$(printf '%s\n' "$1" | sed "s/(.\?.\?+.\?.\?)//g")
  if [ "$1" != "$sTranslationLinksFlag" ]
  then
    bTranslationLinks="true"
  else
    bTranslationLinks="false"
  fi

  #-- two capitals between round brackets, like (ES), select the language
  #-- of the messages on the page
  sOutputLanguageFlag=$(printf '%s\n' "$1" | sed "s/(.\?[A-Z][A-Z].\?)//g")
  if [ "$1" != "$sOutputLanguageFlag" ]
  then
    sOutputLanguageCode=$(printf '%s\n' "$1" | sed "s/.*(.\?\([A-Z][A-Z]\).\?).*/\1/g")
  else
    sOutputLanguageCode="EN"
  fi

  case "$sOutputLanguageCode" in
    ES)	sOutputLanguage="spanish";;
    IT)	sOutputLanguage="italian";;
    EN)	sOutputLanguage="english";;
    CA)	sOutputLanguage="catalan";;
    AL)	sOutputLanguage="german";;
    FR)	sOutputLanguage="french";;
    PO)	sOutputLanguage="portuguese";;
  esac
}

parse_title_codes "$sRawPageTitle" "$2" "$3"

 #-- for debugging
 #-- Diagnostic listing of the settings worked out so far. The test below
 #-- can never succeed, so nothing is printed; alter the test while
 #-- debugging to see the values.
 if [ "b" = "a" ]
 then
   cat <<DEBUG_DUMP
<pre>
sHeadingPattern=$sHeadingPattern
sRawPageTitle=$sRawPageTitle
bNumberSections=$bNumberSections
bCapitalCaseHeadings=$bCapitalCaseHeadings
bTableOfContents=$bTableOfContents
bCapitalCaseTOC=$bCapitalCaseTOC
bTranslationLinks=$bTranslationLinks
sTitleImageHtml=$sTitleImageHtml
sOutputLanguageCode=$sOutputLanguageCode
sOutputLanguage=$sOutputLanguage
</pre>
DEBUG_DUMP
 fi


 #-- Document type, the opening html tag and the META tags which carry the
 #-- character set and the page title
 printf '%s\n' \
   '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">' \
   '<html>' \
   ' <META HTTP-EQUIV="Content-Type" CONTENT="text/html; CHARSET=iso-8859-1">' \
   ' <META HTTP-EQUIV="Keywords"' \
   "          CONTENT=\"$sPageTitle\">" \
   ' <META HTTP-EQUIV="Description"' \
   "          CONTENT=\"$sPageTitle\">"
 #-- The next two tags ask browsers and servers not to cache the page, which
 #-- matters because the page can be edited by its readers
 printf '%s\n' \
   ' <META HTTP-EQUIV="expires" CONTENT="0">' \
   ' <META HTTP-EQUIV="Pragma" CONTENT="no-cache">' \
   '        <LINK REV="made" HREF="mailto:webmaster@ella-associates.org">' \
   ''

 #-- HTML comments which record how and when the page was generated
 printf '%s\n' \
   "<!-- html generated by the \"$(basename $0)\" script         -->" \
   "<!-- From the file: \"$1\"             -->" \
   "<!-- On the date: $(date)              -->" \
   "<!-- see http://www.ella-associates.org/utils/$(basename $0) -->"
 #echo "<link   rel = \"stylesheet\"  type = \"text/css\""
 #echo "       href = \"/stylesheets/somestylesheet.css\">"
 printf '%s\n' \
   "<head><title>$sPageTitle</title>" \
   '<script language = "javascript">'
 #-- Javascript used by the translation links: it sends the browser to the
 #-- Google translation of the current page
 echo "
 <!--
  function redirectToGoogleTranslation(sSourceLanguage, sTargetLanguage)
  {
    var sTranslationUrl = 'http://translate.google.com/translate?u=';

    sTranslationUrl += escape(document.location.href);
    sTranslationUrl += '&langpair=' + sSourceLanguage + '|' + sTargetLanguage;
    sTranslationUrl += '&hl=' + sSourceLanguage;
    // document.testForm.test.value=sTranslationUrl;
    window.location = sTranslationUrl;
  } //-- redirectToGoogleTranslation()
 -->  "
 
 printf '%s\n' '</script>'
 #-- emit_page_stylesheet
 #--   Writes the embedded style sheet of the page to standard output.
 #--   The 'font-size' declarations are written with a colon and the
 #--   'background-image' value is wrapped in url(...) and terminated:
 #--   with a ';' in place of the ':' or without url() a browser ignores
 #--   the declaration.
 emit_page_stylesheet()
 {
 echo '
  <style type="text/css"> 
    a
    {
      /* Pale Acqua */
      /* color: #CCFFFF; */
    }

    a.toc
    {
      /* text-transform: lowercase; */
      text-decoration: none;
      margin-left: 30%;
      background-image: url(http://www.ella-associates.org/poseidonia/images/section-images-circle-small.jpg);
    }
    a.toc:link     { text-decoration:none; }
    a.toc:visited  { text-decoration:none; }
    a.toc:hover    { text-decoration:underline; }
    a.toc:active   { text-decoration:none; }
	
    BODY   
    {
      margin: 10px 10px;
      background-color:white; 
      /* background-color:#CCFFFF; Pale Acqua */
      /* background-color:#669966;  */
      /* #669966 Khaki */ 
    }
    
    H2  
    {
      margin-top:0px;  margin-bottom:0px;
      padding-top:0px; padding-bottom:0px;
      font-weight:normal; font-size:11pt;
    }

    H2.padded
    {
      margin-top:6px;  margin-bottom:0px;
      padding-top:6px; padding-bottom:0px;
      font-weight:normal; font-size:11pt;
    }

    H3 
    {
      margin-top:0px;  margin-bottom:0px;
      padding-top:0px; padding-bottom:0px;
      font-weight:normal;
      font-size:9pt;

    }

    .center 
    {
      text-align:center; 
    }

    .darkblue { color:#333399; background-color:white; }
    .red    { color:#CC0000; background-color:#FFCCCC; }
    
    pre.codebox 
    {
      margin-left:0em; margin-right:4em;
      margin-top:4px;  margin-bottom:4px;
      padding-top:3px; padding-bottom:3px;
      border:1px;      border-style:solid;   border-color:#006600;
      color:#3B3B3B;   background-color:#E9E9E9;
      font-family: "Courier New", Courier, monospace;
      font-size:10pt;
      white-space:pre;
    }

  </style>'
 }

 emit_page_stylesheet

 
 #-- emit_translation_links outputLanguage
 #--   Writes the centred row of links which send the reader to an automatic
 #--   Google translation of the page. The first argument of the javascript
 #--   function is the language the page is written in, so a french page
 #--   passes 'fr' and an italian page passes 'it'. Any language other than
 #--   spanish, french or italian gets the english row of links.
 emit_translation_links()
 {
   echo "<center>"
   case "$1" in
     spanish)
       echo "Vea este pagina en (aproximado):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('es', 'en');\">English</a>"
       ;;
     french)
       echo "Voir la cette page dedans (approximatif):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('fr', 'en');\">English</a>"
       ;;
     italian)
       echo "Osservi questa pagina come (approssimativo):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('it', 'en');\">English</a>"
       ;;
     *)
       echo "See this page in (approximate):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'es');\">Espa&ntilde;ol</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'fr');\">Fran&ccedil;ais</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'it');\">Italiano</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'de');\">Deutsch</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'pt');\">Portugu&ecirc;s</a>"
       ;;
   esac
   echo "</center>"
 }

 #-- Close the head and open the body. The onload handler assigns the
 #-- textarea of the edit form its own value again when the page is loaded.
 echo "</head>"
 echo "<body onload=\"document.wikiform.DocumentText.value=document.wikiform.DocumentText.value\">"
 if [ "$bTranslationLinks" = "true" ]
 then
   emit_translation_links "$sOutputLanguage"
 fi

	
#-- Put the page heading before the table of contents
#-- The page heading: the optional title image followed by the page title,
#-- centred, inside one h1 element
printf '%s\n' '<center><h1>'
if [ -n "$sTitleImageHtml" ]; then
  printf '%s\n' "$sTitleImageHtml"
fi
printf '%s\n' "$sPageTitle</h1></center>"
#--echo '<center><strong><font size=+4>({*})</font></strong></center>'
#--


#echo '<table><tr><td>'

 #-- emit_edit_link outputLanguage
 #--   Writes the centred link which jumps to the edit form at the bottom of
 #--   the page, with its text in the requested language. Languages without
 #--   a text of their own get the english text. The accented letter of the
 #--   french text is written as an HTML entity, since the page declares the
 #--   iso-8859-1 character set and a raw accented character of this script
 #--   file may not be stored in that character set.
 emit_edit_link()
 {
   local sEditLinkText

   case "$1" in
     spanish)    sEditLinkText="[editar este documento]" ;;
     french)     sEditLinkText="[&eacute;ditez ce document]" ;;
     italian)    sEditLinkText="[cambi questo documento]" ;;
     portuguese) sEditLinkText="[edite este original]" ;;
     german)     sEditLinkText="[Redigieren Sie dieses Dokument]" ;;
     *)          sEditLinkText="[edit this web-page]" ;;
   esac

   echo "<center><a href=\"#editForm\">$sEditLinkText</a> </center>"
 }

 emit_edit_link "$sOutputLanguage"

#-- Insert the table of contents   
#-- Write the table of contents: one link for every line of the text file
#-- which matches the section heading pattern. Link number N points at the
#-- anchor 'itemN'.
if [ "$bTableOfContents" = "true" ]
then

  #bNumberSections="false"
  if [ "$bCapitalCaseTOC" = "true" ]
  then
    sDoCapitalizeCommand="sed -f /var/www/utils/capital-case.sed"
  else
    sDoCapitalizeCommand="cat"
  fi

  #-- The numbered and the plain table of contents differ in one respect
  #-- only: whether the running number is shown in front of the heading.
  #-- \1 is the running number, \2 is the heading text.
  if [ "$bNumberSections" = "true" ]
  then
    sTocEntryText='\1. \2'
  else
    sTocEntryText='\2'
  fi

  #-- The headings are numbered with 'nl'; the following 'sed' removes the
  #-- blanks in front of each number and the separator after it, so that a
  #-- line consists of the number directly followed by the heading.
  echo "<a name = \"toc\"></a>"
  expand "$1" | \
    sed "/^$sHeadingPattern$/!d" | \
    sed -e "s/^[ ]*//g" -e "s/[ ]*$//g" | \
    #-- capital case the table of contents if it has been requested, if not, do nothing
    eval "$sDoCapitalizeCommand" | \
    nl -s" " | \
    sed "s/^[ ]*\([1-9][0-9]*\) /\1/g" | \
    sed "s/^\([0-9]\+\)\(.*\)$/<br><a href=\"#item\1\" class=\"toc\">$sTocEntryText<\/a>/g" | \
    #-- line below because the RedHat server uses UTF-8 character set
    iconv --to-code=ISO-8859-1 --from-code=UTF-8 | \
    #-- Try to 'entitize' the accented characters
    sed -f /var/www/utils/iso2html.sed
fi

#-- The version of SED on RedHat linux does not like the syntax "\{,4\}" but "\{0,4\}"
#-- is ok.
#
# What follows below is quite tricky. The order of each of the sed transformation DOES matter
# The tricky bits are allowing for accented european characters, and converting back and forth
# between unicode and iso latin etc
#
# In the context of this 'wiki' script it is reasonably important to display the text 'prettily'
# so I am going to change the presentation of links etc. This allows the user to have
# more control over how the web page is displayed finally


 #-- Kept from an earlier version; section headings are now described by
 #-- the 'sHeadingPattern' variable and this variable is not used anymore
  sNumberingPattern='^[ A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜ·ÇÑ0-9.\/\\:]*[A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜÇÑ][A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜÇÑ][A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜÇÑ]+[ A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜ·ÇÑ0-9.\/\\:]*$'

 #-- Pick the 'sed' replacement text for a section heading. \1 is the
 #-- running number of the heading and \2 is its text. The number is shown
 #-- only when the sections are numbered, and the link back to the table of
 #-- contents is added only when there is a table of contents.
 case "$bNumberSections/$bTableOfContents" in
   true/true)
     sSectionHeadReplacement='<h3><a name=\"item\1\">\1. \2<\/a> <a href=\"#toc\">[toc]<\/a><\/h3>' ;;
   true/*)
     sSectionHeadReplacement='<h3><a name=\"item\1\">\1. \2<\/a> <\/h3>' ;;
   */true)
     sSectionHeadReplacement='<h3><a name=\"item\1\">\2<\/a> <a href=\"#toc\">[toc]<\/a><\/h3>' ;;
   *)
     sSectionHeadReplacement='<h3><a name=\"item\1\">\2<\/a> <\/h3>' ;;
 esac

 #-- The command which puts the section headings into 'capital case', or
 #-- 'cat' when the headings are to stay as they are
 case "$bCapitalCaseHeadings" in
   true) sCapHeadingsCommand="sed -f /var/www/utils/capital-case-headings.sed" ;;
   *)    sCapHeadingsCommand="cat" ;;
 esac
  
  #-- The main transformation: the text file is sent through one long chain
  #-- of filters which turns it, step by step, into the HTML body of the page.
  #-- The ORDER of the filters matters; each one works on the output of the
  #-- filters before it. Stages which act on '<pre>' see the tags produced
  #-- from the '-->>' and '--<<' markers further up the chain.
  expand "$1" | \
  sed "s/^[ ]*$//g" | \
  #-- Number all lines that are 'section headings', allow for european accented characters
  nl -s" " -bp"^$sHeadingPattern$" | \
  #-- Get rid of the 'blank' lines which nl puts into the output
  sed  "/^[ ]\+$/d" | \
  #-- Reformat the numbered section headings: the number is left directly in front of the heading
  sed "s/^[ ]*\([1-9][0-9]*\) /\1/g" | \
  #-- Delete the page title because its already been output
  sed "/^[ ]*=[^=]/d" | \
  #-- Delete 'title image' lines because they have already served their purpose
  sed "/^[ ]*Title[ ]*Image:/d" | \
  #-- Encode the special characters '<' and '>' as HTML entities ('&' is not changed by this step)
  sed -e "s/</\&lt;/g" -e "s/>/\&gt;/g" | \
  #-- Do a trick to get the '-->>' and '--<<' blocks of text to work
  sed -e "s/^[ ]*\-\-\&gt;\&gt;/<pre>/g" -e "s/^[ ]*\-\-\&lt;\&lt;/<\/pre>/g" | \
  #-- Make each 'section heading' into an HTML anchor to work with the 'Table of Contents'
  sed "s/^\([0-9]\{0,5\}\)\($sHeadingPattern\)$/$sSectionHeadReplacement/g" | \
  #-- If the section headings need to be 'capital cased', do so
  eval "$sCapHeadingsCommand" | \
  #-- line below because the RedHat server uses UTF-8 character set
  iconv --to-code=ISO-8859-1 --from-code=UTF-8 | \
  #-- Try to 'entitize' the accented characters
  sed -f /var/www/utils/iso2html.sed | \
  #-- Allow for 'ascii decorations'
  sed "/^\.\.[ ]/{s/[ ]*$//g; s/^\.\.[ ]*\(.*\)/<center><font size=+4>\1<\/font><\/center>/g;}" | \
  #-- Allow spanish section tags
  sed "s/IM[ÁA]GEN\-[ÍI]NDICE\-PRINCIPIO/IMAGE\-INDEX\-BEGIN/g" | \
  sed "s/IM[ÁA]GEN\-[ÍI]NDICE\-FINAL/IMAGE\-INDEX\-END/g" | \
  #-- Lets deal with image index things. We have to get a few lines into the pattern space
  #-- so that we can hyperlink the image and the first label line
  #-- First deal with lines that have a size specification for the image as in (50x50)
  sed "/\[IMAGE\-INDEX\-BEGIN\]/,/\[IMAGE\-INDEX\-END\]/ {/^[ ]*Image/{N;N;s/Image:[ ]*(\([0-9]\+\)x\([0-9]\+\))[ ]*\([^ \n]\{2,\}\)[ ]*\n[ ]*Link:[ ]*\([^ \n]*\)[ ]*\n\(.*\)/<a href='\4'><img src='\3' width='\1' height='\2' border='0' align='left'><\/a><a href='\4'>\5<\/a>/g;};}" | \
  #-- Now deal with sections with no image size specification
  sed "/\[IMAGE\-INDEX\-BEGIN\]/,/\[IMAGE\-INDEX\-END\]/ {/^[ ]*Image/{N;N;s/Image:[ ]*\([^ \n]\{2,\}\)[ ]*\n[ ]*Link:[ ]*\([^ \n]*\)[ ]*\n\(.*\)/<a href='\2'><img src='\1' border='0' align='left'><\/a><a href='\2'>\3<\/a>/g;};}" | \
  sed "/\[IMAGE\-INDEX\-BEGIN\]/,/\[IMAGE\-INDEX\-END\]/ {s/^[ ]*$/<p>/;}" | \
   #-- Lets deal with BAND-INDEX TAGS
   #-- When the images have a size specification, eg: Image: (60x80) image-file.jpg
   #-- (the table row is closed with '<\/tr>'; written as '<\tr>' GNU sed would
   #-- put a tab character into the output instead of closing the row)
   sed "/\[BAND-DETAIL-LIST-BEGIN\]/,/\[BAND-DETAIL-LIST-END\]/ {/^[ ]*Image/{s/Image:[ ]*(\([0-9]\+\)x\([0-9]\+\))[ ]*\([^ ]\{2,\}\)/<tr><td colspan='2'><img src='\3' width='\1' height='\2' border='0' align='left'><\/td><\/tr>/g;};}" | \
   #-- When the images dont have a size specification, eg: Image: image-file.jpg
   sed "/\[BAND-DETAIL-LIST-BEGIN\]/,/\[BAND-DETAIL-LIST-END\]/ {/^[ ]*Image/{s/Image:[ ]*\([^ ]\{2,\}\)/<tr><td colspan='2'><img src='\1' border='0' align='left'><\/td><\/tr>/g;};}" | \
   #-- We need to deal with the 'ejemplos de música' lines which are a little bit tricky. Firstly
   #-- the actual links to the examples are on the lines which follow the 'ejemplos' line and presumably
   #-- can be as numerous as they like. From a SED perspective this involves doing a 'N' command and 
   #-- then checking if the line that has just been 'N'd contains a line in the format
   #--   "Some song name"  some/path/to/a/song/file.mp3
   #-- If the latest line does contain roughly this format then we need to get another line until
   #-- we run out. This is going to require something like the 't' command which does a conditional
   #-- jump based on whether a substitution was made or not. 
   #--
   #-- In order to find the exact details of how to do all this we need to go to
   #--   http://sed.sourceforge.net   as always to find the answers.
   #-- As usual the following gem was found in Eric Pement's 'one-liners'
   #-- If a line begins with an '=' it is appended to the previous and transformed
   #--
   #-- sed -e :a -e '$!N;s/\n=/ /;ta' -e 'P;D'
   #-- This piece of SED magic actually works though I am not entirely sure why
#   sed -e :a -e "/\[BAND-DETAIL-LIST-BEGIN\]/,/\[BAND-DETAIL-LIST-END\]/ {/^[ ]*Ejemplos de m&uacute;sica \[+s\]/{N; s|\n[ ]*\"\([^\"]*\)\"[ ]*\([^ ]\+\)|<a href='\2'>\1</a> -|g;ta;};}" -e 'P;D' | \
   sed -e :a -e "/\[BAND-DETAIL-LIST-BEGIN\]/,/\[BAND-DETAIL-LIST-END\]/ {/^[ ]*Ejemplos de m&uacute;sica/{N; s|\n[ ]*\"\([^\"]*\)\"[ ]*\([^ ]\+\)|<a href='\2'>\1</a> -|g;ta;};}" -e 'P;D' | \
   #-- Turn name: value into table cells
   sed "/\[BAND-DETAIL-LIST-BEGIN\]/,/\[BAND-DETAIL-LIST-END\]/ {/<img/!s/^[ ]*\([^:]\+\):\(.*\)/<tr><td><strong>\1<\/strong><\/td><td>\2<\/td><\/tr>/g;}" | \
   #-- Divide different groups using the blank lines in the source text
   sed "/\[BAND-DETAIL-LIST-BEGIN\]/,/\[BAND-DETAIL-LIST-END\]/ {s|^[ ]*$|<tr><td colspan='2'>-</td></tr>|g;}" | \
   #-- Example of Format Below: [*] My Title|/my/path/to/file-no-extension|html|txt|xml|pdf|
   sed "/<pre>/,/<\/pre>/!{/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href='\2\.\3'>\3<\/a> | <a href='\2\.\4'>\4<\/a> | <a href='\2\.\5'>\5<\/a> | <a href='\2\.\6'>\6<\/a>)/gi;}" | \
   #-- Example of Format Below: * My Title|/my/path/to/file-no-extension|html|txt|pdf| but not in <pre>s
   sed "/<pre>/,/<\/pre>/!{/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href='\2\.\3'>\3<\/a> | <a href='\2\.\4'>\4<\/a> | <a href='\2\.\5'>\5<\/a>)/gi;}" | \
   #-- Example of Format Below: \[*\] My Title|/my/path/to/file-no-extension|pdf|html| but not in <pre>s
   sed "/<pre>/,/<\/pre>/!{/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href='\2\.\3'>\3<\/a> | <a href='\2\.\4'>\4<\/a>)/gi;}" | \
   #-- Example of Format Below: * My Title|/full/path/to/htmlfile|/full/path/to/text/file|/full/path/to/pdffile|
   #-- but not in <pre>s
   sed "/<pre>/,/<\/pre>/!{/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([^|]*\)|\([^|]*\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href='\2'>html<\/a> | <a href='\3'>text<\/a> | <a href='\4'>pdf<\/a>)/gi;}" | \
   #-- Example of Format Below: * My Title|/full/path/to/htmlfile|/full/path/to/text/file| but not in <pre>s
   sed "/<pre>/,/<\/pre>/!{/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([^|]*\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href='\2'>html<\/a> | <a href='\3'>text<\/a>)/gi;}" | \
   #-- Trick to make 'txt' links into 'text' links for readability
   sed "s/>txt<\/a>/>text<\/a>/gi" | \
   #-- Example of Format Below: * My Title|/full/path/to/any-old-file|
   sed "/<pre>/,/<\/pre>/!{/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|/<b>\1<\/b>(<a href=\"\2\">\2<\/a>)/gi;}" | \
   #-- Example of Format Below: [*] /full/path/to/any-old-file
   sed "s/^[ ]*\[\*\][ ]*\([^ ]\{2,\}\)/<a href=\"\1\">\1<\/a>/gi" | \
   #-- Example of Format Below: * http://domain.org/resource.html
   sed "s/^[ ]*\*[ ]*\(http:\/\/[^ ]\{2,\}\)/<a href=\"\1\">\1<\/a>/gi" | \
  #-- Hyperlink urls with different display text like: "Some Display" http://blah.com
  sed "/<pre>/,/<\/pre>/!s/\"\([^\"]\{1,50\}\)\"[ ]\{0,4\}\(http:\/\/[-a-z:_\%0-9\~\\\/\"\'\.\&\@?=]\{3,\}\)/<a href=\2>\1<\/a>/gi" | \
  #-- Hyperlink relative URLs with different display text like: "Some Display Text" link://relative/link.html
  sed "/<pre>/,/<\/pre>/!s/\"\([^\"]\{1,50\}\)\"[ ]\{0,4\}link:\/\/\([-a-z:_\%0-9\~\\\/\"\'\.\&\@?=]\{3,\}\)/<a href=\2>\1<\/a>/gi" | \
  #-- Hyperlink URLs beginning with http, except between <pre> tags
  #-- The style immediately below is more 'academic'
  #sed "/<pre>/,/<\/pre>/!s/[^\">]\(http:\/\/[-a-z:_\%0-9\~\\\/\"\'\.\@\&?=]\{3,\}\)/<a href=\1>[*]<\/a><tt>\1<\/tt>/gi" | \
  sed "/<pre>/,/<\/pre>/!s/[^\">'=]\(http:\/\/[-a-z:_\%0-9\~\\\/\"\'\.\@]\{3,\}\)/<a href='\1'>\1<\/a>/gi" | \
  #-- Hyperlink URLs beginning with http at the beginning of lines, except between <pre> tags
  #sed "/<pre>/,/<\/pre>/!s/^\(http:\/\/[-a-z:_\%0-9\~\\\/\"\'\.\@]\{3,\}\)/<a href='\1'>[*]<\/a><tt>\1<\/tt>/gi" | \
  sed "/<pre>/,/<\/pre>/!s/^\(http:\/\/[-a-z:_\%0-9\~\\\/\"\'\.\@]\{3,\}\)/<a href='\1'>\1<\/a>/gi" | \
  #-- Hyperlink email addresses with a 'mailto:' link
  sed "/<pre>/,/<\/pre>/!s/\([^ ]\{2,\}@[^ \"']\{2,\}\)/<a href=\"mailto:\1\">\1<\/a>/g" | \
  #-- Hyperlink URLs beginning with 'www.'
  #sed "/<pre>/,/<\/pre>/!s/[^a-zA-Z\/\">]\(www\.[-a-z:_\%0-9\~\\\/\"\'\.\@]\{2,\}\)/<a href='http:\/\/\1'>[*]<\/a><tt>\1<\/tt>/gi" | \
  sed "/<pre>/,/<\/pre>/!s/[^a-zA-Z\/\">]\(www\.[-a-z:_\%0-9\~\\\/\"\'\.\@]\{2,\}\)/<a href='http:\/\/\1'>\1<\/a>/gi" | \
  #-- Format comments added by web-users
   sed "s/^\([ ]*added[ ]\{0,4\}by:\)\([^,]\{1,\}\)\,[ ]*on[ ]*\(.*\)/<u><em>\1<\/em><tt> \2<\/tt><em> on \3<\/em><\/u>/gi" | \
  #-- Turn each pair of spaces into non-breaking-spaces unless they are between 'pre' tags
  #-- or inside an image index
  sed "/<pre>/,/<\/pre>/!{/\[IMAGE-INDEX-BEGIN\]/,/\[IMAGE-INDEX-END\]/! s/[ ]\{2\}/\&nbsp;\&nbsp;/g;}" | \
  #-- Make paragraphs where there are blank lines
  #sed "/<pre>/,/<\/pre>/!s/^[ ]*$/<p>/g" | \
  #-- Make 'fake' headings
  sed "/<pre>/,/<\/pre>/!s/{{ /<strong><em>/g" | \
  sed "/<pre>/,/<\/pre>/!s/ }}/<\/em><\/strong>/g" | \
  sed "/<pre>/,/<\/pre>/!s/[ ]*==[ ]*\(.*\)/<font size=+2><strong><em>\1<\/em><\/strong><\/font>/g" | \
  #-- Turn line breaks into <br> tags unless they are between 'pre' tags or inside a band detail
  #-- list. This isn't really a good idea since you dont know the width of the target screen
  #sed "/<pre>/,/<\/pre>/!{/\[IMAGE-INDEX-BEGIN\]/,/\[IMAGE-INDEX-END\]/! s/^/<br>/g;}" 
  #sed "/<pre>/,/<\/pre>/!{/<h3>/!s/^/<br>/g;}" | \
  sed "/<pre>/,/<\/pre>/!{/\[BAND-DETAIL-LIST-BEGIN\]/,/\[BAND-DETAIL-LIST-END\]/!s/^/<br>/g;}" | \
  #-- The band detail list markers become the opening and closing tags of an HTML table
  sed -e "s/\[BAND-DETAIL-LIST-BEGIN\]/<table>/g" -e "s/\[BAND-DETAIL-LIST-END\]/<\/table>/g" | \
  #-- Get rid of Image Index tags
  sed -e "s/\[IMAGE-INDEX-BEGIN\]//g" -e "s/\[IMAGE-INDEX-END\]//g"
#echo "</td></tr></table>"
  
 #-- Emit a line break ahead of the edit form
 printf '%s\n' "<br>"

 #-- Work out which cgi program receives the contents of the HTML textarea
 #-- and updates the document. A non-empty fourth argument overrides the
 #-- built-in default. The default uses a raw IP address instead of the
 #-- domain name (http://www.ella-associates.org/cgi-bin/edit-collab) so that
 #-- the form keeps working even if the DNS configuration fails; whether
 #-- that is really a good idea is an open question.
 sProcessorUrl="${4:-http://63.105.73.195/cgi-bin/edit-collab}"
 #-- The target cgi processor is not in the same directory as the source
 #-- document (the text file), so the form has to carry the full path name
 #-- of $1. The path is classified by the first character of its directory
 #-- part: a leading '/' means it is already absolute and is used as given;
 #-- anything else ('file', './file', '../file', 'dir/file') is taken to be
 #-- relative to the current working directory.
 #-- $1 is quoted throughout so that names containing spaces or glob
 #-- characters are not split or expanded, and '--' guards against a name
 #-- that starts with '-'.
 sRelativePath=$(dirname -- "$1")
 #-- First character of the directory part (strip everything after it)
 sFirstCharacter=${sRelativePath%"${sRelativePath#?}"}
 case "$sFirstCharacter" in
   /) sFullPathName="$1" ;;
   *) sFullPathName="$(pwd)/$1" ;;
 esac
 # echo $sFullPathName
 
 #-- Open the edit form. It posts to the cgi processor and carries two
 #-- hidden fields: the full path of the document and the document type.
 #-- The <em> opened on the last line is closed after the invitation text.
 cat <<EOF

    <form action = "$sProcessorUrl" 
          method = "post"
	  name   = "wikiform">
    <input  name = "filename" 
            type = "hidden"
           value = "$sFullPathName">
    <input  name = "documenttype" 
            type = "hidden"
           value = "collab">
      
    <hr><a name = "editForm"></a>
    <center>
    <br>
    <em><img src = "http://poseidonia.ella-associates.org/images/uncle-sam.gif">
EOF

 #-- Invite the reader to edit the page. Spanish and French have their own
 #-- wording; every other output language falls back to English.
 case "$sOutputLanguage" in
   spanish)
     printf '%s\n' " 
      Usted, si USTED, puede cambiar este documento. Edita el texto por el cajillo abajo y haz clic
      en el boton. Gracias por su ayuda."
     ;;
   french)
     printf '%s\n' "
      Vous, oui vous, pouvez éditer cette document.  Changez le texte dans la boîte à textes
      ci-dessous et cliquetez le bouton que dit 'gardez les changements'"
     ;;
   *)
     printf '%s\n' " 
      You, yes you, can edit this web-page. Change the text in the text-box below and click
      on the button which says something like 'save the changes'"
     ;;
 esac

 #-- Close the emphasis opened in the form header
 printf '%s\n' "</em><br><br>"
 
 #-- Caption shown above the text box, in the output language
 #-- (English when the language is not one of those listed)
 case "$sOutputLanguage" in
   spanish) printf '%s\n' " <strong>El texto del documento</strong><br>" ;;
   french)  printf '%s\n' " <strong>Le texte du document</strong><br>" ;;
   italian) printf '%s\n' " <strong>Il testo del documento</strong><br>" ;;
   *)       printf '%s\n' " <strong>The text of the document</strong><br>" ;;
 esac

 #-- Filter: escape the characters which an HTML parser treats specially
 #-- ('&' first, then '<' and '>') so that text read on stdin can be placed
 #-- inside an element as plain character data. Reads stdin, writes stdout.
 escapeHtmlText() {
   sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
 }

 #-- Put the raw text of the document ($1) into the textarea so that the
 #-- reader can edit it. Tabs are expanded to spaces and the text is
 #-- converted from UTF-8 to ISO-8859-1. The text is escaped on the way
 #-- because otherwise a document containing '</textarea>' or other markup
 #-- would terminate the text box early and inject HTML into the page, and
 #-- any literal entity such as '&lt;' would be decoded by the browser and
 #-- saved back changed. The browser undoes the escaping when the form is
 #-- submitted, so the processor still receives the original text.
 echo "
    <textarea name = \"DocumentText\"  cols = \"80\" 
              rows = \"10\">"
 expand -- "$1" | \
  escapeHtmlText | \
  iconv --to-code=ISO-8859-1 --from-code=UTF-8

 echo "</textarea><br><br>"

 #-- Pick the caption of the submit button for the output language
 #-- (English when the language is not one of those listed) ...
 case "$sOutputLanguage" in
   spanish) sSubmitLabel="G U A R D A R   L O S   C A M B I O S" ;;
   french)  sSubmitLabel="G A R D E Z   L E S   C H A N G E M E N T S" ;;
   italian) sSubmitLabel="M A N T E N G A   I   C A M B I A M E N T I" ;;
   *)       sSubmitLabel="S A V E   Y O U R   C H A N G E S" ;;
 esac

 #-- ... then emit the button and close the centred form
 printf '%s\n' "<input   type = \"submit\"  value = \"$sSubmitLabel\">"
 printf '%s\n' "</center></form>"
      
 #-- Print the 'see this page in another language' links for a page whose
 #-- own language is named by $1 (spanish, french, italian; anything else
 #-- is treated as English). Each link calls the javascript function
 #-- redirectToGoogleTranslation, passing the language of the page first
 #-- and the language to translate into second. The French and Italian
 #-- pages therefore pass 'fr' and 'it' as the first argument; they used to
 #-- pass 'es', which asked for a translation from the wrong language.
 printTranslationLinks() {
   case "$1" in
     spanish)
       echo "Vea este pagina en (aproximado):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('es', 'en');\">English</a>"
       ;;
     french)
       echo "Voir la cette page dedans (approximatif):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('fr', 'en');\">English</a>"
       ;;
     italian)
       echo "Osservi questa pagina come (approssimativo):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('it', 'en');\">English</a>"
       ;;
     *)
       echo "See this page in (approximate):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'es');\">Espa&ntilde;ol</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'fr');\">Fran&ccedil;ais</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'it');\">Italiano</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'de');\">Deutsch</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'pt');\">Portugu&ecirc;s</a>"
       ;;
   esac
 }

 #-- The links are only shown when they have been switched on
 if [ "$bTranslationLinks" = "true" ]
 then
   echo "<center>"
   printTranslationLinks "$sOutputLanguage"
   echo "</center>"
 fi

 #-- Close the HTML document
 printf '%s\n' "</body>" "</html>"

 #rm -f $1.temp
 #rm -f plain-text-toc.temp