# Description:
#   A script to reformat a plain text file document which contains no
#   particular format. The text file stores the content of a web page which is
#   currently at http://poseidonia.ella-associates.org/ . This script is a
#   direct derivation of 'plaintext2html-forum.sh' This current script is
#   designed specifically for a particular webpage or text document.
#
#   The script will work in conjunction with the cgi script '?' to allow the
#   visitor to the web page to edit the page.
#
#   The script also generates an HTML form which allows the reader to edit the text
#   of the document. The script recognises some 'cues' within the plain text document.
#   I refer to these cues or 'structures' as 'Invisible Markup Language' (IML) or Mas
#   o Menos Markup Language (MMML). The basic idea is to have as little actual
#   'markup' in the text document as possible, and the markup which is present should
#   'look good' in the plain text file. So,
#   instead of using, say, 
#      %^* Section Heading                   
#    which is valid markup but looks ugly in the text file, we use all capitals
#    which looks better in the text file
#   
#   A line beginning with = is a page title.
#   A line beginning with '*' will be hyperlinked.
#   URLs get automatically hyperlinked in some non-determinate way.  All
#   Capitals lines are section headings. These section headings may then be
#   used as a table of contents and hyperlinked in various ways
#
#   This script, like the linkdoc2html.sh script also accepts the format
#     * Document Title|Html-Url-Or-Path|Text-Url-Or-Path|
#   The script will render this into an emphasised 'document title' with
#   hyper-links to the different formats for the document.
#
#   Blocks of text surrounded by '-->>' and '--<<' are not 'formatted' in any way
#
#   The script also formats lines starting in 'added by:' to make those lines
#   stand out from the rest of the text. This is a 'courtesy' to the
#   '/cgi-bin/add-comment' script which added this line to a text file when it
#   inserts a user provided comment in the text file.
#
# Examples:
#   If the scripts are on the system 'path' then the leading './' characters
#   below are not necessary
#
#   ./poseidontext2html-forum.sh concert-details.txt notran > concert-details.html
#     This command line, executed in some kind of a bash shell, will transform
#     a plain text file which isn't in any particular format, into an HTML file
#     (that is it will create a new HTML file and leave the original text file
#     unchanged) and will not display the automatic translation links to
#     Google. Also an HTML table of contents (with one entry for each heading,
#     if there are headings) will be inserted in the HTML document.
#
#   ./poseidontext2html-forum.sh mjb-work.txt notran notoc > mjb-work.html
#     The text file will be transformed into HTML but no table of contents will
#     be inserted nor any translation links.
#
#   ./poseidontext2html-forum.sh mjb-work.txt tran notoc > mjb-work.html
#     If translation links are desired but no table of contents, use a command
#     line similar to above. The string 'tran' could be anything as long as it's
#     not 'notran'. This slightly dodgy 'feature' is owing to the fact that I am
#     not using any 'getopt' style option parsing.
#
#   ./poseidontext2html-forum.sh stuff.txt notran toc "http://63.105.73.195/cgi-bin/some-weird-script"
#     This transforms the file stuff.txt omitting translation links, inserting
#     a hyperlinked table of contents, and setting the target for the 'edit
#     document' form to the URL specified in the last parameter.
#
#
# Parameters:
#   textFileName  [required]
#     The name of the text file which is to be transformed from text into html
#   notran        [optional]
#     If the second parameter is the string 'notran' then the javascript links
#     to the google automatic language translation engine will NOT be inserted
#     into the HTML page. This is useful, for example, when the HTML page is
#     going to be located within a 'password-protected' directory, because the
#     Google translation engine will not be able to access the page, and
#     therefore the translation links will not work.
#   notoc         [optional]
#     If the third parameter is the string "notoc", then no HTML table of
#     contents will be generated.
#   forumProcessorUrl           [optional]
#     This parameter (the fourth) indicates where the processing script is
#     located.  If it is omitted, currently the url will default to
#     http://63.105.73.195/cgi-bin/edit-collab
#   output-language [optional] {Not implemented}
#     This is the language in which the message on the generated HTML page
#     will appear. For example messages next to the comment boxes and the 
#     translation links.
#   noforum {Not implemented}
#     If this parameter is present no HTML form will be produced in the output
#     and therefore the web-visitor will not be able to add comments to the 
#     pages.
#   path-to-style-sheet [optional] {Not implemented}
#     Still to implement
#     This is the full path (relative to the Web Server Document Root)
#     to the style sheet which is to be used by the generated HTML page
#     
#
# Notes:
#   The only difference between this script and the 'poseidontext2html-wiki.sh'
#   is that that script does not have the 'editing form or box' on the same
#   HTML page as the rendered text
#
#   Because of the table used to create a left margin for the table of contents
#   and for the body of the text, this HTML is NOT friendly to 'lynx' which
#   does not support HTML tables. A CSS style-sheet command should be used
#   instead of the tables.
#
#   This script should also transform quotes into &quot; & into &amp; etc The
#   script appears to be working reasonably well in conjunction with the
#   'edit-poseidon-forum' cgi script.
#
#   It would be nice to make some kind of 'sub' table of contents for any
#   comments which are present in a document.
#   
#   The translation links wont work from within the 'output' generated 
#   by the 'add-comment' script
#
#   This script has had problems with 'gawk' and different versions of awk. For
#   this reason the 'gawk' or 'awk' code has been removed and replaced with
#   code using the 'nl' program. This program, when used with the -bp option
#   double spaces the object file with lines containing only spaces. Therefore
#   some extra 'sed' lines are necessary to remove these blank lines
#
# See Also:
#   edit-poseidon-forum
#     This is the cgi script which can work in conjunction with the current script
#   poseidontext2html-wiki.sh
#     A very similar script
#   diary2html.sh, 
#     Turns a 'diary' style text file into HTML
#   linkdoc2html.sh,
#     Turns a text file which has a list of URL links and descriptions into HTML
#   linkdoc2html-index.sh
#     As above but also adds an HTML 'table of contents' for possible 'section headings'
#   linkdoc2html-forum.sh
#     Turns a text file with a URL list into an HTML file which has the capability
#     to be contributed to by a web-visitor (using cgi-scripts)
#   plaintext2pdf.sh,
#     Turns a text file into a pdf file with an optional table of contents
#   plaintext2html-simple.sh
#     As below, but doesn't use certain 'bash' tricks
#   plaintext2html.sh
#     Turns a text file with possible section headings and urls into an HTML file
#   glossary2xml.sh
#     Turn a text file which is a sort of 'glossary' into a dodgy xml file
#   alphabetize-glossary.sh
#     Re-arranges a text file which contains a series of definitions of 'items' or 'terms'
#     so that the items are ordered alphabetically.
#   add-comment
#     a cgi-script which can be used in conjunction with some of the 
#     scripts above to add content specified by web-visitors to a web page
#   script-summary.txt
#     contains more short descriptions of scripts and what they do.
# Author:
#   m.j.bishop
#
# Bugs and Ideas
#   See the file linkdoc2html-forum.sh for the beginnings of an attempt to internationalize
#   the output of this script, in the sense that the messages which appear on the 
#   HTML page should be capable of being in various languages, depending on what language
#   the source file is in.
#
#   Add an output-language parameter to this script
#   Also, it would be good to add a 'style-sheet' parameter which would allow
#   this script to change the name or location of the style-sheet which is used
#   by the generated HTML file.
#
#   In Netscape Navigator 4.61, if the style-sheet does not exist at all
#   then the browser is unable to display anything at all. 
#
#   The script could also check if there are translations of the current 
#   HTML or text file, using the standard naming convention of name.file-type.language-code
#   An example of this naming convention is  stuff.html.es  which should
#   be an HTML file which contains Spanish language content. This present
#   script could check for files which have the same name as the source
#   file but which have a different language code extension, and could 
#   therefore automatically add a link to the translated file (in addition,
#   perhaps to the Google translation links). The script would only
#   check in the current directory for these 'translated' files.
#
# Dependencies:
#   iso2html.sed
#   The images used on the 'poseidon site'
#   various Unix tools, a Bash shell
 
 #-- Without a text file name there is nothing to convert: print a one line
 #-- usage summary followed by the comment header of this script (paged
 #-- through 'less'), then stop with a non-zero exit status.
 if [ "$1" = "" ]
 then
   echo "usage: $0  textFileName [notran] [notoc] [forum-processor-url] [noforum]"
   echo "PRESS q TO EXIT THIS HELP. PRESS [space-bar] TO SCROLL DOWN, b to SCROLL UP"
   #-- "$0" is quoted so that the help still works when the script lives in a
   #-- directory whose name contains spaces; sed reads the file directly.
   sed -n "/^[ ]*#/p" "$0" | less
   exit 1;
 fi

 #-- Global defaults and the 'section heading' pattern.
 #--
 #-- sHeadingPattern is an un-anchored basic regular expression describing a
 #-- line which may be treated as a section heading: the line is made up of
 #-- capital letters (plain or accented), digits, spaces and a little
 #-- punctuation, and contains a run of at least three capitals. The run is
 #-- spelled out as [..][..][..][..]* instead of using the \{3,\} notation,
 #-- which (according to the original note here) awk did not seem to accept.
 #-- The pattern is assembled from two shared pieces so that the long
 #-- character classes are only written once.

 sHeadingCapitals='A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜ·ÇÑ'
 sHeadingOther='0-9.\/\\:'
 sHeadingPattern="[ A-Z${sHeadingOther}]*[${sHeadingCapitals}][${sHeadingCapitals}][${sHeadingCapitals}][${sHeadingCapitals}]*[ ${sHeadingCapitals}${sHeadingOther}]*"
 unset sHeadingCapitals sHeadingOther

 #-- Start every run from known values; most of these are recomputed later
 #-- from the page title.
 sOutputLanguage="english"
 sRawPageTitle=""
 sPageTitle=""
 bTableOfContents="true"
 bTranslationLinks=""
 
 
#-- Page title handling.
#--
#-- sRawPageTitle is the text of the first line of the document which starts
#-- with a single '=' (leading '=' and surrounding spaces removed). It still
#-- contains any wiki flags such as '{}', '[]', '(+)' or '(l)'.
#-- sPageTitle is the same text with those flags removed and '<' / '>' turned
#-- into entities, then converted to ISO-8859-1 and passed through
#-- iso2html.sed, ready to be written into the HTML page.
#--
#-- Every expansion is quoted: an unquoted 'echo $sRawPageTitle' lets the
#-- shell treat title text such as '[ab]' or '*' as a file name pattern and
#-- collapses runs of spaces.

#-- extract_raw_title FILE
#--   Print the text of the first '= Title' line of FILE (nothing if none).
extract_raw_title()
{
  expand "$1" | sed -n "/^[ ]*=[^=]/{s/^[ ]*=[ ]*//g;s/[ ]*$//g;p;q;}"
}

#-- strip_title_markup TITLE
#--   Print TITLE without the wiki flags and with '<' and '>' as entities.
strip_title_markup()
{
  printf '%s\n' "$1" |
    sed -e 's/{.\?}//g' -e 's/\[.\?.\?\]//g' -e 's/(+)//g' -e 's/(l)//g' |
    sed -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

sRawPageTitle=$(extract_raw_title "$1")
sPageTitle=$(
  strip_title_markup "$sRawPageTitle" |
    iconv --to-code=ISO-8859-1 --from-code=UTF-8 |
    sed -f /var/www/utils/iso2html.sed)

#-- Page-title flags.
#--
#-- A wiki user controls the rendering of a page by putting small 'flags'
#-- after the page title. A flag is detected by deleting it from the raw
#-- title with sed and checking whether the title changed:
#--   {} or {x}   number the section headings        -> bNumberSections
#--   (l)         capital-case the section headings  -> bCapitalCaseHeadings
#--   [] or [x]   generate a table of contents       -> bTableOfContents
#--   [~]         capital-case the table of contents -> bCapitalCaseTOC
#--   (+)         show the translation links         -> bTranslationLinks
#--
#-- The title is always passed quoted. Comparing the quoted title with the
#-- output of an unquoted 'echo $sRawPageTitle' reported every flag as
#-- present as soon as the title contained two consecutive spaces or a
#-- character the shell expands as a file name pattern.
#--
#-- NOTE(review): the header documents 'notran' ($2) and 'notoc' ($3), but the
#-- values derived from those parameters were always overwritten by the title
#-- flags computed here, so the parameters have no effect. That effective
#-- behaviour is kept; confirm whether the parameters should act as a veto.

#-- strip_title_flag TITLE SED_SCRIPT
#--   Print TITLE after applying SED_SCRIPT to it.
strip_title_flag()
{
  printf '%s\n' "$1" | sed "$2"
}

#-- flag_if_changed TITLE STRIPPED_TITLE
#--   Print "true" when the two strings differ (the flag was present),
#--   otherwise "false".
flag_if_changed()
{
  if [ "$1" = "$2" ]
  then
    echo "false"
  else
    echo "true"
  fi
}

sSectionNumberFlag=$(strip_title_flag "$sRawPageTitle" 's/{.\?}//g')
bNumberSections=$(flag_if_changed "$sRawPageTitle" "$sSectionNumberFlag")

sCapitalCaseSectionFlag=$(strip_title_flag "$sRawPageTitle" 's/(l)//g')
bCapitalCaseHeadings=$(flag_if_changed "$sRawPageTitle" "$sCapitalCaseSectionFlag")

sTableOfContentsFlag=$(strip_title_flag "$sRawPageTitle" 's/\[.\?\]//g')
bTableOfContents=$(flag_if_changed "$sRawPageTitle" "$sTableOfContentsFlag")

sCapitalCaseTOCFlag=$(strip_title_flag "$sRawPageTitle" 's/\[[~]\]//g')
bCapitalCaseTOC=$(flag_if_changed "$sRawPageTitle" "$sCapitalCaseTOCFlag")

sTranslationLinksFlag=$(strip_title_flag "$sRawPageTitle" 's/(+)//g')
bTranslationLinks=$(flag_if_changed "$sRawPageTitle" "$sTranslationLinksFlag")

 #-- Debugging aid, permanently switched off: make the test below true to
 #-- dump the flags computed above.
 if [ "a" = "b" ]
 then
   echo "sRawPageTitle=$sRawPageTitle"
   echo "bNumberSections=$bNumberSections"
   echo "bCapitalCaseHeadings=$bCapitalCaseHeadings"
   echo "bTableOfContents=$bTableOfContents"
   echo "bCapitalCaseTOC=$bCapitalCaseTOC"
   echo "bTranslationLinks=$bTranslationLinks"

 fi


 #-- Document prologue: doctype, META tags, provenance comments, the page
 #-- title, and the javascript helper used by the translation links.
 #-- Each printf argument becomes one line of output.
 printf '%s\n' \
   '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">' \
   '<html>' \
   ' <META HTTP-EQUIV="Content-Type" CONTENT="text/html; CHARSET=iso-8859-1">' \
   ' <META HTTP-EQUIV="Keywords"' \
   "          CONTENT=\"$sPageTitle\">" \
   ' <META HTTP-EQUIV="Description"' \
   "          CONTENT=\"$sPageTitle\">" \
   '        <LINK REV="made" HREF="mailto:webmaster@ella-associates.org">' \
   '' \
   "<!-- html generated by the \"$(basename $0)\" script         -->" \
   "<!-- From the file: \"$1\"             -->" \
   "<!-- On the date: $(date)              -->" \
   "<!-- see http://www.ella-associates.org/utils/$(basename $0) -->"
 #echo "<link   rel = \"stylesheet\"  type = \"text/css\""
 #echo "       href = \"/stylesheets/somestylesheet.css\">"
 printf '%s\n' \
   "<head><title>$sPageTitle</title>" \
   '<script language = "javascript">'
 #-- The javascript sends the browser to Google's translation of the current
 #-- page for the given source|target language pair.
 printf '%s\n' "
 <!--
  function redirectToGoogleTranslation(sSourceLanguage, sTargetLanguage)
  {
    var sTranslationUrl = 'http://translate.google.com/translate?u=';

    sTranslationUrl += escape(document.location.href);
    sTranslationUrl += '&langpair=' + sSourceLanguage + '|' + sTargetLanguage;
    sTranslationUrl += '&hl=' + sSourceLanguage;
    // document.testForm.test.value=sTranslationUrl;
    window.location = sTranslationUrl;
  } //-- redirectToGoogleTranslation()
 -->  "
 
 printf '%s\n' '</script>'
 #-- Embedded style sheet for the generated page.
 #-- The 'font-size' declarations of H2, H2.padded and H3 were written with a
 #-- ';' instead of a ':' ('font-size;11pt;'), which makes them invalid CSS,
 #-- so the sizes were never applied; they are written correctly here.
 #-- NOTE(review): the 'background-image' value of 'a.toc' is a bare URL (not
 #-- wrapped in url(...)), so it is not valid CSS either. It is left untouched
 #-- because repairing it would change how the table of contents looks.
 echo '
  <style type="text/css"> 
    a
    {
      /* Pale Acqua */
      /* color: #CCFFFF; */
    }

    a.toc
    {
      /* text-transform: lowercase; */
      text-decoration: none;
      margin-left: 30%;
      background-image: http://www.ella-associates.org/poseidonia/images/section-images-circle-small.jpg
    }
    a.toc:link     { text-decoration:none; }
    a.toc:visited  { text-decoration:none; }
    a.toc:hover    { text-decoration:underline; }
    a.toc:active   { text-decoration:none; }
	
    BODY   
    {
      margin: 10px 10px;
      background-color:white; 
      /* background-color:#CCFFFF; Pale Acqua */
      /* background-color:#669966;  */
      /* #669966 Khaki */ 
    }
    
    H2  
    {
      margin-top:0px;  margin-bottom:0px;
      padding-top:0px; padding-bottom:0px;
      font-weight:normal; font-size:11pt;
    }

    H2.padded
    {
      margin-top:6px;  margin-bottom:0px;
      padding-top:6px; padding-bottom:0px;
      font-weight:normal; font-size:11pt;
    }

    H3 
    {
      margin-top:0px;  margin-bottom:0px;
      padding-top:0px; padding-bottom:0px;
      font-weight:normal;
      font-size:9pt;

    }

    .center 
    {
      text-align:center; 
    }

    .darkblue { color:#333399; background-color:white; }
    .red    { color:#CC0000; background-color:#FFCCCC; }
    
    pre.codebox 
    {
      margin-left:0em; margin-right:4em;
      margin-top:4px;  margin-bottom:4px;
      padding-top:3px; padding-bottom:3px;
      border:1px;      border-style:solid;   border-color:#006600;
      color:#3B3B3B;   background-color:#E9E9E9;
      font-family: "Courier New", Courier, monospace;
      font-size:10pt;
      white-space:pre;
    }

  </style>'

 
 echo "</head>"
 echo "<body>"

 #-- emit_header_translation_links LANGUAGE
 #--   Print the centred row of Google translation links shown at the top of
 #--   the page. LANGUAGE is the language of the page ('spanish', 'french',
 #--   'italian'; anything else is treated as english). The first argument
 #--   given to redirectToGoogleTranslation() is the source language of the
 #--   page, so the french and italian pages pass 'fr' and 'it' (they used to
 #--   pass 'es', copied from the spanish branch).
 emit_header_translation_links()
 {
   echo "<center>"
   case "$1" in
     spanish)
       echo "Vea este pagina en (aproximado):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('es', 'en');\">English</a>"
       ;;
     french)
       echo "Voir la cette page dedans (approximatif):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('fr', 'en');\">English</a>"
       ;;
     italian)
       echo "Osservi questa pagina come (approssimativo):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('it', 'en');\">English</a>"
       ;;
     *)
       echo "See this page in (approximate):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'es');\">Espa&ntilde;ol</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'fr');\">Fran&ccedil;ais</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'it');\">Italiano</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'de');\">Deutsch</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'pt');\">Portugu&ecirc;s</a>"
       ;;
   esac
   echo "</center>"
 }

 #-- The links are only shown when they have been requested for this page
 if [ "$bTranslationLinks" = "true" ]
 then
   emit_header_translation_links "$sOutputLanguage"
 fi

	
#-- The page heading and its ornament come first, ahead of the table of
#-- contents; then the layout table which holds the rest of the page is opened.
printf '%s\n' "<center><h1>$sPageTitle</h1></center>"
printf '%s\n' '<center><strong><font size=+4>({*})</font></strong></center>'

printf '%s\n' '<table><tr><td>'

 #-- Link down to the edit form, labelled in the output language
 case "$sOutputLanguage" in
   spanish)    sEditLinkLabel='[editar este documento]' ;;
   french)     sEditLinkLabel='[éditez ce document]' ;;
   italian)    sEditLinkLabel='[cambi questo documento]' ;;
   portuguese) sEditLinkLabel='[edite este original]' ;;
   german)     sEditLinkLabel='[Redigieren Sie dieses Dokument]' ;;
   *)          sEditLinkLabel='[edit this web-page]' ;;
 esac
 printf '%s\n' "<center><a href=\"#editForm\">${sEditLinkLabel}</a> </center>"

#-- Insert the table of contents
#--
#-- Every line of the document which matches the section heading pattern
#-- becomes one hyperlinked entry pointing at the anchor 'item<N>', where N is
#-- the position of the heading among all headings. Because all the other
#-- lines are deleted first, a plain 'nl' is enough to number the headings.
#-- The file name is quoted so that documents whose path contains spaces can
#-- be processed, and there is a single pipeline: numbered and un-numbered
#-- tables differ only in the text of the link.
if [ "$bTableOfContents" = "true" ]
then

  #-- Text of each entry: '\1' is the heading number, '\2' the heading text
  if [ "$bNumberSections" = "true" ]
  then
    sTocEntryLabel='\1. \2'
  else
    sTocEntryLabel='\2'
  fi

  #-- Filter: capital-case the entries if that was requested with the '[~]'
  #-- title flag, otherwise pass them through unchanged.
  capitalize_toc_entries()
  {
    if [ "$bCapitalCaseTOC" = "true" ]
    then
      sed -f /var/www/utils/capital-case.sed
    else
      cat
    fi
  }

  #-- Stages of the pipeline below: keep only the heading lines; trim them;
  #-- number them; glue the number to the text; wrap number and text in a
  #-- link; optionally capital-case; convert UTF-8 to ISO-8859-1 (the RedHat
  #-- server uses the UTF-8 character set); 'entitize' accented characters.
  echo "<a name = \"toc\"></a>"
  expand "$1" |
    sed "/^$sHeadingPattern$/!d" |
    sed -e "s/^[ ]*//g" -e "s/[ ]*$//g" |
    nl -s" " |
    sed "s/^[ ]*\([1-9][0-9]*\) /\1/g" |
    sed "s/^\([0-9]\+\)\(.*\)$/<br><a href=\"#item\1\" class=\"toc\">${sTocEntryLabel}<\/a>/g" |
    capitalize_toc_entries |
    iconv --to-code=ISO-8859-1 --from-code=UTF-8 |
    sed -f /var/www/utils/iso2html.sed
fi

#-- Transform the text to HTML, insert anchors
#-- Also delete the heading line which has already been inserted in the HTML
#-- But, the line will also delete lines beginning in == or === etc, which
#-- may not be desirable.
#-- The line below was designed to make the contents of quotes look different
#-- but I think that it made the text less readable
#--
#--  sed "s/\(['\"]\)[^'\"]\{1,\}\1/<tt>&<\/tt>/g" | \
#--
#-- I have disabled the line which turns * beginning lines into hyperlinks
#-- since this was not desirable for the netbeans documentation
#-- The version of SED on RedHat linux does not like the syntax "\{,4\}" but "\{0,4\}"
#-- is ok.
#
# What follows below is quite tricky. The order of each of the sed transformation DOES matter
# The tricky bits are allowing for accented european characters, and converting back and forth
# between unicode and iso latin etc
#
# In the context of this 'wiki' script it is reasonably important to display the text 'prettily'
# so I am going to change the presentation of links etc. This allows the user to have
# more control over how the web page is displayed finally


  #-- Pattern given to 'nl -bp' to decide which lines of the body are section
  #-- headings and therefore receive a number. 'At least three capitals' is
  #-- written as [..][..][..][..]* : a bare '+' is an ordinary character in
  #-- the POSIX basic regular expressions used by current GNU nl, so the
  #-- earlier form ending in '[..]+' only matched headings containing a
  #-- literal plus sign.
  sNumberingPattern='^[ A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜ·ÇÑ0-9.\/\\:]*[A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜÇÑ][A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜÇÑ][A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜÇÑ][A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜÇÑ]*[ A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜ·ÇÑ0-9.\/\\:]*$'

  #-- sed replacement text for a numbered heading line: '\1' is the heading
  #-- number and '\2' the heading text. The number is only displayed when
  #-- section numbering is on, and the '[toc]' back-link is only added when
  #-- the page has a table of contents to go back to.
  if [ "$bNumberSections" = "true" ]
  then
    sHeadingLabel='\1. \2'
  else
    sHeadingLabel='\2'
  fi

  if [ "$bTableOfContents" = "true" ]
  then
    sHeadingTocLink='<a href=\"#toc\">[toc]<\/a>'
  else
    sHeadingTocLink=''
  fi

  sSectionHeadReplacement='<strong><a name=\"item\1\">'"$sHeadingLabel"'<\/a> '"$sHeadingTocLink"'<\/strong>'

  #-- Command for capital-casing the headings when the '(l)' title flag is
  #-- present; the pipeline stage which would run it is currently commented out.
  if [ "$bCapitalCaseHeadings" = "true" ]
  then
    sCapHeadingsCommand="sed -f /var/www/utils/capital-case-headings.sed"
  else
    sCapHeadingsCommand="cat"
  fi
  
  #-- Main conversion: one long pipeline which turns the text of the document
  #-- ($1) into the HTML body written to standard output. The order of the
  #-- stages matters; later stages rely on the output of earlier ones.
  expand $1 | \
  sed "s/^[ ]*$//g" | \
  #-- Number all lines that are 'section headings', allow for european accented characters
  nl -s" " -bp"$sNumberingPattern" | \
  #-- Get rid of the lines which consist only of spaces after the nl stage
  #-- NOTE(review): nl appears to pad every un-numbered line, so the blank lines
  #-- of the document itself are probably removed here as well - confirm.
  sed  "/^[ ]\+$/d" | \
  #-- Reformat the numbered section headings (number directly followed by the text)
  sed "s/^[ ]*\([1-9][0-9]*\) /\1/g" | \
  #-- Delete the page title because it has already been output
  sed "/^[ ]*=[^=]/d" | \
  #-- Encode '<' and '>' as HTML entities (note that '&' is not encoded here)
  sed -e "s/</\&lt;/g" -e "s/>/\&gt;/g" | \
  #-- Do a trick to get the '-->>' and '--<<' blocks of text to work: by now they
  #-- read '--&gt;&gt;' and '--&lt;&lt;' and are turned into <pre> and </pre>
  sed -e "s/^[ ]*\-\-\&gt;\&gt;/<pre>/g" -e "s/^[ ]*\-\-\&lt;\&lt;/<\/pre>/g" | \
  #-- Make each 'section heading' into an HTML anchor to work with the 'Table of Contents'
  sed "s/^\([0-9]\{1,\}\)\([ A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜ·ÇÑ0-9.\/\\:]*[A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜÇÑ]\{3,\}[ A-ZÁÉÍÓÚÀÈÌÒÚÄËÏÖÜ·ÇÑ0-9.\/\\:]*\)$/$sSectionHeadReplacement/g" | \
  #-- If the section headings need to be 'capital cased', do so (currently disabled)
  #eval "$sCapHeadingsCommand" | \
  #-- line below because the RedHat server uses UTF-8 character set
  iconv --to-code=ISO-8859-1 --from-code=UTF-8 | \
  #-- Try to 'entitize' the accented characters
  sed -f /var/www/utils/iso2html.sed | \
  #-- Allow spanish section tags
  sed "s/IM[ÁA]GEN\-[ÍI]NDICE\-PRINCIPIO/IMAGE\-INDEX\-BEGIN/g" | \
  sed "s/IM[ÁA]GEN\-[ÍI]NDICE\-FINAL/IMAGE\-INDEX\-END/g" | \
  #-- Lets deal with image index things. We have to get a few lines into the pattern space
  #-- so that we can hyperlink the image and the first label line
#  sed "/\[IMAGE\-INDEX\-BEGIN\]/,/\[IMAGE\-INDEX\-END\]/ {N;N;s/Image:[ ]*\([^ \n]\{2,\}\)[ ]*\n[ ]*Link:[ ]*\([^ \n]\+\)[ ]*\n\(.*\)/<a href='\2'><img src='\1' border='0' align='left'><\/a><a href='\2'>\3<\/a>/g;}" | \
  sed "/\[IMAGE\-INDEX\-BEGIN\]/,/\[IMAGE\-INDEX\-END\]/ {/^[ ]*Image/{N;N;s/Image:[ ]*\([^ \n]\{2,\}\)[ ]*\n[ ]*Link:[ ]*\([^ \n]*\)[ ]*\n\(.*\)/<a href='\2'><img src='\1' border='0' align='left'><\/a><a href='\2'>\3<\/a>/g;};}" | \
  #-- Inside an image index, blank lines become paragraph breaks
  sed "/\[IMAGE\-INDEX\-BEGIN\]/,/\[IMAGE\-INDEX\-END\]/ {s/^[ ]*$/<p>/;}" | \
   #-- Example of Format Below: [*] My Title|/my/path/to/file-no-extension|html|txt|xml|pdf|
   sed "/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href='\2\.\3'>\3<\/a> | <a href='\2\.\4'>\4<\/a> | <a href='\2\.\5'>\5<\/a> | <a href='\2\.\6'>\6<\/a>)/gi" | \
   #-- Example of Format Below: * My Title|/my/path/to/file-no-extension|html|txt|pdf|
   sed "/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href='\2\.\3'>\3<\/a> | <a href='\2\.\4'>\4<\/a> | <a href='\2\.\5'>\5<\/a>)/gi" | \
   #-- Example of Format Below: \[*\] My Title|/my/path/to/file-no-extension|pdf|html|
   sed "/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([a-zA-Z]\{1,8\}\)|\([a-zA-Z]\{1,8\}\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href='\2\.\3'>\3<\/a> | <a href='\2\.\4'>\4<\/a>)/gi" | \
   #-- Example of Format Below: * My Title|/full/path/to/htmlfile|/full/path/to/text/file|/full/path/to/pdffile|
   sed "/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([^|]*\)|\([^|]*\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href='\2'>html<\/a> | <a href='\3'>text<\/a> | <a href='\4'>pdf<\/a>)/gi" | \
   #-- Example of Format Below: * My Title|/full/path/to/htmlfile|/full/path/to/text/file|
   sed "/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([^|]*\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href='\2'>html<\/a> | <a href='\3'>text<\/a>)/gi" | \
   #-- Trick to make 'txt' links into 'text' links for readability
   sed "s/>txt<\/a>/>text<\/a>/gi" | \
   #-- Example of Format Below: * My Title|/full/path/to/any-old-file|
   sed "/^[ ]*\*.*|.*|.*/ s/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|/<b>\1<\/b>(<a href=\"\2\">\2<\/a>)/gi" | \
   #-- Example of Format Below: [*] /full/path/to/any-old-file
   sed "s/^[ ]*\[\*\][ ]*\([^ ]\{2,\}\)/<a href=\"\1\">\1<\/a>/gi" | \
   #-- Example of Format Below: * http://domain.org/resource.html
   sed "s/^[ ]*\*[ ]*\(http:\/\/[^ ]\{2,\}\)/<a href=\"\1\">\1<\/a>/gi" | \
  #-- Hyperlink urls with different display text like: "Some link" http://blah.com
  sed "/<pre>/,/<\/pre>/!s/\"\([^\"]\{1,50\}\)\"[ ]\{0,4\}\(http:\/\/[-a-z:_\%0-9\~\\\/\"\'\.\&\@?=]\{3,\}\)/<a href=\2>\1<\/a>/gi" | \
  #-- Hyperlink URLs beginning with http, except between <pre> tags
  #-- The style immediately below is more 'academic'
  #sed "/<pre>/,/<\/pre>/!s/[^\">]\(http:\/\/[-a-z:_\%0-9\~\\\/\"\'\.\@\&?=]\{3,\}\)/<a href=\1>[*]<\/a><tt>\1<\/tt>/gi" | \
  sed "/<pre>/,/<\/pre>/!s/[^\">'=]\(http:\/\/[-a-z:_\%0-9\~\\\/\"\'\.\@]\{3,\}\)/<a href='\1'>\1<\/a>/gi" | \
  #-- Hyperlink URLs beginning with http at the beginning of lines, except between <pre> tags
  #sed "/<pre>/,/<\/pre>/!s/^\(http:\/\/[-a-z:_\%0-9\~\\\/\"\'\.\@]\{3,\}\)/<a href='\1'>[*]<\/a><tt>\1<\/tt>/gi" | \
  sed "/<pre>/,/<\/pre>/!s/^\(http:\/\/[-a-z:_\%0-9\~\\\/\"\'\.\@]\{3,\}\)/<a href='\1'>\1<\/a>/gi" | \
  #-- Hyperlink email addresses with a 'mailto:' link
  sed "/<pre>/,/<\/pre>/!s/\([^ ]\{2,\}@[^ \"']\{2,\}\)/<a href=\"mailto:\1\">\1<\/a>/g" | \
  #-- Hyperlink URLs beginning with 'www.'
  #sed "/<pre>/,/<\/pre>/!s/[^a-zA-Z\/\">]\(www\.[-a-z:_\%0-9\~\\\/\"\'\.\@]\{2,\}\)/<a href='http:\/\/\1'>[*]<\/a><tt>\1<\/tt>/gi" | \
  sed "/<pre>/,/<\/pre>/!s/[^a-zA-Z\/\">]\(www\.[-a-z:_\%0-9\~\\\/\"\'\.\@]\{2,\}\)/<a href='http:\/\/\1'>\1<\/a>/gi" | \
  #-- Format comments added by web-users (lines of the form 'added by: name, on date')
   sed "s/^\([ ]*added[ ]\{0,4\}by:\)\([^,]\{1,\}\)\,[ ]*on[ ]*\(.*\)/<u><em>\1<\/em><tt> \2<\/tt><em> on \3<\/em><\/u>/gi" | \
  #-- Turn pairs of spaces into non-breaking-spaces unless they are between 'pre' tags
  #-- or inside an image index
  sed "/<pre>/,/<\/pre>/!{/\[IMAGE-INDEX-BEGIN\]/,/\[IMAGE-INDEX-END\]/! s/[ ]\{2\}/\&nbsp;\&nbsp;/g;}" | \
  #-- Make paragraphs where there are blank lines (disabled)
  #sed "/<pre>/,/<\/pre>/!s/^[ ]*$/<p>/g" | \
  #-- Make 'fake' headings: {{text}} is emphasised, '== text' becomes a large heading
  sed "/<pre>/,/<\/pre>/!s/{{/<strong><em>/g" | \
  sed "/<pre>/,/<\/pre>/!s/}}/<\/em><\/strong>/g" | \
  sed "/<pre>/,/<\/pre>/!s/[ ]*==[ ]*\(.*\)/<font size=+2><strong><em>\1<\/em><\/strong><\/font>/g" | \
  #-- Turn line breaks into <br> tags unless they are between 'pre' tags. This isn't really
  #-- a good idea since you dont know the width of the target screen
  #sed "/<pre>/,/<\/pre>/!{/\[IMAGE-INDEX-BEGIN\]/,/\[IMAGE-INDEX-END\]/! s/^/<br>/g;}" 
  sed "/<pre>/,/<\/pre>/!{s/^/<br>/g;}" | \
  #-- Finally remove the image index markers themselves
  sed -e "s/\[IMAGE-INDEX-BEGIN\]//g" -e "s/\[IMAGE-INDEX-END\]//g"
#-- Close the layout table which holds the body of the page
printf '%s\n' "</td></tr></table>"
  
 printf '%s\n' "<br>"

 #-- The cgi program which receives the contents of the HTML textarea and
 #-- updates the document is taken from the fourth parameter when one is
 #-- given; otherwise the built-in address is used.
 #-- It would be possible to use a Domain Name instead of the IP address
 #-- below (for example http://www.ella-associates.org/cgi-bin/edit-collab).
 #-- With the IP address the script still works even if the DNS configuration
 #-- fails. I am not sure if this is really a good idea or not.
 sProcessorUrl=${4:-http://63.105.73.195/cgi-bin/edit-collab}
 #-- The target processor is not in the same directory as the source document
 #-- (the text file), so the form has to carry the full path name of the
 #-- document. A name which already starts with '/' is used as it is; every
 #-- other name is taken to be relative to the current directory. The name is
 #-- always quoted so that paths containing spaces are handled.

 #-- absolute_document_path PATH
 #--   Print PATH unchanged when it is absolute, otherwise prefixed with the
 #--   current working directory.
 absolute_document_path()
 {
   case "$1" in
     /*) printf '%s\n' "$1" ;;
     *)  printf '%s\n' "$(pwd)/$1" ;;
   esac
 }

 sFullPathName=$(absolute_document_path "$1")
 # echo $sFullPathName
 
 #-- Open the edit form. The hidden fields tell the processing script which
 #-- file to update and what kind of document it is. Each printf argument is
 #-- one line of output; the first argument is an empty line.
 printf '%s\n' \
   '' \
   "    <form action = \"$sProcessorUrl\" " \
   '          method = "post">' \
   '    <input  name = "filename" ' \
   '            type = "hidden"' \
   "           value = \"$sFullPathName\">" \
   '    <input  name = "documenttype" ' \
   '            type = "hidden"' \
   '           value = "collab">' \
   '      ' \
   '    <hr><a name = "editForm"></a>' \
   '    <center>' \
   '    <br>' \
   '    <em><img src = "http://poseidonia.ella-associates.org/images/uncle-sam.gif">'

 #-- Invitation to edit the page, in the output language
 case "$sOutputLanguage" in
   spanish)
     printf '%s\n' \
       ' ' \
       '      Usted, si USTED, puede cambiar este documento. Edita el texto por el cajillo abajo y haz clic' \
       '      en el boton. Gracias por su ayuda.'
     ;;
   french)
     printf '%s\n' \
       '' \
       '      Vous, oui vous, pouvez éditer cette document.  Changez le texte dans la boîte à textes' \
       "      ci-dessous et cliquetez le bouton que dit 'gardez les changements'"
     ;;
   *)
     printf '%s\n' \
       ' ' \
       '      You, yes you, can edit this web-page. Change the text in the text-box below and click' \
       "      on the button which says something like 'save the changes'"
     ;;
 esac

 printf '%s\n' "</em><br><br>"
 
 #-- Caption shown above the text box
 case "$sOutputLanguage" in
   spanish) sTextBoxCaption='El texto del documento' ;;
   french)  sTextBoxCaption='Le texte du document' ;;
   italian) sTextBoxCaption='Il testo del documento' ;;
   *)       sTextBoxCaption='The text of the document' ;;
 esac
 printf '%s\n' " <strong>${sTextBoxCaption}</strong><br>"

 #-- The text box holding the current text of the document.
 #-- The text is escaped before it is written between the textarea tags: the
 #-- browser decodes entities inside a textarea, so without the escaping a
 #-- document containing '</textarea>' would end the text box early, and text
 #-- such as '&lt;' would come back as '<' when the form is saved.

 #-- escape_textarea_text
 #--   Filter (standard input to standard output) which encodes '&', '<' and
 #--   '>' as HTML entities. '&' has to be handled first.
 escape_textarea_text()
 {
   sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
 }

 echo "
    <textarea name = \"DocumentText\"  cols = \"80\" 
              rows = \"10\">"
 expand "$1" | \
  escape_textarea_text | \
  iconv --to-code=ISO-8859-1 --from-code=UTF-8 
  
 echo "</textarea><br><br>"

 #-- The submit button, captioned in the output language, followed by the end
 #-- of the edit form.
 case "$sOutputLanguage" in
   spanish) sSubmitCaption="G U A R D A R   L O S   C A M B I O S" ;;
   french)  sSubmitCaption="G A R D E Z   L E S   C H A N G E M E N T S" ;;
   italian) sSubmitCaption="M A N T E N G A   I   C A M B I A M E N T I" ;;
   *)       sSubmitCaption="S A V E   Y O U R   C H A N G E S" ;;
 esac
 printf '%s\n' "<input   type = \"submit\"  value = \"${sSubmitCaption}\">"

 printf '%s\n' "</center></form>"
      
 #-- emit_footer_translation_links LANGUAGE
 #--   Print the centred row of Google translation links shown at the bottom
 #--   of the page. LANGUAGE is the language of the page ('spanish', 'french',
 #--   'italian'; anything else is treated as english). The first argument
 #--   given to redirectToGoogleTranslation() is the source language of the
 #--   page, so the french and italian pages pass 'fr' and 'it' (they used to
 #--   pass 'es', copied from the spanish branch).
 emit_footer_translation_links()
 {
   echo "<center>"
   case "$1" in
     spanish)
       echo "Vea este pagina en (aproximado):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('es', 'en');\">English</a>"
       ;;
     french)
       echo "Voir la cette page dedans (approximatif):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('fr', 'en');\">English</a>"
       ;;
     italian)
       echo "Osservi questa pagina come (approssimativo):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('it', 'en');\">English</a>"
       ;;
     *)
       echo "See this page in (approximate):"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'es');\">Espa&ntilde;ol</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'fr');\">Fran&ccedil;ais</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'it');\">Italiano</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'de');\">Deutsch</a>|"
       echo "<a  href=\"javascript:redirectToGoogleTranslation('en', 'pt');\">Portugu&ecirc;s</a>"
       ;;
   esac
   echo "</center>"
 }

 #-- The links are only shown when they have been requested for this page
 if [ "$bTranslationLinks" = "true" ]
 then
   emit_footer_translation_links "$sOutputLanguage"
 fi

 #-- End of the HTML document
 echo "</body>"
 echo "</html>"

 #rm -f $1.temp
 #rm -f plain-text-toc.temp