# Description:
#    A script to reformat a plain text file document which contains
#    a set of urls and descriptions of those urls into
#    some kind of html. The script recognises some special structures
#    within the plain text document. For example:
#
#    The '=' character, when it is the first non-whitespace character on a
#    line, indicates that all the following text on the line should be
#    formatted as a 'heading' or 'page title'.
#
#    The '*' character indicates that the following white-space delimited text
#    should be formatted as an Html hyperlink, with the text content of the
#    hyperlink being the url itself.
#
#    This script also accepts the format:
#      [Beginning Of Line][spaces]*[spaces]The Document Title|Url-Or-Path/to/Html/File|Url-Or-Path/To/Text/File|
#
#    This script also accepts the format (all on one line):
#      [Beginning Of Line][spaces]*[spaces]The Document Title|Url-Or-Path/to/Html/File|Url-Or-Path/To/Text/File|
#      |Url-Or-Path/To/Pdf/File|
#
#    This script also accepts the format:
#      [Beginning Of Line][spaces]*[spaces]The Document Title|Url-Or-Path/to/Base/FileName||||
#    An example of this format would be
#      * A Interesting Analysis|/alexis-info/docs/the-ramble||||
#    This example assumes that there are files
#      /alexis-info/docs/the-ramble.html
#      /alexis-info/docs/the-ramble.txt
#      /alexis-info/docs/the-ramble.pdf
#
#    This format is useful when all the different 'versions' (that is, document formats)
#    have the same base name and directory location, but have the appropriate file name
#    extension for their document type. The script will automatically generate links
#    to each of these document formats in the order: html, text, pdf
#
#    This script also accepts the format:
#      [Beginning Of Line][spaces]*[spaces]The Document Title|Url-Or-Path/to/Base/FileName|||
#    This produces the same results as the format above except that no link to an Adobe 'pdf'
#    file is created.
#
#    This script also accepts the format (All on one line):
#      [Beginning Of Line][spaces]*[spaces]
#      The Document/Link Title|Url-Or-Path/to/File|
#
#    The script also accepts the format:
#      [Beginning Of Line][spaces]http://blah
#
#    The script will also format blocks of text between the strings -->> and --<<
#    (where they are the first string on the line) as an HTML <pre> block
# 
#    This filter script also ignores lines starting with a '#' character. That is,
#    those lines will not be rendered into Html.
# 
#    Please see the file /var/www/alexis-info/docs/resources.txt for an
#    example of a file which utilizes some of the formats described above.
#
# Example:
#    ./linkdoc2html.sh aRave.txt > aRave.html
#     
# Parameters:
#   textFileName
#     The name of the text file which is to be transformed from text into html
#   notran
#     If the second parameter is the string 'notran' then the javascript links
#     to the google automatic language translation engine will NOT be inserted
#     into the HTML page. This is useful, for example, when the HTML page is
#     going to be located within a 'password-protected' directory, because
#     the Google translation engine will not be able to access the page, and
#     therefore the translation links will not work.
#    
# Notes:
#   The idea of this script is to allow the text file to be as free of 'mark-up'
#   as is possible. This can allow the simple maintenance of the text file, although
#   the precision and utility of a system such as XML is not available. 
#   It should be possible to modify this script to produce XML instead of HTML
#
#   This script has been successfully run on the debian linux bash shell.
#   It is possible that it would also run on a Microsoft Windows bash shell,
#   such as the Cygwin Bash shell.
#   
#   There is a GPL perl program called text2html which performs a similar task
#   to this script.
#
#   The HTML produced by this script is NOT friendly to Lynx, the text browser
#   because it uses an HTML table to create a 'left margin' for the document
#   A style sheet should be used instead.
#
#  See Also:
#    txtdoc2html.sh, diary2html.sh, plaintext2html.sh
#    plaintext2pdf.sh
#  Author:
#   m.j.bishop

 # With no file name argument: print a one-line usage message followed by
 # every '#' comment line of this script (the header comment doubles as the
 # help text), then exit with status 1.
 if [ -z "$1" ]
 then
   echo "usage: $0 textFileName [notran]"
   # "$0" is quoted so the help dump still works when the script's path
   # contains whitespace; sed reads the file directly instead of via cat.
   sed -n '/^[ ]*#/p' "$0"
   exit 1
 fi

 # Emit the fixed lines that precede the page body, one argument per
 # output line.
 # NOTE(review): every string here is empty or whitespace-only in this copy
 # of the script; presumably they once carried the HTML document preamble
 # and the markup was lost -- confirm against a pristine copy.
 printf '%s\n' \
   "" \
   "" \
   " " \
   " " \
   " " \
   "        " \
   "" \
   "" \
   "" \
   "" \
   "" \
   "" \
   "" \
   ""
 #-- The Google automatic translation links below are sometimes disabled
 #-- because they will not work from within a password protected directory,
 #-- since Google does not have permission to view that directory.
 #
 # NOTE(review): in the reviewed copy of this script every literal '<tag>' had
 # been removed and HTML entities had been decoded. That fused many statements
 # onto single lines (the 'if' blocks never closed) and left sed commands with
 # empty regexes and raw newlines, so the script could not run. Statement
 # boundaries are restored below. Opening tags inside the sed rules were
 # rebuilt from their surviving closing tags and from the format description
 # in the header comment -- confirm them against a pristine copy. Markup that
 # used to be inside the echo'ed strings could not be recovered; those strings
 # are emitted exactly as found (mostly blank lines).

 # Print the "See this page in ..." language bar (used above and below the
 # document body).
 # NOTE(review): presumably each language name was a hyperlink to the
 # translation service; that link markup is missing here -- restore/confirm.
 print_translation_links() {
   printf '%s\n' \
     "" \
     "" \
     "See this page in (approximate):" \
     "Español|" \
     "Français|" \
     "Italiano|" \
     "Deutsch|" \
     "Português" \
     "" \
     ""
 }

 # Convert a link document read on stdin into an HTML fragment on stdout.
 # Rules are applied to each line in this order:
 #   1. drop lines whose first non-blank character is '#'
 #   2. escape '<' and '>' as entities
 #   3. '-->>' / '--<<' at line start (already escaped by rule 2) open and
 #      close a <pre> block
 #   4. a line starting with http://... becomes a hyperlink
 #   5. '* Title|...|' lines become a bold title plus format links; the most
 #      specific pipe layouts are tried first
 #   6. '= text' becomes a centred heading
 #   7. '* url' becomes a hyperlink whose text is the url
 #   8. outside <pre> blocks, each pair of spaces becomes two &nbsp; entities
 #      and every line is prefixed with a line break
 #      NOTE(review): '<br>' is an assumption; the tag was lost -- confirm.
 linkdoc_to_html() {
   expand |
     sed \
       -e '/^[ ]*#/d' \
       -e 's/</\&lt;/g' \
       -e 's/>/\&gt;/g' \
       -e 's/^[ ]*--&gt;&gt;/<pre>/g' \
       -e 's/^[ ]*--&lt;&lt;/<\/pre>/g' \
       -e 's/^[ ]*\(http:\/\/[^ ]\{3,\}\)/<a href="\1">\1<\/a>/gi' \
       -e 's/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)||||/<b>\1<\/b> (<em>Formats:<\/em> <a href="\2.html">html<\/a> | <a href="\2.txt">text<\/a> | <a href="\2.pdf">pdf<\/a>)/gi' \
       -e 's/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|html|txt|pdf|/<b>\1<\/b> (<em>Formats:<\/em> <a href="\2.html">html<\/a> | <a href="\2.txt">text<\/a> | <a href="\2.pdf">pdf<\/a>)/gi' \
       -e 's/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|||/<b>\1<\/b> (<em>Formats:<\/em> <a href="\2.html">html<\/a> | <a href="\2.txt">text<\/a>)/gi' \
       -e 's/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|html|txt|/<b>\1<\/b> (<em>Formats:<\/em> <a href="\2.html">html<\/a> | <a href="\2.txt">text<\/a>)/gi' \
       -e 's/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|txt|html|/<b>\1<\/b> (<em>Formats:<\/em> <a href="\2.txt">text<\/a> | <a href="\2.html">html<\/a>)/gi' \
       -e 's/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([^|]*\)|\([^|]*\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href="\2">html<\/a> | <a href="\3">text<\/a> | <a href="\4">pdf<\/a>)/gi' \
       -e 's/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|\([^|]*\)|/<b>\1<\/b> (<em>Formats:<\/em> <a href="\2">html<\/a> | <a href="\3">text<\/a>)/gi' \
       -e 's/^[ ]*\*[ ]*\([^|]*\)|\([^|]*\)|/<b>\1<\/b>(<a href="\2">\2<\/a>)/gi' \
       -e 's/^[ ]*=[ ]*\([^=]*\)/<center><h2>\1<\/h2><\/center>/gi' \
       -e 's/^[ ]*\*[ ]*\([^ ]\{2,\}\)/<a href="\1">\1<\/a>/gi' \
       -e '/<pre>/,/<\/pre>/!s/[ ]\{2\}/\&nbsp;\&nbsp;/g' \
       -e '/<pre>/,/<\/pre>/!s/^/<br>/g'
 }

 if [ "$2" != "notran" ]
 then
   print_translation_links
 fi

 #---- The file below contains a colorized table of the links
 #---- cat /var/www/utils/translator-bar.html
 printf '\n\n'

 # "$1" is quoted so file names containing whitespace are handled.
 linkdoc_to_html < "$1"

 printf '\n\n'
 printf '\n\n\n'

 if [ "$2" != "notran" ]
 then
   print_translation_links
 fi
 echo ""
 echo ""