#!/usr/bin/perl
##############################################################
# Parser for extended HTML code v1.1
# written in 1998/99 by Cyrille Artho, published under the GPL

if ($^O =~ /mac/i) { $slash = ":" }
else { $slash = "/" }                  # also works under Window~1

$dir = '/usr/local/htmlplain';         # directory where HTML PLAIN is installed

if (-e ".".$slash."config.pl") {
  require ".".$slash."config.pl";
  # prefer local config file
} else {
  require "$ENV{HOME}".$slash.".htmlplain".$slash."config.pl";
}

require "$dir".$slash."getargs.pl";
require "$dir".$slash."parsing.pl";
require "$dir".$slash."output.pl";
require "$dir".$slash."storage.pl";
require "$dir".$slash."template.pl";
require "$dir".$slash."variables.pl";
require "$dir".$slash."ldfiletable.pl";
require "$dir".$slash."buffer.pl";
require "$dir".$slash."macros.pl";
use File::Basename;
use Safe;
#use Cwd;

# fix paths, if needed
$root .= $slash; $outputPath .= $slash;
$root =~ s!$slash{2}$!$slash!;         # only one slash at the end
$outputPath =~ s!$slash{2}$!$slash!;   # only one slash at the end

# variables

$entities = { };                       # create reference to anonymous hash
$tags = { };
$tag_status = { }; # 0 = default = document overrides template; 1 = vice versa
$vars = { };
$files = { };
$HTMLfiles = [ ];                      # new anonymous array

$quotePos = $bracketPos = 0;           # global position marker

&store($tags, "href", '<a href="LINK">"NAME"</a>'); # default (required)
#&store($tags, "img", 'img src="LINK" alt=\""NAME"\" width="WIDTH" height="HEIGHT"'); # default (required)

&parseVariables($variableFile, $vars);
&parseTemplate($templateFile, $entities, $tags);
&parseFileTable($fileTable, $files, $HTMLfiles);

my @macros = split /\s*,\s*/, $macros;
#my @lines;                             # input buffer

# prepare safe Perl interpreter
$cpt = new Safe;
$cpt -> share ('&GetFileTable');
$cpt -> share ('&GetHTMLFiles');
$cpt -> share ('&GetHTMLFilenames');
$cpt -> share ('&GetFileInfo');
$cpt -> share ('&GetVar');
$cpt -> share ('&PutVar');
$cpt -> share ('&EndingTags');
$cpt -> share ('&adjustPath');
foreach my $macro (@macros) { $cpt -> share("&$macro"); }
my $conftime = getmtime("$ENV{HOME}/.htmlplain/config.pl");
my $ftabtime = getmtime($fileTable);
my $varstime = getmtime($variableFile);
my $tplttime = getmtime($templateFile);
my @times = reverse sort($conftime, $ftabtime, $varstime, $tplttime);
my $latest = shift @times;
# latest modification of configuration, filetable or whatever
my @ifState = (-1);
my @ifReturn = (-1); # arrays for if evaluation (initialized)
my $filter = 0;
chdir $root;                           # change to dir of input documents

# process command line input here - and check for input and -o output file name
foreach my $this (@$HTMLfiles) {
  if (@input) {                        # input params given
    my @fileInfo = &loadInfo($files, $this);
    $fileName = $fileInfo[0];
    my $found = 0;
    foreach $inputfile (@input) {
      if ($this =~ /$inputfile/i or $fileName =~ /$inputfile/i) {
	$found = 1;
	last;
      }
    }
    next unless $found;
  }
  $line = 0;                           # global line counter
  &clearBuffer;
  &processFile($this);
}

# end of main

sub processFile {                      # main loop for each file
  my $this = shift @_;
  my @fileInfo = &loadInfo($files, $this);
  $fileName = shift @fileInfo;

  my $mymtime = getmtime($fileName);
  my $outputFile = &uploadName($fileName);
  my $outmtime = getmtime($outputFile);
  my $this_size = shift @fileInfo;
  my $level = shift @fileInfo;
  my $dataFile = shift @fileInfo;      # variable file (data doc. in foreach)
  my $dataTemplate = shift @fileInfo;  # <!-- foreach --> document
  my $dataCount = shift @fileInfo;     # "*" in foreach

  if ($latest < $outmtime and $mymtime < $outmtime and (!$redoAll)) {
    if ((defined($dataFile) and getmtime($dataFile) < $outmtime and
	 getmtime($dataTemplate) < $outmtime) or (!defined($dataFile))) {
      print "Skipping $fileName...\n" if $verbose;
      return
    }
  }
  print "Processing $fileName...\n" if $verbose;
  
  &store($vars, "SELF", $fileName);
  &store($vars, "THIS", $this);
  &store($vars, "THIS.SIZE", $this_size);
  &store($vars, "LEVEL", $level);
  &store($vars, "BACK", '');
  
  
  if ($fastHeader) { 
    HTMLinsert(&getBufSize, @header);
  }
  if (handleFile($fileName, 0, $dataFile, $dataTemplate)) {
    unless ($fastHeader) { &mergeHeader; }
    if ($fastFooter) {
      HTMLinsert(&getBufSize, @footer);
    } else {
      &mergeFooter;
    }
  }
  finishFile($outputFile);
}

sub handleFile {
  my $fileName = shift;
  my $include = shift;
  my $dataFile = shift;
  my $dataTemplate = shift;

  unless ($include) {
    if (defined($dataFile)) {          # load <!-- foreach --> doc for data
      &parseVariables($dataFile, $vars); # use variable defs from data file
      # old variables are not saved and restored, therefore watch namespace!
      $fileName = $dataTemplate;       # use html file for generating document
    }
  }

  unless (open(FILE, $fileName)) {
    if ($include and $dataFile =~ /^<!--\s*\#include/io ) { 
      # assume file is generated dynamically and present on server
      printToBuffer($dataFile."\n");
      return;
    } else {
      &error ("Could not open file $fileName: $!\n");
    }
  }
  
  local @lines = <FILE>;               # read file
  close FILE;
  if ($lines[0] =~ /<!--\s*foreach\s+(.*?)\s+([^ ]+)\s*-->/io) { # foreach
    store($vars, $2, $dataCount);
    shift @lines;
  }
  if ($lines[0] =~ /<!--\s*skip\s*-->/io) {
    shift @lines;
    foreach (@lines) {
      &filterPat if $include;
      &printToBuffer($_);
    }
    return 0;
  }# else {
  while ($_ = &getLine) {              # read next line from buffer
    next if /^$/;                      # ignore empty lines
    &directives;
    next unless ($ifState[0] == $ifReturn[0]);
    &quoteLine;
    &filterPat if $include;            # remove $startPat .* $endPat
    while (&checkToken) {              # either only " or " first
      if ($quotePos < $bracketPos) {   # string in quotes
	&variable(1);
      }
      else { 
	if ($bracketPos != -1) {       # HTML tag, parse
	  &HTMLTag;
	}
      }
    }
    plainText($_);                     # rest of line in plain text
  }
  printToBuffer("");                   # work around bug
  return 1;
}

sub finishFile {
  my $outputFile = shift;
  &unquoteBuffer;
  unless ($ifReturn[0] == -1 and $ifState[0]==-1) {
    # take value from front away, check for consistency (previous if)
    &error("'endif' missing.");
  }
  &printBufferToFile($outputFile);
}

sub directives {
  if (s/<!--\s*if\s*//io) {            # if statement
    s/-?->$//;                         # remove trailing --
    &if_else(1);
    $_ = '';
    next;
  } elsif (/<!--\s*else\s*-->/io) {    # else statement
    if_else(0);
    next;
  } elsif (/<!--\s*endif\s*-->/io) {   # endif statement
    if_else(-1);
    next;
  } elsif (/<!--\s*\#?include (file|virtual)?=?\"?(.*?)\"?\s*-->/io) {
    # include statement
    # check whether file name is title - if so, lookup real file name
    if ((my $fullName) = &loadInfo($files, $2)) {
      handleFile($fullName, 1, $&)
    } else {
      handleFile($2, 1, $&);
    }
    $_ = "";
    next;
  }
}

sub filterPat {                        # filter out $startPat .* $endPat
  my $pre;
  if (s/$startPat//io) {
    $filter = 1;                       # filter everything until $endPat found
    $pre = $`;                         # prematch
    $_ = $';                           # postmatch
  }
  if ($filter) {                       # check for $endPat
    if (s/$endPat//io) {               # found, get string after $endPat
      $filter = 0;
      $_ = $pre . $';                  # stuff before $startPat + postmatch
    } else {                           # not found, just return $pre
      $_ = $pre;
    }
  }
}

sub mergeHeader {
  my $line;
  my $lineno = 0;
  my $prevline = 0;
  my $start = 0;
  my $end = 0;
  my $tag = "";
  my $glue = $headerLines{$glueHeader};
  unless (defined($glue)) { $glue = @header+1; }
  while (defined ($line = &getBufEntry($lineno))) {
    $lineno++;
    $line =~ s/^\s+//;                 # remove leading whitespace
    $tag = $line;
    $tag =~ s/(\s.*)//;                # remove options, if any
    $tag =~ s/>$//;
    next unless ($tag =~ /$header/io);
    $tag = lc($1);
    $tag =~ s/^<//;
    $line =~ s/>$//;
    $line =~ s/\s+$//;
    $end = $headerLines{$tag};
    if ($end == $glue) {      
      $lineno = $prevline;
      while (($line = &getBufEntry($lineno)) !~ /$tag/i) {
	$lineno++;
      }
      $line =~ s/>$//;
      $line =~ s/\s+$//;
      # tag found, override options
      $line = &overrideOptions($header[$end], $line);
#      $line = "$line";
      $prevline = &HTMLreplaceline($lineno, $line, 1);
      &HTMLinsert($prevline-1, @header[$start..$end-1]);
      $start = $end+1;
      # go beyond tag
    }
    last if ($end >= $glue);           # "glue tag" found or occured earlier
    $line = &overrideOptions($header[$end], $line);
    if ($start < $end) {
      &HTMLinsert($prevline, @header[$start..$end-1]);
      # insert slice of header array into document
    }
    if ($line =~ />.*</) { # replace complex entries like <title>...</title>
      if ($line =~ /\"$tag\"/i) {
	# ignore header unless it has a "magic tag" like replacement, such as
	# <tag>..."tag"...</tag>
	# treat as normal, but delete everything up to </tag
	&removeTags(--$lineno, $tag);
	#$lineno--;
      }
    } # normal simple tag (no closing tag and text in between)
#    $line = "$line";
    &HTMLreplaceline($lineno+$end-$start-1, $line);
    # replace line where match was found with new entry
    $lineno+= ($end-$start+1);
    # index has been moved ahead by $end-$start+1 due to insertion of lines
    # prevent replaced entries from being checked again
    $start = ++$end;
    $prevline = $lineno;
    last if $end == @header; # if all header lines found, exit
  }
  if ($end < @header-1) {
    &HTMLinsert($prevline, @header[$start..scalar @header-1]);
  }
}

sub mergeFooter {
  my $line;
  my $lineno = &getBufSize-1;
  my $prevline = $lineno;
  my $start = @footer-1; # start is LOWER end of slice here
  my $end = $start;
  my $tag = "";
  my $glue = $footerLines{$glueFooter};
  unless (defined($glue)) { $glue = -1; }
  while (defined ($line = &getBufEntry($lineno))) {
    $lineno--;
    $line =~ s/^\s+//;                 # remove leading whitespace
    $tag = $line;
    $tag =~ s/(\s.*)//;                # remove options, if any
    $tag =~ s/>$//;
    next unless ($tag =~ /$footer/io);
    $tag = lc($1);
    $tag =~ s/^<//;
#    $tag =~ s/>$//;
    $line =~ s/>$//;
    $line =~ s/\s+$//;
    $end = $footerLines{$tag};
    if ($end == $glue) {
      # same tag; go back beyond tag and override found tag
      $lineno = $prevline;
      while (($line = &getBufEntry($lineno)) !~ /$tag/i) {
	$lineno--;
      }
      $line =~ s/>$//;
      $line =~ s/\s+$//;
      # tag found, override options
      $line = &overrideOptions($footer[$end], $line);
#      $line = "$line";
      $prevline = &HTMLreplaceline($lineno, $line); # used to be ..., -1
      &HTMLinsert($prevline, @footer[$end+1..$start]);
      # go beyond tag
      $start = $end-1;
    }
    last if ($end <= $glue);           # "glue tag" found or occured earlier
    $line = &overrideOptions($footer[$end], $line);
    if ($start > $end) {
#      &HTMLinsert($prevline, $glueFooter, @footer[$end..$start-1]);
      &HTMLinsert($prevline, $glueFooter, @footer[$end+1..$start]);
      # insert slice of header array into document
    }
    if ($line =~ />.*</) { # replace complex entries like <title>...</title>
      if ($line =~ /\"$tag\"/i) { 
	# ignore header unless it has a "magic tag" like replacement, such as
	# <tag>..."tag"...</tag>
	# treat as normal, but delete everything up to </tag
#	&removeTags(++$lineno, $tag); #???
	&removeTags($lineno, $tag);
      }
    } # normal simple tag (no closing tag and text in between)
#    $line = "$line";
    &HTMLreplaceline($lineno+$start-$end+1, $line);
    # replace line where match was found with new entry
    $lineno+= ($start-$end-1);
    # index has been moved ahead by $end-$start-1 due to insertion of lines
    # prevent replaced entries from being checked again
    $start = --$end;
    $prevline = $lineno;
    last if $end < 0; # if all header lines found, exit
  }
  if ($end >= 0) {
    &HTMLinsert($prevline, @footer[0..$start]);
  }
}

sub HTMLreplaceline { # replaces an entry in the buffer but checks first for macros
  my $lineno = shift @_;
  my $line = shift @_;
  my $mode = shift @_;                 # 0 = return previous line; 1 = next line
  $line = &replaceVars($line);
  if ( $line =~ /^<%/ ) {             # macro action
    chop $line;
    my $tmp = getMacro($line);
#    &unquoteLine($tmp);
    $line = &execMacro($tmp);
  }
  &replaceline($lineno, $line);
  if ($mode) {
    return $lineno+1;
  } else {
    return $lineno-1;
  }
}

sub HTMLinsert {
  # insert a number of tags, which do not have the angular brackets around them,
  # into the buffer
  # arg1 = lineno, args = lines to insert
  my $lineno = shift @_;
  foreach my $line (@_) {
    my $out = $line;
    $out = &replaceVars($out);
    if ( $out =~ /^<%/ ) {           # macro action
      chop $out;
      my $tmp = getMacro($out);
#      &unquoteLine($tmp);
      $out = &execMacro($tmp);
    } else {
      $out = "$out\n";
    }
    &insertline($lineno, $out);
    $lineno++;
  }
}

sub getLine {                          # using global @lines
  $line++;
  my $tmp = shift @lines;
  $tmp =~s/$searchPat/$replacePat/o if $searchPat;
  return $tmp;
}

sub ungetLine {                        # using global @lines
  $line--;
  unshift @lines, $_;
}

# sub overwriteBuffer {
#  my @header = @{shift @_};
#  print @header;
#}

sub checkToken {                       # checks position of " and <
  $quotePos = index($_, "\"") % $MAX_LENGTH; # -1 becomes $MAX_LENGTH-1
  $bracketPos = index($_, "<") % $MAX_LENGTH;
  return ($bracketPos != $quotePos)    # returns true unless no " or <
}

sub variable { # handle text after a double quote as variable *or* plain text
  if (/\"(.*?)\"/) { # syntax check for "..." and extraction of var. name

    plainText($`);                     # text before quotes as plain text

    &printToBuffer(&replaceVar($1, 1));
    
    $_=$';                             # postmatch = plain text or rest of tag
  }
  else {
    &warning ("Closing quote not found.");
    /\"/;                              # search for quote
    plainText($`.'"');
    $_=$';                             # postmatch = string after quote
    return;
  }
}

sub HTMLTag {
  my ($nextLine, $closeBracket, $len, $after);
  my $tmp = substr $_, $bracketPos+1, 3;
  my $close = ">";
  my $warned = 0;
  if ($tmp eq "!--") { # HTML comment
    $close = "-->"; # closing sequence for HTML comment
    $warned = 1;
  }

  if ($tmp =~ /^%/) {
    s/^\s+//;
    $tmp = getMacro($_);
    printToBuffer (&execMacro($tmp));
    return;
  }
  while (($closeBracket = index($_, $close)) == -1) {
    if ($nextLine = &getLine) { # read next line
      &warning ("Line break within HTML tag.") unless $warned;
      $warned = 1;
    }
    else {
      &error ("Closing bracket not found, premature EOF reached."); 
      # better check next lines for < before >
    }
    last unless $nextLine;
    $_.= $nextLine; # append next line
  }

  &plainText(substr $_, 0, $bracketPos); # plain text before tag

  $len += $closeBracket - $bracketPos;
  if ($close eq "-->") {               # HTML comment
    $comment = substr $_, ++$bracketPos, ++$len;
    &printToBuffer("<$comment>");
    $_ = substr $_, $closeBracket+3;
    return;                            # treat rest in main loop
  }

  $after = substr $_, ++$closeBracket; # save text after tag

  $_ = substr $_, ++$bracketPos, --$len; # text within tag
  $_ = &replaceTag($_);

  if ( s/(^$variableNames$)//io ) {    # opening "magic tag"
    &magicTag($1, $after);
    return;                            # rest must again be parsed in main loop
  }  
  if (/^%/) {                          # macro from substitution
    $tmp = getMacro($_);
    printToBuffer (&execMacro($tmp));
    return;
  }
  if ( s/(^$killTags$)//io ) {         # opening "kill tag"
    &killTag($1, $after);
    return;                            # rest must again be parsed in main loop
  }
  $tmp = &replaceVars($_);             # replace options in quotes
  $tmp =~ s/^<//;                      # should be optimized away...
  $tmp =~ s/>$//;
  &printToBuffer("<$tmp>");
  $_ = $after;
#  &printToBuffer($close);
}

sub execMacro { # exec macro (text in arg1)
  my $macro = &replaceVars(shift @_);
  $out = $cpt -> reval($macro);          # exec macro safely
  if ($@) {
    &error ("Macro '$macro` caused ", $@, "[$out]");
  }
  return $out;
}

sub plainText { # plain text, may still contain HTML entities
  my $text;
  $text= shift @_; # take first argument
  $text = &URLify($text);              # make links from URLs
  while ($text =~ s/\&(.*?);//) {      # HTML entity found
    &printToBuffer($`);                # prematch is plain text
    &printToBuffer(&replaceEnt($1, $entities));
    $text = $'                         # check rest
  }
  &printToBuffer("$text");             # rest is now really plain text
}

sub magicTag { # handles "magic tags"; first arg: tag name, second: rest of line after tag
  my $variableName = lc(shift @_);
  $_ = shift @_;
  my $variableContent = "";
  while (!( /<\/$variableName>/i )) { # no ending tag
    $variableContent .= $_;
    unless ($_ = &getLine ) { # read next line
      &error ("Closing special tag </$variableName> not found, premature EOF reached."); 
    }
  }
  s/(.*)<\/$variableName>//i;
  $variableContent .= $1;
  $variableContent =~ s/^\s+//;
  $variableContent =~ s/\s+$//;
  $variableContent = replaceVars($variableContent);
  &store($vars, $variableName, $variableContent);
  printToBuffer("<$variableName>"); # use tag and its content
  s/^\s+//;
  $_ = $variableContent."</".$variableName.">\n";
}

sub killTag { # kill text between opening and closing tag; first arg: tag name, second: rest of line after tag
  my $variableName = shift @_;
  $_ = shift @_;
  while (!( /<\/$variableName>/i )) { # no ending tag
    unless ($_ = &getLine ) { # read next line
#      &bufferUngetch;
      # just erase tag as closing tag not found (does not always work)
      return;
    }
  }
  s/(.*)<\/$variableName>//i;
  s/^\s+//;
#  &bufferUngetch;
}

sub getmtime { # gets mtime of file
  my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime) = stat (shift @_);
  return $mtime;
}

sub if_else { # evaluate if, else, endif
  my $command = shift;                 # 1 = if, 0 = else, -1 = endif
  if ($command == 1) {
    if ($ifState[0] == $ifReturn[0]) {
      # we are inside a block which is not within another if/else block which
      # had been evaluated to false
      my $return = execMacro($_);
      $return ? unshift @ifReturn, 1 : unshift @ifReturn, 0;
      # add return value to front of list, convert any non-zero values to 1
      unshift @ifState, 1;             # add '1' to front of list
    } else {
      unshift @ifReturn, 2;            # add artificial '2' return value to list
      unshift @ifState, 1;             # add '1' to front of list
    }
    } elsif ($command == 0) {
    if ($ifState[0] != -1) {
      $ifState[0] = 0;
    } else {
      &error("'else' without previous 'if'");
    }
  } elsif ($command == -1) {
    if (shift @ifReturn != -1) {
      # take value from front away, check for consistency (previous if)
      shift @ifState;
    } else {
      &error("'endif' without previous 'if'");
    }
  } else {
    &error("Illegal argument to if_else, send bug report to cartho\@netlink.ch.");
  }
}
