#########################################################################
# Generic functions for parsing text (line based)
# written in 1998/99 by Cyrille Artho, published under the GPL

sub variableName {                     # strips out everything but a-zA-Z_0-9
  my $name = $_[0];                    # make a my copy of the first argument
  $name =~ tr /A-Za-z0-9_-//cd;
  return $name;
}

sub replaceTags {                      # replace any strings within < >
  # used to replace recursive definitions
  # arg1 = text

  my $text;# = shift @_;                 # first argument
  my $out = "";
  my $before = "";                     # text before <

  my @list = splitTags(shift);

  while ($text = shift @list) {
    $out .= &replaceTag($text, $tags);
  }
  $out .= $text;                       # add text after >
  return $out;
}

sub breakUpTags {                      # break up tags and store them as array
  # arg1 = text, arg2 = reference to array, arg3 = reference to hash
  # return value =  pattern

  my $text = shift @_;                 # first argument
  my $tags = shift @_;                 # reference to array
  my $line_refs = shift @_;            # reference to hash
  my ($line, $textline);
  my $out = "^<(";
  my $macro = 0;

  my @lines = splitTags($text);

  while (my $tag = shift @lines) {
    my $tagname = lc($tag);
    if ($tagname =~ s/^<//) {
      $tagname =~ s/[ >(].*//; # take everything up to end of tag or first arg
      if ($variableNames and $tag =~ /^<($variableNames)/io) {
	while ($tag !~ /\/$tagname/i) {
	  $tag .= shift @lines;
	}
      }
      $$line_refs{$tagname} = $line;
      $out .= "$tagname|";
      $textline = "";
    }
    $textline = $tag;
    $line++;
    push (@$tags, $textline);
  }
  chop $out;
  $out .= ")";
  return $out;
}

sub replaceVars {                      # replace any strings within two quotes
  # used to replace recursive definitions
  # arg1 = text

  my $text = shift @_;                 # first argument
  my $out = "";

  while ($text =~ s/(.*?)\"(.*?)\"//s) { # replace all strings in quotes
    $out .= $1. &replaceVar($2);
  }
  $out .= $text;                       # add text after "
  return $out;
}

sub overrideOptions {
  # like replace options, but arg1 = tag with options
  # arg2 = new tag with options (not only options), return value = new tag
  my $line = shift @_;
  my $options = shift @_;
  ($options, $options) = split /\s/, $options, 2;
  # take everything after first space
  return &replaceOptions($line, " $options");
}

sub replaceOptions {                   # replaces options within HTML tags
  # arg1 = tag with options; arg2 = new options, arg3 = mode
  ###############
  # options with the same name as given ones should be overwritten rather than appended #
  # //two modes: 0) overwrite old option (used when template is recursively parsed); 1) preserve old option (used when HTML doc is processed)
  ###############
  my $old = shift @_;  # first argument
  my @oldtags = splitTags("<$old>");
  my $new = shift @_;                  # second argument
  if (shift) {                         # mode 1
    my $tag = $old;
    $tag =~ s/(\s.*)//;                # take everything up to first space
    $old = $tag.$new;
    $new = $1;
    if ($new =~ /href/i) {             # special case href: set variable LINK
      my $href = $old;
      $href =~ s/.*href\s*=//i;
      unless ($href =~ s/^\s*?(\"|\001)(.*?)\1.*/$1$2$1/) {
	# remove everything but first argument
	$href =~ s/^.*?\s/ /; # remove everything after first space
      }
      $new =~ s/\"LINK\"/$href/g; # replace LINK with link from other tag
    }
  }
  my ($tail, $text, $out);
  return $old unless ($new =~ /\S/);
  # return unchanged value if no overriding options
  foreach $old (@oldtags) {    
    $old =~ s/>$//;
    if ($old =~ s/^<//) {              # HTML tag
      unless ($old =~ s/\s+/$new /) {
	# no arguments in $old
	unless ((substr $new, 0, 1) eq " ") { $old .= " "; }
	$old .= $new;
	$out.= "<$old>";
	next;
      } # add new arguments at front
      while ($old =~ s/\s+(\w+)\s*=\s*(.*?)\s(\1)\s*=\s*(.*)/ $1=$2/i) {
	# check for duplicate options and remove them
	#print STDERR "$1==$2:$3==$4\n$old\n" if $debug;
	$tail = $4;
	# restore rest of string (everything after argument name)
	unless ($tail =~ s/^\s*?(\"|\001).*?\1\s?/ /) {
	  # remove first argument in quotes (or escaped quotes)
	  $tail = '' unless ($tail =~ s/^.*?\s/ /);
	  # keep everything after space
	}
	$old .= $tail;
      }
      $old =~ s/\s+$//;
      $out .= "<$old>";
    } else {                           # plain text (not to be expected)
      $out .= $old;
    }
  }
  $out =~ s/^<+/</;
  return $out;
}

sub closingTags {
  my $tmp = shift;
  my @tags = splitTags("<$tmp>");
  my $out;
  foreach my $tag (reverse @tags) {
    if ($tag =~ s/^</<\//) {
      $tag =~ s/ .*/>/; # remove all tag options (all text after first space)
    }
    $out .= $tag;
  }
  $out =~ s/>$//;
  return $out;
}

sub splitTags { # splits up a text into tags and plain text entries
  my $text = shift;
  my @out;
  while ($text) {
    $text =~ s/^\s+//;
    if ($text =~ /^<%/) { # macro
      push @out, ('<%'.getMacro($text).'>');
    } elsif ($text =~ s/^(<.*?>)//) { # HTML tag
      push @out, $1;
    } else {                           # plain text
      $text =~ s/([^<]*)//;
      push @out, $1;
    }
  }
  return @out;
}


sub getMacro {                         # return full macro arg.
  $old = $_[0];
  $_[0] =~ s/^<?%(\w+)//;
  my $tmp = $1;
  # now check for (
  if ($_[0] =~ s/^\s*\(//) {
    $tmp .= '(';
    while (!($_[0] =~ s/^\s*\)//)) { # check for stuff between ()
      if ($_[0] =~ s/^([^\'\"\)]+)//) {
	$tmp .= $1;
      } elsif ($_[0] =~ s/^([\"\'])\1//) { # "" or ''
	$tmp .= $1.$1;
      } elsif ($_[0] =~ s/^([\"\'])(.*?[^\\])\1//) {
	$tmp .= $1.$2.$1;
      } else {
	error("Syntax error near $_[0].");
      }
    }
    $tmp .= ')';
  }
  $_[0] =~ s/^>//;
  return $tmp;
} 

sub replaceTag {                       # replaces tag if in $tags storage
  # arg1 = text
  my $text = shift @_;
  my $out;                             # temp. storage for output
  my $redef;
  my ($tag, $options);

  $debug=1 if $text =~ /a /;
  my $index = index($text, " ");
  if ($index > 0) {
    $tag = substr($text, 0, $index);
    $options = substr($text, $index);
  }
  else
    {$tag = $text;}
  if (lc($tag) eq "img") {
    &store($vars, "WIDTH", undef);
    &store($vars, "HEIGHT", undef); # reset image size, in case src is not found
  }

  my $closing = $tag =~ s/^\///;       # remove / and note if closing tag
  if ($closing) {
    if ($redef = &load($tags, "/".lc($tag))) { # closing tag from template found
      return $redef;
    }
    else { $out = "/"; }
  }
  if ($redef = &load($tags, lc($tag))) { # tag from template found, redefine it
    if ($closing) {
      $out = "";
      $redef = &closingTags($redef);
    }
    elsif ($options) {                 # check for options
      $redef = &replaceOptions($redef, $options, load($tag_status, lc($tag))); # replace old options
    }
    $out .= $redef ;
  }
  else {
    $out .= $tag.$options ;
  }
  $debug = 0;
  return $out;
}

sub replaceEnt {                       # replaces entity if in $entities storage
  # arg1 = text
  my $ent = shift @_;               # first argument
  if ($redef = &load($entities, lc($ent))) { # found, redefine it
    return $redef;
   }
  else {
    return ("&".$ent.";");
  }
}

sub replaceVar {                       # replaces var if in $vars storage
  # arg1 = text
  # arg2 = mode (within HTML tag or plain text), important for links/images
  my $var = shift @_;                  # first argument
  my $mode = shift @_;
  my $suffix;

  $debug=0;

  if ($var =~ s/(\w+)(\#.*)/$1/) { # link like "x.html#suffix"
    $suffix = $2;
  }
  if ((my $fullName, my @info) = &loadInfo($files, $var)) {
    &store($vars, "SIZE", shift @info);
    # overwrite variable SIZE
    &store($vars, "NAME", $var);
    my $name = &adjustPath($fileName, $fullName);
    $name .= $suffix;
    if ($quoteVars) {
      &store($vars, "LINK", "\"".$name."\"");
    } else {
      &store($vars, "LINK", $name);
    }
    my $field = shift @info;            # field is either "level" or image size
    my ($width, $height) = split /x/, $field;
    if ($height) {
      if ($quoteVars) {
	&store($vars, "WIDTH", "\"".$width."\"");
	&store($vars, "HEIGHT", "\"".$height."\"");
      } else {
	&store($vars, "WIDTH", $width);
	&store($vars, "HEIGHT", $height);
      }
      if ($mode) {
	my $tag = &load($tags, "img");
	return ("<".&replaceVars($tag).">");
      }
    }
    elsif ($mode) {
      my $tag = &load($tags, "href");
      return ("<".&replaceVars($tag).">");
    }
    if ($quoteVars) {
      return "\"".$name."\"";
    } else {
      return $name;
    }
  }
  if (defined ($content = &load($vars, &variableName($var)))) { 
    # variable redefinition from dictionary found, insert value
    if ($var and &variableName($var) eq "") {
      # only non-alphanumeric chars in var. name -> not allowed -> return "$var"
      return ("\"" . $var. $suffix . "\"");     # return unchanged string
    } else {
      if (($mode) and
	  ($content =~ /\b($urls:[$any]+?)(?=[$punc]*[^$any]|$)/io)) {
	# link properly to URL
	if ($quoteVars) {
	  &store($vars, "LINK", "\"".$content.$suffix."\"");
	} else {
	  &store($vars, "LINK", $content.$suffix);
	}
	&store($vars, "NAME", $var);
	my $tag = &load($tags, "href");
	return ("<".&replaceVars($tag).">");
      } else {
	return $content;
      }
    }
   }
  elsif (defined ($content = $ENV{&variableName($var)})) {
    # try environment variable as last resort
    return $content;
  }
  else {
    return ("\"" . $var . $suffix. "\"");       # return unchanged string
  }
}

sub uploadName {
  my $name = shift @_;
  $name =~ s/^$root//io;
  return ($outputPath.$name);
}

sub adjustPath { # makes absolute path relative
  # arg1 = path of document with link, arg2 = path to be linked to
  # arg2 is an absolute path that has to be adjusted
  my $path1 = shift @_;
  my $path2 = shift @_;
  $path1 =~ s/[$slash]/\//g;             # get real slashes again
  $path2 =~ s/[$slash]/\//g;
  "$path1\t$path2" =~ /^(.*\/)(.*?)\t\1/; # find common beginning, ending with /
  my $noncommon = $2;
  $path2 =~ s/$1//;
  my $depth = $noncommon =~ tr/\///;# slashes (= directories) in non-common part
  # for every non-common directory, prepend "../" to path
  my $goback = "../" x $depth;
  return ($goback . $path2);
}

sub URLify { # taken from the Perl Cookbook, slightly modified
  my $url = shift @_;
  # this version includes end of sentence punctuation in second part
  $url =~ s/\b($urls:[$any]+?)(?=[$punc]*[^$any]|$)/<a href="$1">$1<\/a>/igo;
  # now check for e-mail addresses and make "mailto:" links
  $url =~ s/\b([\w\.]+?\@[\w\.]+?)(?=[$punc]*[^$any]|$)\b/<a href="mailto:$1">$1<\/a>/igo;
  $url =~ s/<\/a>([$punc]+)/$1<\/a>/; # move punctuation between <a> and </a>
  return $url;
  # see Perl Cookbook, chapter 6.21, for full comments
}

sub removeTag_old {
  # removes all entries in buffer from linenumber on until tag found
  # arg1 = linenumber, arg2 = tag
  my $lineno = shift @_;
  my $tag = shift @_;
  while (&getBufEntry($lineno) ne $tag) { # closing tag not yet found
    &removeline($lineno); # remove!
  }
  &removeline($lineno); # remove closing tag as well
#  &removeline($lineno); # and >
}

sub removeTags {
  # removes all entries in buffer from linenumber on until tag found
  # arg1 = linenumber, arg2 = tag
  my $lineno = shift @_;
  my $tag = shift @_;
#  if (&getBufEntry($lineno) =~ /^</) {
    # preserve <
#    &replaceline($lineno, "<");
#    $lineno++;
#  }
  if (&getBufEntry($lineno) =~ /$tag/i) {
    &removeline($lineno); # remove!
  } else {
    $lineno++;
  }
  $tag = "</$tag>";
  while (lc(&getBufEntry($lineno)) ne $tag) { # closing tag not yet found
    &removeline($lineno); # remove!
  }
  &removeline($lineno); # remove closing tag as well
#  &removeline($lineno); # and >
}

sub EndingTags { # returns closing tags for a bunch of opening tags
  # (in reversed order)
  my @tags = split />\s*</, shift @_;  # get all tags
  my $out;
  foreach $tag (reverse @tags) {
    $tag =~ s/^<//;
    $tag =~ s/>$//;                  # trim brackets for first/last tag
    $tag =~ s/\s.*//;                # remove any options
    if ($tag) {
      $out .= "</$tag>";
    }
  }
  return $out;
}
1;
