package DTD; use strict; use FileHandle; my @tokens; my %macro; # to store global DTD-defined ENTITIES #===================================# # CLASS METHODS # #===================================# sub new { my $proto = shift; my $class = ref($proto) || $proto; ### Create basic object: my $self = {}; bless $self, $class; ### Build, if needed: return (@_ ? $self->build(@_) : $self); } #===================================# # INSTANCE METHODS # #===================================# sub build { my ($self,$DTDfilename,$rootobj) = @_; my $inputref; $inputref = new FileHandle $DTDfilename, "r"; unless($inputref) { die "Can't open DTD file $inputref\n"; } # Read in the entire file and split it into tokens. my $rec_sep = $/; $/ = undef; my $dtdtext = <$inputref>; my @words = split(/(|\/\/.*?\n||'|"|\(|\)|\*|\?|\+|,|\s+|\|)/s,$dtdtext); @tokens = grep {length($_)} @words; map { $_ =~ s#^//.*\n#\n#} @tokens; # replace all // comments with \n my $token = ''; # DTD input loop while(defined($token)) { $token = nonspaceToken(); if(!defined($token)) { last; } # a comment if ($token eq ''; if ($token =~ /--/) { print "Illegal characters '--' within comment.\n"; } } while ($token = nextToken()); } } elsif($token eq 'elementTag(); next; } if ($token eq 'ATTLIST') { &putbackToken($token); &putbackToken($prefix); $self->attributeTag(); next; } if ($token eq 'ENTITY') { &putbackToken($token); &putbackToken($prefix); $self->entityTag(); next; } if ($token eq 'NOTATION') { &putbackToken($token); &putbackToken($prefix); $self->notationTag(); next; } if($token eq '[') { print "'$token' tags not yet handled."; next; } #print "reached end of main loop $token\n"; #print "$token\n"; } else { print "Unknown start to tag: $token\n"; next; } } $/ = $rec_sep; $self->{dbmapIndex} = {}; $self->indexDatabaseMap($rootobj,''); return $self; } #======================================================# sub sessionrootTag { my $self = shift; my $token; # quick error check; should not happen $token = &nextToken(); if($token ne '') { $self->{$elementname}->{'sessionRoot'} = $sesspath; return 1; } else { print "Unknown close to SESSIONROOT tag: '$token'\n"; return 0; } } #======================================================# sub sessionmapTag { my $self = shift; my $token; # quick error check; should not happen $token = &nextToken(); if($token ne '')) { my $index = $token; if(!($index =~ /^[0-9]*$/)) { print "Illegal index '$index' in SESSIONMAP tag."; return 0; } my $sessvar; if(illegalAttValue(\$sessvar)) { print "Illegal session variable '$sessvar'\n"; return 0; } push(@mapping,$sessvar); $token = nonspaceToken(); } if($token eq '-->') { $self->{$elementname}->{'sessionMap'} = \@mapping; return 1; } else { print "Unknown close to SESSIONMAP tag: '$token'\n"; return 0; } } #======================================================# sub entityTag { my $self = shift; my $token; # quick error check; should not happen $token = &nextToken(); if($token ne '') { print "Unknown close to ENTITY tag: '$token'\n"; return 0; } } #======================================================# sub notationTag { my $self = shift; my $token; # quick error check; should not happen $token = &nextToken(); if($token ne '') { putbackToken($token); if(illegalSystemLiteral(\$token)) { print "Illegal System Literal '$token'\n"; return 0; } $sysLiteral = $token; } else { putbackToken($token); } } $token = nonspaceToken(); if($token ne '>') { print "Unknown close to NOTATION tag: '$token'\n"; return 0; } my %hsh; $hsh{'notationType'} = $type; $hsh{'pubLiteral'} = $pubLiteral; $hsh{'sysLiteral'} = $sysLiteral; $self->{$elementname}{'notation'} = \%hsh; return 1; } #======================================================# sub attributeTag { my $self = shift; my $token; # quick error check; should not happen $token = &nextToken(); if($token ne '{$elementname}->{'content'})) { print "Warning: Element $elementname referenced in ATTRIBUTE ", "tag has not yet been declared. Attributes will still ", "be set.\n"; my %hsh; $attlist = \%hsh; } else { if (exists $self->{$elementname}->{'attlist'}) { $attlist = $self->{$elementname}->{'attlist'}; } else { my %hsh; $attlist = \%hsh; } } my $retval = 1; while($retval == 1) { $token = nonspaceToken(); if($token ne '>') { putbackToken($token); $retval = &attDefinition($attlist); } else { # tag closed with '>' if(!defined($self->{$elementname})) { my %hsh; $self->{$elementname} = \%hsh; } $self->{$elementname}->{'attlist'} = $attlist; return $retval; } } return $retval; } #======================================================# # Read in an element tag and store it in the correct format in the DTD. # Element tags have the form: # # There are 4 rule types: ANY, EMPTY, Mixed, and Children. sub elementTag { my $self = shift; my $token; # quick error check; should not happen $token = &nextToken(); if($token ne '{$elementname}->{'content'})) { print "Element $elementname redeclared in ELEMENT tag."; return 0; } my $rule; if(exists($self->{$elementname})) { $rule = $self->{$elementname}; } else { my %hsh; $rule = \%hsh; } my $type = ''; my $content; $token = nonspaceToken(); # catch the simple rules, ANY and EMPTY if ($token eq 'ANY') { $type = 'value'; $content = $token; } elsif ($token eq 'EMPTY') { $type = 'value'; $content = $token; } # handle the simple rules, ANY and EMPTY if ($type eq 'value') { $rule->{'type'} = $type; $rule->{'cardinality'} = ''; $rule->{'content'} = $content; $token = &nonspaceToken(); # we should now be at the end of the rule if($token eq '>') { # end of rule $self->{$elementname} = $rule; return 1; } else { print "Malformed ELEMENT tag $token; expecting '>'...\n"; return 0; } } # Mixed and Children rules from here on # A Mixed rule must start with '(#PCDATA' -> we'll take advantage # of this, which is a start a Children rule cannot have. # First error-check: look for '(' if($token ne '(') { print "Illegal rule: expected '('; instead: $token\n"; return 0; } my $prev_tok = $token; $token = &nonspaceToken(); # Now look for '#PCDATA' if ($token eq '#PCDATA') { # we have a Mixed rule putbackToken($token); putbackToken($prev_tok); my $retval = &mixedRule($rule); if ($retval) { $token = nonspaceToken(); if ($token eq '>') { $self->{$elementname} = $rule; } else { print "Warning: Expected '>' to close ELEMENT tag; ", "instead received '$token'\n"; $retval = 0; } } return $retval; } else { # we have a Children rule putbackToken($token); putbackToken($prev_tok); my $retval = &childrenRule($rule); if ($retval) { $token = nonspaceToken(); if ($token eq '>') { $self->{$elementname} = $rule; } else { print "Warning: Expected '>' to close ELEMENT tag; ", "instead received '$token'\n"; $retval = 0; } } return $retval; } } #======================================================# sub databaseMapTag { my $self = shift; my $token; #---------------------------------------# # quick error check; should not happen # #---------------------------------------# $token = &nextToken(); if ($token ne '') { $tokstring .= $token; $token=&nextToken(); } if($token eq '-->') { my @ary = split("\n",$tokstring); my $line = shift(@ary); $line =~ s#//.*$##; my ($lhs,$rhs); if ($line =~ /(\S+)\s*:\s*'?(.*\S)/) { $lhs = $1; $rhs = $2; $rhs =~ s/'//; } # first lhs must be [table] while ($lhs =~ /^\[table\]$/i && @ary) { my %fieldmap; #rhs = db table name, handle multiple entries of same table push (@{ $maphash->{$rhs} }, \%fieldmap); # remaining lines map database field name to DTD attribute name | DTD function # or contain metadata in [] while (@ary) { my $line = shift(@ary); $line =~ s#//.*$##; if ($line =~ /(\S+)\s*:\s*(.*\S)/) { $lhs = $1; $rhs = $2; } else {$lhs = $rhs = '';} if ($lhs =~ /^\[table\]$/i) { $rhs =~ s/'//g; last; } if ($rhs =~ //) { # parse recursively, to any depth $self->parseParentTranslate($lhs,\%fieldmap,\@ary,$elementname); } elsif ($rhs =~ /||/) { # skip these, they are for Delphi (i.e. store operations) } elsif ($rhs =~ //) { # skip this is for storing from submittalform } elsif ($lhs =~ /\[shareRowDataWith\]/i) { my @children; $fieldmap{$lhs} = \@children; while (@ary) { $line = shift(@ary); $line =~ s#//.*$##; last if $line =~ /\s*\)/; if ($line =~ /(\S+)/) { push(@children,$1); } } if ($line !~ /\s*\)/) { print "DATABASEMAP improperly formed for $elementname:\n"; print "\tMissing ending ')' on shareRowDataWith\n"; } } elsif ($lhs =~ /\[elementid\]/i) { $fieldmap{$lhs} = $rhs; } else { $fieldmap{$lhs} = $rhs if $lhs =~ /\S/; } } } if (@ary) { print "DATABASEMAP improperly formed for $elementname: no [table] found\n"; } return 1; } else { print "Unknown close to DATABASEMAP tag for element $elementname\n"; return 0; } } #======================================================# # recursive routine sub indexDatabaseMap { my ($self,$rootobj,$parentPath) = @_; # 1. index our databasemap, if it exists if (exists($self->{$rootobj}->{databasemap})) { my $map = $self->{$rootobj}->{databasemap}; foreach my $table (keys %$map) { my $aref = $map->{$table}; # handle multiple entries of same table foreach my $hsh (@$aref) { my $key = "$table"; my @ary = ($key); # concat flagtype/datatype/purpose/flagvalue[3|2] to key if Flag if ($key eq 'Flag') { # The order of these next if statements IS important!! if (exists($hsh->{flagtype})) { &indexAttribute('flagtype',$hsh,\@ary,$rootobj,$parentPath); } if (exists($hsh->{datatype})) { &indexAttribute('datatype',$hsh,\@ary,$rootobj,$parentPath); } if (exists($hsh->{flagpurpose})) { &indexAttribute('flagpurpose',$hsh,\@ary,$rootobj,$parentPath); } if (exists($hsh->{flagvalue3})) { my $fv3 = $hsh->{flagvalue3}; if ((ref($fv3) eq 'HASH') || $fv3 =~ /^'/) { &indexAttribute('flagvalue3',$hsh,\@ary,$rootobj,$parentPath); } } # concat 'Extflag' to $flagkey if there is an Extflag entry # if [elementid] is recordid. Usually it is flagrecordid, # meaning extflag is in a 1:1 relationship to flag and # there is no need for a separate index and entity if (exists($map->{'Extflag'}) && $map->{'Extflag'}[0]->{'[elementid]'} eq 'recordid') { push(@ary,'Extflag'); } if (exists($hsh->{flagvalue2})) { my $fv2 = $hsh->{flagvalue2}; if ((ref($fv2) eq 'HASH') || $fv2 =~ /^'/) { &indexAttribute('flagvalue2',$hsh,\@ary,$rootobj,$parentPath); } } $key = join("|",@ary); } # concat linktable to key if Contlink or Prodline elsif ($key =~ /^(Contlink|Prodline)$/) { if (exists($hsh->{linktable})) { &indexAttribute('linktable',$hsh,\@ary,$rootobj,$parentPath); } $key = join("|",@ary); } # $self->{dbmapIndex}->{$key}->{dbmap} = $map; # this info stored elsewhere # eliminate quotes around literal values $key =~ s/'//g; $self->{dbmapIndex}->{$key}->{startpath}->{"$parentPath/$rootobj"} = 1; } } } # 2. index all of our children if (ref ($self->{$rootobj}->{content}) eq 'ARRAY') { foreach my $childhash ( @{ $self->{$rootobj}->{content} }) { $self->indexDatabaseMap($childhash->{content}, "$parentPath/$rootobj"); } } } #======================================================# sub indexAttribute { my ($attr,$hsh,$dbfields,$rootobj,$parentPath) = @_; my $attrval = $hsh->{$attr}; return unless length($attrval); if (ref($attrval) eq 'HASH') { # find our DTD parent as a key in the translate hash (from parentTranslate) # and add its value to the key my @parents = split("/",$parentPath); shift (@parents) if $parents[0] =~ /^\s*$/; # eliminate blank at top my $done = 0; my $parent; while (!$done && @parents) { $parent = pop(@parents); if (exists($attrval->{$parent})) { my $val = $attrval->{$parent}; if (ref($val) eq 'HASH') { $attrval = $val; } else { push(@$dbfields,$val); $done = 1; } } } if (!$done) { print "parentTranslate: Can't find $parent as key to resolve $attr for $parentPath/$rootobj\n"; } } else { push (@$dbfields,$attrval); } } #======================================================# # recurse through any level of nested tags in the source sub parseParentTranslate { my ($self,$lhs,$dsthash,$srcary,$elementname) = @_; my $line; if (!exists($dsthash->{$lhs})) { $dsthash->{$lhs} = {}; } my $xlate = $dsthash->{$lhs}; while (@$srcary) { $line = shift(@$srcary); $line =~ s#//.*$##; # remove trailing comment, if any last if $line =~ /^\s*\)\s*$/; my ($lhs2,$rhs2); if ($line =~ /(\S+)\s*:\s*'?(.*\S)/) # i.e. lhs: rhs or lhs: 'rhs' { $lhs2 = $1; $rhs2 = $2; $rhs2 =~ s/'//; if ($rhs2 =~ //) { $self->parseParentTranslate($lhs2,$xlate,$srcary,$elementname); } else { $xlate->{$lhs2} = $rhs2; } } } if ($line !~ /^\s*\)\s*$/) { print "DATABASEMAP improperly formed for $elementname:\n"; print "\tMissing ending ')' on parentTranslate\n"; } } #======================================================# sub locatePath { my ($self,$path) = @_; # special handling of 'site' path component # 1. if $path eq 'site' exactly, # return deal/site # 2. else # strip off site and continue if ($path eq 'site') { return 'deal/site'; # FIXME go to $self } else { $path =~ s#^site/##; } } #===================================# # AUXILIARY PROCEDURES # #===================================# # Children ELEMENT Rule sub childrenRule { my ($ruleref) = @_; my @compare; my $token = &nonspaceToken(); if($token ne '(') { print "Illegal rule: expected '('; received '$token'\n"; return 0; } my $end = 0; my $type = ''; my $cardinality = ''; while(!$end) { $token = &nonspaceToken(); if($token eq '(') { my %subrule; putbackToken($token); childrenRule(\%subrule); push(@compare,\%subrule); } elsif(!illegalName($token)) { my %item; %item->{'type'} = 'value'; %item->{'content'} = $token; # could be followed by a cardinality $token = nextToken(); if(($token eq '*') || ($token eq '+') || ($token eq '?')) { %item->{'cardinality'} = $token; } else { %item->{'cardinality'} = ''; putbackToken($token); } push(@compare,\%item); } else { print "Warning: Expected '(' or legal Name; instead received '$token'\n"; return 0; } $token = nonspaceToken(); if($token eq ')') { if($type eq '') { $type = ','; } $end = 1; } elsif($token eq '|') { if($type eq '') { $type = $token; } elsif($token ne $type) { print "Warning: Mixed separators: previous_type=$type new_type=$token\n"; } } elsif($token eq ',') { if($type eq '') { $type = $token; } elsif($token ne $type) { print "Warning: Mixed separators: previous_type=$type new_type=$token\n"; } } } # a ')' has been reached $token = nextToken(); if(($token eq '*') || ($token eq '+') || ($token eq '?')) { $cardinality = $token; } else { putbackToken($token); } $ruleref->{'cardinality'} = $cardinality; $ruleref->{'type'} = $type; $ruleref->{'content'} = \@compare; return 1; } #======================================================# # Mixed ELEMENT Rule # Mixed ::= '(' ? '#PCDATA' (? '|' ? Name)* # ? ')*' # ::= '(' ? '#PCDATA' ? ')' # # These can be treated as one rule, as long as we remember that if # it is the 1st version, there must be an * on the end. sub mixedRule { my ($ruleref) = @_; my @compare; my $token = &nonspaceToken(); if($token ne '(') { print "Illegal rule: expected '('; received $token\n"; return 0; } my $end = 0; while(!$end) { $token = &nonspaceToken(); # catch end of file if($token eq '') { print "Unexpected end of file encountered within Mixed ELEMENT tag\n"; return 0; } push(@compare,$token); if(scalar @compare == 1) { if(@compare[0] ne '#PCDATA') { print "Expected #PCDATA as first element; instead '@compare[0]'\n"; return 0; } } elsif(illegalName($token)) { print "Illegal name $token in ELEMENT tag."; return 0; } $token = &nonspaceToken(); if($token eq ')') { $end = 1; } elsif($token ne '|') { print "Warning: Expected '|' or ')'; received $token\n"; } } my $cardinality = ''; # a ')' has been reached; if there is more than one element, there # must be a * on the end, with no whitespace in b/w $token = nextToken(); if($token eq '*') { $cardinality = $token; } else { putbackToken($token); if(scalar @compare > 1) { print "Expected '*' after Mixed ELEMENT tag with multiple ", "choices; instead '$token'\n"; return 0; } } $ruleref->{'cardinality'} = $cardinality; if(@compare > 1) { $ruleref->{'type'} = 'choice'; my @content; for(my $i=0;$i<@compare;$i++) { my %hsh; %hsh->{'type'} = 'value'; %hsh->{'cardinality'} = ''; %hsh->{'content'} = @compare[$i]; push(@content,\%hsh); } $ruleref->{'content'} = \@content; } else { $ruleref->{'type'} = 'value'; $ruleref->{'content'} = @compare[0]; } return 1; } #======================================================# # Attribute Definition Rule sub attDefinition { my ($attlist) = @_; my $token; my $attName = nonspaceToken(); if(illegalName($attName)) { print "Illegal name $attName in ATTRIBUTE tag."; return 0; } my $attType; my $attTypeValue = nonspaceToken(); #-------------------------------# # perform macro substitutions # #-------------------------------# if ($attTypeValue =~ /^[%&](.*)/) { $attTypeValue = $macro{$1}; } if ($attTypeValue eq 'CDATA') { $attType = 'StringType'; } elsif ( grep /^\$attTypeValue$/, qw(ID IDREF IDREFS ENTITY ENTITIES NMTOKEN NMTOKENS)) { $attType = 'TokenizedType'; } elsif (grep /^\$attTypeValue$/, qw(NOTATION NOTATIONS)) { print "NOTATION attributes are not currently supported\n"; return 0; } elsif ($attTypeValue eq '(') { # Handle enumerations $attType = 'Enumeration'; my @compare; my $end = 0; while(!$end) { $token = nonspaceToken(); if(illegalName($token)) { print "Illegal name $token in ATTLIST tag."; return 0; } push(@compare,$token); $token = nonspaceToken(); if($token eq ')') { $end = 1; } elsif($token ne '|') { print "Expecting '|' or ')'; instead received '$token'\n"; return 0; } } $attTypeValue = \@compare; } # Handle the default declaration my $defaultValue = ''; my $defaultType = nonspaceToken(); if(($defaultType eq '#REQUIRED') || ($defaultType eq '#IMPLIED')) { $defaultValue = ''; } else { if($defaultType eq '#FIXED') { $defaultValue = nonspaceToken(); } else { $defaultValue = $defaultType; $defaultType = 'given'; } putbackToken($defaultValue); if(illegalAttValue(\$defaultValue)) { print "Illegal default value in Attribute tag: '$defaultValue'\n"; return 0; } # Check that the default value works for the attribute type if((($attTypeValue eq 'ID') || ($attTypeValue eq 'IDREF') || ($attTypeValue eq 'IDREFS') || ($attTypeValue eq 'ENTITY') || ($attTypeValue eq 'ENTITIES')) && (illegalName($defaultValue))) { print "Illegal default value in Attribute tag: '$defaultValue'\n"; return 0; } elsif($attType eq 'Enumeration') { my $found = 0; for(@$attTypeValue) { if($_ eq $defaultValue) { $found = 1; last; } } if(!$found) { print "Illegal default value in Attribute tag: '$defaultValue'\n"; return 0; } } } my %attribute; %attribute->{'type'} = $attType; %attribute->{'typevalue'} = $attTypeValue; %attribute->{'defaultType'} = $defaultType; %attribute->{'defaultValue'} = $defaultValue; $attlist->{$attName} = \%attribute; return 1; } #======================================================# # decides whether a CDATA attribute value is legal, as it is # being read in. sub illegalAttValue { my ($value) = @_; # a reference $$value = nonspaceToken(); if(($$value ne "'") && ($$value ne '"')) { return 1; } my $first = $$value; my $next = nextToken(); $$value = ''; while($next ne $first) { if($next =~ /[<&]/) { return 1; } $$value .= $next; $next = nextToken(); if(!defined($next)) { return 1; } } return 0; } #======================================================# sub illegalSystemLiteral { my ($value) = @_; my $first = nonspaceToken(); my $next = nextToken(); $$value = ''; while($next ne $first) { $$value .= $next; $next = nextToken(); if(!defined($next)) { return 1; } } return 0; } #======================================================# sub illegalPublicLiteral { my ($value) = @_; my $first = nonspaceToken(); my $next = nextToken(); $$value = ''; while($next ne $first) { if($next =~ /[^-'()+,.\/:=?;!*#\@\$_\% \r\na-zA-Z0-9<&]/) { return 1; } $$value .= $next; $next = nextToken(); if(!defined($next)) { return 1; } } return 0; } #======================================================# sub nonspaceToken { my $token = ''; while($token eq '') { $token = &nextToken(); if(!defined($token)) { return($token); } $token =~ s/[\s\r\n]//g; } return($token); } #======================================================# sub quotedToken # e.g. "(A | B | C)" { my $token; do { $token = &nextToken(); } while ($token ne '"'); my $resStr; $token = &nextToken(); while ($token ne '"') { $resStr .= $token; $token = &nextToken(); } $resStr =~ s/[\r\n]//g; return($resStr); } #======================================================# sub nextToken { return(shift(@tokens)); } #======================================================# sub putbackToken { my $token = shift; unshift(@tokens, $token); } #======================================================# # Name restrictions: # no <> # start with letter or _ # after start: letters, numbers, -, ., _ # no xml (any case) at start # : only for namespace - I only check if it looks reasonable sub illegalName { my ($name) = @_; $_ = $name; return(!/^[_a-zA-Z][-.:_a-zA-Z0-9]*$/ || /^xml/i || /.*:.*:.*/ || /.*:$/); } 1;