#!/usr/bin/perl # CGI script checks a text string against all entries of the lexicon and # displays category and attributes if found. # (Textpresso system) # # copyright (c) Hans-Michael Muller, Pasadena, California, 2002-2004 # use strict; use CGI; use POSIX; ### globals # # handle to this CGI and own address my $query = new CGI; my $absmyself = $query->url(absolute=>1); # directories of importance my $databaseroot = "/var/www/html/text/tdb/"; my $indexlex = $databaseroot . "lex/"; # add more as you go along my $filename = $indexlex . "lexicon"; my @results = (); my $totalentries; # ### end globals; ### process form # if (($query->param('search')) || $query->param("simplestring")) { my %lexicon = ReadLexicon($filename); # read in # lexicon my %labels = ReadElements2("/var/www/html/text/textpresso.dtd"); # read in # element name my @lexiconkeys = sort keys % lexicon; # list of keys. $totalentries = @lexiconkeys; @results = SearchLexicon($query->param("simplestring"), \%lexicon, \@lexiconkeys, \%labels); } # ### PrintHeader($query); ### start form # print $query->start_form(-method => 'POST', -action => $absmyself); # ### ### present input fields # print $query->font({-face => 'verdana, helvetica', -size => '2'}, 'This browsers tests a keyword against all '); print $query->font({-face => 'verdana, helvetica', -size => '2'}, 'regular expressions in the lexicon. '); print $query->p; print $query->font({-face => 'verdana, helvetica', -size => '2'}, 'Type in '); print $query->start_u; print $query->font({-face => 'verdana, helvetica', -size => '2'}, 'one'); print $query->end_u; print $query->font({-face => 'verdana, helvetica', -size => '2'}, ' phrase, which can consist of several words: '); print $query->p; print $query->textfield(-name => 'simplestring', -size => 50, -maxlength => 255); print $query->p; print $query->submit(-name => 'search', -value => 'Test!'); print $query->font(" "); print $query->reset('Reset!'); # ### ### output goes here # if (@results) { print $query->hr; print $query->font({-face => 'verdana, helvetica', -size => '2'}, 'Matching against ', $totalentries, ' lexicon entries... '); print $query->br; foreach my $line (@results) { print $query->font({-face => 'verdana, helvetica', -size => '2'}, $line); print $query->br; } } elsif ($query->param("simplestring") ne "") { print $query->hr; print $query->font({-face => 'verdana, helvetica', -size => '2', color => 'red'}, 'The term ',$query->param("simplestring"),' did not match any of the ', $totalentries, ' lexicon entries. '); } # ### ### end form here # print $query->end_form; print $query->end_html; # ### sub PrintHeader { # begin PrintHeader my $query = shift; print $query->header; print $query->start_html(-title=>'Ontology Browser', -author=>'Hans-Michael Muller', -bgcolor=>'#e6e6f6', -text=>'#000000', -link=>'#444444', -vlink=>'#666666'); print $query->br; print $query->font({-face => 'verdana, arial, helvetica', -size => '5'}, 'Ontology Browser'); print $query->p; } # end PrintHeader sub SearchLexicon { my $string = shift; my $pLexicon = shift; my $pKeys = shift; my $pLabels = shift; my @results = (); foreach my $key (@$pKeys) { if ($string =~ /^$key$/) { if (defined($$pLabels { $ { $$pLexicon{$key} }[0]})) { my $out = "String \"$string\" "; $out .= "is matched by entry "; $out .= "\"" . $key . "\""; $out .= " which belongs to class \""; $out .= $$pLabels { $ { $$pLexicon{$key} }[0]}; $out .= "\""; if (@ { $$pLexicon{$key} } > 1) { $out .= " with the attribute(s) "; for (my $i = 2; $i < @ { $$pLexicon{$key} }; $i++) { $out .= "\"" . $ { $$pLexicon{$key} }[$i] . "\""; $out .= ", "; } chop ($out); chop ($out); $out .= "."; } push @results, $out; } } } return @results; } sub ReadLexicon { my %aux = (); my $fn = shift; open (LEXICON, "$fn"); while (my $line = ) { chomp($line); my @array = split (/\s*\#\s*/,$line); # split lines my $key = shift @array; foreach my $item (@array) { $item =~ s/\s+$//; # get rid of any white spaces at the end } $aux{ $key } = [@array]; # depending on what is read in, the table looks like this: # Hash { keyword } = [element, grammar, attribute1, attribute2, ...]; } close(LEXICON); return %aux; } sub ReadElements2 { my $dtdfile = shift; my %labels = (); open (DTDFILE, "$dtdfile") or die "Can't open DTD file $dtdfile."; while (my $line = ) { chomp ($line); if ($line =~ / <\!--\s((\w|\s)+)\s-->/) { my $key = $1; my $label = $2; if (($key ne "sentence") && ($key ne "article")){ #because pattern above is more specific, this isn't really necessary $labels { $key } = $label; } } } close (DTDFILE); return %labels; }