I made two changes to my search engine:
The first "bug" I found concerns htdig, and shows up when I add the meta description to the "keywords" list (keywords_meta_tag_names).
In that case the old HTML.cc didn't store the description inside the DocumentRef object at all.
I've changed the source so that the meta description is always stored, and then, if "description" is listed in the keywords_meta_tag_names directive, its words are also indexed as keywords.
Please take a look at it. I'm not sure it's the best solution, but I hope it's useful too ...
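To make the idea concrete, here is a condensed sketch of the new logic in do_tag() (simplified from the HTML.cc below, so the meta description trimming is left out; 'cache' holds the value of the meta "name" attribute):
// Always store the description in the DocumentRef first ...
if (mystrcasecmp(cache, "description") == 0 && strlen(conf["content"]) != 0)
{
    meta_dsc = conf["content"];
    retriever.got_meta_dsc(meta_dsc);
    // ... (its words are also indexed in the new slot 11)
}
// ... and only if the meta name is listed in keywords_meta_tag_names,
// index the content words with the keyword weight (slot 10) as before:
if (keywordsMatch.CompareWord(cache))
{
    char *w = strtok(conf["content"], " ,\t\r\n");
    while (w)
    {
        if (strlen(w) >= minimumWordLength)
            retriever.got_word(w, 1, 10);
        w = strtok(0, " ,\t\r\n");
    }
}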
The other change concerns the htsearch program:
In Display.cc, method: String *Display::excerpt(DocumentRef *ref, String urlanchor, int fanchor, int first)
//////////// Begin of changes
char* head;
int use_meta_description=0;
if (config.Boolean("use_meta_description",0)
&& strlen(ref->DocMetaDsc()) != 0)
{
head = ref->DocMetaDsc();
use_meta_description=1;
}
else head = ref->DocHead();
// Use the description, if one was found, when "use_meta_description"
// is set to true. If "excerpt_show_top" is set to true, htsearch
// starts the excerpt at the top of the head. Otherwise the default
// behaviour applies.
if (config.Boolean("excerpt_show_top", 0) || use_meta_description )
first = 0;
else
first = allWordsPattern->FindFirstWord(head, which, length);
/////// Etc ...
Let's suppose I set use_meta_description to true.
Then head points to the description, if one was found. OK?
Before this change, when I searched for a word that was not contained in the meta description,
first = allWordsPattern->FindFirstWord(head, which, length);
set first to -1, so the meta description was never shown ...
Now that call is skipped when use_meta_description is set (and a description was found).
This way it seems to work properly - at least for me ... :-)
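For reference, these are the configuration attributes involved; the values below are just the ones I use for testing, not a recommendation:
use_meta_description:    true
excerpt_show_top:        false
keywords_meta_tag_names: keywords description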
Thanx to all of you
Ciao
Gabriele
//
// HTML.cc
//
// Implementation of HTML
// Class to parse HTML documents and return useful information to the Retriever
//
//
#if RELEASE
static char RCSid[] = "$Id: HTML.cc,v 1.30 1999/01/28 05:20:19 ghutchis Exp $";
#endif
#include "htdig.h"
#include "HTML.h"
#include "SGMLEntities.h"
#include "Configuration.h"
#include <ctype.h>
#include "StringMatch.h"
#include "StringList.h"
#include "URL.h"
static StringMatch tags;
static StringMatch attrs;
static StringMatch srcMatch;
static StringMatch hrefMatch;
static StringMatch keywordsMatch;
//*****************************************************************************
// HTML::HTML()
//
HTML::HTML()
{
//
// Initialize the patterns that we will try to match.
// The tags Match object is used to match tag commands while
// the attrs Match object is used to match names of tag parameters.
//
tags.IgnoreCase();
tags.Pattern("title|/title|a|/a|h1|h2|h3|h4|h5|h6|/h1|/h2|/h3|/h4|/h5|/h6|noindex|/noindex|img|li|meta|frame|area|base");
attrs.IgnoreCase();
attrs.Pattern("src|href|name");
srcMatch.IgnoreCase();
srcMatch.Pattern("src");
hrefMatch.IgnoreCase();
hrefMatch.Pattern("href");
//String keywordNames = config["keywords_meta_tag_names"];
//keywordNames.replace(' ', '|');
//keywordNames.remove(",\t\r\n");
//keywordsMatch.IgnoreCase();
//keywordsMatch.Pattern(keywordNames);
StringList keywordNames(config["keywords_meta_tag_names"], " \t");
keywordsMatch.IgnoreCase();
keywordsMatch.Pattern(keywordNames.Join('|'));
keywordNames.Release();
word = 0;
href = 0;
title = 0;
description = 0;
head = 0;
meta_dsc = 0;
tag = 0;
in_title = 0;
in_ref = 0;
in_heading = 0;
base = 0;
doindex = 1;
dofollow = 1;
minimumWordLength = config.Value("minimum_word_length", 3);
}
//*****************************************************************************
// HTML::~HTML()
//
HTML::~HTML()
{
}
//*****************************************************************************
// void HTML::parse(Retriever &retriever, URL &baseURL)
// Parse the HTML document using the Retriever object for all the callbacks.
// The HTML document contents are contained in the contents String.
//
void
HTML::parse(Retriever &retriever, URL &baseURL)
{
if (contents == 0 || contents->length() == 0)
return;
base = &baseURL;
//
// We have some variables which will contain the various items we
// are looking for
//
int offset = 0;
int in_space = 0;
unsigned char *q, *start;
unsigned char *position = (unsigned char *) contents->get();
unsigned char *text = (unsigned char *) new char[contents->length()+1];
unsigned char *ptext = text;
static char *skip_start = config["noindex_start"];
static char *skip_end = config["noindex_end"];
title = 0;
head = 0;
meta_dsc = 0;
doindex = 1;
dofollow = 1;
in_heading = 0;
in_title = 0;
in_ref = 0;
in_space = 0;
while (*position)
{
//
// Filter out section marked to be ignored for indexing.
// This can contain any HTML.
//
if (strncmp((char *)position, skip_start, strlen(skip_start)) == 0)
{
q = (unsigned char*)strstr((char *)position, skip_end);
if (!q)
*position = '\0'; // Rest of document will be skipped...
else
position = q + strlen(skip_end);
continue;
}
if (strncmp((char *)position, "<!", 2) == 0)
{
//
// Possible comment declaration (but could be DTD declaration!)
// A comment can contain other '<' and '>':
// we have to skip complete comment declarations,
// and of course DTD declarations as well.
//
position += 2; // Get past declaration start
while (*position)
{
// Let's see if the declaration ends here
if (*position == '>')
{
position++;
break; // End of comment declaration
}
// Not the end of the declaration yet:
// we'll try to find an actual comment
if (strncmp((char *)position, "--", 2) == 0)
{
// Found start of comment - now find the end
position += 2;
q = (unsigned char*)strstr((char *)position, "--");
if (!q)
{
*position = '\0';
break; // Rest of document seems to be a comment...
}
position = q + 2;
}
else
{
// Not a comment declaration after all
// but possibly DTD: get to the end
q = (unsigned char*)strstr((char *)position, ">");
if (q)
{
position = q + 1;
break;
// End of (whatever) declaration
}
else
{
*position = '\0'; // Rest of document is DTD?
break;
}
}
// Skip whitespace after an individual comment
while (isspace(*position))
position++;
}
continue;
}
if (*position == '<')
{
//
// Start of a tag. Since tags cannot be nested, we can simply
// search for the closing '>'
//
q = (unsigned char*)strchr((char *)position, '>');
if (q)
{ // copy tag
while (position <= q)
*ptext++ = *position++;
}
else
{ // copy rest of text, as tag does not end
while (*position)
*ptext++ = *position++;
}
}
else if (*position == '&')
{
*ptext = SGMLEntities::translateAndUpdate(position);
if (*ptext == '<')
{
*ptext = ' ';
}
ptext++;
}
else
{
*ptext++ = *position++;
}
}
*ptext++ = '\0';
position = text;
start = position;
while (*position)
{
offset = position - start;
// String = 0 is expensive
// word = 0;
if (*position == '<')
{
//
// Start of a tag. Since tags cannot be nested, we can simply
// search for the closing '>'
//
q = (unsigned char*)strchr((char *)position, '>');
if (!q)
break; // Syntax error in the doc. Tag never ends.
tag = 0;
tag.append((char*)position, q - position + 1);
do_tag(retriever, tag);
position = q+1;
}
else if (*position > 0 && (isalnum(*position)))
{
//
// Start of a word. Try to find the whole thing
//
word = 0;
in_space = 0;
while (*position &&
(isalnum(*position) ||
strchr(valid_punctuation, *position)))
{
word << (char)*position;
position++;
}
if (in_title && doindex)
{
title << word;
}
if (in_ref)
{
description << word;
if (description.length() > max_description_length)
{
description << " ...";
if (dofollow)
retriever.got_href(*href, description);
in_ref = 0;
description = 0;
}
}
if (head.length() < max_head_length && doindex && !in_title)
{
//
// Capitalize H1 and H2 blocks
// (This is currently disabled until we can capitalize
// non-ASCII characters -GRH
// if (in_heading > 1 && in_heading < 4)
// {
// word.uppercase();
// }
//
// Append the word to the head (excerpt)
//
head << word;
}
if (word.length() >= minimumWordLength && doindex)
{
retriever.got_word(word,
int(offset * 1000 / contents->length()),
in_heading);
}
}
else
{
//
// Characters that are not part of a word
//
if (doindex)
{
if (isspace(*position))
{
//
// Reduce all multiple whitespace to a single space
//
if (!in_space)
{
if (head.length() < max_head_length)
{
head << ' ';
}
if (in_ref)
{
description << ' ';
}
if (in_title)
{
title << ' ';
}
}
in_space = 1;
}
else
{
//
// Not whitespace
//
if (head.length() < max_head_length && !in_title)
{
// We don't want to add random chars to the
// excerpt if we're in the title.
head << *position;
}
if (in_ref)
{
description << *position;
}
if (in_title)
{
title << *position;
}
in_space = 0;
}
}
position++;
}
}
retriever.got_head(head);
delete [] text; // allocated with new char[], so use array delete
}
//*****************************************************************************
// void HTML::do_tag(Retriever &retriever, String &tag)
//
void
HTML::do_tag(Retriever &retriever, String &tag)
{
char *position = tag.get() + 1; // Skip the '<'
char *q, *t;
int which, length;
while (isspace(*position))
position++;
which = -1;
if (tags.CompareWord(position, which, length) < 0)
return; // Nothing matched.
if (debug > 3)
cout << "Tag: " << position << ", matched " << which << endl;
switch (which)
{
case 0: // "title"
in_title = 1;
in_heading = 1;
break;
case 1: // "/title"
in_title = 0;
in_heading = 0;
retriever.got_title(title);
break;
case 2: // "a"
{
which = -1;
int pos;
while ((pos = attrs.FindFirstWord(position, which, length)) >= 0)
{
position += pos + length;
if (debug > 1)
cout << "A tag: pos = " << pos << ", position = " << position <<
endl;
switch (which)
{
case 1: // "href"
{
//
// a href seen
//
while (*position && *position != '=')
position++;
if (!*position)
return;
position++;
while (isspace(*position))
position++;
//
// Allow either single quotes or double quotes
// around the URL itself
//
if (*position == '"'||*position == '\'')
{
position++;
q = strchr(position, position[-1]);
if (!q)
break;
//
// We seem to have matched the opening quote char
// Mark the end of the quotes as our endpoint, so
// that we can continue parsing after the current
// text
//
*q = '\0';
//
// If a '#' is present in a quoted URL,
// treat that as the end of the URL, but we skip
// past the quote to parse the rest of the anchor.
//
if ((t = strchr(position, '#')) != NULL)
*t = '\0';
}
else
{
q = position;
while (*q &&
*q != '>' &&
!isspace(*q) && // *q != '?' ???? -grh
*q != '#')
q++;
*q = '\0';
}
delete href;
href = new URL(position, *base);
in_ref = 1;
description = 0;
position = q + 1;
break;
}
case 2: // "name"
{
//
// a name seen
//
while (*position && *position != '=')
position++;
if (!*position)
return;
position++;
while (isspace(*position))
position++;
//
// Allow either single quotes or double quotes
// around the URL itself
//
if (*position == '"'||*position == '\'')
{
position++;
q = strchr(position, position[-1]);
if (!q)
break;
//
// We seem to have matched the opening quote char
// Mark the end of the quotes as our endpoint, so
// that we can continue parsing after the current
// text
//
*q = '\0';
//
// If a '#' is present in a quoted URL,
// treat that as the end of the URL, but we skip
// past the quote to parse the rest of the anchor.
//
if ((t = strchr(position, '#')) != NULL)
*t = '\0';
}
else
{
q = position;
while (*q && *q != '>' && !isspace(*q))
q++;
*q = '\0';
}
retriever.got_anchor(position);
position = q + 1;
break;
}
default:
break;
}
}
break;
}
case 3: // "/a"
if (in_ref)
{
if (dofollow)
retriever.got_href(*href, description);
in_ref = 0;
}
break;
case 4: // "h1"
in_heading = 2;
break;
case 5: // "h2"
in_heading = 3;
break;
case 6: // "h3"
in_heading = 4;
break;
case 7: // "h4"
in_heading = 5;
break;
case 8: // "h5"
in_heading = 6;
break;
case 9: // "h6"
in_heading = 7;
break;
case 10: // "/h1"
case 11: // "/h2"
case 12: // "/h3"
case 13: // "/h4"
case 14: // "/h5"
case 15: // "/h6"
in_heading = 0;
break;
case 16: // "noindex"
doindex = 0;
dofollow = 0;
break;
case 17: // "/noindex"
doindex = 1;
dofollow = 1;
break;
case 18: // "img"
{
which = -1;
int pos = attrs.FindFirstWord(position, which, length);
if (pos < 0 || which != 0)
break;
position += pos + length;
while (*position && *position != '=')
position++;
if (!*position)
break;
position++;
while (isspace(*position))
position++;
//
// Allow either single quotes or double quotes
// around the URL itself
//
if (*position == '"'||*position == '\'')
{
position++;
q = strchr(position, position[-1]);
if (!q)
break;
//
// We seem to have matched the opening quote char
// Mark the end of the quotes as our endpoint, so
// that we can continue parsing after the current
// text
//
*q = '\0';
//
// If a '#' is present in a quoted URL,
// treat that as the end of the URL, but we skip
// past the quote to parse the rest of the anchor.
//
if ((t = strchr(position, '#')) != NULL)
*t = '\0';
}
else
{
q = position;
while (*q && *q != '>' && !isspace(*q))
q++;
*q = '\0';
}
retriever.got_image(position);
break;
}
case 19: // "li"
if (doindex && head.length() < max_head_length)
head << "* ";
break;
case 20: // "meta"
{ position += length;
Configuration conf;
conf.NameValueSeparators("=");
conf.Add(position);
//
// First test for old-style meta tags (these break any
// reasonable DTD...)
//
if (conf["htdig-noindex"])
{
retriever.got_noindex();
doindex = 0;
dofollow = 0;
}
if (conf["htdig-index"])
{
doindex = 1;
dofollow = 1;
}
if (conf["htdig-email"])
{
retriever.got_meta_email(conf["htdig-email"]);
}
if (conf["htdig-notification-date"])
{
retriever.got_meta_notification(conf["htdig-notification-date"]);
}
if (conf["htdig-email-subject"])
{
retriever.got_meta_subject(conf["htdig-email-subject"]);
}
if (conf["htdig-keywords"] || conf["keywords"])
{
//
// Keywords are added as being at the very top of the
// document and have a weight factor of
// keywords-factor which is assigned to slot 10 in the
// factor table.
//
char *keywords = conf["htdig-keywords"];
if (!keywords)
keywords = conf["keywords"];
char *w = strtok(keywords, " ,\t\r\n");
while (w)
{
if (strlen(w) >= minimumWordLength)
retriever.got_word(w, 1, 10);
w = strtok(0, " ,\t\r\n");
}
w = '\0';
}
if (conf["http-equiv"])
{
// <META HTTP-EQUIV=REFRESH case
if (mystrcasecmp(conf["http-equiv"], "refresh") == 0
&& conf["content"])
{
char *content = conf["content"];
char *q = mystrcasestr(content, "url=");
if (q && *q)
{
q += 4; // skipping "URL="
char *qq = q;
while (*qq && (*qq != ';') && (*qq != '"') &&
!isspace(*qq))qq++;
*qq = 0;
URL *href = new URL(q, *base);
// I don't know why anyone would do this, but hey...
if (dofollow)
retriever.got_href(*href, "");
delete href;
}
}
}
//
// Now check for <meta name=... content=...> tags that
// fly with any reasonable DTD out there
//
if (conf["name"] && conf["content"])
{
char *cache = conf["name"];
which = -1; // What does it do?
// First of all, check for META description
if (mystrcasecmp(cache, "description") == 0
&& strlen(conf["content"]) != 0)
{
//
// We need to do two things. First grab the description
//
meta_dsc = conf["content"];
if (meta_dsc.length() > max_meta_description_length)
meta_dsc = meta_dsc.sub(0, max_meta_description_length).get();
if (debug > 1)
cout << "META Description: " << conf["content"] << endl;
retriever.got_meta_dsc(meta_dsc);
//
// Now add the words to the word list
// (slot 11 is the new slot for this)
//
char *w = strtok(conf["content"], " \t\r\n");
while (w)
{
if (strlen(w) >= minimumWordLength)
retriever.got_word(w, 1, 11);
w = strtok(0, " \t\r\n");
}
w = '\0';
}
if (keywordsMatch.CompareWord(cache))
{
char *w = strtok(conf["content"], " ,\t\r\n");
while (w)
{
if (strlen(w) >= minimumWordLength)
retriever.got_word(w, 1, 10);
w = strtok(0, " ,\t\r\n");
}
w = '\0';
}
else if (mystrcasecmp(cache, "htdig-email") == 0)
{
retriever.got_meta_email(conf["content"]);
}
else if (mystrcasecmp(cache, "htdig-notification-date") == 0)
{
retriever.got_meta_notification(conf["content"]);
}
else if (mystrcasecmp(cache, "htdig-email-subject") == 0)
{
retriever.got_meta_subject(conf["content"]);
}
else if (mystrcasecmp(cache, "htdig-noindex") == 0)
{
retriever.got_noindex();
doindex = 0;
dofollow = 0;
}
else if (mystrcasecmp(cache, "robots") == 0
&& strlen(conf["content"]) !=0)
{
String content_cache = conf["content"];
if (content_cache.indexOf("noindex") != -1)
{
doindex = 0;
retriever.got_noindex();
}
else if (content_cache.indexOf("nofollow") != -1)
dofollow = 0;
else if (content_cache.indexOf("none") != -1)
{
doindex = 0;
dofollow = 0;
retriever.got_noindex();
}
}
}
else if (conf["name"] &&
mystrcasecmp(conf["name"], "htdig-noindex") == 0)
{
retriever.got_noindex();
doindex = 0;
dofollow = 0;
}
break;
}
case 21: // frame
{
which = -1;
int pos = srcMatch.FindFirstWord(position, which, length);
position += pos + length;
switch (which)
{
case 0: // "src"
{
//
// src seen
//
while (*position && *position != '=')
position++;
if (!*position)
return;
position++;
while (isspace(*position))
position++;
//
// Allow either single quotes or double quotes
// around the URL itself
//
if (*position == '"'||*position == '\'')
{
position++;
q = strchr(position, position[-1]);
if (!q)
break;
//
// We seem to have matched the opening quote char
// Mark the end of the quotes as our endpoint, so
// that we can continue parsing after the current
// text
//
*q = '\0';
//
// If a '#' is present in a quoted URL,
// treat that as the end of the URL, but we skip
// past the quote to parse the rest of the anchor.
//
if ((t = strchr(position, '#')) != NULL)
*t = '\0';
}
else
{
q = position;
while (*q &&
*q != '>' &&
!isspace(*q) && // *q != '?' ??? -grh
*q != '#')
q++;
*q = '\0';
}
delete href;
href = new URL(position, *base);
if (dofollow)
{
description = 0;
retriever.got_href(*href, description);
in_ref = 0;
}
break;
}
break;
}
break;
}
case 22: // area
{
which = -1;
int pos = hrefMatch.FindFirstWord(position, which, length);
position += pos + length;
switch (which)
{
case 0: // "href"
{
//
// src seen
//
while (*position && *position != '=')
position++;
if (!*position)
return;
position++;
while (isspace(*position))
position++;
//
// Allow either single quotes or double quotes
// around the URL itself
//
if (*position == '"'||*position == '\'')
{
position++;
q = strchr(position, position[-1]);
if (!q)
break;
//
// We seem to have matched the opening quote char
// Mark the end of the quotes as our endpoint, so
// that we can continue parsing after the current
// text
//
*q = '\0';
//
// If a '#' is present in a quoted URL,
// treat that as the end of the URL, but we skip
// past the quote to parse the rest of the anchor.
if ((t = strchr(position, '#')) != NULL)
*t = '\0';
}
else
{
q = position;
while (*q &&
*q != '>' &&
!isspace(*q) && // *q != '?' ???? --grh
*q != '#')
q++;
*q = '\0';
}
delete href;
href = new URL(position, *base);
if (dofollow)
{
description = 0;
retriever.got_href(*href, description);
in_ref = 0;
}
break;
}
default:
break;
}
break;
}
case 23: // base
{
which = -1;
int pos = hrefMatch.FindFirstWord(position, which, length);
position += pos + length;
switch (which)
{
case 0: // "href"
{
while (*position && *position != '=')
position++;
if (!*position)
return;
position++;
while (isspace(*position))
position++;
//
// Allow either single quotes or double quotes
// around the URL itself
//
if (*position == '"'||*position == '\'')
{
position++;
q = strchr(position, position[-1]);
if (!q)
break;
//
// We seem to have matched the opening quote char
// Mark the end of the quotes as our endpoint, so
// that we can continue parsing after the current
// text
//
*q = '\0';
//
// If a '#' is present in a quoted URL,
// treat that as the end of the URL, but we skip
// past the quote to parse the rest of the anchor.
//
// Is there a better way of looking for these?
//
if ((t = strchr(position, '#')) != NULL)
*t = '\0';
}
else
{
q = position;
while (*q &&
*q != '>' &&
!isspace(*q) && // *q != '?' ??? -grh
*q != '#')
q++;
*q = '\0';
}
URL tempBase(position, *base);
*base = tempBase;
}
}
}
default:
return; // Nothing...
}
}
//
// Display.cc
//
// Implementation of Display
// Takes results of search and fills in the HTML templates
//
//
#if RELEASE
static char RCSid[] = "$Id: Display.cc,v 1.54 1999/02/05 03:51:38 ghutchis Exp $";
#endif
#include "htsearch.h"
#include "Display.h"
#include "ResultMatch.h"
#include "WeightWord.h"
#include "StringMatch.h"
#include "QuotedStringList.h"
#include "URL.h"
#include <fstream.h>
#include <stdio.h>
#include <ctype.h>
#include <syslog.h>
#include "HtURLCodec.h"
//*****************************************************************************
//
Display::Display(char *indexFile, char *docFile)
{
docIndex = Database::getDatabaseInstance();
docIndex->OpenRead(indexFile);
// Check "uncompressed"/"uncoded" urls at the price of time
// (extra DB probes).
docDB.SetCompatibility(config.Boolean("uncoded_db_compatible", 1));
docDB.Read(docFile);
limitTo = 0;
excludeFrom = 0;
// needExcerpt = 0;
templateError = 0;
maxStars = config.Value("max_stars");
maxScore = 100;
setupImages();
if (!templates.createFromString(config["template_map"]))
{
// Error in createFromString.
// Let's try the default template_map
config.Add("template_map",
"Long builtin-long builtin-long Short builtin-short builtin-short");
if (!templates.createFromString(config["template_map"]))
{
// Unrecoverable Error
// (No idea why this would happen)
templateError = 1;
}
}
currentTemplate = templates.get(config["template_name"]);
if (!currentTemplate)
{
//
// Must have been some error. Resort to the builtin-long (slot 0)
//
currentTemplate = (Template *) templates.templates[0];
}
if (!currentTemplate)
{
//
// Another error!? Time to bail out...
//
templateError = 1;
}
// if (mystrcasestr(currentTemplate->getMatchTemplate(), "excerpt"))
// needExcerpt = 1;
}
//*****************************************************************************
Display::~Display()
{
delete docIndex;
}
//*****************************************************************************
//
void
Display::display(int pageNumber)
{
List *matches = buildMatchList();
int currentMatch = 0;
int numberDisplayed = 0;
ResultMatch *match = 0;
int number = config.Value("matches_per_page");
int startAt = (pageNumber - 1) * number;
if (config.Boolean("logging"))
{
logSearch(pageNumber, matches);
}
setVariables(pageNumber, matches);
//
// The first match is guaranteed to have the highest score of
// all the matches. We use this to compute the number of stars
// to display for all the other matches.
//
match = (ResultMatch *) (*matches)[0];
if (!match)
{
//
// No matches.
//
delete matches;
cout << "Content-type: text/html\r\n\r\n";
displayNomatch();
return;
}
// maxScore = match->getScore(); // now done in buildMatchList()
cout << "Content-type: text/html\r\n\r\n";
String wrap_file = config["search_results_wrapper"];
String *wrapper = 0;
char *header = 0, *footer = 0;
if (wrap_file.length())
{
wrapper = readFile(wrap_file.get());
if (wrapper && wrapper->length())
{
char wrap_sepr[] = "HTSEARCH_RESULTS";
char *h = wrapper->get();
char *p = strstr(h, wrap_sepr);
if (p)
{
if (p > h && p[-1] == '$')
{
footer = p + strlen(wrap_sepr);
header = h;
p[-1] = '\0';
}
else if (p > h+1 && p[-2] == '$' &&
(p[-1] == '(' || p[-1] == '{') &&
(p[strlen(wrap_sepr)] == ')' ||
p[strlen(wrap_sepr)] == '}'))
{
footer = p + strlen(wrap_sepr) + 1;
header = h;
p[-2] = '\0';
}
}
}
}
if (header)
expandVariables(header);
else
displayHeader();
//
// Display the window of matches requested.
//
if (currentTemplate->getStartTemplate())
{
expandVariables(currentTemplate->getStartTemplate());
}
matches->Start_Get();
while ((match = (ResultMatch *)matches->Get_Next()) &&
numberDisplayed < number)
{
if (currentMatch >= startAt)
{
match->setRef(docDB[match->getURL()]);
DocumentRef *ref = match->getRef();
if (!ref)
continue; // The document isn't present for some reason
ref->DocAnchor(match->getAnchor());
ref->DocScore(match->getScore());
displayMatch(match,currentMatch+1);
numberDisplayed++;
}
currentMatch++;
}
if (currentTemplate->getEndTemplate())
{
expandVariables(currentTemplate->getEndTemplate());
}
if (footer)
expandVariables(footer);
else
displayFooter();
if (wrapper)
delete wrapper;
delete matches;
}
//*****************************************************************************
// Return true if the specified URL should be counted towards the results.
int
Display::includeURL(char *url)
{
if (limitTo && limitTo->FindFirst(url) < 0)
{
return 0;
}
else
{
if (excludeFrom &&
excludeFrom->hasPattern() &&
excludeFrom->FindFirst(url) >= 0)
return 0;
else
return 1;
}
}
//*****************************************************************************
void
Display::displayMatch(ResultMatch *match, int current)
{
String *str = 0;
DocumentRef *ref = match->getRef();
char *url = match->getURL();
vars.Add("URL", new String(url));
int iA = ref->DocAnchor();
int fanchor = 0;
String *anchor = new String();
if (iA > 0) // if an anchor was found
{
List *anchors = ref->DocAnchors();
if (anchors->Count() > 0)
{
fanchor = 1;
*anchor << "#" << ((String*) (*anchors)[iA-1])->get();
vars.Add("ANCHOR", anchor);
}
}
//
// no condition for determining excerpt any more:
// we need it anyway to see if an anchor is relevant
//
int first = -1;
String urlanchor(url);
urlanchor << anchor;
vars.Add("EXCERPT", excerpt(ref, urlanchor, fanchor, first));
//
// anchor only relevant if an excerpt was found, i.e.,
// the search expression matches the body of the document
// instead of only META keywords.
//
if (first < 0)
{
vars.Remove("ANCHOR");
}
vars.Add("SCORE", new String(form("%d", match->getScore())));
vars.Add("CURRENT", new String(form("%d", current)));
char *title = ref->DocTitle();
if (!title || !*title)
{
if ( strcmp(config["no_title_text"], "filename") == 0 )
{
// use actual file name
title = strrchr(url, '/');
if (title)
{
title++; // Skip slash
str = new String(form("[%s]", title));
}
else
// URL without '/' ??
str = new String("[No title]");
}
else
// use configure 'no title' text
str = new String(config["no_title_text"]);
}
else
str = new String(title);
vars.Add("TITLE", str);
vars.Add("STARSRIGHT", generateStars(ref, 1));
vars.Add("STARSLEFT", generateStars(ref, 0));
vars.Add("SIZE", new String(form("%d", ref->DocSize())));
vars.Add("SIZEK", new String(form("%d",
(ref->DocSize() + 1023) / 1024)));
if (maxScore != 0)
{
int percent = (int)(ref->DocScore() * 100 / (double)maxScore);
if (percent <= 0)
percent = 1;
vars.Add("PERCENT", new String(form("%d", percent)));
}
else
vars.Add("PERCENT", new String("100"));
{
str = new String();
char buffer[100];
time_t t = ref->DocTime();
if (t)
{
struct tm *tm = localtime(&t);
// strftime(buffer, sizeof(buffer), "%e-%h-%Y", tm);
if (config.Boolean("iso_8601"))
{
strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S %Z", tm);
}
else
{
strftime(buffer, sizeof(buffer), "%x", tm);
}
*str << buffer;
}
vars.Add("MODIFIED", str);
}
vars.Add("HOPCOUNT", new String(form("%d", ref->DocHopCount())));
vars.Add("DOCID", new String(form("%d", ref->DocID())));
vars.Add("BACKLINKS", new String(form("%d", ref->DocBackLinks())));
{
str = new String();
List *list = ref->Descriptions();
int n = list->Count();
for (int i = 0; i < n; i++)
{
*str << ((String*) (*list)[i])->get() << "<br>\n";
}
vars.Add("DESCRIPTIONS", str);
vars.Add("DESCRIPTION", ((String*) (*list)[1]));
}
expandVariables(currentTemplate->getMatchTemplate());
}
//*****************************************************************************
void
Display::setVariables(int pageNumber, List *matches)
{
String tmp;
int i;
int nMatches = 0;
if (matches)
nMatches = matches->Count();
int matchesPerPage = config.Value("matches_per_page");
int nPages = (nMatches + matchesPerPage - 1) / matchesPerPage;
if (nPages < 1)
nPages = 1; // We always have at least one page...
vars.Add("MATCHES_PER_PAGE", new String(config["matches_per_page"]));
vars.Add("MAX_STARS", new String(config["max_stars"]));
vars.Add("CONFIG", new String(config["config"]));
vars.Add("VERSION", new String(config["version"]));
vars.Add("RESTRICT", new String(config["restrict"]));
vars.Add("EXCLUDE", new String(config["exclude"]));
if (mystrcasecmp(config["match_method"], "and") == 0)
vars.Add("MATCH_MESSAGE", new String("all"));
else if (mystrcasecmp(config["match_method"], "or") == 0)
vars.Add("MATCH_MESSAGE", new String("some"));
vars.Add("MATCHES", new String(form("%d", nMatches)));
vars.Add("PLURAL_MATCHES", new String(nMatches == 1 ? (char *)"" : (char *)"s"));
vars.Add("PAGE", new String(form("%d", pageNumber)));
vars.Add("PAGES", new String(form("%d", nPages)));
vars.Add("FIRSTDISPLAYED",
new String(form("%d", (pageNumber - 1) *
matchesPerPage + 1)));
if (nPages > 1)
vars.Add("PAGEHEADER", new String(config["page_list_header"]));
else
vars.Add("PAGEHEADER", new String(config["no_page_list_header"]));
i = pageNumber * matchesPerPage;
if (i > nMatches)
i = nMatches;
vars.Add("LASTDISPLAYED", new String(form("%d", i)));
vars.Add("CGI", new String(getenv("SCRIPT_NAME")));
String *str;
char *format = input->get("format");
String *in;
vars.Add("SELECTED_FORMAT", new String(format));
str = new String();
*str << "<select name=format>\n";
for (i = 0; i < templates.displayNames.Count(); i++)
{
in = (String *) templates.internalNames[i];
*str << "<option value=\"" << in->get() << '"';
if (format && mystrcasecmp(in->get(), format) == 0)
{
*str << " selected";
}
*str << '>' << ((String*)templates.displayNames[i])->get() << '\n';
}
*str << "</select>\n";
vars.Add("FORMAT", str);
str = new String();
QuotedStringList ml(config["method_names"], " \t\r\n");
*str << "<select name=method>\n";
for (i = 0; i < ml.Count(); i += 2)
{
*str << "<option value=" << ml[i];
if (mystrcasecmp(ml[i], config["match_method"]) == 0)
*str << " selected";
*str << '>' << ml[i + 1] << '\n';
}
*str << "</select>\n";
vars.Add("METHOD", str);
vars.Add("SELECTED_METHOD", new String(config["match_method"]));
str = new String();
QuotedStringList sl(config["sort_names"], " \t\r\n");
char *st = config["sort"];
StringMatch datetime;
datetime.IgnoreCase();
datetime.Pattern("date|time");
*str << "<select name=sort>\n";
for (i = 0; i < sl.Count(); i += 2)
{
*str << "<option value=" << sl[i];
if (mystrcasecmp(sl[i], st) == 0 ||
datetime.Compare(sl[i]) && datetime.Compare(st) ||
mystrncasecmp(sl[i], st, 3) == 0 &&
datetime.Compare(sl[i]+3) && datetime.Compare(st+3))
*str << " selected";
*str << '>' << sl[i + 1] << '\n';
}
*str << "</select>\n";
vars.Add("SORT", str);
vars.Add("SELECTED_SORT", new String(st));
//
// If a paged output is required, set the appropriate variables
//
if (nMatches > config.Value("matches_per_page"))
{
if (pageNumber > 1)
{
str = new String("<a href=\"");
tmp = 0;
createURL(tmp, pageNumber - 1);
*str << tmp << "\">" << config["prev_page_text"] << "</a>";
}
else
{
str = new String(config["no_prev_page_text"]);
}
vars.Add("PREVPAGE", str);
if (pageNumber < nPages)
{
str = new String("<a href=\"");
tmp = 0;
createURL(tmp, pageNumber + 1);
*str << tmp << "\">" << config["next_page_text"] << "</a>";
}
else
{
str = new String(config["no_next_page_text"]);
}
vars.Add("NEXTPAGE", str);
str = new String();
char *p;
QuotedStringList pnt(config["page_number_text"], " \t\r\n");
QuotedStringList npnt(config["no_page_number_text"], " \t\r\n");
if (nPages > config.Value("maximum_pages", 10))
nPages = config.Value("maximum_pages");
for (i = 1; i <= nPages; i++)
{
if (i == pageNumber)
{
p = npnt[i - 1];
if (!p)
p = form("%d", i);
*str << p << ' ';
}
else
{
p = pnt[i - 1];
if (!p)
p = form("%d", i);
*str << "<a href=\"";
tmp = 0;
createURL(tmp, i);
*str << tmp << "\">" << p << "</a> ";
}
}
vars.Add("PAGELIST", str);
}
StringList form_vars(config["allow_in_form"], " \t\r\n");
String* key;
for (i= 0; i < form_vars.Count(); i++)
{
if (config[form_vars[i]])
{
key= new String(form_vars[i]);
key->uppercase();
vars.Add(key->get(), new String(config[form_vars[i]]));
}
}
}
//*****************************************************************************
void
Display::createURL(String &url, int pageNumber)
{
String s;
int i;
url << getenv("SCRIPT_NAME") << '?';
if (input->exists("restrict"))
s << "restrict=" << input->get("restrict") << '&';
if (input->exists("exclude"))
s << "exclude=" << input->get("exclude") << '&';
if (input->exists("config"))
s << "config=" << input->get("config") << '&';
if (input->exists("method"))
s << "method=" << input->get("method") << '&';
if (input->exists("format"))
s << "format=" << input->get("format") << '&';
if (input->exists("sort"))
s << "sort=" << input->get("sort") << '&';
if (input->exists("matchesperpage"))
s << "matchesperpage=" << input->get("matchesperpage") << '&';
if (input->exists("words"))
s << "words=" << input->get("words") << '&';
StringList form_vars(config["allow_in_form"], " \t\r\n");
for (i= 0; i < form_vars.Count(); i++)
{
if (input->exists(form_vars[i]))
{
s << form_vars[i] << '=' << input->get(form_vars[i]) << '&';
}
}
s << "page=" << pageNumber;
encodeURL(s);
url << s;
}
//*****************************************************************************
void
Display::displayHeader()
{
displayParsedFile(config["search_results_header"]);
}
//*****************************************************************************
void
Display::displayFooter()
{
displayParsedFile(config["search_results_footer"]);
}
//*****************************************************************************
void
Display::displayNomatch()
{
displayParsedFile(config["nothing_found_file"]);
}
//*****************************************************************************
void
Display::displaySyntaxError(char *message)
{
cout << "Content-type: text/html\r\n\r\n";
setVariables(0, 0);
vars.Add("SYNTAXERROR", new String(message));
displayParsedFile(config["syntax_error_file"]);
}
//*****************************************************************************
void
Display::displayParsedFile(char *filename)
{
FILE *fl = fopen(filename, "r");
char buffer[1000];
while (fl && fgets(buffer, sizeof(buffer), fl))
{
expandVariables(buffer);
}
if (fl)
fclose(fl);
}
//*****************************************************************************
// If the star images need to depend on the URL of the match, we need
// an efficient way to determine which image to use. To do this, we
// will build a StringMatch object with all the URL patterns and also
// a List parallel to that pattern that contains the actual images to
// use for each URL.
//
void
Display::setupImages()
{
char *starPatterns = config["star_patterns"];
if (!starPatterns || !*starPatterns)
{
//
// Set the StringMatch object up so that it will never match
// anything. We know that '<' is an illegal character for
// URLs, so this will effectively disable the matching.
//
URLimage.Pattern("<<<");
}
else
{
//
// The starPatterns string will have pairs of values. The first
// value of a pair will be a pattern, the second value will be an
// URL to an image.
//
char *token = strtok(starPatterns, " \t\r\n");
String pattern;
while (token)
{
//
// First token is a pattern...
//
pattern << token << '|';
//
// Second token is an URL
//
token = strtok(0, " \t\r\n");
URLimageList.Add(new String(token));
if (token)
token = strtok(0, " \t\r\n");
}
pattern.chop(1);
URLimage.Pattern(pattern);
}
}
//*****************************************************************************
String *
Display::generateStars(DocumentRef *ref, int right)
{
int i;
String *result = new String();
char *image = config["star_image"];
char *blank = config["star_blank"];
double score;
if (maxScore != 0)
{
score = ref->DocScore() / (double)maxScore;
}
else
{
maxScore = ref->DocScore();
score = 1;
}
int nStars = int(score * (maxStars - 1) + 0.5) + 1;
if (right)
{
for (i = 0; i < maxStars - nStars; i++)
{
*result << "<img src=\"" << blank << "\" alt=\" \">";
}
}
int match = 0;
int length = 0;
int status = URLimage.FindFirst(ref->DocURL(), match, length);
if (status >= 0 && match >= 0)
{
image = ((String*) URLimageList[match])->get();
}
for (i = 0; i < nStars; i++)
{
*result << "<img src=\"" << image << "\" alt=\"*\">";
}
if (!right)
{
for (i = 0; i < maxStars - nStars; i++)
{
*result << "<img src=\"" << blank << "\" alt=\" \">";
}
}
*result << "\n";
return result;
}
//*****************************************************************************
String *
Display::readFile(char *filename)
{
FILE *fl;
String *s = new String();
char line[1024];
fl = fopen(filename, "r");
while (fl && fgets(line, sizeof(line), fl))
{
*s << line;
}
return s;
}
//*****************************************************************************
void
Display::expandVariables(char *str)
{
int state = 0;
String var = "";
while (str && *str)
{
switch (state)
{
case 0:
if (*str == '\\')
state = 1;
else if (*str == '$')
state = 3;
else
cout << *str;
break;
case 1:
cout << *str;
state = 0;
break;
case 2:
//
// We have a complete variable in var. Look it up and
// see if we can find a good replacement for it.
//
outputVariable(var);
var = "";
if (*str == '$')
state = 3;
else if (*str == '\\')
state = 1;
else
{
state = 0;
cout << *str;
}
break;
case 3:
if (*str == '(' || *str == '{')
state = 4;
else if (isalpha(*str) || *str == '_')
{
var << *str;
state = 5;
}
else
state = 0;
break;
case 4:
if (*str == ')' || *str == '}')
state = 2;
else if (isalpha(*str) || *str == '_')
var << *str;
else
state = 0;
break;
case 5:
if (isalpha(*str) || *str == '_')
var << *str;
else if (*str == '$')
state = 6;
else
{
state = 2;
continue;
}
break;
case 6:
//
// We have a complete variable in var. Look it up and
// see if we can find a good replacement for it.
//
outputVariable(var);
var = "";
if (*str == '(' || *str == '{')
state = 4;
else if (isalpha(*str) || *str == '_')
{
var << *str;
state = 5;
}
else
state = 0;
break;
}
str++;
}
if (state == 5)
{
//
// The end of string was reached, but we are still trying to
// put a variable together. Since we now have a complete
// variable, we will look up the value for it.
//
outputVariable(var);
}
}
//*****************************************************************************
void
Display::outputVariable(char *var)
{
String *temp;
char *ev;
// We have a complete variable name in var. Look it up and
// see if we can find a good replacement for it, either in our
// vars dictionary or in the environment variables.
temp = (String *) vars[var];
if (temp)
cout << *temp;
else
{
ev = getenv(var);
if (ev)
cout << ev;
}
}
//*****************************************************************************
List *
Display::buildMatchList()
{
char *id;
String coded_url, url;
ResultMatch *thisMatch;
List *matches = new List();
double backlink_factor = config.Double("backlink_factor");
double date_factor = config.Double("date_factor");
SortType typ = sortType();
results->Start_Get();
while ((id = results->Get_Next()))
{
//
// Convert the ID to a URL
//
if (docIndex->Get(id, coded_url) == NOTOK)
{
continue;
}
// No special precautions re: the option
// "uncoded_db_compatible" need to be taken.
url = HtURLCodec::instance()->decode(coded_url);
if (!includeURL(url.get()))
{
continue;
}
thisMatch = new ResultMatch();
thisMatch->setURL(url);
thisMatch->setRef(NULL);
//
// Get the actual document record into the current ResultMatch
//
// thisMatch->setRef(docDB[thisMatch->getURL()]);
//
// Assign the incomplete score to this match. This score was
// computed from the word database only, no excerpt context was
// known at that time, or info about the document itself,
// so this still needs to be done.
//
DocMatch *dm = results->find(id);
double score = dm->score;
// We need to scale based on date relevance and backlinks
// Other changes to the score can happen now
// Or be calculated by the result match in getScore()
// This formula derived through experimentation
// We want older docs to have smaller values and the
// ultimate values to be a reasonable size (max about 100)
if (date_factor != 0.0 || backlink_factor != 0.0 || typ != SortByScore)
{
DocumentRef *thisRef = docDB[thisMatch->getURL()];
if (thisRef) // We better hope it's not null!
{
score += date_factor *
((thisRef->DocTime() * 1000 / (double)time(0)) - 900);
int links = thisRef->DocLinks();
if (links == 0)
links = 1; // It's a hack, but it helps...
score += backlink_factor
* (thisRef->DocBackLinks() / (double)links);
if (score <= 0.0)
score = 0.0;
if (typ != SortByScore)
{
DocumentRef *sortRef = new DocumentRef();
sortRef->DocTime(thisRef->DocTime());
if (typ == SortByTitle)
sortRef->DocTitle(thisRef->DocTitle());
thisMatch->setRef(sortRef);
}
}
// Get rid of it to free the memory!
delete thisRef;
}
thisMatch->setIncompleteScore(score);
thisMatch->setAnchor(dm->anchor);
//
// Append this match to our list of matches.
//
matches->Add(thisMatch);
}
//
// The matches need to be ordered by relevance level.
// Sort it.
//
sort(matches);
return matches;
}
//*****************************************************************************
String *
Display::excerpt(DocumentRef *ref, String urlanchor, int fanchor, int first)
{
char *head;
int use_meta_description=0;
if (config.Boolean("use_meta_description",0)
&& strlen(ref->DocMetaDsc()) != 0)
{
// Set the head to point to description
head = ref->DocMetaDsc();
use_meta_description=1;
}
else head = ref->DocHead();
int which, length;
char *temp = head;
String part;
String *text = new String();
// Use the description, if one was found, when "use_meta_description"
// is set to true. If "excerpt_show_top" is set to true, htsearch
// starts the excerpt at the top of the head. Otherwise the default
// behaviour applies.
if (config.Boolean("excerpt_show_top", 0) || use_meta_description )
first = 0;
else
first = allWordsPattern->FindFirstWord(head, which, length);
if (first < 0 && config.Boolean("no_excerpt_show_top"))
first = 0; // No excerpt, but we want to show the top.
if (first < 0)
{
//
// No excerpt available, don't show top, so display message
//
if (config["no_excerpt_text"][0])
{
*text << config["no_excerpt_text"];
}
}
else
{
int headLength = strlen(head);
int length = config.Value("excerpt_length", 50);
char *start;
char *end;
if (!config.Boolean("add_anchors_to_excerpt"))
// negate flag if it's on (anchor available)
fanchor = 0;
//
// Figure out where to start the excerpt. Basically we go back
// half the excerpt length from the first matched word
//
start = &temp[first] - length / 2;
if (start < temp)
start = temp;
else
{
*text << config["start_ellipses"];
while (*start && isalpha(*start))
start++;
}
//
// Figure out the end of the excerpt.
//
end = start + length;
if (end > temp + headLength)
{
end = temp + headLength;
*text << hilight(start, urlanchor, fanchor);
}
else
{
while (*end && isalpha(*end))
end++;
*end = '\0';
*text << hilight(start, urlanchor, fanchor);
*text << config["end_ellipses"];
}
}
return text;
}
//*****************************************************************************
char *
Display::hilight(char *str, String urlanchor, int fanchor)
{
static String result;
int pos;
int which, length;
WeightWord *ww;
int first = 1;
result = 0;
while ((pos = allWordsPattern->FindFirstWord(str, which, length)) >= 0)
{
result.append(str, pos);
ww = (WeightWord *) (*searchWords)[which];
result << "<strong>";
if (first && fanchor)
result << "<a href=\"" << urlanchor << "\">";
result.append(str + pos, length);
if (first && fanchor)
result << "</a>";
result << "</strong>";
str += pos + length;
first = 0;
}
result.append(str);
return result;
}
//*****************************************************************************
void
Display::sort(List *matches)
{
int numberOfMatches = matches->Count();
int i;
ResultMatch **array = new ResultMatch*[numberOfMatches];
for (i = 0; i < numberOfMatches; i++)
{
array[i] = (ResultMatch *)(*matches)[i];
if (i == 0 || maxScore < array[i]->getScore())
maxScore = array[i]->getScore();
}
matches->Release();
SortType typ = sortType();
qsort((char *) array, numberOfMatches, sizeof(ResultMatch *),
(typ == SortByTitle) ? Display::compareTitle :
(typ == SortByTime) ? Display::compareTime :
Display::compare);
char *st = config["sort"];
if (st && *st && mystrncasecmp("rev", st, 3) == 0)
{
for (i = numberOfMatches; --i >= 0; )
matches->Add(array[i]);
}
else
{
for (i = 0; i < numberOfMatches; i++)
matches->Add(array[i]);
}
delete [] array;
}
//*****************************************************************************
int
Display::compare(const void *a1, const void *a2)
{
ResultMatch *m1 = *((ResultMatch **) a1);
ResultMatch *m2 = *((ResultMatch **) a2);
return m2->getScore() - m1->getScore();
}
//*****************************************************************************
int
Display::compareTime(const void *a1, const void *a2)
{
ResultMatch *m1 = *((ResultMatch **) a1);
ResultMatch *m2 = *((ResultMatch **) a2);
time_t t1 = (m1->getRef()) ? m1->getRef()->DocTime() : 0;
time_t t2 = (m2->getRef()) ? m2->getRef()->DocTime() : 0;
return (int) (t2 - t1);
}
//*****************************************************************************
int
Display::compareTitle(const void *a1, const void *a2)
{
ResultMatch *m1 = *((ResultMatch **) a1);
ResultMatch *m2 = *((ResultMatch **) a2);
char *t1 = (m1->getRef()) ? m1->getRef()->DocTitle() : (char *)"";
char *t2 = (m2->getRef()) ? m2->getRef()->DocTitle() : (char *)"";
if (!t1) t1 = "";
if (!t2) t2 = "";
return mystrcasecmp(t1, t2);
}
//*****************************************************************************
Display::SortType
Display::sortType()
{
static struct
{
char *typest;
SortType type;
}
sorttypes[] =
{
{"score", SortByScore},
{"date", SortByTime},
{"time", SortByTime},
{"title", SortByTitle}
};
int i = 0;
char *st = config["sort"];
if (st && *st)
{
if (mystrncasecmp("rev", st, 3) == 0)
st += 3;
for (i = sizeof(sorttypes)/sizeof(sorttypes[0]); --i > 0; )
{
if (mystrcasecmp(sorttypes[i].typest, st) == 0)
break;
}
}
return sorttypes[i].type;
}
//*****************************************************************************
void
Display::logSearch(int page, List *matches)
{
// Currently unused time_t t;
int nMatches = 0;
int level = LOG_LEVEL;
int facility = LOG_FACILITY;
char *host = getenv("REMOTE_HOST");
char *ref = getenv("HTTP_REFERER");
if (host == NULL)
host = getenv("REMOTE_ADDR");
if (host == NULL)
host = "-";
if (ref == NULL)
ref = "-";
if (matches)
nMatches = matches->Count();
openlog("htsearch", LOG_PID, facility);
syslog(level, "%s [%s] (%s) [%s] [%s] (%d/%s) - %d -- %s\n",
host,
input->exists("config") ? input->get("config") : "default",
config["match_method"], input->get("words"), logicalWords.get(),
nMatches, config["matches_per_page"],
page, ref
);
}
----------------------------------------------------------
U.O. Rete Civica - Comune di Prato
Via Ricasoli, 4 - 59100 Prato PO Italia
Tel. +39 0574616342 Fax +39 0574616003
http://www.comune.prato.it
E-Mail: [EMAIL PROTECTED]
----------------------------------------------------------
