###############################################################################
## ##
## logprocess.conf - sample configuration file for the Log Validator ##
## ##
## ##
## Written by Olivier Thereaux for W3C ##
## ##
## most of the directives can be taken from the Apache configuration ##
## and have the same meaning and syntax ##
## ##
###############################################################################
# $Id: logprocess.conf,v 1.30 2008/11/18 16:04:17 ot Exp $
#####################################
## ##
## General server configuration ##
## ##
#####################################
##
## Configuration variables about the Web server being tested
## Used by the LogValidator to find and analyse log files
## NOTE: [apache] means you should safely be able to copy
## the equivalent line from the configuration of an Apache Web server
## [apache] ServerAdmin : e-mail address to send the reports ##
##
ServerAdmin me@my.domain
## MailFrom : From: address for e-mail output ##
##
## Unless the relevant option is specified when running the LogValidator,
## the mail output will use ServerAdmin (see above) as From: and To:
## This option allows you to override the From: parameter
## DEFAULT = ServerAdmin
# MailFrom logvalidator@example.org
## Title : a more useful Subject: for the Mail output and <title> for HTML Output ##
##
## Tell the mail/HTML output what this config is all about
## and make them use a better subject than the vanilla "LogValidator results"
## DEFAULT = Logvalidator results
# Title Logvalidator results
## [apache] DocumentRoot : where the files are located ##
##
## For some log formats, it is necessary to know where the actual files
## reside on the server
DocumentRoot /var/www/
## [apache] ServerName : full address for the web server ##
##
## should be of the form host.domain
## NOTE: no need to prepend http://
ServerName my.web.site
## [apache] CustomLog : log file and format ##
##
## Add as many entries as you like. The Log Validator will process all log files listed below
## formats: see http://httpd.apache.org/docs/mod/mod_log_config.html
# NOTE: only the following formats are currently supported:
# common, combined, w3, full, plain (list of addresses)
CustomLog /var/log/apache/access.log combined
# CustomLog /var/log/apache/access.log.1 combined
# CustomLog /home/me/path/to/list plain
## [apache] DirectoryIndex : document equivalent to "/"
##
## See http://httpd.apache.org/docs/mod/mod_dir.html#directoryindex
## Used by the validator to compute the "canonical" URLs for Documents
## DEFAULT = index.html index.htm index
# DirectoryIndex index.html index.htm index Overview Overview.html Overview.xhtml
####################################
## ##
## General Log Validator config ##
## ##
####################################
## QuietIfNoReport : mute the final output when there is nothing to report ##
##
## When this option is set to 1 and there is nothing to report (i.e. the report
## table is empty, which can be the case if everything is valid or the log file
## is blank), the final output will be muted.
## This can be useful to avoid spammy mail output
## NOTE: this has nothing to do with verbosity, only for the final report
## DEFAULT = 0 [ output not muted] - set to 1 to make quiet
# QuietIfNoReport 0
## EntriesPerLogfile : How many log entries do we process per log file? ##
##
## If log files are very big, the Log Validator will only process the first n entries (lines)
## This parameter can be set to any number.
## NOTE: Set it to 0 for "no limit" (not recommended)
## DEFAULT = 100000
# EntriesPerLogfile 5000
## ExcludeCGI : Whether URIs with query strings (e.g http://example.com/foo?bar=zog )
# should be excluded from the logs upon reading
# Values: 1 will exclude such URIs. 0 will count them as "acceptable" URIs
## DEFAULT = 1
# ExcludeCGI 1
## MaxInvalid : how many "invalid" documents to find before exiting
##
## Most Log Validator processing modules will find "invalid" documents (the definition varies)
## This is the general configuration variable for how many documents processing
## modules should look for in the global list of documents before exiting
## NOTE: this can be fine-tuned on a per-module basis by setting this variable
## In the config file for each module
## NOTE: See also MaxDocuments
## NOTE: Set it to 0 for "no limit"
## DEFAULT = 10
# MaxInvalid 20
## MaxDocuments : how many documents to process before exiting [NEW version 0.3]
##
## All Log Validator processing modules will stop after processing this number of documents
## Even if the limit set for the number of invalid documents to find (MaxInvalid) is not reached
## For purely statistical modules, this will be the only limit
## 0 is equivalent to infinite (i.e process all the entries in the log as defined by EntriesPerLogfile)
## NOTE: this can be fine-tuned by setting the same variable for each module
## NOTE: See also MaxInvalid
## NOTE: Set it to 0 for "no limit"
## DEFAULT = 0
#MaxDocuments 100
## ExcludeAreas : areas that should not be processed ##
## Define areas (such as "/News") of the Web site that should be ignored by the LogValidator
## space separated list of areas.
## regexp-like syntax (e.g . is understood as "any character") No need to escape "/"
## DEFAULT = None
## NOTE: can also be set up on a per-module basis
## NOTE: use AuthorizedExtensions to allow/ignore file extensions
# ExcludeAreas test/ignore* favicon\.ico /rootarea
## ExcludeHosts : records that should not be processed ##
## If ExcludeHosts is defined, records originating from these hosts (or IP addresses) will be ignored
## space separated list of expressions
## regexp-like syntax for match on the remote host or address
## DEFAULT = None
## NOTE: This uses whatever is in your log file, so you may want to ask Apache to start/stop resolving
## remote addresses to host names if you want to do hostname-based matching
## Examples:
# ExcludeHosts ^localhost ^127\.0\.0\.1$
# ExcludeHosts ^crawl-.*\.googlebot\.com$ .*\.yahoo\.com$
## RefererMatch : limit referer storage to some sites
## A (new) logvalidator feature draws referer information from logs that hold it
## It can be used with the LinkReferer module
## regexp-like syntax - but giving the name of a site should just work
## DEFAULT = .*
# RefererMatch http://www.example.com
## UseValidationModule : chooses between available modules to process the logs
##
## Below is the list of modules shipped with the Log Validator
## comment out the lines for the ones you do not want to use
## or add your own (see http://www.w3.org/QA/Tools/LogValidator/Manual-Modules to create new modules)
##
## "Popular modules"
UseValidationModule W3C::LogValidator::Basic
UseValidationModule W3C::LogValidator::HTMLValidator
UseValidationModule W3C::LogValidator::CSSValidator
## "Optional modules"
## LinkReferer - [experimental] lists top referers for problematic (404 etc) resources
# UseValidationModule W3C::LogValidator::LinkReferer
## SurveyEngine - [experimental] generates statistics about quality of documents
# UseValidationModule W3C::LogValidator::SurveyEngine
## Link Checker Module - needs the link checker installed on your system
# get it at http://search.cpan.org/dist/W3C-LinkChecker/
# UseValidationModule W3C::LogValidator::LinkChecker
#####################################
## ##
## Module-Specific configuration ##
## ##
#####################################
##
## Below are specific configuration directives for some of the modules
<Module W3C::LogValidator::Basic>
## Directives in this block apply to the Basic module only.
##
## MaxDocuments : how many documents to process before exiting [NEW version 0.3]
##
## This sample sets a specific value adapted to the use of the Basic module;
## change it at your convenience.
## If set to 0, the module will output the full list of documents
MaxDocuments 100
## ExcludeAreas : areas that should not be processed ##
##
## NOTE: see the ExcludeAreas section in the general config for syntax and examples
</Module>
<Module W3C::LogValidator::LinkChecker>
## Directives in this block apply to the LinkChecker module only.
##
## MaxDocuments and MaxInvalid
##
## how many documents should be processed and/or how many broken documents need to be found
## before this particular module stops processing the logs
## DEFAULT = global values of MaxInvalid and MaxDocuments
##
# MaxInvalid 5
# MaxDocuments 0
## checklink : path to 'checklink', the link checker script.
##
## You only need to set this up if your script is not in a "usual" location
## (/usr/bin, /usr/local/bin...)
## In the future, this will not be needed
# checklink /usr/bin/checklink
</Module>
<Module W3C::LogValidator::HTMLValidator>
## Directives in this block apply to the HTMLValidator module only.
##
## MaxDocuments and MaxInvalid
##
## how many documents should be processed and/or how many invalid documents need to be found
## before this particular module stops processing the logs
## DEFAULT = global values of MaxInvalid and MaxDocuments
##
# MaxInvalid 5
# MaxDocuments 0
## ExcludeAreas : areas that should not be processed ##
## NOTE: see equivalent section in general config for more info
## ShowInvalid : whether the report should list invalid documents
## DEFAULT = Yes
# ShowInvalid Yes
## ShowAborted : whether the report should list documents which could not
## be validated (fatal errors, 404s, etc)
## DEFAULT = No
# ShowAborted No
## ShowValid : whether the report should list valid documents
## DEFAULT = No
# ShowValid No
## AuthorizedExtensions : trims the list to URIs ending with the listed extensions
## (space separated). Below is the default
# AuthorizedExtensions .html .xhtml .phtml .htm .shtml .php .svg .xml /
## Alternatively, if you want to check everything
# AuthorizedExtensions *
## CheckExtensionlessURIs : set this to Yes if your HTML documents are often presented without extension
## extensionless URIs will first be checked for their mime type through the web. Setting to Yes
## is not necessary but will speed up the process.
## DEFAULT = No
# CheckExtensionlessURIs No
## Other variables you may change *WARNING* at your own risk *WARNING*
## NOTE: default should be fine, don't change this unless you know what you're doing
## Below are the default values, uncomment and modify if necessary
# ValidatorMethod HEAD
# ValidatorHost validator.w3.org
# ValidatorPort 80
# ValidatorString /check?uri=
# ValidatorPostString \;output=xml
</Module>
<Module W3C::LogValidator::SurveyEngine>
## Directives in this block apply to the SurveyEngine module only.
##
## MaxDocuments and MaxInvalid
##
## how many documents should be processed and/or how many invalid documents need to be found
## before this particular module stops processing the logs
## DEFAULT = global values of MaxInvalid and MaxDocuments
##
# MaxInvalid 5
## This sample overrides MaxDocuments for this module; comment the line out
## to fall back to the global value
MaxDocuments 100
## ExcludeAreas : areas that should not be processed ##
## NOTE: see equivalent section in general config for more info
## Other variables you may change *WARNING* at your own risk *WARNING*
## NOTE: default should be fine, don't change this unless you know what you're doing
## Below are the default values, uncomment and modify if necessary
# ValidatorMethod HEAD
# ValidatorHost validator.w3.org
# ValidatorPort 80
# ValidatorString /check?uri=
# ValidatorPostString \;output=xml
# AuthorizedExtensions .html .xhtml .phtml .htm .shtml .php .svg .xml /
</Module>
<Module W3C::LogValidator::CSSValidator>
## Directives in this block apply to the CSSValidator module only.
##
## MaxDocuments and MaxInvalid
##
## how many documents should be processed and/or how many invalid documents need to be found
## before this particular module stops processing the logs
## DEFAULT = global values of MaxInvalid and MaxDocuments
##
# MaxInvalid 5
# MaxDocuments 0
## ExcludeAreas : areas that should not be processed ##
## NOTE: see equivalent section in general config for more info
## ShowInvalid : whether the report should list invalid documents
## DEFAULT = Yes
# ShowInvalid Yes
## ShowAborted : whether the report should list documents which could not
## be validated (fatal errors, 404s, etc)
## DEFAULT = No
# ShowAborted No
## ShowValid : whether the report should list valid documents
## DEFAULT = No
# ShowValid No
## verbose : amount of output when running Log Validator
##
## verbosity is set up for the whole Log Validator from the command line
## but if you want a specific verbosity for a specific module, override the setting here
## DEFAULT = global value of verbose
# verbose 0
## Other variables you may change *WARNING* at your own risk *WARNING*
# ValidatorHost jigsaw.w3.org
# ValidatorPort 80
# ValidatorString /css-validator/validator
## AuthorizedExtensions : what kind of extensions the module should claim responsibility for
## For this module, you may want to choose either only CSS (first line below)
## or any kind of document that can have embedded CSS (second line below)
## The default is the former
# AuthorizedExtensions .css
# AuthorizedExtensions .html .xhtml .phtml .htm .css .svg /
## Alternatively, if you want to check everything
# AuthorizedExtensions *
## CheckExtensionlessURIs : set this to Yes if your HTML documents are often presented without extension
## extensionless URIs will first be checked for their mime type through the web. Setting to Yes
## is not necessary but will speed up the process.
## DEFAULT = No
# CheckExtensionlessURIs No
</Module>