###############################################################################
##                                                                           ##
##     logprocess.conf - sample configuration file for the Log Validator     ##
##                                                                           ##
##                                                                           ##
##               Written by Olivier Thereaux for W3C                         ##
##                                                                           ##
##     most of the directives can be taken from the Apache configuration     ##
##                  and have the same meaning and syntax                     ##
##                                                                           ##
###############################################################################
# $Id: logprocess.conf,v 1.30 2008/11/18 16:04:17 ot Exp $


#####################################
##                                 ##
##   General server configuration  ##
##                                 ##
#####################################
##
##     Configuration variables about the Web server being tested
##      Used by the LogValidator to find and analyse log files
##      NOTE: [apache] means you should safely be able to copy 
## the equivalent line from the configuration of an Apache Web server

##  [apache] ServerAdmin : e-mail address to send the reports  ##
##
ServerAdmin me@my.domain

##  MailFrom : From: address for e-mail output  ##
##
## Unless the relevant option is specified when running the LogValidator,
## the mail output will use ServerAdmin (see above) as From: and To:
## This option allows you to override the From: parameter
## DEFAULT  = ServerAdmin
# MailFrom logvalidator@example.org

## Title : a more useful Subject: for the Mail output and <title> for HTML Output ##
##
## Tell the mail/HTML output what this config is all about
## and make them use a better subject than the vanilla "LogValidator results"
## DEFAULT = Logvalidator results
# Title Logvalidator results

##  [apache] DocumentRoot : where the files are located  ## 
##
## For some log formats, it is necessary to know where the actual files
## reside on the server
DocumentRoot /var/www/

##  [apache] ServerName : full address for the web server  ##
##
## should be of the form host.domain
## NOTE: no need to prepend http://
ServerName my.web.site

##  [apache] CustomLog : log file and format  ##
##
## Add as many entries as you like. The Log Validator will process all log files listed below
## formats: see http://httpd.apache.org/docs/mod/mod_log_config.html
#  NOTE: only the following formats are currently supported:
#                    common, combined, w3, full, plain (list of addresses)
CustomLog /var/log/apache/access.log combined
# CustomLog /var/log/apache/access.log.1 combined
# CustomLog /home/me/path/to/list plain

## [apache] DirectoryIndex : document equivalent to "/"
##
## See http://httpd.apache.org/docs/mod/mod_dir.html#directoryindex
## Used by the validator to compute the "canonical" URLs for Documents
## DEFAULT = index.html index.htm index
# DirectoryIndex index.html index.htm index Overview Overview.html Overview.xhtml


####################################
##                                ##
##  General Log Validator config  ##
##                                ##
####################################
## QuietIfNoReport : If there is nothing to report (i.e. the report table is empty,
## which can be the case if everything is valid or the log file is blank),
## the final output will be muted. This can be useful to avoid spammy mail output
## NOTE: this has nothing to do with verbosity; it only affects the final report
## DEFAULT = 0 [ output not muted] - set to 1 to make quiet
# QuietIfNoReport 0

##  EntriesPerLogfile : How many log entries do we process per log file? ##
##
## If log files are very big, the Log Validator will only process the first n entries (lines)
## This parameter can be set to any number.
## NOTE: Set it to 0 for "no limit" (not recommended)
## DEFAULT = 100000
# EntriesPerLogfile 5000

## ExcludeCGI : Whether URIs with query strings (e.g. http://example.com/foo?bar=zog )
## should be excluded from the logs upon reading
## Values: 1 will exclude such URIs. 0 will count them as "acceptable" URIs
## DEFAULT = 1
#  ExcludeCGI 1

## MaxInvalid : how many "invalid" documents to find before exiting
##
## Most Log Validator processing modules will find "invalid" documents (the definition varies)
## This is the general configuration variable for how many documents processing
##   modules should look for in the global list of documents before exiting
## NOTE: this can be fine-tuned on a per-module basis by setting this variable
##  In the config file for each module
## NOTE: See also MaxDocuments
## NOTE: Set it to 0 for "no limit"
## DEFAULT = 10
# MaxInvalid 20

## MaxDocuments : how many documents to process before exiting [NEW version 0.3]
##
## All Log Validator processing modules will stop after processing this number of documents
##   Even if the limit set for the number of invalid documents to find (MaxInvalid) is not reached
## For purely statistical modules, this will be the only limit
## 0 is equivalent to infinite (i.e process all the entries in the log as defined by EntriesPerLogfile)
## NOTE: this can be fine-tuned by setting the same variable for each module
## NOTE: See also MaxInvalid
## NOTE: Set it to 0 for "no limit"
## DEFAULT =  0
#MaxDocuments 100


## ExcludeAreas : areas that should not be processed ##
## Define areas (such as "/News") of the Web site that should be ignored by the LogValidator
## space separated list of areas. 
## regexp-like syntax (e.g. "." is understood as "any character"). No need to escape "/"
## DEFAULT = None
## NOTE: can also be set up on a per-module basis
## NOTE: use AuthorizedExtensions to allow/ignore file extensions
# ExcludeAreas  test/ignore* favicon\.ico /rootarea

## ExcludeHosts : records that should not be processed ##
## If ExcludeHosts is defined, records originating from these hosts (or IP addresses) will be ignored
## space separated list of expressions
## regexp-like syntax for match on the remote host or address
## DEFAULT = None
## NOTE: This uses whatever is in your log file, so you may want to ask Apache to start/stop resolving
##       remote addresses to host names if you want to do hostname-based matching
## Examples:
# ExcludeHosts ^localhost ^127\.0\.0\.1$
# ExcludeHosts ^crawl-.*\.googlebot\.com$ .*\.yahoo\.com$



## RefererMatch : limit referer storage to some sites
## A (new) logvalidator feature draws referer information from logs that hold it
## It can be used with the LinkReferer module
## regexp-like syntax - but giving the name of a site should just work
## DEFAULT = .*
# RefererMatch http://www.example.com

## UseValidationModule : chooses between available modules to process the logs
##
## Below is the list of modules shipped with the Log Validator
## comment out the lines for the ones you do not want to use
## or add your own (see http://www.w3.org/QA/Tools/LogValidator/Manual-Modules to create new modules)
##
## "Popular modules" 
UseValidationModule W3C::LogValidator::Basic
UseValidationModule W3C::LogValidator::HTMLValidator
UseValidationModule W3C::LogValidator::CSSValidator
## "Optional modules"
## LinkReferer - [experimental]  lists top referers for problematic (404 etc) resources
# UseValidationModule W3C::LogValidator::LinkReferer
## SurveyEngine - [experimental] generates statistics about quality of documents
# UseValidationModule W3C::LogValidator::SurveyEngine
## Link Checker Module - needs the link checker installed on your system
# get it at http://search.cpan.org/dist/W3C-LinkChecker/ 
# UseValidationModule W3C::LogValidator::LinkChecker


#####################################
##                                 ##
##  Module-Specific configuration  ##
##                                 ##
#####################################
##
## Below are specific configuration directives for some of the modules

<Module W3C::LogValidator::Basic>                                                                                                                              
## Settings in this block apply to the Basic module only.
##
## MaxDocuments : how many documents to process before exiting [NEW version 0.3]
## A specific value is set here, adapted to the use of the Basic module;
## change it at your convenience.
## If set to 0, the module will output the full list of documents
MaxDocuments 100

## ExcludeAreas : areas that should not be processed ##
## Space separated list of areas to ignore for this module.
## NOTE: see the ExcludeAreas section in the general config for the syntax
## Example (commented out):
# ExcludeAreas  test/ignore* favicon\.ico /rootarea

</Module>
<Module W3C::LogValidator::LinkChecker>
## Settings in this block apply to the LinkChecker module only.
##
## MaxDocuments and MaxInvalid
##
## how many documents should be processed and/or how many broken documents need to be found
##   before this particular module stops processing the logs
## DEFAULT = global values of MaxInvalid and MaxDocuments
##
## MaxInvalid 5
## MaxDocuments 0

## checklink : path to 'checklink', the link checker script.
## You only need to set this up if your script is not in a "usual" location
## (/usr/bin, /usr/local/bin...)
## In the future, this will not be needed
#checklink /usr/bin/checklink
</Module>



<Module W3C::LogValidator::HTMLValidator>
## Settings in this block apply to the HTMLValidator module only.
##
## MaxDocuments and MaxInvalid
##
## how many documents should be processed and/or how many invalid documents need to be found
##   before this particular module stops processing the logs
## DEFAULT = global values of MaxInvalid and MaxDocuments
##
## MaxInvalid 5
## MaxDocuments 0

## ExcludeAreas : areas that should not be processed ##
## NOTE: see the ExcludeAreas section in the general config for more info

## ShowInvalid : whether the report should list invalid documents
## DEFAULT = Yes
# ShowInvalid Yes

## ShowAborted : whether the report should list documents which could not
## be validated (fatal errors, 404s, etc.)
## DEFAULT = No
# ShowAborted No

## ShowValid : whether the report should list valid documents
## DEFAULT = No
# ShowValid No

## AuthorizedExtensions : trims the list to URIs ending with the following extensions
## Below is the default
# AuthorizedExtensions .html .xhtml .phtml .htm .shtml .php .svg .xml /
## Alternatively, if you want to check everything
# AuthorizedExtensions *

## CheckExtensionlessURIs : set this to Yes if your HTML documents are often served without an extension
## Extensionless URIs will first be checked for their MIME type through the web. Setting to Yes
## is not necessary but will speed up the process.
## DEFAULT = No
# CheckExtensionlessURIs No

## Other variables you may change *WARNING* at your own risk *WARNING*
## NOTE: the defaults should be fine, don't change these unless you know what you're doing
## Below are the default values, uncomment and modify if necessary
# ValidatorMethod HEAD
# ValidatorHost validator.w3.org
# ValidatorPort 80
# ValidatorString /check?uri=
# ValidatorPostString \;output=xml
</Module>

<Module W3C::LogValidator::SurveyEngine>
## Settings in this block apply to the SurveyEngine module only.
##
## MaxDocuments and MaxInvalid
##
## how many documents should be processed and/or how many invalid documents need to be found
##   before this particular module stops processing the logs
## DEFAULT = global values of MaxInvalid and MaxDocuments
## NOTE: MaxDocuments is explicitly set for this module below
##
## MaxInvalid 5
MaxDocuments 100

## ExcludeAreas : areas that should not be processed ##
## NOTE: see the ExcludeAreas section in the general config for more info


## Other variables you may change *WARNING* at your own risk *WARNING*
## NOTE: the defaults should be fine, don't change these unless you know what you're doing
## Below are the default values, uncomment and modify if necessary
# ValidatorMethod HEAD
# ValidatorHost validator.w3.org
# ValidatorPort 80
# ValidatorString /check?uri=
# ValidatorPostString \;output=xml
# AuthorizedExtensions .html .xhtml .phtml .htm .shtml .php .svg .xml /
</Module>


<Module W3C::LogValidator::CSSValidator>
## Settings in this block apply to the CSSValidator module only.
##
## MaxDocuments and MaxInvalid
##
## how many documents should be processed and/or how many invalid documents need to be found
##   before this particular module stops processing the logs
## DEFAULT = global values of MaxInvalid and MaxDocuments
##
## MaxInvalid 5
## MaxDocuments 0

## ExcludeAreas : areas that should not be processed ##
## NOTE: see the ExcludeAreas section in the general config for more info

## ShowInvalid : whether the report should list invalid documents
## DEFAULT = Yes
# ShowInvalid Yes

## ShowAborted : whether the report should list documents which could not
## be validated (fatal errors, 404s, etc.)
## DEFAULT = No
# ShowAborted No

## ShowValid : whether the report should list valid documents
## DEFAULT = No
# ShowValid No

## verbose : amount of output when running Log Validator
##
## verbosity is set up for the whole Log Validator from the command line
##   but if you want a specific verbosity for a specific module, override the setting here
## DEFAULT = global value of verbose
# verbose 0

## Other variables you may change *WARNING* at your own risk *WARNING*
# ValidatorHost jigsaw.w3.org
# ValidatorPort 80
# ValidatorString /css-validator/validator

## AuthorizedExtensions : what kind of extensions the module should claim responsibility for
## For this module, you may want to choose either only CSS (first line below)
## or any kind of document that can have embedded CSS (second line below)
## The default is the former (CSS only)
#AuthorizedExtensions .css 
#AuthorizedExtensions .html .xhtml .phtml .htm .css .svg /
## Alternatively, if you want to check everything
# AuthorizedExtensions *

## CheckExtensionlessURIs : set this to Yes if your HTML documents are often served without an extension
## Extensionless URIs will first be checked for their MIME type through the web. Setting to Yes
## is not necessary but will speed up the process.
## DEFAULT = No
# CheckExtensionlessURIs No

</Module>