]> Frank Brehm's Git Trees - config/bruni/etc.git/commitdiff
committing changes in /etc after emerge run
author frank <frank@bruni.home.brehm-online.com>
Mon, 16 Jan 2012 22:40:40 +0000 (23:40 +0100)
committer Frank Brehm <root@bruni.home.brehm-online.com>
Mon, 16 Jan 2012 22:40:40 +0000 (23:40 +0100)
Package changes:
+www-misc/htdig-3.2.0_beta6-r3

.etckeeper
htdig/HtFileType-magic.mime [new file with mode: 0644]
htdig/cookies.txt [new file with mode: 0644]
htdig/htdig.conf [new file with mode: 0644]
htdig/mime.types [new file with mode: 0644]

index 5b4bae4906caec0c9bce467e2d5ee25f14d35bd0..0500a7fc2ece64c950c6d43f32c4dc1863c3a3f9 100755 (executable)
@@ -637,6 +637,11 @@ maybe chmod 0644 './hotplug/usb/libsane.usermap'
 maybe chmod 0755 './hotplug/usb/libusbscanner'
 maybe chmod 0755 './hp'
 maybe chmod 0644 './hp/hplip.conf'
+maybe chmod 0755 './htdig'
+maybe chmod 0644 './htdig/HtFileType-magic.mime'
+maybe chmod 0644 './htdig/cookies.txt'
+maybe chmod 0644 './htdig/htdig.conf'
+maybe chmod 0644 './htdig/mime.types'
 maybe chmod 0644 './idmapd.conf'
 maybe chmod 0644 './idn.conf'
 maybe chmod 0644 './idn.conf.sample'
diff --git a/htdig/HtFileType-magic.mime b/htdig/HtFileType-magic.mime
new file mode 100644 (file)
index 0000000..e1ce874
--- /dev/null
@@ -0,0 +1,495 @@
+# Magic data for the file(1) command
+#
+# The format is 4-5 columns:
+#    Column #1: byte number to begin checking from, ">" indicates continuation
+#    Column #2: type of data to match
+#    Column #3: contents of data to match
+#    Column #4: MIME type of result
+#    Column #5: MIME encoding of result (optional)
+#
+# Modified by <mailto:lha@users.sourceforge.net> for compatibility with
+# different versions of  file(1):
+# - Columns are separated by TABs (for traditional versions)
+# - spaces and '<'s within a column are escaped by '\' (for new versions)
+# - Hex numbers in strings are given as '\0x' (traditional) and '\x' (new)
+# - Null characters (\000) traditionally terminate strings, but now don't
+
+#------------------------------------------------------------------------------
+# Localstuff:  file(1) magic for locally observed files
+# Add any locally observed files here.
+
+#------------------------------------------------------------------------------
+# end local stuff
+#------------------------------------------------------------------------------
+
+#------------------------------------------------------------------------------
+# Java
+
+0      short           0xcafe
+>2     short           0xbabe          application/java
+
+# Microsoft WAVE format (*.wav)
+# [GRR 950115:  probably all of the shorts and longs should be leshort/lelong]
+#                                      Microsoft RIFF
+0      string          RIFF            audio/unknown   
+#                                      - WAVE format
+>8     string          WAVE            audio/x-wav     
+>8     string          AVI             video/x-msvideo
+#
+0      belong          0x2e7261fd      application/x-realaudio
+
+# MPEG Layer 3 sound files
+0      beshort         &0xffe0         audio/mpeg
+#MP3 with ID3 tag
+0      string          ID3             audio/mpeg
+# Ogg/Vorbis
+0      string          OggS            audio/x-ogg
+
+#------------------------------------------------------------------------------
+# commands:  file(1) magic for various shells and interpreters
+#
+#0     string          :\ shell archive or commands for antique kernel text
+0      string          #!/bin/sh               application/x-shellscript
+0      string          #!\ /bin/sh             application/x-shellscript
+0      string          #!/bin/csh              application/x-shellscript
+0      string          #!\ /bin/csh            application/x-shellscript
+# korn shell magic, sent by George Wu, gwu@clyde.att.com
+0      string          #!/bin/ksh              application/x-shellscript
+0      string          #!\ /bin/ksh            application/x-shellscript
+0      string          #!/bin/tcsh             application/x-shellscript
+0      string          #!\ /bin/tcsh           application/x-shellscript
+0      string          #!/usr/local/tcsh       application/x-shellscript
+0      string          #!\ /usr/local/tcsh     application/x-shellscript
+0      string          #!/usr/local/bin/tcsh   application/x-shellscript
+0      string          #!\ /usr/local/bin/tcsh application/x-shellscript
+# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de)
+0      string          #!/bin/bash                     application/x-shellscript
+0      string          #!\ /bin/bash           application/x-shellscript
+0      string          #!/usr/local/bin/bash   application/x-shellscript
+0      string          #!\ /usr/local/bin/bash application/x-shellscript
+
+#
+# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson)
+0      string          #!/usr/local/bin/zsh    application/x-shellscript
+0      string          #!\ /usr/local/bin/zsh  application/x-shellscript
+0      string          #!/usr/local/bin/ash    application/x-shellscript
+0      string          #!\ /usr/local/bin/ash  application/x-shellscript
+#0     string          #!/usr/local/bin/ae     Neil Brown's ae
+#0     string          #!\ /usr/local/bin/ae   Neil Brown's ae
+0      string          #!/bin/nawk             application/x-nawk
+0      string          #!\ /bin/nawk           application/x-nawk
+0      string          #!/usr/bin/nawk         application/x-nawk
+0      string          #!\ /usr/bin/nawk       application/x-nawk
+0      string          #!/usr/local/bin/nawk   application/x-nawk
+0      string          #!\ /usr/local/bin/nawk application/x-nawk
+0      string          #!/bin/gawk             application/x-gawk
+0      string          #!\ /bin/gawk           application/x-gawk
+0      string          #!/usr/bin/gawk         application/x-gawk
+0      string          #!\ /usr/bin/gawk       application/x-gawk
+0      string          #!/usr/local/bin/gawk   application/x-gawk
+0      string          #!\ /usr/local/bin/gawk application/x-gawk
+#
+0      string          #!/bin/awk              application/x-awk
+0      string          #!\ /bin/awk            application/x-awk
+0      string          #!/usr/bin/awk          application/x-awk
+0      string          #!\ /usr/bin/awk        application/x-awk
+0      string          BEGIN                   application/x-awk
+
+# For Larry Wall's perl language.  The ``eval'' line recognizes an
+# outrageously clever hack for USG systems.
+#                               Keith Waclena <keith@cerberus.uchicago.edu>
+0      string          #!/bin/perl             application/x-perl
+0      string          #!\ /bin/perl           application/x-perl
+0      string          eval\ "exec\ /bin/perl  application/x-perl
+0      string          #!/usr/bin/perl         application/x-perl
+0      string          #!\ /usr/bin/perl       application/x-perl
+0      string          eval\ "exec\ /usr/bin/perl      application/x-perl
+0      string          #!/usr/local/bin/perl   application/x-perl
+0      string          #!\ /usr/local/bin/perl application/x-perl
+0      string          eval\ "exec\ /usr/local/bin/perl        application/x-perl
+
+#------------------------------------------------------------------------------
+# compress:  file(1) magic for pure-compression formats (no archives)
+#
+# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, whap, etc.
+#
+# Formats for various forms of compressed data
+# Formats for "compress" proper have been moved into "compress.c",
+# because it tries to uncompress it to figure out what's inside.
+# (Technically, "gzip", "bzip2" etc. are encodings, not mime-types,
+# and should also decompress to find out the type of data inside.)
+
+# standard unix compress
+0      string          \037\235        application/x-compress
+
+# gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver)
+0      string          \037\213        application/x-gzip
+
+# bzip2
+0      string          BZh             application/x-bzip2
+
+0              string                  PK\003\004              application/x-zip
+
+# According to gzip.h, this is the correct byte order for packed data.
+0      string          \037\036        application/octet-stream
+#
+# This magic number is byte-order-independent.
+#
+0      short           017437          application/octet-stream
+
+# XXX - why *two* entries for "compacted data", one of which is
+# byte-order independent, and one of which is byte-order dependent?
+#
+# compacted data
+0      short           0x1fff          application/octet-stream
+0      string          \377\037        application/octet-stream
+# huf output
+0      short           0145405         application/octet-stream
+
+# Squeeze and Crunch...
+# These numbers were gleaned from the Unix versions of the programs to
+# handle these formats.  Note that I can only uncrunch, not crunch, and
+# I didn't have a crunched file handy, so the crunch number is untested.
+#                              Keith Waclena <keith@cerberus.uchicago.edu>
+#0     leshort         0x76FF          squeezed data (CP/M, DOS)
+#0     leshort         0x76FE          crunched data (CP/M, DOS)
+
+# Freeze
+#0     string          \037\237        Frozen file 2.1
+#0     string          \037\236        Frozen file 1.0 (or gzip 0.5)
+
+# lzh?
+#0     string          \037\240        LZH compressed data
+
+257    string          ustar\0         application/x-tar       posix
+257    string          ustar\040\040\0         application/x-tar       gnu
+
+0      short           070707          application/x-cpio
+0      short           0143561         application/x-cpio      swapped
+
+0      string          =<ar>           application/x-archive
+0      string          !<arch>         application/x-archive
+>8     string          debian          application/x-debian-package
+
+#------------------------------------------------------------------------------
+#
+# RPM: file(1) magic for Red Hat Packages   Erik Troan (ewt@redhat.com)
+#
+0      beshort         0xedab
+>2     beshort         0xeedb  application/x-rpm
+
+0      lelong&0x8080ffff       0x0000081a      application/x-arc       lzw
+0      lelong&0x8080ffff       0x0000091a      application/x-arc       squashed
+0      lelong&0x8080ffff       0x0000021a      application/x-arc       uncompressed
+0      lelong&0x8080ffff       0x0000031a      application/x-arc       packed
+0      lelong&0x8080ffff       0x0000041a      application/x-arc       squeezed
+0      lelong&0x8080ffff       0x0000061a      application/x-arc       crunched
+
+0      leshort 0xea60  application/octet-stream        x-arj
+
+# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
+2      string  -lh0-   application/x-lharc     lh0
+2      string  -lh1-   application/x-lharc     lh1
+2      string  -lz4-   application/x-lharc     lz4
+2      string  -lz5-   application/x-lharc     lz5
+#      [never seen any but the last; -lh4- reported in comp.compression:]
+2      string  -lzs-   application/x-lha       lzs
+2      string  -lh\ -  application/x-lha       lh
+2      string  -lhd-   application/x-lha       lhd
+2      string  -lh2-   application/x-lha       lh2
+2      string  -lh3-   application/x-lha       lh3
+2      string  -lh4-   application/x-lha       lh4
+2      string  -lh5-   application/x-lha       lh5
+2      string  -lh6-   application/x-lha       lh6
+2      string  -lh7-   application/x-lha       lh7
+# Shell archives
+10     string  #\ This\ is\ a\ shell\ archive  application/octet-stream        x-shell
+
+#------------------------------------------------------------------------------
+# frame:  file(1) magic for FrameMaker files
+#
+# This stuff came on a FrameMaker demo tape, most of which is
+# copyright, but this file is "published" as witness the following:
+#
+0      string          \<MakerFile     application/x-frame
+0      string          \<MIFFile       application/x-frame
+0      string          \<MakerDictionary       application/x-frame
+0      string          \<MakerScreenFon        application/x-frame
+0      string          \<MML           application/x-frame
+0      string          \<Book          application/x-frame
+0      string          \<Maker         application/x-frame
+
+#------------------------------------------------------------------------------
+# html:  file(1) magic for HTML (HyperText Markup Language) docs
+#
+# from Daniel Quinlan <quinlan@yggdrasil.com>
+# modified by Lachlan Andrew <lha@users.sourceforge.net> to
+# match leading whitespace, but still work with old versions
+# of file(1) which don't recognise the /cb options
+#
+0      string          \<HEAD                  text/html
+0      string          \<head                  text/html
+0      string          \<TITLE                 text/html
+0      string          \<title                 text/html
+0      string          \<HTML                  text/html
+0      string          \<html                  text/html
+0      string          \<!--                   text/html
+0      string          \<H1                    text/html
+0      string          \<h1                    text/html
+0      string          \<!DOCTYPE\ HTML        text/html
+0      string          \<!doctype\ HTML        text/html
+0      string          \<!doctype\ html        text/html
+0      string          \<!DOCTYPE\ NETSCAPE-Bookmark   text/html
+0      string/cb       \ <head                 text/html
+0      string/cb       \ <html                 text/html
+0      string/cb       \ <title                text/html
+0      string/cb       \ <!doctype\ html       text/html
+0      string          \<!\                    text/html
+
+# Extensible markup language (XML), a subset of SGML
+# from Marc Prud'hommeaux (marc@apocalypse.org)
+0      string          \<?xml                  text/xml
+0      string/cb       \ \<?xml                text/xml
+
+# SGML, mostly from rph@sq
+0      string  \<!doctype                      text/sgml
+0      string  \<!subdoc                       text/sgml
+0      string/cb       \ \<!doctype                    text/sgml
+0      string/cb       \ \<!subdoc                     text/sgml
+
+
+
+#------------------------------------------------------------------------------
+# images:  file(1) magic for image formats (see also "c-lang" for XPM bitmaps)
+#
+# originally from jef@helios.ee.lbl.gov (Jef Poskanzer),
+# additions by janl@ifi.uio.no as well as others. Jan also suggested
+# merging several one- and two-line files into here.
+#
+# XXX - byte order for GIF and TIFF fields?
+# [GRR:  TIFF allows both byte orders; GIF is probably little-endian]
+#
+
+# [GRR:  what the hell is this doing in here?]
+#0     string          xbtoa           btoa'd file
+
+# PBMPLUS
+#                                      PBM file
+0      string          P1              image/x-portable-bitmap 7bit
+#                                      PGM file
+0      string          P2              image/x-portable-greymap        7bit
+#                                      PPM file
+0      string          P3              image/x-portable-pixmap 7bit
+#                                      PBM "rawbits" file
+0      string          P4              image/x-portable-bitmap
+#                                      PGM "rawbits" file
+0      string          P5              image/x-portable-greymap
+#                                      PPM "rawbits" file
+0      string          P6              image/x-portable-pixmap
+
+# NIFF (Navy Interchange File Format, a modification of TIFF)
+# [GRR:  this *must* go before TIFF]
+0      string          IIN1            image/x-niff
+
+# TIFF and friends
+#                                      TIFF file, big-endian
+0      string          MM              image/tiff
+#                                      TIFF file, little-endian
+0      string          II              image/tiff
+
+# possible GIF replacements; none yet released!
+# (Greg Roelofs, newt@uchicago.edu)
+#
+# GRR 950115:  this was mine ("Zip GIF"):
+#                                      ZIF image (GIF+deflate alpha)
+0      string          GIF94z          image/unknown
+#
+# GRR 950115:  this is Jeremy Wohl's Free Graphics Format (better):
+#                                      FGF image (GIF+deflate beta)
+0      string          FGF95a          image/unknown
+#
+# GRR 950115:  this is Thomas Boutell's Portable Bitmap Format proposal
+# (best; not yet implemented):
+#                                      PBF image (deflate compression)
+0      string          PBF             image/unknown
+
+# GIF
+0      string          GIF             image/gif
+
+# JPEG images
+0      beshort         0xffd8          image/jpeg
+0      string          \377\330\377\340        image/jpeg
+0      string          \377\330\377\341        image/jpeg
+0      string          \377\330\377\356        image/jpeg
+
+
+# PC bitmaps (OS/2, Windoze BMP files)  (Greg Roelofs, newt@uchicago.edu)
+0      string          BM              image/bmp
+#>14   byte            12              (OS/2 1.x format)
+#>14   byte            64              (OS/2 2.x format)
+#>14   byte            40              (Windows 3.x format)
+#0     string          IC              icon
+#0     string          PI              pointer
+#0     string          CI              color icon
+#0     string          CP              color pointer
+#0     string          BA              bitmap array
+
+
+#------------------------------------------------------------------------------
+# lisp:  file(1) magic for lisp programs
+#
+# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
+0      string  ;;                      text/plain      8bit
+# Emacs 18 - this is always correct, but not very magical.
+0      string  \012(                   application/x-elc
+# Emacs 19
+0      string  ;ELC\023\000\000\000    application/x-elc
+
+#------------------------------------------------------------------------------
+# mail.news:  file(1) magic for mail and news
+#
+# There are tests in ascmagic.c to cope with mail and news.
+0      string          Relay-Version:  message/rfc822  7bit
+0      string          #!\ rnews       message/rfc822  7bit
+0      string          N#!\ rnews      message/rfc822  7bit
+0      string          Forward\ to     message/rfc822  7bit
+0      string          Pipe\ to        message/rfc822  7bit
+0      string          Return-Path:    message/rfc822  7bit
+0      string          Path:           message/news    8bit
+0      string          Xref:           message/news    8bit
+0      string          From:           message/rfc822  7bit
+0      string          Article         message/news    8bit
+#------------------------------------------------------------------------------
+# msword: file(1) magic for MS Word files
+#
+# Contributor claims:
+# Reverse-engineered MS Word magic numbers
+#      Some of these also occur in PowerPoint. -- lha@users.sourceforge.net
+
+0      string          \376\067\0\043                  application/msword
+0      string          \320\317\021\340\241\261        application/msword
+0      string          \333\245-\0\0\0                 application/msword
+
+
+
+#------------------------------------------------------------------------------
+# printer:  file(1) magic for printer-formatted files
+#
+
+# PostScript
+0      string          %!              application/postscript
+0      string          \004%!          application/postscript
+0      string          \033%-12345X%!PS        application/postscript
+
+# Acrobat
+# (due to clamen@cs.cmu.edu)
+0      string          %PDF-           application/pdf
+
+#------------------------------------------------------------------------------
+# sc:  file(1) magic for "sc" spreadsheet
+#
+38     string          Spreadsheet     application/x-sc
+
+#------------------------------------------------------------------------------
+# tex:  file(1) magic for TeX files
+#
+# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
+#
+# From <conklin@talisman.kaleida.com>
+
+# Although we may know the offset of certain text fields in TeX DVI
+# and font files, we can't use them reliably because they are not
+# zero terminated. [but we do anyway, christos]
+0      string          \367\002        application/x-dvi
+#0     string          \367\203        TeX generic font data
+#0     string          \367\131        TeX packed font data
+#0     string          \367\312        TeX virtual font data
+#0     string          This\ is\ TeX,  TeX transcript text     
+#0     string          This\ is\ METAFONT,     METAFONT transcript text
+
+# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com)
+#0     string          \\input\ texinfo        Texinfo source text
+#0     string          This\ is\ Info\ file    GNU Info text
+
+# correct TeX magic for Linux (and maybe more)
+# from Peter Tobias (tobias@server.et-inf.fho-emden.de)
+#
+0      leshort         0x02f7          application/x-dvi
+
+# RTF - Rich Text Format
+0      string          {\\rtf          text/rtf
+
+#------------------------------------------------------------------------------
+# animation:  file(1) magic for animation/movie formats
+#
+# animation formats, originally from vax@ccwf.cc.utexas.edu (VaX#n8)
+#                                              MPEG file
+0      belong          0x000001b3                      video/mpeg
+0      belong          0x000001ba                      video/mpeg
+# FLI animation format
+0      leshort         0xAF11                          video/fli
+# FLC animation format
+0      leshort         0xAF12                          video/flc
+# AVI
+>8     string          AVI\                            video/avi
+#
+# SGI and Apple formats
+#
+0      string          MOVI                            video/sgi
+4      string          moov                            video/quicktime moov
+4      string          mdat                            video/quicktime mdat
+# The contributor claims:
+#   I couldn't find a real magic number for these, however, this
+#   -appears- to work.  Note that it might catch other files, too,
+#   so BE CAREFUL!
+#
+# Note that title and author appear in the two 20-byte chunks
+# at decimal offsets 2 and 22, respectively, but they are XOR'ed with
+# 255 (hex FF)! DL format SUCKS BIG ROCKS.
+#
+#                                              DL file version 1 , medium format (160x100, 4 images/screen)
+0      byte            1                       video/unknown
+0      byte            2                       video/unknown
+#
+# Databases
+#
+# GDBM magic numbers
+#  Will be maintained as part of the GDBM distribution in the future.
+#  <downsj@teeny.org>
+0      belong  0x13579ace      application/x-gdbm
+0      lelong  0x13579ace      application/x-gdbm
+0      string          GDBM    application/x-gdbm
+#
+0      belong  0x061561        application/x-dbm
+#
+# Executables
+#
+0      string          \177ELF 
+>4     byte            0
+>4     byte            1
+>4     byte            2
+>5     byte            0
+>5     byte            1
+>>16   leshort         0
+>>16   leshort         1       application/x-object
+>>16   leshort         2       application/x-executable
+>>16   leshort         3       application/x-sharedlib
+>>16   leshort         4       application/x-coredump
+#
+# DOS
+0              string                  MZ                              application/x-dosexec
+#
+# KDE
+0              string  [KDE\ Desktop\ Entry]   application/x-kdelnk
+0              string  \#\ KDE\ Config\ File   application/x-kdelnk
+# xmcd database file for kscd
+0              string  \#\ xmcd                text/xmcd
+
+#------------------------------------------------------------------------------
+# pkgadd:  file(1) magic for SysV R4 PKG Datastreams
+#
+0      string          #\ PaCkAgE\ DaTaStReAm  application/x-svr4-package
+
+#PNG Image Format
+0      string          \x89PNG                 image/png
+0      string          \0x89PNG                image/png
diff --git a/htdig/cookies.txt b/htdig/cookies.txt
new file mode 100644 (file)
index 0000000..5088fbd
--- /dev/null
@@ -0,0 +1,38 @@
+#
+# Example of input file for cookies for ht://Dig and ht://Check
+#
+# Copyright (c) 1999-2004 Comune di Prato - Prato - Italy
+# Copyright (c) 1995-2004 The ht://Dig Group <www.htdig.org>
+# Author: Gabriele Bartolini - Prato - Italy <angusgb@users.sourceforge.net>
+#
+# For copyright details, see the file COPYING in your distribution
+# or the GNU General Public License version 2 or later 
+# <http://www.gnu.org/copyleft/gpl.html>
+#
+# This file must be located through the 'cookies_input_file' directive, and
+# its purpose is to pre-load cookies into ht://Check and ht://Dig and to be used for a
+# crawl. Each line contains one name-value pair. Lines beginning with '#'
+# or empty ones are ignored.
+#
+# Information has been taken from: http://www.cookiecentral.com/faq/#3.5
+#
+# Each line represents a single piece of stored information.
+# A tab is inserted between each of the fields. From left-to-right,
+# here is what each field represents:
+#
+# domain       The domain that created AND that can read the variable.
+# flag         A TRUE/FALSE value indicating if all machines within a given
+#              domain can access the variable. This value is IGNORED.
+# path         The path within the domain that the variable is valid for.
+# secure       A TRUE/FALSE value indicating if a secure connection with the
+#              domain is needed to access the variable. IGNORED.
+# expiration   The UNIX time that the variable will expire on. UNIX time is
+#              defined as the number of seconds since the epoch (Jan 1, 1970 00:00:00 GMT).
+#              If you want to issue a session cookie, just set this field
+#              value to '0'.
+# name         The name of the variable.
+# value                The value of the variable.
+#
+# For instance, a cookies.txt file may have an entry that looks like this:
+#
+# .netscape.com     TRUE   /  FALSE  946684799   NETSCAPE_ID  100103
diff --git a/htdig/htdig.conf b/htdig/htdig.conf
new file mode 100644 (file)
index 0000000..9321c5c
--- /dev/null
@@ -0,0 +1,190 @@
+#
+# Example config file for ht://Dig.
+#
+# This configuration file is used by all the programs that make up ht://Dig.
+# Please refer to the attribute reference manual for more details on what
+# can be put into this file.  (http://www.htdig.org/confindex.html)
+# Note that most attributes have very reasonable default values so you
+# really only have to add attributes here if you want to change the defaults.
+#
+# What follows are some of the common attributes you might want to change.
+#
+
+#
+# Specify where the database files need to go.  Make sure that there is
+# plenty of free disk space available for the databases.  They can get
+# pretty big.
+#
+database_dir:          /var/lib/htdig/db
+
+#
+# This specifies the URL where the robot (htdig) will start.  You can specify
+# multiple URLs here.  Just separate them by some whitespace.
+# The example here will cause the ht://Dig homepage and related pages to be
+# indexed.
+# You could also index all the URLs in a file like so:
+# start_url:          `${common_dir}/start.url`
+#
+start_url:             http://www.htdig.org/
+
+#
+# This attribute limits the scope of the indexing process.  The default is to
+# set it to the same as the start_url above.  This way only pages that are on
+# the sites specified in the start_url attribute will be indexed and it will
+# reject any URLs that go outside of those sites.
+#
+# Keep in mind that the value for this attribute is just a list of string
+# patterns. As long as URLs contain at least one of the patterns it will be
+# seen as part of the scope of the index.
+#
+limit_urls_to:         ${start_url}
+
+#
+# This attribute is used for compressing the database.  The default is to
+# set it to the same as the limit_urls_to above, plus some common endings.
+#
+# Keep in mind that this list should be short.  If your  start_url  is a very
+# long list of URLs, it may be wise to replace it with something like
+# http://www.  or comment this out and use the compiled-in default.
+#
+common_url_parts:      ${limit_urls_to} .html .htm .shtml
+
+#
+# If there are particular pages that you definitely do NOT want to index, you
+# can use the exclude_urls attribute.  The value is a list of string patterns.
+# If a URL matches any of the patterns, it will NOT be indexed.  This is
+# useful to exclude things like virtual web trees or database accesses.  By
+# default, all CGI URLs will be excluded.  (Note that the /cgi-bin/ convention
+# may not work on your web server.  Check the  path prefix used on your web
+# server.)
+#
+exclude_urls:          /cgi-bin/ .cgi
+
+#
+# Since ht://Dig does not (and cannot) parse every document type, this 
+# attribute is a list of strings (extensions) that will be ignored during 
+# indexing. These are *only* checked at the end of a URL, whereas 
+# exclude_urls patterns are matched anywhere.
+#
+# Also keep in mind that while other attributes allow regex, these must be 
+# actual strings.
+#
+bad_extensions:                .wav .gz .z .sit .au .zip .tar .hqx .exe .com .gif \
+       .jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi .css
+
+#
+# The string htdig will send in every request to identify the robot.  Change
+# this to your email address.
+#
+maintainer:            unconfigured@htdig.searchengine.maintainer
+
+#
+# The excerpts that are displayed in long results rely on stored information
+# in the index databases.  The compiled default only stores 512 characters of
+# text from each document (this excludes any HTML markup...)  If you plan on
+# using the excerpts you probably want to make this larger.  The only concern
+# here is that more disk space is going to be needed to store the additional
+# information.  Since disk space is cheap (! :-)) you might want to set this
+# to a value so that a large percentage of the documents that you are going
+# to be indexing are stored completely in the database.  At SDSU we found
+# that by setting this value to about 50k the index would get 97% of all
+# documents completely and only 3% was cut off at 50k.  You probably want to
+# experiment with this value.
+# Note that if you want to set this value low, you probably want to set the
+# excerpt_show_top attribute to true so that the top excerpt_length characters
+# of the document are always shown.
+#
+max_head_length:       10000
+
+#
+# To limit network connections, ht://Dig will only pull up to a certain limit
+# of bytes. This prevents the indexing from dying because the server keeps
+# sending information. However, several FAQs happen because people have files
+# bigger than the default limit of 100KB. This sets the default a bit higher.
+# (see <http://www.htdig.org/FAQ.html> for more)
+#
+max_doc_size:          200000
+
+#
+# Most people expect some sort of excerpt in results. By default, if the 
+# search words aren't found in context in the stored excerpt, htsearch shows 
+# the text defined in the no_excerpt_text attribute:
+# (None of the search words were found in the top of this document.)
+# This attribute instead will show the top of the excerpt.
+#
+no_excerpt_show_top:   true
+
+#
+# Depending on your needs, you might want to enable some of the fuzzy search
+# algorithms.  There are several to choose from and you can use them in any
+# combination you feel comfortable with.  Each algorithm will get a weight
+# assigned to it so that in combinations of algorithms, certain algorithms get
+# preference over others.  Note that the weights only affect the ranking of
+# the results, not the actual searching.
+# The available algorithms are:
+#      accents
+#      exact
+#      endings
+#      metaphone
+#      prefix
+#      regex
+#      soundex
+#      speling [sic]
+#      substring
+#      synonyms
+# By default only the "exact" algorithm is used with weight 1.
+# Note that if you are going to use the endings, metaphone, soundex, accents,
+# or synonyms algorithms, you will need to run htfuzzy to generate
+# the databases they use.
+#
+search_algorithm:      exact:1 synonyms:0.5 endings:0.1
+
+#
+# The following are the templates used in the builtin search results.
+# The default is to use compiled versions of these files, which produces
+# slightly faster results. However, uncommenting these lines makes it
+# very easy to change the format of search results.
+# See <http://www.htdig.org/hts_templates.html> for more details.
+#
+# template_map: Long long ${common_dir}/long.html \
+#              Short short ${common_dir}/short.html
+# template_name: long
+
+#
+# The following are used to change the text for the page index.
+# The defaults are just boring text numbers.  These images spice
+# up the result pages quite a bit.  (Feel free to do whatever, though)
+#
+next_page_text:                <img src="/htdig/buttonr.gif" border="0" align="middle" width="30" height="30" alt="next">
+no_next_page_text:
+prev_page_text:                <img src="/htdig/buttonl.gif" border="0" align="middle" width="30" height="30" alt="prev">
+no_prev_page_text:
+page_number_text:      '<img src="/htdig/button1.gif" border="0" align="middle" width="30" height="30" alt="1">' \
+                       '<img src="/htdig/button2.gif" border="0" align="middle" width="30" height="30" alt="2">' \
+                       '<img src="/htdig/button3.gif" border="0" align="middle" width="30" height="30" alt="3">' \
+                       '<img src="/htdig/button4.gif" border="0" align="middle" width="30" height="30" alt="4">' \
+                       '<img src="/htdig/button5.gif" border="0" align="middle" width="30" height="30" alt="5">' \
+                       '<img src="/htdig/button6.gif" border="0" align="middle" width="30" height="30" alt="6">' \
+                       '<img src="/htdig/button7.gif" border="0" align="middle" width="30" height="30" alt="7">' \
+                       '<img src="/htdig/button8.gif" border="0" align="middle" width="30" height="30" alt="8">' \
+                       '<img src="/htdig/button9.gif" border="0" align="middle" width="30" height="30" alt="9">' \
+                       '<img src="/htdig/button10.gif" border="0" align="middle" width="30" height="30" alt="10">'
+#
+# To make the current page stand out, we will put a border around the
+# image for that page.
+#
+no_page_number_text:   '<img src="/htdig/button1.gif" border="2" align="middle" width="30" height="30" alt="1">' \
+                       '<img src="/htdig/button2.gif" border="2" align="middle" width="30" height="30" alt="2">' \
+                       '<img src="/htdig/button3.gif" border="2" align="middle" width="30" height="30" alt="3">' \
+                       '<img src="/htdig/button4.gif" border="2" align="middle" width="30" height="30" alt="4">' \
+                       '<img src="/htdig/button5.gif" border="2" align="middle" width="30" height="30" alt="5">' \
+                       '<img src="/htdig/button6.gif" border="2" align="middle" width="30" height="30" alt="6">' \
+                       '<img src="/htdig/button7.gif" border="2" align="middle" width="30" height="30" alt="7">' \
+                       '<img src="/htdig/button8.gif" border="2" align="middle" width="30" height="30" alt="8">' \
+                       '<img src="/htdig/button9.gif" border="2" align="middle" width="30" height="30" alt="9">' \
+                       '<img src="/htdig/button10.gif" border="2" align="middle" width="30" height="30" alt="10">'
+
+# local variables:
+# mode: text
+# eval: (if (eq window-system 'x) (progn (setq font-lock-keywords (list '("^#.*" . font-lock-keyword-face) '("^[a-zA-Z][^ :]+" . font-lock-function-name-face) '("[+$]*:" . font-lock-comment-face) )) (font-lock-mode)))
+# end:
diff --git a/htdig/mime.types b/htdig/mime.types
new file mode 100644 (file)
index 0000000..04b595e
--- /dev/null
@@ -0,0 +1,279 @@
+# This is the default mime.types file from the Apache web server distribution
+
+# This file controls what Internet media types are sent to the client for
+# given file extension(s).  Sending the correct media type to the client
+# is important so they know how to handle the content of the file.
+# Extra types can either be added here or by using an AddType directive
+# in your config files. For more information about Internet media types,
+# please read RFC 2045, 2046, 2047, 2048, and 2077.  The Internet media type
+# registry is at <ftp://ftp.iana.org/in-notes/iana/assignments/media-types/>.
+
+# MIME type                    Extension
+application/EDI-Consent
+application/EDI-X12
+application/EDIFACT
+application/activemessage
+application/andrew-inset       ez
+application/applefile
+application/atomicmail
+application/cals-1840
+application/commonground
+application/cybercash
+application/dca-rft
+application/dec-dx
+application/eshop
+application/hyperstudio
+application/iges
+application/mac-binhex40       hqx
+application/mac-compactpro     cpt
+application/macwriteii
+application/marc
+application/mathematica
+application/msword             doc
+application/news-message-id
+application/news-transmission
+application/octet-stream       bin dms lha lzh exe class
+application/oda                        oda
+application/pdf                        pdf
+application/pgp-encrypted
+application/pgp-keys
+application/pgp-signature
+application/pkcs10
+application/pkcs7-mime
+application/pkcs7-signature
+application/postscript         ai eps ps
+application/prs.alvestrand.titrax-sheet
+application/prs.cww
+application/prs.nprend
+application/remote-printing
+application/riscos
+application/rtf                        rtf
+application/set-payment
+application/set-payment-initiation
+application/set-registration
+application/set-registration-initiation
+application/sgml
+application/sgml-open-catalog
+application/slate
+application/smil               smi smil
+application/vemmi
+application/vnd.3M.Post-it-Notes
+application/vnd.FloGraphIt
+application/vnd.acucobol
+application/vnd.anser-web-certificate-issue-initiation
+application/vnd.anser-web-funds-transfer-initiation
+application/vnd.audiograph
+application/vnd.businessobjects
+application/vnd.claymore
+application/vnd.comsocaller
+application/vnd.dna
+application/vnd.dxr
+application/vnd.ecdis-update
+application/vnd.ecowin.chart
+application/vnd.ecowin.filerequest
+application/vnd.ecowin.fileupdate
+application/vnd.ecowin.series
+application/vnd.ecowin.seriesrequest
+application/vnd.ecowin.seriesupdate
+application/vnd.enliven
+application/vnd.epson.salt
+application/vnd.fdf
+application/vnd.ffsns
+application/vnd.framemaker
+application/vnd.fujitsu.oasys
+application/vnd.fujitsu.oasys2
+application/vnd.fujitsu.oasys3
+application/vnd.fujitsu.oasysgp
+application/vnd.fujitsu.oasysprs
+application/vnd.fujixerox.docuworks
+application/vnd.hp-HPGL
+application/vnd.hp-PCL
+application/vnd.hp-PCLXL
+application/vnd.hp-hps
+application/vnd.ibm.MiniPay
+application/vnd.ibm.modcap
+application/vnd.intercon.formnet
+application/vnd.intertrust.digibox
+application/vnd.intertrust.nncp
+application/vnd.is-xpr
+application/vnd.japannet-directory-service
+application/vnd.japannet-jpnstore-wakeup
+application/vnd.japannet-payment-wakeup
+application/vnd.japannet-registration
+application/vnd.japannet-registration-wakeup
+application/vnd.japannet-setstore-wakeup
+application/vnd.japannet-verification
+application/vnd.japannet-verification-wakeup
+application/vnd.koan
+application/vnd.lotus-1-2-3
+application/vnd.lotus-approach
+application/vnd.lotus-freelance
+application/vnd.lotus-organizer
+application/vnd.lotus-screencam
+application/vnd.lotus-wordpro
+application/vnd.meridian-slingshot
+application/vnd.mif            mif
+application/vnd.minisoft-hp3000-save
+application/vnd.mitsubishi.misty-guard.trustweb
+application/vnd.ms-artgalry
+application/vnd.ms-asf
+application/vnd.ms-excel
+application/vnd.ms-powerpoint  ppt
+application/vnd.ms-project
+application/vnd.ms-tnef
+application/vnd.ms-works
+application/vnd.music-niff
+application/vnd.musician
+application/vnd.netfpx
+application/vnd.noblenet-directory
+application/vnd.noblenet-sealer
+application/vnd.noblenet-web
+application/vnd.novadigm.EDM
+application/vnd.novadigm.EDX
+application/vnd.novadigm.EXT
+application/vnd.osa.netdeploy
+application/vnd.powerbuilder6
+application/vnd.powerbuilder6-s
+application/vnd.rapid
+application/vnd.seemail
+application/vnd.shana.informed.formtemplate
+application/vnd.shana.informed.interchange
+application/vnd.shana.informed.package
+application/vnd.street-stream
+application/vnd.sun.xml.calc   sxc
+application/vnd.sun.xml.draw   sxd
+application/vnd.sun.xml.impress        sxi
+application/vnd.sun.xml.writer sxw
+application/vnd.svd
+application/vnd.swiftview-ics
+application/vnd.truedoc
+application/vnd.visio
+application/vnd.webturbo
+application/vnd.wrq-hp3000-labelled
+application/vnd.wt.stf
+application/vnd.xara
+application/vnd.yellowriver-custom-menu
+application/wita
+application/wordperfect5.1
+application/x-bcpio            bcpio
+application/x-cdlink           vcd
+application/x-chess-pgn                pgn
+application/x-compress
+application/x-cpio             cpio
+application/x-csh              csh
+application/x-director         dcr dir dxr
+application/x-dvi              dvi
+application/x-futuresplash     spl
+application/x-gtar             gtar
+application/x-gzip
+application/x-hdf              hdf
+application/x-javascript       js
+application/x-koan             skp skd skt skm
+application/x-latex            latex
+application/x-netcdf           nc cdf
+application/x-sh               sh
+application/x-shar             shar
+application/x-shockwave-flash  swf
+application/x-stuffit          sit
+application/x-sv4cpio          sv4cpio
+application/x-sv4crc           sv4crc
+application/x-tar              tar
+application/x-tcl              tcl
+application/x-tex              tex
+application/x-texinfo          texinfo texi
+application/x-troff            t tr roff
+application/x-troff-man                man
+application/x-troff-me         me
+application/x-troff-ms         ms
+application/x-ustar            ustar
+application/x-wais-source      src
+application/x400-bp
+application/xml
+application/zip                        zip
+audio/32kadpcm
+audio/basic                    au snd
+audio/midi                     mid midi kar
+audio/mpeg                     mpga mp2 mp3
+audio/vnd.qcelp
+audio/x-aiff                   aif aiff aifc
+audio/x-pn-realaudio           ram rm
+audio/x-pn-realaudio-plugin    rpm
+audio/x-realaudio              ra
+audio/x-wav                    wav
+chemical/x-pdb                 pdb xyz
+image/cgm
+image/g3fax
+image/gif                      gif
+image/ief                      ief
+image/jpeg                     jpeg jpg jpe
+image/naplps
+image/png                      png
+image/prs.btif
+image/tiff                     tiff tif
+image/vnd.dwg
+image/vnd.dxf
+image/vnd.fpx
+image/vnd.net-fpx
+image/vnd.svf
+image/vnd.xiff
+image/x-cmu-raster             ras
+image/x-portable-anymap                pnm
+image/x-portable-bitmap                pbm
+image/x-portable-graymap       pgm
+image/x-portable-pixmap                ppm
+image/x-rgb                    rgb
+image/x-xbitmap                        xbm
+image/x-xpixmap                        xpm
+image/x-xwindowdump            xwd
+message/delivery-status
+message/disposition-notification
+message/external-body
+message/http
+message/news
+message/partial
+message/rfc822
+model/iges                     igs iges
+model/mesh                     msh mesh silo
+model/vnd.dwf
+model/vrml                     wrl vrml
+multipart/alternative
+multipart/appledouble
+multipart/byteranges
+multipart/digest
+multipart/encrypted
+multipart/form-data
+multipart/header-set
+multipart/mixed
+multipart/parallel
+multipart/related
+multipart/report
+multipart/signed
+multipart/voice-message
+text/css                       css
+text/directory
+text/enriched
+text/plain                     asc txt
+text/prs.lines.tag
+text/rfc822-headers
+text/richtext                  rtx
+text/rtf                       rtf
+text/sgml                      sgml sgm
+text/tab-separated-values      tsv
+text/uri-list
+text/vnd.abc
+text/vnd.flatland.3dml
+text/vnd.fmi.flexstor
+text/vnd.in3d.3dml
+text/vnd.in3d.spot
+text/vnd.latex-z
+text/x-setext                  etx
+text/xml                       xml
+video/mpeg                     mpeg mpg mpe
+video/quicktime                        qt mov
+video/vnd.motorola.video
+video/vnd.motorola.videop
+video/vnd.vivo
+video/x-msvideo                        avi
+video/x-sgi-movie              movie
+x-conference/x-cooltalk                ice
+text/html                      html htm