SOURCES: hyperestraier.sh (NEW) - my own script for making searcha...

twittner twittner at pld-linux.org
Mon Apr 17 16:59:47 CEST 2006


Author: twittner                     Date: Mon Apr 17 14:59:47 2006 GMT
Module: SOURCES                       Tag: HEAD
---- Log message:
- my own script for making searchable sites with hyperestraier

---- Files affected:
SOURCES:
   hyperestraier.sh (NONE -> 1.1)  (NEW)

---- Diffs:

================================================================
Index: SOURCES/hyperestraier.sh
diff -u /dev/null SOURCES/hyperestraier.sh:1.1
--- /dev/null	Mon Apr 17 16:59:47 2006
+++ SOURCES/hyperestraier.sh	Mon Apr 17 16:59:42 2006
@@ -0,0 +1,203 @@
+#!/bin/bash
+# bash is named explicitly: brace expansion and "echo -e" are not available in every /bin/sh
+# WARNING!!! - NOT TESTED
+
+# What the script does for every site name given on the command line:
+# 1. create the directory skeleton
+# 2. create and install an apache config
+# 3. install estseek search page templates and config
+# 4. set up proper config values
+# 5. create a shell wrapper for /usr/lib/hyperestraier/estseek.cgi to avoid copying the estseek.cgi binary
+# 6. index the content if it exists and the index itself doesn't exist yet
+# 7. add links to estseek.cgi
+
+# Exemplary directory layout:
+# /srv/http/domek.dom/${SITE}/{html,cgi-bin,est.db}
+
+# Default settings; a hyperestraier.conf in the current directory may override them.
+DATE=$(date '+%Y-%m-%d %H:%M:%S %z')   # timestamp written into the generated apache config
+DOMAIN="domek.dom"
+VHOST="asus.domek.dom:80"
+
+ROOTDIR="/srv/http/${DOMAIN}"
+HTTPCONFDIR="/etc/httpd/conf.d"
+
+OWNER="root"
+GROUP="http"
+MODE="0640"
+DIRMODE="0750"
+EXEMODE="0750"
+UMASK="022"
+
+usage ()
+{
+   	echo "Usage: hyperestraier target_directory [next_directory ...]"
+	[ "$1" = "exit" ] && exit 1
+}
+
+[ $# -eq 0 ] && usage exit
+
+# ... or fetch them from config file
+[ -e hyperestraier.conf ] && . ./hyperestraier.conf
+
+umask ${UMASK}
+
+# process commandline (sites names)
+while [ $# -gt 0 ] ; do
+    SITE=${1}
+    shift
+	echo -e "Setting up site $SITE ...\n"
+	SITEDIR=${ROOTDIR}/${SITE}
+	# 1. create directory skeleton
+	echo -en "Creating directory skeleton $SITEDIR ... "
+	if ! [ -d ${SITEDIR} ] ; then
+		install -o ${OWNER} -g ${GROUP} -m ${DIRMODE} -d ${SITEDIR}
+		install -o ${OWNER} -g ${GROUP} -m ${DIRMODE} -d ${SITEDIR}/{html,cgi-bin,est.db}
+		echo -e "directory skeleton created.\n"
+		echo "Please, copy content to ${SITEDIR}/html and rerun this script with \`${SITE}\' argument when it'll finish.\n"
+		continue
+	elif ! [ -d ${SITEDIR}/html -a -d ${SITEDIR}/cgi-bin -a ${SITEDIR}/est.db ] ; then
+		install -o ${OWNER} -g ${GROUP} -m ${DIRMODE} -d ${SITEDIR}/{html,cgi-bin,est.db}
+		echo 'directory skeleton created.'
+	   	echo -e "Please, copy content to ${SITEDIR}/html and rerun this script with \`${SITE}\' argument when it'll finish.\n"
+		continue
+	else
+		echo -e "${SITEDIR}/{html,cgi-bin,est.db} already exist - continue ...\n"
+	fi
+	
+	# 2. create and install config for apache
+	CONFIG=${HTTPCONFDIR}/20_hyperestraier-${SITE}.conf
+	echo -e "Creating and installing config ${CONFIG} for apache ... "
+	if ! [ -e ${CONFIG} ] ; then
+		cat <<- EOF > ${CONFIG}
+		# generated by hyperestraier.sh script - ${DATE}
+		<VirtualHost ${VHOST}>
+			
+		    ServerAdmin webmaster@${DOMAIN}
+		    DocumentRoot ${SITEDIR}/html
+		    ServerName ${SITE}.${DOMAIN}
+		    ErrorLog /var/log/httpd/${SITE}.${DOMAIN}-error_log
+		    CustomLog /var/log/httpd/${SITE}.${DOMAIN}-access_log combined
+		
+			<Location />
+				AllowOverride None
+				Allow from all		
+			</Location>
+	
+			ScriptAlias /cgi-bin/estseek.cgi ${SITEDIR}/cgi-bin/estseek.cgi
+		
+			<Location /cgi-bin/estseek.cgi>
+				Options ExecCGI
+				Allow from all
+			</Location>
+	
+		</VirtualHost>
+		# vi:syntax=apache
+		EOF
+
+		echo -e "created and installed. Remember to restart/reload your apache.\n"
+	else
+		echo -e "apache config ${CONFIG} had already existed before - remained untouched.\n"
+	fi
+
+	# 3. install estseek search page templates and config
+	echo -en "\nInstalling estseek search page templates and config ... "
+	for FILE in estseek.{conf,tmpl,top} ; do
+		echo -n "${FILE}: "
+		if ! [ -f ${SITEDIR}/cgi-bin/${FILE} ] ; then
+			install -o ${OWNER} -g ${GROUP} -m ${MODE} /usr/share/hyperestraier/${FILE} ${SITEDIR}/cgi-bin
+			echo "installed."
+		else
+			echo "instalation skipped - had already existed before."
+		fi
+	done
+	echo -e "estseek search page templates and config has been installed."
+
+	# 4. setup proper config values
+	echo -ne "Setting appropriate config values in estseek.conf ... "
+	sed -i \
+		-e "s@^\s*indexname:.*@indexname: ${SITEDIR}/est.db@" \
+		-e "s@^\s*replace: ^file:///.*@replace: ^file://${SITEDIR}/html/{{!}}http://${SITE}.${DOMAIN}/@" \
+			${SITEDIR}/cgi-bin/estseek.conf
+	echo -e "done.\n"
+
+	# 5. create shell script wrapper for /usr/lib/hyperestraier/estseek.cgi
+	if ! [ -x ${SITEDIR}/cgi-bin/estseek.cgi ] ; then 
+		cat <<- EOF > ${SITEDIR}/cgi-bin/estseek.cgi
+		#!/bin/sh
+		SCRIPT_NAME=${SITEDIR}/cgi-bin/estseek.cgi /usr/lib/hyperestraier/estseek.cgi
+		EOF
+		chown ${OWNER}:${GROUP} ${SITEDIR}/cgi-bin/estseek.cgi
+		chmod 750 ${SITEDIR}/cgi-bin/estseek.cgi
+	fi
+
+	# 6. index content if exists and index itself doesn't exist yet
+	echo -en "\nCreating hyperestraier index ... "
+	if [ -e ${SITEDIR}/html/index.html -o -e ${SITEDIR}/html/autobook.html ] ; then
+		[ -d ${SITEDIR}/est.db ] || install -o ${OWNER} -g ${GROUP} -m ${DIRMODE} -d ${SITEDIR}/est.db
+		if [ -f ${SITEDIR}/est.db/_list ] ; then
+			echo -e "index had already existed before - remained untouched."
+		else
+			estcmd gather -sd ${SITEDIR}/est.db ${SITEDIR}/html
+			chown -R ${OWNER}:${GROUP} ${SITEDIR}/html
+			chmod -R u=rwX,g=rX,o-rwx ${SITEDIR}/html
+			echo -e "done.\n"
+		fi
+		# 7. add link to estseek.cgi on the top of each html page
+		echo -en "\nAdding estseek.cgi link to "
+		FILES=$(find ${SITEDIR}/html -type f -name '*.html')
+		for FILE in $FILES ; do
+			echo -n "$FILE ... "
+cat << EOF | python - $FILE $SITE $DOMAIN
+
+import re, optparse, sys
+
+parser = optparse.OptionParser()
+(opts, args) = parser.parse_args()
+
+try:
+    filename = args[0]
+    site = args[1]
+    domain = args[2]
+except IndexError:
+    print 'ERROR: 3 arguments are required (in this order): file site domain'
+    sys.exit(127)
+
+re_body = re.compile('(?P<body><\s*?body(\s[^>]*?)?>)', re.I | re.S | re.X)
+re_estseek = re.compile('id="estseek-link"')
+
+repl = '\n<div style="text-align: center;">\n' \
+		+ '<a id="estseek-link" href="http://' + site + '.' + domain + '/cgi-bin/estseek.cgi"' \
+        + ' style="display: inline; text-align: center; font-weight: bolder;"' \
+        + '>Search <span style="font-weight: bold;">' + site + '</span> with hyperestraier.</a>\n' \
+		+ '</div>'
+
+fh = open(filename)
+content = fh.read()
+fh.close()
+
+if re_estseek.search(content) != None:
+    print 'WARN: it seems that estseek.cgi link is already added - exiting ...'
+    sys.exit(0)
+
+body_search = re_body.search(content)
+if body_search != None:
+    body = body_search.group('body')
+    if body != None:
+        content = re_body.sub(body + repl, content)
+        fh = open(filename, "w")
+        fh.write(content)
+        fh.close()
+    else:
+        print 'ERROR: odd - can\'t find <body> after matching it - exiting ...'
+        sys.exit(0)
+else:
+    print 'ERROR: can\'t find <body> - exiting ...'
+    sys.exit(0)
+
+EOF
+			echo "done."
+		done
+	fi
+
+done
================================================================


More information about the pld-cvs-commit mailing list