Improved Web Publishing

After the changes necessary to support full XHTML files, including those produced from Docbook source, I decided to put the web source pages under CVS. This necessitated more modifications to the web publishing makefiles, originally described in Web Publishing using make.

The latest state of the makefiles and the Python script create-html.py is shown below:

Main makefile

# Makefile to process and publish hydrus web contents
#
#
# make compile - create pages by placing page contents into the standard
#                page template. 
#
# make publish - copy processed pages to web directory, and cgi-bin scripts
#
#
# MODIFICATION HISTORY
# Mnemonic      Date    Rel Who
# www-publish   040615  1.0 mpw
#   Written.
# www-publish   050103  1.1 mpw
#   Copy contents of cgi-bin directory under ${ROOT} to ${HTTPD-ROOT}
#   when publishing
# www-publish   050107  1.2 mpw
#   Added distclean option to clean contents of live www directories data
#   and cgi-bin
# www-publish   050127  1.3 mpw
#   Supporting linking of html files within multiple directories
# www-publish   050702  1.4 mpw
#   Remove any .bak files left by ispell
# www-publish   050909  1.5 mpw
#   Revised to handle Docbook output in XHTML 
# www-publish   051001  1.6 mpw
#   Support for CVS project (i.e. ignore cvs directories)
# www-publish   051101  1.7 mpw
#   Ensure that make compile fails immediately on error
# www-publish   051101  1.8 mpw
#   Replace explicit use of gmake with ${MAKE}

# Project layout.
#
# ROOT-DIR is taken from make's own CURDIR rather than the inherited PWD
# environment variable, so that it agrees with the ${CURDIR}-based paths
# used in the recipes below even when make is started with -C or from an
# environment in which PWD is unset or stale.
#
# All values are simply-expanded (:=) so each is evaluated once.
ROOT-DIR := ${CURDIR}
# source pages (one sub-make per directory is run by "compile")
SRC := ${ROOT-DIR}/src
# processed pages produced by "compile" and read by "publish"
OBJ := ${HOME}/public_html
CGI-BIN-DIR := ${ROOT-DIR}/cgi-bin
# directories under ${SRC} handed to newlink.py by "link"
LINK-DIRS := journal articles
# live web server tree; "publish" copies processed pages to ${PUB-DIR}
HTTPD-ROOT := /home/www
PUB-DIR := ${HTTPD-ROOT}/data
TEMPLATE-NORMAL := page-template.html
# catalog file passed to chk_xml.py by compile.mk
CATALOG := ${HOME}/dtd/CATALOG

# Targets that are commands rather than files.  distclean is listed so
# that a stray file of that name cannot disable it; cgi-bin must be phony
# because a directory of that name exists under the project root.
.PHONY: compile publish clean distclean template link cgi-bin

# compile: build the processed pages under ${OBJ} from the sources under
# ${SRC}.
#
#   1. make sure ${OBJ} exists (otherwise cp would create a plain file
#      called ${OBJ} instead of copying the template into it), then copy
#      the page template there;
#   2. remove any .bak files left in the source tree;
#   3. run compile.mk once in every source directory, skipping CVS
#      administrative directories.
#
# The loop stops at the first sub-make that fails, and a failure of find
# itself is also treated as an error rather than as an empty list.
# MKFLAGS is not assigned in this makefile; it expands to nothing unless
# supplied by the caller.
compile:    link
    mkdir -p ${OBJ}
    cp ${TEMPLATE-NORMAL} ${OBJ}
    chmod 644 ${OBJ}/${TEMPLATE-NORMAL}
    find ${SRC} -type f -name "*.bak" -exec rm -f {} \;
    DIRLIST=`find ${SRC} -name "CVS" -prune -o -type d -print` || exit 1 ; \
    for dir in $${DIRLIST} ; do \
        ${MAKE} ${MKFLAGS} -C $${dir} -f ${CURDIR}/compile.mk \
            SRC=${SRC} \
            OBJ=${OBJ} \
            CATALOG=${CATALOG} \
            TEMPLATE-NORMAL=${CURDIR}/${TEMPLATE-NORMAL} \
            ROOT=${CURDIR} || exit 1; \
    done

# publish: run publish.mk in every directory under ${OBJ} to copy the
# processed pages to ${PUB-DIR}.
#
# The directories are walked with an explicit loop (as in "compile")
# instead of "find -exec ... \;": find does not pass on the exit status
# of the command it executes, so a failing sub-make would otherwise go
# unnoticed and "make publish" would report success.
publish: cgi-bin
    DIRLIST=`find ${OBJ} -type d -print` || exit 1 ; \
    for dir in $${DIRLIST} ; do \
        ${MAKE} ${MKFLAGS} -C $${dir} -f ${CURDIR}/publish.mk \
            OBJ=${OBJ} ROOT=${CURDIR} PUB-DIR=${PUB-DIR} || exit 1; \
    done

# update internal links in directories which contain linked html files.
# Each directory is handled in a subshell: newlink.py only runs if the cd
# succeeded (so it can never run in the wrong directory), and the first
# failure stops the loop and fails the target.
link:
    for dir in ${LINK-DIRS}; do \
        ( cd ${SRC}/$${dir} && newlink.py $${dir} ) || exit 1; \
    done

# clean: remove everything under ${OBJ}.
# The first line refuses to continue if OBJ is empty (for example when
# overridden on the command line), since the rm would then be "rm -rf /*".
clean:
    @[ -n "${strip ${OBJ}}" ] || { echo "clean: OBJ is empty, refusing to run rm -rf" >&2; exit 1; }
    rm -rf ${OBJ}/*

# distclean: clean, then also empty the live cgi-bin and data directories
# under ${HTTPD-ROOT}.
# As with clean, an empty HTTPD-ROOT or PUB-DIR would turn the rm
# commands into "rm -rf /cgi-bin/*" or "rm -rf /*", so both are checked
# first.
distclean: clean
    @[ -n "${strip ${HTTPD-ROOT}}" ] && [ -n "${strip ${PUB-DIR}}" ] || { echo "distclean: HTTPD-ROOT or PUB-DIR is empty, refusing to run rm -rf" >&2; exit 1; }
    rm -rf ${HTTPD-ROOT}/cgi-bin/*
    rm -rf ${PUB-DIR}/*

# update cgi-bin binaries if required
#
# Runs publish.mk in the cgi-bin directory ($@) and each of its
# subdirectories, skipping CVS administrative directories.  An explicit
# loop is used instead of "find -exec ... \;" because find does not pass
# on the exit status of the command it executes; the loop stops at the
# first sub-make that fails.
#
# NOTE(review): publish.mk computes its target directory by replacing
# ${OBJ} with ${PUB-DIR} in the sub-make's current directory.  Here the
# sub-make runs under ./cgi-bin, so confirm that ${OBJ} really occurs in
# that path; if it does not, the target directory equals the source
# directory and nothing is copied to ${HTTPD-ROOT}.
cgi-bin:
    DIRLIST=`find $@ -name "CVS" -prune -o -type d -print` || exit 1 ; \
    for dir in $${DIRLIST} ; do \
        ${MAKE} ${MKFLAGS} -C $${dir} -f ${CURDIR}/publish.mk \
            OBJ=${OBJ} ROOT=${CURDIR} PUB-DIR=${HTTPD-ROOT} || exit 1; \
    done

compile.mk

# Makefile for constructing publishable html file from source html
# files and a template.  The variable TEMPLATE-NORMAL
# should be passed as an argument to the make
# directive.  All files are dependent on the template.
#
# MODIFICATION HISTORY
# Mnemonic      Date    Rel Who
# www-publish   040615  1.0 mpw
#   Written.
# www-publish   050904  1.1 mpw
#   Add checking for valid XHTML (via chk_xhtml.py)
# www-publish   050909  1.2 mpw
#   Revised to handle XHTML conformant Docbook output
# www-publish   051001  1.3 mpw
#   SRC and OBJ directories passed on make invocation line
# www-publish   051101  1.4 mpw
#   remove created html file on error (ensures rebuild on next run)
#

# set target directory: the current (source) directory with the ${SRC}
# part of its path replaced by ${OBJ}.  SRC, OBJ, CATALOG,
# TEMPLATE-NORMAL and ROOT are supplied on the make command line.
TD := ${subst ${SRC},${OBJ},${CURDIR}}

# define pattern rule for producing .html files in the target
# directory.  The page is assembled by create-html.py and then checked
# by chk_xml.py.  If either step fails the target is removed, so that a
# partly written or invalid page is rebuilt on the next run instead of
# being left behind looking up to date.
# ${TD} is an order-only prerequisite: the directory must exist before
# the page is written, but its timestamp never triggers a rebuild.
${TD}/%.html : %.html | ${TD}
    ${ROOT}/create-html.py ${TEMPLATE-NORMAL} $< $@ || (rm -f $@; exit 1)
    chk_xml.py -f ${CATALOG} $@ || (rm -f $@; exit 1)

# pattern rule to make non-html targets (images, support files, etc)
# note we ignore directories
${TD}/% : % | ${TD}
    if [ ! -d $< ]; then \
        cp $< $@; \
    fi

# define list of targets (based on list of .html files in current directory)
OBJS := ${addprefix ${TD}/,${wildcard *.html}}

# define list of non-html targets
OTHER := ${addprefix ${TD}/,${filter-out %.html,${wildcard *}}}

# all is a command, not a file: a source file that happens to be called
# "all" must not stop the directory from being processed
.PHONY: all
all: ${TD} ${OBJS} ${OTHER}

# every page depends on the template, so a template change rebuilds them
${OBJS}:    ${TEMPLATE-NORMAL}

# make target directory if necessary
${TD}:
    mkdir -p $@

publish.mk

# Makefile for publishing html files from processed html pages
#
# MODIFICATION HISTORY
# Mnemonic      Date    Rel Who
# www-publish   040615  1.0 mpw
#   Written.
# www-publish   051001  1.1 mpw
#   OBJ dir passed in on make command line
#

# set target directory: the current directory with the ${OBJ} part of
# its path replaced by ${PUB-DIR}
# note, OBJ, PUB-DIR and ROOT are passed on invocation line
TD := ${subst ${OBJ},${PUB-DIR},${CURDIR}}

# pattern rule to make all targets (directories are ignored)
# cp and chmod are joined with && so that a failed copy fails the recipe;
# with ";" a successful chmod on an already published file would hide
# the failure and leave the stale copy in place.
# ${TD} is an order-only prerequisite: it must exist before files are
# copied into it, but its timestamp never triggers a copy.
${TD}/% : % | ${TD}
    if [ ! -d $< ]; then \
        cp $< $@ && chmod 644 $@ ; \
    fi

# define list of targets (that's everything)
OBJS := ${addprefix ${TD}/,${wildcard *}}

# all is a command, not a file: a published file that happens to be
# called "all" must not stop the directory from being processed
.PHONY: all
all: ${TD} ${OBJS}

# make target directory if necessary
${TD}:
    mkdir -p $@

create-html.py

#!/usr/local/bin/python
"""
NAME
    create-html.py - wraps HTML page contents with HTML page template

SYNOPSIS
    create-html.py template_file source_page_contents output_page

DESCRIPTION
    create-html.py will insert the contents of an HTML page into a supplied
    page template, outputting the results as a final HTML page.

    If the page contents have no <body> element, the contents replace
    the marker comment "<!-- page contents go here -->" in the template.
    The title of the resulting page is determined by the first <h1>
    or <h2> header encountered in the page contents.

    If the page contents already have a <body> element (e.g. Docbook
    generated XHTML), only the <body> of the template is used: the
    page's own body is placed at the marker comment inside the
    template's body, and the result replaces the body of the page.
    Everything outside the page's <body> is left as it is.

EXIT STATUS
    0   page written
    1   the page has a <body> but the template does not
    2   too few arguments

    The output file is opened only after the page has been assembled,
    so a failure to assemble it does not leave an empty output file
    behind.

MODIFICATION HISTORY
Mnemonic       Rel    Date   Who
create-html    1.0    040614 mpw
    Written.
create-html    1.1    050909 mpw
    Handle source with existing <body> (Docbook generated)

"""
import sys
import re

default_title = "hydrus.org.uk"

# marker comment in the template that is replaced by the page contents
placeholder = "<!-- page contents go here -->"

re_title = re.compile(r'<title>.*?</title>')
# non-greedy, so that only the first header on a line supplies the title
re_header = re.compile(r'<h[12]>(.*?)</h[12]>')
re_body = re.compile(r'<body>(.*)</body>',re.DOTALL)


def build_page(template, html_in):
    """Return the final page text for the given template and page contents.

    template -- text of the page template
    html_in  -- text of the source page

    Raises ValueError if html_in contains a <body> element but the
    template does not.
    """
    match = re_body.search(html_in)
    if match is not None:
        # page already has body; insert menu template
        page = match.group(1)
        match = re_body.search(template)
        if match is None:
            raise ValueError("missing <body> in template file")
        menupage = match.group(1)
        new_body = "<body>" + menupage.replace(placeholder, page) + "</body>"
        # A function is used as the replacement so that backslashes in
        # the page text are inserted literally rather than being read
        # as escapes or group references by re.sub.
        return re_body.sub(lambda m: new_body, html_in)

    # needs full page template
    # attempt to modify title to reflect page contents
    match = re_header.search(html_in)
    if match is not None:
        page_title = "<title>" + default_title + " - " + match.group(1) + "</title>"
    else:
        page_title = "<title>" + default_title + "</title>"
    # replacement via a function for the same reason as above; a
    # template without a <title> is returned with its head unchanged
    template = re_title.sub(lambda m: page_title, template)
    return template.replace(placeholder, html_in)


def _read_file(path):
    """Return the whole contents of the file at path, closing it afterwards."""
    handle = open(path)
    try:
        return handle.read()
    finally:
        handle.close()


def main(argv):
    """Command line entry point; returns the process exit status."""
    if len(argv) < 4:
        sys.stderr.write(
            "usage: %s template_file source_page_contents output_page\n"
            % argv[0])
        return 2
    template_file = argv[1]
    html_in_file = argv[2]
    html_out_file = argv[3]

    template = _read_file(template_file)
    html_in = _read_file(html_in_file)

    try:
        content = build_page(template, html_in)
    except ValueError:
        sys.stderr.write("%s: missing <body> in template file: %s\n" %
                         (argv[0], template_file))
        return 1

    # open the output only now that there is something to write
    html_out = open(html_out_file, "w")
    try:
        html_out.write(content)
    finally:
        html_out.close()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

$Id: webpublish1.html,v 1.3 2023/03/27 08:07:33 mark Exp $