From ktl@wag.caltech.edu Mon Dec 13 14:06:07 1993
Article: 4282 of comp.infosystems.www
From: ktl@wag.caltech.edu (Kian-Tat Lim)
Newsgroups: comp.infosystems.www
Subject: Emacs info to HTML filter
Date: Wed, 8 Dec 93 03:15:25 MET
Organization: California Institute of Technology, Pasadena, CA
Message-ID: <2e3dbt$9c5@gap.cco.caltech.edu>
I had a need for converting Emacs info files (written directly, not
compiled from TeXinfo source) into HTML. I didn't see a tool out there that
would do the job, so I wrote one. It's not very sophisticated, but it gets
the job done adequately.
===============================================================================
#!/usr/bin/perl
#
# Translate Emacs info files into HTML pages, one page per node.
# Menus, Notes, and Up/Next/Prev pointers are translated into links.
# Other text is left as-is in <PRE> tags.
#
# Option: -d specifies where to put the output files.
#
# This code is in the public domain. No warranties, express or implied.
#
# Author: Kian-Tat Lim (ktl@wag.caltech.edu)
#
# $Header: /source/info2html,v 1.1 93/12/07 15:18:47 ktl Exp $
#
# Canonicalize a node name.
#
# Takes a node name and returns a copy in which "&" becomes "_and_", each of
# "<", ">" and "/" becomes "_", and every double quote becomes a single
# quote.  The caller's string is left untouched.
sub canon {
    my ($name) = @_;

    $name =~ s{\&}{_and_}g;
    $name =~ s{[<>/]}{_}g;
    $name =~ s{"}{'}g;

    return $name;
}
# Translate HTML special characters into escape sequences.
#
# Takes one string and returns a copy in which "&", "<" and ">" are replaced
# by the entities "&amp;", "&lt;" and "&gt;".  The caller's string is not
# modified.
sub html {
    my ($text) = @_;

    # "&" has to be escaped first; otherwise the ampersands introduced by
    # the next two substitutions would themselves be escaped again.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    return $text;
}
# Translate escape sequences back to characters.
#
# Takes one string and returns a copy in which the entities "&lt;", "&gt;"
# and "&amp;" are replaced by "<", ">" and "&".  The caller's string is not
# modified.
sub dehtml {
    my ($text) = @_;

    $text =~ s/&lt;/</g;
    $text =~ s/&gt;/>/g;
    # "&amp;" is decoded last so that an escaped entity such as "&amp;lt;"
    # comes back as the literal text "&lt;" rather than as "<".
    $text =~ s/&amp;/&/g;

    return $text;
}
#
# Main program
#
# Reads Emacs info text from the files named on the command line (or from
# standard input) and writes one HTML page per node, named
# "<file>.<canonical node name>.html", into the current directory or into
# the directory given with -d.
#
# NOTE(review): this copy of the script had lost the HTML markup from its
# output strings (every print emitted bare text and the computed link
# targets were never used).  The tags below are a reconstruction guided by
# the header comment: links for menus, notes and Up/Next/Prev pointers,
# everything else inside <PRE>.  Compare with a pristine copy if one turns up.
#
use strict;
use warnings;
use Getopt::Std;

# Return the relative URL of the page generated for node $name of info file
# $file.  The page's file name is built with canon(); characters that are
# not safe inside a URL path (spaces, "#", "?", "%", ":" ...) are then
# percent-encoded so the link still resolves to that file.
sub _node_href {
    my ($file, $name) = @_;
    my $href = "$file." . canon($name) . ".html";
    $href =~ s{([^A-Za-z0-9._~/\-])}{sprintf('%%%02X', ord($1))}ge;
    return $href;
}

# Build the replacement text for one "*Note" cross reference.
#   $initial - the "N" or "n" of the matched "Note", selects "See" or "see"
#   $file    - info file the current node belongs to
#   $tag     - reference label, already HTML-escaped
#   $target  - HTML-escaped node name for the "tag: node" form, or undef
#              for the "node::" form where the label is the node name
sub _note_link {
    my ($initial, $file, $tag, $target) = @_;
    my $see   = ($initial eq 'N') ? 'See' : 'see';
    my $node  = defined $target ? $target : $tag;
    my $label = defined $target ? "$tag: $target" : $tag;
    my $href  = _node_href($file, dehtml($node));
    return "$see <A HREF=\"$href\">$label</A>";
}

# Close $old (when defined) and return a new handle writing to $path.
# Dies on any failure so that a full disk or a bad -d directory is noticed.
sub _open_output {
    my ($old, $path) = @_;
    if (defined $old) {
        close($old) or die "$0: error closing output file: $!\n";
    }
    open(my $new, '>', $path) or die "$0: cannot write $path: $!\n";
    return $new;
}

# Handle -d switch.  getopt() treats -d as taking a value and, like the old
# getopt.pl Getopt('d') call, accepts any other switch without complaint.
my %opt;
getopt('d', \%opt);
if (defined $opt{d}) {
    chdir($opt{d}) or die "$0: cannot chdir to $opt{d}: $!\n";
}

# Any header stuff in the info file goes to the bit bucket
my $out       = _open_output(undef, '/dev/null');
my $file      = '';   # info file name taken from the current node header
my $menumode  = 0;    # true once "* Menu:" was seen in the current node
my $page_open = 0;    # true while $out is a node page with <PRE>/<DL> open

# Labels for the navigation pointers found on a node header line
my %pointer_text = (
    Up   => 'Go up to',
    Next => 'Go forward to',
    Prev => 'Go backward to',
);

# Process lines
LINE:
while (defined(my $line = <>)) {
    # Start a new node
    if ($line =~ /^\037/) {
        # Finish off what we were last doing
        if ($page_open) {
            print {$out} ($menumode ? "</DL>\n" : "</PRE>\n");
        }
        $menumode  = 0;
        $page_open = 0;

        # ^_^L signals the end of the useful stuff
        last LINE if $line =~ /^\037\014/;

        # Get the node header line, done if none left
        my $header = <>;
        last LINE unless defined $header;

        my ($header_file, $node) =
            $header =~ /^File:\s+(\S+)\s+Node:\s+([^,]+),/;
        if (!defined $node) {
            # Not a node header (e.g. a tag table): discard this section
            # instead of creating a page with an empty name.
            $out = _open_output($out, '/dev/null');
            next LINE;
        }
        $file = $header_file;

        # Start a new file
        $out = _open_output($out, "$file." . canon($node) . ".html");
        print {$out} "<TITLE>", html($node), "</TITLE>\n";

        # Split out Up, Next, Prev pointers so we can make them into links
        my @items = split /,/, $header;
        shift @items;
        ITEM:
        for my $item (@items) {
            next ITEM unless $item =~ /^ (Up|Next|Prev): (.*)/;
            my ($kind, $target) = ($1, $2);
            # The top-level directory node has no page of its own
            next ITEM
                if $kind eq 'Up'
                && ($target eq '(dir)' || $target eq '(DIR)');
            print {$out} "$pointer_text{$kind} <A HREF=\"",
                _node_href($file, $target), "\">", html($target),
                "</A>.<P>\n";
        }

        # Leave the rest of the text as-is
        print {$out} "<PRE>\n";
        $page_open = 1;
    }
    # Start a menu
    elsif ($line =~ /^\* Menu:/) {
        print {$out} "</PRE>\n<H2>Menu</H2>\n<DL>\n";
        $menumode = 1;
    }
    # Process a menu item by turning it into a link
    elsif ($menumode && $line =~ /^\*\s+([^:]*)::\s*(.*)/) {
        my ($name, $description) = ($1, $2);
        print {$out} "<DT><A HREF=\"", _node_href($file, $name), "\">",
            html($name), "</A>\n";
        print {$out} "<DD>", html($description), "\n";
    }
    # Alternate menu style ("tag: node text").  The node name ends at a tab,
    # comma, period or the end of the line; spaces are allowed inside it.
    elsif ($menumode && $line =~ /^\*\s+([^:]*):\s*([^\t\n,.]+)(.*)/) {
        my ($tag, $name, $description) = ($1, $2, $3);
        # Trim off termination character and space, if any
        $description =~ s/^[\t,.]\s*//;
        print {$out} "<DT>", html($tag), ": <A HREF=\"",
            _node_href($file, $name), "\">", html($name), "</A>\n";
        print {$out} "<DD>", html($description), "\n";
    }
    # Process heading text inside menus -- continuation lines are just
    # dumped below
    elsif ($menumode && $line =~ /^\S/) {
        print {$out} "<DT>", html($line);
    }
    # Anything else.  Look for notes; otherwise, just dump after handling
    # specials.
    else {
        my $text = html($line);
        # Convert a note into a link ("*Note node::")
        $text =~ s{\*([Nn])ote\s+([^:]*)::}
                  {_note_link($1, $file, $2, undef)}ge;
        # Alternate note style ("*Note tag: node" ended by tab, comma,
        # period or end of line).
        # NOTE(review): the terminator class here showed a space, like the
        # menu pattern whose comment says tab; changed to match -- confirm.
        $text =~ s{\*([Nn])ote\s+([^:]*):\s*([^\t\n,.]+)}
                  {_note_link($1, $file, $2, $3)}ge;
        print {$out} $text;
    }
}

# Input ended without a final separator: close the page still in progress.
if ($page_open) {
    print {$out} ($menumode ? "</DL>\n" : "</PRE>\n");
}
close($out) or die "$0: error closing output file: $!\n";
===============================================================================
--
Kian-Tat Lim, ktl@wag.caltech.edu [RIPEM available]
Materials & Molecular Simulation Center, Caltech
Henry Spencer left-of-| signature fan