#!/usr/bin/perl
use strict;
use warnings;

# Print People I Know, public release 4 or so, 1999-10-26
# minor change, 2006-12-20: add style and favicon links
#
# Take a list of people, formatted roughly as follows:
#
#     Here are the people I know.
#
#     Here is another paragraph about them.
#     --
#     sttng: Star Trek: The Next Generation
#     James Kirk, Captain Kirk
#         Captain of the original Enterprise; predecessor to Jean-Luc Picard.
#
#     Jean-Luc Picard, Captain Picard
#         Captain of the Enterprise in sttng; successor to Captain Kirk.
#
# and transform it into a set of web pages, one per person, with
# hyperlinks where the pages mention one another's names.
#
# sttng is a macro.
#
# Input rules, as implemented below:
#   * lines starting with '#' are ignored everywhere;
#   * everything before a line consisting of '--' is the index page text;
#   * after that, an unindented line containing a colon defines a macro
#     ("name: expansion"); any other unindented line is a comma-separated
#     list of names for one person; indented lines are that person's
#     description; a blank line ends the current person.
#
# NOTE(review): the HTML markup emitted by this script was reconstructed
# (the tags had been stripped from the string literals).  The structure
# follows what the code and comments imply, but the exact original markup,
# including the "style and favicon links" mentioned above, could not be
# recovered and should be confirmed against a known-good copy.

# The directory to create the HTML files in.
my $dir = shift @ARGV;
if ((not defined $dir) or not -d $dir) {
    die "Usage: $0 dir [inputfiles]\n";
}

my @data   = ();    # one [ \@names, $description ] pair per person, in input order
my %macros = ();    # macro name => expansion text
my %names  = ();    # every known name => index of its person in @data
my @names  = ();    # raw name lines of the entry currently being read
my @para   = ();    # raw description lines of the entry currently being read

# the real data is preceded by an initial page, followed by a line
# containing just -- at the beginning of the line.
my @initpage       = ();
my $doing_initpage = 1;

# Marker pushed onto @initpage for each blank line of the initial page, so
# that blank lines there separate paragraphs in the generated index.
my $PARAGRAPH_BREAK = "</p>\n<p>";

# Return a copy of the argument with leading and trailing white space removed.
sub trim_space {
    my ($text) = @_;
    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;
    return $text;
}

# when we reach a blank line or EOF, we put the currently-being-read
# name away and prepare to read a new one.
#
# An entry needs at least one non-empty name and at least one description
# line.  An incomplete entry is reported and discarded; its lines are
# cleared so they cannot be glued onto the next person.  This uses a plain
# return rather than "next", because the sub is also called once after the
# read loop has finished, where there is no loop to continue.
sub namedone {
    return unless @names or @para;

    # split on commas, chop off white space, and drop empty names (an empty
    # name would otherwise match at every word boundary when linking)
    my @person_names =
        grep { length } map { trim_space($_) } split /,/, join(" ", @names);
    my $have_description = scalar @para;
    # chop off white space and join with spaces
    my $description = join " ", map { trim_space($_) } @para;

    @names = ();
    @para  = ();

    if (not (@person_names and $have_description)) {
        warn "Oops, boo-boo, line $.\n";
        return;
    }

    for my $name (@person_names) {
        $names{$name} = scalar @data;    # index this person is about to get
    }
    push @data, [ \@person_names, $description ];
    return;
}

# main input loop.  We make one pass over the file, reading it all into
# memory; when we're done, we spit it all out (see below) with changes.
while (my $line = <>) {
    chomp $line;
    next if $line =~ /^#/;    # ignore comment lines

    if ($doing_initpage) {
        if ($line =~ /^--\s*$/) {
            $doing_initpage = 0;
        }
        else {
            # test length, not truth, so a line reading just "0" is kept
            push @initpage, (length $line ? $line : $PARAGRAPH_BREAK);
        }
    }
    elsif ($line =~ /^(\S.*?)\s*:\s*(.*?)\s*$/) {
        $macros{$1} = $2;
    }
    elsif ($line =~ /^\s*$/) {    # blank line
        namedone();
    }
    elsif ($line =~ /^\s+/) {
        push @para, $line;
    }
    else {
        push @names, $line;
    }
}

# in case the last name didn't have a blank line after it
namedone();

# convert the lines of the initpage into one string
my $initpage = join " ", map { trim_space($_) } @initpage;

# pass this guy the title of your page as an argument; it returns the
# opening of an HTML page using that title as both <title> and heading.
# NOTE(review): style and favicon <link> elements presumably belonged in
# the <head>; their targets are unknown, so none are emitted.
sub htmlbegin {
    my ($title) = @_;
    return "<html>\n<head>\n<title>$title</title>\n</head>\n"
         . "<body>\n<h1>$title</h1>\n";
}

# Returns the text that closes a page started with htmlbegin.
sub htmlend {
    return "</body>\n</html>\n";
}

# Figure out what filenames to assign to names.  The filename is the
# person's primary (first) name with every character other than a letter,
# digit or '-' replaced by '-', lowercased.
my @filenames   = ();
my %takenfnames = ("index" => 1);    # index.html is reserved for the index
for my $person (@data) {
    my $fname = $person->[0]->[0];
    $fname =~ tr/-A-Za-z0-9/-/c;
    $fname =~ tr/A-Z/a-z/;
    # this is not the ideal strategy if you have many people with the
    # same primary name, but it works for me.
    $fname .= "-x" while exists $takenfnames{$fname};
    # record the name so a later person with the same primary name gets a
    # distinct file instead of overwriting this one
    $takenfnames{$fname} = 1;
    push @filenames, $fname;
}

# One pattern matching any known name, longest names first, so that
# "Carolyn Hogg Sittler" wins over "Carolyn Hogg" and, because the text is
# scanned only once, the shorter name is not linked again inside the
# longer one's link.  Undefined when there are no names at all.
my $name_re;
if (%names) {
    my $alternation = join "|",
        map  { quotemeta($_) }
        sort { length($b) <=> length($a) or $a cmp $b } keys %names;
    $name_re = qr/\b($alternation)\b/;
}

# Macros are applied one after another in a fixed order (longest name
# first, then alphabetical) so the output does not depend on hash order.
my @macro_order = sort { length($b) <=> length($a) or $a cmp $b } keys %macros;

# Build an HTML anchor from a link target and the text to show.
sub html_link {
    my ($href, $label) = @_;
    return qq(<a href="$href">$label</a>);
}

# This expands macros and makes hyperlinks to names, returning the
# munged string (with a newline appended).  The steps run in this order:
# http URLs, people's names, macros, email addresses.
sub munge {
    my ($text) = @_;

    $text =~ s{(http://[^ ]*[^ ,.])}{ html_link($1, $1) }ge;

    if (defined $name_re) {
        $text =~ s{$name_re}
                  { html_link($filenames[ $names{$1} ] . ".html", $1) }ge;
    }

    for my $macro (@macro_order) {
        $text =~ s/\Q$macro\E/$macros{$macro}/g;
    }

    # require at least one character before the '@'
    $text =~ s{([-A-Za-z0-9.]+\@[-A-Za-z0-9.]*[-A-Za-z0-9])}
              { html_link("mailto:$1", $1) }ge;

    return $text . "\n";
}

# Footer shared by every page.
# NOTE(review): "Search people", "email Kragen" and "Kragen's home page"
# were presumably links; their targets are not recoverable from this file,
# so they are left as plain text.  The index link is safe to restore
# because this script writes index.html into the same directory.
my $trailer = qq(<hr>\n<p><a href="index.html">Index of people</a>)
            . qq( | Search people | email Kragen | Kragen's home page</p>\n);

# Create (or overwrite) the file at the given path and write the remaining
# arguments to it.  Dies if the file cannot be opened, written or closed;
# buffered write errors such as a full filesystem surface at close.
sub write_file {
    my ($path, @chunks) = @_;
    open my $out, '>', $path or die "can't open $path: $!\n";
    print {$out} @chunks or die "can't write $path: $!\n";
    close $out or die "can't close $path: $!\n";
    return;
}

# Here we start to spit things out.  We've read in all the names, so we
# can link them and expand macros in the initpage.
write_file(
    "$dir/index.html",
    htmlbegin("Index of people Kragen Sitaker knows"),
    "<p>", munge($initpage), "</p>\n",
    $trailer, htmlend(),
);

# create a text file suitable for searching: one line per known name,
# "name,page filename,primary name of that person", sorted by name.
write_file(
    "$dir/dir.txt",
    map {
        my $index = $names{$_};
        "$_,$filenames[$index].html,$data[$index]->[0]->[0]\n"
    } sort keys %names
);

# here we spit out the data from the actual names, in the order it was
# in the input file.
for my $index (0 .. $#data) {
    my ($person_names, $description) = @{ $data[$index] };
    write_file(
        "$dir/$filenames[$index].html",
        htmlbegin("$person_names->[0]: known by Kragen Sitaker"),
        "<p>", join(", ", @$person_names), "</p>\n",
        "<p>", munge($description), "</p>\n",
        $trailer, htmlend(),
    );
}

# BUGS:
#
# Once names, macros, URLs, and email addresses are expanded, they are
# vulnerable to further expansion by the later steps in an unpredictable
# manner (for example a macro name occurring inside a URL or a person's
# name).  This can produce syntactically invalid HTML.  Names themselves
# are linked in a single pass, so one name is no longer re-linked inside
# another name's link.
#
# If you have several people with the same primary name (that being the
# first name in the list of their names), then adding or deleting one of
# them may cause the others to get new URLs.
#
# If you have several people with the same primary name, you will have
# long filenames:
# john-doe-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x.html, etc.  If you
# have a thousand people with the same primary name, one of them will
# have a filename that is at least 2000 characters long.  This could be
# a problem under some circumstances.
#
# If the same name is listed for two different people, mentions of it
# link to whichever of them appears last in the input.
#
# It has my name (Kragen Sitaker) hardcoded into it.
#
# It won't recognize all valid email addresses as email addresses.
#
# Recognizing URLs, it only recognizes http: URLs, and it requires them
# to be terminated by a space.
#
# You might wish you could put a blank line in the entry for a particular
# person.  It won't let you do that, although you can put HTML markup
# (such as a paragraph tag) in the description yourself.
# There's a cheat that lets you put blank lines in the initpage.
#
# It produces extremely long lines, which may not be ideal.
#
# It will accept funky syntax; in particular, you can interleave lines
# of names with lines of description about the person, and it will
# happily put all the names and all the description together, when it was
# probably actually meant to be two or more separate people.
#
# You have to create the directory for it, and if the directory already
# exists, you should clean it out.
#
# It won't bother to warn you if it is overwriting existing files.
#
# Searching through a list sorted by alphabetized first names is not the
# ideal user interface.  I should add a grep.