#!/usr/bin/perl -w
use strict;
# Print People I Know, public release 4 or so, 1999-10-26
# minor change, 2006-12-20: add style and favicon links
# Take a list of people, formatted roughly as follows:

# Here are the people I know.
#
# Here is another paragraph about them.
# --
# sttng: Star Trek: The Next Generation
# James Kirk, Captain Kirk
#	Captain of the original Enterprise; predecessor to Jean-Luc Picard.
#
# Jean-Luc Picard, Captain Picard
#	Captain of the Enterprise in sttng; successor to Captain Kirk.

# and transform it into a set of web pages, one per person, with
# hyperlinks where the pages mention one another's names.
#
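# sttng is a macro: a "name: expansion" line defines it, and wherever the
# macro's name occurs in the text it is replaced by the expansion.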

# Output directory for the generated files: the first command-line
# argument.  It must already exist as a directory; whatever remains in
# @ARGV is left for the <> operator to read as input files.
my $dir = shift @ARGV;
die "Usage: $0 dir [inputfiles]\n"
	unless defined $dir and -d $dir;

# Parsed records in input order; each element is [ [ names... ], text ].
my @data;
# Macro name => expansion text.
my %macros;
# Every name of every record => index of that record in @data.
my %names;

# Accumulators for the record currently being read: its raw name lines
# and its raw description lines.
my (@names, @para);

# The real data is preceded by an introductory page, which ends at a line
# consisting of just "--".  @initpage collects the lines of that page and
# $doing_initpage stays true while we are still reading it.
my @initpage;
my $doing_initpage = 1;

# Called when a blank line or EOF ends the record currently being read.
# Turns the accumulated @names and @para lines into a record
#     [ [ name, alias, ... ], "description text" ]
# appends it to @data, points every one of its names in %names at the
# record's index in @data, and empties the accumulators.  Does nothing
# when no lines are pending.  Takes no arguments; the return value is not
# meaningful.
sub namedone {
	# Nothing pending, e.g. several blank lines in a row.
	return unless @names or @para;

	if (not (@names and @para)) {
		# Only half a record has been collected.  The accumulators are
		# left alone so that following lines can still complete it.  We
		# return rather than `next`: `next` inside a sub only works by
		# jumping out to a loop in the caller, and it is a fatal error on
		# the final call made after the input loop has finished.
		warn "Oops, boo-boo, line $.\n";
		return;
	}

	# Names may span several lines: join them, split on commas, and chop
	# off white space.  Empty entries (e.g. from a doubled comma) are
	# dropped, because an empty name would be "found" at every word
	# boundary when names are later turned into hyperlinks.
	my @record_names =
		grep { length($_) }
		map { /^\s*(.*?)\s*$/ && $1 }
		split /,/, join " ", @names;
	# chop off white space and join with spaces
	my $description = join " ", map { /^\s*(.*?)\s*$/ && $1 } @para;
	@names = ();
	@para = ();

	if (not @record_names) {
		warn "Entry ending at line $. has no usable name; skipped\n";
		return;
	}

	# The record is about to be pushed, so its index is the current size.
	my $index = scalar @data;
	$names{$_} = $index for @record_names;
	push @data, [ \@record_names, $description ];
	return;
}

# Main input loop: one pass over the input, collecting everything in
# memory.  Nothing is written until the whole input has been read.
while (my $line = <>) {
	chomp $line;
	next if $line =~ /^#/;  # comment lines are ignored everywhere

	if ($doing_initpage) {
		# A line of just "--" ends the introductory page.  Until then
		# every line belongs to it, and an empty line stands for a
		# paragraph break.
		if ($line =~ /^--\s*$/) { $doing_initpage = 0 }
		else                    { push @initpage, $line || "</p><p>" }
		next;
	}

	# Classification order matters: a macro definition is recognized
	# before anything else.
	if (my ($macro, $expansion) = $line =~ /^(\S.*?)\s*:\s*(.*?)\s*$/) {
		$macros{$macro} = $expansion;
	}
	elsif ($line !~ /\S/) {
		# a blank line ends the current record
		namedone();
	}
	elsif ($line =~ /^\s/) {
		# indented lines are description text
		push @para, $line;
	}
	else {
		# anything else is a line of names
		push @names, $line;
	}
}
# The last record may not have been followed by a blank line.
namedone();

# Flatten the introductory page into one string: strip the white space
# around every collected line, then join the lines with single spaces.
my @initpage_lines = map { /^\s*(.*?)\s*$/ && $1 } @initpage;
my $initpage = join " ", @initpage_lines;

# Return the opening boilerplate of a page: the doctype, a <head> holding
# the title plus the stylesheet and favicon links, the opening <body>
# tag, and an <h1> heading.  The single argument is the page title; it is
# inserted as given into both the <title> and the <h1>.
sub htmlbegin {
	my ($title) = @_;
	return <<"END_HTML";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
  "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><head><title>$title</title>
<link rel="stylesheet" href="http://pobox.com/~kragen/style.css" />
<link rel="shortcut icon" href="http://pobox.com/~kragen/kragen-icon-small.ico" />
</head><body bgcolor="white">
<h1>$title</h1>
END_HTML
}

# Return the closing tags that finish every generated page.
sub htmlend {
	return "</body></html>\n";
}

# Figure out what filenames to assign to names.  $filenames[$i] is the
# filename (without ".html") for the record $data[$i].  It is derived
# from the record's primary (first) name: every character other than an
# ASCII letter, digit or hyphen becomes a hyphen, and letters are
# lowercased.
my @filenames = ();
# Filenames already handed out.  "index" is reserved for the index page.
my %takenfnames = ("index" => 1);
for my $record (@data) {
	my $fname = $record->[0]->[0];
	$fname =~ tr/-A-Za-z0-9/-/c;
	# only ASCII letters are left at this point, so lc is safe
	$fname = lc $fname;
	# this is not the ideal strategy if you have many people with the
	# same primary name, but it works for me.
	$fname .= "-x" while exists $takenfnames{$fname};
	# Record the choice.  Without this the collision check above could
	# only ever fire for "index", and two people with the same primary
	# name would silently share (and overwrite) one page.
	$takenfnames{$fname} = 1;
	push @filenames, $fname;
}

# This expands macros and makes hyperlinks to names, URLs and email
# addresses, returning the munged string with a newline appended.
#
# The single argument is the text to process.  The stages run in a fixed
# order: http URLs, then names, then macros, then email addresses.  Each
# stage still sees the markup produced by the stages before it, so a name
# or macro that occurs inside a URL is rewritten there as well.
sub munge {
	my ($text) = @_;

	# A URL runs up to the next space or the end of the text; trailing
	# commas and periods are left outside the link.
	$text =~ s#(http://[^ ]*[^ ,.])#<a href="$1">$1</a>#g;

	# Link all names in ONE pass over the text, trying longer names
	# first.  Substituting name by name does not work: once "Carolyn
	# Hogg Sittler" has been linked, a later substitution for "Carolyn
	# Hogg" matches inside the link that was just generated.  With a
	# single alternation, text that has been linked is never rescanned
	# for other names.  Empty names are skipped because they would match
	# at every word boundary.
	my @known_names =
		sort { length($b) <=> length($a) or $a cmp $b }
		grep { length($_) }
		keys %names;
	if (@known_names) {
		my $name_re = join "|", map { quotemeta($_) } @known_names;
		$text =~ s#\b($name_re)\b#<a href="$filenames[$names{$1}].html">$1</a>#g;
	}

	# Expand macros the same way: one pass, longest macro name first, so
	# the outcome does not depend on hash ordering and an expansion is
	# not itself expanded again.
	my @macro_names =
		sort { length($b) <=> length($a) or $a cmp $b }
		keys %macros;
	if (@macro_names) {
		my $macro_re = join "|", map { quotemeta($_) } @macro_names;
		$text =~ s/($macro_re)/$macros{$1}/g;
	}

	# Anything that looks like an email address becomes a mailto: link.
	$text =~ s#([-A-Za-z0-9.]*?@[-A-Za-z0-9.]*[-A-Za-z0-9])#<a href="mailto:$1">$1</a>#g;
	return $text . "\n";
}

# Navigation footer appended to every generated page.  The line breaks
# inside the <a ...> tags, and the spaces in front of some of them, are
# part of the emitted text, so they are spelled out explicitly here.
my $trailer = join "",
	qq(<p><a href="index.html">Index of people</a> | <a\n),
	qq(href="search.cgi">Search people</a> | <a \n),
	qq(href="mailto:kragen\@pobox.com">email Kragen</a> | <a \n),
	qq(href="http://www.pobox.com/~kragen/">Kragen's home page</a>\n);

# Here we start to spit things out.  We've read in all the names, so we
# can put a list of them here, and we can expand macros in the initpage.
# The index page is the munged introductory page followed by a list of
# every known name in sorted order, each run through munge.
{
	my $index_path = "$dir/index.html";
	# Three-argument open keeps odd characters in $dir from being taken
	# as part of the open mode.
	open my $out, '>', $index_path or die "can't open $index_path: $!\n";
	print {$out} htmlbegin("Index of people Kragen Sitaker knows");
	print {$out} "<p>", munge($initpage), "</p><ul>\n";
	for my $name (sort keys %names) {
		print {$out} "<li>", munge($name), "</li>";
	}
	print {$out} "</ul>\n", $trailer, htmlend();
	# Buffered write errors (e.g. a full filesystem) only show up when
	# the handle is closed, so the close has to be checked.
	close $out or die "can't write $index_path: $!\n";
}

# create a text file suitable for searching: one line per known name, in
# sorted order, of the form
#     name,filename.html,primary name of the record
{
	my $list_path = "$dir/dir.txt";
	# Three-argument open keeps odd characters in $dir from being taken
	# as part of the open mode.
	open my $out, '>', $list_path or die "can't open $list_path: $!\n";
	for my $name (sort keys %names) {
		my $index = $names{$name};
		print {$out} "$name,$filenames[$index].html,$data[$index]->[0]->[0]\n";
	}
	# Buffered write errors (e.g. a full filesystem) only show up when
	# the handle is closed, so the close has to be checked.
	close $out or die "can't write $list_path: $!\n";
}

# here we spit out the data from the actual names, in the order it was
# in the input file: one page per record, titled with the record's
# primary name, headed by all of its names, and holding the munged
# description followed by the common trailer.
for my $i (0 .. $#data) {
	my ($person_names, $description) = @{ $data[$i] };
	my $page_path = "$dir/$filenames[$i].html";
	# Three-argument open keeps odd characters in $dir from being taken
	# as part of the open mode.
	open my $out, '>', $page_path or die "can't open $page_path: $!\n";
	print {$out} htmlbegin("$person_names->[0]: known by Kragen Sitaker");
	print {$out} "<h2>", join(", ", @$person_names),
		"</h2>\n<p>", munge($description), $trailer, htmlend();
	# Buffered write errors (e.g. a full filesystem) only show up when
	# the handle is closed, so the close has to be checked.
	close $out or die "can't write $page_path: $!\n";
}

# BUGS: 
# 
# Once names, macros, URLs, and email addresses are expanded, they are
# vulnerable to further expansion in an unpredictable manner.  This
# often produces syntactically invalid HTML.
# 
# If you have several people with the same primary name (that being the
# first name in the list of their names), then adding or deleting one of
# them may cause the others to get new URLs.
#
# If you have several people with the same primary name, you will have
# long filenames:
# john-doe-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x.html, etc.  If you
# have a thousand people with the same primary name, one of them will
# have a filename that is at least 2000 characters long.  This could be
# a problem under some circumstances.
#
# It has my name (Kragen Sitaker) hardcoded into it.
#
# It won't recognize all valid email addresses as email addresses.
# 
# It only recognizes http: URLs, and it takes a URL to run up to the next
# space (or the end of the text), leaving off any trailing commas or
# periods.
#
# You might wish you could put a blank line in the entry for a particular
# person.  It won't let you do that, although you can put a <p> in.
# There's a cheat that lets you put blank lines in the initpage.
# 
# It produces extremely long lines, which may not be ideal.
# 
# It won't notice filesystem-full errors.
# 
# It will accept funky syntax; in particular, you can interleave lines
# of names with lines of description about the person, and it will
# happily put all the names and all the description together, when it was
# probably actually meant to be two or more separate people.
# 
# You have to create the directory for it, and if the directory already
# exists, you should clean it out.
# 
# It won't bother to warn you if it is overwriting existing files.
# 
# Searching through a list sorted by alphabetized first names is not the
# ideal user interface.  I should add a grep.
