#!/usr/bin/perl -w
# Get a word from Merriam-Webster's online dictionary and print the definition.

# usage
#
# Print a one-line synopsis of the command on standard output and end the
# program with exit status 1.  Takes no arguments and never returns.
sub usage {
	print "Usage: $0 <word> -- returns the definition of the word.\n";
	exit 1;
}

# Exactly one command-line argument (the word to look up) is required;
# anything else prints the usage message and exits.
usage() if @ARGV != 1;

# Server, TCP port and path of the lookup script the request is posted to.
my ($host, $port, $script) = ("www.m-w.com", 80, "/cgi-bin/netdict");

# Note that these headers do not contain the full query Netscape would post.
# Those headers are here:
#POST /cgi-bin/netdict HTTP/1.0
#Referer: http://www.m-w.com/netdict.html
#Connection: Keep-Alive
#User-Agent: Mozilla/3.0 (X11; I; Linux 2.0.30 i486)
#Host: localhost:1600
#Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*
#Content-type: application/x-www-form-urlencoded
#Content-length: 32
#
#book=Dictionary&va=canonicalword
#12345678911234567892123456789312
#32 characters.  This needs to be calculated separately and put in the
#Content-length header for each word.

# Request line plus the headers that are the same for every lookup.  Each
# entry becomes one newline-terminated line; the empty last entry supplies
# the newline after the final header.  Content-length is not included
# here: it depends on the word and is added when the request is sent.
# NOTE(review): the captured Netscape request quoted above has a Referer
# ending in netdict.html, while this one ends in netdict.htm -- confirm
# which is intended.
my $headers = join "\n",
	"POST $script HTTP/1.0",
	"Referer: http://www.m-w.com/netdict.htm",
	"User-Agent: Mozilla/3.0 (X11; I; Linux 2.0.30 i486)",
	"Host: $host:$port",
	"Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*",
	"Content-type: application/x-www-form-urlencoded",
	"";


# How to encode an arbitrary word safely?  RFC 1738 says:
#   Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
#   reserved characters used for their reserved purposes may be used
#   unencoded within a URL.
# The word, however, is sent in an application/x-www-form-urlencoded
# body, where a "+" is decoded as a space.  A literal "+" therefore has
# to be escaped too, so [^-a-zA-Z0-9\$_.!*'(),] should always be encoded.
# That also covers the reserved characters (e.g. &, ;, =), which we do
# not want in the word.
# BE CAREFUL!  You need to backslash the $ in the above pattern!

# hexencode CHAR
#
# Return "%" followed by the two uppercase hexadecimal digits of the
# first byte of CHAR, e.g. " " gives "%20".
# NOTE(review): presumably CHAR is always a single byte (urlencode passes
# one matched character of a command-line argument) -- confirm before
# feeding it characters above 0xFF.
sub hexencode {
	my ($char) = @_;
	# print STDERR "hexencoding character ($char)\n";
	return "%" . uc(unpack("H2", $char));
}

# urlencode WORD
#
# Return a copy of WORD in which every character outside the safe set
# (alphanumerics and $ - _ . ! * ' ( ) ,) is replaced by its %XX escape.
# The argument itself is not modified.
sub urlencode {
	my ($word) = @_;
	# "+" is deliberately absent from the safe set: left as is, the
	# server would read it as a space, so it is sent as %2B instead.
	$word =~ s/([^-a-zA-Z0-9\$_.!*'(),])/hexencode($1)/eg;
	return $word;
}

# querystring WORD
#
# Return the form body for looking up WORD: the fixed "book" field
# followed by the "va" field holding the URL-encoded word, e.g.
#   book=Dictionary&va=canonicalword
sub querystring {
	my ($word) = @_;
	return "book=Dictionary&va=" . urlencode($word);
}

# Send the request.  nc(1) receives the headers and the form body on its
# standard input; whatever it prints goes straight to our standard output.
# (A "| wwwebster-postproc" stage after nc used to be sketched here in a
# comment; it stays disabled, as before.)
my $querystring = querystring(shift @ARGV);

# List-form pipe open: nc is started directly, without a shell, and a
# failure to start it is reported instead of being silently ignored.
# For debugging, the command can be replaced by
#   "sh", "-c", "echo $host $port; cat"
# to see the request instead of sending it.
open(my $netdict, "|-", "nc", $host, $port)
	or die "$0: cannot run nc $host $port: $!\n";

# Content-length counts the query string only; the newline printed after
# it is not included.
print {$netdict} $headers,
	"Content-length: " . length($querystring) . "\n\n$querystring\n";

# Closing a pipe waits for the command; it fails on a write error ($! is
# set) or when nc exits with a non-zero status ($? holds the status).
unless (close $netdict) {
	die($! ? "$0: error closing pipe to nc: $!\n"
	       : "$0: nc exited with status " . ($? >> 8) . "\n");
}
