Re: Public domain Greek text in Unicode

From: John Cowan (cowan@locke.ccil.org)
Date: Thu Oct 01 1998 - 16:05:29 EDT


Elliotte Rusty Harold wrote:

> Is anyone aware of any significant chunk (several hundred lines) of
> classical (i.e. public domain) Greek text freely available on the net in
> Unicode or UTF-8? I need some for an example in a book I'm working on.
> However, so far all I've been able to dig up are strange English
> transliterations.

That is TLG Beta code (I don't know why it's called that) and it's
documented at http://www.tlg.uci.edu/~tlg/BetaCode.html .

The following Perl program will process Beta code into polytoniko
UTF-8. You'll need a Unicode font that includes the Greek Extended
block (U+1F00-U+1FFF) to view the results, though:

=====cut here=====

#!/usr/bin/perl -x
# Renders TLG Beta code (for polytonic Greek) into UTF-8 equivalents
# Note: ? is assumed to represent TONOS (monotonic accent)
#
# Usage: reads the files named on the command line (or standard input) line
# by line, rewrites each line and prints it.  Replacements are written as raw
# UTF-8 byte escapes, so the data is treated as bytes throughout.
#
# Codes recognised by the patterns below:
#   *X     capital letter X; diacritic codes are matched AFTER the letter
#   ( )    rough / smooth breathing
#   / \ =  acute, grave, circumflex
#   |      iota subscript
#   +      diaeresis
#   ?      tonos
#
# Rule order is significant: each substitution scans the whole line before
# the next one runs, so for every letter the longest code sequence is listed
# first and the bare letter last, and all capital (*X) rules run before the
# lowercase rules.  Every replacement consists of bytes >= 0x80, so output
# is never re-matched by a later (pure ASCII) pattern.
#
# NOTE(review): only the "*A)" ordering of capital diacritics is handled; an
# input that writes them between the asterisk and the letter ("*)A") will not
# be combined into a precomposed capital -- confirm against the input corpus.
while (<>) {

        # --- Capital letters (introduced by "*") ---
        s/\*A\(\/\|/\xE1\xBE\x8D/g;
        s/\*A\(\//\xE1\xBC\x8D/g;
        s/\*A\(\=\|/\xE1\xBE\x8F/g;
        s/\*A\(\=/\xE1\xBC\x8F/g;
        s/\*A\(\\\|/\xE1\xBE\x8B/g;
        s/\*A\(\\/\xE1\xBC\x8B/g;
        s/\*A\(\|/\xE1\xBE\x89/g;
        s/\*A\(/\xE1\xBC\x89/g;
        s/\*A\)\/\|/\xE1\xBE\x8C/g;
        s/\*A\)\//\xE1\xBC\x8C/g;
        s/\*A\)\=\|/\xE1\xBE\x8E/g;
        s/\*A\)\=/\xE1\xBC\x8E/g;
        s/\*A\)\\\|/\xE1\xBE\x8A/g;
        s/\*A\)\\/\xE1\xBC\x8A/g;
        s/\*A\)\|/\xE1\xBE\x88/g;
        s/\*A\)/\xE1\xBC\x88/g;
        s/\*A\//\xE1\xBE\xBB/g;
        s/\*A\?/\xCE\x86/g;
        s/\*A\\/\xE1\xBE\xBA/g;
        s/\*A\|/\xE1\xBE\xBC/g;
        s/\*A/\xCE\x91/g;
        s/\*B/\xCE\x92/g;
        s/\*C/\xCE\x9E/g;
        s/\*D/\xCE\x94/g;
        s/\*E\(\//\xE1\xBC\x9D/g;
        s/\*E\(\\/\xE1\xBC\x9B/g;
        s/\*E\(/\xE1\xBC\x99/g;
        s/\*E\)\//\xE1\xBC\x9C/g;
        s/\*E\)\\/\xE1\xBC\x9A/g;
        s/\*E\)/\xE1\xBC\x98/g;
        s/\*E\//\xE1\xBF\x89/g;
        s/\*E\?/\xCE\x88/g;
        s/\*E\\/\xE1\xBF\x88/g;
        s/\*E/\xCE\x95/g;
        s/\*F/\xCE\xA6/g;
        s/\*G/\xCE\x93/g;
        s/\*H\(\/\|/\xE1\xBE\x9D/g;
        s/\*H\(\//\xE1\xBC\xAD/g;
        s/\*H\(\=\|/\xE1\xBE\x9F/g;
        s/\*H\(\=/\xE1\xBC\xAF/g;
        s/\*H\(\\\|/\xE1\xBE\x9B/g;
        s/\*H\(\\/\xE1\xBC\xAB/g;
        s/\*H\(\|/\xE1\xBE\x99/g;
        s/\*H\(/\xE1\xBC\xA9/g;
        s/\*H\)\/\|/\xE1\xBE\x9C/g;
        s/\*H\)\//\xE1\xBC\xAC/g;
        s/\*H\)\=\|/\xE1\xBE\x9E/g;
        s/\*H\)\=/\xE1\xBC\xAE/g;
        s/\*H\)\\\|/\xE1\xBE\x9A/g;
        s/\*H\)\\/\xE1\xBC\xAA/g;
        s/\*H\)\|/\xE1\xBE\x98/g;
        s/\*H\)/\xE1\xBC\xA8/g;
        s/\*H\//\xE1\xBF\x8B/g;
        s/\*H\?/\xCE\x89/g;
        s/\*H\\/\xE1\xBF\x8A/g;
        s/\*H\|/\xE1\xBF\x8C/g;
        s/\*H/\xCE\x97/g;
        s/\*I\(\//\xE1\xBC\xBD/g;
        s/\*I\(\=/\xE1\xBC\xBF/g;
        s/\*I\(\\/\xE1\xBC\xBB/g;
        s/\*I\(/\xE1\xBC\xB9/g;
        s/\*I\)\//\xE1\xBC\xBC/g;
        s/\*I\)\=/\xE1\xBC\xBE/g;
        s/\*I\)\\/\xE1\xBC\xBA/g;
        s/\*I\)/\xE1\xBC\xB8/g;
        s/\*I\+/\xCE\xAA/g;
        s/\*I\//\xE1\xBF\x9B/g;
        s/\*I\?/\xCE\x8A/g;
        s/\*I\\/\xE1\xBF\x9A/g;
        s/\*I/\xCE\x99/g;
        s/\*K/\xCE\x9A/g;
        s/\*L/\xCE\x9B/g;
        s/\*M/\xCE\x9C/g;
        s/\*N/\xCE\x9D/g;
        s/\*O\(\//\xE1\xBD\x8D/g;
        s/\*O\(\\/\xE1\xBD\x8B/g;
        s/\*O\(/\xE1\xBD\x89/g;
        s/\*O\)\//\xE1\xBD\x8C/g;
        s/\*O\)\\/\xE1\xBD\x8A/g;
        s/\*O\)/\xE1\xBD\x88/g;
        s/\*O\//\xE1\xBF\xB9/g;
        s/\*O\?/\xCE\x8C/g;
        s/\*O\\/\xE1\xBF\xB8/g;
        s/\*O/\xCE\x9F/g;
        s/\*P/\xCE\xA0/g;
        s/\*Q/\xCE\x98/g;
        s/\*R\(/\xE1\xBF\xAC/g;
        s/\*R/\xCE\xA1/g;
        s/\*S/\xCE\xA3/g;
        s/\*T/\xCE\xA4/g;
        s/\*U\(\//\xE1\xBD\x9D/g;
        s/\*U\(\=/\xE1\xBD\x9F/g;
        s/\*U\(\\/\xE1\xBD\x9B/g;
        s/\*U\(/\xE1\xBD\x99/g;
        s/\*U\+/\xCE\xAB/g;
        s/\*U\//\xE1\xBF\xAB/g;
        s/\*U\?/\xCE\x8E/g;
        s/\*U\\/\xE1\xBF\xAA/g;
        s/\*U/\xCE\xA5/g;
        s/\*W\(\/\|/\xE1\xBE\xAD/g;
        s/\*W\(\//\xE1\xBD\xAD/g;
        s/\*W\(\=\|/\xE1\xBE\xAF/g;
        s/\*W\(\=/\xE1\xBD\xAF/g;
        s/\*W\(\\\|/\xE1\xBE\xAB/g;
        s/\*W\(\\/\xE1\xBD\xAB/g;
        s/\*W\(\|/\xE1\xBE\xA9/g;
        s/\*W\(/\xE1\xBD\xA9/g;
        s/\*W\)\/\|/\xE1\xBE\xAC/g;
        s/\*W\)\//\xE1\xBD\xAC/g;
        s/\*W\)\=\|/\xE1\xBE\xAE/g;
        s/\*W\)\=/\xE1\xBD\xAE/g;
        s/\*W\)\\\|/\xE1\xBE\xAA/g;
        s/\*W\)\\/\xE1\xBD\xAA/g;
        s/\*W\)\|/\xE1\xBE\xA8/g;
        s/\*W\)/\xE1\xBD\xA8/g;
        s/\*W\//\xE1\xBF\xBB/g;
        s/\*W\?/\xCE\x8F/g;
        s/\*W\\/\xE1\xBF\xBA/g;
        s/\*W\|/\xE1\xBF\xBC/g;
        s/\*W/\xCE\xA9/g;
        s/\*X/\xCE\xA7/g;
        s/\*Y/\xCE\xA8/g;
        s/\*Z/\xCE\x96/g;

        # --- Lowercase letters ---
        s/A\(\/\|/\xE1\xBE\x85/g;
        s/A\(\//\xE1\xBC\x85/g;
        s/A\(\=\|/\xE1\xBE\x87/g;
        s/A\(\=/\xE1\xBC\x87/g;
        s/A\(\\\|/\xE1\xBE\x83/g;
        s/A\(\\/\xE1\xBC\x83/g;
        s/A\(\|/\xE1\xBE\x81/g;
        s/A\(/\xE1\xBC\x81/g;
        s/A\)\/\|/\xE1\xBE\x84/g;
        s/A\)\//\xE1\xBC\x84/g;
        s/A\)\=\|/\xE1\xBE\x86/g;
        s/A\)\=/\xE1\xBC\x86/g;
        s/A\)\\\|/\xE1\xBE\x82/g;
        s/A\)\\/\xE1\xBC\x82/g;
        s/A\)\|/\xE1\xBE\x80/g;
        s/A\)/\xE1\xBC\x80/g;
        s/A\/\|/\xE1\xBE\xB4/g;
        s/A\//\xE1\xBD\xB1/g;
        s/A\=\|/\xE1\xBE\xB7/g;
        s/A\=/\xE1\xBE\xB6/g;
        s/A\?/\xCE\xAC/g;
        s/A\\\|/\xE1\xBE\xB2/g;
        s/A\\/\xE1\xBD\xB0/g;
        s/A\|/\xE1\xBE\xB3/g;
        s/A/\xCE\xB1/g;
        s/B/\xCE\xB2/g;
        s/C/\xCE\xBE/g;
        s/D/\xCE\xB4/g;
        s/E\(\//\xE1\xBC\x95/g;
        s/E\(\\/\xE1\xBC\x93/g;
        s/E\(/\xE1\xBC\x91/g;
        s/E\)\//\xE1\xBC\x94/g;
        s/E\)\\/\xE1\xBC\x92/g;
        s/E\)/\xE1\xBC\x90/g;
        s/E\//\xE1\xBD\xB3/g;
        s/E\?/\xCE\xAD/g;
        s/E\\/\xE1\xBD\xB2/g;
        s/E/\xCE\xB5/g;
        s/F/\xCF\x86/g;
        s/G/\xCE\xB3/g;
        s/H\(\/\|/\xE1\xBE\x95/g;
        s/H\(\//\xE1\xBC\xA5/g;
        s/H\(\=\|/\xE1\xBE\x97/g;
        s/H\(\=/\xE1\xBC\xA7/g;
        s/H\(\\\|/\xE1\xBE\x93/g;
        s/H\(\\/\xE1\xBC\xA3/g;
        s/H\(\|/\xE1\xBE\x91/g;
        s/H\(/\xE1\xBC\xA1/g;
        s/H\)\/\|/\xE1\xBE\x94/g;
        s/H\)\//\xE1\xBC\xA4/g;
        s/H\)\=\|/\xE1\xBE\x96/g;
        s/H\)\=/\xE1\xBC\xA6/g;
        s/H\)\\\|/\xE1\xBE\x92/g;
        s/H\)\\/\xE1\xBC\xA2/g;
        s/H\)\|/\xE1\xBE\x90/g;
        s/H\)/\xE1\xBC\xA0/g;
        s/H\/\|/\xE1\xBF\x84/g;
        s/H\//\xE1\xBD\xB5/g;
        s/H\=\|/\xE1\xBF\x87/g;
        s/H\=/\xE1\xBF\x86/g;
        s/H\?/\xCE\xAE/g;
        s/H\\\|/\xE1\xBF\x82/g;
        s/H\\/\xE1\xBD\xB4/g;
        s/H\|/\xE1\xBF\x83/g;
        s/H/\xCE\xB7/g;
        s/I\(\//\xE1\xBC\xB5/g;
        s/I\(\=/\xE1\xBC\xB7/g;
        s/I\(\\/\xE1\xBC\xB3/g;
        s/I\(/\xE1\xBC\xB1/g;
        s/I\)\//\xE1\xBC\xB4/g;
        s/I\)\=/\xE1\xBC\xB6/g;
        s/I\)\\/\xE1\xBC\xB2/g;
        s/I\)/\xE1\xBC\xB0/g;
        s/I\+\//\xE1\xBF\x93/g;
        s/I\+\=/\xE1\xBF\x97/g;
        s/I\+\?/\xCE\x90/g;
        s/I\+\\/\xE1\xBF\x92/g;
        s/I\+/\xCF\x8A/g;
        s/I\//\xE1\xBD\xB7/g;
        s/I\=/\xE1\xBF\x96/g;
        s/I\?/\xCE\xAF/g;
        s/I\\/\xE1\xBD\xB6/g;
        s/I/\xCE\xB9/g;
        s/K/\xCE\xBA/g;
        s/L/\xCE\xBB/g;
        s/M/\xCE\xBC/g;
        s/N/\xCE\xBD/g;
        s/O\(\//\xE1\xBD\x85/g;
        s/O\(\\/\xE1\xBD\x83/g;
        s/O\(/\xE1\xBD\x81/g;
        s/O\)\//\xE1\xBD\x84/g;
        s/O\)\\/\xE1\xBD\x82/g;
        s/O\)/\xE1\xBD\x80/g;
        s/O\//\xE1\xBD\xB9/g;
        s/O\?/\xCF\x8C/g;
        s/O\\/\xE1\xBD\xB8/g;
        s/O/\xCE\xBF/g;
        s/P/\xCF\x80/g;
        s/Q/\xCE\xB8/g;
        s/R\(/\xE1\xBF\xA5/g;
        s/R\)/\xE1\xBF\xA4/g;
        s/R/\xCF\x81/g;
        # Word-final sigma.  An S that is not followed by another letter is
        # rendered as final sigma (U+03C2) instead of medial sigma (U+03C3).
        # At this point letters A-R and all capitals are already UTF-8 bytes
        # (>= 0x80) while T-Z are still ASCII, so "followed by a letter" has
        # to test for both.  S before an apostrophe or a digit is left for
        # the medial rule below; explicit S1/S2/S3 forms are not interpreted.
        s/S(?![A-Z0-9'\x80-\xFF])/\xCF\x82/g;
        s/S/\xCF\x83/g;
        s/T/\xCF\x84/g;
        s/U\(\//\xE1\xBD\x95/g;
        s/U\(\=/\xE1\xBD\x97/g;
        s/U\(\\/\xE1\xBD\x93/g;
        s/U\(/\xE1\xBD\x91/g;
        s/U\)\//\xE1\xBD\x94/g;
        s/U\)\=/\xE1\xBD\x96/g;
        s/U\)\\/\xE1\xBD\x92/g;
        s/U\)/\xE1\xBD\x90/g;
        s/U\+\//\xE1\xBF\xA3/g;
        s/U\+\=/\xE1\xBF\xA7/g;
        s/U\+\?/\xCE\xB0/g;
        s/U\+\\/\xE1\xBF\xA2/g;
        s/U\+/\xCF\x8B/g;
        s/U\//\xE1\xBD\xBB/g;
        s/U\=/\xE1\xBF\xA6/g;
        s/U\?/\xCF\x8D/g;
        s/U\\/\xE1\xBD\xBA/g;
        s/U/\xCF\x85/g;
        s/W\(\/\|/\xE1\xBE\xA5/g;
        s/W\(\//\xE1\xBD\xA5/g;
        s/W\(\=\|/\xE1\xBE\xA7/g;
        s/W\(\=/\xE1\xBD\xA7/g;
        s/W\(\\\|/\xE1\xBE\xA3/g;
        s/W\(\\/\xE1\xBD\xA3/g;
        s/W\(\|/\xE1\xBE\xA1/g;
        s/W\(/\xE1\xBD\xA1/g;
        s/W\)\/\|/\xE1\xBE\xA4/g;
        s/W\)\//\xE1\xBD\xA4/g;
        s/W\)\=\|/\xE1\xBE\xA6/g;
        s/W\)\=/\xE1\xBD\xA6/g;
        s/W\)\\\|/\xE1\xBE\xA2/g;
        s/W\)\\/\xE1\xBD\xA2/g;
        s/W\)\|/\xE1\xBE\xA0/g;
        s/W\)/\xE1\xBD\xA0/g;
        s/W\/\|/\xE1\xBF\xB4/g;
        s/W\//\xE1\xBD\xBD/g;
        s/W\=\|/\xE1\xBF\xB7/g;
        s/W\=/\xE1\xBF\xB6/g;
        s/W\?/\xCF\x8E/g;
        s/W\\\|/\xE1\xBF\xB2/g;
        s/W\\/\xE1\xBD\xBC/g;
        s/W\|/\xE1\xBF\xB3/g;
        s/W/\xCF\x89/g;
        s/X/\xCF\x87/g;
        s/Y/\xCF\x88/g;
        s/Z/\xCE\xB6/g;
        print;
        }
__END__

-- 
John Cowan	http://www.ccil.org/~cowan		cowan@ccil.org
	You tollerday donsk?  N.  You tolkatiff scowegian?  Nn.
	You spigotty anglease?  Nnn.  You phonio saxo?  Nnnn.
		Clear all so!  'Tis a Jute.... (Finnegans Wake 16.5)



This archive was generated by hypermail 2.1.2 : Tue Jul 10 2001 - 17:20:42 EDT