Re: X11 unicode fonts using Java

From: Jungshik Shin (jshin@pantheon.yale.edu)
Date: Tue Apr 11 2000 - 17:08:19 EDT


On Mon, 10 Apr 2000, Viranga Ratnaike wrote:

> Hi,
> a colleague of mine wants to render unicode using Java in X11.
>
> (1) How do we render a unicode font using Java in X11?
> Even an indication, of how difficult it is, would be helpful.

  It used to be pretty easy before JDK 2. Take a look
at font.properties.* files in your JDK and it should be
obvious. (especially, font.properties.ko_KR_UTF8.5.7 included in Solaris
version of JDK should give you a good idea).

  By writing an appropriate Java class for a conversion between
Unicode (char in Java) and glyph indices (attached below is a converter
from Unicode to glyph indices of HangulJohab fonts as used by Korean
Hanterm, CharToByteX11Johab) and listing it in the font.properties file of
your locale (see below), you can make Java render a string of Java char
type with any font of known glyph indices.

serif.0=-monotype-times new roman-regular-r---*-%d-*-*-p-*-iso8859-1
serif.1=-kaist-myeongjo-medium-r-normal--*-%d-*-*-c-*-johabsh

fontcharset.serif.0=sun.io.CharToByteISO8859_1
fontcharset.serif.1=sun.io.CharToByteX11Johab

  Exactly the same technique can be applied to rendering of
Thai and Indic scripts without writing a single line of Java code
(except for the Unicode 'string' to glyph-indices converter).

  If you have an all-inclusive Unicode font, you can just use the
identity map, but there's no such font even in theory because Thai, Indic
scripts and Hangul (and others) need complex character(s)-to-glyph-indices
conversion. Therefore, you have to break the whole
Unicode repertoire into multiple ranges, each of which is
covered by a separate font (possibly with a complex characters-to-
glyph-indices mapping).

  Basically, the same thing is possible with JDK2, but at least the
Linux incarnation of it doesn't use X11 fonts offered by the X
server. Instead, it looks for TrueType, Type1, and CID-keyed fonts in
pre-configured places, so that it's no longer possible to use X11
bitmap fonts or TrueType fonts with multiple styles in a single ttf file.

   Jungshik Shin

/*
 * @(#)CharToByteX11Johab.java 1.0 98/05/03
 *
 * Purposes:
 * 1. Enable displaying all 11,172 Modern hangul syllables with Hanterm
 * johab fonts on Unix
 * 2. Enable displaying some of Unicode 2.0 ancient hangul syllables
 * with Hanterm johab fonts on Unix
 * 3. Enable displaying all of Unicode 2.0 ancient hangul syllables with
 * possible future extended Hanterm johab fonts on Unix
 *
 * Installation Instructions:
 * 1. Install Hanterm Johab fonts and a proper font property file to Unix system.
 * (Refer to http://calab.kaist.ac.kr/~dtkim/java/ )
 * 2. Make a directory "jdk1.x.x/classes/"
 * 3. Compile this class into "jdk1.x.x/classes/"
 *
 * Author: Deogtae Kim <dtkim@calab.kaist.ac.kr>, 98/05/03
 *
 * Based on: Hanterm source code adapted by Jungshik Shin <jshin@pantheon.yale.edu>
 */

import sun.io.CharToByteConverter;
import sun.io.MalformedInputException;
import sun.io.UnknownCharacterException;
import sun.io.ConversionBufferFullException;

public class CharToByteX11Johab extends CharToByteConverter
{
    int state = START;

    public static final int START = 0;
    public static final int LEADING_CONSONANT = 1;
    public static final int VOWEL = 2;

    int l = 0x5f; // leading consonant
    int v = 0; // vowel
    int t = 0; // trailing consonant

    /*
     * This method indicates the charset name for this font.
     */
    public String getCharacterEncoding()
    {
        return "X11Johab";
    }

    /*
     * This method indicates the range this font covers.
     */
    public boolean canConvert(char ch)
    {
        if ( 0xac00 <= ch && ch <= 0xd7a3 // Modern hangul syllables
             || 0x1100 <= ch && ch <= 0x1112 // modern leading consonants (19)
             || 0x1113 <= ch && ch <= 0x1159 // ancient leading consonants (71)
                && lconBase[ch-0x1100] != 0
             || ch == 0x115f // leading consonants filler
             || 0x1160 <= ch && ch <= 0x1175 // modern vowels (21)
             || 0x1176 <= ch && ch <= 0x11a2 // ancient vowels (45)
                && vowBase[ch-0x1160] != 0
             || 0x11a8 <= ch && ch <= 0x11c2 // modern trailing consonants (27)
             || 0x11c3 <= ch && ch <= 0x11f9 // ancient trailing consonants (55)
                && tconBase[ch-0x11a7] != 0 )
            return true;
        return false;
    }

    /*
     * This method converts the unicode to this font index.
     * Note: ConversionBufferFullException is not handled
     * since this class is only used for character display.
     */
    public int convert(char[] input, int inStart, int inEnd,
                       byte[] output, int outStart, int outEnd)
        throws MalformedInputException,
               UnknownCharacterException
    {
        charOff = inStart;
        byteOff = outStart;

        for (; charOff < inEnd; charOff++)
        {
            char ch = input[charOff];
            if (0xac00 <= ch && ch <= 0xd7a3)
            {
                if ( state != START )
                    composeHangul(output);
                ch -= 0xac00;
                l = (ch / 588); // 588 = 21*28
                v = ( ch / 28 ) % 21 + 1;
                t = ch % 28;
                composeHangul(output);
            } else if (0x1100 <= ch && ch <= 0x115f)
            { // leading consonants (19 + 71 + 1)
                if ( state != START )
                    composeHangul(output);
                l = ch - 0x1100;
                state = LEADING_CONSONANT;
            } else if (1160 <= ch && ch <= 0x11a2)
            { // vowels (1 + 21 + 45)
                v = ch - 0x1160;
                state = VOWEL;
            } else if (0x11a8 <= ch && ch <= 0x11f9)
            { // modern trailing consonants (27)
                t = ch - 0x11a7;
                composeHangul(output);
            } else
            {
                throw new UnknownCharacterException();
            }
        }

        if ( state != START )
            composeHangul( output );

        return byteOff - outStart;
    }

    public int flush(byte output[], int i, int j)
        throws MalformedInputException
    {
        byteOff = 0;
        int len = 0;
        if ( state != START )
        {
            composeHangul( output );
            len = byteOff;
        }
        byteOff = charOff = 0;
        return len;
    }

    public void reset()
    {
        byteOff = charOff = 0;
        state = START;
        l = 0x5f;
        v = t = 0;
    }

    public int getMaxBytesPerChar()
    {
        return 6;
    }

    // The base font index for leading consonants

    static final short[] lconBase = {
        // modern leading consonants (19)
        1, 11, 21, 31, 41, 51,
        61, 71, 81, 91, 101, 111,
        121, 131, 141, 151, 161, 171,
        181,

        // ancient leading consonants (71 + reserved 5 + filler 1)
        0, 0, 0, 0, 0, 0, // \u1113 ~ :
        0, 0, 0, 0, 0, 201, // \u1119 ~ :
        0, 221, 251, 0, 0, 0, // \u111f ~ :
        0, 0, 281, 0, 0, 0, // \u1125 ~ :
        191, 0, 211, 0, 231, 0, // \u112b ~ :
        0, 241, 0, 0, 0, 291, // \u1131 ~ :
        0, 0, 0, 0, 0, 0, // \u1137 ~ :
        0, 0, 0, 261, 0, 0, // \u113d ~ :
        0, 0, 0, 0, 0, 0, // \u1143 ~ :
        0, 0, 0, 271, 0, 0, // \u1149 ~ :
        0, 0, 0, 0, 0, 0, // \u114f ~ :
        0, 0, 0, 0, 301, // \u1155 ~ :
        0, 0, 0, 0, 0, // \u115a ~ : reserved
        0, // \u115f : leading consonant filler
    };

    // The base font index for vowels

    static final short[] vowBase = {
        // modern vowels (filler + 21)
        0,311,314,317,320,323, // (Fill), A, AE, YA, YAE, EO
        326,329,332,335,339,343, // E, YEO, YE, O, WA, WAE
        347,351,355,358,361,364, // OI, YO, U, WEO, WE, WI
        367,370,374,378, // YU, EU, UI, I

        // ancient vowels (45)
        0, 0, 0, 0, 0, 0, // \u1176 ~ : A-O, A-U, YA-O, YA-YO, EO-O, EO-U
        0, 0, 0, 0, 0, 0, // \u117c ~ : EO-EU, YEO-O, YEO-U, O-EO, O-E, O-YE
        0, 0, 381, 384, 0, 0, // \u1182 ~ : O-O, O-U, YO-YA, YO-YAE, YO-YEO, YO-O
        387, 0, 0, 0, 0, 0, // \u1188 ~ : YO-I, U-A, U-AE, U-EO-EU, U-YE, U-U
        0, 0, 0, 390, 393, 0, // \u118e ~ : YU-A, YU-EO, YU-E, YU-YEO, YU-YE, YU-U
        396, 0, 0, 0, 0, 0, // \u1194 ~ : YU-I, EU-U, EU-EU, YI-U, I-A, I-YA
        0, 0, 0, 0, 399, 0, // \u119a ~ : I-O, I-U, I-EU, I-ARAEA, ARAEA, ARAEA-EO
        0, 402, 0 // \u11a0 ~ : ARAEA-U, ARAEA-I,SSANGARAEA
    };

    // The base font index for trailing consonants

    static final short[] tconBase = {
        // modern trailing consonants (filler + 27)
        0,
        405, 409, 413, 417, 421,
        425, 429, 433, 437, 441,
        445, 459, 453, 457, 461,
        465, 469, 473, 477, 481,
        485, 489, 493, 497, 501,
        505, 509,

        // ancient trailing consonants (55)
        0, 0, 0, 0, 0, 0, // \u11c3 ~ :
        0, 0, 0, 0, 0, 0, // \u11c9 ~ :
        0, 0, 0, 0, 0, 0, // \u11cf ~ :
        0, 0, 0, 0, 513, 517, // \u11d5 ~ :
        0, 0, 0, 0, 0, 0, // \u11db ~ :
        0, 0, 0, 0, 0, 0, // \u11e1 ~ :
        0, 0, 0, 0, 0, 0, // \u11e7 ~ :
        0, 0, 0, 525, 0, 0, // \u11ed ~ :
        0, 0, 0, 0, 0, 0, // \u11f3 ~ :
        521 // \u11f9:
    };

    // The mapping from vowels to leading consonant type
    // in absence of trailing consonant

    static final short[] lconMap1 = {
        0,0,0,0,0,0, // (Fill), A, AE, YA, YAE, EO
        0,0,0,1,3,3, // E, YEO, YE, O, WA, WAE
        3,1,2,4,4,4, // OI, YO, U, WEO, WE, WI
        2,1,3,0, // YU, EU, UI, I

        // ancient vowels (45)
        3, 4, 3, 3, 3, 4, // \u1176 ~ : A-O, A-U, YA-O, YA-YO, EO-O, EO-U
        4, 3, 4, 3, 3, 3, // \u117c ~ : EO-EU, YEO-O, YEO-U, O-EO, O-E, O-YE
        1, 1, 3, 3, 3, 1, // \u1182 ~ : O-O, O-U, YO-YA, YO-YAE, YO-YEO, YO-O
        3, 4, 4, 4, 4, 2, // \u1188 ~ : YO-I, U-A, U-AE, U-EO-EU, U-YE, U-U
        3, 3, 3, 3, 3, 2, // \u118e ~ : YU-A, YU-EO, YU-E, YU-YEO, YU-YE, YU-U
        4, 2, 2, 4, 0, 0, // \u1194 ~ : YU-I, EU-U, EU-EU, YI-U, I-A, I-YA
        3, 4, 3, 0, 1, 3, // \u119a ~ : I-O, I-U, I-EU, I-ARAEA, ARAEA, ARAEA-EO
        2, 3, 1 // \u11a0 ~ : ARAEA-U, ARAEA-I, SSANGARAEA
    };

    // The mapping from vowels to leading consonant type
    // in presence of trailing consonant

    static final short[] lconMap2 = {
        5,5,5,5,5,5, // (Fill), A, AE, YA, YAE, EO
        5,5,5,6,8,8, // E, YEO, YE, O, WA, WAE
        8,6,7,9,9,9, // OI, YO, U, WEO, WE, WI
        7,6,8,5, // YU, EU, UI, I

        // ancient vowels (45)
        8, 9, 8, 8, 8, 9, // \u1176 ~ : A-O, A-U, YA-O, YA-YO, EO-O, EO-U
        9, 8, 9, 8, 8, 8, // \u117c ~ : EO-EU, YEO-O, YEO-U, O-EO, O-E, O-YE
        6, 6, 8, 8, 8, 6, // \u1182 ~ : O-O, O-U, YO-YA, YO-YAE, YO-YEO, YO-O
        8, 9, 9, 9, 9, 7, // \u1188 ~ : YO-I, U-A, U-AE, U-EO-EU, U-YE, U-U
        8, 8, 8, 8, 8, 7, // \u118e ~ : YU-A, YU-EO, YU-E, YU-YEO, YU-YE, YU-U
        9, 7, 7, 9, 5, 5, // \u1194 ~ : YU-I, EU-U, EU-EU, YI-U, I-A, I-YA
        8, 9, 8, 5, 6, 8, // \u119a ~ : I-O, I-U, I-EU, I-ARAEA, ARAEA, ARAEA-EO
        7, 8, 6 // \u11a0 ~ : ARAEA-U, ARAEA-I, SSANGARAEA
    };

    // vowel type ; 1 = o and its alikes, 0 = others
    static final short[] vowType = {
        0,0,0,0,0,0,
        0,0,0,1,1,1,
        1,1,0,0,0,0,
        0,1,1,0,

        // ancient vowels (45)
        1, 0, 1, 1, 1, 0, // \u1176 ~ : A-O, A-U, YA-O, YA-YO, EO-O, EO-U
        0, 1, 0, 1, 1, 1, // \u117c ~ : EO-EU, YEO-O, YEO-U, O-EO, O-E, O-YE
        1, 1, 0, 0, 0, 0, // \u1182 ~ : O-O, O-U, YO-YA, YO-YAE, YO-YEO, YO-O
        0, 0, 0, 0, 0, 0, // \u1188 ~ : YO-I, U-A, U-AE, U-EO-EU, U-YE, U-U
        0, 0, 0, 0, 0, 0, // \u118e ~ : YU-A, YU-EO, YU-E, YU-YEO, YU-YE, YU-U
        0, 0, 0, 0, 0, 0, // \u1194 ~ : YU-I, EU-U, EU-EU, YI-U, I-A, I-YA
        0, 0, 0, 0, 0, 0, // \u119a ~ : I-O, I-U, I-EU, I-ARAEA, ARAEA, ARAEA-EO
        0, 0, 0 // \u11a0 ~ : ARAEA-U, ARAEA-I, SSANGARAEA
    };

    // The mapping from trailing consonants to vowel type

    static final int[] tconType = {
        0, 1, 1, 1, 2, 1,
        1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1,
        1, 1, 1, 1,

        // ancient trailing consonants (55)
        1, 1, 1, 1, 1, 1, // \u11c3 ~ :
        1, 1, 1, 1, 1, 1, // \u11c9 ~ :
        1, 1, 1, 1, 1, 1, // \u11cf ~ :
        1, 1, 1, 1, 1, 1, // \u11d5 ~ :
        1, 1, 1, 1, 1, 1, // \u11db ~ :
        1, 1, 1, 1, 1, 1, // \u11e1 ~ :
        1, 1, 1, 1, 1, 1, // \u11e7 ~ :
        1, 1, 1, 1, 1, 1, // \u11ed ~ :
        1, 1, 1, 1, 1, 1, // \u11f3 ~ :
        1 // \u11f9:
    };

    // The mapping from vowels to trailing consonant type

    static final int[] tconMap = {
        0, 0, 2, 0, 2, 1, // (Fill), A, AE, YA, YAE, EO
        2, 1, 2, 3, 0, 0, // E, YEO, YE, O, WA, WAE
        0, 3, 3, 1, 1, 1, // OI, YO, U, WEO, WE, WI
        3, 3, 0, 1, // YU, EU, UI, I

        // ancient vowels (45)
        3, 3, 3, 3, 3, 3, // \u1176 ~ : A-O, A-U, YA-O, YA-YO, EO-O, EO-U
        3, 3, 3, 1, 0, 0, // \u117c ~ : EO-EU, YEO-O, YEO-U, O-EO, O-E, O-YE
        3, 3, 3, 1, 0, 3, // \u1182 ~ : O-O, O-U, YO-YA, YO-YAE, YO-YEO, YO-O
        0, 0, 0, 0, 0, 3, // \u1188 ~ : YO-I, U-A, U-AE, U-EO-EU, U-YE, U-U
        0, 1, 1, 1, 1, 3, // \u118e ~ : YU-A, YU-EO, YU-E, YU-YEO, YU-YE, YU-U
        1, 3, 3, 3, 2, 2, // \u1194 ~ : YU-I, EU-U, EU-EU, YI-U, I-A, I-YA
        3, 3, 3, 1, 3, 0, // \u119a ~ : I-O, I-U, I-EU, I-ARAEA, ARAEA, ARAEA-EO
        3, 2, 3 // \u11a0 ~ : ARAEA-U, ARAEA-I, SSANGARAEA
    };

    void composeHangul(byte[] output)
    {
        int ind;

        if ( lconBase[l] != 0 )
        { // non-filler and supported by Hanterm Johab fonts
            ind = lconBase[l] + ( t > 0 ? lconMap2[v] : lconMap1[v] );
            output[byteOff++] = (byte) (ind / 256);
            output[byteOff++] = (byte) (ind % 256);
        }

        if ( vowBase[v] != 0 )
        { // non-filler and supported by Hanterm Johab fonts
            ind = vowBase[v];
            if ( vowType[v] == 1)
            { //'o' and alikes
                // GIYEOK and KIEUK got special treatment
                ind += ( (l == 0 || l == 15) ? 0 : 1)
                       + (t > 0 ? 2 : 0 );
            }
            else
            {
                ind += tconType[t];
            }

            output[byteOff++] = (byte) (ind / 256);
            output[byteOff++] = (byte) (ind % 256);
        }

        if ( tconBase[t] != 0 )
        { // non-filler and supported by Hanterm Johab fonts
            ind = tconBase[t] + tconMap[v];
            output[byteOff++] = (byte) (ind / 256);
            output[byteOff++] = (byte) (ind % 256);
        } else
        {
            output[byteOff++] = (byte) 0;
            output[byteOff++] = (byte) 0;
        }

        state = START;
        l = 0x5f;
        v = t = 0;
    }
}
  



This archive was generated by hypermail 2.1.2 : Tue Jul 10 2001 - 17:21:01 EDT