#!/usr/bin/perl -w
#
# zh_kanji.pl
#
# Converts a text file containing double-byte (GB code) Chinese characters
# into C source tables plus font glyph data:
#   1. process_kanji  - writes <prefix>.msg, <prefix>.idx and <prefix>.pnm
#   2. make_pnm_map   - rearranges <prefix>.pnm into <prefix>.mod.pnm (netpbm)
#   3. gen_font_glyph - packs <prefix>.mod.pnm into <prefix>.glyph

use strict;
use warnings;
use File::Copy qw(copy move);

use constant {
    GBCODE_BYTE_OFFSET => 0xa1,    # value subtracted from each code byte
    GBCODE_UNIT_LEN    => 94,      # characters per row of the code table
    GBCODE_CHAR_SIZE   => 32,      # 16x16 bitmap -> 32 bytes
};

# Header lines that start a message entry, and the C declaration each emits.
# The captured group is substituted into the printf format.
my @ENTRY_HEADERS = (
    [ qr/^m(\d+)/,  'static unsigned char str_%02d [] = {' ],
    [ qr/^c(\d+)/,  'static unsigned char course%s [] = {' ],
    [ qr/^s(\d+)/,  'static unsigned char star_name_%s [] = {' ],
    [ qr/^w(_\w+)/, 'static unsigned char %s [] = {' ],
);

# Single-byte characters with a fixed two-byte encoding.
my %CODE_OF = (
    'A' => '0x00, 0x54, ',
    '{' => '0x00, 0x54, ',
    'B' => '0x00, 0x55, ',
    '}' => '0x00, 0x55, ',
    'C' => '0x00, 0x56, ',
    'Z' => '0x00, 0x57, ',
    '<' => '0x00, 0x57, ',
    'R' => '0x00, 0x58, ',
    '>' => '0x00, 0x58, ',
    '#' => '0x00, 0xf5, ',
    '"' => '0x00, 0xf6, ',
    '@' => '0xff, 0xe0, ',
    ' ' => '0xff, 0x9e, ',
);

# Characters that may follow '^'; each emits 0x00, 0x56 plus a second code.
# Any other character after '^' is consumed and emits nothing.
my %CARET_CODE_OF = (
    '['  => '0x00, 0x56, 0x00, 0x52, ',
    ']'  => '0x00, 0x56, 0x00, 0x53, ',
    '/'  => '0x00, 0x56, 0x00, 0x51, ',
    '\\' => '0x00, 0x56, 0x00, 0x50, ',
    ','  => '0x00, 0x56, 0x00, 0x6f, ',
    '"'  => '0x00, 0x56, 0x00, 0xf6, ',
);

# Read one byte of an entry body; running out of input inside an entry
# means the terminating '*' is missing, which is reported instead of
# silently producing undefined bytes.
sub _next_byte {
    my ($fh, $file) = @_;
    my $byte = getc $fh;
    die "$file: unexpected end of file (entry not terminated by '*')\n"
        unless defined $byte;
    return $byte;
}

# Translate a two-byte character code into its glyph index in the font file.
# Dies when either byte lies outside the table, because such an index would
# point at the wrong glyph (or before the start of the file).
sub _gb_index {
    my ($first, $second, $file) = @_;
    my @offsets;
    for my $byte (ord $first, ord $second) {
        my $offset = $byte - GBCODE_BYTE_OFFSET;
        die sprintf("%s: byte 0x%02x has no mapping and is not part of a "
                  . "double-byte character\n", $file, $byte)
            if $offset < 0 || $offset >= GBCODE_UNIT_LEN;
        push @offsets, $offset;
    }
    return $offsets[0] * GBCODE_UNIT_LEN + $offsets[1];
}

# Fetch the 32-byte bitmap of glyph $index from the font file and expand it
# to one byte per pixel (0x00 for a clear bit, 0xff for a set bit), most
# significant bit first.
sub _glyph_pixels {
    my ($db_fh, $db_file, $index) = @_;
    seek($db_fh, $index * GBCODE_CHAR_SIZE, 0)
        or die "Can't seek in $db_file: $!";
    my $got = read($db_fh, my $bitmap, GBCODE_CHAR_SIZE);
    die "Can't read $db_file: $!" unless defined $got;
    if ($got < GBCODE_CHAR_SIZE) {
        # Keep the historical result (missing bytes render as blank pixels)
        # but make the problem visible.
        warn "$db_file: glyph $index is beyond the end of the font; "
           . "using blank pixels\n";
        $bitmap .= "\0" x (GBCODE_CHAR_SIZE - $got);
    }
    my $pixels = unpack 'B*', $bitmap;
    $pixels =~ tr/01/\x00\xff/;
    return $pixels;
}

#read from: HZK16, input txt file,
#generate: output.msg, output.idx, output.pnm file
#
# Parameters:
#   $in_db  - font bitmap file (32 bytes per glyph)
#   $in_txt - input text file
#   $out    - prefix of the generated files
#   $idx_h  - file copied to "$out.idx" before the glyph index is appended
# Returns the number of distinct double-byte characters encountered.
#
# Input format: a line starting with m<digits>, c<digits>, s<digits> or
# w_<word> opens an entry; the bytes that follow (starting on the next line)
# are encoded until a '*' closes the entry. Other lines between entries are
# ignored.
sub process_kanji {
    my ($in_db, $in_txt, $out, $idx_h) = @_;

    open(my $db_fh,  '<:raw', $in_db)  or die "Can't open $in_db: $!";
    open(my $txt_fh, '<:raw', $in_txt) or die "Can't open $in_txt: $!";
    open(my $msg_fh, '>', "$out.msg")  or die "Can't open $out.msg: $!";

    my $cnt = 0;            # number of distinct glyphs so far
    my %glyph_no_of;        # glyph index in font -> 1-based order of first use
    my $pixel_data = '';    # expanded bitmaps, in order of first use

    LINE: while (my $line = <$txt_fh>) {
        my $in_entry = 0;
        for my $header (@ENTRY_HEADERS) {
            my ($pattern, $format) = @{$header};
            if ($line =~ $pattern) {
                printf {$msg_fh} $format, $1;
                $in_entry = 1;
                last;
            }
        }
        next LINE unless $in_entry;

        CHAR: while (1) {
            my $c = _next_byte($txt_fh, $in_txt);

            if ($c eq '*') {
                print {$msg_fh} "0xff, 0xff, };\n\n";
                next LINE;
            }
            if ($c =~ /^\d/) {
                printf {$msg_fh} "0x00, 0x%x, ", $c;
                next CHAR;
            }
            if (exists $CODE_OF{$c}) {
                print {$msg_fh} $CODE_OF{$c};
                next CHAR;
            }
            if ($c eq '^') {
                my $escaped = _next_byte($txt_fh, $in_txt);
                print {$msg_fh} $CARET_CODE_OF{$escaped}
                    if exists $CARET_CODE_OF{$escaped};
                next CHAR;
            }
            if ($c eq "\x0a") {
                print {$msg_fh} "0xff, 0xfe, ";
                next CHAR;
            }

            # Everything else is the first byte of a two-byte sequence:
            # either CR LF or a double-byte character.
            my $c2 = _next_byte($txt_fh, $in_txt);
            if ($c eq "\x0d" && $c2 eq "\x0a") {
                print {$msg_fh} "0xff, 0xfe, ";
                next CHAR;
            }

            my $idx = _gb_index($c, $c2, $in_txt);
            if (!$glyph_no_of{$idx}) {
                $glyph_no_of{$idx} = ++$cnt;
                $pixel_data .= _glyph_pixels($db_fh, $in_db, $idx);
            }

            # Glyph numbers are emitted offset by 255, high byte first.
            my $code = $glyph_no_of{$idx} + 255;
            printf {$msg_fh} "0x%x, 0x%x, ", ($code >> 8) & 0xff, $code & 0xff;
        }
    }

    close $db_fh;
    close $txt_fh;
    close($msg_fh) or die "Can't close $out.msg: $!";

    # "$out.idx" is a writable copy of $idx_h with one pair of glyph
    # references per character appended, followed by the closing brace.
    my $idx_file = "$out.idx";
    copy($idx_h, $idx_file) or die "Can't copy $idx_h to $idx_file: $!";
    my $mode = (stat $idx_file)[2] & 07777;
    chmod($mode | 0200, $idx_file) or die "Can't chmod $idx_file: $!";

    open(my $idx_fh, '>>', $idx_file) or die "Can't open $idx_file: $!";
    print {$idx_fh} "\n";
    for my $i (0 .. $cnt - 1) {
        printf {$idx_fh} "c_%d_l_txt, c_%d_r_txt, ", $i, $i;
    }
    print {$idx_fh} "\n};\n";
    close($idx_fh) or die "Can't close $idx_file: $!";

    # One 16-pixel-wide greyscale image holding all glyphs stacked vertically.
    open(my $pnm_fh, '>:raw', "$out.pnm") or die "Can't open $out.pnm: $!";
    printf {$pnm_fh} "P5\n16 %d\n255\n", 16 * $cnt;
    print {$pnm_fh} $pixel_data;
    close($pnm_fh) or die "Can't close $out.pnm: $!";

    return $cnt;
}

# Quote a word for /bin/sh so file names with spaces or metacharacters
# survive interpolation into a command line.
sub _shell_quote {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# Run a shell command (redirections allowed) and die if it fails.
sub _run_shell {
    my ($cmd) = @_;
    system($cmd) == 0 or die "Command failed (status $?): $cmd\n";
    return;
}

#split each chinese character into half, rotate each of the half
#clockwise 90 degrees, and flip
#
# Parameters:
#   $name   - prefix; reads "$name.pnm", writes "$name.mod.pnm"
#   $height - image height minus one, as passed by the caller below
# Uses the netpbm tools pnmcut, pnmflip and pnmcat, and the scratch files
# 1.pnm, 11.pnm, 12.pnm, a.pnm, b.pnm and foo.pnm in the current directory.
sub make_pnm_map {
    my ($name, $height) = @_;
    my $mod_file = "$name.mod.pnm";
    my $src      = _shell_quote("$name.pnm");
    my $mod      = _shell_quote($mod_file);

    # Seed image for pnmcat to append to; its row is cut off again after the
    # loop. NOTE(review): presumably a single pixel, since legacy pnmcut
    # treats a width/height of 0 as "up to the edge" - confirm with the
    # installed netpbm.
    _run_shell("pnmcut 15 $height 0 0 $src > $mod");

    for (my $top = 0; $top < $height; $top += 16) {
        _run_shell("pnmcut 0 $top 16 16 $src > 1.pnm");
        _run_shell("pnmcut 0 0 8 16 1.pnm > 11.pnm");
        _run_shell("pnmcut 8 0 8 16 1.pnm > 12.pnm");
        _run_shell("pnmflip -lr -r270 11.pnm > a.pnm");
        _run_shell("pnmflip -lr -r270 12.pnm > b.pnm");
        _run_shell("pnmcat -tb $mod a.pnm b.pnm > foo.pnm");
        move('foo.pnm', $mod_file)
            or die "Can't move foo.pnm to $mod_file: $!";
        print "finished character ", ($top + 16) / 16, "\n";
    }

    # Drop the seed row (row 0), keeping the $height + 1 rows below it.
    my $rows = $height + 1;
    _run_shell("pnmcut 0 1 16 $rows $mod > foo.pnm");
    move('foo.pnm', $mod_file) or die "Can't move foo.pnm to $mod_file: $!";

    for my $scratch (qw(1.pnm 11.pnm 12.pnm a.pnm b.pnm)) {
        unlink($scratch) or warn "Can't remove $scratch: $!\n";
    }
    return;
}

#generate data file of the 8x8 font glyph in SuperMario
#
# Reads "$name.mod.pnm", thresholds each pixel byte (> 127 becomes a set
# bit), packs 8 pixels per output byte with the first pixel in the most
# significant bit, and writes "$name.glyph" as C arrays of 16 bytes each,
# named c_<n>_l_txt and c_<n>_r_txt alternately.
sub gen_font_glyph {
    my $name = shift;

    open(my $in_fh, '<:raw', "$name.mod.pnm")
        or die "Can't open $name.mod.pnm: $!";
    open(my $out_fh, '>', "$name.glyph")
        or die "Can't open $name.glyph: $!";

    # Skip the three header lines (magic, dimensions, maximum value) so
    # that only pixel data is packed.
    for (1 .. 3) {
        defined(scalar <$in_fh>)
            or die "$name.mod.pnm: truncated PNM header\n";
    }

    my $cnt = 0;    # number of arrays started
    my $idx = 0;    # number of bytes written
    while (read($in_fh, my $chunk, 8)) {
        my @pixels = unpack 'C*', $chunk;
        my $packed = 0;
        for my $j (0 .. $#pixels) {
            $packed |= 1 << (7 - $j) if $pixels[$j] > 127;
        }

        if ($idx % 16 == 0) {
            printf {$out_fh} "static unsigned char c_%d_%s_txt[] = {\n",
                int($cnt / 2), ($cnt % 2 == 0) ? "l" : "r";
            $cnt++;
        }
        printf {$out_fh} "0x%02x, ", $packed;
        $idx++;
        print {$out_fh} "\n"    if $idx % 2 == 0;
        print {$out_fh} "};\n" if $idx % 16 == 0;
    }

    close $in_fh;
    close($out_fh) or die "Can't close $name.glyph: $!";
    print $cnt/2, " characters with ", 16*$cnt/1024.0, "K bytes\n";
    return;
}

die "Usage: zh_kanji.pl <HZK16 font> <input txt> <output prefix> <idx header>\n"
  . " See README for more detail.\n"
    unless @ARGV == 4;

my ($font_file, $text_file, $prefix, $idx_header) = @ARGV;

print "processing input Chinese text file\n";
my $char_count = process_kanji($font_file, $text_file, $prefix, $idx_header);
print "number of unique Chinese characters used: ", $char_count, "\n";

if ($char_count == 0) {
    # Nothing to rearrange; still leave an (empty) glyph file behind so that
    # consumers of the generated files find it.
    open(my $glyph_fh, '>', "$prefix.glyph")
        or die "Can't open $prefix.glyph: $!";
    close($glyph_fh) or die "Can't close $prefix.glyph: $!";
    print "0 characters with 0K bytes\n";
    exit 0;
}

print "making pnm map with height: ", $char_count*16, "\n";
make_pnm_map($prefix, $char_count*16 - 1);

print "writing data file for chinese font glyph\n";
gen_font_glyph($prefix);