sm64/i10n/tools/zh/zh_kanji.pl
2022-12-04 22:27:02 -05:00

275 lines
8.7 KiB
Raku

#!/usr/bin/perl -w
#Constants used by process_kanji to locate a glyph in the font file:
#$GBCODE_BYTE_OFFSET is subtracted from each of the two bytes of a character
#code, the first result is multiplied by $GBCODE_UNIT_LEN and the second one
#added to it, and that index times $GBCODE_CHAR_SIZE is the byte offset of
#the glyph bitmap.
#NOTE(review): the values look like the GB2312 zone/position layout
#(94 x 94 cells, bytes starting at 0xa1) - confirm against the font file used.
$GBCODE_BYTE_OFFSET = 0xa1;
$GBCODE_UNIT_LEN = 94;
$GBCODE_CHAR_SIZE = 32; #16x16 bitmap -> 32 bytes
#read from: HZK16, input txt file,
#generate: output.msg, output.idx, output.pnm file
#
#Arguments:
#  $in_db  - bitmap font file; glyph N occupies bytes
#            [N*$GBCODE_CHAR_SIZE, (N+1)*$GBCODE_CHAR_SIZE)
#  $in_txt - message text file (read as raw bytes)
#  $out    - base name of the generated files ($out.msg, $out.idx, $out.pnm)
#  $idx_h  - header file whose content is copied to the start of $out.idx
#Returns the number of distinct two-byte characters that were used.
#
#Input format: a line starting with m<digits>, c<digits>, s<digits> or
#w_<word> opens a message and selects the name of the generated C array;
#any other line outside a message is skipped. The message body starts on
#the next line and runs up to a '*'; the rest of the '*' line is discarded.
#
#Dies if a file cannot be opened, if the text ends inside a message, if a
#byte pair is not inside the range covered by the font, or if the font file
#is too short for a requested glyph.
sub process_kanji {
    my ($in_db, $in_txt, $out, $idx_h) = @_;
    # File-level constants; "our" only makes the package variables visible
    # here by their short names.
    our ($GBCODE_BYTE_OFFSET, $GBCODE_UNIT_LEN, $GBCODE_CHAR_SIZE);

    # Single bytes that are emitted as a fixed two-byte code.
    my %single_code = (
        'A'  => "0x00, 0x54, ",
        '{'  => "0x00, 0x54, ",
        'B'  => "0x00, 0x55, ",
        '}'  => "0x00, 0x55, ",
        'C'  => "0x00, 0x56, ",
        'Z'  => "0x00, 0x57, ",
        '<'  => "0x00, 0x57, ",
        'R'  => "0x00, 0x58, ",
        '>'  => "0x00, 0x58, ",
        '#'  => "0x00, 0xf5, ",
        '"'  => "0x00, 0xf6, ",
        '@'  => "0xff, 0xe0, ",
        ' '  => "0xff, 0x9e, ",
        "\n" => "0xff, 0xfe, ",
    );
    # Byte following a '^'; an unlisted byte after '^' emits nothing.
    my %caret_code = (
        '['  => "0x00, 0x56, 0x00, 0x52, ",
        ']'  => "0x00, 0x56, 0x00, 0x53, ",
        '/'  => "0x00, 0x56, 0x00, 0x51, ",
        '\\' => "0x00, 0x56, 0x00, 0x50, ",
        ','  => "0x00, 0x56, 0x00, 0x6f, ",
        '"'  => "0x00, 0x56, 0x00, 0xf6, ",
    );

    my $cnt = 0;        # number of distinct glyphs seen so far
    my @map;            # font index -> 1-based order of first appearance
    my $pixels = '';    # one 0x00/0xff byte per glyph pixel, in order of appearance

    open(my $db_fh, '<', $in_db) or die "Can't open $in_db: $!";
    binmode($db_fh);
    open(my $txt_fh, '<', $in_txt) or die "Can't open $in_txt: $!";
    binmode($txt_fh);
    open(my $msg_fh, '>', "$out.msg") or die "Can't open $out.msg: $!";

    my $eof_error = "$in_txt: unexpected end of file inside a message (missing '*')";

    LINE: while (my $line = <$txt_fh>) {
        if ($line =~ /^m(\d+)/) {
            printf {$msg_fh} "static unsigned char str_%02d [] = {", $1;
        }
        elsif ($line =~ /^c(\d+)/) {
            printf {$msg_fh} "static unsigned char course%s [] = {", $1;
        }
        elsif ($line =~ /^s(\d+)/) {
            printf {$msg_fh} "static unsigned char star_name_%s [] = {", $1;
        }
        elsif ($line =~ /^w(_\w+)/) {
            printf {$msg_fh} "static unsigned char %s [] = {", $1;
        }
        else {
            next LINE;    # blank or unrecognised line between messages
        }

        # Message body: consumed byte by byte up to the '*' terminator.
        while (1) {
            my $c = getc($txt_fh);
            die $eof_error unless defined $c;

            if ($c eq '*') {
                my $rest_of_line = <$txt_fh>;    # drop whatever follows the '*'
                print {$msg_fh} "0xff, 0xff, };\n\n";
                next LINE;
            }
            if ($c =~ /^[0-9]/) {
                printf {$msg_fh} "0x00, 0x%x, ", $c;
                next;
            }
            if (exists $single_code{$c}) {
                print {$msg_fh} $single_code{$c};
                next;
            }
            if ($c eq '^') {
                my $escaped = getc($txt_fh);
                die $eof_error unless defined $escaped;
                print {$msg_fh} $caret_code{$escaped} if exists $caret_code{$escaped};
                next;
            }

            # Everything else is the first byte of a two-byte sequence.
            my $c2 = getc($txt_fh);
            die $eof_error unless defined $c2;
            if ($c eq "\r" && $c2 eq "\n") {
                print {$msg_fh} "0xff, 0xfe, ";
                next;
            }

            my $row = ord($c)  - $GBCODE_BYTE_OFFSET;
            my $col = ord($c2) - $GBCODE_BYTE_OFFSET;
            # Reject pairs outside the font: a negative index would address
            # @map from its end and silently reuse another glyph.
            if ($row < 0 || $row >= $GBCODE_UNIT_LEN || $col < 0 || $col >= $GBCODE_UNIT_LEN) {
                die sprintf("%s: byte pair 0x%02x 0x%02x is not a valid GB code",
                            $in_txt, ord($c), ord($c2));
            }
            my $idx = $row * $GBCODE_UNIT_LEN + $col;

            if (!$map[$idx]) {
                $map[$idx] = ++$cnt;
                seek($db_fh, $idx * $GBCODE_CHAR_SIZE, 0)
                    or die "Can't seek in $in_db: $!";
                my $glyph;
                my $got = read($db_fh, $glyph, $GBCODE_CHAR_SIZE);
                die "$in_db: no complete glyph at index $idx"
                    unless defined $got && $got == $GBCODE_CHAR_SIZE;
                # Expand every bit (most significant first) to one pixel byte.
                my $bits = unpack('B*', $glyph);
                $bits =~ tr/01/\x00\xff/;
                $pixels .= $bits;
            }

            # The first glyph gets code 0x0100, the second 0x0101, and so on.
            my $code = $map[$idx] + 255;
            printf {$msg_fh} "0x%x, 0x%x, ", ($code >> 8) & 0xff, $code & 0xff;
        }
    }
    close($db_fh);
    close($txt_fh);
    close($msg_fh) or die "Can't write $out.msg: $!";

    # $out.idx = content of the header file followed by one pair of glyph
    # array names per character and the closing brace.
    open(my $hdr_fh, '<', $idx_h) or die "Can't open $idx_h: $!";
    my $header = do { local $/; <$hdr_fh> };
    $header = '' unless defined $header;
    close($hdr_fh);
    open(my $idx_fh, '>', "$out.idx") or die "Can't open $out.idx: $!";
    print {$idx_fh} $header, "\n";
    for my $n (0 .. $cnt - 1) {
        printf {$idx_fh} "c_%d_l_txt, c_%d_r_txt, ", $n, $n;
    }
    print {$idx_fh} "\n};\n";
    close($idx_fh) or die "Can't write $out.idx: $!";

    # $out.pnm = raw PGM, 16 pixels wide, one 16x16 cell per character.
    open(my $pnm_fh, '>', "$out.pnm") or die "Can't open $out.pnm: $!";
    binmode($pnm_fh);
    printf {$pnm_fh} "P5\n16 %d\n255\n", 16 * $cnt;
    print {$pnm_fh} $pixels;
    close($pnm_fh) or die "Can't write $out.pnm: $!";

    return $cnt;
}
#split each chinese character into half, rotate each of the half
#clockwise 90 degrees, and flip
#
#Arguments:
#  $name   - base name; reads $name.pnm and writes $name.mod.pnm
#  $height - height of $name.pnm in pixels minus one (the caller passes
#            16 * number_of_characters - 1)
#
#Every 16x16 cell of $name.pnm is cut into a left and a right 8x16 half,
#each half is passed through "pnmflip -lr -r270", and the results are
#stacked top to bottom in $name.mod.pnm. Requires the netpbm tools pnmcut,
#pnmflip and pnmcat on the PATH. Dies when one of them cannot be run or
#exits with a non-zero status.
sub make_pnm_map {
    my ($name, $height) = @_;
    my $source = "$name.pnm";
    my $result = "$name.mod.pnm";
    # Scratch files are derived from $name so that unrelated files in the
    # current directory are never overwritten.
    my %tmp = map { $_ => "$name.tmp.$_.pnm" }
              qw(cell left right left_rot right_rot joined);

    # Seed image taken from the last pixel of the source; pnmcat needs a
    # first operand, and the seed row is cut off again at the end.
    _netpbm_to_file($result, 'pnmcut', 15, $height, 0, 0, $source);

    for (my $top = 0; $top < $height; $top += 16) {
        _netpbm_to_file($tmp{cell},      'pnmcut', 0, $top, 16, 16, $source);
        _netpbm_to_file($tmp{left},      'pnmcut', 0, 0, 8, 16, $tmp{cell});
        _netpbm_to_file($tmp{right},     'pnmcut', 8, 0, 8, 16, $tmp{cell});
        _netpbm_to_file($tmp{left_rot},  'pnmflip', '-lr', '-r270', $tmp{left});
        _netpbm_to_file($tmp{right_rot}, 'pnmflip', '-lr', '-r270', $tmp{right});
        _netpbm_to_file($tmp{joined},    'pnmcat', '-tb', $result,
                        $tmp{left_rot}, $tmp{right_rot});
        rename($tmp{joined}, $result)
            or die "Can't rename $tmp{joined} to $result: $!";
        print "finished character ", ($top + 16) / 16, "\n";
    }

    # Drop the seed row: keep $height + 1 rows starting at row 1.
    _netpbm_to_file($tmp{joined}, 'pnmcut', 0, 1, 16, $height + 1, $result);
    rename($tmp{joined}, $result)
        or die "Can't rename $tmp{joined} to $result: $!";
    unlink(@tmp{qw(cell left right left_rot right_rot)});
    return;
}
#Run an external command without a shell and store its standard output in
#$dest. Dies if the command cannot be started or reports failure.
sub _netpbm_to_file {
    my ($dest, @cmd) = @_;
    open(my $pipe, '-|', @cmd) or die "Can't run $cmd[0]: $!";
    binmode($pipe);
    open(my $dest_fh, '>', $dest) or die "Can't open $dest: $!";
    binmode($dest_fh);
    my $chunk;
    while (read($pipe, $chunk, 65536)) {
        print {$dest_fh} $chunk or die "Can't write $dest: $!";
    }
    close($dest_fh) or die "Can't write $dest: $!";
    # close() on a pipe waits for the child and is false on a non-zero exit.
    close($pipe)
        or die "$cmd[0] failed" . ($! ? ": $!" : " with exit status " . ($? >> 8));
    return;
}
#generate data file of the 8x8 font glyph in SuperMario
#
#Argument: $name - base name; reads $name.mod.pnm, writes $name.glyph.
#
#The first three lines of the input are skipped as the PNM header. The rest
#is read 8 bytes at a time; every byte above 127 becomes a 1 bit, and the
#8 bits (first byte = most significant bit) form one output byte. Sixteen
#output bytes make one C array; the arrays are named c_<n>_l_txt and
#c_<n>_r_txt alternately. A summary line is printed to standard output.
#Dies if one of the files cannot be opened or written.
sub gen_font_glyph {
    my $name = shift;
    my $cnt = 0;    # number of arrays started so far
    my $idx = 0;    # number of bytes written so far
    open(my $in_fh, '<', "$name.mod.pnm") or die "Can't open $name.mod.pnm: $!";
    binmode($in_fh);    # pixel data is binary
    open(my $out_fh, '>', "$name.glyph") or die "Can't open $name.glyph: $!";

    for (1 .. 3) {
        my $header_line = <$in_fh>;
    }

    my $pixels;
    while (read($in_fh, $pixels, 8)) {
        my $packed = 0;
        for my $bit (0 .. 7) {
            $packed |= 1 << (7 - $bit) if vec($pixels, $bit, 8) > 127;
        }
        if ($idx % 16 == 0) {
            printf {$out_fh} "static unsigned char c_%d_%s_txt[] = {\n",
                int($cnt / 2), ($cnt % 2 == 0) ? "l" : "r";
            $cnt++;
        }
        printf {$out_fh} "0x%02x, ", $packed;
        $idx++;
        print {$out_fh} "\n"    if $idx % 2 == 0;
        print {$out_fh} "};\n"  if $idx % 16 == 0;
    }
    close($in_fh);
    close($out_fh) or die "Can't write $name.glyph: $!";
    print $cnt/2, " characters with ", 16*$cnt/1024.0, "K bytes\n";
}
# Command-line driver: requires exactly four arguments, then runs the three
# stages in order on the same output base name.
if (@ARGV != 4) {
    die "Usage: zh_kanji.pl <Chinese GB font map> <input Chinese text file> <output file> <idx header file>\n See README for more detail.\n";
}
my ($font_map, $text_file, $out_base, $idx_header) = @ARGV;

print "processing input Chinese text file\n";
my $char_count = process_kanji($font_map, $text_file, $out_base, $idx_header);
print "number of unique Chinese characters used: ", $char_count, "\n";

# One 16-pixel-high cell per character; make_pnm_map is given that height
# minus one.
my $map_height = $char_count * 16;
print "making pnm map with height: ", $map_height, "\n";
make_pnm_map($out_base, $map_height - 1);

print "writing data file for chinese font glyph\n";
gen_font_glyph($out_base);