#!/usr/bin/perl -w

## This script takes output from java CUP and translates it into
## a polyj-friendly form.  This means two things: first, it
## works around the fact that the polyj parser doesn't like
## methodCall(new String[] { "..." }); second, it translates the
## String literals containing octal escapes into Strings with
## only Unicode (\u and four hexadecimal digits) escapes.

## State shared with the main loop below.
$instring = 0;                ## non-zero while a table's string list is being copied
$curchar  = 'a';              ## suffix for the next generated table_strs_<x> array
($line1, $line2) = ("", "");  ## held-back declaration line and the line after it

## unicodify LINE
##
## Rewrites the first double-quoted string literal on LINE so that every
## Java octal escape (\0 .. \377) becomes the equivalent Unicode escape
## (\u followed by four lowercase hex digits).  Existing \uXXXX escapes,
## escaped backslashes (\\) and any other text inside the literal are
## passed through unchanged, as is everything outside the literal.
##
## Returns the rewritten line.  A line containing no double quote is
## returned as-is.
sub unicodify {
    my ($line) = @_;

    ## A limit of -1 keeps trailing empty fields, so a line that ends
    ## with the closing quote, or holds an empty literal (""), is put
    ## back together intact by the join below.
    my @parts = split(/"/, $line, -1);
    return $line if @parts < 2;

    ## Matching \\ first stops the second backslash of an escaped
    ## backslash from being taken as the start of an octal escape.
    ## The octal alternatives follow Java's grammar: three digits only
    ## when the first is 0-3, otherwise one or two digits.
    $parts[1] =~ s/(\\\\)|\\([0-3][0-7][0-7]|[0-7][0-7]?)/
        defined($1) ? $1 : sprintf("\\u%04x", oct($2))/ge;

    return join('"', @parts);
}

## Main filter loop.  Lines are copied through unchanged until a
## "protected static final short ...[][] ... =" declaration is seen.
## That declaration and the line after it are held back, a
## "private static String[] table_strs_<x> = {" header is printed in
## their place, and the following lines are run through unicodify.
## The line containing ")" ends the table: its first ")" is dropped,
## and the held-back lines are printed after it.
while (defined(my $text = <>)) {
    if (!$instring) {
        if ($text =~ m/(\s*protected static final short.*\[\]\[\].* =)/) {
            $line1 = $1;
            ## The line after the declaration carries the
            ## "new String[] {" argument; point it at the named array.
            $line2 = <>;
            $line2 =~ s/new String\[\] \{/table_strs_$curchar\);/;
            print "  private static String[] table_strs_$curchar = {\n";
            $curchar++;
            $instring = 1;
        } else {
            print $text;
        }
        next;
    }

    ## s/// reports whether a ")" was present (and removed).
    my $closed = ($text =~ s/\)//);
    print scalar(unicodify($text));
    if ($closed) {
        print "$line1\n";
        print $line2;
        $instring = 0;
    }
}
