Source of tools/mkdigraphs-xorg

#!/usr/bin/env perl
use 5.014;
use warnings;
use utf8;
use open IO => ':encoding(utf-8)', ':std';
use re '/msx';
use JSON 'decode_json';
use Data::Dump 'pp';
use Shiar_Sheet::FormatChar;

our $VERSION = '1.01';

my $matchvim;  # enable to prefer best compatibility

my $symname = eval {
	open my $keysymh, '<', 'data/keysymdef.json' or die $!;
	local $/;
	return decode_json(readline $keysymh);
} or die "Could not read keysym definitions: $@\n";

my $vidi = eval {
	open my $jsfh, '<', 'data/digraphs.json' or die $!;
	local $/;
	return JSON->new->decode(readline $jsfh);
} or warn "Could not read comparison digraphs: $@\n";

my %table;
while ($_ = readline) {
	my ($mnem, $chr, $trail) = m/\A <Multi_key> \h (.*?) \h+ : \h "([^"]+)" \h* (.*)/
		or next;
	$chr =~ s/\\(.)/$1/g;
	$mnem !~ m/<dead | <KP_ | <U[0-9A-Fa-f]{4}/ or next;  # skip non-standard keys
	eval {
		$mnem =~ s{<([^>]+)> \h?}{$symname->{$1} // die "reference to unknown keysym $1\n"}eg;
		1;
	} or warn($@), next;
	$mnem =~ m/\A [\x20-\x7F]{2} \z/ or next;  # only interested in two ascii

	my $alias = \(state $seen = {})->{$chr};  # assume first is preferred
	my $cp = ord $chr;
	my ($class, $name, undef, undef, $string) = @{
		Shiar_Sheet::FormatChar->glyph_info($cp)
	};
	my $comparison = (
		!$vidi->{key}->{$mnem} ? 'l3' :  # free
		$vidi->{key}->{$mnem}->[0] != $cp ? 'l1' :  # conflict
		$vidi->{key}->{$mnem}->[2] eq 'l5' ? 'l5' :  # rfc
		'l4'  # any
	);

	if (${$alias}) {
		# aliases an earlier occurrence
		if ($matchvim and ${$alias}->[2] lt $comparison) {
			# replace lower compatibility level
			${$alias}->[3] = 'l0';
			${$alias}->[2] .=  ' u-' . ${$alias}->[2];
			${$alias} = undef;
		}
		else {
			$class = 'l0';
			my $menm = substr($mnem, 1, 1).substr($mnem, 0, 1);
			if ($table{$menm} && $table{$menm}[0] == $cp) {
				# unannotated if identical to reversed input
				$cp = 0;
			}
			else {
				$class .= ' ex';
			}
		}
	}

	$table{$mnem} = [ $cp, $name, $comparison, $class, $string // () ];
	${$alias} //= $table{$mnem};
}

print JSON->new->canonical->indent->encode({
	title => 'X.Org',
	key   => \%table,
	intro => join("\n",
		'Character mnemonics following compose key ⎄:',
		'in the X Window System (Shift+AltGr by default).',
		'Differences from <a href="/digraphs">RFC-1345</a> are indicated.',
		'Also see <a href="/unicode">common Unicode</a>.',
	),
	keywords => [qw( xorg x11 x )],
	flag  => {
		'l5' => "matching RFC-1345",
		'l4' => "matching Vim extension",
		'l3' => "unique to Xorg",
		'l1' => "conflict",
		('l0' => "Xorg preference") x !!$matchvim,
		'l0 ex' => "alias",
	},
	flagclass => {
		l5 => 'u-l4',
		l4 => 'u-l5',
	},
});

__END__

=head1 NAME

mkdigraphs-xorg - Output Xorg compose sequences

=head1 SYNOPSIS


    mkdigraphs-xorg /usr/share/X11/locale/en_US.UTF-8/Compose |
    jq -r '.key."AT"[0]' | perl -nE 'say chr' # @

=head1 DESCRIPTION

Extracts Multi_key definitions from X11/Xorg Compose.pre include file.
If successful, a JSON object is output containing a digraphs list in C<key>
with Unicode code points keyed by mnemonics.
Any errors and warnings are given at STDERR.

=head1 AUTHOR

Mischa POSLAWSKY <perl@shiar.org>

=head1 LICENSE

Licensed under the GNU Affero General Public License version 3.