# Source of charset-utf8.inc.pl
use utf8;
# UTF-8 byte layout table, expressed as one anonymous hash reference.
# The leading unary plus makes Perl parse the braces as a hash
# constructor rather than as a bare block.
#
# Each key is the first byte value of a range; each value is an array of
#   [0] how many consecutive byte values the range spans (every key plus
#       its count equals the next key, and the counts add up to 0x100),
#   [1] styling tokens for the table cell (NOTE(review): these look like
#       CSS class names plus raw attributes such as rowspan; confirm
#       against the code that renders this table),
#   [2] a short label for the range (may be empty),
#   [3] optionally, the code point range associated with the range,
#       written with '·' between groups of four hex digits.
+{
	# Single bytes and continuation bytes.
	0x00 => [0x80, 'X l4 u-ascii', 'single byte ASCII', 'U+0000 – U+007F'],
	0x80 => [0x40, 'X l3', 'multi-byte continuation'],

	# 0xC0 and 0xC1 could only start overlong encodings of ASCII.
	0xC0 => [0x02, 'X l1 joinr joind', '(overl.)', 'U+0000 – U+007F'],

	# 2-byte lead bytes, split over two entries that share one label.
	0xC2 => [
		0x0E, 'X l2 u-bmp joinl rowspan="2"',
		'2-byte sequence start', 'U+0080 – U+03FF',
	],
	0xD0 => [0x10, 'X l2 u-bmp joinu', '', 'U+0400 – U+07FF'],

	0xE0 => [0x10, 'X l2 u-bmp', '3-byte sequence start', 'U+0800 – U+FFFF'],

	# 4-byte lead bytes: 0xF0..0xF4 stay within Unicode, 0xF5..0xF7 do
	# not.  The 4-byte form carries 21 payload bits, so its ceiling is
	# U+1F·FFFF.
	0xF0 => [0x05, 'X l2 joinr', '4-byte sequence', 'U+1·0000 – U+10·FFFF'],
	0xF5 => [0x03, 'X l1 joinl', '(overflow)', 'U+11·0000 – U+1F·FFFF'],

	# Obsolete long forms: 5 bytes carry 26 payload bits (up to
	# U+3FF·FFFF), 6 bytes carry 31 bits (up to U+7FFF·FFFF).
	0xF8 => [0x04, 'X l1', '5-byte', 'U+20·0000 – U+3FF·FFFF'],
	0xFC => [0x02, 'X l1', '6-byte', 'U+400·0000 – U+7FFF·FFFF'],

	# 0xFE and 0xFF are not lead bytes of any sequence length.
	0xFE => [0x02, 'di-invalid', 'invalid'],
};