Skip to content

Commit ac61d48

Browse files
authored
Use HB-side packtab-based generators (#340)
* Regenerate Arabic and Indic Rust tables
* Remove obsolete local table generators
* Switch emoji table generation to HarfBuzz
1 parent 12178f3 commit ac61d48

File tree

9 files changed

+392
-1547
lines changed

9 files changed

+392
-1547
lines changed

harfrust/src/hb/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ mod ot_shape_normalize;
4545
pub mod ot_shape_plan;
4646
mod ot_shaper;
4747
mod ot_shaper_arabic;
48+
#[rustfmt::skip]
4849
mod ot_shaper_arabic_table;
4950
mod ot_shaper_hangul;
5051
mod ot_shaper_hebrew;
@@ -69,6 +70,8 @@ mod tag;
6970
mod tag_table;
7071
mod text_parser;
7172
#[rustfmt::skip]
73+
mod unicode_emoji_table;
74+
#[rustfmt::skip]
7275
mod ucd_table;
7376
mod unicode;
7477

Lines changed: 96 additions & 195 deletions
Original file line numberDiff line numberDiff line change
@@ -1,202 +1,103 @@
1-
// WARNING: this file was generated by scripts/gen-arabic-table.py
1+
/* == Start of generated table == */
2+
/*
3+
* The following table is generated by running:
4+
*
5+
* ./gen-arabic-table.py --rust ArabicShaping.txt UnicodeData.txt Blocks.txt
6+
*
7+
* on files with these headers:
8+
*
9+
* # ArabicShaping-17.0.0.txt
10+
* # Date: 2025-08-14
11+
* # Blocks-17.0.0.txt
12+
* # Date: 2025-08-01
13+
* UnicodeData.txt does not have a header.
14+
*/
15+
16+
#![allow(unused_parens)]
17+
#![allow(clippy::unnecessary_cast, clippy::unreadable_literal, clippy::double_parens)]
218

319
use crate::hb::unicode::Codepoint;
420

5-
use super::ot_shaper_arabic::hb_arabic_joining_type_t::{
6-
self, GroupAlaph as A, GroupDalathRish as DR, D, L, R, T, U, X,
7-
};
8-
9-
#[rustfmt::skip]
10-
pub(crate) static JOINING_TABLE: &[hb_arabic_joining_type_t] = &[
11-
12-
/* Arabic */
13-
14-
/* 0600 */ U,U,U,U,U,U,X,X,U,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
15-
/* 0620 */ D,U,R,R,R,R,D,R,D,R,D,D,D,D,D,R,R,R,R,D,D,D,D,D,D,D,D,D,D,D,D,D,
16-
/* 0640 */ D,D,D,D,D,D,D,D,R,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
17-
/* 0660 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,D,D,X,R,R,R,U,R,R,R,D,D,D,D,D,D,D,D,
18-
/* 0680 */ D,D,D,D,D,D,D,D,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,D,D,D,D,D,D,
19-
/* 06A0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
20-
/* 06C0 */ R,D,D,R,R,R,R,R,R,R,R,R,D,R,D,R,D,D,R,R,X,R,X,X,X,X,X,X,X,U,X,X,
21-
/* 06E0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,R,R,X,X,X,X,X,X,X,X,X,X,D,D,D,X,X,D,
22-
23-
/* Syriac */
24-
25-
/* 0700 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,T,A,X,D,D,D,DR,DR,R,R,R,D,D,D,D,R,D,
26-
/* 0720 */ D,D,D,D,D,D,D,D,R,D,DR,D,R,D,D,DR,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
27-
/* 0740 */ X,X,X,X,X,X,X,X,X,X,X,X,X,R,D,D,
28-
29-
/* Arabic Supplement */
30-
31-
/* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D,
32-
/* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D,
33-
34-
/* FILLER */
35-
36-
/* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
37-
/* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
38-
39-
/* NKo */
40-
41-
/* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
42-
/* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,D,X,X,X,X,X,
43-
44-
/* FILLER */
45-
46-
/* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
47-
/* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
48-
49-
/* Mandaic */
50-
51-
/* 0840 */ R,D,D,D,D,D,R,R,D,R,D,D,D,D,D,D,D,D,D,D,R,D,R,R,R,X,X,X,X,X,X,X,
52-
53-
/* Syriac Supplement */
54-
55-
/* 0860 */ D,U,D,D,D,D,U,R,D,R,R,X,X,X,X,X,
56-
57-
/* Arabic Extended-B */
58-
59-
/* 0860 */ R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,
60-
/* 0880 */ R,R,R,D,D,D,D,U,U,D,D,D,D,D,R,D,U,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
61-
62-
/* Arabic Extended-A */
63-
64-
/* 08A0 */ D,D,D,D,D,D,D,D,D,D,R,R,R,U,R,D,D,R,R,D,D,D,D,D,D,R,D,D,D,D,D,D,
65-
/* 08C0 */ D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
66-
/* 08E0 */ X,X,U,
67-
68-
/* Mongolian */
69-
70-
/* 1800 */ U,D,X,X,D,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
71-
/* 1820 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
72-
/* 1840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
73-
/* 1860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,
74-
/* 1880 */ U,U,U,U,U,T,T,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
75-
/* 18A0 */ D,D,D,D,D,D,D,D,D,X,D,
76-
77-
/* General Punctuation */
78-
79-
/* 2000 */ U,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
80-
/* 2020 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
81-
/* 2040 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
82-
/* 2060 */ X,X,X,X,X,X,U,U,U,U,
83-
84-
/* Phags-pa */
85-
86-
/* A840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
87-
/* A860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,L,U,
88-
89-
/* Manichaean */
90-
91-
/* 10AC0 */ D,D,D,D,D,R,U,R,U,R,R,U,U,L,R,R,R,R,R,D,D,D,D,L,D,D,D,D,D,R,D,D,
92-
/* 10AE0 */ D,R,U,U,R,X,X,X,X,X,X,D,D,D,D,R,
93-
94-
/* Psalter Pahlavi */
95-
96-
/* 10B80 */ D,R,D,R,R,R,D,D,D,R,D,D,R,D,R,R,D,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
97-
/* 10BA0 */ X,X,X,X,X,X,X,X,X,R,R,R,R,D,D,U,
98-
99-
/* Hanifi Rohingya */
100-
101-
/* 10D00 */ L,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
102-
/* 10D20 */ D,D,R,D,
103-
104-
/* Arabic Extended-C */
105-
106-
/* 10EC0 */ R,D,D,X,D,D,
107-
108-
/* Sogdian */
109-
110-
/* 10F20 */ D,D,D,R,D,D,D,D,D,D,D,D,D,D,D,D,
111-
/* 10F40 */ D,D,D,D,D,U,X,X,X,X,X,X,X,X,X,X,X,D,D,D,R,X,X,X,X,X,X,X,X,X,X,X,
112-
/* 10F60 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
113-
114-
/* Old Uyghur */
115-
116-
/* 10F60 */ D,D,D,D,R,R,D,D,D,D,D,D,D,D,D,D,
117-
/* 10F80 */ D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
118-
/* 10FA0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
119-
120-
/* Chorasmian */
121-
122-
/* 10FA0 */ D,U,D,D,R,R,R,U,D,R,R,D,D,R,D,D,
123-
/* 10FC0 */ U,D,R,R,D,U,U,U,U,R,D,L,
124-
125-
/* Kaithi */
126-
127-
/* 110A0 */ U,X,X,
128-
/* 110C0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,U,
129-
130-
/* Adlam */
131-
132-
/* 1E900 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
133-
/* 1E920 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
134-
/* 1E940 */ D,D,D,D,X,X,X,X,X,X,X,T,
21+
use super::ot_shaper_arabic::hb_arabic_joining_type_t::{self, D, GroupAlaph, GroupDalathRish, L, R, T, U, X};
22+
23+
pub(crate) static _hb_arabic_joining_u8: [u8; 737]=
24+
[
25+
0, 16, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0,
26+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0,
28+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29+
0, 0, 96,135, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0,
33+
0, 0, 0, 8, 16, 24, 32, 40, 48, 0, 56, 0, 64, 72, 80, 0,
34+
0, 0, 0, 88, 96,104, 0, 0, 0, 0, 0,112,120, 0, 0, 0,
35+
0, 0, 0, 0,128, 0, 0, 0, 0, 0, 0, 0, 0, 0,136, 0,
36+
0,144, 0, 0, 0, 0, 0,152, 0, 0, 0, 0, 0, 0,160,168,
37+
176,184,192, 0, 0,200,208, 0, 0, 0, 0, 0, 0, 0, 0,216,
38+
224, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 0, 0, 10, 11, 12,
39+
1, 1, 13, 0, 0, 0, 14, 15, 1, 1, 2, 2, 4, 1, 1, 1,
40+
1, 16, 17, 18, 3, 0, 19, 0, 20, 0, 21, 22, 23, 1, 24, 0,
41+
0, 0, 25, 1, 26, 1, 27, 28, 4, 0, 29, 1, 1, 1, 30, 0,
42+
31, 32, 5, 33, 34, 35, 36, 2, 2, 37, 38, 6, 0, 1, 39, 40,
43+
5, 1, 7, 0, 0, 41, 0, 0, 0, 42, 43, 0, 0, 1, 1, 1,
44+
1, 1, 1, 1, 1, 1, 1, 1, 7, 44, 1, 1, 1, 1, 45, 0,
45+
0, 0, 46, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 48, 6, 0,
46+
0, 1, 1, 1, 1, 1, 1, 49, 0, 50, 51, 52, 53, 54, 55, 0,
47+
0, 56, 57, 58, 0, 0, 59, 0, 0, 60, 1, 1, 1, 61, 0, 0,
48+
0, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63,
49+
1, 64, 0, 65, 0, 0, 0, 66, 1, 67, 0, 0, 0, 0, 0, 68,
50+
69, 70, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51+
3, 0, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
52+
1, 72, 73, 0, 0, 0, 0, 0, 0,119,119,119,119, 51, 51, 51,
53+
51, 34, 34, 34, 34,119,119, 7,119, 34, 51, 51, 51, 35, 51, 51,
54+
51, 0,119,119,119,115,119,119,119, 0, 0, 0,119,112, 7,119,
55+
119, 3, 34, 34, 35, 35, 51, 51, 35, 34, 50, 51, 51, 50,115,119,
56+
119,119,119,119, 51, 39, 34, 32, 34, 50, 35, 34, 34, 34, 34, 35,
57+
35, 51, 34, 39,119,119,119,119, 34,119, 51,115, 55,119,119,119,
58+
103,116, 51, 83, 37, 34, 51, 51, 50, 50, 53, 50, 83,119,119, 39,
59+
51, 35, 34, 51, 51, 51, 35, 50, 51, 35, 35, 50, 51,119, 51, 51,
60+
51, 51,115,119,119,119,115,119,119, 50, 51, 51, 34, 51, 51, 50,
61+
34,114,119,119,119, 3, 51, 51, 32, 35,114,119,119, 34, 50, 51,
62+
3, 48, 51, 51, 50, 51, 34, 2, 50, 35, 50, 51, 51,119,112,119,
63+
119,119,119,119, 48,119,115,119,112, 0, 0, 96, 54,115,115,119,
64+
119,119,119, 48,119,119,119,119, 7,119,119,119, 0, 51, 1,119,
65+
119, 51, 51, 35, 32, 32, 2, 16, 34, 34, 50, 51, 19, 51, 51, 35,
66+
51, 35, 0,114,119,119, 55, 51, 35, 35, 35, 34, 51, 35, 51, 50,
67+
34, 35,119,119,119, 39, 34, 50, 3, 49, 51, 51, 51, 51, 50,119,
68+
119,119, 50,115, 51, 51, 35, 51, 51, 51, 51, 3,119, 55, 51,114,
69+
119, 51, 51, 34, 51, 51,119,119,119, 3, 51, 34, 2, 35, 50, 35,
70+
51, 48, 34, 3, 0, 32, 19,119,119, 51, 51,119,119,119,103,119,
71+
119,
13572
];
13673

137-
const JOINING_OFFSET_0X0600: usize = 0;
138-
const JOINING_OFFSET_0X1806: usize = 739;
139-
const JOINING_OFFSET_0X200C: usize = 904;
140-
const JOINING_OFFSET_0XA840: usize = 998;
141-
const JOINING_OFFSET_0X10AC0: usize = 1050;
142-
const JOINING_OFFSET_0X10B80: usize = 1098;
143-
const JOINING_OFFSET_0X10D00: usize = 1146;
144-
const JOINING_OFFSET_0X10EC2: usize = 1182;
145-
const JOINING_OFFSET_0X10F30: usize = 1188;
146-
const JOINING_OFFSET_0X110BD: usize = 1344;
147-
const JOINING_OFFSET_0X1E900: usize = 1361;
148-
149-
pub fn joining_type(u: Codepoint) -> hb_arabic_joining_type_t {
150-
match u >> 12 {
151-
0x0 => {
152-
if (0x0600..=0x08E2).contains(&u) {
153-
return JOINING_TABLE[u as usize - 0x0600 + JOINING_OFFSET_0X0600];
154-
}
155-
}
156-
0x1 => {
157-
if (0x1806..=0x18AA).contains(&u) {
158-
return JOINING_TABLE[u as usize - 0x1806 + JOINING_OFFSET_0X1806];
159-
}
160-
}
161-
0x2 => {
162-
if (0x200C..=0x2069).contains(&u) {
163-
return JOINING_TABLE[u as usize - 0x200C + JOINING_OFFSET_0X200C];
164-
}
165-
}
166-
0xA => {
167-
if (0xA840..=0xA873).contains(&u) {
168-
return JOINING_TABLE[u as usize - 0xA840 + JOINING_OFFSET_0XA840];
169-
}
170-
}
171-
0x10 => {
172-
if (0x10AC0..=0x10AEF).contains(&u) {
173-
return JOINING_TABLE[u as usize - 0x10AC0 + JOINING_OFFSET_0X10AC0];
174-
}
175-
if (0x10B80..=0x10BAF).contains(&u) {
176-
return JOINING_TABLE[u as usize - 0x10B80 + JOINING_OFFSET_0X10B80];
177-
}
178-
if (0x10D00..=0x10D23).contains(&u) {
179-
return JOINING_TABLE[u as usize - 0x10D00 + JOINING_OFFSET_0X10D00];
180-
}
181-
if (0x10EC2..=0x10EC7).contains(&u) {
182-
return JOINING_TABLE[u as usize - 0x10EC2 + JOINING_OFFSET_0X10EC2];
183-
}
184-
if (0x10F30..=0x10FCB).contains(&u) {
185-
return JOINING_TABLE[u as usize - 0x10F30 + JOINING_OFFSET_0X10F30];
186-
}
187-
}
188-
0x11 => {
189-
if (0x110BD..=0x110CD).contains(&u) {
190-
return JOINING_TABLE[u as usize - 0x110BD + JOINING_OFFSET_0X110BD];
191-
}
192-
}
193-
0x1E => {
194-
if (0x1E900..=0x1E94B).contains(&u) {
195-
return JOINING_TABLE[u as usize - 0x1E900 + JOINING_OFFSET_0X1E900];
196-
}
197-
}
198-
_ => {}
199-
}
74+
/// Reads the `i`-th 4-bit value from `a`, treating `a` as a sequence of
/// nibbles packed two per byte.
///
/// The byte is `a[i >> 1]`; an even `i` selects its low nibble and an odd `i`
/// selects its high nibble (shift of 0 or 4 bits, then mask with 15).
///
/// Panics through slice indexing if `i >> 1` is not a valid index into `a`.
#[inline(always)]
75+
fn _hb_arabic_joining_b4 (a: &[u8], i: usize) -> u8
76+
{
77+
(a[i>>1]>>((i&1)<<2))&15
78+
}
79+
/// Looks up the numeric joining-type code for `u` in `_hb_arabic_joining_u8`.
///
/// For `u < 125260` (0x1E94C) the value is found through four chained reads of
/// the same table, consuming `u` as bit groups of widths 3/3/3 below a top
/// index of `u >> 9`:
///
/// 1. `l0` = nibble number `u >> 9`, counted from the start of the table
///    (with the bound above this touches bytes 0..=122).
/// 2. `l1` = byte at `123 + (l0 << 3) + ((u >> 6) & 7)`.
/// 3. `l2` = byte at `209 + l1 + ((u >> 3) & 7)`; note `l1` is added as-is,
///    without a shift.
/// 4. result = nibble number `(l2 << 3) + (u & 7)`, counted from byte 441.
///
/// Any `u` at or above 125260 returns 7 without touching the table
/// (`joining_type` maps 7 to `X`).
#[inline]
80+
pub(crate) fn _hb_arabic_joining_joining_type_u8 (u: usize) -> u8
81+
{
82+
/* packtab: [2^4,2^3,2^3,2^3] */
83+
if u<125260usize { (_hb_arabic_joining_b4(&_hb_arabic_joining_u8[441usize..],(((_hb_arabic_joining_u8[209usize+(((_hb_arabic_joining_u8[123usize+((((_hb_arabic_joining_b4(&_hb_arabic_joining_u8,(((((((u)>>3))>>3))>>3) as usize) as usize)) as usize)<<3) as usize+((((((u)>>3))>>3))&7) as usize) as usize]) as usize) as usize+((((u)>>3))&7) as usize) as usize]) as usize)<<3) as usize+((u)&7) as usize)) as u8 } else { 7 }
84+
}
20085

201-
X
86+
/// Returns the Arabic joining type for code point `u`.
///
/// The code point is cast to `usize`, resolved to a numeric code by
/// `_hb_arabic_joining_joining_type_u8`, and that code is translated to an
/// `hb_arabic_joining_type_t` variant:
/// 0 = `U`, 1 = `L`, 2 = `R`, 3 = `D`, 4 = `GroupAlaph`,
/// 5 = `GroupDalathRish`, 6 = `T`, 7 = `X`.
///
/// Panics via `unreachable!` on any other code. The lookup reads a 4-bit
/// value, so codes 8..=15 are representable; this function relies on the
/// generated table never containing them.
#[inline]
87+
pub(crate) fn joining_type (u: Codepoint) -> hb_arabic_joining_type_t
88+
{
89+
match _hb_arabic_joining_joining_type_u8 (u as usize) {
90+
0 => U,
91+
1 => L,
92+
2 => R,
93+
3 => D,
94+
4 => GroupAlaph,
95+
5 => GroupDalathRish,
96+
6 => T,
97+
7 => X,
98+
// Only reachable if the packed table holds a nibble above 7.
_ => unreachable! (),
99+
}
202100
}
101+
102+
103+
/* == End of generated table == */

0 commit comments

Comments
 (0)