Skip to content

Commit e8da23c

Browse files
committed
Add ClosureGlyphSegmenter test which uses dep graph analysis.
1 parent 9f1f23b commit e8da23c

4 files changed

Lines changed: 30 additions & 17 deletions

File tree

ift/encoder/closure_glyph_segmenter_test.cc

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,6 @@ using ift::freq::UnigramProbabilityCalculator;
2424

2525
namespace ift::encoder {
2626

27-
// TODO XXXXX add full roboto type test which uses the alternate closure analysis modes.
28-
// verify result is the same as pure closure.
29-
3027
class ClosureGlyphSegmenterTest : public ::testing::Test {
3128
protected:
3229
ClosureGlyphSegmenterTest()
@@ -35,7 +32,8 @@ class ClosureGlyphSegmenterTest : public ::testing::Test {
3532
noto_sans_jp(make_hb_face(nullptr)),
3633
segmenter(8, 8, PATCH, CLOSURE_ONLY),
3734
segmenter_find_conditions(8, 8, FIND_CONDITIONS, CLOSURE_ONLY),
38-
segmenter_move_to_init_font(8, 8, MOVE_TO_INIT_FONT, CLOSURE_ONLY) {
35+
segmenter_move_to_init_font(8, 8, MOVE_TO_INIT_FONT, CLOSURE_ONLY),
36+
segmenter_dep_graph(8, 8, PATCH, CLOSURE_AND_DEP_GRAPH) {
3937
roboto = from_file("common/testdata/Roboto-Regular.ttf");
4038
noto_nastaliq_urdu =
4139
from_file("common/testdata/NotoNastaliqUrdu.subset.ttf");
@@ -59,6 +57,7 @@ class ClosureGlyphSegmenterTest : public ::testing::Test {
5957
ClosureGlyphSegmenter segmenter;
6058
ClosureGlyphSegmenter segmenter_find_conditions;
6159
ClosureGlyphSegmenter segmenter_move_to_init_font;
60+
ClosureGlyphSegmenter segmenter_dep_graph;
6261
};
6362

6463
TEST_F(ClosureGlyphSegmenterTest, SimpleSegmentation) {
@@ -515,7 +514,7 @@ if ((s2 OR s3)) then p5
515514
)");
516515
}
517516

518-
TEST_F(ClosureGlyphSegmenterTest, FullRoboto_WithFeatures) {
517+
TEST_F(ClosureGlyphSegmenterTest, FullRoboto_WithFeaturesAndDepGraph) {
519518
auto codepoints = common::FontHelper::ToCodepointsSet(roboto.get());
520519

521520
uint32_t num_segments = 412;
@@ -545,6 +544,14 @@ TEST_F(ClosureGlyphSegmenterTest, FullRoboto_WithFeatures) {
545544
auto segmentation = segmenter.CodepointToGlyphSegments(
546545
roboto.get(), {}, segments, MergeStrategy::Heuristic(4000, 12000));
547546
ASSERT_TRUE(segmentation.ok()) << segmentation.status();
547+
548+
auto segmentation_dep_graph = segmenter_dep_graph.CodepointToGlyphSegments(
549+
roboto.get(), {}, segments, MergeStrategy::Heuristic(4000, 12000));
550+
ASSERT_TRUE(segmentation_dep_graph.ok()) << segmentation_dep_graph.status();
551+
552+
// Dep graph should produce identical results.
553+
ASSERT_EQ(segmentation->Segments(), segmentation_dep_graph->Segments());
554+
ASSERT_EQ(segmentation->GidSegments(), segmentation_dep_graph->GidSegments());
548555
}
549556

550557
TEST_F(ClosureGlyphSegmenterTest, CostRequiresFrequencies) {

ift/encoder/dependency_closure.h

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,27 @@ class DependencyClosure {
4141
};
4242

4343
// Attempts to analyze the given segment using a glyph dependency graph
44-
// from harfbuzz. Returns true if a accurate analysis is possible, otherwise
45-
// false.
44+
// from harfbuzz. The return value signals if an analysis was able to be performed
45+
// or not.
4646
//
47-
// When false is returned GlyphClosureCache should be used instead to analyze
48-
// the segment.
47+
// If ACCURATE is returned then the dep graph is able to produce an analysis
48+
// which should match GlyphClosureCache::AnalyzeSegment(). In this case
49+
// the three output sets (and_gids, or_gids, and exclusive_gids) will be populated
50+
// with the anaysis results.
4951
//
50-
// If true is returned then the input sets *_gids will have glyphs appended to
51-
// them based on the analysis classification.
52+
// Otherwise if INACCURATE is returned then the dep graph analysis was found
53+
// to possibly not match GlyphClosureCache::AnalyzeSegment(). The three
54+
// output sets will not be modified in this case.
5255
//
53-
// TODO XXXX explain the meaning behind the three gid sets.
54-
// TODO XXXX explain the return type.
56+
// The three output sets have the following interpretation:
57+
// and_gids: these gids have the union of input segments as a conjunctive condition.
58+
// ie. (s_1 U ... U s_n) AND ... -> and_gids
59+
//
60+
// or_gids: these gids have the union of input segments as a disjunctive condition.
61+
// ie. (s_1 U ... U s_n) OR ... -> or_gids
62+
//
63+
// exclusive_gids: these gids are exclusively needed by the union of input segments.
64+
// ie. (s_1 U ... U s_n) -> exclusive_gids
5565
absl::StatusOr<AnalysisAccuracy> AnalyzeSegment(const common::SegmentSet& segments,
5666
common::GlyphSet& and_gids,
5767
common::GlyphSet& or_gids,

ift/encoder/segmentation_context.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@ Status SegmentationContext::AnalyzeSegment(const SegmentSet& segment_ids,
148148
GlyphSet& and_gids,
149149
GlyphSet& or_gids,
150150
GlyphSet& exclusive_gids) {
151-
// TODO XXXXX count number of times we were able to use dep analysis.
152151
ConditionAnalysisMode effective_mode = condition_analysis_mode_;
153152
GlyphSet dep_and_gids = and_gids;
154153
GlyphSet dep_or_gids = or_gids;

ift/encoder/subset_definition.cc

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,9 +181,6 @@ void SubsetDefinition::ConfigureInput(hb_subset_input_t* input,
181181
hb_face_t* face) const {
182182
codepoints.union_into(hb_subset_input_unicode_set(input));
183183

184-
// TODO XXXX should we clear the harfbuzz default feature set so that we are
185-
// always working with a known set of features? we would
186-
// use the IFT spec defaults instead.
187184
hb_set_t* features =
188185
hb_subset_input_set(input, HB_SUBSET_SETS_LAYOUT_FEATURE_TAG);
189186
// hb_input_t has a set of layout featurs configured by default, we

0 commit comments

Comments
 (0)