@@ -27,6 +27,29 @@ namespace util {
2727
2828static constexpr uint32_t kMinimumGroupSize = 4 ;
2929
// Quality levels for automatic segmenter configuration.
//
// Each level selects one row of the table below. Column legend (setter that
// consumes the value):
//   bigrams                   - CostConfiguration::set_use_bigrams
//   find conditions           - Yes: unmapped glyph handling is FIND_CONDITIONS,
//                               No: MOVE_TO_INIT_FONT
//   init brotli               - set_brotli_quality_for_initial_font_merging
//   non init brotli           - set_brotli_quality
//   init font merge threshold - set_initial_font_merge_probability_threshold
//   opt cut off               - set_optimization_cutoff_fraction
//   preprocess merging        - Yes: preprocess merging group size is
//                               kMinimumGroupSize, No: group size is 1
//   preprocess threshold      - set_preprocess_merging_probability_threshold
//                               ("na": the threshold is cleared)
//
// Quality | bigrams | find conditions | init brotli | non init brotli | init font merge threshold | opt cut off | preprocess merging | preprocess threshold
// 1       | No      | No              | 0           | 0               | 60%                       | 5%          | Yes                | 5%
// 2       | Yes     | No              | 0           | 0               | 55%                       | 4%          | Yes                | 4%
// 3       | Yes     | Yes             | 0           | 0               | 50%                       | 3%          | Yes                | 3%
// 4       | Yes     | Yes             | 0           | 9               | 45%                       | 2%          | Yes                | 2%
// 5       | Yes     | Yes             | 9           | 9               | 40%                       | 1%          | Yes                | 1%
// 6       | Yes     | Yes             | 9           | 11              | 30%                       | 0.5%        | Yes                | 0.5%
// 7       | Yes     | Yes             | 11          | 11              | 25%                       | 0.5%        | Yes                | 0.05%
// 8       | Yes     | Yes             | 11          | 11              | 25%                       | 0.5%        | No                 | na
enum Quality {
  ONE = 1,
  TWO = 2,
  THREE = 3,
  FOUR = 4,
  FIVE = 5,
  SIX = 6,
  SEVEN = 7,
  EIGHT = 8,

  // Named bounds of the valid range.
  MIN = ONE,    // Alias for ONE
  MAX = EIGHT,  // Alias for EIGHT
};
52+
3053// TODO(garretrieger): define a very basic set of quality levels first (see next TODO),
3154// start with just a lowest and highest to set the upper and lower bounds for quality
3255// settings (maybe also a mid point). To begin use number of codepoints to select quality
@@ -464,24 +487,140 @@ static Status ApplyPrimaryScript(
464487 return absl::OkStatus ();
465488}
466489
490+ static void ApplyQualityLevelTo (Quality quality, HeuristicConfiguration& config) {
491+ config.set_min_patch_size (2500 );
492+ }
493+
494+ static void ApplyQualityLevelTo (Quality quality, CostConfiguration& config) {
495+ config.set_min_group_size (kMinimumGroupSize );
496+
497+ if (quality == ONE) {
498+ config.set_use_bigrams (false );
499+ } else {
500+ config.set_use_bigrams (true );
501+ }
502+
503+ switch (quality) {
504+ case ONE: config.set_optimization_cutoff_fraction (0.05 ); break ;
505+ case TWO: config.set_optimization_cutoff_fraction (0.04 ); break ;
506+ case THREE: config.set_optimization_cutoff_fraction (0.03 ); break ;
507+ case FOUR: config.set_optimization_cutoff_fraction (0.02 ); break ;
508+ case FIVE: config.set_optimization_cutoff_fraction (0.01 ); break ;
509+ case SIX:
510+ case SEVEN:
511+ case EIGHT:
512+ default : config.set_optimization_cutoff_fraction (0.005 ); break ;
513+ }
514+ }
515+
516+ static void ApplyQualityLevelTo (Quality quality, MergeGroup& merge_group) {
517+ if (merge_group.has_cost_config ()) {
518+ if (quality >= ONE && quality <= SEVEN) {
519+ merge_group.set_preprocess_merging_group_size (kMinimumGroupSize );
520+ } else {
521+ merge_group.set_preprocess_merging_group_size (1 );
522+ }
523+
524+ switch (quality) {
525+ case ONE: merge_group.set_preprocess_merging_probability_threshold (0.05 ); break ;
526+ case TWO: merge_group.set_preprocess_merging_probability_threshold (0.04 ); break ;
527+ case THREE: merge_group.set_preprocess_merging_probability_threshold (0.03 ); break ;
528+ case FOUR: merge_group.set_preprocess_merging_probability_threshold (0.02 ); break ;
529+ case FIVE: merge_group.set_preprocess_merging_probability_threshold (0.01 ); break ;
530+ case SIX: merge_group.set_preprocess_merging_probability_threshold (0.005 ); break ;
531+ case SEVEN: merge_group.set_preprocess_merging_probability_threshold (0.0005 ); break ;
532+ case EIGHT:
533+ default : merge_group.clear_preprocess_merging_probability_threshold (); break ;
534+ }
535+
536+ if (merge_group.mutable_cost_config ()->has_initial_font_merge_threshold ()) {
537+ switch (quality) {
538+ case ONE: merge_group.mutable_cost_config ()->set_initial_font_merge_probability_threshold (0.60 ); break ;
539+ case TWO: merge_group.mutable_cost_config ()->set_initial_font_merge_probability_threshold (0.55 ); break ;
540+ case THREE: merge_group.mutable_cost_config ()->set_initial_font_merge_probability_threshold (0.50 ); break ;
541+ case FOUR: merge_group.mutable_cost_config ()->set_initial_font_merge_probability_threshold (0.45 ); break ;
542+ case FIVE: merge_group.mutable_cost_config ()->set_initial_font_merge_probability_threshold (0.40 ); break ;
543+ case SIX: merge_group.mutable_cost_config ()->set_initial_font_merge_probability_threshold (0.30 ); break ;
544+ case SEVEN:
545+ case EIGHT:
546+ default : merge_group.mutable_cost_config ()->set_initial_font_merge_probability_threshold (0.25 ); break ;
547+ }
548+ }
549+ }
550+ }
551+
552+ static void ApplyQualityLevelTo (Quality quality, SegmenterConfig& config) {
553+ config.set_preprocess_merging_group_size_for_ungrouped (kMinimumGroupSize );
554+
555+ if (quality == ONE || quality == TWO) {
556+ config.set_unmapped_glyph_handling (MOVE_TO_INIT_FONT);
557+ } else {
558+ config.set_unmapped_glyph_handling (FIND_CONDITIONS);
559+ }
560+
561+ switch (quality) {
562+ case ONE:
563+ case TWO:
564+ case THREE:
565+ config.set_brotli_quality (0 );
566+ break ;
567+ case FOUR:
568+ case FIVE:
569+ config.set_brotli_quality (9 );
570+ break ;
571+ case SIX:
572+ case SEVEN:
573+ case EIGHT:
574+ default :
575+ config.set_brotli_quality (11 );
576+ break ;
577+ }
578+
579+ switch (quality) {
580+ case ONE:
581+ case TWO:
582+ case THREE:
583+ case FOUR:
584+ config.set_brotli_quality_for_initial_font_merging (0 );
585+ break ;
586+ case FIVE:
587+ case SIX:
588+ config.set_brotli_quality_for_initial_font_merging (9 );
589+ break ;
590+ case SEVEN:
591+ case EIGHT:
592+ default :
593+ config.set_brotli_quality_for_initial_font_merging (11 );
594+ break ;
595+ }
596+
597+ ApplyQualityLevelTo (quality, *config.mutable_base_heuristic_config ());
598+ ApplyQualityLevelTo (quality, *config.mutable_base_cost_config ());
599+
600+ for (auto & merge_group : *config.mutable_merge_groups ()) {
601+ ApplyQualityLevelTo (quality, merge_group);
602+ }
603+ }
604+
467605absl::StatusOr<SegmenterConfig> AutoSegmenterConfig::GenerateConfig (
468- hb_face_t * face, std::optional<std::string> primary_script) {
606+ hb_face_t * face, std::optional<std::string> primary_script, std::optional< int > quality_level ) {
469607 SegmenterConfig config;
470608 config.set_generate_table_keyed_segments (true );
471609 config.set_generate_feature_segments (true );
472- config.set_unmapped_glyph_handling (FIND_CONDITIONS);
473610 config.set_condition_analysis_mode (CLOSURE_AND_DEP_GRAPH);
474611
475612 auto * base_plan = config.mutable_base_segmentation_plan ();
476613 base_plan->set_jump_ahead (2 );
477614 base_plan->set_use_prefetch_lists (true );
478615
479- config.mutable_ungrouped_config ()->set_min_patch_size (2500 );
480-
481616 // Collect codepoints
482617 auto freq_list = TRY (BuiltInFrequenciesList ());
483618 CodepointSet unicodes = FontHelper::ToCodepointsSet (face);
484619 uint32_t cp_count = unicodes.size ();
620+ Quality quality = cp_count > 2000 ? MIN : MAX;
621+ if (quality_level.has_value () && quality_level.value () >= ONE && quality_level.value () <= MAX) {
622+ quality = static_cast <Quality>(quality_level.value ());
623+ }
485624
486625 // Detect scripts by intersection with frequency data
487626 btree_set<std::string> detected_scripts = DetectScripts (freq_list, unicodes);
@@ -491,18 +630,6 @@ absl::StatusOr<SegmenterConfig> AutoSegmenterConfig::GenerateConfig(
491630 // (including accounting for pairs only within merge groups), and then select
492631 // the cutoffs and premerging to keep the number of brotli ops within a
493632 // specific range.
494- auto * base_cost = config.mutable_base_cost_config ();
495- base_cost->set_use_bigrams (true );
496- base_cost->set_min_group_size (
497- kMinimumGroupSize ); // as recommended by the spec.
498- config.set_preprocess_merging_group_size_for_ungrouped (kMinimumGroupSize );
499- base_cost->set_optimization_cutoff_fraction (0.01 );
500-
501- if (cp_count > 2000 ) {
502- config.set_brotli_quality (9 );
503- } else {
504- config.set_brotli_quality (11 );
505- }
506633
507634 TRYV (ApplyPrimaryScript (freq_list, primary_script.value_or (" Script_latin" ),
508635 detected_scripts));
@@ -515,20 +642,15 @@ absl::StatusOr<SegmenterConfig> AutoSegmenterConfig::GenerateConfig(
515642 mg->set_name (ScriptName (script));
516643 auto * cost = mg->mutable_cost_config ();
517644
518- // TODO(garretrieger): use a heuristic to select probability threshold based
519- // on estimated number of brotli ops (assuming O(n^2) on codepoints in the
520- // group).
521- mg->set_preprocess_merging_group_size (kMinimumGroupSize );
522- mg->set_preprocess_merging_probability_threshold (0.001 );
523-
524645 cost->set_built_in_freq_data_name (script);
525646 if (script == primary_script_file) {
526647 // TODO(garretrieger): customize these values based on the quality level
527648 cost->set_initial_font_merge_threshold (-60 );
528- cost->set_initial_font_merge_probability_threshold (0.40 );
529649 }
530650 }
531651
652+ ApplyQualityLevelTo (quality, config);
653+
532654 return config;
533655}
534656
0 commit comments