Remove SetBaseSubsetFromPatches() method.

garretrieger · garretrieger · commit f52f144c898f · 2025-02-28T13:31:16.000-07:00
Base subset is now always set in terms of codepoints or a subset def.
diff --git a/ift/encoder/condition.h b/ift/encoder/condition.h
@@ -9,9 +9,12 @@
 namespace ift::encoder {
 
 /*
- * This conditions is satisfied if the input subset definition matches at
- * least one segment in each required group and every feature in
- * required_features.
+ * This condition is satisfied if the input subset definition
+ * matches the condition's subset_definition and all child conditions
+ * are matched.
+ *
+ * Child conditions refer to the indices of previous condition entries.
+ * See: https://w3c.github.io/IFT/Overview.html#mapping-entry-childentryindices
  */
 struct Condition {
   SubsetDefinition subset_definition;
diff --git a/ift/encoder/encoder.cc b/ift/encoder/encoder.cc
@@ -20,6 +20,7 @@
 #include "common/try.h"
 #include "common/woff2.h"
 #include "hb-subset.h"
+#include "ift/encoder/subset_definition.h"
 #include "ift/glyph_keyed_diff.h"
 #include "ift/proto/ift_table.h"
 #include "ift/proto/patch_encoding.h"
@@ -86,7 +87,7 @@ void Encoder::AddCombinations(const std::vector<const SubsetDefinition*>& in,
 StatusOr<FontData> Encoder::FullyExpandedSubset(
     const ProcessingContext& context) const {
   SubsetDefinition all;
-  all.Union(base_subset_);
+  all.Union(context.base_subset_);
 
   for (const auto& s : extension_subsets_) {
     all.Union(s);
@@ -191,58 +192,6 @@ Status Encoder::AddGlyphDataPatchCondition(Condition condition) {
   return absl::OkStatus();
 }
 
-Status Encoder::SetBaseSubsetFromPatches(
-    const flat_hash_set<uint32_t>& included_glyph_data) {
-  design_space_t empty;
-  return SetBaseSubsetFromPatches(included_glyph_data, empty);
-}
-
-Status Encoder::SetBaseSubsetFromPatches(
-    const flat_hash_set<uint32_t>& included_glyph_data,
-    const design_space_t& design_space) {
-  // TODO(garretrieger): support also providing initial features.
-  if (!face_) {
-    return absl::FailedPreconditionError("Encoder must have a face set.");
-  }
-
-  if (!base_subset_.empty()) {
-    return absl::FailedPreconditionError("Base subset has already been set.");
-  }
-
-  for (uint32_t patch_id : included_glyph_data) {
-    if (!glyph_data_patches_.contains(patch_id)) {
-      return absl::InvalidArgumentError(StrCat("Glyph data patch, ", patch_id,
-                                               ", not added to the encoder."));
-    }
-  }
-
-  auto included = SubsetDefinitionForPatches(included_glyph_data);
-  if (!included.ok()) {
-    return included.status();
-  }
-
-  base_subset_ = *included;
-  base_subset_.design_space = design_space;
-
-  // Glyph keyed patches can't change the glyph count in the font (and hence
-  // loca len) so always include the last gid in the base subset to force the
-  // loca table to remain at the full length from the start.
-  //
-  // TODO(garretrieger): this unnecessarily includes the last gid in the subset,
-  //                     should update the subsetter to retain the glyph count
-  //                     but not actually keep the last gid.
-  //
-  // TODO(garretrieger): instead of forcing max glyph count here we can utilize
-  //                     table keyed patches to change loca len/glyph count to
-  //                     the max for any currently reachable segments. This
-  //                     would improve efficiency slightly by avoid including
-  //                     extra space in the initial font.
-  uint32_t gid_count = hb_face_get_glyph_count(face_.get());
-  if (gid_count > 0) base_subset_.gids.insert(gid_count - 1);
-
-  return absl::OkStatus();
-}
-
 void Encoder::AddFeatureGroupSegment(const btree_set<hb_tag_t>& feature_tags) {
   SubsetDefinition def;
   def.feature_tags = feature_tags;
@@ -255,36 +204,35 @@ void Encoder::AddDesignSpaceSegment(const design_space_t& space) {
   extension_subsets_.push_back(def);
 }
 
-StatusOr<SubsetDefinition> Encoder::SubsetDefinitionForPatches(
-    const flat_hash_set<uint32_t>& patch_ids) const {
-  auto gid_to_unicode = FontHelper::GidToUnicodeMap(face_.get());
-
-  SubsetDefinition result;
-  for (uint32_t patch_id : patch_ids) {
-    auto p = glyph_data_patches_.find(patch_id);
-    if (p == glyph_data_patches_.end()) {
-      return absl::InvalidArgumentError(
-          StrCat("Glyph data patches, ", patch_id, ", not found."));
-    }
-
-    for (uint32_t gid : p->second) {
-      auto cp = gid_to_unicode.find(gid);
-      if (cp != gid_to_unicode.end()) {
-        result.codepoints.insert(cp->second);
-      }
-      result.gids.insert(gid);
-    }
-  }
-
-  return result;
-}
-
 StatusOr<Encoder::Encoding> Encoder::Encode() const {
   if (!face_) {
     return absl::FailedPreconditionError("Encoder must have a face set.");
   }
 
   ProcessingContext context(next_id_);
+  context.base_subset_ = base_subset_;
+  if (IsMixedMode()) {
+    // Glyph keyed patches can't change the glyph count in the font (and hence
+    // loca len) so always include the last gid in the base subset to force the
+    // loca table to remain at the full length from the start.
+    //
+    // TODO(garretrieger): this unnecessarily includes the last gid in the
+    // subset,
+    //                     should update the subsetter to retain the glyph count
+    //                     but not actually keep the last gid.
+    //
+    // TODO(garretrieger): instead of forcing max glyph count here we can
+    // utilize
+    //                     table keyed patches to change loca len/glyph count to
+    //                     the max for any currently reachable segments. This
+    //                     would improve efficiency slightly by not including
+    //                     extra space in the initial font. However, it would
+    //                     require us to examine conditions against each subset
+    //                     to determine patch reachability.
+    uint32_t gid_count = hb_face_get_glyph_count(face_.get());
+    if (gid_count > 0) context.base_subset_.gids.insert(gid_count - 1);
+  }
+
   context.force_long_loca_and_gvar_ = false;
   auto expanded = FullyExpandedSubset(context);
   if (!expanded.ok()) {
@@ -297,7 +245,7 @@ StatusOr<Encoder::Encoding> Encoder::Encode() const {
       FontHelper::HasLongLoca(expanded_face.get()) ||
       FontHelper::HasWideGvar(expanded_face.get());
 
-  auto init_font = Encode(context, base_subset_, true);
+  auto init_font = Encode(context, context.base_subset_, true);
   if (!init_font.ok()) {
     return init_font.status();
   }
@@ -603,7 +551,7 @@ StatusOr<FontData> Encoder::GenerateBaseGvar(
   }
 
   // Step 2: glyph subsetting
-  SubsetDefinition subset = base_subset_;
+  SubsetDefinition subset = context.base_subset_;
   // We don't want to apply any instancing here as it was done in step 1
   // so clear out the design space.
   subset.design_space = {};
diff --git a/ift/encoder/encoder.h b/ift/encoder/encoder.h
@@ -69,35 +69,21 @@ class Encoder {
    * Configure the base subset to cover the provided codepoints, and the set of
    * layout features retained by default in the harfbuzz subsetter.
    */
-  template <typename T>
-  absl::Status SetBaseSubset(const T& base_subset) {
+  template <typename Set>
+  absl::Status SetBaseSubset(const Set& base_codepoints) {
     if (!base_subset_.empty()) {
       return absl::FailedPreconditionError("Base subset has already been set.");
     }
-    base_subset_.codepoints.insert(base_subset.begin(), base_subset.end());
+    base_subset_.codepoints.insert(base_codepoints.begin(),
+                                   base_codepoints.end());
     return absl::OkStatus();
   }
 
-  /*
-   * Set up the base subset to cover all glyphs in the provided list of glyph
-   * data patches.
-   */
-  absl::Status SetBaseSubsetFromPatches(
-      const absl::flat_hash_set<uint32_t>& included_glyph_data);
-
-  /*
-   * Set up the base subset to cover all glyphs in the provided list of glyph
-   * data patches. Additionally, instance to the supplied design space.
-   */
-  absl::Status SetBaseSubsetFromPatches(
-      const absl::flat_hash_set<uint32_t>& included_segments,
-      const design_space_t& design_space);
-
   /*
    * Adds a segment around which the non glyph data in the font will be split.
    */
-  template <typename T>
-  void AddNonGlyphDataSegment(const T& codepoints) {
+  template <typename Set>
+  void AddNonGlyphDataSegment(const Set& codepoints) {
     SubsetDefinition def;
     def.codepoints.insert(codepoints.begin(), codepoints.end());
     extension_subsets_.push_back(def);
@@ -138,13 +124,6 @@ class Encoder {
     if (!base_subset_.empty()) {
       return absl::FailedPreconditionError("Base subset has already been set.");
     }
-    // TODO(garretrieger): XXXXXXX we need to use the last gid trick  from
-    //                     SetBaseSubsetFromPatches (if we're mixed mode) or
-    //                     table keyed patch generation needs to extend the loca
-    //                     up to the maximum reachable gid for each subset.
-    //
-    //                     Also add a test that checks this case works
-    //                     correctly.
     base_subset_ = base_subset;
     return absl::OkStatus();
   }
@@ -189,9 +168,6 @@ class Encoder {
                                           const SubsetDefinition& base_subset,
                                           bool is_root = true) const;
 
-  absl::StatusOr<SubsetDefinition> SubsetDefinitionForPatches(
-      const absl::flat_hash_set<uint32_t>& patch_ids) const;
-
   /*
    * Returns true if this encoding will contain both glyph keyed and table keyed
    * patches.
@@ -283,6 +259,7 @@ class Encoder {
 
     absl::flat_hash_map<SubsetDefinition, common::FontData> built_subsets_;
     absl::flat_hash_map<std::string, common::FontData> patches_;
+    SubsetDefinition base_subset_;
 
     common::CompatId GenerateCompatId();
   };
diff --git a/ift/encoder/encoder_test.cc b/ift/encoder/encoder_test.cc
@@ -349,9 +349,6 @@ TEST_F(EncoderTest, MissingFace) {
   auto s1 = encoder.AddGlyphDataPatch(1, segment_1_gids);
   ASSERT_TRUE(absl::IsFailedPrecondition(s1)) << s1;
 
-  auto s2 = encoder.SetBaseSubsetFromPatches({});
-  ASSERT_TRUE(absl::IsFailedPrecondition(s2)) << s2;
-
   auto s3 = encoder.Encode();
   ASSERT_TRUE(absl::IsFailedPrecondition(s3.status())) << s3.status();
 }
@@ -368,21 +365,6 @@ TEST_F(EncoderTest, GlyphDataSegments_GidsNotInFace) {
   ASSERT_TRUE(absl::IsInvalidArgument(s)) << s;
 }
 
-TEST_F(EncoderTest, InvalidGlyphDataPatchIds) {
-  Encoder encoder;
-  {
-    hb_face_t* face = noto_sans_jp.reference_face();
-    encoder.SetFace(face);
-    hb_face_destroy(face);
-  }
-
-  auto s = encoder.AddGlyphDataPatch(1, segment_1_gids);
-  ASSERT_TRUE(s.ok()) << s;
-
-  s = encoder.SetBaseSubsetFromPatches({2});
-  ASSERT_TRUE(absl::IsInvalidArgument(s)) << s;
-}
-
 TEST_F(EncoderTest, DontClobberBaseSubset) {
   Encoder encoder;
   {
@@ -394,13 +376,13 @@ TEST_F(EncoderTest, DontClobberBaseSubset) {
   auto s = encoder.AddGlyphDataPatch(1, segment_1_gids);
   ASSERT_TRUE(s.ok()) << s;
 
-  s = encoder.SetBaseSubsetFromPatches({});
+  s = encoder.SetBaseSubset(flat_hash_set<uint32_t>{});
   ASSERT_TRUE(s.ok()) << s;
 
   s = encoder.SetBaseSubset(flat_hash_set<uint32_t>{1});
-  ASSERT_TRUE(absl::IsFailedPrecondition(s)) << s;
+  ASSERT_TRUE(s.ok()) << s;
 
-  s = encoder.SetBaseSubsetFromPatches({});
+  s = encoder.SetBaseSubset(flat_hash_set<uint32_t>{});
   ASSERT_TRUE(absl::IsFailedPrecondition(s)) << s;
 }
 
@@ -587,7 +569,11 @@ TEST_F(EncoderTest, Encode_ThreeSubsets_Mixed) {
   s.Update(encoder.AddGlyphDataPatchCondition(Condition::SimpleCondition(
       SubsetDefinition::Codepoints(segment_4_cps), 4)));
 
-  s.Update(encoder.SetBaseSubsetFromPatches({0, 1, 2}));
+  flat_hash_set<uint32_t> base_subset;
+  base_subset.insert(segment_0_cps.begin(), segment_0_cps.end());
+  base_subset.insert(segment_1_cps.begin(), segment_1_cps.end());
+  base_subset.insert(segment_2_cps.begin(), segment_2_cps.end());
+  s.Update(encoder.SetBaseSubset(base_subset));
 
   flat_hash_set<uint32_t> extension_segment;
   extension_segment.insert(segment_3_cps.begin(), segment_3_cps.end());
@@ -658,7 +644,10 @@ TEST_F(EncoderTest, Encode_ThreeSubsets_Mixed_WithFeatureMappings) {
   ASSERT_TRUE(s.ok()) << s;
 
   // Partitions {0, 1}, {2, 3, 4}, +ccmp
-  s.Update(encoder.SetBaseSubsetFromPatches({0, 1}));
+  flat_hash_set<uint32_t> base_subset;
+  base_subset.insert(segment_0_cps.begin(), segment_0_cps.end());
+  base_subset.insert(segment_1_cps.begin(), segment_1_cps.end());
+  s.Update(encoder.SetBaseSubset(base_subset));
 
   flat_hash_set<uint32_t> extension_segment;
   extension_segment.insert(segment_2_cps.begin(), segment_2_cps.end());
diff --git a/ift/integration_test.cc b/ift/integration_test.cc
diff --git a/util/glyph_keyed_segmenter.cc b/util/glyph_keyed_segmenter.cc