Skip to content

Commit ae44e7f

Browse files
committed
Switch topological sort to SCC in condition extraction.
Dependency graphs can have cycles within a closure phase (for example bidi mirror dependencies naturally form cycles). Since topological sorting can't handle cycles a different approach is needed. In condition extraction topological sorting is swapped out with a strongly connected component finding algorithm (Tarjan's, ref: https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm). This returns a topological sorting of strongly connected components. When the dependency graph is a DAG this gives the exact same behaviour as the prior topological sorting based implementation. To handle the cycles within a strongly connected component condition propagation has been modified to iteratively propagate conditions inside a component until they stabilize.
1 parent 40d1353 commit ae44e7f

6 files changed

Lines changed: 325 additions & 182 deletions

File tree

ift/dep_graph/dependency_graph.cc

Lines changed: 126 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ using ift::encoder::SubsetDefinition;
3939

4040
namespace ift::dep_graph {
4141

42-
StatusOr<GlyphSet> GetContextSet(
43-
hb_depend_t* depend, const ift::common::GlyphSet* full_closure,
44-
hb_codepoint_t context_set_id) {
42+
StatusOr<GlyphSet> GetContextSet(hb_depend_t* depend,
43+
const ift::common::GlyphSet* full_closure,
44+
hb_codepoint_t context_set_id) {
4545
// the context set is actually a set of sets.
4646
hb_set_unique_ptr context_sets = make_hb_set();
4747
if (!hb_depend_get_set_from_index(depend, context_set_id,
@@ -82,7 +82,9 @@ class TraversalContext {
8282
hb_depend_t* depend = nullptr;
8383

8484
// Only edges from these tables will be followed.
85-
flat_hash_set<hb_tag_t> table_filter = {FontHelper::kCmap, FontHelper::kGlyf, FontHelper::kGSUB, FontHelper::kCOLR, FontHelper::kMATH, FontHelper::kCFF};
85+
flat_hash_set<hb_tag_t> table_filter = {FontHelper::kCmap, FontHelper::kGlyf,
86+
FontHelper::kGSUB, FontHelper::kCOLR,
87+
FontHelper::kMATH, FontHelper::kCFF};
8688

8789
// Only edges that originate from and end at glyphs from this filter will be
8890
// followed.
@@ -449,7 +451,8 @@ static Status DoTraversal(const PendingEdge& edge,
449451
TraversalContext<CallbackT>& context) {
450452
context.callback.VisitPending(edge);
451453

452-
if (edge.table_tag == FontHelper::kGSUB && edge.required_feature.has_value()) {
454+
if (edge.table_tag == FontHelper::kGSUB &&
455+
edge.required_feature.has_value()) {
453456
if (edge.required_context_set_index.has_value()) {
454457
GlyphSet context_glyphs =
455458
TRY(GetContextSet(context.depend, context.full_closure,
@@ -524,17 +527,17 @@ absl::Status DependencyGraph::HandleOutgoingEdges(
524527
return absl::OkStatus();
525528
}
526529

527-
StatusOr<std::vector<Node>> DependencyGraph::TopologicalSorting(
528-
const flat_hash_set<hb_tag_t>& table_filter,
529-
uint32_t node_type_filter
530-
) const {
531-
struct TopoCallback {
530+
StatusOr<std::vector<std::vector<Node>>>
531+
DependencyGraph::StronglyConnectedComponents(
532+
const flat_hash_set<hb_tag_t>& table_filter,
533+
uint32_t node_type_filter) const {
534+
struct Callback {
532535
std::vector<Node> edges;
533-
void Visit(Node dest) {}
534-
void Visit(Node dest, hb_tag_t) {}
535-
void VisitGsub(Node dest, hb_tag_t) {}
536-
void VisitContextual(Node dest, hb_tag_t, ift::common::GlyphSet) {}
537-
void VisitPending(const PendingEdge& edge) { edges.push_back(edge.dest); }
536+
void Visit(Node) {}
537+
void Visit(Node, hb_tag_t) {}
538+
void VisitGsub(Node, hb_tag_t) {}
539+
void VisitContextual(Node, hb_tag_t, GlyphSet) {}
540+
void VisitPending(const PendingEdge& pe) { edges.push_back(pe.dest); }
538541
};
539542

540543
CodepointSet non_init_font_codepoints =
@@ -549,7 +552,7 @@ StatusOr<std::vector<Node>> DependencyGraph::TopologicalSorting(
549552

550553
GlyphSet non_init_font_glyphs = segmentation_info_->NonInitFontGlyphs();
551554

552-
TraversalContext<TopoCallback> context;
555+
TraversalContext<Callback> context;
553556
context.depend = dependency_graph_.get();
554557
context.full_closure = &segmentation_info_->FullClosure();
555558
context.enforce_context = false;
@@ -559,73 +562,124 @@ StatusOr<std::vector<Node>> DependencyGraph::TopologicalSorting(
559562
context.table_filter = table_filter;
560563
context.node_type_filter = node_type_filter;
561564

562-
// DFS topological sorting algorithm from:
563-
// https://en.wikipedia.org/wiki/Topological_sorting
564-
// Modified to use a stack instead of recursion.
565-
flat_hash_set<Node> visited;
566-
flat_hash_set<Node> visiting;
567-
std::vector<Node> post_order;
568-
std::vector<Node> dfs_stack;
569-
auto process_node = [&](Node start_node) -> Status {
570-
dfs_stack.push_back(start_node);
565+
// Implementation based on
566+
// https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
567+
// Modified to use stack instead of recursion.
568+
struct Meta {
569+
int64_t index = -1;
570+
int64_t lowlink = -1;
571+
bool on_stack = false;
572+
};
573+
574+
flat_hash_map<Node, Meta> node_meta;
575+
std::vector<Node> stack;
576+
577+
int64_t index = 0;
578+
std::vector<std::vector<Node>> sccs;
579+
580+
struct DfsState {
581+
Node node;
582+
std::vector<Node> edges;
583+
};
584+
585+
auto strongconnect = [&](Node start_node) -> Status {
586+
std::vector<DfsState> dfs_stack = {{start_node, {}}};
587+
571588
while (!dfs_stack.empty()) {
572-
Node node = dfs_stack.back();
573-
if (visited.contains(node)) {
574-
dfs_stack.pop_back();
575-
continue;
576-
}
577-
if (visiting.contains(node)) {
578-
visiting.erase(node);
579-
visited.insert(node);
580-
post_order.push_back(node);
581-
dfs_stack.pop_back();
582-
continue;
589+
Node v = dfs_stack.back().node;
590+
591+
std::vector<Node>& edges = dfs_stack.back().edges;
592+
{
593+
// Scope v_meta to where it's still valid
594+
auto& v_meta = node_meta[v];
595+
if (v_meta.index == -1) {
596+
// first time time visiting this node, set up initial state.
597+
v_meta.index = v_meta.lowlink = index++;
598+
stack.push_back(v);
599+
v_meta.on_stack = true;
600+
context.callback.edges.clear();
601+
TRYV(HandleOutgoingEdges(v, &context));
602+
edges = std::move(context.callback.edges);
603+
}
583604
}
584605

585-
visiting.insert(node);
586-
context.callback.edges.clear();
587-
TRYV(HandleOutgoingEdges(node, &context));
588-
589-
std::vector<Node> current_edges = std::move(context.callback.edges);
590-
for (Node dest : current_edges) {
591-
if (visiting.contains(dest)) {
592-
return absl::InvalidArgumentError(
593-
absl::StrCat("Cycle detected during topological sort in "
594-
"dependency graph involving node: ",
595-
dest.ToString()));
606+
bool recursing = false;
607+
while (!edges.empty()) {
608+
Node w = edges.back();
609+
edges.pop_back();
610+
611+
auto& w_meta = node_meta[w];
612+
if (w_meta.index == -1) {
613+
// recurse strongconnect()
614+
dfs_stack.push_back({w, {}});
615+
recursing = true;
616+
break;
617+
} else if (w_meta.on_stack) {
618+
// Successor w is in stack and hence in the current SCC
619+
auto& v_meta = node_meta[v];
620+
v_meta.lowlink = std::min(v_meta.lowlink, w_meta.index);
596621
}
597-
if (!visited.contains(dest)) {
598-
dfs_stack.push_back(dest);
622+
}
623+
624+
if (recursing) continue;
625+
626+
const auto& v_meta = node_meta.at(v);
627+
// If v is a root node, pop the stack and generate an SCC
628+
if (v_meta.lowlink == v_meta.index) {
629+
std::vector<Node> scc;
630+
while (true) {
631+
Node node = stack.back();
632+
stack.pop_back();
633+
node_meta.at(node).on_stack = false;
634+
scc.push_back(node);
635+
if (node == v) break;
599636
}
637+
sccs.push_back(std::move(scc));
638+
}
639+
640+
dfs_stack.pop_back();
641+
if (!dfs_stack.empty()) {
642+
Node parent = dfs_stack.back().node;
643+
auto& parent_meta = node_meta.at(parent);
644+
parent_meta.lowlink = std::min(parent_meta.lowlink, v_meta.lowlink);
600645
}
601646
}
602647
return absl::OkStatus();
603648
};
604649

605-
/* ### Run process_node for every possible, non init font node. #### */
650+
/* ### Run strongconnect for every possible, non init font node. #### */
606651
for (segment_index_t s : segmentation_info_->NonEmptySegments()) {
607-
TRYV(process_node(Node::Segment(s)));
652+
Node n = Node::Segment(s);
653+
if (!node_meta.contains(n)) {
654+
TRYV(strongconnect(n));
655+
}
608656
}
609657

610658
for (hb_codepoint_t u : non_init_font_codepoints) {
611-
TRYV(process_node(Node::Unicode(u)));
659+
Node n = Node::Unicode(u);
660+
if (!node_meta.contains(n)) {
661+
TRYV(strongconnect(n));
662+
}
612663
}
613664

614665
for (hb_tag_t tag : non_init_font_features) {
615-
TRYV(process_node(Node::Feature(tag)));
666+
Node n = Node::Feature(tag);
667+
if (!node_meta.contains(n)) {
668+
TRYV(strongconnect(n));
669+
}
616670
}
617671

618672
for (glyph_id_t gid : non_init_font_glyphs) {
619-
TRYV(process_node(Node::Glyph(gid)));
620-
}
621-
622-
std::vector<Node> topo_order;
623-
topo_order.reserve(post_order.size());
624-
for (auto it = post_order.rbegin(); it != post_order.rend(); ++it) {
625-
topo_order.push_back(*it);
673+
Node n = Node::Glyph(gid);
674+
if (!node_meta.contains(n)) {
675+
TRYV(strongconnect(n));
676+
}
626677
}
627678

628-
return topo_order;
679+
// Tarjan's returns SCCs in reverse topological order, reverse to get
680+
// topological order.
681+
std::reverse(sccs.begin(), sccs.end());
682+
return sccs;
629683
}
630684

631685
StatusOr<Traversal> DependencyGraph::TraverseGraph(
@@ -730,7 +784,8 @@ StatusOr<Traversal> DependencyGraph::ClosureTraversal(
730784
}
731785

732786
/* ### Remaining Phases ### */
733-
for (unsigned phase = 1; phase < DependencyGraph::kNumberOfClosurePhases; phase++) {
787+
for (unsigned phase = 1; phase < DependencyGraph::kNumberOfClosurePhases;
788+
phase++) {
734789
if (table_tags.contains(DependencyGraph::kClosurePhaseTable[phase])) {
735790
TRYV(ClosureSubTraversal(&base_context, phase, traversal_full));
736791
}
@@ -744,13 +799,15 @@ Status DependencyGraph::ClosureSubTraversal(
744799
Traversal& traversal_full) const {
745800
btree_set<Node> start_nodes;
746801

747-
if (DependencyGraph::kClosurePhaseStartNodes[phase_index] & Node::NodeType::GLYPH) {
802+
if (DependencyGraph::kClosurePhaseStartNodes[phase_index] &
803+
Node::NodeType::GLYPH) {
748804
for (glyph_id_t gid : traversal_full.ReachedGlyphs()) {
749805
start_nodes.insert(Node::Glyph(gid));
750806
}
751807
}
752808

753-
if (DependencyGraph::kClosurePhaseStartNodes[phase_index] & Node::NodeType::FEATURE) {
809+
if (DependencyGraph::kClosurePhaseStartNodes[phase_index] &
810+
Node::NodeType::FEATURE) {
754811
for (hb_tag_t feature : traversal_full.ReachedLayoutFeatures()) {
755812
start_nodes.insert(Node::Feature(feature));
756813
}
@@ -760,7 +817,8 @@ Status DependencyGraph::ClosureSubTraversal(
760817
context.SetReached(start_nodes);
761818
context.callback.SetStartNodes(start_nodes);
762819
context.table_filter = {DependencyGraph::kClosurePhaseTable[phase_index]};
763-
context.node_type_filter = DependencyGraph::kClosurePhaseNodeFilter[phase_index];
820+
context.node_type_filter =
821+
DependencyGraph::kClosurePhaseNodeFilter[phase_index];
764822
traversal_full.Merge(TRY(TraverseGraph(&context)));
765823
return absl::OkStatus();
766824
}
@@ -1142,7 +1200,8 @@ DependencyGraph::ComputeFeatureEdges() const {
11421200
while (hb_depend_get_glyph_entry(
11431201
dependency_graph_.get(), gid, index++, &table_tag, &dest_gid,
11441202
&layout_tag, &ligature_set, &context_set, nullptr /* flags */)) {
1145-
if (table_tag != FontHelper::kGSUB || layout_tag == HB_CODEPOINT_INVALID) {
1203+
if (table_tag != FontHelper::kGSUB ||
1204+
layout_tag == HB_CODEPOINT_INVALID) {
11461205
continue;
11471206
}
11481207

@@ -1179,7 +1238,7 @@ DependencyGraph::CollectIncomingEdges(
11791238
void Visit(Node dest) {}
11801239
void Visit(Node dest, hb_tag_t) {}
11811240
void VisitGsub(Node, hb_tag_t) {}
1182-
void VisitContextual(Node, hb_tag_t, ift::common::GlyphSet) {}
1241+
void VisitContextual(Node, hb_tag_t, GlyphSet) {}
11831242
void VisitPending(const PendingEdge& pe) {
11841243
auto reqs = graph->ExtractRequirements(pe);
11851244
had_error.Update(reqs.status());

ift/dep_graph/dependency_graph.h

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -44,32 +44,29 @@ class DependencyGraph {
4444
// _populate_gids_to_retain() from
4545
// https://github.com/harfbuzz/harfbuzz/blob/main/src/hb-subset-plan.cc#L439
4646
static constexpr hb_tag_t kClosurePhaseTable[] = {
47-
common::FontHelper::kCmap,
48-
common::FontHelper::kGSUB,
49-
common::FontHelper::kMATH,
50-
common::FontHelper::kCOLR,
51-
common::FontHelper::kGlyf,
52-
common::FontHelper::kCFF,
47+
common::FontHelper::kCmap, common::FontHelper::kGSUB,
48+
common::FontHelper::kMATH, common::FontHelper::kCOLR,
49+
common::FontHelper::kGlyf, common::FontHelper::kCFF,
5350
};
5451

5552
static constexpr hb_tag_t kClosurePhaseNodeFilter[] = {
56-
/* cmap */ 0xFFFFFFFF,
57-
/* GSUB */ Node::NodeType::GLYPH,
58-
/* MATH */ Node::NodeType::GLYPH,
59-
/* COLR */ Node::NodeType::GLYPH,
60-
/* glyf */ Node::NodeType::GLYPH,
61-
/* CFF */ Node::NodeType::GLYPH,
53+
/* cmap */ 0xFFFFFFFF,
54+
/* GSUB */ Node::NodeType::GLYPH,
55+
/* MATH */ Node::NodeType::GLYPH,
56+
/* COLR */ Node::NodeType::GLYPH,
57+
/* glyf */ Node::NodeType::GLYPH,
58+
/* CFF */ Node::NodeType::GLYPH,
6259
};
6360

6461
static constexpr hb_tag_t kClosurePhaseStartNodes[] = {
65-
/* cmap */ Node::NodeType::SEGMENT,
66-
// For GSUB we also need to consider reached features as starting nodes
67-
// since those have outgoing GSUB edges.
68-
/* GSUB */ Node::NodeType::GLYPH | Node::NodeType::FEATURE,
69-
/* MATH */ Node::NodeType::GLYPH,
70-
/* COLR */ Node::NodeType::GLYPH,
71-
/* glyf */ Node::NodeType::GLYPH,
72-
/* CFF */ Node::NodeType::GLYPH,
62+
/* cmap */ Node::NodeType::SEGMENT,
63+
// For GSUB we also need to consider reached features as starting nodes
64+
// since those have outgoing GSUB edges.
65+
/* GSUB */ Node::NodeType::GLYPH | Node::NodeType::FEATURE,
66+
/* MATH */ Node::NodeType::GLYPH,
67+
/* COLR */ Node::NodeType::GLYPH,
68+
/* glyf */ Node::NodeType::GLYPH,
69+
/* CFF */ Node::NodeType::GLYPH,
7370
};
7471

7572
static absl::StatusOr<DependencyGraph> Create(
@@ -117,17 +114,21 @@ class DependencyGraph {
117114
absl::StatusOr<ift::common::GlyphSet> RequiredGlyphsFor(
118115
const PendingEdge& edge) const;
119116

120-
// Returns a topological sorting of the dependency graph's nodes, excluding
121-
// any nodes that are in the init font.
122-
absl::StatusOr<std::vector<Node>> TopologicalSorting(const absl::flat_hash_set<hb_tag_t>& table_filter, uint32_t node_type_filter) const;
117+
// Returns the strongly connected components of the dependency graph's nodes,
118+
// excluding any nodes that are in the init font. The components are returned
119+
// in topological order.
120+
absl::StatusOr<std::vector<std::vector<Node>>> StronglyConnectedComponents(
121+
const absl::flat_hash_set<hb_tag_t>& table_filter,
122+
uint32_t node_type_filter) const;
123123

124124
// Computes the incoming edges for every node in the dependency graph, taking
125125
// into account all context requirements and implicit dependencies.
126126
//
127127
// The return value is a map from each node to each unique edge requirement.
128128
// Edge requirements are represented as a CNF expression over other nodes.
129129
absl::StatusOr<absl::flat_hash_map<Node, absl::btree_set<EdgeConditonsCnf>>>
130-
CollectIncomingEdges(const absl::flat_hash_set<hb_tag_t>& table_filter, uint32_t node_type_filter) const;
130+
CollectIncomingEdges(const absl::flat_hash_set<hb_tag_t>& table_filter,
131+
uint32_t node_type_filter) const;
131132

132133
private:
133134
DependencyGraph(

0 commit comments

Comments
 (0)