Skip to content

Commit ca78308

Browse files
theanalystesindril
authored andcommitted
COMMON: StringUtils: introduce a XrdOucString equivalent replace_all
This function replaces all occurrences of a source pattern within the string with a target. When the replacement target is larger than the source, the output string will be larger than the input; in that case we do an extra pass to determine the total size the string will grow to and reserve it up front. This allows the string::replace calls to never reallocate, while keeping the same memmove/memcpy patterns that std::string::replace would eventually use anyway. Signed-off-by: Abhishek Lekshmanan <abhishek.lekshmanan@cern.ch>
1 parent b4ece24 commit ca78308

2 files changed

Lines changed: 311 additions & 2 deletions

File tree

common/StringUtils.hh

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <string>
3131
#include <charconv>
3232
#include <cstdint>
33+
#include <cstddef>
3334
#include <string_view>
3435

3536
#include "common/Namespace.hh"
@@ -259,6 +260,100 @@ auto StringToNumeric(const StrT& key, NumT& value,
259260
return true;
260261
}
261262

262-
#endif
263+
#endif // __cpp_lib_to_chars
264+
265+
266+
// An XrdOucString-inspired replace function: replaces, in place, every
// non-overlapping occurrence of s1 with s2 inside str.
//
// @param str  string to modify
// @param s1   pattern to search for; an empty pattern is a no-op
// @param s2   replacement text; may be empty, which erases the matches
// @param from index of the first character of the search window
// @param to   index of the last character (inclusive) of the search window;
//             values past the end are clamped to the last character of str
//
// Only matches lying entirely inside [from, to] are replaced. The end of the
// window moves with every replacement, so it keeps covering the same original
// characters while the string grows or shrinks. Text written by a replacement
// is never rescanned.
//
// The original XrdOucString function returned the signed total length change.
// That does not tell whether anything was replaced (s1.size() == s2.size()
// leaves the size unchanged), so nothing is returned here.
//
// s1 and s2 are non-owning views that are still read after str has been
// modified, so they should not refer to str's own buffer.
static inline void replace_all(std::string& str,
                               std::string_view s1, std::string_view s2,
                               size_t from=0, size_t to=std::string::npos)
{
  const size_t orig_str_size = str.size();
  const size_t l1 = s1.size();
  const size_t l2 = s2.size();

  if (str.empty() || s1.empty() || from >= orig_str_size || from > to) {
    return;
  }

  // Work with an exclusive window end: it is always >= match_pos + l1 after
  // a match, so adjusting it can never underflow and no clamping is needed.
  size_t window_end = (to < orig_str_size) ? to + 1 : orig_str_size;

  // The pattern cannot fit in the window at all
  if (l1 > window_end - from) {
    return;
  }

  // Search for s1 starting at pos, only accepting matches that end inside
  // the current window. The view is rebuilt on every call because str may
  // have been reallocated or resized in between.
  const auto find_in_window = [&](size_t pos) {
    return std::string_view(str.data(), window_end).find(s1, pos);
  };

  // When the replacement is longer the string has to grow. Count the matches
  // first so that the final size is reserved once, instead of regrowing the
  // string on every replacement.
  if (l2 > l1) {
    size_t match_count {0};

    for (size_t pos = find_in_window(from); pos != std::string::npos;
         pos = find_in_window(pos + l1)) {
      ++match_count;
    }

    if (match_count == 0) {
      return;
    }

    str.reserve(orig_str_size + (match_count * (l2 - l1)));
  }

  size_t match_pos = find_in_window(from);

  while (match_pos != std::string::npos) {
    str.replace(match_pos, l1, s2);
    // Shift the window end by the size change of this replacement
    window_end = window_end - l1 + l2;
    // Resume right after the inserted text so it is not matched again
    match_pos = find_in_window(match_pos + l2);
  }
}
356+
357+
263358

264359
EOSCOMMONNAMESPACE_END

unit_tests/common/StringUtilsTests.cc

Lines changed: 215 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,4 +120,218 @@ TEST(StringUtils, StringToNumericErrorMessage)
120120
ASSERT_TRUE(!err_msg.empty());
121121
EXPECT_EQ(err_msg,
122122
"\"msg=Failed Numeric conversion\" key=128 error_msg=Numerical result out of range");
123-
}
123+
}
124+
125+
using eos::common::replace_all;
126+
127+
// Inputs that cannot match anything (empty pattern, unusable range) must
// leave the string exactly as it was.
TEST(StringUtils, ReplaceAllEmpty) {
  const std::string reference {"the quick brown fox jumps over the lazy dog"};
  std::string subject {reference};

  // Empty pattern with an empty replacement
  replace_all(subject, "", "");
  EXPECT_EQ(subject, reference);

  // Empty pattern with a non-empty replacement
  replace_all(subject, "", "foo");
  EXPECT_EQ(subject, reference);

  // Range starts beyond the end of the string
  replace_all(subject, "fox", "charlie", 1000, 2000);
  EXPECT_EQ(subject, reference);

  // Range start lies after the range end
  replace_all(subject, "fox", "charlie", 10, 5);
  EXPECT_EQ(subject, reference);
}
144+
145+
// Plain replacements over the whole string and over explicit sub-ranges.
TEST(StringUtils, ReplaceAllBasic) {
  const std::string sentence {"the quick brown fox jumps over the lazy dog"};

  // Longer replacement
  std::string str {sentence};
  replace_all(str, "fox", "charlie");
  EXPECT_EQ(str, "the quick brown charlie jumps over the lazy dog");

  // Shorter replacement, several matches
  str = sentence;
  replace_all(str, "the", "a");
  EXPECT_EQ(str, "a quick brown fox jumps over a lazy dog");

  // Same-size replacement of a single character
  str = sentence;
  replace_all(str, "o", "O");
  EXPECT_EQ(str, "the quick brOwn fOx jumps Over the lazy dOg");

  str = sentence;
  replace_all(str, " ", "_");
  EXPECT_EQ(str, "the_quick_brown_fox_jumps_over_the_lazy_dog");

  // Range that skips the first match
  str = sentence;
  replace_all(str, "the", "a", 10, 43);
  EXPECT_EQ(str, "the quick brown fox jumps over a lazy dog");

  // Range that covers only the two middle matches
  str = sentence;
  replace_all(str, "o", "O", 12, 25);
  EXPECT_EQ(str, "the quick brOwn fOx jumps over the lazy dog");
}
171+
172+
// Replacement shorter than the pattern: the string shrinks as matches are
// consumed left to right.
TEST(StringUtils, ReplaceAllReduce) {
  // Even number of characters: every pair is replaced
  std::string even_run {"aaaaaa"};
  replace_all(even_run, "aa", "b");
  EXPECT_EQ(even_run, "bbb");

  // Odd number of characters: one trailing 'a' is left over
  std::string odd_run {"aaaaa"};
  replace_all(odd_run, "aa", "b");
  EXPECT_EQ(odd_run, "bba");
}
181+
182+
// An empty replacement erases the matched pattern.
TEST(StringUtils, ReplaceAllEmptyIn) {
  std::string greeting {"hello world"};
  replace_all(greeting, "hello", "");
  EXPECT_EQ(greeting, " world");
}
187+
188+
// Test with 'from' parameter
189+
TEST(StringUtils, ReplaceAllFromParameter) {
190+
std::string str = "abc def abc ghi abc";
191+
replace_all(str, "abc", "xyz", 4); // Start from position 4
192+
EXPECT_EQ(str, "abc def xyz ghi xyz");
193+
}
194+
195+
// Test with 'to' parameter
196+
TEST(StringUtils, ReplaceAllToParameter) {
197+
std::string str = "abc def abc ghi abc";
198+
replace_all(str, "abc", "xyz", 0, 10); // Only replace up to position 10
199+
EXPECT_EQ(str, "xyz def xyz ghi abc");
200+
}
201+
202+
// Test with both 'from' and 'to' parameters
203+
TEST(StringUtils, ReplaceAllFromAndToParameters) {
204+
std::string str = "abc def abc ghi abc jkl abc";
205+
replace_all(str, "abc", "xyz", 4, 15); // Replace from pos 4 to pos 15
206+
EXPECT_EQ(str, "abc def xyz ghi abc jkl abc");
207+
}
208+
209+
// Test invalid range parameters
210+
TEST(StringUtils, ReplaceAllInvalidRangeParameters) {
211+
std::string str = "hello world hello";
212+
std::string original = str;
213+
214+
// from >= string size
215+
replace_all(str, "hello", "hi", 20);
216+
EXPECT_EQ(str, original);
217+
218+
// from > to
219+
std::string str2 = "hello world hello";
220+
replace_all(str2, "hello", "hi", 10, 5);
221+
EXPECT_EQ(str2, original);
222+
}
223+
224+
// Test overlapping patterns
225+
TEST(StringUtils, ReplaceAllOverlappingPatterns) {
226+
std::string str = "aaaa";
227+
replace_all(str, "aa", "b");
228+
// Should replace non-overlapping occurrences: "aaaa" -> "bb"
229+
EXPECT_EQ(str, "bb");
230+
}
231+
232+
// Test single character replacement
233+
TEST(StringUtils, ReplaceAllSingleCharacterReplacement) {
234+
std::string str = "a b a c a";
235+
replace_all(str, "a", "x");
236+
EXPECT_EQ(str, "x b x c x");
237+
}
238+
239+
// Test replacement at string boundaries
240+
TEST(StringUtils, ReplaceAllBoundaryReplacement) {
241+
// At beginning
242+
std::string str1 = "hello world";
243+
replace_all(str1, "hello", "hi");
244+
EXPECT_EQ(str1, "hi world");
245+
246+
// At end
247+
std::string str2 = "world hello";
248+
replace_all(str2, "hello", "hi");
249+
EXPECT_EQ(str2, "world hi");
250+
251+
// Entire string
252+
std::string str3 = "hello";
253+
replace_all(str3, "hello", "hi");
254+
EXPECT_EQ(str3, "hi");
255+
}
256+
257+
// Test with special characters
258+
TEST(StringUtils, ReplaceAllSpecialCharacters) {
259+
std::string str = "a\nb\ta\nc";
260+
replace_all(str, "a", "x");
261+
EXPECT_EQ(str, "x\nb\tx\nc");
262+
}
263+
264+
// Test case sensitivity
265+
TEST(StringUtils, ReplaceAllCaseSensitivity) {
266+
std::string str = "Hello world HELLO";
267+
replace_all(str, "Hello", "Hi");
268+
EXPECT_EQ(str, "Hi world HELLO"); // Should not replace "HELLO"
269+
}
270+
271+
// Test with long strings and patterns
272+
TEST(StringUtils, ReplaceAllLongStringsAndPatterns) {
273+
std::string str = "this is a long pattern and this is another long pattern";
274+
replace_all(str, "long pattern", "short");
275+
EXPECT_EQ(str, "this is a short and this is another short");
276+
}
277+
278+
// Test performance with many replacements
279+
TEST(StringUtils, ReplaceAllManyReplacements) {
280+
std::string str;
281+
for (int i = 0; i < 1000; ++i) {
282+
str += "a ";
283+
}
284+
replace_all(str, "a", "bb");
285+
286+
// Verify the result
287+
std::string expected;
288+
for (int i = 0; i < 1000; ++i) {
289+
expected += "bb ";
290+
}
291+
EXPECT_EQ(str, expected);
292+
}
293+
294+
// Test with substring that appears within the replacement
295+
TEST(StringUtils, ReplaceAllSubstringInReplacement) {
296+
std::string str = "abc abc abc";
297+
replace_all(str, "abc", "abcdef");
298+
EXPECT_EQ(str, "abcdef abcdef abcdef");
299+
}
300+
301+
// Test edge case where search pattern is larger than available range
302+
TEST(StringUtils, ReplaceAllSearchPatternLargerThanRange) {
303+
std::string str = "hello world";
304+
std::string original = str;
305+
replace_all(str, "hello world!", "hi", 0, 5); // Pattern longer than range
306+
EXPECT_EQ(str, original); // Should not change
307+
}
308+
309+
// Test replacement with exact range boundaries
310+
TEST(StringUtils, ReplaceAllExactRangeBoundaries) {
311+
std::string str = "abcdefabc";
312+
replace_all(str, "abc", "x", 0, 2); // Range exactly matches first "abc"
313+
EXPECT_EQ(str, "xdefabc");
314+
}
315+
316+
// Test string_view parameters
317+
TEST(StringUtils, ReplaceAllStringViewParameters) {
318+
std::string str = "hello world hello";
319+
std::string_view search = "hello";
320+
std::string_view replacement = "hi";
321+
replace_all(str, search, replacement);
322+
EXPECT_EQ(str, "hi world hi");
323+
}
324+
325+
// Test with to parameter as string::npos (default)
326+
TEST(StringUtils, ReplaceAllDefaultToParameter) {
327+
std::string str = "abc def abc ghi abc";
328+
replace_all(str, "abc", "xyz", 4, std::string::npos);
329+
EXPECT_EQ(str, "abc def xyz ghi xyz");
330+
}
331+
332+
// Test contraction that would make 'to' go below search pattern length
333+
TEST(StringUtils, ReplaceAllContractionBelowPatternLength) {
334+
std::string str = "abcabcabc";
335+
replace_all(str, "abc", "x", 0, 8); // This should handle the contraction properly
336+
EXPECT_EQ(str, "xxx");
337+
}

0 commit comments

Comments
 (0)