Skip to content

Commit facb225

Browse files
tavisitccaffy
authored and committed
MGM: Added a new Tokenize function for quoted values. Fixes EOS-5783
1 parent 30edf48 commit facb225

4 files changed

Lines changed: 106 additions & 5 deletions

File tree

common/StringConversion.cc

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,53 @@ StringConversion::Tokenize(const std::string& str,
7878
}
7979
}
8080

81+
//------------------------------------------------------------------------------
82+
// Tokenize a string respecting quoted strings and escapes
83+
//------------------------------------------------------------------------------
84+
void
85+
StringConversion::TokenizeQuoted(const std::string& str, std::vector<std::string>& tokens,
86+
const std::string& delimiters)
87+
{
88+
if (str.empty()) {
89+
return;
90+
}
91+
92+
size_t pos = 0;
93+
const size_t len = str.length();
94+
bool in_quotes = false;
95+
bool escaped = false;
96+
std::string current_token;
97+
98+
while (pos < len) {
99+
char ch = str[pos++];
100+
101+
if (escaped) {
102+
// Previous char was '\', add this char literally (e.g., \" becomes ")
103+
current_token += ch;
104+
escaped = false;
105+
} else if (ch == '\\') {
106+
// escape next character
107+
escaped = true;
108+
} else if (ch == '"') {
109+
// Toggle quote state
110+
in_quotes = !in_quotes;
111+
} else if (!in_quotes && delimiters.find(ch) != std::string::npos) {
112+
// Found delimiter outside quotes - end current token
113+
if (!current_token.empty()) {
114+
tokens.push_back(std::move(current_token));
115+
current_token.clear();
116+
}
117+
} else {
118+
// Regular character or delimiter inside quotes
119+
current_token += ch;
120+
}
121+
}
122+
123+
// Add final token if any
124+
if (!current_token.empty()) {
125+
tokens.push_back(std::move(current_token));
126+
}
127+
}
81128

82129
//------------------------------------------------------------------------------
83130
// Tokenize a string separated by one single character or multichar string
@@ -614,7 +661,7 @@ StringConversion::GetKeyValueMap(const char* mapstring,
614661
std::string is = mapstring;
615662
std::string delimiter = sdelimiter;
616663
std::vector<std::string> slist;
617-
Tokenize(is, slist, delimiter);
664+
TokenizeQuoted(is, slist, delimiter);
618665

619666
if (!slist.size()) {
620667
return false;

common/StringConversion.hh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,13 @@ public:
8282
std::vector<std::string>& tokens,
8383
const std::string& delimiters = " ");
8484

85+
/**
 * Tokenize a string respecting quoted strings
 *
 * Delimiters enclosed in double quotes do not split the input, and a
 * backslash makes the following character literal. Unescaped quotes and
 * the escaping backslashes are not part of the resulting tokens.
 *
 * @param str input string to split
 * @param tokens vector to which the resulting tokens are appended
 * @param delimiters characters which separate tokens when they appear
 *        outside of double quotes (defaults to a single space, like Tokenize)
 */
static void TokenizeQuoted(const std::string& str, std::vector<std::string>& tokens,
                           const std::string& delimiters = " ");
91+
8592
// ---------------------------------------------------------------------------
8693
/**
8794
* Tokenize a string

mgm/proc/IProcCommand.cc

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -357,10 +357,6 @@ IProcCommand::ConvertOutputToJsonFormat(const std::string& stdOut)
357357
for (int i = 1; i < (int)token.size(); i++) {
358358
jep = &((*jep)[token[i]]);
359359
}
360-
361-
// Unquote value
362-
std::stringstream quoted_ss(value);
363-
quoted_ss >> std::quoted(value);
364360
// Seal value
365361
XrdOucString svalue = value.c_str();
366362
eos::common::StringConversion::Seal(svalue);

unit_tests/common/StringConversionTests.cc

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,4 +196,55 @@ TEST(StringConversion, ReplaceStringInPlace)
196196
ASSERT_STREQ("aabbccddxyeeffggxyhhiijjxy", input.c_str());
197197
}
198198

199+
// Exercises StringConversion::TokenizeQuoted: plain splitting, quoted values
// containing delimiters, escaped quotes and runs of delimiters.
// Assertions use (expected, actual) ordering like the other tests in this file.
TEST(StringConversion, TokenizeQuoted)
{
  std::vector<std::string> tokens;

  // Empty input yields no tokens
  StringConversion::TokenizeQuoted("", tokens, " ");
  ASSERT_EQ(0u, tokens.size());
  tokens.clear();

  // Without quotes the input is split on every delimiter
  StringConversion::TokenizeQuoted("hello world test", tokens, " ");
  ASSERT_EQ(3u, tokens.size());
  ASSERT_STREQ("hello", tokens[0].c_str());
  ASSERT_STREQ("world", tokens[1].c_str());
  ASSERT_STREQ("test", tokens[2].c_str());
  tokens.clear();

  // Delimiters inside quotes do not split; the quotes themselves are removed
  StringConversion::TokenizeQuoted("user.reva.overleaf.name=\"my first project\"", tokens,
                                   " ");
  ASSERT_EQ(1u, tokens.size());
  ASSERT_STREQ("user.reva.overleaf.name=my first project", tokens[0].c_str());

  // Splitting the unquoted result on '=' separates key and value
  std::vector<std::string> kv;
  StringConversion::TokenizeQuoted(tokens[0], kv, "=");
  ASSERT_EQ(2u, kv.size());
  ASSERT_STREQ("user.reva.overleaf.name", kv[0].c_str());
  ASSERT_STREQ("my first project", kv[1].c_str());
  tokens.clear();
  kv.clear();

  // Several quoted attributes on a single line
  std::string attr_line = "sys.acl=\"u:12345:rwxm+dq\" sys.versioning=\"10\" "
                          "user.reva.overleaf.name=\"my first project\"";
  StringConversion::TokenizeQuoted(attr_line, tokens, " ");
  ASSERT_EQ(3u, tokens.size());
  ASSERT_STREQ("sys.acl=u:12345:rwxm+dq", tokens[0].c_str());
  ASSERT_STREQ("sys.versioning=10", tokens[1].c_str());
  ASSERT_STREQ("user.reva.overleaf.name=my first project", tokens[2].c_str());
  tokens.clear();

  // Escaped quotes are kept literally inside a quoted value
  StringConversion::TokenizeQuoted("key=\"value with \\\"quoted\\\" text\"", tokens,
                                   " =");
  ASSERT_EQ(2u, tokens.size());
  ASSERT_STREQ("key", tokens[0].c_str());
  ASSERT_STREQ("value with \"quoted\" text", tokens[1].c_str());
  tokens.clear();

  // Leading, trailing and consecutive delimiters do not create empty tokens
  StringConversion::TokenizeQuoted("  hello   world  ", tokens, " ");
  ASSERT_EQ(2u, tokens.size());
  ASSERT_STREQ("hello", tokens[0].c_str());
  ASSERT_STREQ("world", tokens[1].c_str());
  tokens.clear();
}
249+
199250
EOSCOMMONTESTING_END

0 commit comments

Comments
 (0)