Skip to content

Commit 6bbb57f

Browse files
Copilot and rubensworks committed
Fix relative IRIs containing a forward slash and a colon
Co-authored-by: rubensworks <440384+rubensworks@users.noreply.github.com>
1 parent a40f09f commit 6bbb57f

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

lib/RdfXmlParser.ts

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -141,6 +141,20 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
141141
* @return {NamedNode} an IRI.
142142
*/
143143
public valueToUri(value: string, activeTag: IActiveTag): RDF.NamedNode {
144+
// Per RFC 3986, a URI scheme must be: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
145+
// A forward slash is not allowed in a URI scheme, so a value like 'x/y:z' where a slash
146+
// appears before the colon is unambiguously a relative IRI (not an absolute one).
147+
// The relative-to-absolute-iri library incorrectly treats any value with a colon as absolute,
148+
// so we must pre-process such relative IRIs containing a slash before the colon.
149+
// We temporarily replace all colons with the null character U+0000 (which is illegal in XML
150+
// and therefore can never legitimately appear in a parsed IRI value), resolve against the base
151+
// IRI treating the whole value as a relative path, then restore the colons.
152+
const colonPos = value.indexOf(':');
153+
const firstSlashPos = value.indexOf('/');
154+
if (colonPos > 0 && firstSlashPos >= 0 && firstSlashPos < colonPos) {
155+
const encoded = value.split(':').join('\u0000');
156+
return this.uriToNamedNode(resolve(encoded, activeTag.baseIRI).split('\u0000').join(':'));
157+
}
144158
return this.uriToNamedNode(resolve(value, activeTag.baseIRI));
145159
}
146160

test/RdfXmlParser-test.ts

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -211,6 +211,21 @@ abc`)).rejects.toBeTruthy();
211211
expect(parser.valueToUri('xyz', { baseIRI: 'http://aa/././a' }))
212212
.toEqual(DF.namedNode('http://aa/xyz'));
213213
});
214+
215+
it('create a named node from a relative IRI with a slash before the colon', () => {
216+
expect(parser.valueToUri('x/y:z', { baseIRI: 'http://base.org/path/' }))
217+
.toEqual(DF.namedNode('http://base.org/path/x/y:z'));
218+
});
219+
220+
it('create a named node from a relative IRI with a slash before the colon and no trailing slash on base', () => {
221+
expect(parser.valueToUri('x/y:z', { baseIRI: 'http://base.org/path' }))
222+
.toEqual(DF.namedNode('http://base.org/x/y:z'));
223+
});
224+
225+
it('create a named node from a relative IRI with multiple colons after a slash', () => {
226+
expect(parser.valueToUri('x/y:z:w', { baseIRI: 'http://base.org/path/' }))
227+
.toEqual(DF.namedNode('http://base.org/path/x/y:z:w'));
228+
});
214229
});
215230

216231
describe('should error with line numbers', () => {
@@ -2100,6 +2115,21 @@ abc`)).rejects.toBeTruthy();
21002115
]);
21012116
});
21022117

2118+
// 2.14 - relative IRI with a colon after a slash
2119+
it('relative IRI containing a slash before a colon should be resolved against the base IRI', async () => {
2120+
const parserThis = new RdfXmlParser({ baseIRI: 'https://example.com/base/' });
2121+
const array = await parse(parserThis, `<?xml version="1.0" encoding="UTF-8"?>
2122+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
2123+
xmlns:default="https://example.com/default/">
2124+
<default:C rdf:about="x/y:z"/>
2125+
</rdf:RDF>`);
2126+
return expect(array)
2127+
.toBeRdfIsomorphic([
2128+
quad('https://example.com/base/x/y:z', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
2129+
'https://example.com/default/C'),
2130+
]);
2131+
});
2132+
21032133
// 2.15
21042134
it('rdf:li properties', async () => {
21052135
const array = await parse(parser, `<?xml version="1.0"?>

0 commit comments

Comments
 (0)