Skip to content

Commit 0c3ae35

Browse files
committed
Switch from HtmlAgilityPack to AngleSharp
1 parent 9f924c6 commit 0c3ae35

42 files changed

Lines changed: 177 additions & 153 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

NetStone/Definitions/Model/PagedDefinition.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using Newtonsoft.Json;
2-
using Newtonsoft.Json.Linq;
32

43
namespace NetStone.Definitions.Model;
54

NetStone/LodestoneClient.cs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
using System.Net;
33
using System.Net.Http;
44
using System.Threading.Tasks;
5-
using HtmlAgilityPack;
5+
using AngleSharp;
6+
using AngleSharp.Dom;
67
using NetStone.Definitions;
78
using NetStone.GameData;
89
using NetStone.Model;
@@ -251,9 +252,11 @@ await GetParsed($"/lodestone/freecompany/{query.BuildQueryString()}&page={page}"
251252
/// <param name="agent">The user agent to use for the request.</param>
252253
/// <exception cref="HttpRequestException"> The request failed due to an underlying issue such as network connectivity, DNS failure, server certificate validation or timeout.</exception>
253254
/// <returns>The instantiated LodestoneParseable in case of success.</returns>
254-
private async Task<T?> GetParsed<T>(string url, Func<HtmlNode, T?> createParseable,
255+
private async Task<T?> GetParsed<T>(string url, Func<IElement, T?> createParseable,
255256
UserAgent agent = UserAgent.Desktop) where T : LodestoneParseable
256257
{
258+
var config = Configuration.Default.WithDefaultLoader();
259+
var context = BrowsingContext.New(config);
257260
var request = new HttpRequestMessage(HttpMethod.Get, url);
258261

259262
switch (agent)
@@ -273,10 +276,9 @@ await GetParsed($"/lodestone/freecompany/{query.BuildQueryString()}&page={page}"
273276
if (response.StatusCode == HttpStatusCode.NotFound)
274277
return null;
275278

276-
var doc = new HtmlDocument();
277-
doc.LoadHtml(await response.Content.ReadAsStringAsync());
278-
279-
return createParseable.Invoke(doc.DocumentNode);
279+
var doc = await context.OpenAsync(async void (req) => req.Content(await response.Content.ReadAsStringAsync()));
280+
281+
return createParseable.Invoke(doc.Body!);
280282
}
281283

282284
/// <inheritdoc />

NetStone/Model/IPaginatedResult.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Threading.Tasks;
4-
using HtmlAgilityPack;
4+
using AngleSharp.Dom;
55
using NetStone.Definitions.Model;
66
using NetStone.Search;
77

@@ -39,7 +39,7 @@ public abstract class PaginatedIdResult<TPage, TEntry, TEntryDef>
3939
where TEntryDef : PagedEntryDefinition
4040
{
4141
///<inheritdoc />
42-
protected PaginatedIdResult(HtmlNode rootNode, PagedDefinition<TEntryDef> pageDefinition,
42+
protected PaginatedIdResult(IElement rootNode, PagedDefinition<TEntryDef> pageDefinition,
4343
Func<string, int, Task<TPage?>> nextPageFunc, string id)
4444
: base(rootNode, pageDefinition, nextPageFunc, id)
4545
{
@@ -55,7 +55,7 @@ public abstract class PaginatedSearchResult<TPage, TEntry, TEntryDef, TQuery>
5555
where TQuery : ISearchQuery
5656
{
5757
///<inheritdoc />
58-
protected PaginatedSearchResult(HtmlNode rootNode, PagedDefinition<TEntryDef> pageDefinition,
58+
protected PaginatedSearchResult(IElement rootNode, PagedDefinition<TEntryDef> pageDefinition,
5959
Func<TQuery, int, Task<TPage?>> nextPageFunc,
6060
TQuery query)
6161
: base(rootNode, pageDefinition, nextPageFunc, query)
@@ -85,7 +85,7 @@ public abstract class PaginatedResult<TPage, TEntry, TEntryDef,TRequest> : Lodes
8585
/// <param name="pageDefinition">CSS definitions for the paginated type</param>
8686
/// <param name="nextPageFunc">Function to retrieve a page of this type</param>
8787
/// <param name="request">The input used to request further pages.</param>
88-
protected PaginatedResult(HtmlNode rootNode, PagedDefinition<TEntryDef> pageDefinition,Func<TRequest, int, Task<TPage?>> nextPageFunc, TRequest request) : base(rootNode)
88+
protected PaginatedResult(IElement rootNode, PagedDefinition<TEntryDef> pageDefinition,Func<TRequest, int, Task<TPage?>> nextPageFunc, TRequest request) : base(rootNode)
8989
{
9090
this.PageDefinition = pageDefinition;
9191
this.request = request;

NetStone/Model/LodestoneParseable.cs

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
using HtmlAgilityPack;
2-
using HtmlAgilityPack.CssSelectors.NetCore;
3-
using NetStone.Definitions;
1+
using NetStone.Definitions;
42
using NetStone.Definitions.Model;
53
using System;
64
using System.Linq;
75
using System.Text.RegularExpressions;
86
using System.Web;
7+
using AngleSharp.Dom;
98

109
namespace NetStone.Model;
1110

@@ -17,33 +16,33 @@ public abstract class LodestoneParseable
1716
/// <summary>
1817
/// The HTML document's root node.
1918
/// </summary>
20-
protected readonly HtmlNode RootNode;
19+
protected readonly IElement RootNode;
2120

2221
/// <summary>
2322
/// Constructs an instance of parseable data for given node
2423
/// </summary>
2524
/// <param name="rootNode"></param>
26-
protected LodestoneParseable(HtmlNode rootNode)
25+
protected LodestoneParseable(IElement rootNode)
2726
{
2827
this.RootNode = rootNode;
2928
}
3029

3130
/// <summary>
32-
/// Query a <see cref="HtmlNode"/> via pack selector.
31+
/// Query an <see cref="IElement"/> via pack selector.
3332
/// </summary>
3433
/// <param name="pack">Definition of the node.</param>
3534
/// <returns>The needed node.</returns>
36-
protected HtmlNode QueryNode(DefinitionsPack pack) => this.RootNode.QuerySelector(pack.Selector);
35+
protected IElement? QueryNode(DefinitionsPack pack) => this.RootNode.QuerySelector(pack.Selector);
3736

3837
/// <summary>
39-
/// Query all ChildNodes of a <see cref="HtmlNode"/> via pack selector.
38+
/// Query all child elements of an <see cref="IElement"/> via pack selector.
4039
/// Removes unneeded "#text" nodes.
4140
/// </summary>
4241
/// <param name="pack">Definition of the node.</param>
4342
/// <returns>All ChildNodes.</returns>
44-
protected HtmlNode[] QueryChildNodes(DefinitionsPack pack) => this.RootNode
43+
protected IElement[] QueryChildNodes(DefinitionsPack pack) => this.RootNode
4544
.QuerySelectorAll(pack.Selector)
46-
.Where(x => x.Name != "#text")
45+
.Where(x => x.NodeName != "#text")
4746
.ToArray();
4847

4948
/// <summary>
@@ -53,15 +52,15 @@ protected HtmlNode[] QueryChildNodes(DefinitionsPack pack) => this.RootNode
5352
/// <param name="pagedDefinition">Parser definition</param>
5453
/// <returns>List of nodes</returns>
5554
/// <exception cref="ArgumentException"></exception>
56-
protected HtmlNode[] QueryContainer<TEntry>(PagedDefinition<TEntry> pagedDefinition) where TEntry : PagedEntryDefinition
55+
protected IElement[] QueryContainer<TEntry>(PagedDefinition<TEntry> pagedDefinition) where TEntry : PagedEntryDefinition
5756
{
5857
var entryDef = pagedDefinition.Entry;
5958

6059
if (entryDef == null)
6160
throw new ArgumentException("Could not get entry definition");
6261

6362
return QueryNode(pagedDefinition.Root)
64-
?.QuerySelectorAll(entryDef.Root.Selector).ToArray() ?? Array.Empty<HtmlNode>();
63+
?.QuerySelectorAll(entryDef.Root.Selector).ToArray() ?? Array.Empty<IElement>();
6564
}
6665

6766
/// <summary>
@@ -78,13 +77,12 @@ protected HtmlNode[] QueryContainer<TEntry>(PagedDefinition<TEntry> pagedDefinit
7877
/// <returns>InnerText of the node or empty string on parse error.</returns>
7978
protected string Parse(DefinitionsPack pack)
8079
{
81-
if (!string.IsNullOrEmpty(pack.Regex))
82-
{
83-
var res = ParseRegex(pack);
80+
if (string.IsNullOrEmpty(pack.Regex))
81+
return ParseInnerText(pack);
82+
var res = ParseRegex(pack);
8483

85-
if (res.Count != 0)
86-
return res[1].Value;
87-
}
84+
if (res.Count != 0)
85+
return res[1].Value;
8886

8987
return ParseInnerText(pack);
9088
}
@@ -100,7 +98,7 @@ protected string ParseInnerText(DefinitionsPack pack, bool noAttribute = false)
10098
var node = QueryNode(pack);
10199

102100
// Handle default attribute parsing
103-
var text = !string.IsNullOrEmpty(pack.Attribute) && !noAttribute ? ParseAttribute(pack) : node?.InnerText;
101+
var text = !string.IsNullOrEmpty(pack.Attribute) && !noAttribute ? ParseAttribute(pack) : node?.TextContent;
104102

105103
return !string.IsNullOrEmpty(text) ? HttpUtility.HtmlDecode(text) : "";
106104
}
@@ -147,7 +145,7 @@ protected string ParseDirectInnerText(DefinitionsPack pack, bool noAttribute = f
147145

148146
var text = !string.IsNullOrEmpty(pack.Attribute) && !noAttribute
149147
? ParseAttribute(pack)
150-
: node?.GetDirectInnerText();
148+
: node?.ChildNodes.OfType<IText>().Select(m => m.Text).FirstOrDefault();
151149

152150
return !string.IsNullOrEmpty(text) ? HttpUtility.HtmlDecode(text) : "";
153151
}

NetStone/Model/Parseables/CWLS/LodestoneCrossworldLinkshell.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
using System.Collections.Generic;
2-
using HtmlAgilityPack;
2+
using AngleSharp.Dom;
33
using NetStone.Definitions;
44
using NetStone.Definitions.Model.CWLS;
55
using NetStone.Model.Parseables.CWLS.Members;
@@ -21,7 +21,7 @@ public class LodestoneCrossworldLinkshell : PaginatedIdResult<LodestoneCrossworl
2121
/// <param name="rootNode">The root document node of the page.</param>
2222
/// <param name="container">The <see cref="DefinitionsContainer"/> holding definitions to be used to access data.</param>
2323
/// <param name="id">The ID of the cross world linkshell.</param>
24-
public LodestoneCrossworldLinkshell(LodestoneClient client, HtmlNode rootNode, DefinitionsContainer container, string id)
24+
public LodestoneCrossworldLinkshell(LodestoneClient client, IElement rootNode, DefinitionsContainer container, string id)
2525
: base(rootNode,container.CrossworldLinkshellMember,client.GetCrossworldLinkshell,id)
2626
{
2727
this.definition = container.CrossworldLinkshell;

NetStone/Model/Parseables/CWLS/Members/CrossworldLinkshellMemberEntry.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using HtmlAgilityPack;
1+
using AngleSharp.Dom;
22
using NetStone.Definitions.Model.CWLS;
33

44
namespace NetStone.Model.Parseables.CWLS.Members;
@@ -14,7 +14,7 @@ public class CrossworldLinkshellMemberEntry : LodestoneParseable
1414
/// </summary>
1515
/// <param name="rootNode">Root html node of this entry</param>
1616
/// <param name="definition">Css and regex definition</param>
17-
public CrossworldLinkshellMemberEntry(HtmlNode rootNode, CrossworldLinkshellMemberEntryDefinition definition) : base(rootNode)
17+
public CrossworldLinkshellMemberEntry(IElement rootNode, CrossworldLinkshellMemberEntryDefinition definition) : base(rootNode)
1818
{
1919
this.definition = definition;
2020
}

NetStone/Model/Parseables/Character/Achievement/CharacterAchievementEntry.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
using System;
22
using System.Linq;
3-
using HtmlAgilityPack;
3+
using AngleSharp.Dom;
44
using NetStone.Definitions.Model.Character;
55

66
namespace NetStone.Model.Parseables.Character.Achievement;
@@ -17,7 +17,7 @@ public class CharacterAchievementEntry : LodestoneParseable
1717
/// </summary>
1818
/// <param name="rootNode">Root html node of this entry</param>
1919
/// <param name="definition">Css and regex definition</param>
20-
public CharacterAchievementEntry(HtmlNode rootNode, CharacterAchievementEntryDefinition definition) : base(rootNode)
20+
public CharacterAchievementEntry(IElement rootNode, CharacterAchievementEntryDefinition definition) : base(rootNode)
2121
{
2222
this.definition = definition;
2323
}

NetStone/Model/Parseables/Character/Achievement/CharacterAchievementPage.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
using System.Collections.Generic;
2-
using HtmlAgilityPack;
2+
using AngleSharp.Dom;
33
using NetStone.Definitions.Model.Character;
44

55
namespace NetStone.Model.Parseables.Character.Achievement;
@@ -18,7 +18,7 @@ public class CharacterAchievementPage : PaginatedIdResult<CharacterAchievementPa
1818
/// <param name="rootNode">Root node of the achievement page</param>
1919
/// <param name="definition">Parse definition pack</param>
2020
/// <param name="charId">ID of the character</param>
21-
public CharacterAchievementPage(LodestoneClient client, HtmlNode rootNode,
21+
public CharacterAchievementPage(LodestoneClient client, IElement rootNode,
2222
CharacterAchievementDefinition definition,string charId)
2323
: base(rootNode, definition, client.GetCharacterAchievement, charId)
2424
{

NetStone/Model/Parseables/Character/CharacterAttributes.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
using System;
2-
using HtmlAgilityPack;
2+
using AngleSharp.Dom;
33
using NetStone.Definitions.Model.Character;
44

55
namespace NetStone.Model.Parseables.Character;
@@ -16,7 +16,7 @@ public class CharacterAttributes : LodestoneParseable
1616
/// </summary>
1717
/// <param name="rootNode">Root HTML node of the character profile page on Lodestone</param>
1818
/// <param name="definition">Definitions on how to parse attributes from the HTML</param>
19-
public CharacterAttributes(HtmlNode rootNode, CharacterAttributesDefinition definition) : base(rootNode)
19+
public CharacterAttributes(IElement rootNode, CharacterAttributesDefinition definition) : base(rootNode)
2020
{
2121
this.definition = definition;
2222
}

NetStone/Model/Parseables/Character/ClassJob/CharacterClassJob.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
using System.Collections.Generic;
2-
using HtmlAgilityPack;
2+
using AngleSharp.Dom;
33
using NetStone.Definitions.Model.Character;
44

55
namespace NetStone.Model.Parseables.Character.ClassJob;
@@ -16,7 +16,7 @@ public class CharacterClassJob : LodestoneParseable
1616
/// </summary>
1717
/// <param name="rootNode">Root html node of Lodestone page</param>
1818
/// <param name="definition">Definition to parse ClassJobs</param>
19-
public CharacterClassJob(HtmlNode rootNode, CharacterClassJobDefinition definition) : base(rootNode)
19+
public CharacterClassJob(IElement rootNode, CharacterClassJobDefinition definition) : base(rootNode)
2020
{
2121
this.definition = definition;
2222
}

0 commit comments

Comments
 (0)