1- using HtmlAgilityPack ;
2- using HtmlAgilityPack . CssSelectors . NetCore ;
3- using NetStone . Definitions ;
1+ using NetStone . Definitions ;
42using NetStone . Definitions . Model ;
53using System ;
64using System . Linq ;
75using System . Text . RegularExpressions ;
86using System . Web ;
7+ using AngleSharp . Dom ;
98
109namespace NetStone . Model ;
1110
@@ -17,33 +16,33 @@ public abstract class LodestoneParseable
1716 /// <summary>
1817 /// The HTML document's root node.
1918 /// </summary>
20- protected readonly HtmlNode RootNode ;
19+ protected readonly IElement RootNode ;
2120
2221 /// <summary>
2322 /// Constructs an instance of parseable data for given node
2423 /// </summary>
2524 /// <param name="rootNode"></param>
26- protected LodestoneParseable ( HtmlNode rootNode )
25+ protected LodestoneParseable ( IElement rootNode )
2726 {
2827 this . RootNode = rootNode ;
2928 }
3029
3130 /// <summary>
32- /// Query a <see cref="HtmlNode "/> via pack selector.
31+ /// Query an <see cref="IElement"/> via pack selector.
3332 /// </summary>
3433 /// <param name="pack">Definition of the node.</param>
3534 /// <returns>The needed node.</returns>
36- protected HtmlNode QueryNode ( DefinitionsPack pack ) => this . RootNode . QuerySelector ( pack . Selector ) ;
35+ protected IElement ? QueryNode ( DefinitionsPack pack ) => this . RootNode . QuerySelector ( pack . Selector ) ;
3736
3837 /// <summary>
39- /// Query all ChildNodes of a <see cref="HtmlNode "/> via pack selector.
38+ /// Query all ChildNodes of an <see cref="IElement"/> via pack selector.
4039 /// Removes unneeded "#text" nodes.
4140 /// </summary>
4241 /// <param name="pack">Definition of the node.</param>
4342 /// <returns>All ChildNodes.</returns>
44- protected HtmlNode [ ] QueryChildNodes ( DefinitionsPack pack ) => this . RootNode
43+ protected IElement [ ] QueryChildNodes ( DefinitionsPack pack ) => this . RootNode
4544 . QuerySelectorAll ( pack . Selector )
46- . Where ( x => x . Name != "#text" )
45+ . Where ( x => x . NodeName != "#text" )
4746 . ToArray ( ) ;
4847
4948 /// <summary>
@@ -53,15 +52,15 @@ protected HtmlNode[] QueryChildNodes(DefinitionsPack pack) => this.RootNode
5352 /// <param name="pagedDefinition">Parser definition</param>
5453 /// <returns>List of nodes</returns>
5554 /// <exception cref="ArgumentException"></exception>
56- protected HtmlNode [ ] QueryContainer < TEntry > ( PagedDefinition < TEntry > pagedDefinition ) where TEntry : PagedEntryDefinition
55+ protected IElement [ ] QueryContainer < TEntry > ( PagedDefinition < TEntry > pagedDefinition ) where TEntry : PagedEntryDefinition
5756 {
5857 var entryDef = pagedDefinition . Entry ;
5958
6059 if ( entryDef == null )
6160 throw new ArgumentException ( "Could not get entry definition" ) ;
6261
6362 return QueryNode ( pagedDefinition . Root )
64- ? . QuerySelectorAll ( entryDef . Root . Selector ) . ToArray ( ) ?? Array . Empty < HtmlNode > ( ) ;
63+ ? . QuerySelectorAll ( entryDef . Root . Selector ) . ToArray ( ) ?? Array . Empty < IElement > ( ) ;
6564 }
6665
6766 /// <summary>
@@ -78,13 +77,12 @@ protected HtmlNode[] QueryContainer<TEntry>(PagedDefinition<TEntry> pagedDefinit
7877 /// <returns>InnerText of the node or empty string on parse error.</returns>
7978 protected string Parse ( DefinitionsPack pack )
8079 {
81- if ( ! string . IsNullOrEmpty ( pack . Regex ) )
82- {
83- var res = ParseRegex ( pack ) ;
80+ if ( string . IsNullOrEmpty ( pack . Regex ) )
81+ return ParseInnerText ( pack ) ;
82+ var res = ParseRegex ( pack ) ;
8483
85- if ( res . Count != 0 )
86- return res [ 1 ] . Value ;
87- }
84+ if ( res . Count != 0 )
85+ return res [ 1 ] . Value ;
8886
8987 return ParseInnerText ( pack ) ;
9088 }
@@ -100,7 +98,7 @@ protected string ParseInnerText(DefinitionsPack pack, bool noAttribute = false)
10098 var node = QueryNode ( pack ) ;
10199
102100 // Handle default attribute parsing
103- var text = ! string . IsNullOrEmpty ( pack . Attribute ) && ! noAttribute ? ParseAttribute ( pack ) : node ? . InnerText ;
101+ var text = ! string . IsNullOrEmpty ( pack . Attribute ) && ! noAttribute ? ParseAttribute ( pack ) : node ? . TextContent ;
104102
105103 return ! string . IsNullOrEmpty ( text ) ? HttpUtility . HtmlDecode ( text ) : "" ;
106104 }
@@ -147,7 +145,7 @@ protected string ParseDirectInnerText(DefinitionsPack pack, bool noAttribute = f
147145
148146 var text = ! string . IsNullOrEmpty ( pack . Attribute ) && ! noAttribute
149147 ? ParseAttribute ( pack )
150- : node ? . GetDirectInnerText ( ) ;
148+ : node ? . ChildNodes . OfType < IText > ( ) . Select ( m => m . Text ) . FirstOrDefault ( ) ;
151149
152150 return ! string . IsNullOrEmpty ( text ) ? HttpUtility . HtmlDecode ( text ) : "" ;
153151 }
0 commit comments