From 0118018aaf4c1d43cb072bee9dca9a06c3a6dfed Mon Sep 17 00:00:00 2001
From: CarloToso <105941898+CarloToso@users.noreply.github.com>
Date: Fri, 17 Mar 2023 10:14:52 +0100
Subject: [PATCH 1/6] Initialize HTML regexes as static readonly fields and remove EnsureHtmlParser
---
.../BasicHtmlWebResponseObject.Common.cs | 47 +++++--------------
1 file changed, 13 insertions(+), 34 deletions(-)
diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
index 8e8c099c661..fa142b57468 100644
--- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
+++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
@@ -18,12 +18,19 @@ public class BasicHtmlWebResponseObject : WebResponseObject
{
#region Private Fields
- private static Regex s_attribNameValueRegex;
- private static Regex s_attribsRegex;
- private static Regex s_imageRegex;
- private static Regex s_inputFieldRegex;
- private static Regex s_linkRegex;
- private static Regex s_tagRegex;
+ private static readonly Regex s_attribsRegex = CreateAttribsRegex();
+ private static readonly Regex s_attribNameValueRegex = CreateAttribNameValueRegex();
+ private static readonly Regex s_imageRegex = CreateImageRegex();
+ private static readonly Regex s_inputFieldRegex = CreateInputFieldRegex();
+ private static readonly Regex s_linkRegex = CreateLinkRegex();
+ private static readonly Regex s_tagRegex = CreateTagRegex();
+
+ private static Regex CreateAttribsRegex() => new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ private static Regex CreateAttribNameValueRegex() => new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ private static Regex CreateImageRegex() => new Regex(@"
]*?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ private static Regex CreateInputFieldRegex() => new Regex(@"]*(/?>|>.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ private static Regex CreateLinkRegex() => new Regex(@"]*(/>|>.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ private static Regex CreateTagRegex() => new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
#endregion Private Fields
@@ -43,7 +50,6 @@ public BasicHtmlWebResponseObject(HttpResponseMessage response) : this(response,
///
public BasicHtmlWebResponseObject(HttpResponseMessage response, Stream contentStream) : base(response, contentStream)
{
- EnsureHtmlParser();
InitializeContent();
InitializeRawContent(response);
}
@@ -82,8 +88,6 @@ public WebCmdletElementCollection InputFields
{
if (_inputFields == null)
{
- EnsureHtmlParser();
-
List parsedFields = new();
MatchCollection fieldMatch = s_inputFieldRegex.Matches(Content);
foreach (Match field in fieldMatch)
@@ -109,8 +113,6 @@ public WebCmdletElementCollection Links
{
if (_links == null)
{
- EnsureHtmlParser();
-
List parsedLinks = new();
MatchCollection linkMatch = s_linkRegex.Matches(Content);
foreach (Match link in linkMatch)
@@ -136,8 +138,6 @@ public WebCmdletElementCollection Images
{
if (_images == null)
{
- EnsureHtmlParser();
-
List parsedImages = new();
MatchCollection imageMatch = s_imageRegex.Matches(Content);
foreach (Match image in imageMatch)
@@ -188,27 +188,6 @@ private static PSObject CreateHtmlObject(string html, string tagName)
return elementObject;
}
- private static void EnsureHtmlParser()
- {
- s_tagRegex ??= new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- s_attribsRegex ??= new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- s_attribNameValueRegex ??= new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- s_inputFieldRegex ??= new Regex(@"]*(/?>|>.*?)",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- s_linkRegex ??= new Regex(@"]*(/>|>.*?)",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- s_imageRegex ??= new Regex(@"
]*?>",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- }
-
private void InitializeRawContent(HttpResponseMessage baseResponse)
{
StringBuilder raw = ContentHelper.GetRawContentHeader(baseResponse);
From 4a8b9d24c9fbddad2db2709abf08820c57a2ba45 Mon Sep 17 00:00:00 2001
From: CarloToso <105941898+CarloToso@users.noreply.github.com>
Date: Fri, 17 Mar 2023 11:12:44 +0100
Subject: [PATCH 2/6] Fix build: add blank lines between regex factory methods
---
.../WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
index fa142b57468..936069498da 100644
--- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
+++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
@@ -26,10 +26,15 @@ public class BasicHtmlWebResponseObject : WebResponseObject
private static readonly Regex s_tagRegex = CreateTagRegex();
private static Regex CreateAttribsRegex() => new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
private static Regex CreateAttribNameValueRegex() => new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
private static Regex CreateImageRegex() => new Regex(@"
]*?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
private static Regex CreateInputFieldRegex() => new Regex(@"]*(/?>|>.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
private static Regex CreateLinkRegex() => new Regex(@"]*(/>|>.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
private static Regex CreateTagRegex() => new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
#endregion Private Fields
From ba1d43715fe763756b62cfb4da6a3056dedcff85 Mon Sep 17 00:00:00 2001
From: CarloToso <105941898+CarloToso@users.noreply.github.com>
Date: Fri, 17 Mar 2023 15:24:39 +0100
Subject: [PATCH 3/6] Move HTML regexes into a static HtmlParser class
---
.../BasicHtmlWebResponseObject.Common.cs | 56 +++++++++----------
1 file changed, 27 insertions(+), 29 deletions(-)
diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
index 936069498da..42ec5be303a 100644
--- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
+++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
@@ -16,29 +16,6 @@ namespace Microsoft.PowerShell.Commands
///
public class BasicHtmlWebResponseObject : WebResponseObject
{
- #region Private Fields
-
- private static readonly Regex s_attribsRegex = CreateAttribsRegex();
- private static readonly Regex s_attribNameValueRegex = CreateAttribNameValueRegex();
- private static readonly Regex s_imageRegex = CreateImageRegex();
- private static readonly Regex s_inputFieldRegex = CreateInputFieldRegex();
- private static readonly Regex s_linkRegex = CreateLinkRegex();
- private static readonly Regex s_tagRegex = CreateTagRegex();
-
- private static Regex CreateAttribsRegex() => new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- private static Regex CreateAttribNameValueRegex() => new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- private static Regex CreateImageRegex() => new Regex(@"
]*?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- private static Regex CreateInputFieldRegex() => new Regex(@"]*(/?>|>.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- private static Regex CreateLinkRegex() => new Regex(@"]*(/>|>.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- private static Regex CreateTagRegex() => new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
- #endregion Private Fields
-
#region Constructors
///
@@ -94,7 +71,7 @@ public WebCmdletElementCollection InputFields
if (_inputFields == null)
{
List parsedFields = new();
- MatchCollection fieldMatch = s_inputFieldRegex.Matches(Content);
+ MatchCollection fieldMatch = HtmlParser.s_inputFieldRegex.Matches(Content);
foreach (Match field in fieldMatch)
{
parsedFields.Add(CreateHtmlObject(field.Value, "INPUT"));
@@ -119,7 +96,7 @@ public WebCmdletElementCollection Links
if (_links == null)
{
List parsedLinks = new();
- MatchCollection linkMatch = s_linkRegex.Matches(Content);
+ MatchCollection linkMatch = HtmlParser.s_linkRegex.Matches(Content);
foreach (Match link in linkMatch)
{
parsedLinks.Add(CreateHtmlObject(link.Value, "A"));
@@ -144,7 +121,7 @@ public WebCmdletElementCollection Images
if (_images == null)
{
List parsedImages = new();
- MatchCollection imageMatch = s_imageRegex.Matches(Content);
+ MatchCollection imageMatch = HtmlParser.s_imageRegex.Matches(Content);
foreach (Match image in imageMatch)
{
parsedImages.Add(CreateHtmlObject(image.Value, "IMG"));
@@ -207,16 +184,16 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject)
{
// Extract just the opening tag of the HTML element (omitting the closing tag and any contents,
// including contained HTML elements)
- Match match = s_tagRegex.Match(outerHtml);
+ Match match = HtmlParser.s_tagRegex.Match(outerHtml);
// Extract all the attribute specifications within the HTML element opening tag
- MatchCollection attribMatches = s_attribsRegex.Matches(match.Value);
+ MatchCollection attribMatches = HtmlParser.s_attribsRegex.Matches(match.Value);
foreach (Match attribMatch in attribMatches)
{
// Extract the name and value for this attribute (allowing for variations like single/double/no
// quotes, and no value at all)
- Match nvMatches = s_attribNameValueRegex.Match(attribMatch.Value);
+ Match nvMatches = HtmlParser.s_attribNameValueRegex.Match(attribMatch.Value);
Debug.Assert(nvMatches.Groups.Count == 5);
// Name is always captured by group #1
@@ -244,4 +221,25 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject)
#endregion Methods
}
+
+ internal static class HtmlParser
+ {
+ internal static Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+ internal static Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+ internal static Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+ internal static Regex s_inputFieldRegex = new Regex(@"]*(/?>|>.*?)",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+ internal static Regex s_linkRegex = new Regex(@"]*(/>|>.*?)",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+ internal static Regex s_imageRegex = new Regex(@"
]*?>",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ }
}
From f5e90a15110938dde13a2ea5c585252d948bb8b0 Mon Sep 17 00:00:00 2001
From: CarloToso <105941898+CarloToso@users.noreply.github.com>
Date: Fri, 17 Mar 2023 19:36:22 +0100
Subject: [PATCH 4/6] Follow review suggestions: nest HtmlParser as a private class
---
.../BasicHtmlWebResponseObject.Common.cs | 31 ++++++++++---------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
index 42ec5be303a..a6887d922d1 100644
--- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
+++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
@@ -220,26 +220,27 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject)
}
#endregion Methods
- }
- internal static class HtmlParser
- {
- internal static Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ // This class is needed so the static Regexes are initialized only the first time they are used
+ private static class HtmlParser
+ {
+ internal static Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static Regex s_inputFieldRegex = new Regex(@"]*(/?>|>.*?)",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static Regex s_inputFieldRegex = new Regex(@"]*(/?>|>.*?)",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static Regex s_linkRegex = new Regex(@"]*(/>|>.*?)",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static Regex s_linkRegex = new Regex(@"]*(/>|>.*?)",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static Regex s_imageRegex = new Regex(@"
]*?>",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static Regex s_imageRegex = new Regex(@"
]*?>",
+ RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ }
}
}
From c0dbd6e138041a57e38b823e02bc3a29158c13e2 Mon Sep 17 00:00:00 2001
From: CarloToso <105941898+CarloToso@users.noreply.github.com>
Date: Sat, 18 Mar 2023 10:02:55 +0100
Subject: [PATCH 5/6] Mark HtmlParser regex fields readonly
---
.../Common/BasicHtmlWebResponseObject.Common.cs | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
index a6887d922d1..152a2483a96 100644
--- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
+++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
@@ -224,22 +224,22 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject)
// This class is needed so the static Regexes are initialized only the first time they are used
private static class HtmlParser
{
- internal static Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>",
+ internal static readonly Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>",
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)",
+ internal static readonly Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)",
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?",
+ internal static readonly Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?",
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static Regex s_inputFieldRegex = new Regex(@"]*(/?>|>.*?)",
+ internal static readonly Regex s_inputFieldRegex = new Regex(@"]*(/?>|>.*?)",
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static Regex s_linkRegex = new Regex(@"]*(/>|>.*?)",
+ internal static readonly Regex s_linkRegex = new Regex(@"]*(/>|>.*?)",
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static Regex s_imageRegex = new Regex(@"
]*?>",
+ internal static readonly Regex s_imageRegex = new Regex(@"
]*?>",
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
}
}
From d140ed579d791924d47fe433f936d9930a59b6f6 Mon Sep 17 00:00:00 2001
From: CarloToso <105941898+CarloToso@users.noreply.github.com>
Date: Sat, 18 Mar 2023 16:34:32 +0100
Subject: [PATCH 6/6] Resolve CodeFactor issues: rename regex fields to PascalCase and order them alphabetically
---
.../BasicHtmlWebResponseObject.Common.cs | 30 ++++++++-----------
1 file changed, 12 insertions(+), 18 deletions(-)
diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
index 152a2483a96..8c15ac96d97 100644
--- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
+++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
@@ -71,7 +71,7 @@ public WebCmdletElementCollection InputFields
if (_inputFields == null)
{
List parsedFields = new();
- MatchCollection fieldMatch = HtmlParser.s_inputFieldRegex.Matches(Content);
+ MatchCollection fieldMatch = HtmlParser.InputFieldRegex.Matches(Content);
foreach (Match field in fieldMatch)
{
parsedFields.Add(CreateHtmlObject(field.Value, "INPUT"));
@@ -96,7 +96,7 @@ public WebCmdletElementCollection Links
if (_links == null)
{
List parsedLinks = new();
- MatchCollection linkMatch = HtmlParser.s_linkRegex.Matches(Content);
+ MatchCollection linkMatch = HtmlParser.LinkRegex.Matches(Content);
foreach (Match link in linkMatch)
{
parsedLinks.Add(CreateHtmlObject(link.Value, "A"));
@@ -121,7 +121,7 @@ public WebCmdletElementCollection Images
if (_images == null)
{
List parsedImages = new();
- MatchCollection imageMatch = HtmlParser.s_imageRegex.Matches(Content);
+ MatchCollection imageMatch = HtmlParser.ImageRegex.Matches(Content);
foreach (Match image in imageMatch)
{
parsedImages.Add(CreateHtmlObject(image.Value, "IMG"));
@@ -184,16 +184,16 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject)
{
// Extract just the opening tag of the HTML element (omitting the closing tag and any contents,
// including contained HTML elements)
- Match match = HtmlParser.s_tagRegex.Match(outerHtml);
+ Match match = HtmlParser.TagRegex.Match(outerHtml);
// Extract all the attribute specifications within the HTML element opening tag
- MatchCollection attribMatches = HtmlParser.s_attribsRegex.Matches(match.Value);
+ MatchCollection attribMatches = HtmlParser.AttribsRegex.Matches(match.Value);
foreach (Match attribMatch in attribMatches)
{
// Extract the name and value for this attribute (allowing for variations like single/double/no
// quotes, and no value at all)
- Match nvMatches = HtmlParser.s_attribNameValueRegex.Match(attribMatch.Value);
+ Match nvMatches = HtmlParser.AttribNameValueRegex.Match(attribMatch.Value);
Debug.Assert(nvMatches.Groups.Count == 5);
// Name is always captured by group #1
@@ -224,23 +224,17 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject)
// This class is needed so the static Regexes are initialized only the first time they are used
private static class HtmlParser
{
- internal static readonly Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static readonly Regex AttribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static readonly Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static readonly Regex AttribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static readonly Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static readonly Regex ImageRegex = new Regex(@"
]*?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static readonly Regex s_inputFieldRegex = new Regex(@"]*(/?>|>.*?)",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static readonly Regex InputFieldRegex = new Regex(@"]*(/?>|>.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static readonly Regex s_linkRegex = new Regex(@"]*(/>|>.*?)",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static readonly Regex LinkRegex = new Regex(@"]*(/>|>.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
- internal static readonly Regex s_imageRegex = new Regex(@"
]*?>",
- RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ internal static readonly Regex TagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
}
}
}