From 0118018aaf4c1d43cb072bee9dca9a06c3a6dfed Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Fri, 17 Mar 2023 10:14:52 +0100 Subject: [PATCH 1/6] regex --- .../BasicHtmlWebResponseObject.Common.cs | 47 +++++-------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs index 8e8c099c661..fa142b57468 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs @@ -18,12 +18,19 @@ public class BasicHtmlWebResponseObject : WebResponseObject { #region Private Fields - private static Regex s_attribNameValueRegex; - private static Regex s_attribsRegex; - private static Regex s_imageRegex; - private static Regex s_inputFieldRegex; - private static Regex s_linkRegex; - private static Regex s_tagRegex; + private static readonly Regex s_attribsRegex = CreateAttribsRegex(); + private static readonly Regex s_attribNameValueRegex = CreateAttribNameValueRegex(); + private static readonly Regex s_imageRegex = CreateImageRegex(); + private static readonly Regex s_inputFieldRegex = CreateInputFieldRegex(); + private static readonly Regex s_linkRegex = CreateLinkRegex(); + private static readonly Regex s_tagRegex = CreateTagRegex(); + + private static Regex CreateAttribsRegex() => new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static Regex CreateAttribNameValueRegex() => new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", RegexOptions.Singleline | RegexOptions.IgnoreCase 
| RegexOptions.Compiled); + private static Regex CreateImageRegex() => new Regex(@"<img\s[^>]*?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static Regex CreateInputFieldRegex() => new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static Regex CreateLinkRegex() => new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static Regex CreateTagRegex() => new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); #endregion Private Fields @@ -43,7 +50,6 @@ public BasicHtmlWebResponseObject(HttpResponseMessage response) : this(response, /// public BasicHtmlWebResponseObject(HttpResponseMessage response, Stream contentStream) : base(response, contentStream) { - EnsureHtmlParser(); InitializeContent(); InitializeRawContent(response); } @@ -82,8 +88,6 @@ public WebCmdletElementCollection InputFields { if (_inputFields == null) { - EnsureHtmlParser(); - List<PSObject> parsedFields = new(); MatchCollection fieldMatch = s_inputFieldRegex.Matches(Content); foreach (Match field in fieldMatch) @@ -109,8 +113,6 @@ public WebCmdletElementCollection Links { if (_links == null) { - EnsureHtmlParser(); - List<PSObject> parsedLinks = new(); MatchCollection linkMatch = s_linkRegex.Matches(Content); foreach (Match link in linkMatch) @@ -136,8 +138,6 @@ public WebCmdletElementCollection Images { if (_images == null) { - EnsureHtmlParser(); - List<PSObject> parsedImages = new(); MatchCollection imageMatch = s_imageRegex.Matches(Content); foreach (Match image in imageMatch) @@ -188,27 +188,6 @@ private static PSObject CreateHtmlObject(string html, string tagName) return elementObject; } - private static void EnsureHtmlParser() - { - s_tagRegex ??= new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", -
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - s_attribsRegex ??= new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - s_attribNameValueRegex ??= new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - s_inputFieldRegex ??= new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - s_linkRegex ??= new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - s_imageRegex ??= new Regex(@"<img\s[^>]*?>", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - } - private void InitializeRawContent(HttpResponseMessage baseResponse) { StringBuilder raw = ContentHelper.GetRawContentHeader(baseResponse); From 4a8b9d24c9fbddad2db2709abf08820c57a2ba45 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:12:44 +0100 Subject: [PATCH 2/6] fix build --- .../WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs index fa142b57468..936069498da 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs @@ -26,10 +26,15 @@ public class BasicHtmlWebResponseObject : WebResponseObject private static readonly Regex s_tagRegex = CreateTagRegex(); private static Regex CreateAttribsRegex() => new
Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static Regex CreateAttribNameValueRegex() => new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static Regex CreateImageRegex() => new Regex(@"<img\s[^>]*?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static Regex CreateInputFieldRegex() => new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static Regex CreateLinkRegex() => new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static Regex CreateTagRegex() => new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); #endregion Private Fields From ba1d43715fe763756b62cfb4da6a3056dedcff85 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Fri, 17 Mar 2023 15:24:39 +0100 Subject: [PATCH 3/6] class HtmlParser --- .../BasicHtmlWebResponseObject.Common.cs | 56 +++++++++---------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs index 936069498da..42ec5be303a 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs @@ -16,29 +16,6 @@ namespace Microsoft.PowerShell.Commands /// public class BasicHtmlWebResponseObject :
WebResponseObject { - #region Private Fields - - private static readonly Regex s_attribsRegex = CreateAttribsRegex(); - private static readonly Regex s_attribNameValueRegex = CreateAttribNameValueRegex(); - private static readonly Regex s_imageRegex = CreateImageRegex(); - private static readonly Regex s_inputFieldRegex = CreateInputFieldRegex(); - private static readonly Regex s_linkRegex = CreateLinkRegex(); - private static readonly Regex s_tagRegex = CreateTagRegex(); - - private static Regex CreateAttribsRegex() => new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - private static Regex CreateAttribNameValueRegex() => new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - private static Regex CreateImageRegex() => new Regex(@"<img\s[^>]*?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - private static Regex CreateInputFieldRegex() => new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - private static Regex CreateLinkRegex() => new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - private static Regex CreateTagRegex() => new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - #endregion Private Fields - #region Constructors /// @@ -94,7 +71,7 @@ public WebCmdletElementCollection InputFields if (_inputFields == null) { List<PSObject> parsedFields = new(); - MatchCollection fieldMatch = s_inputFieldRegex.Matches(Content); + MatchCollection fieldMatch = HtmlParser.s_inputFieldRegex.Matches(Content); foreach (Match field in fieldMatch) { parsedFields.Add(CreateHtmlObject(field.Value, "INPUT")); @@ -119,7 +96,7 @@ public
WebCmdletElementCollection Links if (_links == null) { List<PSObject> parsedLinks = new(); - MatchCollection linkMatch = s_linkRegex.Matches(Content); + MatchCollection linkMatch = HtmlParser.s_linkRegex.Matches(Content); foreach (Match link in linkMatch) { parsedLinks.Add(CreateHtmlObject(link.Value, "A")); @@ -144,7 +121,7 @@ public WebCmdletElementCollection Images if (_images == null) { List<PSObject> parsedImages = new(); - MatchCollection imageMatch = s_imageRegex.Matches(Content); + MatchCollection imageMatch = HtmlParser.s_imageRegex.Matches(Content); foreach (Match image in imageMatch) { parsedImages.Add(CreateHtmlObject(image.Value, "IMG")); @@ -207,16 +184,16 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject) { // Extract just the opening tag of the HTML element (omitting the closing tag and any contents, // including contained HTML elements) - Match match = s_tagRegex.Match(outerHtml); + Match match = HtmlParser.s_tagRegex.Match(outerHtml); // Extract all the attribute specifications within the HTML element opening tag - MatchCollection attribMatches = s_attribsRegex.Matches(match.Value); + MatchCollection attribMatches = HtmlParser.s_attribsRegex.Matches(match.Value); foreach (Match attribMatch in attribMatches) { // Extract the name and value for this attribute (allowing for variations like single/double/no // quotes, and no value at all) - Match nvMatches = s_attribNameValueRegex.Match(attribMatch.Value); + Match nvMatches = HtmlParser.s_attribNameValueRegex.Match(attribMatch.Value); Debug.Assert(nvMatches.Groups.Count == 5); // Name is always captured by group #1 @@ -244,4 +221,25 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject) #endregion Methods } + + internal static class HtmlParser + { + internal static Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + + internal
static Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + + internal static Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + + internal static Regex s_inputFieldRegex = new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + + internal static Regex s_linkRegex = new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + + internal static Regex s_imageRegex = new Regex(@"<img\s[^>]*?>", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + } } From f5e90a15110938dde13a2ea5c585252d948bb8b0 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Fri, 17 Mar 2023 19:36:22 +0100 Subject: [PATCH 4/6] follow suggestions --- .../BasicHtmlWebResponseObject.Common.cs | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs index 42ec5be303a..a6887d922d1 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs @@ -220,26 +220,27 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject) } #endregion Methods - } - internal static class HtmlParser - { - internal static Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", -
RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + // This class is needed so the static Regexes are initialized only the first time they are used + private static class HtmlParser + { + internal static Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static Regex s_inputFieldRegex = new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static Regex s_inputFieldRegex = new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static Regex s_linkRegex = new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static Regex s_linkRegex = new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static Regex s_imageRegex = new Regex(@"<img\s[^>]*?>", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static Regex s_imageRegex = new
Regex(@"<img\s[^>]*?>", + RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + } } } From c0dbd6e138041a57e38b823e02bc3a29158c13e2 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Sat, 18 Mar 2023 10:02:55 +0100 Subject: [PATCH 5/6] readonly --- .../Common/BasicHtmlWebResponseObject.Common.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs index a6887d922d1..152a2483a96 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs @@ -224,22 +224,22 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject) // This class is needed so the static Regexes are initialized only the first time they are used private static class HtmlParser { - internal static Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", + internal static readonly Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", + internal static readonly Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", + internal static readonly
Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static Regex s_inputFieldRegex = new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", + internal static readonly Regex s_inputFieldRegex = new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static Regex s_linkRegex = new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", + internal static readonly Regex s_linkRegex = new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static Regex s_imageRegex = new Regex(@"<img\s[^>]*?>", + internal static readonly Regex s_imageRegex = new Regex(@"<img\s[^>]*?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); } } From d140ed579d791924d47fe433f936d9930a59b6f6 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Sat, 18 Mar 2023 16:34:32 +0100 Subject: [PATCH 6/6] solve codefactor issues, ABC Regex --- .../BasicHtmlWebResponseObject.Common.cs | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs index 152a2483a96..8c15ac96d97 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs @@ -71,7 +71,7 @@ public WebCmdletElementCollection InputFields if (_inputFields == null) { List<PSObject> parsedFields = new(); - MatchCollection fieldMatch = HtmlParser.s_inputFieldRegex.Matches(Content); + MatchCollection fieldMatch = HtmlParser.InputFieldRegex.Matches(Content); foreach (Match field in
fieldMatch) { parsedFields.Add(CreateHtmlObject(field.Value, "INPUT")); @@ -96,7 +96,7 @@ public WebCmdletElementCollection Links if (_links == null) { List<PSObject> parsedLinks = new(); - MatchCollection linkMatch = HtmlParser.s_linkRegex.Matches(Content); + MatchCollection linkMatch = HtmlParser.LinkRegex.Matches(Content); foreach (Match link in linkMatch) { parsedLinks.Add(CreateHtmlObject(link.Value, "A")); @@ -121,7 +121,7 @@ public WebCmdletElementCollection Images if (_images == null) { List<PSObject> parsedImages = new(); - MatchCollection imageMatch = HtmlParser.s_imageRegex.Matches(Content); + MatchCollection imageMatch = HtmlParser.ImageRegex.Matches(Content); foreach (Match image in imageMatch) { parsedImages.Add(CreateHtmlObject(image.Value, "IMG")); @@ -184,16 +184,16 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject) { // Extract just the opening tag of the HTML element (omitting the closing tag and any contents, // including contained HTML elements) - Match match = HtmlParser.s_tagRegex.Match(outerHtml); + Match match = HtmlParser.TagRegex.Match(outerHtml); // Extract all the attribute specifications within the HTML element opening tag - MatchCollection attribMatches = HtmlParser.s_attribsRegex.Matches(match.Value); + MatchCollection attribMatches = HtmlParser.AttribsRegex.Matches(match.Value); foreach (Match attribMatch in attribMatches) { // Extract the name and value for this attribute (allowing for variations like single/double/no // quotes, and no value at all) - Match nvMatches = HtmlParser.s_attribNameValueRegex.Match(attribMatch.Value); + Match nvMatches = HtmlParser.AttribNameValueRegex.Match(attribMatch.Value); Debug.Assert(nvMatches.Groups.Count == 5); // Name is always captured by group #1 @@ -224,23 +224,17 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject) // This class is needed so the static Regexes are initialized only the first time they are used private static class HtmlParser { - internal
static readonly Regex s_tagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static readonly Regex AttribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static readonly Regex s_attribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static readonly Regex AttribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static readonly Regex s_attribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static readonly Regex ImageRegex = new Regex(@"<img\s[^>]*?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static readonly Regex s_inputFieldRegex = new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static readonly Regex InputFieldRegex = new Regex(@"<input\s+[^>]*(/?>|>.*?</input>)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static readonly Regex s_linkRegex = new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static readonly Regex LinkRegex = new Regex(@"<a\s+[^>]*(/>|>.*?</a>)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - internal static readonly Regex s_imageRegex = new Regex(@"<img\s[^>]*?>", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + internal static readonly Regex TagRegex = new
Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); } } }