From 965d325a8f86301b06cf06d8170833400170bbb8 Mon Sep 17 00:00:00 2001 From: James Truher Date: Mon, 19 Jun 2017 16:43:33 -0700 Subject: [PATCH 01/14] Unify file encoding and set new default to UTF8 without BOM Create new class PowerShellEncoding and enum FileEncoding to unify cmdlet and provider code for file encoding. Created PowerShellEncoding class and FileEncoding enum and removed ClrFacade.GetDefaultEncoding. PSDefaultFileEncoding preference variable now can set file encoding across all cmdlets. Setting PSDefaultFileEncoding to WindowsLegacy will set file encoding to historic PowerShell5 encodings. --- .../commands/utility/CSVCommands.cs | 9 +- .../commands/utility/ExportAliasCommand.cs | 2 +- .../FormatAndOutput/format-hex/Format-Hex.cs | 11 +- .../FormatAndOutput/out-file/Out-File.cs | 17 +- .../utility/ImplicitRemotingCommands.cs | 16 +- .../commands/utility/MatchString.cs | 16 +- .../commands/utility/XmlCommands.cs | 4 +- .../engine/ExternalScriptInfo.cs | 3 +- .../engine/InitialSessionState.cs | 8 + .../Modules/NewModuleManifestCommand.cs | 9 +- .../engine/SpecialVariables.cs | 5 + .../engine/Utils.cs | 3 +- .../commands/NewPSSessionConfigurationFile.cs | 6 +- .../namespaces/FileSystemContentStream.cs | 9 +- .../namespaces/FileSystemProvider.cs | 29 +- .../resources/RunspaceInit.resx | 3 + .../utils/Encoding.cs | 408 ++++++++++++++++++ .../utils/PathUtils.cs | 44 +- .../Parser/RedirectionOperator.Tests.ps1 | 15 +- .../TestGetCommand.Tests.ps1 | 9 +- test/powershell/engine/Encoding.Tests.ps1 | 170 ++++++++ 21 files changed, 693 insertions(+), 103 deletions(-) create mode 100644 src/System.Management.Automation/utils/Encoding.cs create mode 100644 test/powershell/engine/Encoding.Tests.ps1 diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs index 6313a9a5ddc..9c9c8a7c03b 100644 --- 
a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs @@ -10,6 +10,7 @@ using System.IO; using System.Linq; using System.Management.Automation; +using Microsoft.PowerShell; using System.Text; using Dbg = System.Management.Automation.Diagnostics; @@ -206,8 +207,7 @@ public SwitchParameter NoClobber /// Encoding optional flag /// [Parameter()] - [ValidateSetAttribute(new string[] { "Unicode", "UTF7", "UTF8", "ASCII", "UTF32", "BigEndianUnicode", "Default", "OEM" })] - public string Encoding { get; set; } + public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; /// /// Property that sets append parameter. @@ -367,7 +367,7 @@ private void CreateFileStream() PathUtils.MasterStreamOpen( this, this.Path, - Encoding ?? "ASCII", + Encoding, false, // defaultEncoding Append, Force, @@ -571,8 +571,7 @@ public SwitchParameter UseCulture /// Encoding optional flag /// [Parameter()] - [ValidateSetAttribute(new[] { "Unicode", "UTF7", "UTF8", "ASCII", "UTF32", "BigEndianUnicode", "Default", "OEM" })] - public string Encoding { get; set; } + public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; /// /// Avoid writing out duplicate warning messages when there are diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ExportAliasCommand.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ExportAliasCommand.cs index 26e75084930..9c7ae6314d6 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ExportAliasCommand.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ExportAliasCommand.cs @@ -387,7 +387,7 @@ private StreamWriter OpenFile(out FileInfo readOnlyFileInfo) PathUtils.MasterStreamOpen( this, this.Path, - EncodingConversion.Unicode, + FileEncoding.Unicode, false, // defaultEncoding Append, Force, diff --git 
a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs index b1c65d3f2b1..e6d0a4d71f2 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs @@ -46,14 +46,7 @@ public sealed class FormatHex : PSCmdlet /// Type of character encoding for InputObject /// [Parameter(ParameterSetName = "ByInputObject")] - [ValidateSetAttribute(new string[] { - EncodingConversion.Unicode, - EncodingConversion.BigEndianUnicode, - EncodingConversion.Utf8, - EncodingConversion.Utf7, - EncodingConversion.Utf32, - EncodingConversion.Ascii})] - public string Encoding { get; set; } = "Ascii"; + public FileEncoding Encoding { get; set; } = FileEncoding.UTF8NoBOM; /// /// This parameter is no-op @@ -239,7 +232,7 @@ private void ProcessObjectContent(PSObject inputObject) else if (obj is string) { string inputString = obj.ToString(); - Encoding resolvedEncoding = EncodingConversion.Convert(this, Encoding); + Encoding resolvedEncoding = PowerShellEncoding.GetEncoding(this, Encoding); inputBytes = resolvedEncoding.GetBytes(inputString); } diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/out-file/Out-File.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/out-file/Out-File.cs index b8fd1cc09b6..d61c109a588 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/out-file/Out-File.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/out-file/Out-File.cs @@ -8,6 +8,7 @@ using System.Management.Automation.Host; using System.IO; using Microsoft.PowerShell.Commands.Internal.Format; +using Microsoft.PowerShell; namespace Microsoft.PowerShell.Commands { @@ -72,25 +73,13 @@ 
public string LiteralPath /// /// [Parameter(Position = 1)] - [ValidateNotNullOrEmpty] - [ValidateSetAttribute(new string[] { - EncodingConversion.Unknown, - EncodingConversion.String, - EncodingConversion.Unicode, - EncodingConversion.BigEndianUnicode, - EncodingConversion.Utf8, - EncodingConversion.Utf7, - EncodingConversion.Utf32, - EncodingConversion.Ascii, - EncodingConversion.Default, - EncodingConversion.OEM })] - public string Encoding + public FileEncoding Encoding { get { return _encoding; } set { _encoding = value; } } - private string _encoding; + private FileEncoding _encoding; /// /// Property that sets append parameter. diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs index e694403c6b8..8f34eb1c50a 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs @@ -77,19 +77,7 @@ public SwitchParameter Force /// Encoding optional flag /// [Parameter] - [ValidateSetAttribute(new string[] { "Unicode", "UTF7", "UTF8", "ASCII", "UTF32", "BigEndianUnicode", "Default", "OEM" })] - public string Encoding - { - get - { - return _encoding.GetType().Name; - } - set - { - _encoding = EncodingConversion.Convert(this, value); - } - } - private Encoding _encoding = System.Text.Encoding.UTF8; + public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; #endregion Parameters @@ -144,7 +132,7 @@ protected override void BeginProcessing() List generatedFiles = GenerateProxyModule( tempDirectory, Path.GetFileName(directory.FullName), - _encoding, + PowerShellEncoding.GetEncoding(this, Encoding), _force, listOfCommandMetadata, alias2resolvedCommandName, diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs 
b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs index 5cc6d26738b..b76b8c4d61a 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs @@ -1200,17 +1200,7 @@ public SwitchParameter AllMatches /// The text encoding to process each file as. /// [Parameter] - [ValidateNotNullOrEmpty] - [ValidateSetAttribute(new string[] { - EncodingConversion.Unicode, - EncodingConversion.Utf7, - EncodingConversion.Utf8, - EncodingConversion.Utf32, - EncodingConversion.Ascii, - EncodingConversion.BigEndianUnicode, - EncodingConversion.Default, - EncodingConversion.OEM })] - public string Encoding { get; set; } + public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; private System.Text.Encoding _textEncoding; @@ -1282,9 +1272,9 @@ public SwitchParameter AllMatches protected override void BeginProcessing() { // Process encoding switch. - if (Encoding != null) + if (Encoding != FileEncoding.Unknown ) { - _textEncoding = EncodingConversion.Convert(this, Encoding); + _textEncoding = PowerShellEncoding.GetEncoding(this, Encoding); } else { diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs index 2a9263060bf..d677c6600e0 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs @@ -14,6 +14,7 @@ using System.Security; using System.Text; using System.Xml; +using Microsoft.PowerShell; using Dbg = System.Management.Automation.Diagnostics; namespace Microsoft.PowerShell.Commands @@ -108,8 +109,7 @@ public SwitchParameter NoClobber /// /// [Parameter] - [ValidateSetAttribute(new string[] { "Unicode", "UTF7", "UTF8", "ASCII", "UTF32", "BigEndianUnicode", "Default", "OEM" })] - public string Encoding { get; set; } = "Unicode"; + 
public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; #endregion Command Line Parameters diff --git a/src/System.Management.Automation/engine/ExternalScriptInfo.cs b/src/System.Management.Automation/engine/ExternalScriptInfo.cs index 0a2897ed506..e7779b9467b 100644 --- a/src/System.Management.Automation/engine/ExternalScriptInfo.cs +++ b/src/System.Management.Automation/engine/ExternalScriptInfo.cs @@ -6,6 +6,7 @@ using System.Text; using System.Collections.ObjectModel; using System.Management.Automation.Runspaces; +using Microsoft.PowerShell; using Microsoft.PowerShell.Commands; using System.Collections.Generic; using System.Management.Automation.Language; @@ -467,7 +468,7 @@ private void ReadScriptContents() { using (FileStream readerStream = new FileStream(_path, FileMode.Open, FileAccess.Read)) { - Encoding defaultEncoding = ClrFacade.GetDefaultEncoding(); + Encoding defaultEncoding = PowerShellEncoding.GetDefaultEncoding(); Microsoft.Win32.SafeHandles.SafeFileHandle safeFileHandle = readerStream.SafeFileHandle; using (StreamReader scriptReader = new StreamReader(readerStream, defaultEncoding)) diff --git a/src/System.Management.Automation/engine/InitialSessionState.cs b/src/System.Management.Automation/engine/InitialSessionState.cs index 235b1dc943a..b28af452ac0 100644 --- a/src/System.Management.Automation/engine/InitialSessionState.cs +++ b/src/System.Management.Automation/engine/InitialSessionState.cs @@ -4828,6 +4828,7 @@ .ForwardHelpCategory Cmdlet internal const ActionPreference defaultVerbosePreference = ActionPreference.SilentlyContinue; internal const ActionPreference defaultWarningPreference = ActionPreference.Continue; internal const ActionPreference defaultInformationPreference = ActionPreference.SilentlyContinue; + internal const Microsoft.PowerShell.FileEncoding defaultFileEncodingPreference = Microsoft.PowerShell.FileEncoding.Unknown; internal const bool defaultWhatIfPreference = false; internal const ConfirmImpact 
defaultConfirmPreference = ConfirmImpact.High; @@ -4908,6 +4909,13 @@ .ForwardHelpCategory Cmdlet ScopedItemOptions.None, new ArgumentTypeConverterAttribute(typeof(ActionPreference)) ), + new SessionStateVariableEntry( + SpecialVariables.DefaultFileEncodingPreference, + defaultFileEncodingPreference, + RunspaceInit.DefaultFileEncodingDescription, + ScopedItemOptions.None, + new ArgumentTypeConverterAttribute(typeof(Microsoft.PowerShell.FileEncoding)) + ), new SessionStateVariableEntry( SpecialVariables.ErrorView, "NormalView", diff --git a/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs b/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs index 1ea15017e1d..ac0a89da8a5 100644 --- a/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs +++ b/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs @@ -14,6 +14,7 @@ using System.Management.Automation; using System.Management.Automation.Internal; using System.Diagnostics.CodeAnalysis; +using Microsoft.PowerShell; using Dbg = System.Management.Automation.Diagnostics; // @@ -940,12 +941,8 @@ protected override void EndProcessing() // Now open the output file... 
PathUtils.MasterStreamOpen( this, - filePath, -#if UNIX - new UTF8Encoding(false), // UTF-8, no BOM -#else - EncodingConversion.Unicode, // UTF-16 with BOM -#endif + filePath, + FileEncoding.Unknown, /* defaultEncoding */ false, /* Append */ false, /* Force */ false, diff --git a/src/System.Management.Automation/engine/SpecialVariables.cs b/src/System.Management.Automation/engine/SpecialVariables.cs index 835384138f6..97f04b75116 100644 --- a/src/System.Management.Automation/engine/SpecialVariables.cs +++ b/src/System.Management.Automation/engine/SpecialVariables.cs @@ -196,6 +196,9 @@ internal static class SpecialVariables internal const string InformationPreference = "InformationPreference"; internal static readonly VariablePath InformationPreferenceVarPath = new VariablePath(InformationPreference); + internal const string DefaultFileEncodingPreference = "PSDefaultFileEncoding"; + internal static readonly VariablePath DefaultFileEncodingVarPath = new VariablePath(DefaultFileEncodingPreference); + #endregion Preference Variables internal const string ErrorView = "ErrorView"; @@ -279,6 +282,7 @@ internal static class SpecialVariables SpecialVariables.WarningPreference, SpecialVariables.InformationPreference, SpecialVariables.ConfirmPreference, + SpecialVariables.DefaultFileEncodingPreference, }; internal static readonly Type[] PreferenceVariableTypes = { @@ -289,6 +293,7 @@ internal static class SpecialVariables /* WarningPreference */ typeof(ActionPreference), /* InformationPreference */ typeof(ActionPreference), /* ConfirmPreference */ typeof(ConfirmImpact), + /* PSDefaultFileEncoding */ typeof(Microsoft.PowerShell.FileEncoding), }; // The following variables are created in every session w/ AllScope. 
We avoid creating local slots when we diff --git a/src/System.Management.Automation/engine/Utils.cs b/src/System.Management.Automation/engine/Utils.cs index a5d9611202d..f465af5fe10 100644 --- a/src/System.Management.Automation/engine/Utils.cs +++ b/src/System.Management.Automation/engine/Utils.cs @@ -8,6 +8,7 @@ using System.Management.Automation.Internal; using System.Management.Automation.Security; using System.Reflection; +using Microsoft.PowerShell; using Microsoft.PowerShell.Commands; using Microsoft.Win32; using System.Globalization; @@ -1309,7 +1310,7 @@ internal static Encoding GetEncodingFromEnum(FileSystemCmdletProviderEncoding en break; case FileSystemCmdletProviderEncoding.Default: - result = ClrFacade.GetDefaultEncoding(); + result = PowerShellEncoding.GetDefaultEncoding(); break; case FileSystemCmdletProviderEncoding.Oem: diff --git a/src/System.Management.Automation/engine/remoting/commands/NewPSSessionConfigurationFile.cs b/src/System.Management.Automation/engine/remoting/commands/NewPSSessionConfigurationFile.cs index 065f37b379a..c80761fff9c 100644 --- a/src/System.Management.Automation/engine/remoting/commands/NewPSSessionConfigurationFile.cs +++ b/src/System.Management.Automation/engine/remoting/commands/NewPSSessionConfigurationFile.cs @@ -604,7 +604,7 @@ protected override void ProcessRecord() PathUtils.MasterStreamOpen( this, filePath, - EncodingConversion.Unicode, + FileEncoding.Unicode, /* defaultEncoding */ false, /* Append */ false, /* Force */ false, @@ -1460,7 +1460,7 @@ protected override void ProcessRecord() PathUtils.MasterStreamOpen( this, filePath, - EncodingConversion.Unicode, + FileEncoding.Unicode, /* defaultEncoding */ false, /* Append */ false, /* Force */ false, @@ -2029,4 +2029,4 @@ internal static string CombineHashTableOrStringArray(object[] values, StreamWrit } #endregion -} \ No newline at end of file +} diff --git a/src/System.Management.Automation/namespaces/FileSystemContentStream.cs 
b/src/System.Management.Automation/namespaces/FileSystemContentStream.cs index 999eca632bb..a7e087094c8 100644 --- a/src/System.Management.Automation/namespaces/FileSystemContentStream.cs +++ b/src/System.Management.Automation/namespaces/FileSystemContentStream.cs @@ -8,6 +8,7 @@ using System.Diagnostics.CodeAnalysis; using System.IO; using System.Runtime.InteropServices; +using Microsoft.PowerShell; using System.Text; using System.Management.Automation; using System.Management.Automation.Internal; @@ -796,7 +797,7 @@ private bool ReadByteEncoded(bool waitChanges, ArrayList blocks, bool readBack) // the changes if (waitChanges) { - WaitForChanges(_path, _mode, _access, _share, ClrFacade.GetDefaultEncoding()); + WaitForChanges(_path, _mode, _access, _share, PowerShellEncoding.GetDefaultEncoding()); byteRead = _stream.ReadByte(); } } @@ -1160,8 +1161,10 @@ internal FileStreamBackReader(FileStream fileStream, Encoding encoding) _currentPosition = _stream.Position; // Get the oem encoding and system current ANSI code page - _oemEncoding = EncodingConversion.Convert(null, EncodingConversion.OEM); - _defaultAnsiEncoding = EncodingConversion.Convert(null, EncodingConversion.Default); + // _oemEncoding = EncodingConversion.Convert(null, EncodingConversion.OEM); + // _defaultAnsiEncoding = EncodingConversion.Convert(null, EncodingConversion.Default); + _oemEncoding = PowerShellEncoding.GetEncoding(FileEncoding.Oem); + _defaultAnsiEncoding = PowerShellEncoding.GetEncoding(FileEncoding.Default); } } diff --git a/src/System.Management.Automation/namespaces/FileSystemProvider.cs b/src/System.Management.Automation/namespaces/FileSystemProvider.cs index 7205120bdd1..e73b207f75c 100644 --- a/src/System.Management.Automation/namespaces/FileSystemProvider.cs +++ b/src/System.Management.Automation/namespaces/FileSystemProvider.cs @@ -23,6 +23,7 @@ using Dbg = System.Management.Automation; using System.Runtime.InteropServices; using System.Management.Automation.Runspaces; +using 
Microsoft.PowerShell; namespace Microsoft.PowerShell.Commands { @@ -6557,7 +6558,7 @@ public IContentReader GetContentReader(string path) // Defaults for the file read operation string delimiter = "\n"; - Encoding encoding = ClrFacade.GetDefaultEncoding(); + Encoding encoding = PowerShellEncoding.GetDefaultEncoding(); bool waitForChanges = false; bool streamTypeSpecified = false; @@ -6592,7 +6593,7 @@ public IContentReader GetContentReader(string path) if (streamTypeSpecified) { - encoding = dynParams.EncodingType; + encoding = PowerShellEncoding.GetProviderEncoding(this, dynParams.Encoding); } // Get the wait value @@ -6720,7 +6721,8 @@ public IContentWriter GetContentWriter(string path) // If this is true, then the content will be read as bytes bool usingByteEncoding = false; bool streamTypeSpecified = false; - Encoding encoding = ClrFacade.GetDefaultEncoding(); + // we need to discover the encoding + Encoding encoding = PowerShellEncoding.GetProviderEncoding(this, FileEncoding.Unknown); FileMode filemode = FileMode.OpenOrCreate; string streamName = null; bool suppressNewline = false; @@ -6739,7 +6741,7 @@ public IContentWriter GetContentWriter(string path) if (streamTypeSpecified) { - encoding = dynParams.EncodingType; + encoding = PowerShellEncoding.GetProviderEncoding(this, dynParams.Encoding); } streamName = dynParams.Stream; @@ -7610,7 +7612,8 @@ public class FileSystemContentDynamicParametersBase /// reading data from the file. /// [Parameter] - public FileSystemCmdletProviderEncoding Encoding { get; set; } = FileSystemCmdletProviderEncoding.String; + // public FileSystemCmdletProviderEncoding Encoding { get; set; } = FileSystemCmdletProviderEncoding.String; + public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; /// /// A parameter to return a stream of an item. 
@@ -7618,18 +7621,6 @@ public class FileSystemContentDynamicParametersBase [Parameter] public String Stream { get; set; } - - /// - /// Gets the encoding from the specified StreamType parameter. - /// - public Encoding EncodingType - { - get - { - return Utils.GetEncodingFromEnum(Encoding); - } - } // EncodingType - /// /// Gets the Byte Encoding status of the StreamType parameter. Returns true /// if the stream was opened with "Byte" encoding, false otherwise. @@ -7638,7 +7629,7 @@ public bool UsingByteEncoding { get { - return Encoding == FileSystemCmdletProviderEncoding.Byte; + return Encoding == FileEncoding.Byte; } // get } // UsingByteEncoding @@ -7650,7 +7641,7 @@ public bool WasStreamTypeSpecified { get { - return (Encoding != FileSystemCmdletProviderEncoding.String); + return (Encoding != FileEncoding.String); } // get } // WasStreamTypeSpecified diff --git a/src/System.Management.Automation/resources/RunspaceInit.resx b/src/System.Management.Automation/resources/RunspaceInit.resx index d036c4897c0..0b5242e6f5a 100644 --- a/src/System.Management.Automation/resources/RunspaceInit.resx +++ b/src/System.Management.Automation/resources/RunspaceInit.resx @@ -174,6 +174,9 @@ Dictates the action taken when a command generates an item in the Information stream + + Dictates the encoding used when creating a file + Dictates the view mode to use when displaying errors diff --git a/src/System.Management.Automation/utils/Encoding.cs b/src/System.Management.Automation/utils/Encoding.cs new file mode 100644 index 00000000000..6b076429e41 --- /dev/null +++ b/src/System.Management.Automation/utils/Encoding.cs @@ -0,0 +1,408 @@ +/********************************************************************++ +Copyright (c) Microsoft Corporation. All rights reserved. 
+--********************************************************************/ + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Text; +using System.Management.Automation; +using System.Management.Automation.Provider; + +namespace Microsoft.PowerShell +{ + /// + /// The list of available file encodings + /// + public enum FileEncoding + { + /// + /// No encoding, or unset. + /// + Unknown, + + /// + /// Unicode encoding. + /// + String, + + /// + /// Unicode encoding. + /// + Unicode, + + /// + /// Byte encoding. + /// + Byte, + + /// + /// Big Endian Unicode encoding. + /// + BigEndianUnicode, + + /// + /// Backward compatibility - UTF8 encoding without BOM + /// + UTF8, + + /// + /// UTF8 encoding which includes BOM. + /// + UTF8BOM, + + /// + /// UTF8 encoding without BOM. + /// + UTF8NoBOM, + + /// + /// UTF7 encoding. + /// + UTF7, + + /// + /// UTF32 encoding. + /// + UTF32, + + /// + /// ASCII encoding. + /// + Ascii, + + /// + /// Default encoding. + /// + Default, + + /// + /// OEM encoding. + /// + Oem, + + /// + /// Big Endian UTF32 encoding. + /// + BigEndianUTF32, + + /// + /// Windows legacy encoding. This requires a cmdlet object to resolve. + /// + WindowsLegacy, + } + + /// + /// the helper class for determining encodings for PowerShell + /// + public static class PowerShellEncoding + { + + /// + /// Return the default PowerShell encoding + /// which is UTF8 without a BOM. 
+ /// There is no distinction between platforms + /// + public static Encoding GetDefaultEncoding() + { + return new UTF8Encoding(false); + } + + /// + /// translate a FileEncoding to an actual System.Text.Encoding + /// The enum value + /// + public static Encoding GetEncoding(FileEncoding TextEncoding) + { + System.Text.Encoding result = GetDefaultEncoding(); + switch ( TextEncoding ) + { + case FileEncoding.String: + result = Encoding.Unicode; + break; + + case FileEncoding.Unicode: + result = Encoding.Unicode; + break; + + case FileEncoding.BigEndianUnicode: + result = Encoding.BigEndianUnicode; + break; + + case FileEncoding.UTF8BOM: + result = Encoding.UTF8; // The default UTF8 encoder includes the BOM + break; + + case FileEncoding.Byte: + result = Encoding.Unicode; + break; + + case FileEncoding.UTF8: + case FileEncoding.UTF8NoBOM: + result = new UTF8Encoding(false); + break; + + case FileEncoding.UTF7: + result = Encoding.UTF7; + break; + + case FileEncoding.UTF32: + result = Encoding.UTF32; + break; + + case FileEncoding.BigEndianUTF32: + result = Encoding.BigEndianUnicode; + break; + + case FileEncoding.Ascii: + result = Encoding.ASCII; + break; + + case FileEncoding.Default: + result = GetDefaultEncoding(); + break; + + case FileEncoding.Oem: + result = ClrFacade.GetOEMEncoding(); + break; + + default: + break; + } + + return result; + } + + // the way the encoding is implemented in PowerShell 5 and earlier + // if the user sets the default encoding to WindowsLegacy, we will + // be able to encode for that + internal static Dictionary legacyEncodingMap = + new Dictionary(StringComparer.OrdinalIgnoreCase) + { + { "microsoft.powershell.commands.addcontentcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.exportclixmlcommand", Encoding.Unicode }, + { "microsoft.powershell.commands.exportcsvcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.exportpssessioncommand", Encoding.UTF8 }, // with BOM + { 
"microsoft.powershell.commands.formathex", Encoding.ASCII }, + { "microsoft.powershell.commands.newmodulemanifestcommand", Encoding.Unicode }, + { "microsoft.powershell.commands.getcontentcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.importcsvcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.outfilecommand", Encoding.Unicode }, // This includes redirection + { "microsoft.powershell.commands.setcontentcommand", Encoding.ASCII }, + // Providers are handled here + { "microsoft.powershell.commands.filesystemprovider", Encoding.ASCII }, + + }; + + internal static Encoding GetWindowsLegacyEncoding(string name) + { + if ( legacyEncodingMap.ContainsKey(name)) + { + return legacyEncodingMap[name]; + } + else + { + return Encoding.Default; + } + } + + /// + /// Retrieve the PSDefaultFileEncoding preference value if set + /// + public static FileEncoding GetEncodingPreference(SessionState sessionState) + { + FileEncoding encodingPreference = FileEncoding.Unknown; + try + { + // It doesn't matter if this fails or throws, we will return unknown in that case + object tmp = sessionState.PSVariable.GetValue("PSDefaultFileEncoding"); + LanguagePrimitives.TryConvertTo(tmp, out encodingPreference); + } + catch + { + ; + } + return encodingPreference; + } + + /* + /// + /// Retrieve the encoding in a provider context + /// + /// + public static Encoding GetProviderEncoding(NavigationCmdletProvider provider, FileEncoding encoding) + { + Encoding resolvedEncoding = GetDefaultEncoding(); + FileEncoding encodingPreference = GetEncodingPreference(provider.SessionState); + + if ( encoding != FileEncoding.Unknown ) + { + resolvedEncoding = GetEncoding(encoding); + } + return resolvedEncoding; + } + */ + + /// + /// Retrieve the encoding in a provider context + /// + /// + public static Encoding GetProviderEncoding(CmdletProvider provider, FileEncoding encoding) + { + Encoding resolvedEncoding = GetDefaultEncoding(); + FileEncoding encodingPreference = 
GetEncodingPreference(provider.SessionState); + if ( encoding == FileEncoding.Unknown && encodingPreference != FileEncoding.Unknown ) + { + resolvedEncoding = GetEncoding(encodingPreference); + } + else if ( encoding != FileEncoding.Unknown ) + { + resolvedEncoding = GetEncoding(encoding); + } + return resolvedEncoding; + } + + /// + /// Retrieve the encoding based on the Cmdlet and the Encoding + /// The cmdlet of interest + /// The Encoding parameter value + /// + public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) + { + Encoding resolvedEncoding = GetDefaultEncoding(); + FileEncoding encodingPreference = FileEncoding.Unknown; + bool preferenceSetAndValid = false; + string name = cmdlet.GetType().FullName.ToLower(CultureInfo.InvariantCulture); + + + // An encoding has been specified as a parameter (or the explicit parameter value is "Unknown") + if ( encoding != FileEncoding.Unknown ) + { + // If the encoding has been set to WindowsLegacy, we need to look up the actual encoding + if ( encoding == FileEncoding.WindowsLegacy ) + { + resolvedEncoding = GetWindowsLegacyEncoding(name); + } + else + { + resolvedEncoding = GetEncoding(encoding); + } + } + else + { + // the parameter is not specifically set + // Check the preference variable + encodingPreference = GetEncodingPreference(cmdlet.Context.SessionState); + preferenceSetAndValid = encodingPreference != FileEncoding.Unknown; // If set to unknown, we accept that it is unset + // If the encoding preference has been set to WindowsLegacy, we need to look up the actual encoding + if ( encodingPreference == FileEncoding.WindowsLegacy ) + { + resolvedEncoding = GetWindowsLegacyEncoding(name); + } + else if ( encodingPreference != FileEncoding.Unknown ) + { + resolvedEncoding = GetEncoding(encodingPreference); + } + // the final else would be set the encoding to GetDefaultEncoding() + // which was handled above + } + + return resolvedEncoding; // GetEncoding(FileEncoding.UTF8NoBOM); + } + + // 
[System.Text.Encoding]::GetEncodings() | ? { $_.GetEncoding().GetPreamble() } | + // Add-Member ScriptProperty Preamble { $this.GetEncoding().GetPreamble() -join "-" } -PassThru | + // Format-Table -Auto + internal static Dictionary encodingMap = + new Dictionary() + { + { "255-254", FileEncoding.Unicode }, + { "254-255", FileEncoding.BigEndianUnicode }, + { "255-254-0-0", FileEncoding.UTF32 }, + { "0-0-254-255", FileEncoding.BigEndianUTF32 }, + { "239-187-191", FileEncoding.UTF8 }, + }; + + internal static char[] nonPrintableCharacters = { + (char) 0, (char) 1, (char) 2, (char) 3, (char) 4, (char) 5, (char) 6, (char) 7, (char) 8, + (char) 11, (char) 12, (char) 14, (char) 15, (char) 16, (char) 17, (char) 18, (char) 19, (char) 20, + (char) 21, (char) 22, (char) 23, (char) 24, (char) 25, (char) 26, (char) 28, (char) 29, (char) 30, + (char) 31, (char) 127, (char) 129, (char) 141, (char) 143, (char) 144, (char) 157 }; + + // take a look at the file contents and guess at the best encoding + // + internal static FileEncoding GetEncoding(string path) + { + if (!File.Exists(path)) + { + return FileEncoding.Default; + } + + byte[] initialBytes = new byte[100]; + int bytesRead = 0; + + try + { + using (FileStream stream = System.IO.File.OpenRead(path)) + { + using (BinaryReader reader = new BinaryReader(stream)) + { + bytesRead = reader.Read(initialBytes, 0, 100); + } + } + } + catch (IOException) + { + return FileEncoding.Default; + } + + // Test for four-byte preambles + string preamble = null; + FileEncoding foundEncoding = FileEncoding.Default; + + if (bytesRead > 3) + { + preamble = String.Join("-", initialBytes[0], initialBytes[1], initialBytes[2], initialBytes[3]); + + if (encodingMap.TryGetValue(preamble, out foundEncoding)) + { + return foundEncoding; + } + } + + // Test for three-byte preambles + if (bytesRead > 2) + { + preamble = String.Join("-", initialBytes[0], initialBytes[1], initialBytes[2]); + if (encodingMap.TryGetValue(preamble, out foundEncoding)) + { + 
return foundEncoding; + } + } + + // Test for two-byte preambles + if (bytesRead > 1) + { + preamble = String.Join("-", initialBytes[0], initialBytes[1]); + if (encodingMap.TryGetValue(preamble, out foundEncoding)) + { + return foundEncoding; + } + } + + // Check for binary + string initialBytesAsAscii = System.Text.Encoding.ASCII.GetString(initialBytes, 0, bytesRead); + if (initialBytesAsAscii.IndexOfAny(nonPrintableCharacters) >= 0) + { + return FileEncoding.Byte; + } + + // we couldn't determine anything from direct examination, + // return UTF8 without a BOM which should be good for both Windows and Non-Windows + return FileEncoding.UTF8NoBOM; + } + } + +} + diff --git a/src/System.Management.Automation/utils/PathUtils.cs b/src/System.Management.Automation/utils/PathUtils.cs index 632938e40e7..ff5751c883b 100644 --- a/src/System.Management.Automation/utils/PathUtils.cs +++ b/src/System.Management.Automation/utils/PathUtils.cs @@ -6,6 +6,7 @@ using System.Globalization; using System.IO; using System.Text; +using Microsoft.PowerShell; using System.Management.Automation.Internal; using Dbg = System.Management.Automation.Diagnostics; @@ -17,6 +18,41 @@ namespace System.Management.Automation /// internal static class PathUtils { + + /// + /// THE method for opening a file for writing. + /// Should be used by all cmdlets that write to a file. 
+ /// + /// cmdlet that is opening the file (used mainly for error reporting) + /// path to the file (as specified on the command line - this method will resolve the path) + /// encoding (this method will convert the command line string to an Encoding instance) + /// if true, then we will use default .NET encoding instead of the encoding specified in parameter + /// + /// + /// + /// Result1: opened for writing + /// Result2: (inherits from ) opened for writing + /// Result3: file info that should be used to restore file attributes after done with the file (null is this is not needed) + /// True if wildcard expansion should be bypassed. + internal static void MasterStreamOpen( + PSCmdlet cmdlet, + string filePath, + FileEncoding encoding, + bool defaultEncoding, + bool Append, + bool Force, + bool NoClobber, + out FileStream fileStream, + out StreamWriter streamWriter, + out FileInfo readOnlyFileInfo, + bool isLiteralPath + ) + { + Encoding resolvedEncoding = PowerShellEncoding.GetEncoding(cmdlet, encoding); + + MasterStreamOpen(cmdlet, filePath, resolvedEncoding, defaultEncoding, Append, Force, NoClobber, out fileStream, out streamWriter, out readOnlyFileInfo, isLiteralPath); + } + /// /// THE method for opening a file for writing. /// Should be used by all cmdlets that write to a file. 
@@ -188,6 +224,12 @@ internal static void ReportFileOpenFailure(Cmdlet cmdlet, string filePath, Excep cmdlet.ThrowTerminatingError(errorRecord); } + internal static StreamReader OpenStreamReader(PSCmdlet command, string filePath, FileEncoding encoding, bool isLiteralPath) + { + FileStream fileStream = OpenFileStream(filePath, command, isLiteralPath); + return new StreamReader(fileStream, PowerShellEncoding.GetEncoding(command, encoding)); + } + internal static StreamReader OpenStreamReader(PSCmdlet command, string filePath, string encoding, bool isLiteralPath) { FileStream fileStream = OpenFileStream(filePath, command, isLiteralPath); @@ -490,7 +532,7 @@ internal static Encoding Convert(Cmdlet cmdlet, string encoding) return System.Text.Encoding.UTF32; if (string.Equals(encoding, Default, StringComparison.OrdinalIgnoreCase)) - return ClrFacade.GetDefaultEncoding(); + return PowerShellEncoding.GetDefaultEncoding(); if (string.Equals(encoding, OEM, StringComparison.OrdinalIgnoreCase)) { diff --git a/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 b/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 index 20b35867651..100a3110b91 100644 --- a/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 +++ b/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 @@ -29,13 +29,14 @@ Describe "Redirection operator now supports encoding changes" -Tags "CI" { $psdefaultParameterValues.Remove("out-file:encoding") } - It "If encoding is unset, redirection should be Unicode" { + It "If encoding is unset, redirection should be platform appropriate" { $asciiString > TESTDRIVE:\file.txt + $encoder = [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding("utf8nobom") $bytes = get-content -encoding byte TESTDRIVE:\file.txt # create the expected - $BOM = [text.encoding]::unicode.GetPreamble() - $TXT = [text.encoding]::unicode.GetBytes($asciiString) - $CR = [text.encoding]::unicode.GetBytes($asciiCR) + $BOM = $encoder.GetPreamble() + $TXT = 
$encoder.GetBytes($asciiString) + $CR = $encoder.GetBytes($asciiCR) $expectedBytes = .{ $BOM; $TXT; $CR } $bytes.Count | should be $expectedBytes.count for($i = 0; $i -lt $bytes.count; $i++) { @@ -43,8 +44,8 @@ Describe "Redirection operator now supports encoding changes" -Tags "CI" { } } - # $availableEncodings = "unknown","string","unicode","bigendianunicode","utf8","utf7", "utf32","ascii","default","oem" - $availableEncodings = (get-command out-file).Parameters["Encoding"].Attributes.ValidValues + # WindowsLegacy encoding tests will be done elsewhere + $availableEncodings = [enum]::GetNames([Microsoft.PowerShell.FileEncoding])|?{@("default","WindowsLegacy") -notcontains $_ } foreach($encoding in $availableEncodings) { $skipTest = $false @@ -60,7 +61,7 @@ Describe "Redirection operator now supports encoding changes" -Tags "CI" { # and out-file has its own translation, so we'll # not do that logic here, but simply ignore those encodings # as they eventually are translated to "real" encoding - $enc = [system.text.encoding]::$encoding + $enc = [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding($encoding) if ( $enc ) { $msg = "Overriding encoding for out-file is respected for $encoding" diff --git a/test/powershell/Modules/Microsoft.PowerShell.Core/TestGetCommand.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Core/TestGetCommand.Tests.ps1 index 8f48d529620..202b048b1e9 100644 --- a/test/powershell/Modules/Microsoft.PowerShell.Core/TestGetCommand.Tests.ps1 +++ b/test/powershell/Modules/Microsoft.PowerShell.Core/TestGetCommand.Tests.ps1 @@ -211,7 +211,7 @@ $paramName = "Encoding" $results = get-command -verb get -noun content -Encoding Unicode VerifyDynamicParametersExist -cmdlet $results[0] -parameterNames $paramName - VerifyParameterType -cmdlet $results[0] -parameterName $paramName -parameterType Microsoft.PowerShell.Commands.FileSystemCmdletProviderEncoding + VerifyParameterType -cmdlet $results[0] -parameterName $paramName -parameterType 
Microsoft.PowerShell.FileEncoding } It "Verify Single Cmdlet Using Verb&Noun ParameterSet With Usage" { @@ -261,7 +261,7 @@ $paramName = "Encoding" $results = Get-Command -verb get -noun content -encoding UTF8 VerifyDynamicParametersExist -cmdlet $results[0] -parameterNames $paramName - VerifyParameterType -cmdlet $results[0] -parameterName $paramName -ParameterType Microsoft.PowerShell.Commands.FileSystemCmdletProviderEncoding + VerifyParameterType -cmdlet $results[0] -parameterName $paramName -ParameterType Microsoft.PowerShell.FileEncoding } #unsupported parameter: -synop @@ -269,6 +269,7 @@ $paramName = "Encoding" $results = get-command -verb get -noun content -encoding UTF8 -synop VerifyDynamicParametersExist -cmdlet $results[0] -parameterNames $paramName - VerifyParameterType -cmdlet $results[0] -parameterName $paramName -ParameterType Microsoft.PowerShell.Commands.FileSystemCmdletProviderEncoding + VerifyParameterType -cmdlet $results[0] -parameterName $paramName -ParameterType Microsoft.PowerShell.FileEncoding } -} \ No newline at end of file +} + diff --git a/test/powershell/engine/Encoding.Tests.ps1 b/test/powershell/engine/Encoding.Tests.ps1 new file mode 100644 index 00000000000..fa4e2f06098 --- /dev/null +++ b/test/powershell/engine/Encoding.Tests.ps1 @@ -0,0 +1,170 @@ +Describe "Encoding classes and methods are available" -Tag CI { + BeforeAll { + $testString = "t" + ([char]233) + "st" + $provider = get-item $TESTDRIVE + $testFile = "${TESTDRIVE}/file.txt" + $preamble = @{ + Ascii = '' + BigEndianUTF32 = '254-255' + BigEndianUnicode = '254-255' + Byte = '255-254' + Default = '' + Oem = '' + String = '255-254' + UTF32 = '255-254-0-0' + UTF7 = '' + UTF8 = '' + UTF8BOM = '239-187-191' + UTF8NoBOM = '' + Unicode = '255-254' + Unknown = '' + WindowsLegacy = '' + } + + function Get-FileBytes + { + param ( $file, [int]$count = [int]::MaxValue ) + (Get-Content $file -Encoding byte | Select-Object -First $count) -Join "-" + } + + function Get-NewLineBytes + { 
+ param ( [Microsoft.PowerShell.FileEncoding]$encoding ) + $encoder = [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding($encoding) + $encoder.GetBytes([Environment]::NewLine) -Join "-" + } + function Test-GetEncoding + { + [CmdletBinding()] + param ( + [Microsoft.PowerShell.FileEncoding]$Encoding + ) + END { + [Microsoft.PowerShell.PowerShellEncoding]::GetCmdletEncoding($pscmdlet, $encoding) + } + } + + $preambleTests = + @{ Name = 'Ascii'; Preamble = '' }, + @{ Name = 'BigEndianUTF32'; Preamble = '254-255' }, + @{ Name = 'BigEndianUnicode'; Preamble = '254-255' }, + @{ Name = 'Byte'; Preamble = '255-254' }, + @{ Name = 'Default'; Preamble = '' }, + @{ Name = 'Oem'; Preamble = '' }, + @{ Name = 'String'; Preamble = '255-254' }, + @{ Name = 'UTF32'; Preamble = '255-254-0-0' }, + @{ Name = 'UTF7'; Preamble = '' }, + @{ Name = 'UTF8'; Preamble = '' }, + @{ Name = 'UTF8BOM'; Preamble = '239-187-191' }, + @{ Name = 'UTF8NoBOM'; Preamble = '' }, + @{ Name = 'Unicode'; Preamble = '255-254' }, + @{ Name = 'Unknown'; Preamble = '' }, + @{ Name = 'WindowsLegacy'; Preamble = '' } + + $contentTests = + @{ Name = 'Ascii'; Bytes = "116-63-115-116-" + (Get-NewLineBytes Ascii) }, + @{ Name = 'BigEndianUTF32'; Bytes = "254-255-0-116-0-233-0-115-0-116-" + (Get-NewLineBytes BigEndianUTF32) }, + @{ Name = 'BigEndianUnicode'; Bytes = "254-255-0-116-0-233-0-115-0-116-" + (Get-NewLineBytes BigEndianUnicode) }, + @{ Name = 'Byte'; Bytes = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Byte) }, + @{ Name = 'Default'; Bytes = "116-195-169-115-116-" + (Get-NewLineBytes Default) }, + @{ Name = 'Oem'; Bytes = "116-195-169-115-116-" + (Get-NewLineBytes Oem) }, + @{ Name = 'String'; Bytes = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes String) }, + @{ Name = 'UTF32'; Bytes = "255-254-0-0-116-0-0-0-233-0-0-0-115-0-0-0-116-0-0-0-" + (Get-NewLineBytes UTF32) }, + @{ Name = 'UTF7'; Bytes = "116-43-65-79-107-45-115-116-" + (Get-NewLineBytes UTF7) }, + @{ Name = 'UTF8'; Bytes = 
"116-195-169-115-116-" + (Get-NewLineBytes UTF8 ) }, + @{ Name = 'UTF8BOM'; Bytes = "239-187-191-116-195-169-115-116-" + (Get-NewLineBytes UTF8BOM) }, + @{ Name = 'UTF8NoBOM'; Bytes = "116-195-169-115-116-" + (Get-NewLineBytes UTF8NoBOM) }, + @{ Name = 'Unicode'; Bytes = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Unicode) }, + @{ Name = 'Unknown'; Bytes = "116-195-169-115-116-" + (Get-NewLineBytes Unknown) } + + } + + AfterEach { + if ( Test-Path $testFile ) + { + remove-item $testFile + } + $PSDefaultFileEncoding = "Unknown" + } + + It "Encoding for '' should have correct preamble ''" -TestCase $preambleTests { + param ( $Name, $Preamble ) + [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding($Name).GetPreamble() -Join "-" | Should be $Preamble + } + + It "Encoding for '' should create file with proper encoding" -TestCase $contentTests { + param ( $Name, $Bytes ) + $str = "t" + [char]233 + "st" + $str | out-file -encoding $Name $testFile + Get-FileBytes $testFile | should be $Bytes + } + + It "Setting PSDefaultFileEncoding to '' should create file with proper encoding" -TestCase $contentTests { + param ( $Name, $Bytes ) + $str = "t" + [char]233 + "st" + $PSDefaultFileEncoding = $Name + $str | out-file $testFile + Get-FileBytes $testFile | should be $Bytes + } + + It "Explicit encoding is not overridden by setting PSDefaultFileEncoding to ''" -TestCase $contentTests { + param ( $Name, $Bytes ) + $str = "t" + [char]233 + "st" + $PSDefaultFileEncoding = $Name + $str | out-file -encoding ascii $testFile + Get-FileBytes $testFile | should be "116-63-115-116-10" + } + + Context "Legacy Windows Behavior" { + + It "Add-Content creates ascii encoded files" { + $testString | add-content -encoding WindowsLegacy $TESTDRIVE/file.txt + Get-FileBytes $TESTDRIVE/file.txt | should be ("116-195-169-115-116-" + (Get-NewLineBytes ASCII)) + } + + It "Set-Content creates ascii encoded files" { + $testString | set-content -encoding WindowsLegacy $TESTDRIVE/file.txt + 
Get-FileBytes $TESTDRIVE/file.txt | should be ("116-195-169-115-116-" + (Get-NewLineBytes ASCII)) + } + + It "Export-CliXml creates unicode encoded files" { + [pscustomobject]@{ text = $testString } | export-clixml -encoding WindowsLegacy $TESTDRIVE/file.clixml + Get-FileBytes $TESTDRIVE/file.clixml -count 10 | should be "255-254-60-0-79-0-98-0-106-0" + } + + It "Export-Csv creates ascii encoded files" { + # we'll be looking for the bytes 116-63-115-116 which is what $testString looks like when encoded as ascii + [pscustomobject]@{ text = $testString } | export-csv -encoding WindowsLegacy $TESTDRIVE/file.clixml + Get-FileBytes $TESTDRIVE/file.clixml | should match "116-63-115-116" + } + + It "New-ModuleManifest creates unicode encoded files" { + try { + $PSDefaultFileEncoding = "WindowsLegacy" + New-ModuleManifest -path "$TESTDRIVE/${testString}.psd1" + } + finally { + $PSDefaultFileEncoding = "Unknown" + } + Get-FileBytes $TESTDRIVE/${testString}.psd1 -count 10 | should match "255-254-35-0-10-0-35-0-32-0" + } + + It "Out-File creates properly encoded files" { + $testString | Out-File -encoding WindowsLegacy -FilePath $TESTDRIVE/file.txt + Get-FileBytes $TESTDRIVE/file.txt -count 10 | should match "255-254-116-0-233-0-115-0-116-0" + } + + It "Redirection creates unicode encoded files" { + try { + $PSDefaultFileEncoding = "WindowsLegacy" + $testString > $TESTDRIVE/file.txt + } + finally { + $PSDefaultFileEncoding = "Unknown" + } + Get-FileBytes $TESTDRIVE/file.txt -count 10 | should match "255-254-116-0-233-0-115-0-116-0" + } + } +} + + From afbf374405a4eb86c228c0038d6a6e51516dec9f Mon Sep 17 00:00:00 2001 From: James Truher Date: Mon, 26 Jun 2017 16:06:27 -0700 Subject: [PATCH 02/14] Fix test issue where newline was being provided rather than calculated some tests were failing on Windows because new line is different, calculate the bytes in newline rather than hardcoding them --- test/powershell/engine/Encoding.Tests.ps1 | 19 +++++++++++-------- 1 file changed, 11 
insertions(+), 8 deletions(-) diff --git a/test/powershell/engine/Encoding.Tests.ps1 b/test/powershell/engine/Encoding.Tests.ps1 index fa4e2f06098..07c10c52f22 100644 --- a/test/powershell/engine/Encoding.Tests.ps1 +++ b/test/powershell/engine/Encoding.Tests.ps1 @@ -67,7 +67,8 @@ Describe "Encoding classes and methods are available" -Tag CI { @{ Name = 'BigEndianUnicode'; Bytes = "254-255-0-116-0-233-0-115-0-116-" + (Get-NewLineBytes BigEndianUnicode) }, @{ Name = 'Byte'; Bytes = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Byte) }, @{ Name = 'Default'; Bytes = "116-195-169-115-116-" + (Get-NewLineBytes Default) }, - @{ Name = 'Oem'; Bytes = "116-195-169-115-116-" + (Get-NewLineBytes Oem) }, + # Oem encoding can change depending on system, calculate the expected string + @{ Name = 'Oem'; Bytes = ([Microsoft.PowerShell.PowerShellEncoding]::GetEncoding("Oem").GetBytes($testString) -join "-") + "-" + (Get-NewLineBytes Oem) }, @{ Name = 'String'; Bytes = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes String) }, @{ Name = 'UTF32'; Bytes = "255-254-0-0-116-0-0-0-233-0-0-0-115-0-0-0-116-0-0-0-" + (Get-NewLineBytes UTF32) }, @{ Name = 'UTF7'; Bytes = "116-43-65-79-107-45-115-116-" + (Get-NewLineBytes UTF7) }, @@ -94,24 +95,21 @@ Describe "Encoding classes and methods are available" -Tag CI { It "Encoding for '' should create file with proper encoding" -TestCase $contentTests { param ( $Name, $Bytes ) - $str = "t" + [char]233 + "st" - $str | out-file -encoding $Name $testFile + $testString | out-file -encoding $Name $testFile Get-FileBytes $testFile | should be $Bytes } It "Setting PSDefaultFileEncoding to '' should create file with proper encoding" -TestCase $contentTests { param ( $Name, $Bytes ) - $str = "t" + [char]233 + "st" $PSDefaultFileEncoding = $Name - $str | out-file $testFile + $testString | out-file $testFile Get-FileBytes $testFile | should be $Bytes } It "Explicit encoding is not overridden by setting PSDefaultFileEncoding to ''" -TestCase 
$contentTests { param ( $Name, $Bytes ) - $str = "t" + [char]233 + "st" $PSDefaultFileEncoding = $Name - $str | out-file -encoding ascii $testFile + $testString | out-file -encoding ascii $testFile Get-FileBytes $testFile | should be "116-63-115-116-10" } @@ -146,7 +144,12 @@ Describe "Encoding classes and methods are available" -Tag CI { finally { $PSDefaultFileEncoding = "Unknown" } - Get-FileBytes $TESTDRIVE/${testString}.psd1 -count 10 | should match "255-254-35-0-10-0-35-0-32-0" + # we know what the encoding should be + $legacyEncoding = [System.Text.Encoding]::Unicode + $newLineBytes = $legacyEncoding.GetBytes([Environment]::NewLine) + $newLineByteString = $newLineBytes -join "-" + $expected = "255-254-35-0-${newLineByteString}-35-0-32-0" + Get-FileBytes $TESTDRIVE/${testString}.psd1 -count 10 | should match $expected } It "Out-File creates properly encoded files" { From f996e6fe4dd946b9f998673900028d21d57fc23b Mon Sep 17 00:00:00 2001 From: James Truher Date: Mon, 26 Jun 2017 16:08:06 -0700 Subject: [PATCH 03/14] Remove code which was relying on previous encoding code ClrFacade retains some of its functionality, but now relies on PowerShellEncoding class for knowing what the default coding is. The encoding methods which calls native methods is retained. 
--- .../commands/utility/Send-MailMessage.cs | 30 ++-- .../FormatAndOutput/common/FormatXMLWriter.cs | 3 +- .../engine/InitialSessionState.cs | 3 +- .../engine/Modules/ScriptAnalysis.cs | 5 +- .../engine/Utils.cs | 145 ------------------ .../engine/hostifaces/MshHostUserInterface.cs | 7 +- .../remoting/common/RunspaceConnectionInfo.cs | 7 +- .../namespaces/FileSystemProvider.cs | 68 -------- .../utils/ClrFacade.cs | 2 + .../utils/Encoding.cs | 37 +---- .../utils/PathUtils.cs | 6 + 11 files changed, 40 insertions(+), 273 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs index 13ed7e00f1a..13a316a678c 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs @@ -8,6 +8,7 @@ using System.Net.Mail; using System.Diagnostics.CodeAnalysis; using System.Management.Automation; +using Microsoft.PowerShell; namespace Microsoft.PowerShell.Commands @@ -492,35 +493,22 @@ protected override void EndProcessing() /// /// To make it easier to specify -Encoding parameter, we add an ArgumentTransformationAttribute here. - /// When the input data is of type string and is valid to be converted to System.Text.Encoding, we do - /// the conversion and return the converted value. Otherwise, we just return the input data. + /// When the input data is of type string and is valid to be converted to System.Text.Encoding + /// via PowerShellEncoding.GetEncoding(), we do the conversion and return the converted value. + /// Otherwise, we just return the input data. 
/// internal sealed class ArgumentToEncodingNameTransformationAttribute : ArgumentTransformationAttribute { public override object Transform(EngineIntrinsics engineIntrinsics, object inputData) { - string encodingName; - if (LanguagePrimitives.TryConvertTo(inputData, out encodingName)) - { - if (string.Equals(encodingName, EncodingConversion.Unknown, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.String, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Unicode, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.BigEndianUnicode, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Utf8, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Utf7, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Utf32, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Ascii, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Default, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.OEM, StringComparison.OrdinalIgnoreCase)) - { - // the encodingName is guaranteed to be valid, so it is safe to pass null to method - // Convert(Cmdlet cmdlet, string encoding) as the value of 'cmdlet'. 
- return EncodingConversion.Convert(null, encodingName); - } + FileEncoding encoding; + if (LanguagePrimitives.TryConvertTo(inputData, out encoding)) + { + return PowerShellEncoding.GetEncoding(encoding); } return inputData; } } #endregion -} \ No newline at end of file +} diff --git a/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs b/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs index 117024cfd2b..5f425d474d3 100644 --- a/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs +++ b/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs @@ -8,6 +8,7 @@ using System.Collections.Generic; using System.Globalization; using System.Xml; +using Microsoft.PowerShell; namespace Microsoft.PowerShell.Commands { @@ -37,7 +38,7 @@ internal static void WriteToPs1Xml(PSCmdlet cmdlet, List StreamWriter streamWriter; FileStream fileStream; FileInfo fileInfo; - PathUtils.MasterStreamOpen(cmdlet, filepath, "ascii", true, false, force, noclobber, + PathUtils.MasterStreamOpen(cmdlet, filepath, PowerShellEncoding.GetEncoding(FileEncoding.Ascii), true, false, force, noclobber, out fileStream, out streamWriter, out fileInfo, isLiteralPath); try diff --git a/src/System.Management.Automation/engine/InitialSessionState.cs b/src/System.Management.Automation/engine/InitialSessionState.cs index b28af452ac0..63c10c14a73 100644 --- a/src/System.Management.Automation/engine/InitialSessionState.cs +++ b/src/System.Management.Automation/engine/InitialSessionState.cs @@ -15,6 +15,7 @@ using System.Management.Automation.Language; using System.Reflection; using System.Threading; +using Microsoft.PowerShell; using Microsoft.PowerShell.Commands; using Debug = System.Management.Automation.Diagnostics; using System.Management.Automation.Host; @@ -4828,7 +4829,7 @@ .ForwardHelpCategory Cmdlet internal const ActionPreference 
defaultVerbosePreference = ActionPreference.SilentlyContinue; internal const ActionPreference defaultWarningPreference = ActionPreference.Continue; internal const ActionPreference defaultInformationPreference = ActionPreference.SilentlyContinue; - internal const Microsoft.PowerShell.FileEncoding defaultFileEncodingPreference = Microsoft.PowerShell.FileEncoding.Unknown; + internal const Microsoft.PowerShell.FileEncoding defaultFileEncodingPreference = FileEncoding.Unknown; internal const bool defaultWhatIfPreference = false; internal const ConfirmImpact defaultConfirmPreference = ConfirmImpact.High; diff --git a/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs b/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs index 8fb272b488b..12ebc9d9b7c 100644 --- a/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs +++ b/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs @@ -9,6 +9,7 @@ using System.Text; using System.Management.Automation.Language; using System.Text.RegularExpressions; +using Microsoft.PowerShell; namespace System.Management.Automation { @@ -95,7 +96,7 @@ internal static string ReadScript(string path) { using (FileStream readerStream = new FileStream(path, FileMode.Open, FileAccess.Read)) { - Encoding defaultEncoding = ClrFacade.GetDefaultEncoding(); + Encoding defaultEncoding = PowerShellEncoding.GetDefaultEncoding(); Microsoft.Win32.SafeHandles.SafeFileHandle safeFileHandle = readerStream.SafeFileHandle; using (StreamReader scriptReader = new StreamReader(readerStream, defaultEncoding)) @@ -554,4 +555,4 @@ internal class RequiredModuleInfo internal string Name { get; set; } internal List CommandsToPostFilter { get; set; } } -} // System.Management.Automation \ No newline at end of file +} // System.Management.Automation diff --git a/src/System.Management.Automation/engine/Utils.cs b/src/System.Management.Automation/engine/Utils.cs index f465af5fe10..17c99e34f2a 100644 --- 
a/src/System.Management.Automation/engine/Utils.cs +++ b/src/System.Management.Automation/engine/Utils.cs @@ -1201,151 +1201,6 @@ internal static bool Succeeded(int hresult) return hresult >= 0; } - internal static FileSystemCmdletProviderEncoding GetEncoding(string path) - { - if (!File.Exists(path)) - { - return FileSystemCmdletProviderEncoding.Default; - } - - byte[] initialBytes = new byte[100]; - int bytesRead = 0; - - try - { - using (FileStream stream = System.IO.File.OpenRead(path)) - { - using (BinaryReader reader = new BinaryReader(stream)) - { - bytesRead = reader.Read(initialBytes, 0, 100); - } - } - } - catch (IOException) - { - return FileSystemCmdletProviderEncoding.Default; - } - - // Test for four-byte preambles - string preamble = null; - FileSystemCmdletProviderEncoding foundEncoding = FileSystemCmdletProviderEncoding.Default; - - if (bytesRead > 3) - { - preamble = String.Join("-", initialBytes[0], initialBytes[1], initialBytes[2], initialBytes[3]); - - if (encodingMap.TryGetValue(preamble, out foundEncoding)) - { - return foundEncoding; - } - } - - // Test for three-byte preambles - if (bytesRead > 2) - { - preamble = String.Join("-", initialBytes[0], initialBytes[1], initialBytes[2]); - if (encodingMap.TryGetValue(preamble, out foundEncoding)) - { - return foundEncoding; - } - } - - // Test for two-byte preambles - if (bytesRead > 1) - { - preamble = String.Join("-", initialBytes[0], initialBytes[1]); - if (encodingMap.TryGetValue(preamble, out foundEncoding)) - { - return foundEncoding; - } - } - - // Check for binary - string initialBytesAsAscii = System.Text.Encoding.ASCII.GetString(initialBytes, 0, bytesRead); - if (initialBytesAsAscii.IndexOfAny(nonPrintableCharacters) >= 0) - { - return FileSystemCmdletProviderEncoding.Byte; - } - - return FileSystemCmdletProviderEncoding.Ascii; - } - - internal static Encoding GetEncodingFromEnum(FileSystemCmdletProviderEncoding encoding) - { - // Default to unicode encoding - Encoding result = 
Encoding.Unicode; - - switch (encoding) - { - case FileSystemCmdletProviderEncoding.String: - result = Encoding.Unicode; - break; - - case FileSystemCmdletProviderEncoding.Unicode: - result = Encoding.Unicode; - break; - - case FileSystemCmdletProviderEncoding.BigEndianUnicode: - result = Encoding.BigEndianUnicode; - break; - - case FileSystemCmdletProviderEncoding.UTF8: - result = Encoding.UTF8; - break; - - case FileSystemCmdletProviderEncoding.UTF7: - result = Encoding.UTF7; - break; - - case FileSystemCmdletProviderEncoding.UTF32: - result = Encoding.UTF32; - break; - - case FileSystemCmdletProviderEncoding.BigEndianUTF32: - result = Encoding.BigEndianUnicode; - break; - - case FileSystemCmdletProviderEncoding.Ascii: - result = Encoding.ASCII; - break; - - case FileSystemCmdletProviderEncoding.Default: - result = PowerShellEncoding.GetDefaultEncoding(); - break; - - case FileSystemCmdletProviderEncoding.Oem: - result = ClrFacade.GetOEMEncoding(); - break; - - default: - break; - } - - return result; - } // GetEncodingFromEnum - - // [System.Text.Encoding]::GetEncodings() | Where-Object { $_.GetEncoding().GetPreamble() } | - // Add-Member ScriptProperty Preamble { $this.GetEncoding().GetPreamble() -join "-" } -PassThru | - // Format-Table -Auto - internal static Dictionary encodingMap = - new Dictionary() - { - { "255-254", FileSystemCmdletProviderEncoding.Unicode }, - { "254-255", FileSystemCmdletProviderEncoding.BigEndianUnicode }, - { "255-254-0-0", FileSystemCmdletProviderEncoding.UTF32 }, - { "0-0-254-255", FileSystemCmdletProviderEncoding.BigEndianUTF32 }, - { "239-187-191", FileSystemCmdletProviderEncoding.UTF8 }, - }; - - internal static char[] nonPrintableCharacters = { - (char) 0, (char) 1, (char) 2, (char) 3, (char) 4, (char) 5, (char) 6, (char) 7, (char) 8, - (char) 11, (char) 12, (char) 14, (char) 15, (char) 16, (char) 17, (char) 18, (char) 19, (char) 20, - (char) 21, (char) 22, (char) 23, (char) 24, (char) 25, (char) 26, (char) 28, (char) 29, 
(char) 30, - (char) 31, (char) 127, (char) 129, (char) 141, (char) 143, (char) 144, (char) 157 }; - - internal static readonly UTF8Encoding utf8NoBom = - new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); - #if !CORECLR // TODO:CORECLR - WindowsIdentity.Impersonate() is not available. Use WindowsIdentity.RunImpersonated to replace it. /// /// Queues a CLR worker thread with impersonation of provided Windows identity. diff --git a/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs b/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs index d3cf16bc0a8..e3e28fba3f1 100644 --- a/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs +++ b/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs @@ -10,6 +10,7 @@ using System.Security; using System.Globalization; using System.Management.Automation.Runspaces; +using Microsoft.PowerShell; using Microsoft.PowerShell.Commands; using System.Threading; using System.Threading.Tasks; @@ -1070,11 +1071,11 @@ internal string Path _path = value; Encoding = Encoding.UTF8; - FileSystemCmdletProviderEncoding fileEncoding = Utils.GetEncoding(value); + FileEncoding fileEncoding = PowerShellEncoding.GetEncoding(value); - if (fileEncoding != FileSystemCmdletProviderEncoding.Default) + if (fileEncoding != FileEncoding.Default) { - Encoding = Utils.GetEncodingFromEnum(fileEncoding); + Encoding = PowerShellEncoding.GetEncoding(fileEncoding); } } } diff --git a/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs b/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs index aeaa379110f..db5ea0aea46 100644 --- a/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs +++ b/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs @@ -19,6 +19,7 @@ using System.Runtime.InteropServices; using System.Threading; using System.Security.AccessControl; 
+using Microsoft.PowerShell; using Microsoft.Win32.SafeHandles; using Dbg = System.Management.Automation.Diagnostics; using WSManAuthenticationMechanism = System.Management.Automation.Remoting.Client.WSManNativeApi.WSManAuthenticationMechanism; @@ -2179,20 +2180,20 @@ internal static int StartSSHProcess( { Debug.Assert(stdinFd >= 0, "Invalid Fd"); standardInput = new StreamWriter(OpenStream(stdinFd, FileAccess.Write), - Utils.utf8NoBom, StreamBufferSize) + PowerShellEncoding.utf8NoBom, StreamBufferSize) { AutoFlush = true }; } if (startInfo.RedirectStandardOutput) { Debug.Assert(stdoutFd >= 0, "Invalid Fd"); standardOutput = new StreamReader(OpenStream(stdoutFd, FileAccess.Read), - startInfo.StandardOutputEncoding ?? Utils.utf8NoBom, true, StreamBufferSize); + startInfo.StandardOutputEncoding ?? PowerShellEncoding.utf8NoBom, true, StreamBufferSize); } if (startInfo.RedirectStandardError) { Debug.Assert(stderrFd >= 0, "Invalid Fd"); standardError = new StreamReader(OpenStream(stderrFd, FileAccess.Read), - startInfo.StandardErrorEncoding ?? Utils.utf8NoBom, true, StreamBufferSize); + startInfo.StandardErrorEncoding ?? PowerShellEncoding.utf8NoBom, true, StreamBufferSize); } return childPid; diff --git a/src/System.Management.Automation/namespaces/FileSystemProvider.cs b/src/System.Management.Automation/namespaces/FileSystemProvider.cs index e73b207f75c..458ba5e1d4e 100644 --- a/src/System.Management.Automation/namespaces/FileSystemProvider.cs +++ b/src/System.Management.Automation/namespaces/FileSystemProvider.cs @@ -7442,73 +7442,6 @@ public static Hashtable Invoke(System.Management.Automation.PowerShell ps, FileS } } - /// - /// Defines the values that can be supplied as the encoding parameter in the - /// FileSystemContentDynamicParametersBase class. - /// - public enum FileSystemCmdletProviderEncoding - { - /// - /// No encoding. - /// - Unknown, - - /// - /// Unicode encoding. - /// - String, - - /// - /// Unicode encoding. 
- /// - Unicode, - - /// - /// Byte encoding. - /// - Byte, - - /// - /// Big Endian Unicode encoding. - /// - BigEndianUnicode, - - /// - /// UTF8 encoding. - /// - UTF8, - - /// - /// UTF7 encoding. - /// - UTF7, - - /// - /// UTF32 encoding. - /// - UTF32, - - /// - /// ASCII encoding. - /// - Ascii, - - /// - /// Default encoding. - /// - Default, - - /// - /// OEM encoding. - /// - Oem, - - /// - /// Big Endian UTF32 encoding. - /// - BigEndianUTF32, - } // FileSystemCmdletProviderEncoding - #endregion #region Dynamic Parameters @@ -7612,7 +7545,6 @@ public class FileSystemContentDynamicParametersBase /// reading data from the file. /// [Parameter] - // public FileSystemCmdletProviderEncoding Encoding { get; set; } = FileSystemCmdletProviderEncoding.String; public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; /// diff --git a/src/System.Management.Automation/utils/ClrFacade.cs b/src/System.Management.Automation/utils/ClrFacade.cs index 69058bfa373..9b1715cc75f 100644 --- a/src/System.Management.Automation/utils/ClrFacade.cs +++ b/src/System.Management.Automation/utils/ClrFacade.cs @@ -20,6 +20,7 @@ using System.Security; using Microsoft.Win32.SafeHandles; using System.Runtime.InteropServices.ComTypes; +using Microsoft.PowerShell; namespace System.Management.Automation { @@ -269,6 +270,7 @@ private static SecurityZone ReadFromZoneIdentifierDataStream(string filePath) FileAccess.Read, FileShare.Read); // If we successfully get the zone data stream, try to read the ZoneId information + // use the method in this class not PowerShellEncoding. 
using (StreamReader zoneDataReader = new StreamReader(zoneDataSteam, GetDefaultEncoding())) { string line = null; diff --git a/src/System.Management.Automation/utils/Encoding.cs b/src/System.Management.Automation/utils/Encoding.cs index 6b076429e41..3c008df8fcb 100644 --- a/src/System.Management.Automation/utils/Encoding.cs +++ b/src/System.Management.Automation/utils/Encoding.cs @@ -100,8 +100,7 @@ public static class PowerShellEncoding { /// - /// Return the default PowerShell encoding - /// which is UTF8 without a BOM. + /// Return the default PowerShell encoding which is UTF8 without a BOM. /// There is no distinction between platforms /// public static Encoding GetDefaultEncoding() @@ -226,27 +225,8 @@ public static FileEncoding GetEncodingPreference(SessionState sessionState) return encodingPreference; } - /* /// /// Retrieve the encoding in a provider context - /// - /// - public static Encoding GetProviderEncoding(NavigationCmdletProvider provider, FileEncoding encoding) - { - Encoding resolvedEncoding = GetDefaultEncoding(); - FileEncoding encodingPreference = GetEncodingPreference(provider.SessionState); - - if ( encoding != FileEncoding.Unknown ) - { - resolvedEncoding = GetEncoding(encoding); - } - return resolvedEncoding; - } - */ - - /// - /// Retrieve the encoding in a provider context - /// /// public static Encoding GetProviderEncoding(CmdletProvider provider, FileEncoding encoding) { @@ -275,7 +255,6 @@ public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) bool preferenceSetAndValid = false; string name = cmdlet.GetType().FullName.ToLower(CultureInfo.InvariantCulture); - // An encoding has been specified as a parameter (or the explicit parameter value is "Unknown") if ( encoding != FileEncoding.Unknown ) { @@ -291,10 +270,10 @@ public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) } else { - // the parameter is not specifically set - // Check the preference variable + // the parameter is not specifically set, 
so check the preference variable encodingPreference = GetEncodingPreference(cmdlet.Context.SessionState); - preferenceSetAndValid = encodingPreference != FileEncoding.Unknown; // If set to unknown, we accept that it is unset + // If set to unknown, we accept that it is unset + preferenceSetAndValid = encodingPreference != FileEncoding.Unknown; // If the encoding preference has been set to WindowsLegacy, we need to look up the actual encoding if ( encodingPreference == FileEncoding.WindowsLegacy ) { @@ -304,11 +283,10 @@ public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) { resolvedEncoding = GetEncoding(encodingPreference); } - // the final else would be set the encoding to GetDefaultEncoding() - // which was handled above + // the final else would be set the encoding to GetDefaultEncoding() which was handled above } - return resolvedEncoding; // GetEncoding(FileEncoding.UTF8NoBOM); + return resolvedEncoding; } // [System.Text.Encoding]::GetEncodings() | ? { $_.GetEncoding().GetPreamble() } | @@ -330,8 +308,9 @@ public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) (char) 21, (char) 22, (char) 23, (char) 24, (char) 25, (char) 26, (char) 28, (char) 29, (char) 30, (char) 31, (char) 127, (char) 129, (char) 141, (char) 143, (char) 144, (char) 157 }; + internal static readonly UTF8Encoding utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); + // take a look at the file contents and guess at the best encoding - // internal static FileEncoding GetEncoding(string path) { if (!File.Exists(path)) diff --git a/src/System.Management.Automation/utils/PathUtils.cs b/src/System.Management.Automation/utils/PathUtils.cs index ff5751c883b..96d6094cff8 100644 --- a/src/System.Management.Automation/utils/PathUtils.cs +++ b/src/System.Management.Automation/utils/PathUtils.cs @@ -53,6 +53,7 @@ bool isLiteralPath MasterStreamOpen(cmdlet, filePath, resolvedEncoding, defaultEncoding, Append, Force, NoClobber, out fileStream, out 
streamWriter, out readOnlyFileInfo, isLiteralPath); } + /* /// /// THE method for opening a file for writing. /// Should be used by all cmdlets that write to a file. @@ -86,6 +87,7 @@ bool isLiteralPath MasterStreamOpen(cmdlet, filePath, resolvedEncoding, defaultEncoding, Append, Force, NoClobber, out fileStream, out streamWriter, out readOnlyFileInfo, isLiteralPath); } + */ /// /// THE method for opening a file for writing. @@ -230,6 +232,7 @@ internal static StreamReader OpenStreamReader(PSCmdlet command, string filePath, return new StreamReader(fileStream, PowerShellEncoding.GetEncoding(command, encoding)); } + /* internal static StreamReader OpenStreamReader(PSCmdlet command, string filePath, string encoding, bool isLiteralPath) { FileStream fileStream = OpenFileStream(filePath, command, isLiteralPath); @@ -242,6 +245,7 @@ internal static StreamReader OpenStreamReader(PSCmdlet command, string filePath, return new StreamReader(fileStream, EncodingConversion.Convert(command, encoding)); } } + */ internal static FileStream OpenFileStream(string filePath, PSCmdlet command, bool isLiteralPath) { @@ -479,6 +483,7 @@ internal static DirectoryInfo CreateTemporaryDirectory() } } + /* internal static class EncodingConversion { internal const string Unknown = "unknown"; @@ -558,4 +563,5 @@ internal static Encoding Convert(Cmdlet cmdlet, string encoding) return null; } } + */ } From a067de4f9919ded4cf57b1cb8e0281d1d5122728 Mon Sep 17 00:00:00 2001 From: James Truher Date: Mon, 26 Jun 2017 16:49:20 -0700 Subject: [PATCH 04/14] fix another couple of xplat issues the WindowsLegacy behavior for New-ModuleManifest should get the correct number of bytes which will change depending on how many bytes are encoded for [Environment]::NewLine --- test/powershell/engine/Encoding.Tests.ps1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/powershell/engine/Encoding.Tests.ps1 b/test/powershell/engine/Encoding.Tests.ps1 index 07c10c52f22..a29701842c5 100644 --- 
a/test/powershell/engine/Encoding.Tests.ps1 +++ b/test/powershell/engine/Encoding.Tests.ps1 @@ -110,7 +110,7 @@ Describe "Encoding classes and methods are available" -Tag CI { param ( $Name, $Bytes ) $PSDefaultFileEncoding = $Name $testString | out-file -encoding ascii $testFile - Get-FileBytes $testFile | should be "116-63-115-116-10" + Get-FileBytes $testFile | should be ("116-63-115-116-" + (Get-NewLineBytes ASCII)) } Context "Legacy Windows Behavior" { @@ -149,7 +149,7 @@ Describe "Encoding classes and methods are available" -Tag CI { $newLineBytes = $legacyEncoding.GetBytes([Environment]::NewLine) $newLineByteString = $newLineBytes -join "-" $expected = "255-254-35-0-${newLineByteString}-35-0-32-0" - Get-FileBytes $TESTDRIVE/${testString}.psd1 -count 10 | should match $expected + Get-FileBytes $TESTDRIVE/${testString}.psd1 -count (8 + $newLineBytes.Count) | should match $expected } It "Out-File creates properly encoded files" { From 9b8e4fb5036c98b98ccaf12416499f931497e0b2 Mon Sep 17 00:00:00 2001 From: James Truher Date: Thu, 29 Jun 2017 14:48:21 -0700 Subject: [PATCH 05/14] Return static Utf8NoBom encoder rather than creating a new instance Update the calls which had been creating a new instance of the Utf8 encoding without BOM so that they return the available static instance --- src/System.Management.Automation/utils/Encoding.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/System.Management.Automation/utils/Encoding.cs b/src/System.Management.Automation/utils/Encoding.cs index 3c008df8fcb..584bdd307f1 100644 --- a/src/System.Management.Automation/utils/Encoding.cs +++ b/src/System.Management.Automation/utils/Encoding.cs @@ -105,7 +105,7 @@ public static class PowerShellEncoding /// public static Encoding GetDefaultEncoding() { - return new UTF8Encoding(false); + return utf8NoBom; } /// @@ -139,7 +139,7 @@ public static Encoding GetEncoding(FileEncoding TextEncoding) case FileEncoding.UTF8: case FileEncoding.UTF8NoBOM: - result = new
UTF8Encoding(false); + result = utf8NoBom; break; case FileEncoding.UTF7: From 5309b9078885e1520b2dd2bd78cc234877e2af73 Mon Sep 17 00:00:00 2001 From: James Truher Date: Thu, 29 Jun 2017 15:10:50 -0700 Subject: [PATCH 06/14] Change expected encoding to not distinguish based on platform except for newline --- test/powershell/engine/Module/NewModuleManifest.Tests.ps1 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/powershell/engine/Module/NewModuleManifest.Tests.ps1 b/test/powershell/engine/Module/NewModuleManifest.Tests.ps1 index 2baae5dfac5..6ba187eaff6 100644 --- a/test/powershell/engine/Module/NewModuleManifest.Tests.ps1 +++ b/test/powershell/engine/Module/NewModuleManifest.Tests.ps1 @@ -9,9 +9,10 @@ Describe "New-ModuleManifest tests" -tags "CI" { } BeforeAll { - if ($IsWindows) + # encoding is the same on all platforms, except for new lines + if ( $IsWindows ) { - $ExpectedManifestBytes = @(255,254,35,0,13,0,10,0) + $ExpectedManifestBytes = @(35,13,10) } else { From 9a45f4042e793102fb96222391d1e6d65184e4ac Mon Sep 17 00:00:00 2001 From: James Truher Date: Tue, 11 Jul 2017 16:33:17 -0700 Subject: [PATCH 07/14] Remove commented out code --- .../utils/PathUtils.cs | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/System.Management.Automation/utils/PathUtils.cs b/src/System.Management.Automation/utils/PathUtils.cs index 96d6094cff8..2f8333d95dd 100644 --- a/src/System.Management.Automation/utils/PathUtils.cs +++ b/src/System.Management.Automation/utils/PathUtils.cs @@ -232,21 +232,6 @@ internal static StreamReader OpenStreamReader(PSCmdlet command, string filePath, return new StreamReader(fileStream, PowerShellEncoding.GetEncoding(command, encoding)); } - /* - internal static StreamReader OpenStreamReader(PSCmdlet command, string filePath, string encoding, bool isLiteralPath) - { - FileStream fileStream = OpenFileStream(filePath, command, isLiteralPath); - if (encoding == null) - { - return new 
StreamReader(fileStream); - } - else - { - return new StreamReader(fileStream, EncodingConversion.Convert(command, encoding)); - } - } - */ - internal static FileStream OpenFileStream(string filePath, PSCmdlet command, bool isLiteralPath) { string resolvedPath = PathUtils.ResolveFilePath(filePath, command, isLiteralPath); From 65c255c48a72b341e1c8e00cca55f99b1e6663b8 Mon Sep 17 00:00:00 2001 From: James Truher Date: Wed, 19 Jul 2017 15:42:24 -0700 Subject: [PATCH 08/14] refactor tests to use fewer instances of hardcoded byte strings only use hardcoded bytes when it's a custom file generation (like Export-CliXml or New-ModuleManifest) or when we're looking at a set of partial results also remove an unused function --- test/powershell/engine/Encoding.Tests.ps1 | 133 ++++++++++++---------- 1 file changed, 75 insertions(+), 58 deletions(-) diff --git a/test/powershell/engine/Encoding.Tests.ps1 b/test/powershell/engine/Encoding.Tests.ps1 index a29701842c5..225ad76c249 100644 --- a/test/powershell/engine/Encoding.Tests.ps1 +++ b/test/powershell/engine/Encoding.Tests.ps1 @@ -33,50 +33,58 @@ Describe "Encoding classes and methods are available" -Tag CI { $encoder = [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding($encoding) $encoder.GetBytes([Environment]::NewLine) -Join "-" } - function Test-GetEncoding - { - [CmdletBinding()] - param ( - [Microsoft.PowerShell.FileEncoding]$Encoding - ) - END { - [Microsoft.PowerShell.PowerShellEncoding]::GetCmdletEncoding($pscmdlet, $encoding) - } - } $preambleTests = - @{ Name = 'Ascii'; Preamble = '' }, - @{ Name = 'BigEndianUTF32'; Preamble = '254-255' }, - @{ Name = 'BigEndianUnicode'; Preamble = '254-255' }, - @{ Name = 'Byte'; Preamble = '255-254' }, - @{ Name = 'Default'; Preamble = '' }, - @{ Name = 'Oem'; Preamble = '' }, - @{ Name = 'String'; Preamble = '255-254' }, - @{ Name = 'UTF32'; Preamble = '255-254-0-0' }, - @{ Name = 'UTF7'; Preamble = '' }, - @{ Name = 'UTF8'; Preamble = '' }, - @{ Name = 'UTF8BOM'; Preamble = 
'239-187-191' }, - @{ Name = 'UTF8NoBOM'; Preamble = '' }, - @{ Name = 'Unicode'; Preamble = '255-254' }, - @{ Name = 'Unknown'; Preamble = '' }, - @{ Name = 'WindowsLegacy'; Preamble = '' } + @{ Encoding = 'Ascii'; Preamble = '' }, + @{ Encoding = 'BigEndianUTF32'; Preamble = '254-255' }, + @{ Encoding = 'BigEndianUnicode'; Preamble = '254-255' }, + @{ Encoding = 'Byte'; Preamble = '255-254' }, + @{ Encoding = 'Default'; Preamble = '' }, + @{ Encoding = 'Oem'; Preamble = '' }, + @{ Encoding = 'String'; Preamble = '255-254' }, + @{ Encoding = 'UTF32'; Preamble = '255-254-0-0' }, + @{ Encoding = 'UTF7'; Preamble = '' }, + @{ Encoding = 'UTF8'; Preamble = '' }, + @{ Encoding = 'UTF8BOM'; Preamble = '239-187-191' }, + @{ Encoding = 'UTF8NoBOM'; Preamble = '' }, + @{ Encoding = 'Unicode'; Preamble = '255-254' }, + @{ Encoding = 'Unknown'; Preamble = '' }, + @{ Encoding = 'WindowsLegacy'; Preamble = '' } + + $testStringEncodedBytes = @{ + Ascii = "116-63-115-116-" + (Get-NewLineBytes Ascii) + BigEndianUTF32 = "254-255-0-116-0-233-0-115-0-116-" + (Get-NewLineBytes BigEndianUTF32) + BigEndianUnicode = "254-255-0-116-0-233-0-115-0-116-" + (Get-NewLineBytes BigEndianUnicode) + Byte = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Byte) + Default = "116-195-169-115-116-" + (Get-NewLineBytes Default) + # Oem encoding can change depending on system, calculate the expected string + Oem = ([Microsoft.PowerShell.PowerShellEncoding]::GetEncoding("Oem").GetBytes($testString) -join "-") + "-" + (Get-NewLineBytes Oem) + String = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes String) + UTF32 = "255-254-0-0-116-0-0-0-233-0-0-0-115-0-0-0-116-0-0-0-" + (Get-NewLineBytes UTF32) + UTF7 = "116-43-65-79-107-45-115-116-" + (Get-NewLineBytes UTF7) + UTF8 = "116-195-169-115-116-" + (Get-NewLineBytes UTF8 ) + UTF8BOM = "239-187-191-116-195-169-115-116-" + (Get-NewLineBytes UTF8BOM) + UTF8NoBOM = "116-195-169-115-116-" + (Get-NewLineBytes UTF8NoBOM) + Unicode = 
"255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Unicode) + Unknown = "116-195-169-115-116-" + (Get-NewLineBytes Unknown) + } $contentTests = - @{ Name = 'Ascii'; Bytes = "116-63-115-116-" + (Get-NewLineBytes Ascii) }, - @{ Name = 'BigEndianUTF32'; Bytes = "254-255-0-116-0-233-0-115-0-116-" + (Get-NewLineBytes BigEndianUTF32) }, - @{ Name = 'BigEndianUnicode'; Bytes = "254-255-0-116-0-233-0-115-0-116-" + (Get-NewLineBytes BigEndianUnicode) }, - @{ Name = 'Byte'; Bytes = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Byte) }, - @{ Name = 'Default'; Bytes = "116-195-169-115-116-" + (Get-NewLineBytes Default) }, + @{ Encoding = 'Ascii'; Bytes = $testStringEncodedBytes['Ascii'] }, + @{ Encoding = 'BigEndianUTF32'; Bytes = $testStringEncodedBytes['BigEndianUTF32'] }, + @{ Encoding = 'BigEndianUnicode'; Bytes = $testStringEncodedBytes['BigEndianUnicode'] }, + @{ Encoding = 'Byte'; Bytes = $testStringEncodedBytes['Byte'] }, + @{ Encoding = 'Default'; Bytes = $testStringEncodedBytes['Default'] }, # Oem encoding can change depending on system, calculate the expected string - @{ Name = 'Oem'; Bytes = ([Microsoft.PowerShell.PowerShellEncoding]::GetEncoding("Oem").GetBytes($testString) -join "-") + "-" + (Get-NewLineBytes Oem) }, - @{ Name = 'String'; Bytes = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes String) }, - @{ Name = 'UTF32'; Bytes = "255-254-0-0-116-0-0-0-233-0-0-0-115-0-0-0-116-0-0-0-" + (Get-NewLineBytes UTF32) }, - @{ Name = 'UTF7'; Bytes = "116-43-65-79-107-45-115-116-" + (Get-NewLineBytes UTF7) }, - @{ Name = 'UTF8'; Bytes = "116-195-169-115-116-" + (Get-NewLineBytes UTF8 ) }, - @{ Name = 'UTF8BOM'; Bytes = "239-187-191-116-195-169-115-116-" + (Get-NewLineBytes UTF8BOM) }, - @{ Name = 'UTF8NoBOM'; Bytes = "116-195-169-115-116-" + (Get-NewLineBytes UTF8NoBOM) }, - @{ Name = 'Unicode'; Bytes = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Unicode) }, - @{ Name = 'Unknown'; Bytes = "116-195-169-115-116-" + (Get-NewLineBytes Unknown) 
} + @{ Encoding = 'Oem'; Bytes = $testStringEncodedBytes['Oem'] }, + @{ Encoding = 'String'; Bytes = $testStringEncodedBytes['String'] }, + @{ Encoding = 'UTF32'; Bytes = $testStringEncodedBytes['UTF32'] }, + @{ Encoding = 'UTF7'; Bytes = $testStringEncodedBytes['UTF7'] }, + @{ Encoding = 'UTF8'; Bytes = $testStringEncodedBytes['UTF8'] }, + @{ Encoding = 'UTF8BOM'; Bytes = $testStringEncodedBytes['UTF8BOM'] }, + @{ Encoding = 'UTF8NoBOM'; Bytes = $testStringEncodedBytes['UTF8NoBOM'] }, + @{ Encoding = 'Unicode'; Bytes = $testStringEncodedBytes['Unicode'] }, + @{ Encoding = 'Unknown'; Bytes = $testStringEncodedBytes['Unknown'] } } @@ -88,52 +96,57 @@ Describe "Encoding classes and methods are available" -Tag CI { $PSDefaultFileEncoding = "Unknown" } - It "Encoding for '' should have correct preamble ''" -TestCase $preambleTests { - param ( $Name, $Preamble ) - [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding($Name).GetPreamble() -Join "-" | Should be $Preamble + It "Encoding for '' should have correct preamble ''" -TestCase $preambleTests { + param ( $Encoding, $Preamble ) + [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding($Encoding).GetPreamble() -Join "-" | Should be $Preamble } - It "Encoding for '' should create file with proper encoding" -TestCase $contentTests { - param ( $Name, $Bytes ) - $testString | out-file -encoding $Name $testFile + It "Encoding for '' should create file with proper encoding" -TestCase $contentTests { + param ( $Encoding, $Bytes ) + $testString | out-file -encoding $Encoding $testFile Get-FileBytes $testFile | should be $Bytes } - It "Setting PSDefaultFileEncoding to '' should create file with proper encoding" -TestCase $contentTests { - param ( $Name, $Bytes ) - $PSDefaultFileEncoding = $Name + It "Setting PSDefaultFileEncoding to '' should create file with proper encoding" -TestCase $contentTests { + param ( $Encoding, $Bytes ) + $PSDefaultFileEncoding = $Encoding $testString | out-file $testFile Get-FileBytes $testFile | 
should be $Bytes } - It "Explicit encoding is not overridden by setting PSDefaultFileEncoding to ''" -TestCase $contentTests { - param ( $Name, $Bytes ) - $PSDefaultFileEncoding = $Name + It "Explicit encoding is not overridden by setting PSDefaultFileEncoding to ''" -TestCase $contentTests { + param ( $Encoding, $Bytes ) + $PSDefaultFileEncoding = $Encoding $testString | out-file -encoding ascii $testFile - Get-FileBytes $testFile | should be ("116-63-115-116-" + (Get-NewLineBytes ASCII)) + Get-FileBytes $testFile | should be $testStringEncodedBytes['Ascii'] } Context "Legacy Windows Behavior" { - It "Add-Content creates ascii encoded files" { + It "Add-Content creates utf8 encoded files" { $testString | add-content -encoding WindowsLegacy $TESTDRIVE/file.txt - Get-FileBytes $TESTDRIVE/file.txt | should be ("116-195-169-115-116-" + (Get-NewLineBytes ASCII)) + Get-FileBytes $TESTDRIVE/file.txt | should be $testStringEncodedBytes['UTF8'] } - It "Set-Content creates ascii encoded files" { + It "Set-Content creates utf8 encoded files" { $testString | set-content -encoding WindowsLegacy $TESTDRIVE/file.txt - Get-FileBytes $TESTDRIVE/file.txt | should be ("116-195-169-115-116-" + (Get-NewLineBytes ASCII)) + Get-FileBytes $TESTDRIVE/file.txt | should be $testStringEncodedBytes['UTF8'] } It "Export-CliXml creates unicode encoded files" { [pscustomobject]@{ text = $testString } | export-clixml -encoding WindowsLegacy $TESTDRIVE/file.clixml + # these are the characters + # Date: Thu, 20 Jul 2017 16:56:41 -0700 Subject: [PATCH 09/14] update tests to include tests against Encoding probe method for existing files improve code coverage for PowerShellEncoding class and don't duplicate byte representations when they're not needed --- test/powershell/engine/Encoding.Tests.ps1 | 39 +++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/test/powershell/engine/Encoding.Tests.ps1 b/test/powershell/engine/Encoding.Tests.ps1 index 225ad76c249..bc245fa2519 
100644 --- a/test/powershell/engine/Encoding.Tests.ps1 +++ b/test/powershell/engine/Encoding.Tests.ps1 @@ -36,7 +36,7 @@ Describe "Encoding classes and methods are available" -Tag CI { $preambleTests = @{ Encoding = 'Ascii'; Preamble = '' }, - @{ Encoding = 'BigEndianUTF32'; Preamble = '254-255' }, + @{ Encoding = 'BigEndianUTF32'; Preamble = '0-0-254-255' }, @{ Encoding = 'BigEndianUnicode'; Preamble = '254-255' }, @{ Encoding = 'Byte'; Preamble = '255-254' }, @{ Encoding = 'Default'; Preamble = '' }, @@ -53,7 +53,7 @@ Describe "Encoding classes and methods are available" -Tag CI { $testStringEncodedBytes = @{ Ascii = "116-63-115-116-" + (Get-NewLineBytes Ascii) - BigEndianUTF32 = "254-255-0-116-0-233-0-115-0-116-" + (Get-NewLineBytes BigEndianUTF32) + BigEndianUTF32 = "0-0-254-255-0-0-0-116-0-0-0-233-0-0-0-115-0-0-0-116-" + (Get-NewLineBytes BigEndianUTF32) BigEndianUnicode = "254-255-0-116-0-233-0-115-0-116-" + (Get-NewLineBytes BigEndianUnicode) Byte = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Byte) Default = "116-195-169-115-116-" + (Get-NewLineBytes Default) @@ -121,6 +121,41 @@ Describe "Encoding classes and methods are available" -Tag CI { Get-FileBytes $testFile | should be $testStringEncodedBytes['Ascii'] } + It "Explicit encoding set to unknown and preference variable set to unicode creates unicode file" { + $PSDefaultFileEncoding = "Unicode" + $testString | set-content -encoding unknown $testfile + Get-FileBytes $testFile | should be $testStringEncodedBytes['Unicode'] + } + + It "getting the encoding for an unknown cmdlet should return utf-8" { + $method = [microsoft.powershell.powershellencoding].getmember("GetWindowsLegacyEncoding","NonPublic,Static") + $method.Invoke($null, "badcmdlet").BodyName | should be "utf-8" + } + + It "When session state is null, GetEncodingPreference returns unknown" { + [Microsoft.PowerShell.PowerShellEncoding]::GetEncodingPreference($null) | should be "unknown" + } + + Context "GetFileEncodingFromFile tests" { 
+ BeforeAll { + $TestCases = @{ Encoding = "Unicode"; Text = $testString; FilePath = $testFile }, + @{ Encoding = "UTF8NoBOM"; Text = $testString; FilePath = $testFile }, + @{ Encoding = "UTF32"; Text = $testString; FilePath = $testFile }, + @{ Encoding = "BigEndianUTF32"; Text = $testString; FilePath = $testFile }, + @{ Encoding = "UTF8Bom"; Text = $testString; FilePath = $testFile }, + @{ Encoding = "Byte"; Text = [byte[]](20..40); FilePath = $testFile }, + @{ Encoding = "UTF8NoBom"; Text = ""; FilePath = $testFile }, + @{ Encoding = "Default"; Text = ""; FilePath = "$TESTDRIVE/ThisFileCouldNotPossiblyExist" } + } + + It "GetFileEncodingFromFile can discover a encoded file" -TestCase $TestCases { + param ( $Encoding, $Text, $FilePath ) + # I need a way to not open the right file to test the missing file scenario + $Text | set-content -encoding $Encoding $testFile + [Microsoft.PowerShell.PowerShellEncoding]::GetFileEncodingFromFile($FilePath) | should be $encoding + } + } + Context "Legacy Windows Behavior" { It "Add-Content creates utf8 encoded files" { From acac54769113fde47f63f193f75048e54b26ca3c Mon Sep 17 00:00:00 2001 From: James Truher Date: Thu, 20 Jul 2017 17:05:16 -0700 Subject: [PATCH 10/14] Refactor Encoding.cs to improve readability Remove a couple of extraneous using statements Move encoding code from PathUtils.cs to Encoding.cs Move and rename GetEncoding method in Utils.cs to GetFileEncodingFromFile in Encoding.cs Expand some explanatory comments with more details --- .../FormatAndOutput/common/FormatXMLWriter.cs | 2 +- .../engine/Utils.cs | 1 - .../engine/hostifaces/MshHostUserInterface.cs | 2 +- .../utils/ClrFacade.cs | 5 + .../utils/Encoding.cs | 206 +++++++++--------- .../utils/PathUtils.cs | 119 +--------- 6 files changed, 117 insertions(+), 218 deletions(-) diff --git a/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs 
b/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs index 5f425d474d3..d3e98d71b92 100644 --- a/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs +++ b/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs @@ -38,7 +38,7 @@ internal static void WriteToPs1Xml(PSCmdlet cmdlet, List StreamWriter streamWriter; FileStream fileStream; FileInfo fileInfo; - PathUtils.MasterStreamOpen(cmdlet, filepath, PowerShellEncoding.GetEncoding(FileEncoding.Ascii), true, false, force, noclobber, + PathUtils.MasterStreamOpen(cmdlet, filepath, FileEncoding.Ascii, true, false, force, noclobber, out fileStream, out streamWriter, out fileInfo, isLiteralPath); try diff --git a/src/System.Management.Automation/engine/Utils.cs b/src/System.Management.Automation/engine/Utils.cs index 17c99e34f2a..848a79af18f 100644 --- a/src/System.Management.Automation/engine/Utils.cs +++ b/src/System.Management.Automation/engine/Utils.cs @@ -8,7 +8,6 @@ using System.Management.Automation.Internal; using System.Management.Automation.Security; using System.Reflection; -using Microsoft.PowerShell; using Microsoft.PowerShell.Commands; using Microsoft.Win32; using System.Globalization; diff --git a/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs b/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs index e3e28fba3f1..f3f069a55f5 100644 --- a/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs +++ b/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs @@ -1071,7 +1071,7 @@ internal string Path _path = value; Encoding = Encoding.UTF8; - FileEncoding fileEncoding = PowerShellEncoding.GetEncoding(value); + FileEncoding fileEncoding = PowerShellEncoding.GetFileEncodingFromFile(value); if (fileEncoding != FileEncoding.Default) { diff --git a/src/System.Management.Automation/utils/ClrFacade.cs 
b/src/System.Management.Automation/utils/ClrFacade.cs index 9b1715cc75f..6ee0086d4fa 100644 --- a/src/System.Management.Automation/utils/ClrFacade.cs +++ b/src/System.Management.Automation/utils/ClrFacade.cs @@ -127,6 +127,11 @@ internal static Encoding GetDefaultEncoding() /// internal static Encoding GetOEMEncoding() { + // The OEM code pages are sometimes used by Win32 console applications, and + // on non-Windows platforms they still may have uses (if installed) and + // could be used if desired. + // On non-windows platforms, they have more limited uses, and probably won't + // be installed. if (s_oemEncoding == null) { #if UNIX // PowerShell Core on Unix diff --git a/src/System.Management.Automation/utils/Encoding.cs b/src/System.Management.Automation/utils/Encoding.cs index 584bdd307f1..3f65ab9e356 100644 --- a/src/System.Management.Automation/utils/Encoding.cs +++ b/src/System.Management.Automation/utils/Encoding.cs @@ -83,7 +83,7 @@ public enum FileEncoding Oem, /// - /// Big Endian UTF32 encoding. 
+ /// Big Endian UTF32 encoding /// BigEndianUTF32, @@ -111,6 +111,7 @@ public static Encoding GetDefaultEncoding() /// /// translate a FileEncoding to an actual System.Text.Encoding /// The enum value + /// System.Text.Encoding /// public static Encoding GetEncoding(FileEncoding TextEncoding) { @@ -151,7 +152,9 @@ public static Encoding GetEncoding(FileEncoding TextEncoding) break; case FileEncoding.BigEndianUTF32: - result = Encoding.BigEndianUnicode; + // This can possibly throw, but if so, we can't provide + // the encoding which the user requested, so we should fail + result = Encoding.GetEncoding("utf-32BE"); break; case FileEncoding.Ascii: @@ -173,80 +176,11 @@ public static Encoding GetEncoding(FileEncoding TextEncoding) return result; } - // the way the encoding is implemented in PowerShell 5 and earlier - // if the user sets the default encoding to WindowsLegacy, we will - // be able to encode for that - internal static Dictionary legacyEncodingMap = - new Dictionary(StringComparer.OrdinalIgnoreCase) - { - { "microsoft.powershell.commands.addcontentcommand", Encoding.ASCII }, - { "microsoft.powershell.commands.exportclixmlcommand", Encoding.Unicode }, - { "microsoft.powershell.commands.exportcsvcommand", Encoding.ASCII }, - { "microsoft.powershell.commands.exportpssessioncommand", Encoding.UTF8 }, // with BOM - { "microsoft.powershell.commands.formathex", Encoding.ASCII }, - { "microsoft.powershell.commands.newmodulemanifestcommand", Encoding.Unicode }, - { "microsoft.powershell.commands.getcontentcommand", Encoding.ASCII }, - { "microsoft.powershell.commands.importcsvcommand", Encoding.ASCII }, - { "microsoft.powershell.commands.outfilecommand", Encoding.Unicode }, // This includes redirection - { "microsoft.powershell.commands.setcontentcommand", Encoding.ASCII }, - // Providers are handled here - { "microsoft.powershell.commands.filesystemprovider", Encoding.ASCII }, - - }; - - internal static Encoding GetWindowsLegacyEncoding(string name) - { - if ( 
legacyEncodingMap.ContainsKey(name)) - { - return legacyEncodingMap[name]; - } - else - { - return Encoding.Default; - } - } - - /// - /// Retrieve the PSDefaultFileEncoding preference value if set - /// - public static FileEncoding GetEncodingPreference(SessionState sessionState) - { - FileEncoding encodingPreference = FileEncoding.Unknown; - try - { - // It doesn't matter if this fails or throws, we will return unknown in that case - object tmp = sessionState.PSVariable.GetValue("PSDefaultFileEncoding"); - LanguagePrimitives.TryConvertTo(tmp, out encodingPreference); - } - catch - { - ; - } - return encodingPreference; - } - - /// - /// Retrieve the encoding in a provider context - /// - public static Encoding GetProviderEncoding(CmdletProvider provider, FileEncoding encoding) - { - Encoding resolvedEncoding = GetDefaultEncoding(); - FileEncoding encodingPreference = GetEncodingPreference(provider.SessionState); - if ( encoding == FileEncoding.Unknown && encodingPreference != FileEncoding.Unknown ) - { - resolvedEncoding = GetEncoding(encodingPreference); - } - else if ( encoding != FileEncoding.Unknown ) - { - resolvedEncoding = GetEncoding(encoding); - } - return resolvedEncoding; - } - /// /// Retrieve the encoding based on the Cmdlet and the Encoding /// The cmdlet of interest /// The Encoding parameter value + /// System.Text.Encoding /// public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) { @@ -273,7 +207,7 @@ public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) // the parameter is not specifically set, so check the preference variable encodingPreference = GetEncodingPreference(cmdlet.Context.SessionState); // If set to unknown, we accept that it is unset - preferenceSetAndValid = encodingPreference != FileEncoding.Unknown; + preferenceSetAndValid = encodingPreference != FileEncoding.Unknown; // If the encoding preference has been set to WindowsLegacy, we need to look up the actual encoding if ( encodingPreference == 
FileEncoding.WindowsLegacy ) { @@ -289,29 +223,12 @@ public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) return resolvedEncoding; } - // [System.Text.Encoding]::GetEncodings() | ? { $_.GetEncoding().GetPreamble() } | - // Add-Member ScriptProperty Preamble { $this.GetEncoding().GetPreamble() -join "-" } -PassThru | - // Format-Table -Auto - internal static Dictionary encodingMap = - new Dictionary() - { - { "255-254", FileEncoding.Unicode }, - { "254-255", FileEncoding.BigEndianUnicode }, - { "255-254-0-0", FileEncoding.UTF32 }, - { "0-0-254-255", FileEncoding.BigEndianUTF32 }, - { "239-187-191", FileEncoding.UTF8 }, - }; - - internal static char[] nonPrintableCharacters = { - (char) 0, (char) 1, (char) 2, (char) 3, (char) 4, (char) 5, (char) 6, (char) 7, (char) 8, - (char) 11, (char) 12, (char) 14, (char) 15, (char) 16, (char) 17, (char) 18, (char) 19, (char) 20, - (char) 21, (char) 22, (char) 23, (char) 24, (char) 25, (char) 26, (char) 28, (char) 29, (char) 30, - (char) 31, (char) 127, (char) 129, (char) 141, (char) 143, (char) 144, (char) 157 }; - - internal static readonly UTF8Encoding utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); - - // take a look at the file contents and guess at the best encoding - internal static FileEncoding GetEncoding(string path) + /// + /// Given a path to a file, attempt to retrieve the encoding + /// The path to a file to inspect for an encoding + /// System.Text.Encoding + /// + public static FileEncoding GetFileEncodingFromFile(string path) { if (!File.Exists(path)) { @@ -327,7 +244,7 @@ internal static FileEncoding GetEncoding(string path) { using (BinaryReader reader = new BinaryReader(stream)) { - bytesRead = reader.Read(initialBytes, 0, 100); + bytesRead = reader.Read(initialBytes, 0, initialBytes.Length); } } } @@ -381,6 +298,101 @@ internal static FileEncoding GetEncoding(string path) // return UTF8 without a BOM which should be good for both Windows and Non-Windows return 
FileEncoding.UTF8NoBOM; } + + /// + /// Retrieve the PSDefaultFileEncoding preference value if set + /// + public static FileEncoding GetEncodingPreference(SessionState sessionState) + { + FileEncoding encodingPreference = FileEncoding.Unknown; + try + { + // It doesn't matter if this fails or throws, we will return unknown in that case + object tmp = sessionState.PSVariable.GetValue("PSDefaultFileEncoding"); + LanguagePrimitives.TryConvertTo(tmp, out encodingPreference); + } + catch + { + ; + } + return encodingPreference; + } + + /// + /// Retrieve the encoding in a provider context + /// + public static Encoding GetProviderEncoding(CmdletProvider provider, FileEncoding encoding) + { + Encoding resolvedEncoding = GetDefaultEncoding(); + FileEncoding encodingPreference = GetEncodingPreference(provider.SessionState); + // If the encoding isn't set, but is available as $PSDefaultFileEncoding, use that + // It the encoding is set use that, otherwise return the default encoding + if ( encoding == FileEncoding.Unknown && encodingPreference != FileEncoding.Unknown ) + { + resolvedEncoding = GetEncoding(encodingPreference); + } + else if ( encoding != FileEncoding.Unknown ) + { + resolvedEncoding = GetEncoding(encoding); + } + return resolvedEncoding; + } + + // This is the way the encoding is implemented in PowerShell 5 and earlier. 
+ // If the user sets the default encoding to WindowsLegacy, we will + // be able to encode for that + internal static Dictionary legacyEncodingMap = + new Dictionary(StringComparer.OrdinalIgnoreCase) + { + { "microsoft.powershell.commands.addcontentcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.exportclixmlcommand", Encoding.Unicode }, + { "microsoft.powershell.commands.exportcsvcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.exportpssessioncommand", Encoding.UTF8 }, // with BOM + { "microsoft.powershell.commands.formathex", Encoding.ASCII }, + { "microsoft.powershell.commands.newmodulemanifestcommand", Encoding.Unicode }, + { "microsoft.powershell.commands.getcontentcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.importcsvcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.outfilecommand", Encoding.Unicode }, // This includes redirection + { "microsoft.powershell.commands.setcontentcommand", Encoding.ASCII }, + // Providers are handled here + { "microsoft.powershell.commands.filesystemprovider", Encoding.ASCII }, + }; + + /// Get the Windows legacy encoding from our encoding map + internal static Encoding GetWindowsLegacyEncoding(string name) + { + if ( legacyEncodingMap.ContainsKey(name)) + { + return legacyEncodingMap[name]; + } + else + { + return Encoding.Default; + } + } + + + // [System.Text.Encoding]::GetEncodings() | ? 
{ $_.GetEncoding().GetPreamble() } | + // Add-Member ScriptProperty Preamble { $this.GetEncoding().GetPreamble() -join "-" } -PassThru | + // Format-Table -Auto + internal static Dictionary encodingMap = + new Dictionary() + { + { "255-254", FileEncoding.Unicode }, + { "254-255", FileEncoding.BigEndianUnicode }, + { "255-254-0-0", FileEncoding.UTF32 }, + { "0-0-254-255", FileEncoding.BigEndianUTF32 }, + { "239-187-191", FileEncoding.UTF8BOM }, + }; + + internal static char[] nonPrintableCharacters = { + (char) 0, (char) 1, (char) 2, (char) 3, (char) 4, (char) 5, (char) 6, (char) 7, (char) 8, + (char) 11, (char) 12, (char) 14, (char) 15, (char) 16, (char) 17, (char) 18, (char) 19, (char) 20, + (char) 21, (char) 22, (char) 23, (char) 24, (char) 25, (char) 26, (char) 28, (char) 29, (char) 30, + (char) 31, (char) 127, (char) 129, (char) 141, (char) 143, (char) 144, (char) 157 }; + + internal static readonly UTF8Encoding utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); + } } diff --git a/src/System.Management.Automation/utils/PathUtils.cs b/src/System.Management.Automation/utils/PathUtils.cs index 2f8333d95dd..416d6063263 100644 --- a/src/System.Management.Automation/utils/PathUtils.cs +++ b/src/System.Management.Automation/utils/PathUtils.cs @@ -53,49 +53,13 @@ bool isLiteralPath MasterStreamOpen(cmdlet, filePath, resolvedEncoding, defaultEncoding, Append, Force, NoClobber, out fileStream, out streamWriter, out readOnlyFileInfo, isLiteralPath); } - /* /// /// THE method for opening a file for writing. /// Should be used by all cmdlets that write to a file. 
/// /// cmdlet that is opening the file (used mainly for error reporting) /// path to the file (as specified on the command line - this method will resolve the path) - /// encoding (this method will convert the command line string to an Encoding instance) - /// if true, then we will use default .NET encoding instead of the encoding specified in parameter - /// - /// - /// - /// Result1: opened for writing - /// Result2: (inherits from ) opened for writing - /// Result3: file info that should be used to restore file attributes after done with the file (null is this is not needed) - /// True if wildcard expansion should be bypassed. - internal static void MasterStreamOpen( - PSCmdlet cmdlet, - string filePath, - string encoding, - bool defaultEncoding, - bool Append, - bool Force, - bool NoClobber, - out FileStream fileStream, - out StreamWriter streamWriter, - out FileInfo readOnlyFileInfo, - bool isLiteralPath - ) - { - Encoding resolvedEncoding = EncodingConversion.Convert(cmdlet, encoding); - - MasterStreamOpen(cmdlet, filePath, resolvedEncoding, defaultEncoding, Append, Force, NoClobber, out fileStream, out streamWriter, out readOnlyFileInfo, isLiteralPath); - } - */ - - /// - /// THE method for opening a file for writing. - /// Should be used by all cmdlets that write to a file. 
- /// - /// cmdlet that is opening the file (used mainly for error reporting) - /// path to the file (as specified on the command line - this method will resolve the path) - /// encoding (this method will convert the command line string to an Encoding instance) + /// the encoding (this method will convert the FileEncoding value to an Encoding instance) /// if true, then we will use default .NET encoding instead of the encoding specified in parameter /// /// @@ -468,85 +432,4 @@ internal static DirectoryInfo CreateTemporaryDirectory() } } - /* - internal static class EncodingConversion - { - internal const string Unknown = "unknown"; - internal const string String = "string"; - internal const string Unicode = "unicode"; - internal const string BigEndianUnicode = "bigendianunicode"; - internal const string Ascii = "ascii"; - internal const string Utf8 = "utf8"; - internal const string Utf7 = "utf7"; - internal const string Utf32 = "utf32"; - internal const string Default = "default"; - internal const string OEM = "oem"; - - /// - /// retrieve the encoding parameter from the command line - /// it throws if the encoding does not match the known ones - /// - /// a System.Text.Encoding object (null if no encoding specified) - internal static Encoding Convert(Cmdlet cmdlet, string encoding) - { - if (string.IsNullOrEmpty(encoding)) - { - // no parameter passed, default to Unicode (OS preferred) - return System.Text.Encoding.Unicode; - } - - // Default to unicode (this matches Get-Content) - if (string.Equals(encoding, Unknown, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.Unicode; - - if (string.Equals(encoding, String, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.Unicode; - - // these are the encodings the CLR supports - if (string.Equals(encoding, Unicode, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.Unicode; - - if (string.Equals(encoding, BigEndianUnicode, StringComparison.OrdinalIgnoreCase)) - return 
System.Text.Encoding.BigEndianUnicode; - - if (string.Equals(encoding, Utf8, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.UTF8; - - if (string.Equals(encoding, Ascii, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.ASCII; - - if (string.Equals(encoding, Utf7, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.UTF7; - - if (string.Equals(encoding, Utf32, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.UTF32; - - if (string.Equals(encoding, Default, StringComparison.OrdinalIgnoreCase)) - return PowerShellEncoding.GetDefaultEncoding(); - - if (string.Equals(encoding, OEM, StringComparison.OrdinalIgnoreCase)) - { - return ClrFacade.GetOEMEncoding(); - } - - // error condition: unknown encoding value - string validEncodingValues = string.Join( - ", ", - new string[] { Unknown, String, Unicode, BigEndianUnicode, Ascii, Utf8, Utf7, Utf32, Default, OEM }); - string msg = StringUtil.Format(PathUtilsStrings.OutFile_WriteToFileEncodingUnknown, - encoding, validEncodingValues); - - ErrorRecord errorRecord = new ErrorRecord( - PSTraceSource.NewArgumentException("Encoding"), - "WriteToFileEncodingUnknown", - ErrorCategory.InvalidArgument, - null); - - errorRecord.ErrorDetails = new ErrorDetails(msg); - cmdlet.ThrowTerminatingError(errorRecord); - - return null; - } - } - */ } From 40a8eee82a3e1f819a031f0d1a3dba969fc04329 Mon Sep 17 00:00:00 2001 From: James Truher Date: Tue, 25 Jul 2017 16:03:43 -0700 Subject: [PATCH 11/14] Change FileEncoding.Unknown to FileEncoding.Unspecified which more closer to what is really happening Update RedirectionOperator tests to compare bytes in a more sensible manner Remove PSDefaultFileEncoding from special variable collection so they don't show up in script cmdlets miscellaneous code clean up --- .../commands/utility/CSVCommands.cs | 4 +- .../utility/ImplicitRemotingCommands.cs | 2 +- .../commands/utility/MatchString.cs | 4 +- .../commands/utility/XmlCommands.cs | 2 +- 
.../engine/InitialSessionState.cs | 4 +- .../Modules/NewModuleManifestCommand.cs | 2 +- .../engine/SpecialVariables.cs | 2 - .../namespaces/FileSystemContentStream.cs | 2 - .../namespaces/FileSystemProvider.cs | 4 +- .../utils/Encoding.cs | 55 ++++++++++--------- .../Parser/RedirectionOperator.Tests.ps1 | 4 +- test/powershell/engine/Encoding.Tests.ps1 | 25 ++++----- 12 files changed, 51 insertions(+), 59 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs index 9c9c8a7c03b..71049f48643 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs @@ -207,7 +207,7 @@ public SwitchParameter NoClobber /// Encoding optional flag /// [Parameter()] - public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; /// /// Property that sets append parameter. 
@@ -571,7 +571,7 @@ public SwitchParameter UseCulture /// Encoding optional flag /// [Parameter()] - public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; /// /// Avoid writing out duplicate warning messages when there are diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs index 8f34eb1c50a..359f73fe089 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs @@ -77,7 +77,7 @@ public SwitchParameter Force /// Encoding optional flag /// [Parameter] - public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; #endregion Parameters diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs index b76b8c4d61a..809eebf0d2e 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs @@ -1200,7 +1200,7 @@ public SwitchParameter AllMatches /// The text encoding to process each file as. /// [Parameter] - public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; private System.Text.Encoding _textEncoding; @@ -1272,7 +1272,7 @@ public SwitchParameter AllMatches protected override void BeginProcessing() { // Process encoding switch. 
- if (Encoding != FileEncoding.Unknown ) + if (Encoding != FileEncoding.Unspecified ) { _textEncoding = PowerShellEncoding.GetEncoding(this, Encoding); } diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs index d677c6600e0..46ce0561c30 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs @@ -109,7 +109,7 @@ public SwitchParameter NoClobber /// /// [Parameter] - public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; #endregion Command Line Parameters diff --git a/src/System.Management.Automation/engine/InitialSessionState.cs b/src/System.Management.Automation/engine/InitialSessionState.cs index 63c10c14a73..8561989746f 100644 --- a/src/System.Management.Automation/engine/InitialSessionState.cs +++ b/src/System.Management.Automation/engine/InitialSessionState.cs @@ -1,4 +1,4 @@ -/********************************************************************++ +/*********************************************************************++ Copyright (c) Microsoft Corporation. All rights reserved. 
--********************************************************************/ @@ -4829,7 +4829,7 @@ .ForwardHelpCategory Cmdlet internal const ActionPreference defaultVerbosePreference = ActionPreference.SilentlyContinue; internal const ActionPreference defaultWarningPreference = ActionPreference.Continue; internal const ActionPreference defaultInformationPreference = ActionPreference.SilentlyContinue; - internal const Microsoft.PowerShell.FileEncoding defaultFileEncodingPreference = FileEncoding.Unknown; + internal const Microsoft.PowerShell.FileEncoding defaultFileEncodingPreference = FileEncoding.Unspecified; internal const bool defaultWhatIfPreference = false; internal const ConfirmImpact defaultConfirmPreference = ConfirmImpact.High; diff --git a/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs b/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs index ac0a89da8a5..6656e843885 100644 --- a/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs +++ b/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs @@ -942,7 +942,7 @@ protected override void EndProcessing() PathUtils.MasterStreamOpen( this, filePath, - FileEncoding.Unknown, + FileEncoding.Unspecified, /* defaultEncoding */ false, /* Append */ false, /* Force */ false, diff --git a/src/System.Management.Automation/engine/SpecialVariables.cs b/src/System.Management.Automation/engine/SpecialVariables.cs index 97f04b75116..597fd1732d9 100644 --- a/src/System.Management.Automation/engine/SpecialVariables.cs +++ b/src/System.Management.Automation/engine/SpecialVariables.cs @@ -282,7 +282,6 @@ internal static class SpecialVariables SpecialVariables.WarningPreference, SpecialVariables.InformationPreference, SpecialVariables.ConfirmPreference, - SpecialVariables.DefaultFileEncodingPreference, }; internal static readonly Type[] PreferenceVariableTypes = { @@ -293,7 +292,6 @@ internal static class SpecialVariables /* 
WarningPreference */ typeof(ActionPreference), /* InformationPreference */ typeof(ActionPreference), /* ConfirmPreference */ typeof(ConfirmImpact), - /* PSDefaultFileEncoding */ typeof(Microsoft.PowerShell.FileEncoding), }; // The following variables are created in every session w/ AllScope. We avoid creating local slots when we diff --git a/src/System.Management.Automation/namespaces/FileSystemContentStream.cs b/src/System.Management.Automation/namespaces/FileSystemContentStream.cs index a7e087094c8..dbf5f83a558 100644 --- a/src/System.Management.Automation/namespaces/FileSystemContentStream.cs +++ b/src/System.Management.Automation/namespaces/FileSystemContentStream.cs @@ -1161,8 +1161,6 @@ internal FileStreamBackReader(FileStream fileStream, Encoding encoding) _currentPosition = _stream.Position; // Get the oem encoding and system current ANSI code page - // _oemEncoding = EncodingConversion.Convert(null, EncodingConversion.OEM); - // _defaultAnsiEncoding = EncodingConversion.Convert(null, EncodingConversion.Default); _oemEncoding = PowerShellEncoding.GetEncoding(FileEncoding.Oem); _defaultAnsiEncoding = PowerShellEncoding.GetEncoding(FileEncoding.Default); } diff --git a/src/System.Management.Automation/namespaces/FileSystemProvider.cs b/src/System.Management.Automation/namespaces/FileSystemProvider.cs index 458ba5e1d4e..fb96f7f5e58 100644 --- a/src/System.Management.Automation/namespaces/FileSystemProvider.cs +++ b/src/System.Management.Automation/namespaces/FileSystemProvider.cs @@ -6722,7 +6722,7 @@ public IContentWriter GetContentWriter(string path) bool usingByteEncoding = false; bool streamTypeSpecified = false; // we need to discover the encoding - Encoding encoding = PowerShellEncoding.GetProviderEncoding(this, FileEncoding.Unknown); + Encoding encoding = PowerShellEncoding.GetProviderEncoding(this, FileEncoding.Unspecified); FileMode filemode = FileMode.OpenOrCreate; string streamName = null; bool suppressNewline = false; @@ -7545,7 +7545,7 @@ public 
class FileSystemContentDynamicParametersBase /// reading data from the file. /// [Parameter] - public FileEncoding Encoding { get; set; } = FileEncoding.Unknown; + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; /// /// A parameter to return a stream of an item. diff --git a/src/System.Management.Automation/utils/Encoding.cs b/src/System.Management.Automation/utils/Encoding.cs index 3f65ab9e356..f1196dcd58c 100644 --- a/src/System.Management.Automation/utils/Encoding.cs +++ b/src/System.Management.Automation/utils/Encoding.cs @@ -20,7 +20,7 @@ public enum FileEncoding /// /// No encoding, or unset. /// - Unknown, + Unspecified, /// /// Unicode encoding. @@ -110,13 +110,13 @@ public static Encoding GetDefaultEncoding() /// /// translate a FileEncoding to an actual System.Text.Encoding - /// The enum value + /// The enum value /// System.Text.Encoding /// - public static Encoding GetEncoding(FileEncoding TextEncoding) + public static Encoding GetEncoding(FileEncoding textEncoding) { - System.Text.Encoding result = GetDefaultEncoding(); - switch ( TextEncoding ) + System.Text.Encoding result; + switch ( textEncoding ) { case FileEncoding.String: result = Encoding.Unicode; @@ -170,7 +170,8 @@ public static Encoding GetEncoding(FileEncoding TextEncoding) break; default: - break; + result = GetDefaultEncoding(); + break; } return result; @@ -185,17 +186,16 @@ public static Encoding GetEncoding(FileEncoding TextEncoding) public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) { Encoding resolvedEncoding = GetDefaultEncoding(); - FileEncoding encodingPreference = FileEncoding.Unknown; + FileEncoding encodingPreference = FileEncoding.Unspecified; bool preferenceSetAndValid = false; - string name = cmdlet.GetType().FullName.ToLower(CultureInfo.InvariantCulture); // An encoding has been specified as a parameter (or the explicit parameter value is "Unknown") - if ( encoding != FileEncoding.Unknown ) + if ( encoding != 
FileEncoding.Unspecified ) { // If the encoding has been set to WindowsLegacy, we need to look up the actual encoding if ( encoding == FileEncoding.WindowsLegacy ) { - resolvedEncoding = GetWindowsLegacyEncoding(name); + resolvedEncoding = GetWindowsLegacyEncoding(cmdlet); } else { @@ -204,16 +204,20 @@ public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) } else { - // the parameter is not specifically set, so check the preference variable - encodingPreference = GetEncodingPreference(cmdlet.Context.SessionState); + // if we have a cmdlet and the parameter is not specifically set, + // so check the preference variable + if ( cmdlet != null ) + { + encodingPreference = GetEncodingPreference(cmdlet.Context.SessionState); + } // If set to unknown, we accept that it is unset - preferenceSetAndValid = encodingPreference != FileEncoding.Unknown; + preferenceSetAndValid = encodingPreference != FileEncoding.Unspecified; // If the encoding preference has been set to WindowsLegacy, we need to look up the actual encoding if ( encodingPreference == FileEncoding.WindowsLegacy ) { - resolvedEncoding = GetWindowsLegacyEncoding(name); + resolvedEncoding = GetWindowsLegacyEncoding(cmdlet); } - else if ( encodingPreference != FileEncoding.Unknown ) + else if ( encodingPreference != FileEncoding.Unspecified ) { resolvedEncoding = GetEncoding(encodingPreference); } @@ -255,7 +259,7 @@ public static FileEncoding GetFileEncodingFromFile(string path) // Test for four-byte preambles string preamble = null; - FileEncoding foundEncoding = FileEncoding.Default; + FileEncoding foundEncoding; if (bytesRead > 3) { @@ -301,10 +305,11 @@ public static FileEncoding GetFileEncodingFromFile(string path) /// /// Retrieve the PSDefaultFileEncoding preference value if set + /// SessionState to use to retrieve the preference variable if set /// public static FileEncoding GetEncodingPreference(SessionState sessionState) { - FileEncoding encodingPreference = FileEncoding.Unknown; + 
FileEncoding encodingPreference = FileEncoding.Unspecified; try { // It doesn't matter if this fails or throws, we will return unknown in that case @@ -327,11 +332,11 @@ public static Encoding GetProviderEncoding(CmdletProvider provider, FileEncoding FileEncoding encodingPreference = GetEncodingPreference(provider.SessionState); // If the encoding isn't set, but is available as $PSDefaultFileEncoding, use that // It the encoding is set use that, otherwise return the default encoding - if ( encoding == FileEncoding.Unknown && encodingPreference != FileEncoding.Unknown ) + if ( encoding == FileEncoding.Unspecified && encodingPreference != FileEncoding.Unspecified ) { resolvedEncoding = GetEncoding(encodingPreference); } - else if ( encoding != FileEncoding.Unknown ) + else if ( encoding != FileEncoding.Unspecified ) { resolvedEncoding = GetEncoding(encoding); } @@ -359,16 +364,14 @@ public static Encoding GetProviderEncoding(CmdletProvider provider, FileEncoding }; /// Get the Windows legacy encoding from our encoding map - internal static Encoding GetWindowsLegacyEncoding(string name) + internal static Encoding GetWindowsLegacyEncoding(Cmdlet cmdlet) { - if ( legacyEncodingMap.ContainsKey(name)) - { - return legacyEncodingMap[name]; - } - else + Encoding encoding = Encoding.Default; + if ( cmdlet != null ) { - return Encoding.Default; + legacyEncodingMap.TryGetValue(cmdlet.GetType().FullName, out encoding); } + return encoding; } diff --git a/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 b/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 index 100a3110b91..f2bc3c10307 100644 --- a/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 +++ b/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 @@ -39,9 +39,7 @@ Describe "Redirection operator now supports encoding changes" -Tags "CI" { $CR = $encoder.GetBytes($asciiCR) $expectedBytes = .{ $BOM; $TXT; $CR } $bytes.Count | should be $expectedBytes.count - for($i = 0; $i -lt 
$bytes.count; $i++) { - $bytes[$i] | Should be $expectedBytes[$i] - } + $bytes -join "-" | should be ($expectedBytes -join "-") } # WindowsLegacy encoding tests will be done elsewhere diff --git a/test/powershell/engine/Encoding.Tests.ps1 b/test/powershell/engine/Encoding.Tests.ps1 index bc245fa2519..804b0e8a20b 100644 --- a/test/powershell/engine/Encoding.Tests.ps1 +++ b/test/powershell/engine/Encoding.Tests.ps1 @@ -17,7 +17,7 @@ Describe "Encoding classes and methods are available" -Tag CI { UTF8BOM = '239-187-191' UTF8NoBOM = '' Unicode = '255-254' - Unknown = '' + Unspecified = '' WindowsLegacy = '' } @@ -48,7 +48,7 @@ Describe "Encoding classes and methods are available" -Tag CI { @{ Encoding = 'UTF8BOM'; Preamble = '239-187-191' }, @{ Encoding = 'UTF8NoBOM'; Preamble = '' }, @{ Encoding = 'Unicode'; Preamble = '255-254' }, - @{ Encoding = 'Unknown'; Preamble = '' }, + @{ Encoding = 'Unspecified'; Preamble = '' }, @{ Encoding = 'WindowsLegacy'; Preamble = '' } $testStringEncodedBytes = @{ @@ -66,7 +66,7 @@ Describe "Encoding classes and methods are available" -Tag CI { UTF8BOM = "239-187-191-116-195-169-115-116-" + (Get-NewLineBytes UTF8BOM) UTF8NoBOM = "116-195-169-115-116-" + (Get-NewLineBytes UTF8NoBOM) Unicode = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Unicode) - Unknown = "116-195-169-115-116-" + (Get-NewLineBytes Unknown) + Unspecified = "116-195-169-115-116-" + (Get-NewLineBytes Unspecified) } $contentTests = @@ -84,7 +84,7 @@ Describe "Encoding classes and methods are available" -Tag CI { @{ Encoding = 'UTF8BOM'; Bytes = $testStringEncodedBytes['UTF8BOM'] }, @{ Encoding = 'UTF8NoBOM'; Bytes = $testStringEncodedBytes['UTF8NoBOM'] }, @{ Encoding = 'Unicode'; Bytes = $testStringEncodedBytes['Unicode'] }, - @{ Encoding = 'Unknown'; Bytes = $testStringEncodedBytes['Unknown'] } + @{ Encoding = 'Unspecified'; Bytes = $testStringEncodedBytes['Unspecified'] } } @@ -93,7 +93,7 @@ Describe "Encoding classes and methods are available" -Tag CI { { 
remove-item $testFile } - $PSDefaultFileEncoding = "Unknown" + $PSDefaultFileEncoding = "Unspecified" } It "Encoding for '' should have correct preamble ''" -TestCase $preambleTests { @@ -123,17 +123,12 @@ Describe "Encoding classes and methods are available" -Tag CI { It "Explicit encoding set to unknown and preference variable set to unicode creates unicode file" { $PSDefaultFileEncoding = "Unicode" - $testString | set-content -encoding unknown $testfile + $testString | set-content -encoding unspecified $testfile Get-FileBytes $testFile | should be $testStringEncodedBytes['Unicode'] } - It "getting the encoding for an unknown cmdlet should return utf-8" { - $method = [microsoft.powershell.powershellencoding].getmember("GetWindowsLegacyEncoding","NonPublic,Static") - $method.Invoke($null, "badcmdlet").BodyName | should be "utf-8" - } - - It "When session state is null, GetEncodingPreference returns unknown" { - [Microsoft.PowerShell.PowerShellEncoding]::GetEncodingPreference($null) | should be "unknown" + It "When session state is null, GetEncodingPreference returns unspecified" { + [Microsoft.PowerShell.PowerShellEncoding]::GetEncodingPreference($null) | should be "unspecified" } Context "GetFileEncodingFromFile tests" { @@ -190,7 +185,7 @@ Describe "Encoding classes and methods are available" -Tag CI { New-ModuleManifest -path "$TESTDRIVE/${testString}.psd1" } finally { - $PSDefaultFileEncoding = "Unknown" + $PSDefaultFileEncoding = "Unspecified" } # we know what the encoding should be $legacyEncoding = [System.Text.Encoding]::Unicode @@ -213,7 +208,7 @@ Describe "Encoding classes and methods are available" -Tag CI { $testString > $TESTDRIVE/file.txt } finally { - $PSDefaultFileEncoding = "Unknown" + $PSDefaultFileEncoding = "Unspecified" } # we are using the first 10 bytes to convince us that we created the proper encoding # this doesn't include the new line From bf2be6fd369ceec7f3178b9649ee4ba5df09dae9 Mon Sep 17 00:00:00 2001 From: James Truher Date: Wed, 26 
Jul 2017 15:30:51 -0700 Subject: [PATCH 12/14] Changed PowerShellEncoding class name to EncodingUtils made file encoding probe method internal --- .../utility/FormatAndOutput/format-hex/Format-Hex.cs | 2 +- .../commands/utility/ImplicitRemotingCommands.cs | 2 +- .../commands/utility/MatchString.cs | 2 +- .../commands/utility/Send-MailMessage.cs | 4 ++-- .../engine/ExternalScriptInfo.cs | 2 +- .../engine/Modules/ScriptAnalysis.cs | 2 +- .../engine/hostifaces/MshHostUserInterface.cs | 4 ++-- .../engine/remoting/common/RunspaceConnectionInfo.cs | 6 +++--- .../namespaces/FileSystemContentStream.cs | 6 +++--- .../namespaces/FileSystemProvider.cs | 8 ++++---- src/System.Management.Automation/utils/ClrFacade.cs | 8 ++++---- src/System.Management.Automation/utils/Encoding.cs | 4 ++-- src/System.Management.Automation/utils/PathUtils.cs | 4 ++-- 13 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs index e6d0a4d71f2..9b442e69a60 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs @@ -232,7 +232,7 @@ private void ProcessObjectContent(PSObject inputObject) else if (obj is string) { string inputString = obj.ToString(); - Encoding resolvedEncoding = PowerShellEncoding.GetEncoding(this, Encoding); + Encoding resolvedEncoding = EncodingUtils.GetEncoding(this, Encoding); inputBytes = resolvedEncoding.GetBytes(inputString); } diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs index 359f73fe089..cf3f4082ba4 100644 --- 
a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs @@ -132,7 +132,7 @@ protected override void BeginProcessing() List generatedFiles = GenerateProxyModule( tempDirectory, Path.GetFileName(directory.FullName), - PowerShellEncoding.GetEncoding(this, Encoding), + EncodingUtils.GetEncoding(this, Encoding), _force, listOfCommandMetadata, alias2resolvedCommandName, diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs index 809eebf0d2e..cd64a8176f1 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs @@ -1274,7 +1274,7 @@ protected override void BeginProcessing() // Process encoding switch. if (Encoding != FileEncoding.Unspecified ) { - _textEncoding = PowerShellEncoding.GetEncoding(this, Encoding); + _textEncoding = EncodingUtils.GetEncoding(this, Encoding); } else { diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs index 13a316a678c..d391780c29b 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs @@ -494,7 +494,7 @@ protected override void EndProcessing() /// /// To make it easier to specify -Encoding parameter, we add an ArgumentTransformationAttribute here. /// When the input data is of type string and is valid to be converted to System.Text.Encoding - /// via PowerShellEncoding.GetEncoding(), we do the conversion and return the converted value. + /// via EncodingUtils.GetEncoding(), we do the conversion and return the converted value. 
/// Otherwise, we just return the input data. /// internal sealed class ArgumentToEncodingNameTransformationAttribute : ArgumentTransformationAttribute @@ -504,7 +504,7 @@ public override object Transform(EngineIntrinsics engineIntrinsics, object input FileEncoding encoding; if (LanguagePrimitives.TryConvertTo(inputData, out encoding)) { - return PowerShellEncoding.GetEncoding(encoding); + return EncodingUtils.GetEncoding(encoding); } return inputData; } diff --git a/src/System.Management.Automation/engine/ExternalScriptInfo.cs b/src/System.Management.Automation/engine/ExternalScriptInfo.cs index e7779b9467b..eb670e6b7bb 100644 --- a/src/System.Management.Automation/engine/ExternalScriptInfo.cs +++ b/src/System.Management.Automation/engine/ExternalScriptInfo.cs @@ -468,7 +468,7 @@ private void ReadScriptContents() { using (FileStream readerStream = new FileStream(_path, FileMode.Open, FileAccess.Read)) { - Encoding defaultEncoding = PowerShellEncoding.GetDefaultEncoding(); + Encoding defaultEncoding = EncodingUtils.GetDefaultEncoding(); Microsoft.Win32.SafeHandles.SafeFileHandle safeFileHandle = readerStream.SafeFileHandle; using (StreamReader scriptReader = new StreamReader(readerStream, defaultEncoding)) diff --git a/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs b/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs index 12ebc9d9b7c..7c87f828aca 100644 --- a/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs +++ b/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs @@ -96,7 +96,7 @@ internal static string ReadScript(string path) { using (FileStream readerStream = new FileStream(path, FileMode.Open, FileAccess.Read)) { - Encoding defaultEncoding = PowerShellEncoding.GetDefaultEncoding(); + Encoding defaultEncoding = EncodingUtils.GetDefaultEncoding(); Microsoft.Win32.SafeHandles.SafeFileHandle safeFileHandle = readerStream.SafeFileHandle; using (StreamReader scriptReader = new 
StreamReader(readerStream, defaultEncoding)) diff --git a/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs b/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs index f3f069a55f5..f3a353b5390 100644 --- a/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs +++ b/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs @@ -1071,11 +1071,11 @@ internal string Path _path = value; Encoding = Encoding.UTF8; - FileEncoding fileEncoding = PowerShellEncoding.GetFileEncodingFromFile(value); + FileEncoding fileEncoding = EncodingUtils.GetFileEncodingFromFile(value); if (fileEncoding != FileEncoding.Default) { - Encoding = PowerShellEncoding.GetEncoding(fileEncoding); + Encoding = EncodingUtils.GetEncoding(fileEncoding); } } } diff --git a/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs b/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs index db5ea0aea46..b7748feb751 100644 --- a/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs +++ b/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs @@ -2180,20 +2180,20 @@ internal static int StartSSHProcess( { Debug.Assert(stdinFd >= 0, "Invalid Fd"); standardInput = new StreamWriter(OpenStream(stdinFd, FileAccess.Write), - PowerShellEncoding.utf8NoBom, StreamBufferSize) + EncodingUtils.utf8NoBom, StreamBufferSize) { AutoFlush = true }; } if (startInfo.RedirectStandardOutput) { Debug.Assert(stdoutFd >= 0, "Invalid Fd"); standardOutput = new StreamReader(OpenStream(stdoutFd, FileAccess.Read), - startInfo.StandardOutputEncoding ?? PowerShellEncoding.utf8NoBom, true, StreamBufferSize); + startInfo.StandardOutputEncoding ?? 
EncodingUtils.utf8NoBom, true, StreamBufferSize); } if (startInfo.RedirectStandardError) { Debug.Assert(stderrFd >= 0, "Invalid Fd"); standardError = new StreamReader(OpenStream(stderrFd, FileAccess.Read), - startInfo.StandardErrorEncoding ?? PowerShellEncoding.utf8NoBom, true, StreamBufferSize); + startInfo.StandardErrorEncoding ?? EncodingUtils.utf8NoBom, true, StreamBufferSize); } return childPid; diff --git a/src/System.Management.Automation/namespaces/FileSystemContentStream.cs b/src/System.Management.Automation/namespaces/FileSystemContentStream.cs index dbf5f83a558..b688e93b9f3 100644 --- a/src/System.Management.Automation/namespaces/FileSystemContentStream.cs +++ b/src/System.Management.Automation/namespaces/FileSystemContentStream.cs @@ -797,7 +797,7 @@ private bool ReadByteEncoded(bool waitChanges, ArrayList blocks, bool readBack) // the changes if (waitChanges) { - WaitForChanges(_path, _mode, _access, _share, PowerShellEncoding.GetDefaultEncoding()); + WaitForChanges(_path, _mode, _access, _share, EncodingUtils.GetDefaultEncoding()); byteRead = _stream.ReadByte(); } } @@ -1161,8 +1161,8 @@ internal FileStreamBackReader(FileStream fileStream, Encoding encoding) _currentPosition = _stream.Position; // Get the oem encoding and system current ANSI code page - _oemEncoding = PowerShellEncoding.GetEncoding(FileEncoding.Oem); - _defaultAnsiEncoding = PowerShellEncoding.GetEncoding(FileEncoding.Default); + _oemEncoding = EncodingUtils.GetEncoding(FileEncoding.Oem); + _defaultAnsiEncoding = EncodingUtils.GetEncoding(FileEncoding.Default); } } diff --git a/src/System.Management.Automation/namespaces/FileSystemProvider.cs b/src/System.Management.Automation/namespaces/FileSystemProvider.cs index fb96f7f5e58..4a5c855b889 100644 --- a/src/System.Management.Automation/namespaces/FileSystemProvider.cs +++ b/src/System.Management.Automation/namespaces/FileSystemProvider.cs @@ -6558,7 +6558,7 @@ public IContentReader GetContentReader(string path) // Defaults for the file 
read operation string delimiter = "\n"; - Encoding encoding = PowerShellEncoding.GetDefaultEncoding(); + Encoding encoding = EncodingUtils.GetDefaultEncoding(); bool waitForChanges = false; bool streamTypeSpecified = false; @@ -6593,7 +6593,7 @@ public IContentReader GetContentReader(string path) if (streamTypeSpecified) { - encoding = PowerShellEncoding.GetProviderEncoding(this, dynParams.Encoding); + encoding = EncodingUtils.GetProviderEncoding(this, dynParams.Encoding); } // Get the wait value @@ -6722,7 +6722,7 @@ public IContentWriter GetContentWriter(string path) bool usingByteEncoding = false; bool streamTypeSpecified = false; // we need to discover the encoding - Encoding encoding = PowerShellEncoding.GetProviderEncoding(this, FileEncoding.Unspecified); + Encoding encoding = EncodingUtils.GetProviderEncoding(this, FileEncoding.Unspecified); FileMode filemode = FileMode.OpenOrCreate; string streamName = null; bool suppressNewline = false; @@ -6741,7 +6741,7 @@ public IContentWriter GetContentWriter(string path) if (streamTypeSpecified) { - encoding = PowerShellEncoding.GetProviderEncoding(this, dynParams.Encoding); + encoding = EncodingUtils.GetProviderEncoding(this, dynParams.Encoding); } streamName = dynParams.Stream; diff --git a/src/System.Management.Automation/utils/ClrFacade.cs b/src/System.Management.Automation/utils/ClrFacade.cs index 6ee0086d4fa..f8b15ad2c50 100644 --- a/src/System.Management.Automation/utils/ClrFacade.cs +++ b/src/System.Management.Automation/utils/ClrFacade.cs @@ -114,7 +114,7 @@ internal static Encoding GetDefaultEncoding() EncodingRegisterProvider(); uint currentAnsiCp = NativeMethods.GetACP(); - s_defaultEncoding = Encoding.GetEncoding((int)currentAnsiCp); + s_defaultEncoding = EncodingUtils.GetDefaultEncoding(); #endif } return s_defaultEncoding; @@ -136,11 +136,11 @@ internal static Encoding GetOEMEncoding() { #if UNIX // PowerShell Core on Unix s_oemEncoding = GetDefaultEncoding(); -#else // PowerShell Core on Windows +#else 
// PowerShell Core on Windows, which needs provider registration EncodingRegisterProvider(); uint oemCp = NativeMethods.GetOEMCP(); - s_oemEncoding = Encoding.GetEncoding((int)oemCp); + s_oemEncoding = EncodingUtils.GetDefaultEncoding(); #endif } return s_oemEncoding; @@ -275,7 +275,7 @@ private static SecurityZone ReadFromZoneIdentifierDataStream(string filePath) FileAccess.Read, FileShare.Read); // If we successfully get the zone data stream, try to read the ZoneId information - // use the method in this class not PowerShellEncoding. + // use the method in this class not EncodingUtils. using (StreamReader zoneDataReader = new StreamReader(zoneDataSteam, GetDefaultEncoding())) { string line = null; diff --git a/src/System.Management.Automation/utils/Encoding.cs b/src/System.Management.Automation/utils/Encoding.cs index f1196dcd58c..df29f12b34a 100644 --- a/src/System.Management.Automation/utils/Encoding.cs +++ b/src/System.Management.Automation/utils/Encoding.cs @@ -96,7 +96,7 @@ public enum FileEncoding /// /// the helper class for determining encodings for PowerShell /// - public static class PowerShellEncoding + public static class EncodingUtils { /// @@ -232,7 +232,7 @@ public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) /// The path to a file to inspect for an encoding /// System.Text.Encoding /// - public static FileEncoding GetFileEncodingFromFile(string path) + internal static FileEncoding GetFileEncodingFromFile(string path) { if (!File.Exists(path)) { diff --git a/src/System.Management.Automation/utils/PathUtils.cs b/src/System.Management.Automation/utils/PathUtils.cs index 416d6063263..8b90b83ec83 100644 --- a/src/System.Management.Automation/utils/PathUtils.cs +++ b/src/System.Management.Automation/utils/PathUtils.cs @@ -48,7 +48,7 @@ internal static void MasterStreamOpen( bool isLiteralPath ) { - Encoding resolvedEncoding = PowerShellEncoding.GetEncoding(cmdlet, encoding); + Encoding resolvedEncoding = 
EncodingUtils.GetEncoding(cmdlet, encoding); MasterStreamOpen(cmdlet, filePath, resolvedEncoding, defaultEncoding, Append, Force, NoClobber, out fileStream, out streamWriter, out readOnlyFileInfo, isLiteralPath); } @@ -193,7 +193,7 @@ internal static void ReportFileOpenFailure(Cmdlet cmdlet, string filePath, Excep internal static StreamReader OpenStreamReader(PSCmdlet command, string filePath, FileEncoding encoding, bool isLiteralPath) { FileStream fileStream = OpenFileStream(filePath, command, isLiteralPath); - return new StreamReader(fileStream, PowerShellEncoding.GetEncoding(command, encoding)); + return new StreamReader(fileStream, EncodingUtils.GetEncoding(command, encoding)); } internal static FileStream OpenFileStream(string filePath, PSCmdlet command, bool isLiteralPath) From 8a0765eeaa0b18a95f9cd4ab9e6c5121a5fcfa96 Mon Sep 17 00:00:00 2001 From: James Truher Date: Wed, 26 Jul 2017 15:35:28 -0700 Subject: [PATCH 13/14] update PowerShellEncoding to use new name EncodingUtils --- test/powershell/engine/Encoding.Tests.ps1 | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test/powershell/engine/Encoding.Tests.ps1 b/test/powershell/engine/Encoding.Tests.ps1 index 804b0e8a20b..ffebae9a42d 100644 --- a/test/powershell/engine/Encoding.Tests.ps1 +++ b/test/powershell/engine/Encoding.Tests.ps1 @@ -30,7 +30,7 @@ Describe "Encoding classes and methods are available" -Tag CI { function Get-NewLineBytes { param ( [Microsoft.PowerShell.FileEncoding]$encoding ) - $encoder = [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding($encoding) + $encoder = [Microsoft.PowerShell.EncodingUtils]::GetEncoding($encoding) $encoder.GetBytes([Environment]::NewLine) -Join "-" } @@ -58,7 +58,7 @@ Describe "Encoding classes and methods are available" -Tag CI { Byte = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Byte) Default = "116-195-169-115-116-" + (Get-NewLineBytes Default) # Oem encoding can change depending on system, calculate the expected 
string - Oem = ([Microsoft.PowerShell.PowerShellEncoding]::GetEncoding("Oem").GetBytes($testString) -join "-") + "-" + (Get-NewLineBytes Oem) + Oem = ([Microsoft.PowerShell.EncodingUtils]::GetEncoding("Oem").GetBytes($testString) -join "-") + "-" + (Get-NewLineBytes Oem) String = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes String) UTF32 = "255-254-0-0-116-0-0-0-233-0-0-0-115-0-0-0-116-0-0-0-" + (Get-NewLineBytes UTF32) UTF7 = "116-43-65-79-107-45-115-116-" + (Get-NewLineBytes UTF7) @@ -98,7 +98,7 @@ Describe "Encoding classes and methods are available" -Tag CI { It "Encoding for '' should have correct preamble ''" -TestCase $preambleTests { param ( $Encoding, $Preamble ) - [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding($Encoding).GetPreamble() -Join "-" | Should be $Preamble + [Microsoft.PowerShell.EncodingUtils]::GetEncoding($Encoding).GetPreamble() -Join "-" | Should be $Preamble } It "Encoding for '' should create file with proper encoding" -TestCase $contentTests { @@ -128,7 +128,7 @@ Describe "Encoding classes and methods are available" -Tag CI { } It "When session state is null, GetEncodingPreference returns unspecified" { - [Microsoft.PowerShell.PowerShellEncoding]::GetEncodingPreference($null) | should be "unspecified" + [Microsoft.PowerShell.EncodingUtils]::GetEncodingPreference($null) | should be "unspecified" } Context "GetFileEncodingFromFile tests" { @@ -138,7 +138,6 @@ Describe "Encoding classes and methods are available" -Tag CI { @{ Encoding = "UTF32"; Text = $testString; FilePath = $testFile }, @{ Encoding = "BigEndianUTF32"; Text = $testString; FilePath = $testFile }, @{ Encoding = "UTF8Bom"; Text = $testString; FilePath = $testFile }, - @{ Encoding = "Byte"; Text = [byte[]](20..40); FilePath = $testFile }, @{ Encoding = "UTF8NoBom"; Text = ""; FilePath = $testFile }, @{ Encoding = "Default"; Text = ""; FilePath = "$TESTDRIVE/ThisFileCouldNotPossiblyExist" } } @@ -147,7 +146,7 @@ Describe "Encoding classes and methods are 
available" -Tag CI { param ( $Encoding, $Text, $FilePath ) # I need a way to not open the right file to test the missing file scenario $Text | set-content -encoding $Encoding $testFile - [Microsoft.PowerShell.PowerShellEncoding]::GetFileEncodingFromFile($FilePath) | should be $encoding + get-content $testFile | should be $Text } } From ff24e22a4f9ad45a930d2f4a6da17ceedcc89205 Mon Sep 17 00:00:00 2001 From: James Truher Date: Thu, 27 Jul 2017 12:59:11 -0700 Subject: [PATCH 14/14] Use new class name EncodingUtils --- test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 b/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 index f2bc3c10307..284e88d381c 100644 --- a/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 +++ b/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 @@ -31,7 +31,7 @@ Describe "Redirection operator now supports encoding changes" -Tags "CI" { It "If encoding is unset, redirection should be platform appropriate" { $asciiString > TESTDRIVE:\file.txt - $encoder = [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding("utf8nobom") + $encoder = [Microsoft.PowerShell.EncodingUtils]::GetEncoding("utf8nobom") $bytes = get-content -encoding byte TESTDRIVE:\file.txt # create the expected $BOM = $encoder.GetPreamble() @@ -59,7 +59,7 @@ Describe "Redirection operator now supports encoding changes" -Tags "CI" { # and out-file has its own translation, so we'll # not do that logic here, but simply ignore those encodings # as they eventually are translated to "real" encoding - $enc = [Microsoft.PowerShell.PowerShellEncoding]::GetEncoding($encoding) + $enc = [Microsoft.PowerShell.EncodingUtils]::GetEncoding($encoding) if ( $enc ) { $msg = "Overriding encoding for out-file is respected for $encoding"