Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 59eed8a

Browse filesBrowse files
committed
Auto merge of rust-lang#90460 - pietroalbini:bidi-stable, r=nikomatsakis,pietroalbini
[stable] Fix CVE-2021-42574 and prepare Rust 1.56.1 This PR implements new lints to mitigate the impact of [CVE-2021-42574], caused by the presence of bidirectional-override Unicode codepoints in the compiled source code. [See the advisory][advisory] for more information about the vulnerability. The changes in this PR will be released later today as part of Rust 1.56.1. [CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574 [advisory]: https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html
2 parents 09c42c4 + 6552f7a commit 59eed8a
Copy full SHA for 59eed8a

File tree

Expand file treeCollapse file tree

15 files changed

+545
-11
lines changed
Filter options
Expand file treeCollapse file tree

15 files changed

+545
-11
lines changed

‎Cargo.lock

Copy file name to clipboardExpand all lines: Cargo.lock
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4101,6 +4101,7 @@ dependencies = [
41014101
"rustc_span",
41024102
"tracing",
41034103
"unicode-normalization",
4104+
"unicode-width",
41044105
]
41054106

41064107
[[package]]

‎RELEASES.md

Copy file name to clipboardExpand all lines: RELEASES.md
+8Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
Version 1.56.1 (2021-11-01)
2+
===========================
3+
4+
- New lints to detect the presence of bidirectional-override Unicode
5+
codepoints in the compiled source code ([CVE-2021-42574])
6+
7+
[CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574
8+
19
Version 1.56.0 (2021-10-21)
210
========================
311

‎compiler/rustc_errors/src/emitter.rs

Copy file name to clipboardExpand all lines: compiler/rustc_errors/src/emitter.rs
+19-1Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2054,8 +2054,26 @@ fn num_decimal_digits(num: usize) -> usize {
20542054
MAX_DIGITS
20552055
}
20562056

2057+
// We replace some characters so the CLI output is always consistent and underlines aligned.
2058+
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2059+
('\t', " "), // We do our own tab replacement
2060+
('\u{202A}', ""), // The following unicode text flow control characters are inconsistently
2061+
('\u{202B}', ""), // supported across CLIs and can cause confusion due to the bytes on disk
2062+
('\u{202D}', ""), // not corresponding to the visible source code, so we replace them always.
2063+
('\u{202E}', ""),
2064+
('\u{2066}', ""),
2065+
('\u{2067}', ""),
2066+
('\u{2068}', ""),
2067+
('\u{202C}', ""),
2068+
('\u{2069}', ""),
2069+
];
2070+
20572071
fn replace_tabs(str: &str) -> String {
2058-
str.replace('\t', " ")
2072+
let mut s = str.to_string();
2073+
for (c, replacement) in OUTPUT_REPLACEMENTS {
2074+
s = s.replace(*c, replacement);
2075+
}
2076+
s
20592077
}
20602078

20612079
fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {

‎compiler/rustc_lint/src/context.rs

Copy file name to clipboardExpand all lines: compiler/rustc_lint/src/context.rs
+38-1Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
1717
use self::TargetLint::*;
1818

19+
use crate::hidden_unicode_codepoints::UNICODE_TEXT_FLOW_CHARS;
1920
use crate::levels::{is_known_lint_tool, LintLevelsBuilder};
2021
use crate::passes::{EarlyLintPassObject, LateLintPassObject};
2122
use rustc_ast as ast;
@@ -40,7 +41,7 @@ use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintI
4041
use rustc_session::Session;
4142
use rustc_session::SessionLintStore;
4243
use rustc_span::lev_distance::find_best_match_for_name;
43-
use rustc_span::{symbol::Symbol, MultiSpan, Span, DUMMY_SP};
44+
use rustc_span::{symbol::Symbol, BytePos, MultiSpan, Span, DUMMY_SP};
4445
use rustc_target::abi::{self, LayoutOf};
4546
use tracing::debug;
4647

@@ -612,6 +613,42 @@ pub trait LintContext: Sized {
612613
// Now, set up surrounding context.
613614
let sess = self.sess();
614615
match diagnostic {
616+
BuiltinLintDiagnostics::UnicodeTextFlow(span, content) => {
617+
let spans: Vec<_> = content
618+
.char_indices()
619+
.filter_map(|(i, c)| {
620+
UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| {
621+
let lo = span.lo() + BytePos(2 + i as u32);
622+
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
623+
})
624+
})
625+
.collect();
626+
let (an, s) = match spans.len() {
627+
1 => ("an ", ""),
628+
_ => ("", "s"),
629+
};
630+
db.span_label(span, &format!(
631+
"this comment contains {}invisible unicode text flow control codepoint{}",
632+
an,
633+
s,
634+
));
635+
for (c, span) in &spans {
636+
db.span_label(*span, format!("{:?}", c));
637+
}
638+
db.note(
639+
"these kind of unicode codepoints change the way text flows on \
640+
applications that support them, but can cause confusion because they \
641+
change the order of characters on the screen",
642+
);
643+
if !spans.is_empty() {
644+
db.multipart_suggestion_with_style(
645+
"if their presence wasn't intentional, you can remove them",
646+
spans.into_iter().map(|(_, span)| (span, "".to_string())).collect(),
647+
Applicability::MachineApplicable,
648+
SuggestionStyle::HideCodeAlways,
649+
);
650+
}
651+
},
615652
BuiltinLintDiagnostics::Normal => (),
616653
BuiltinLintDiagnostics::BareTraitObject(span, is_global) => {
617654
let (sugg, app) = match sess.source_map().span_to_snippet(span) {
+161Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
use crate::{EarlyContext, EarlyLintPass, LintContext};
2+
use rustc_ast as ast;
3+
use rustc_errors::{Applicability, SuggestionStyle};
4+
use rustc_span::{BytePos, Span, Symbol};
5+
6+
declare_lint! {
7+
/// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
8+
/// visual representation of text on screen in a way that does not correspond to their
9+
/// in-memory representation.
10+
///
11+
/// ### Explanation
12+
///
13+
/// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
14+
/// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
15+
/// its direction on software that supports these codepoints. This makes the text "abc" display
16+
/// as "cba" on screen. By leveraging software that supports these, people can write specially
17+
/// crafted literals that make the surrounding code seem like it's performing one action, when
18+
/// in reality it is performing another. Because of this, we proactively lint against their
19+
/// presence to avoid surprises.
20+
///
21+
/// ### Example
22+
///
23+
/// ```rust,compile_fail
24+
/// #![deny(text_direction_codepoint_in_literal)]
25+
/// fn main() {
26+
/// println!("{:?}", '‮');
27+
/// }
28+
/// ```
29+
///
30+
/// {{produces}}
31+
///
32+
pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
33+
Deny,
34+
"detect special Unicode codepoints that affect the visual representation of text on screen, \
35+
changing the direction in which text flows",
36+
}
37+
38+
declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);
39+
40+
crate const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
41+
'\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}',
42+
'\u{2069}',
43+
];
44+
45+
impl HiddenUnicodeCodepoints {
46+
fn lint_text_direction_codepoint(
47+
&self,
48+
cx: &EarlyContext<'_>,
49+
text: Symbol,
50+
span: Span,
51+
padding: u32,
52+
point_at_inner_spans: bool,
53+
label: &str,
54+
) {
55+
// Obtain the `Span`s for each of the forbidden chars.
56+
let spans: Vec<_> = text
57+
.as_str()
58+
.char_indices()
59+
.filter_map(|(i, c)| {
60+
UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| {
61+
let lo = span.lo() + BytePos(i as u32 + padding);
62+
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
63+
})
64+
})
65+
.collect();
66+
67+
cx.struct_span_lint(TEXT_DIRECTION_CODEPOINT_IN_LITERAL, span, |lint| {
68+
let mut err = lint.build(&format!(
69+
"unicode codepoint changing visible direction of text present in {}",
70+
label
71+
));
72+
let (an, s) = match spans.len() {
73+
1 => ("an ", ""),
74+
_ => ("", "s"),
75+
};
76+
err.span_label(
77+
span,
78+
&format!(
79+
"this {} contains {}invisible unicode text flow control codepoint{}",
80+
label, an, s,
81+
),
82+
);
83+
if point_at_inner_spans {
84+
for (c, span) in &spans {
85+
err.span_label(*span, format!("{:?}", c));
86+
}
87+
}
88+
err.note(
89+
"these kind of unicode codepoints change the way text flows on applications that \
90+
support them, but can cause confusion because they change the order of \
91+
characters on the screen",
92+
);
93+
if point_at_inner_spans && !spans.is_empty() {
94+
err.multipart_suggestion_with_style(
95+
"if their presence wasn't intentional, you can remove them",
96+
spans.iter().map(|(_, span)| (*span, "".to_string())).collect(),
97+
Applicability::MachineApplicable,
98+
SuggestionStyle::HideCodeAlways,
99+
);
100+
err.multipart_suggestion(
101+
"if you want to keep them but make them visible in your source code, you can \
102+
escape them",
103+
spans
104+
.into_iter()
105+
.map(|(c, span)| {
106+
let c = format!("{:?}", c);
107+
(span, c[1..c.len() - 1].to_string())
108+
})
109+
.collect(),
110+
Applicability::MachineApplicable,
111+
);
112+
} else {
113+
// FIXME: in other suggestions we've reversed the inner spans of doc comments. We
114+
// should do the same here to provide the same good suggestions as we do for
115+
// literals above.
116+
err.note("if their presence wasn't intentional, you can remove them");
117+
err.note(&format!(
118+
"if you want to keep them but make them visible in your source code, you can \
119+
escape them: {}",
120+
spans
121+
.into_iter()
122+
.map(|(c, _)| { format!("{:?}", c) })
123+
.collect::<Vec<String>>()
124+
.join(", "),
125+
));
126+
}
127+
err.emit();
128+
});
129+
}
130+
}
131+
impl EarlyLintPass for HiddenUnicodeCodepoints {
132+
fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
133+
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
134+
if comment.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {
135+
self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment");
136+
}
137+
}
138+
}
139+
140+
fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) {
141+
// byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
142+
let (text, span, padding) = match &expr.kind {
143+
ast::ExprKind::Lit(ast::Lit { token, kind, span }) => {
144+
let text = token.symbol;
145+
if !text.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {
146+
return;
147+
}
148+
let padding = match kind {
149+
// account for `"` or `'`
150+
ast::LitKind::Str(_, ast::StrStyle::Cooked) | ast::LitKind::Char(_) => 1,
151+
// account for `r###"`
152+
ast::LitKind::Str(_, ast::StrStyle::Raw(val)) => *val as u32 + 2,
153+
_ => return,
154+
};
155+
(text, span, padding)
156+
}
157+
_ => return,
158+
};
159+
self.lint_text_direction_codepoint(cx, text, *span, padding, true, "literal");
160+
}
161+
}

‎compiler/rustc_lint/src/lib.rs

Copy file name to clipboardExpand all lines: compiler/rustc_lint/src/lib.rs
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ mod array_into_iter;
4848
pub mod builtin;
4949
mod context;
5050
mod early;
51+
pub mod hidden_unicode_codepoints;
5152
mod internal;
5253
mod late;
5354
mod levels;
@@ -77,6 +78,7 @@ use rustc_span::Span;
7778

7879
use array_into_iter::ArrayIntoIter;
7980
use builtin::*;
81+
use hidden_unicode_codepoints::*;
8082
use internal::*;
8183
use methods::*;
8284
use non_ascii_idents::*;
@@ -128,6 +130,7 @@ macro_rules! early_lint_passes {
128130
DeprecatedAttr: DeprecatedAttr::new(),
129131
WhileTrue: WhileTrue,
130132
NonAsciiIdents: NonAsciiIdents,
133+
HiddenUnicodeCodepoints: HiddenUnicodeCodepoints,
131134
IncompleteFeatures: IncompleteFeatures,
132135
RedundantSemicolons: RedundantSemicolons,
133136
UnusedDocComment: UnusedDocComment,

‎compiler/rustc_lint_defs/src/builtin.rs

Copy file name to clipboardExpand all lines: compiler/rustc_lint_defs/src/builtin.rs
+28Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3416,3 +3416,31 @@ declare_lint! {
34163416
Warn,
34173417
"`break` expression with label and unlabeled loop as value expression"
34183418
}
3419+
3420+
declare_lint! {
3421+
/// The `text_direction_codepoint_in_comment` lint detects Unicode codepoints in comments that
3422+
/// change the visual representation of text on screen in a way that does not correspond to
3423+
/// their in-memory representation.
3424+
///
3425+
/// ### Example
3426+
///
3427+
/// ```rust,compile_fail
3428+
/// #![deny(text_direction_codepoint_in_comment)]
3429+
/// fn main() {
3430+
/// println!("{:?}"); // '‮');
3431+
/// }
3432+
/// ```
3433+
///
3434+
/// {{produces}}
3435+
///
3436+
/// ### Explanation
3437+
///
3438+
/// Unicode allows changing the visual flow of text on screen in order to support scripts that
3439+
/// are written right-to-left, but a specially crafted comment can make code that will be
3440+
/// compiled appear to be part of a comment, depending on the software used to read the code.
3441+
/// To avoid potential problems or confusion, such as in CVE-2021-42574, by default we deny
3442+
/// their use.
3443+
pub TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
3444+
Deny,
3445+
"invisible directionality-changing codepoints in comment"
3446+
}

‎compiler/rustc_lint_defs/src/lib.rs

Copy file name to clipboardExpand all lines: compiler/rustc_lint_defs/src/lib.rs
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ pub enum BuiltinLintDiagnostics {
306306
TrailingMacro(bool, Ident),
307307
BreakWithLabelAndLoop(Span),
308308
NamedAsmLabel(String),
309+
UnicodeTextFlow(Span, String),
309310
}
310311

311312
/// Lints that are buffered up early on in the `Session` before the

‎compiler/rustc_parse/Cargo.toml

Copy file name to clipboardExpand all lines: compiler/rustc_parse/Cargo.toml
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ rustc_session = { path = "../rustc_session" }
1818
rustc_span = { path = "../rustc_span" }
1919
rustc_ast = { path = "../rustc_ast" }
2020
unicode-normalization = "0.1.11"
21+
unicode-width = "0.1.4"

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.