Commit 9cb1111

MAINT Cython linting (#25861)
1 parent 42c2731 commit 9cb1111

53 files changed: 477 additions, 423 deletions

‎.circleci/config.yml

1 addition, 1 deletion

@@ -11,7 +11,7 @@ jobs:
           command: |
             source build_tools/shared.sh
             # Include pytest compatibility with mypy
-            pip install pytest flake8 $(get_dep mypy min) $(get_dep black min)
+            pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint
       - run:
           name: linting
           command: ./build_tools/linting.sh

‎.pre-commit-config.yaml

6 additions, 0 deletions

@@ -20,3 +20,9 @@ repos:
     -   id: mypy
         files: sklearn/
         additional_dependencies: [pytest==6.2.4]
+-   repo: https://github.com/MarcoGorelli/cython-lint
+    rev: v0.15.0
+    hooks:
+        # TODO: add the double-quote-cython-strings hook when its usability has improved:
+        # possibility to pass a directory and use it as a check instead of auto-formatter.
+    -   id: cython-lint
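
For contributors who use pre-commit, the new hook can be exercised locally. A minimal sketch, assuming pre-commit itself is not yet installed in the environment:

# install pre-commit and register the hooks declared in .pre-commit-config.yaml
pip install pre-commit
pre-commit install
# run only the new cython-lint hook against every file in the repository
pre-commit run cython-lint --all-files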

‎azure-pipelines.yml

1 addition, 1 deletion

@@ -35,7 +35,7 @@ jobs:
       - bash: |
           source build_tools/shared.sh
           # Include pytest compatibility with mypy
-          pip install pytest flake8 $(get_dep mypy min) $(get_dep black min)
+          pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint
         displayName: Install linters
      - bash: |
          ./build_tools/linting.sh
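
Both CI configurations (CircleCI and Azure) now install cython-lint next to the other linters before calling build_tools/linting.sh. A rough local equivalent is sketched below; the exact pinned versions come from the get_dep helper in build_tools/shared.sh, so the plain pip install here is an approximation:

# approximate the CI linting environment in a local virtualenv
pip install pytest flake8 mypy black cython-lint
# run the same lint entry point that CI runs
./build_tools/linting.sh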

‎build_tools/linting.sh

3 additions, 0 deletions

@@ -13,6 +13,9 @@ echo -e "No problem detected by flake8\n"
 mypy sklearn/
 echo -e "No problem detected by mypy\n"

+cython-lint sklearn/
+echo -e "No problem detected by cython-lint\n"
+
 # For docstrings and warnings of deprecated attributes to be rendered
 # properly, the property decorator must come before the deprecated decorator
 # (else they are treated as functions)
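
The lint script now runs cython-lint over the whole sklearn/ tree. While iterating on a fix it can also be pointed at individual files; for example, using one of the files touched in this commit:

# lint a single Cython source file instead of the whole package
cython-lint sklearn/cluster/_k_means_common.pyx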

‎pyproject.toml

46 additions, 0 deletions

@@ -38,3 +38,49 @@ exclude = '''
   | asv_benchmarks/env
 )/
 '''
+
+[tool.cython-lint]
+# Ignore the same error codes as flake8
+# + E501 (line too long) because keeping it < 88 in Cython
+# often makes code less readable.
+ignore = [
+    # check ignored by default in flake8. Meaning unclear.
+    'E24',
+    # space before : (needed for how black formats slicing)
+    'E203',
+    # line too long
+    'E501',
+    # do not assign a lambda expression, use a def
+    'E731',
+    # do not use variables named 'l', 'O', or 'I'
+    'E741',
+    # line break before binary operator
+    'W503',
+    # line break after binary operator
+    'W504',
+]
+# Excluded files are generated from tempita templates
+exclude = '''
+(
+    sklearn/_loss/_loss.pyx
+  | sklearn/linear_model/_sag_fast.pyx
+  | sklearn/linear_model/_sgd_fast.pyx
+  | sklearn/utils/_seq_dataset.pyx
+  | sklearn/utils/_seq_dataset.pxd
+  | sklearn/utils/_weight_vector.pyx
+  | sklearn/utils/_weight_vector.pxd
+  | sklearn/metrics/_dist_metrics.pyx
+  | sklearn/metrics/_dist_metrics.pxd
+  | sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd
+  | sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx
+  | sklearn/metrics/_pairwise_distances_reduction/_argkmin_classmode.pyx
+  | sklearn/metrics/_pairwise_distances_reduction/_base.pxd
+  | sklearn/metrics/_pairwise_distances_reduction/_base.pyx
+  | sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd
+  | sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx
+  | sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd
+  | sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx
+  | sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd
+  | sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx
+)
+'''
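
cython-lint picks up this [tool.cython-lint] table from pyproject.toml, so the ignore list and the exclusion of the tempita-generated files apply without extra command-line flags. A minimal sketch, assuming the command is run from the repository root so the configuration file is found:

# run from the scikit-learn repository root;
# files listed under exclude are skipped automatically
cython-lint sklearn/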

‎setup.cfg

1 addition, 10 deletions

@@ -33,18 +33,8 @@ target-version = ['py37']
 ignore=
     # check ignored by default in flake8. Meaning unclear.
     E24,
-    # continuation line under-indented
-    E121,
-    # closing bracket does not match indentation
-    E123,
-    # continuation line over-indented for hanging indent
-    E126,
     # space before : (needed for how black formats slicing)
     E203,
-    # missing whitespace around arithmetic operator
-    E226,
-    # multiple statements on one line (def)
-    E704,
     # do not assign a lambda expression, use a def
     E731,
     # do not use variables named 'l', 'O', or 'I'

@@ -82,6 +72,7 @@ allow_redefinition = True
 ignore =
     sklearn/_loss/_loss.pyx
     sklearn/linear_model/_sag_fast.pyx
+    sklearn/linear_model/_sgd_fast.pyx
     sklearn/utils/_seq_dataset.pyx
     sklearn/utils/_seq_dataset.pxd
     sklearn/utils/_weight_vector.pyx

‎sklearn/_isotonic.pyx

1 addition, 3 deletions

@@ -8,7 +8,6 @@ import numpy as np
 from cython cimport floating
 
 
-
 def _inplace_contiguous_isotonic_regression(floating[::1] y, floating[::1] w):
     cdef:
         Py_ssize_t n = y.shape[0], i, k

@@ -85,7 +84,6 @@ def _make_unique(const floating[::1] X,
     cdef floating current_x = X[0]
     cdef floating current_y = 0
     cdef floating current_weight = 0
-    cdef floating y_old = 0
     cdef int i = 0
     cdef int j
     cdef floating x

@@ -114,4 +112,4 @@ def _make_unique(const floating[::1] X,
         np.asarray(x_out[:i+1]),
         np.asarray(y_out[:i+1]),
         np.asarray(weights_out[:i+1]),
-        )
+    )

‎sklearn/_loss/_loss.pxd

2 additions, 2 deletions

@@ -14,8 +14,8 @@ ctypedef fused G_DTYPE_C:
 
 # Struct to return 2 doubles
 ctypedef struct double_pair:
-   double val1
-   double val2
+    double val1
+    double val2
 
 
 # C base class for loss functions

‎sklearn/cluster/_dbscan_inner.pyx

1 addition, 0 deletions

@@ -7,6 +7,7 @@ cimport numpy as cnp
 
 cnp.import_array()
 
+
 def dbscan_inner(const cnp.uint8_t[::1] is_core,
                  object[:] neighborhoods,
                  cnp.npy_intp[::1] labels):

‎sklearn/cluster/_k_means_common.pxd

1 addition, 1 deletion (whitespace-only change)

@@ -12,7 +12,7 @@ cdef floating _euclidean_sparse_dense(
     const floating[::1],
     const int[::1],
     const floating[::1],
-    floating,
+    floating,
     bint
 ) noexcept nogil
 

‎sklearn/cluster/_k_means_common.pyx

10 additions, 6 deletions

@@ -35,11 +35,14 @@ cdef floating _euclidean_dense_dense(
 
    # We manually unroll the loop for better cache optimization.
    for i in range(n):
-        result += ((a[0] - b[0]) * (a[0] - b[0])
-                   +(a[1] - b[1]) * (a[1] - b[1])
-                   +(a[2] - b[2]) * (a[2] - b[2])
-                   +(a[3] - b[3]) * (a[3] - b[3]))
-        a += 4; b += 4
+        result += (
+            (a[0] - b[0]) * (a[0] - b[0]) +
+            (a[1] - b[1]) * (a[1] - b[1]) +
+            (a[2] - b[2]) * (a[2] - b[2]) +
+            (a[3] - b[3]) * (a[3] - b[3])
+        )
+        a += 4
+        b += 4
 
     for i in range(rem):
         result += (a[i] - b[i]) * (a[i] - b[i])

@@ -77,7 +80,8 @@ cdef floating _euclidean_sparse_dense(
 
     result += b_squared_norm
 
-    if result < 0: result = 0.0
+    if result < 0:
+        result = 0.0
 
     return result if squared else sqrt(result)
‎sklearn/cluster/_k_means_elkan.pyx

Copy file name to clipboardExpand all lines: sklearn/cluster/_k_means_elkan.pyx
+18-11Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ def init_bounds_sparse(
154154
cdef:
155155
int n_samples = X.shape[0]
156156
int n_clusters = centers.shape[0]
157-
int n_features = X.shape[1]
158157

159158
floating[::1] X_data = X.data
160159
int[::1] X_indices = X.indices
@@ -269,7 +268,7 @@ def elkan_iter_chunked_dense(
269268
int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
270269
int n_chunks = n_samples // n_samples_chunk
271270
int n_samples_rem = n_samples % n_samples_chunk
272-
int chunk_idx, n_samples_chunk_eff
271+
int chunk_idx
273272
int start, end
274273

275274
int i, j, k
@@ -386,9 +385,11 @@ cdef void _update_chunk_dense(
386385
# If this holds, then center_index is a good candidate for the
387386
# sample to be relabelled, and we need to confirm this by
388387
# recomputing the upper and lower bounds.
389-
if (j != label
388+
if (
389+
j != label
390390
and (upper_bound > lower_bounds[i, j])
391-
and (upper_bound > center_half_distances[label, j])):
391+
and (upper_bound > center_half_distances[label, j])
392+
):
392393

393394
# Recompute upper bound by calculating the actual distance
394395
# between the sample and its current assigned center.
@@ -401,8 +402,10 @@ cdef void _update_chunk_dense(
401402
# If the condition still holds, then compute the actual
402403
# distance between the sample and center. If this is less
403404
# than the previous distance, reassign label.
404-
if (upper_bound > lower_bounds[i, j]
405-
or (upper_bound > center_half_distances[label, j])):
405+
if (
406+
upper_bound > lower_bounds[i, j]
407+
or (upper_bound > center_half_distances[label, j])
408+
):
406409

407410
distance = _euclidean_dense_dense(
408411
&X[i, 0], &centers_old[j, 0], n_features, False)
@@ -504,7 +507,7 @@ def elkan_iter_chunked_sparse(
504507
int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
505508
int n_chunks = n_samples // n_samples_chunk
506509
int n_samples_rem = n_samples % n_samples_chunk
507-
int chunk_idx, n_samples_chunk_eff
510+
int chunk_idx
508511
int start, end
509512

510513
int i, j, k
@@ -631,9 +634,11 @@ cdef void _update_chunk_sparse(
631634
# If this holds, then center_index is a good candidate for the
632635
# sample to be relabelled, and we need to confirm this by
633636
# recomputing the upper and lower bounds.
634-
if (j != label
637+
if (
638+
j != label
635639
and (upper_bound > lower_bounds[i, j])
636-
and (upper_bound > center_half_distances[label, j])):
640+
and (upper_bound > center_half_distances[label, j])
641+
):
637642

638643
# Recompute upper bound by calculating the actual distance
639644
# between the sample and its current assigned center.
@@ -648,8 +653,10 @@ cdef void _update_chunk_sparse(
648653
# If the condition still holds, then compute the actual
649654
# distance between the sample and center. If this is less
650655
# than the previous distance, reassign label.
651-
if (upper_bound > lower_bounds[i, j]
652-
or (upper_bound > center_half_distances[label, j])):
656+
if (
657+
upper_bound > lower_bounds[i, j]
658+
or (upper_bound > center_half_distances[label, j])
659+
):
653660
distance = _euclidean_sparse_dense(
654661
X_data[X_indptr[i] - s: X_indptr[i + 1] - s],
655662
X_indices[X_indptr[i] - s: X_indptr[i + 1] - s],

‎sklearn/cluster/_k_means_lloyd.pyx

5 additions, 4 deletions

@@ -87,7 +87,7 @@ def lloyd_iter_chunked_dense(
         int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
         int n_chunks = n_samples // n_samples_chunk
         int n_samples_rem = n_samples % n_samples_chunk
-        int chunk_idx, n_samples_chunk_eff
+        int chunk_idx
         int start, end
 
         int j, k

@@ -153,8 +153,9 @@ def lloyd_iter_chunked_dense(
 
     if update_centers:
         omp_destroy_lock(&lock)
-        _relocate_empty_clusters_dense(X, sample_weight, centers_old,
-                                       centers_new, weight_in_clusters, labels)
+        _relocate_empty_clusters_dense(
+            X, sample_weight, centers_old, centers_new, weight_in_clusters, labels
+        )
 
         _average_centers(centers_new, weight_in_clusters)
         _center_shift(centers_old, centers_new, center_shift)

@@ -278,7 +279,7 @@ def lloyd_iter_chunked_sparse(
         int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
         int n_chunks = n_samples // n_samples_chunk
         int n_samples_rem = n_samples % n_samples_chunk
-        int chunk_idx, n_samples_chunk_eff = 0
+        int chunk_idx
         int start = 0, end = 0
 
         int j, k

‎sklearn/datasets/_svmlight_format_fast.pyx

8 additions, 9 deletions

@@ -113,6 +113,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based,
 
     return (dtype, data, indices, indptr, labels, query)
 
+
 # Two fused types are defined to be able to
 # use all possible combinations of parameters.
 ctypedef fused int_or_float:

@@ -128,8 +129,9 @@ ctypedef fused int_or_longlong:
     cython.integral
     signed long long
 
+
 def get_dense_row_string(
-    int_or_float[:,:] X,
+    int_or_float[:, :] X,
     Py_ssize_t[:] x_inds,
     double_or_longlong[:] x_vals,
     Py_ssize_t row,

@@ -143,7 +145,7 @@ def get_dense_row_string(
         int_or_float val
 
     for k in range(row_length):
-        val = X[row,k]
+        val = X[row, k]
         if val == 0:
             continue
         x_inds[x_nz_used] = k

@@ -157,6 +159,7 @@ def get_dense_row_string(
 
     return " ".join(reprs)
 
+
 def get_sparse_row_string(
     int_or_float[:] X_data,
     int[:] X_indptr,

@@ -176,6 +179,7 @@ def get_sparse_row_string(
 
     return " ".join(reprs)
 
+
 def _dump_svmlight_file(
     X,
     y,

@@ -211,8 +215,6 @@ def _dump_svmlight_file(
         Py_ssize_t j
         Py_ssize_t col_start
         Py_ssize_t col_end
-        bint first
-        Py_ssize_t x_nz_used
         Py_ssize_t[:] x_inds = np.empty(row_length, dtype=np.intp)
         signed long long[:] x_vals_int
         double[:] x_vals_float

@@ -224,8 +226,6 @@ def _dump_svmlight_file(
         x_vals_float = np.zeros(row_length, dtype=np.float64)
 
     for i in range(x_len):
-        x_nz_used = 0
-
         if not X_is_sp:
             if X_is_integral:
                 s = get_dense_row_string(X, x_inds, x_vals_int, i, value_pattern, one_based)

@@ -234,18 +234,17 @@ def _dump_svmlight_file(
         else:
             s = get_sparse_row_string(X.data, X.indptr, X.indices, i, value_pattern, one_based)
         if multilabel:
-            first = True
             if y_is_sp:
                 col_start = y.indptr[i]
                 col_end = y.indptr[i+1]
                 labels_str = ','.join(tuple(label_pattern % y.indices[j] for j in range(col_start, col_end) if y.data[j] != 0))
             else:
-                labels_str = ','.join(label_pattern % j for j in range(num_labels) if y[i,j] != 0)
+                labels_str = ','.join(label_pattern % j for j in range(num_labels) if y[i, j] != 0)
         else:
             if y_is_sp:
                 labels_str = label_pattern % y.data[i]
             else:
-                labels_str = label_pattern % y[i,0]
+                labels_str = label_pattern % y[i, 0]
 
         if query_id_is_not_empty:
             feat = (labels_str, query_id[i], s)

‎sklearn/decomposition/_cdnmf_fast.pyx

1 addition, 1 deletion (whitespace-only change)

@@ -34,5 +34,5 @@ def _update_cdnmf_fast(floating[:, ::1] W, floating[:, :] HHt,
 
         if hess != 0:
             W[i, t] = max(W[i, t] - grad / hess, 0.)
-
+
     return violation
