Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Latest commit

 

History

History
History
96 lines (80 loc) · 2.63 KB

File metadata and controls

96 lines (80 loc) · 2.63 KB
Copy raw file
Download raw file
Open symbols panel
Edit and raw actions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/**
 * Given a data set of an unknown size, get a random sample in a random order.
 *
 * It's used in data analytics, often as a way to get a small random sample
 * from a data lake or warehouse, or from a large CSV file.
 *
 * The source is read in a single pass: the sample is first filled from the
 * start of the data, then the remaining items are given a chance to replace
 * entries already in the sample.
 *
 * @param {Iterable.<T>|Iterator.<T>} datasetSource The source of data: either an
 *   iterable (array, Set, generator object, ...) or a bare iterator exposing `next()`
 * @param {number} sampleSize The size of the sample to extract from the dataset
 * @returns {Array.<T>} The random sample, as an array
 * @template T
 */
function shuf(datasetSource, sampleSize) {
  // Both phases must pull from the SAME iterator so the second phase continues
  // where the first one stopped. Iterables are converted exactly once here;
  // objects that only expose next() are used as they are.
  const isIterable =
    datasetSource != null && typeof datasetSource[Symbol.iterator] === 'function'
  const iterator = isIterable ? datasetSource[Symbol.iterator]() : datasetSource
  const output = fillBaseSample(iterator, sampleSize)
  return randomizeOutputFromDataset(iterator, output)
}
/**
 * Reads up to `sampleSize` items from the front of the source and returns them
 * in a random order.
 *
 * Only as many items as are needed get pulled from the iterator, so a caller
 * can keep reading the remainder of the dataset from the same iterator
 * afterwards. When the dataset holds fewer than `sampleSize` items, all of
 * them are returned. A `sampleSize` of 0 returns an empty array without
 * touching the source.
 *
 * @param {Iterator.<T>} datasetSource The source of data, read through its `next()` method
 * @param {number} sampleSize The size of the sample to extract from the dataset (non-negative integer)
 * @returns {Array.<T>} The random sample, as an array
 * @throws {RangeError} If `sampleSize` is not a non-negative integer
 * @template T
 */
function fillBaseSample(datasetSource, sampleSize) {
  if (!Number.isInteger(sampleSize) || sampleSize < 0) {
    throw new RangeError(
      `sampleSize must be a non-negative integer, got ${String(sampleSize)}`
    )
  }

  // Checking the size BEFORE calling next() means no item is consumed (and
  // lost) once the sample is full - including the sampleSize === 0 case.
  const output = []
  while (output.length < sampleSize) {
    const iterator = datasetSource.next()
    if (iterator.done) break
    output.push(iterator.value)
  }

  // Fisher-Yates shuffle: walk from the back, swapping each slot with a
  // randomly chosen slot at or before it.
  for (let i = output.length - 1; i > 0; i--) {
    const swapWith = Math.floor(Math.random() * (i + 1))
    const held = output[i]
    output[i] = output[swapWith]
    output[swapWith] = held
  }
  return output
}
/**
 * Streams whatever is left in the dataset and lets each newly read item
 * randomly overwrite one entry of the sample (the replacement phase of
 * reservoir sampling).
 *
 * The item that is the n-th one seen overall draws a random index in [0, n);
 * it replaces the sample entry at that index only when the index falls inside
 * the sample. The array passed in is not modified - a copy is returned.
 *
 * @param {Iterable.<T>} datasetSource The source of data, read through its `next()` method
 * @param {Array.<T>} output The output so far, filled with data
 * @returns {Array.<T>} The random sample, as a new array of the same length
 * @template T
 */
function randomizeOutputFromDataset(datasetSource, output) {
  const sample = [...output]
  // Items already in the sample count as "seen".
  let itemsSeen = output.length

  for (let step = datasetSource.next(); !step.done; step = datasetSource.next()) {
    itemsSeen += 1
    const slot = Math.floor(Math.random() * itemsSeen)
    if (slot >= sample.length) continue
    sample[slot] = step.value
  }

  return sample
}
// Example
/**
 * Lazily produces `length` random integers, each from 0 up to (but not
 * including) 2^31 - 1.
 * @param {number} length The number of data items to generate
 * @returns {Iterable<number>} Random iterable data (a generator object)
 */
function* generateRandomData(length) {
  const upperBound = Math.pow(2, 31) - 1
  let produced = 0
  while (produced < length) {
    yield Math.floor(Math.random() * upperBound)
    produced += 1
  }
}
// const source = generateRandomData(1000)
// const result = shuf(source, 10)
export { shuf, generateRandomData }
Morty Proxy This is a proxified and sanitized view of the page, visit original site.