Binary file added BIN +14 KB images/Crossover.png
Binary file added BIN +50.7 KB images/comparision.PNG
Binary file added BIN +4.87 KB images/mutation.png
learning.py: 105 changes (61 additions & 44 deletions)
@@ -12,39 +12,45 @@
import random

from statistics import mean
from collections import defaultdict, Counter
from collections import defaultdict

# ______________________________________________________________________________

def rms_error(predictions, targets):
return math.sqrt(ms_error(predictions, targets))

def euclidean_distance(X, Y):
return math.sqrt(sum([(x - y)**2 for x, y in zip(X, Y)]))

def ms_error(predictions, targets):
return mean([(p - t)**2 for p, t in zip(predictions, targets)])

def rms_error(X, Y):
return math.sqrt(ms_error(X, Y))

def mean_error(predictions, targets):
return mean([abs(p - t) for p, t in zip(predictions, targets)])

def ms_error(X, Y):
return mean([(x - y)**2 for x, y in zip(X, Y)])

def manhattan_distance(predictions, targets):
return sum([abs(p - t) for p, t in zip(predictions, targets)])

def mean_error(X, Y):
return mean([abs(x - y) for x, y in zip(X, Y)])

def mean_boolean_error(predictions, targets):
return mean(int(p != t) for p, t in zip(predictions, targets))

def hamming_distance(predictions, targets):
return sum(p != t for p, t in zip(predictions, targets))
def manhattan_distance(X, Y):
return sum([abs(x - y) for x, y in zip(X, Y)])


def mean_boolean_error(X, Y):
return mean(int(x != y) for x, y in zip(X, Y))


def hamming_distance(X, Y):
return sum(x != y for x, y in zip(X, Y))
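# Reviewer sketch -- hand-computed sanity checks for the metrics above (the values hold
# for either set of parameter names shown, since the bodies are equivalent):
#   rms_error([1, 2, 3], [1, 2, 4])            -> sqrt(1/3) ~ 0.577
#   manhattan_distance([1, 2, 3], [1, 2, 4])   -> 1
#   hamming_distance('toned', 'roses')         -> 3    (positions that differ)
#   mean_boolean_error([1, 1, 0], [1, 0, 0])   -> 1/3  (fraction of mismatches)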

# ______________________________________________________________________________


class DataSet:
"""A data set for a machine learning problem. It has the following fields:
"""A data set for a machine learning problem. It has the following fields:

d.examples A list of examples. Each one is a list of attribute values.
d.attrs A list of integers to index into an example, so example[attr]
gives a value. Normally the same as range(len(d.examples[0])).
d.attrnames Optional list of mnemonic names for corresponding attrs.
@@ -60,14 +66,16 @@ class DataSet:
since that can handle any field types.
d.name Name of the data set (for output display only).
d.source URL or other source where the data came from.
d.exclude A list of attribute indexes to exclude from d.inputs. Elements
of this list can either be integers (attrs) or attrnames.

Normally, you call the constructor and you're done; then you just
access fields like d.examples and d.target and d.inputs."""

def __init__(self, examples=None, attrs=None, attrnames=None, target=-1,
inputs=None, values=None, distance=mean_boolean_error,
name='', source='', exclude=()):
"""Accepts any of DataSet's fields. Examples can also be a
"""Accepts any of DataSet's fields. Examples can also be a
string or file from which to parse examples using parse_csv.
Optional parameter: exclude, as documented in .setproblem().
>>> DataSet(examples='1, 2, 3')
@@ -107,14 +115,14 @@ def setproblem(self, target, inputs=None, exclude=()):
to not use in inputs. Attributes can be -n .. n, or an attrname.
Also computes the list of possible values, if that wasn't done yet."""
self.target = self.attrnum(target)
exclude = map(self.attrnum, exclude)
exclude = list(map(self.attrnum, exclude))
if inputs:
self.inputs = removeall(self.target, inputs)
else:
self.inputs = [a for a in self.attrs
if a != self.target and a not in exclude]
if not self.values:
self.values = list(map(unique, zip(*self.examples)))
self.update_values()
self.check_me()
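# Reviewer note on the list(...) wrapper added above: in Python 3, map() returns a
# one-shot iterator, so the repeated 'a not in exclude' tests in the comprehension would
# silently go wrong after the first membership check. A standalone illustration (not
# project code):
#   m = map(int, ['1', '2', '3'])
#   0 in m    # False, but this consumes the whole iterator
#   2 in m    # False as well -- the iterator is already exhausted
# Materialising the map with list() keeps every membership test correct.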

def check_me(self):
@@ -149,22 +157,26 @@ def attrnum(self, attr):
else:
return attr

def update_values(self):
self.values = list(map(unique, zip(*self.examples)))

def sanitize(self, example):
"""Return a copy of example, with non-input attributes replaced by None."""
return [attr_i if i in self.inputs else None
for i, attr_i in enumerate(example)]

def classes_to_numbers(self,classes=None):
def classes_to_numbers(self, classes=None):
"""Converts class names to numbers."""
if not classes:
# If classes were not given, extract them from values
classes = sorted(self.values[self.target])
for item in self.examples:
item[self.target] = classes.index(item[self.target])
def remove_examples(self,value=""):

def remove_examples(self, value=""):
"""Remove examples that contain given value."""
self.examples = [x for x in self.examples if value not in x]
self.update_values()
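# Reviewer sketch -- how the constructor and the methods above fit together. The
# attribute names and values here are invented purely for illustration:
#   ds = DataSet(examples=[[5.1, 3.5, 'setosa'], [7.0, 3.2, 'versicolor']],
#                attrnames=['sepal_len', 'sepal_wid', 'class'], target='class')
#   ds.classes_to_numbers()   # class labels become 0/1 in sorted order
#   ds.remove_examples('')    # drops rows containing '' and refreshes ds.values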

def __repr__(self):
return '<DataSet({}): {:d} examples, {:d} attributes>'.format(
@@ -376,7 +388,7 @@ def plurality_value(examples):

def count(attr, val, examples):
"""Count the number of examples that have attr = val."""
return sum(e[attr] == val for e in examples) #count(e[attr] == val for e in examples)
return sum(e[attr] == val for e in examples)

def all_same_class(examples):
"""Are all these examples in the same target class?"""
@@ -635,16 +647,17 @@ def LinearLearner(dataset, learning_rate=0.01, epochs=100):
idx_i = dataset.inputs
idx_t = dataset.target # As of now, dataset.target gives only one index.
examples = dataset.examples
num_examples = len(examples)

# X transpose
X_col = [dataset.values[i] for i in idx_i] # vertical columns of X

# Add dummy
ones = [1 for _ in range(len(examples))]
X_col = ones + X_col
X_col = [ones] + X_col

# Initialize random weights
w = [random.randrange(-0.5, 0.5) for _ in range(len(idx_i) + 1)]
w = [random.uniform(-0.5, 0.5) for _ in range(len(idx_i) + 1)]

for epoch in range(epochs):
err = []
@@ -657,7 +670,8 @@

# update weights
for i in range(len(w)):
w[i] = w[i] - learning_rate * dotproduct(err, X_col[i])
w[i] = w[i] + learning_rate * (dotproduct(err, X_col[i]) / num_examples)
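# Reviewer note: assuming err holds target-minus-prediction for each example, the batch
# update for mean squared error is w_i <- w_i + alpha * (1/N) * sum_j err_j * x_j[i],
# which is what the new line computes; the old line had the opposite sign and omitted
# the 1/N averaging, so larger datasets implicitly used a larger step size.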


def predict(example):
x = [1] + example
@@ -754,7 +768,7 @@ def weighted_replicate(seq, weights, n):
wholes = [int(w * n) for w in weights]
fractions = [(w * n) % 1 for w in weights]
return (flatten([x] * nx for x, nx in zip(seq, wholes)) +
weighted_sample_with_replacement(n - sum(wholes),seq, fractions, ))
weighted_sample_with_replacement(n - sum(wholes), seq, fractions))
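# Reviewer sketch -- a worked example of the return above, assuming the weights have
# been normalised earlier in the function: weighted_replicate('ABC', [1, 2, 1], 4)
# gives wholes = [1, 2, 1], so the deterministic part already yields
# ['A', 'B', 'B', 'C'] and no extra weighted sampling is needed.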


def flatten(seqs): return sum(seqs, [])
@@ -850,7 +864,7 @@ def cross_validation_wrapper(learner, dataset, k=10, trials=1):
size += 1


def leave_one_out(learner, dataset):
def leave_one_out(learner, dataset, size=None):
"""Leave one out cross-validation over the dataset."""
return cross_validation(learner, size, dataset, k=len(dataset.examples))
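# Reviewer note: before this change, 'size' was referenced in the cross_validation call
# without ever being defined in this scope, so leave_one_out raised a NameError; exposing
# it as a keyword argument forwards it to cross_validation unchanged.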

@@ -868,6 +882,7 @@ def score(learner, size):
# ______________________________________________________________________________
# The rest of this file gives datasets for machine learning problems.


orings = DataSet(name='orings', target='Distressed',
attrnames="Rings Distressed Temp Pressure Flightnum")

@@ -891,6 +906,7 @@ def RestaurantDataSet(examples=None):
attrnames='Alternate Bar Fri/Sat Hungry Patrons Price ' +
'Raining Reservation Type WaitEstimate Wait')


restaurant = RestaurantDataSet()


@@ -900,28 +916,29 @@ def T(attrname, branches):
for value, child in branches.items()}
return DecisionFork(restaurant.attrnum(attrname), attrname, branches)


""" [Figure 18.2]
A decision tree for deciding whether to wait for a table at a restaurant.
"""

waiting_decision_tree = T('Patrons',
{'None': 'No', 'Some': 'Yes', 'Full':
T('WaitEstimate',
{'>60': 'No', '0-10': 'Yes',
'30-60':
T('Alternate', {'No':
T('Reservation', {'Yes': 'Yes', 'No':
T('Bar', {'No': 'No',
'Yes': 'Yes'
})}),
'Yes':
T('Fri/Sat', {'No': 'No', 'Yes': 'Yes'})}),
'10-30':
T('Hungry', {'No': 'Yes', 'Yes':
T('Alternate',
{'No': 'Yes', 'Yes':
T('Raining', {'No': 'No', 'Yes': 'Yes'})
})})})})
{'None': 'No', 'Some': 'Yes',
'Full': T('WaitEstimate',
{'>60': 'No', '0-10': 'Yes',
'30-60': T('Alternate',
{'No': T('Reservation',
{'Yes': 'Yes',
'No': T('Bar', {'No': 'No',
'Yes': 'Yes'})}),
'Yes': T('Fri/Sat', {'No': 'No', 'Yes': 'Yes'})}
),
'10-30': T('Hungry',
{'No': 'Yes',
'Yes': T('Alternate',
{'No': 'Yes',
'Yes': T('Raining',
{'No': 'No',
'Yes': 'Yes'})})})})})
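# Reviewer sketch -- tracing the tree above: an example with Patrons='Full',
# WaitEstimate='10-30', Hungry='Yes' and Alternate='No' follows the
# Full -> 10-30 -> Yes -> No branches and is classified 'Yes' (wait for the table).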


def SyntheticRestaurant(n=20):