Search For Fun

A blog for collecting diverse useful information



python iterate or loop over each row of a dataframe

Posted on 2018-09-27 | Edited on 2018-07-19 | In python

description: python iterate a dataframe

loop over each row of a dataframe

import pandas
data = pandas.DataFrame([[1,2,3],[4,5,6],[7,8,9]])
for row in data.values:
    print(row)
# output
# [1 2 3]
# [4 5 6]
# [7 8 9]
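If the row index is also needed, pandas provides DataFrame.iterrows() (and the faster itertuples()); a minimal sketch on the same data frame:

for index, row in data.iterrows():
    print(index, list(row))
# 0 [1, 2, 3]
# 1 [4, 5, 6]
# 2 [7, 8, 9]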


python assign value of dataframe by conditions

Posted on 2018-09-27 | Edited on 2018-07-14 | In python

description: python assign or set value if conditions match

python assign or set value based on conditions

import pandas

data = pandas.DataFrame({"test": [10, 20, 30, 40], "a": [1, 1, 1, 2], "b": [0, 2, 2, 2]})
#    test  a  b
# 0    10  1  0
# 1    20  1  2
# 2    30  1  2
# 3    40  2  2

filter the data with condition 1

tmp = data[data["a"] == 1]
#    test  a  b
# 0    10  1  0
# 1    20  1  2
# 2    30  1  2

filter the data with condition 2

tmp = tmp[tmp["b"] == 2]
#    test  a  b
# 1    20  1  2
# 2    30  1  2

get the index of the data

indexes = tmp.index
# Int64Index([1, 2], dtype='int64')

change values by indexes and column name

data.loc[indexes, 'test'] = 888888  # .loc is the public API; _set_value is a private pandas method
#      test  a  b
# 0      10  1  0
# 1  888888  1  2
# 2  888888  1  2
# 3      40  2  2
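The filtering and assignment can also be collapsed into a single step with a combined boolean mask; a minimal sketch on the same data frame:

data.loc[(data["a"] == 1) & (data["b"] == 2), "test"] = 888888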


python build small network example for ANN

Posted on 2018-09-27 | Edited on 2018-07-23 | In machine learning

description: build small network example in python

initialize a network

from random import seed, random

def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    # each neuron gets one weight per input plus one extra weight for the bias
    hidden_layer = [{'weights': [random() for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights': [random() for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network

test network

seed(1)
network = initialize_network(2, 1, 2)
for layer in network:
    print(layer)

output

[{'weights': [0.13436424411240122, 0.8474337369372327, 0.763774618976614]}]
[{'weights': [0.2550690257394217, 0.49543508709194095]}, {'weights': [0.4494910647887381, 0.651592972722763]}]
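Each hidden neuron holds n_inputs + 1 weights and each output neuron n_hidden + 1 weights because of the bias term; a quick illustrative check (not part of the original post):

for layer in network:
    print([len(neuron['weights']) for neuron in layer])
# [3]
# [2, 2]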

python create simple MLP in Keras

Posted on 2018-09-27 | Edited on 2018-07-23 | In machine learning

description: create simple MLP in Keras

import packages

from keras.models import Sequential
from keras.layers import Dense
import numpy

fix random seed for reproducibility

numpy.random.seed(7)

load pima indians dataset

dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")

split into input (X) and output (Y) variables

X = dataset[:,0:8]
Y = dataset[:,8]

create model

model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

Compile model

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

Fit the model

model.fit(X, Y, epochs=150, batch_size=10)

evaluate the model

scores = model.evaluate(X, Y)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

python simple convolutional neural network for MNIST dataset

Posted on 2018-09-27 | Edited on 2018-07-23 | In machine learning

description: simple CNN example for MNIST dataset

import packages

import numpy
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
K.set_image_dim_ordering('th')

fix random seed for reproducibility

seed = 7
numpy.random.seed(seed)

load data

(X_train, y_train), (X_test, y_test) = mnist.load_data()
# reshape to be [samples][pixels][width][height]
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32')

normalize inputs from 0-255 to 0-1

X_train = X_train / 255
X_test = X_test / 255
# one hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]

define baseline model

def baseline_model():
    # create model
    model = Sequential()
    model.add(Conv2D(32, (5, 5), input_shape=(1, 28, 28), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

build, fit and evaluate the model

# build the model
model = baseline_model()
# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=200, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Baseline Error: %.2f%%" % (100-scores[1]*100))

output

Train on 60000 samples, validate on 10000 samples

Epoch 1/3

69s - loss: 0.2498 - acc: 0.9283 - val_loss: 0.0715 - val_acc: 0.9803

Epoch 2/3

70s - loss: 0.0714 - acc: 0.9786 - val_loss: 0.0503 - val_acc: 0.9833

Epoch 3/3
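Once fitted, the model can classify an individual image; a minimal illustrative sketch using the first test sample:

prediction = model.predict(X_test[0:1])
print(numpy.argmax(prediction))  # predicted digit
print(numpy.argmax(y_test[0]))   # true digit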

an example of RL in python

Posted on 2018-09-27 | Edited on 2018-07-23 | In machine learning

description: a small example for reinforcement learning in python

import packages and set seed

import numpy as np
import pandas as pd
import time

np.random.seed(2) # reproducible

initial parameters

NUMBER_OF_STATES = 6         # the length of the 1-dimensional world
ACTIONS = ['left', 'right']  # available actions
EPSILON = 0.9                # greedy policy
ALPHA = 0.1                  # learning rate
GAMMA = 0.9                  # discount factor
MAX_EPISODES = 13            # maximum episodes
FRESH_TIME = 0.3             # speed of actions

build q-table for actions

def build_q_table(number_of_states, actions):
    """
    just build a table of zeros in the beginning
    :param number_of_states:
    :param actions:
    :return:
    """
    table = pd.DataFrame(
        np.zeros((number_of_states, len(actions))),  # q_table initial values
        columns=actions,                             # actions' names
    )
    # print(table)  # show table
    return table
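Calling it for this 6-state world gives one row per state and one column per action, all initialized to zero:

print(build_q_table(NUMBER_OF_STATES, ACTIONS))
#    left  right
# 0   0.0    0.0
# 1   0.0    0.0
# 2   0.0    0.0
# 3   0.0    0.0
# 4   0.0    0.0
# 5   0.0    0.0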

method to choose an action

def choose_action(state, q_table):
    # This is how to choose an action
    state_actions = q_table.iloc[state, :]
    # np.random.uniform() returns a random number in [0, 1)
    if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()):
        # act non-greedy, or this state's actions have no value yet
        action_name = np.random.choice(ACTIONS)
    else:
        # act greedy; idxmax replaces argmax, which means a different function in newer versions of pandas
        action_name = state_actions.idxmax()
    return action_name

define the change of environment

def get_env_feedback(Current_State, Current_Action):
    """
    get the result of a movement: 'right' moves to state + 1, 'left' to state - 1
    :param Current_State:
    :param Current_Action:
    :return: (next state, response); response 1 means the end was reached, 0 otherwise
    """
    # This is how the agent interacts with the environment
    if Current_Action == 'right':  # move right
        if Current_State == NUMBER_OF_STATES - 2:  # terminate
            Next_State = 'terminal'
            Response = 1
        else:
            Next_State = Current_State + 1
            Response = 0
    else:  # move left
        Response = 0
        if Current_State == 0:
            Next_State = Current_State  # reach the wall
        else:
            Next_State = Current_State - 1
    return Next_State, Response
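Two illustrative calls (with NUMBER_OF_STATES = 6, state 4 sits one step to the left of the terminal):

print(get_env_feedback(4, 'right'))  # ('terminal', 1)
print(get_env_feedback(0, 'left'))   # (0, 0), the agent bumps into the wall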

update environment

def update_env(state, episode, step_counter):
    """
    print the current position of the agent to the screen
    :param state:
    :param episode:
    :param step_counter:
    :return:
    """
    # This is how the environment gets updated
    env_list = ['-'] * (NUMBER_OF_STATES - 1) + ['T']  # '-----T' is our environment
    if state == 'terminal':
        interaction = 'Episode %s: total_steps = %s' % (episode + 1, step_counter)
        print('\r{}'.format(interaction), end='')
        time.sleep(2)
        print('\r ', end='')
    else:
        env_list[state] = 'o'
        interaction = ''.join(env_list)
        print('\r{}'.format(interaction), end='')
        time.sleep(FRESH_TIME)

main part of RL loop

def rl():
    q_table = build_q_table(NUMBER_OF_STATES, ACTIONS)
    for episode in range(MAX_EPISODES):
        step_counter = 0
        current_state = 0
        is_terminated = False
        update_env(current_state, episode, step_counter)
        while not is_terminated:

            action = choose_action(current_state, q_table)
            next_state, R = get_env_feedback(current_state, action)  # take action & get next state and reward
            q_predict = q_table.loc[current_state, action]
            if next_state != 'terminal':
                q_target = R + GAMMA * q_table.iloc[next_state, :].max()  # next state is not terminal
            else:
                q_target = R          # next state is terminal
                is_terminated = True  # terminate this episode

            q_table.loc[current_state, action] += ALPHA * (q_target - q_predict)  # update
            current_state = next_state  # move to the next state

            update_env(current_state, episode, step_counter + 1)
            step_counter += 1
    return q_table

run the application

if __name__ == "__main__":
    q_table = rl()
    print('\r\nQ-table:\n')
    print(q_table)
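Once training finishes, the greedy policy can be read off the Q-table by taking the best action per state; a small sketch (after enough episodes the non-terminal states should come to prefer 'right'):

policy = q_table.idxmax(axis=1)
print(policy)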

simplest example of a self-built LSTM in python

Posted on 2018-09-27 | Edited on 2018-07-27 | In machine learning

description: a simple self-built lstm example in plain numpy

compute sigmoid nonlinearity

import copy
import numpy as np

def sigmoid(x):
    output = 1 / (1 + np.exp(-x))
    return output

convert output of sigmoid function to its derivative

def sigmoid_output_to_derivative(output):
    return output * (1 - output)

training dataset generation

int2binary = {}
binary_dim = 8

largest_number = pow(2, binary_dim)
# generate binary table
binary = np.unpackbits(
    np.array([range(largest_number)], dtype=np.uint8).T, axis=1)
# int + binary array pair
for i in range(largest_number):
    int2binary[i] = binary[i]

input variables

alpha = 0.1
input_dim = 2
hidden_dim = 16
output_dim = 1

initialize neural network weights

synapse_0 = 2 * np.random.random((input_dim, hidden_dim)) - 1 
synapse_1 = 2 * np.random.random((hidden_dim, output_dim)) - 1
synapse_h = 2 * np.random.random((hidden_dim, hidden_dim)) - 1

synapse_0_update = np.zeros_like(synapse_0)
synapse_1_update = np.zeros_like(synapse_1)
synapse_h_update = np.zeros_like(synapse_h)

training logic (the loop runs for 10,000 training iterations)

for j in range(10000):

    # generate a simple addition problem (a + b = c)
    a_int = np.random.randint(largest_number / 2)  # int version
    a = int2binary[a_int]  # binary encoding like a: [0, 0, 0, 0, 1, 0, 0, 1]

    b_int = np.random.randint(largest_number / 2)  # int version
    b = int2binary[b_int]  # binary encoding like b: [0, 0, 1, 1, 1, 1, 0, 0]

    # true answer
    c_int = a_int + b_int
    c = int2binary[c_int]

    # where we'll store our best guess (binary encoded)
    d = np.zeros_like(c)  # like [0, 0, 0, 0, 0, 0, 0, 0]

    overallError = 0

    layer_2_deltas = list()
    layer_1_values = list()  # store the hidden layer for the next timestep
    layer_1_values.append(np.zeros(hidden_dim))

    # moving along the positions in the binary encoding
    for position in range(binary_dim):
        # generate input and output
        X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])  # like [[1, 0]]
        y = np.array([[c[binary_dim - position - 1]]]).T  # [[1]]
        # hidden layer (input ~+ prev_hidden)
        layer_1 = sigmoid(np.dot(X, synapse_0) + np.dot(layer_1_values[-1], synapse_h))

        # output layer (new binary representation)
        layer_2 = sigmoid(np.dot(layer_1, synapse_1))
        # calculate the difference and error
        layer_2_error = y - layer_2
        layer_2_deltas.append((layer_2_error) * sigmoid_output_to_derivative(layer_2))
        overallError += np.abs(layer_2_error[0])
        # decode estimate so we can print it out
        d[binary_dim - position - 1] = np.round(layer_2[0][0])

        # store hidden layer so we can use it in the next timestep
        layer_1_values.append(copy.deepcopy(layer_1))

    future_layer_1_delta = np.zeros(hidden_dim)

    # update weights (backpropagate through time)
    for position in range(binary_dim):
        X = np.array([[a[position], b[position]]])
        layer_1 = layer_1_values[-position - 1]
        prev_layer_1 = layer_1_values[-position - 2]

        # error at output layer
        layer_2_delta = layer_2_deltas[-position - 1]
        # error at hidden layer
        layer_1_delta = (future_layer_1_delta.dot(synapse_h.T) +
                         layer_2_delta.dot(synapse_1.T)) * sigmoid_output_to_derivative(layer_1)
        # let's update all our weights so we can try again
        synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
        synapse_0_update += X.T.dot(layer_1_delta)

        future_layer_1_delta = layer_1_delta

    synapse_0 += synapse_0_update * alpha
    synapse_1 += synapse_1_update * alpha
    synapse_h += synapse_h_update * alpha

    synapse_0_update *= 0
    synapse_1_update *= 0
    synapse_h_update *= 0

    # print out progress
    if (j % 1000 == 0):
        print("Error:" + str(overallError))
        print("Pred:" + str(d))
        print("True:" + str(c))
        out = 0
        for index, x in enumerate(reversed(d)):
            out += x * pow(2, index)
        print(str(a_int) + " + " + str(b_int) + " = " + str(out))
        print("------------")

output

Error:[3.45638663]
Pred:[0 0 0 0 0 0 0 1]
True:[0 1 0 0 0 1 0 1]
9 + 60 = 1
------------
Error:[3.63389116]
Pred:[1 1 1 1 1 1 1 1]
True:[0 0 1 1 1 1 1 1]
28 + 35 = 255
------------
Error:[3.91366595]
Pred:[0 1 0 0 1 0 0 0]
True:[1 0 1 0 0 0 0 0]
116 + 44 = 72
------------
Error:[3.72191702]
Pred:[1 1 0 1 1 1 1 1]
True:[0 1 0 0 1 1 0 1]
4 + 73 = 223
------------
Error:[3.5852713]
Pred:[0 0 0 0 1 0 0 0]
True:[0 1 0 1 0 0 1 0]
71 + 11 = 8
------------
Error:[2.53352328]
Pred:[1 0 1 0 0 0 1 0]
True:[1 1 0 0 0 0 1 0]
81 + 113 = 162
------------
Error:[0.57691441]
Pred:[0 1 0 1 0 0 0 1]
True:[0 1 0 1 0 0 0 1]
81 + 0 = 81
------------
Error:[1.42589952]
Pred:[1 0 0 0 0 0 0 1]
True:[1 0 0 0 0 0 0 1]
4 + 125 = 129
------------
Error:[0.47477457]
Pred:[0 0 1 1 1 0 0 0]
True:[0 0 1 1 1 0 0 0]
39 + 17 = 56
------------
Error:[0.21595037]
Pred:[0 0 0 0 1 1 1 0]
True:[0 0 0 0 1 1 1 0]
11 + 3 = 14
------------

reference

https://blog.csdn.net/zzukun/article/details/49968129

python use LSTM for predicting characters in text

Posted on 2018-09-27 | Edited on 2018-07-23 | In machine learning

description: use an LSTM to predict characters in text in python

import packages

import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

load ascii text and convert to lowercase

filename = "wonderland.txt"
raw_text = open(filename).read()
raw_text = raw_text.lower()

create mapping of unique chars to integers

chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))

summarize the loaded data

# Total Characters:  147674
# Total Vocab: 47
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

split the book text up into subsequences with a fixed length of 100 characters

seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i: i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
# Total Patterns:  147574
print("Total Patterns: ", n_patterns)

reshape X to be [samples, time steps, features]

X = numpy.reshape(dataX, (n_patterns, seq_length, 1))

normalize

X = X / float(n_vocab)

one hot encode the output variable

y = np_utils.to_categorical(dataY)

define the LSTM model

model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
# y.shape (144312, 47)
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

define the checkpoint

filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

fit the model

model.fit(X, y, epochs=20, batch_size=128, callbacks=callbacks_list)
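Once training has produced a set of weights, new text can be generated by repeatedly feeding the model its own predictions; a minimal sketch (the int_to_char reverse mapping is an addition here, it is not defined in the post above):

int_to_char = dict((i, c) for i, c in enumerate(chars))  # reverse of char_to_int (added for this sketch)
start = numpy.random.randint(0, len(dataX) - 1)
pattern = dataX[start]
for i in range(200):
    x = numpy.reshape(pattern, (1, len(pattern), 1)) / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    print(int_to_char[index], end='')
    pattern.append(index)
    pattern = pattern[1:]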

python ANN forward Propagate

Posted on 2018-09-27 | Edited on 2018-07-23 | In machine learning

Calculate neuron activation for an input

def activate(weights, inputs):
    activation = weights[-1]  # the last weight is the bias
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    return activation

Transfer neuron activation

from math import exp

def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

Forward propagate input to a network output

def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

test forward propagation

network = [[{'weights': [0.13436424411240122, 0.8474337369372327, 0.763774618976614]}],
           [{'weights': [0.2550690257394217, 0.49543508709194095]}, {'weights': [0.4494910647887381, 0.651592972722763]}]]
row = [1, 0, None]
output = forward_propagate(network, row)
print(output)

output

[0.6629970129852887, 0.7253160725279748]

python build Decision Tree on bank note dataset

Posted on 2018-09-27 | Edited on 2018-07-23 | In machine learning

description: build Decision Tree from bank note dataset in python

CART on the Bank Note dataset

from random import seed
from random import randrange
from csv import reader

Load a CSV file

def load_csv(filename):
    file = open(filename, "r")
    lines = reader(file)
    dataset = list(lines)
    return dataset

Convert string column to float

def str_column_to_float(dataset, column):
    for row in dataset:
        row[column] = float(row[column].strip())

Split a dataset into k folds

def cross_validation_split(dataset, n_folds):
    dataset_split = list()
    dataset_copy = list(dataset)
    fold_size = int(len(dataset) / n_folds)
    for i in range(n_folds):
        fold = list()
        while len(fold) < fold_size:
            index = randrange(len(dataset_copy))
            fold.append(dataset_copy.pop(index))
        dataset_split.append(fold)
    return dataset_split

Calculate accuracy percentage

def accuracy_metric(actual, predicted):
    correct = 0
    for i in range(len(actual)):
        if actual[i] == predicted[i]:
            correct += 1
    return correct / float(len(actual)) * 100.0

Evaluate an algorithm using a cross validation split

def evaluate_algorithm(dataset, algorithm, n_folds, *args):
    folds = cross_validation_split(dataset, n_folds)
    scores = list()
    for fold in folds:
        train_set = list(folds)
        train_set.remove(fold)
        train_set = sum(train_set, [])
        test_set = list()
        for row in fold:
            row_copy = list(row)
            test_set.append(row_copy)
            row_copy[-1] = None
        predicted = algorithm(train_set, test_set, *args)
        actual = [row[-1] for row in fold]
        accuracy = accuracy_metric(actual, predicted)
        scores.append(accuracy)
    return scores

Split a dataset based on an attribute and an attribute value

def test_split(index, value, dataset):
    left, right = list(), list()
    for row in dataset:
        if row[index] < value:
            left.append(row)
        else:
            right.append(row)
    return left, right

Calculate the Gini index for a split dataset

def gini_index(groups, classes):
    # count all samples at split point
    n_instances = float(sum([len(group) for group in groups]))
    # sum weighted Gini index for each group
    gini = 0.0
    for group in groups:
        size = float(len(group))
        # avoid divide by zero
        if size == 0:
            continue
        score = 0.0
        # score the group based on the score for each class
        for class_val in classes:
            p = [row[-1] for row in group].count(class_val) / size
            score += p * p
        # weight the group score by its relative size
        gini += (1.0 - score) * (size / n_instances)
    return gini
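For intuition, the worst possible split (each group holds one sample of each class) scores 0.5, while a perfect split scores 0.0:

print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))  # 0.5
print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))  # 0.0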

Select the best split point for a dataset

def get_split(dataset):
    class_values = list(set(row[-1] for row in dataset))
    b_index, b_value, b_score, b_groups = 999, 999, 999, None
    for index in range(len(dataset[0]) - 1):
        for row in dataset:
            groups = test_split(index, row[index], dataset)
            gini = gini_index(groups, class_values)
            if gini < b_score:
                b_index, b_value, b_score, b_groups = index, row[index], gini, groups
    return {'index': b_index, 'value': b_value, 'groups': b_groups}

Create a terminal node value

def to_terminal(group):
    outcomes = [row[-1] for row in group]
    return max(set(outcomes), key=outcomes.count)

Create child splits for a node or make terminal

def split(node, max_depth, min_size, depth):
    left, right = node['groups']
    del(node['groups'])
    # check for a no split
    if not left or not right:
        node['left'] = node['right'] = to_terminal(left + right)
        return
    # check for max depth
    if depth >= max_depth:
        node['left'], node['right'] = to_terminal(left), to_terminal(right)
        return
    # process left child
    if len(left) <= min_size:
        node['left'] = to_terminal(left)
    else:
        node['left'] = get_split(left)
        split(node['left'], max_depth, min_size, depth + 1)
    # process right child
    if len(right) <= min_size:
        node['right'] = to_terminal(right)
    else:
        node['right'] = get_split(right)
        split(node['right'], max_depth, min_size, depth + 1)

Build a decision tree

def build_tree(train, max_depth, min_size):
    root = get_split(train)
    split(root, max_depth, min_size, 1)
    return root

Make a prediction with a decision tree

def predict(node, row):
    if row[node['index']] < node['value']:
        if isinstance(node['left'], dict):
            return predict(node['left'], row)
        else:
            return node['left']
    else:
        if isinstance(node['right'], dict):
            return predict(node['right'], row)
        else:
            return node['right']
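As a small illustration with a hand-built one-node stump (a hypothetical node, not learned from the dataset): rows whose first attribute is below 1.0 fall to the left child, everything else to the right:

stump = {'index': 0, 'value': 1.0, 'left': 0, 'right': 1}  # hypothetical node
print(predict(stump, [0.5, 2.7]))   # 0
print(predict(stump, [3.6, -1.2]))  # 1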

Classification and Regression Tree Algorithm

def decision_tree(train, test, max_depth, min_size):
    tree = build_tree(train, max_depth, min_size)
    predictions = list()
    for row in test:
        prediction = predict(tree, row)
        predictions.append(prediction)
    return predictions

Test CART on Bank Note dataset

seed(1)

load and prepare data

filename = 'data_banknote_authentication.csv'
dataset = load_csv(filename)

convert string attributes to floats

for i in range(len(dataset[0])):
    str_column_to_float(dataset, i)

evaluate algorithm

n_folds = 5
max_depth = 5
min_size = 10
scores = evaluate_algorithm(dataset, decision_tree, n_folds, max_depth, min_size)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))

output

Scores: [100.0, 100.0, 100.0, 100.0, 100.0]

Mean Accuracy: 100.000%
