A blog for collecting diverse useful information

a example for RL in python

Posted on 2018-09-27 | Edited on 2018-07-23 | In machine learning

description: a small example for reinforcement learning in python

import packages and set seed

import numpy as np
import pandas as pd
import time

np.random.seed(2)  # reproducible

initial parameters

NUMBER_OF_STATES = 6   # the length of the 1 dimensional world 
ACTIONS = ['left', 'right']     # available actions
EPSILON = 0.9   # greedy police 
ALPHA = 0.1     # learning rate
GAMMA = 0.9    # discount factor 
MAX_EPISODES = 13   # maximum episodes
FRESH_TIME = 0.3 # speed of actions

build q-table for actions

def build_q_table(number_of_states, actions):
    """
    just build a table with zero in the beginning
    :param number_of_states:
    :param actions:
    :return:
    """
    table = pd.DataFrame(
        np.zeros((number_of_states, len(actions))),     # q_table initial values
        columns=actions,    # actions's name
    )
    # print(table)    # show table
    return table

method to take action

def choose_action(state, q_table):
    # This is how to choose an action
    state_actions = q_table.iloc[state, :]
     # np.random.uniform() is a random Number
    if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()):  # act non-greedy or state-action have no value
        action_name = np.random.choice(ACTIONS)
    else:   # act greedy
        action_name = state_actions.idxmax()    # replace argmax to idxmax as argmax means a different function in newer version of pandas
    return action_name

define the change of environment

def get_env_feedback(Current_State, Current_Action):
    """
    get the result based on the movement, right to +1, left to -1
    :param Current_State:
    :param Current_Action:
    :return: response 1 means End while response 0 means pass
    """
    # This is how agent will interact with the environment
    if Current_Action == 'right':    # move right
        if Current_State == NUMBER_OF_STATES - 2:   # terminate
            Next_State = 'terminal'
            Response = 1
        else:
            Next_State = Current_State + 1
            Response = 0
    else:   # move left
        Response = 0
        if Current_State == 0:
            Next_State = Current_State  # reach the wall
        else:
            Next_State = Current_State - 1
    return Next_State, Response

update environment

def update_env(state, episode, step_counter):
    """
    print the screen to the movement
    :param state:
    :param episode:
    :param step_counter:
    :return:
    """
    # This is how environment be updated
    env_list = ['-'] * (NUMBER_OF_STATES - 1) + ['T']   # '---------T' our environment
    if state == 'terminal':
        interaction = 'Episode %s: total_steps = %s' % (episode+1, step_counter)
        print('\r{}'.format(interaction), end='')
        time.sleep(2)
        print('\r                                ', end='')
    else:
        env_list[state] = 'o'
        interaction = ''.join(env_list)
        print('\r{}'.format(interaction), end='')
        time.sleep(FRESH_TIME)

main part of RL loop

def rl():
    q_table = build_q_table(NUMBER_OF_STATES, ACTIONS)
    for episode in range(MAX_EPISODES):
        step_counter = 0
        current_state = 0
        is_terminated = False
        update_env(current_state, episode, step_counter)
        while not is_terminated:

            action = choose_action(current_state, q_table)
            next_state, R = get_env_feedback(current_state, action)  # take action & get next state and reward
            q_predict = q_table.loc[current_state, action]
            if next_state != 'terminal':
                q_target = R + GAMMA * q_table.iloc[next_state, :].max()   # next state is not terminal
            else:
                q_target = R     # next state is terminal
                is_terminated = True    # terminate this episode

            q_table.loc[current_state, action] += ALPHA * (q_target - q_predict)  # update
            current_state = next_state  # move to next state

            update_env(current_state, episode, step_counter+1)
            step_counter += 1
    return q_table

run the application

if __name__ == "__main__":
    q_table = rl()
    print('\r\nQ-table:\n')
    print(q_table)

Post author: killfun
Post link: http://search4fan.github.io/post/python_AI_rl_example.html
Copyright Notice: All articles in this blog are licensed under CC BY-NC-SA 4.0 unless stating additionally.