%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Q learning of single agent move in N rooms
% Matlab Code companion of
% Q Learning by Example
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function q=ReinforcementLearning
clc;
format short
format compact
% Two input: R and gamma
% immediate reward matrix;
% row and column = states; -Inf = no door between room
R=[-inf,-inf,-inf,-inf, 0, -inf;
-inf,-inf,-inf, 0,-inf, 100;
-inf,-inf,-inf, 0,-inf, -inf;
-inf, 0, 0,-inf, 0, -inf;
0,-inf,-inf, 0,-inf, 100;
-inf, 0,-inf,-inf, 0, 100];
gamma=0.80; % learning parameter
q=zeros(size(R)); % initialize Q as zero,q的行数和列数等于矩阵R的。
q1=ones(size(R))*inf; % initialize previous Q as big number
count=0; % counter
for episode=0:50000
% random initial state
y=randperm(size(R,1));%产生1到6的随机数%a=size(R,1)把矩阵R的行数返回给a,b=size(R,2)把矩阵R的列数返回给b
state=y(1); %取1到6的随机数的第一个数
% select any action from this state
x=find(R(state,:)>=0); % find possible action of this state.返回矩阵R第state行所有列中不小于零的数据的下标
if size(x,1)>0,
x1=RandomPermutation(x); % randomize the possible action
x1=x1(1); % select an action
end
qMax=max(q,[],2);
q(state,x1)= R(state,x1)+gamma*qMax(x1); % get max of all actions
state=x1;
% break if convergence: small deviation on q for 1000 consecutive
if sum(sum(abs(q1-q)))<0.0001 & sum(sum(q >0))
if count>1000,
episode % report last episode
break % for
else
count=count+1; % set counter if deviation of q is small
end
else
q1=q;
count=0; % reset counter when deviation of q from previous q is large
end
end
%normalize q
g=max(max(q));
if g>0,
q=100*q/g;
end
% The code above is using basic library RandomPermutation below
function y=RandomPermutation(A)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% return random permutation of matrix A
% unlike randperm(n) that give permutation of integer 1:n only,
% RandomPermutation rearrange member of matrix A randomly
% This function is useful for MonteCarlo Simulation,
% Bootstrap sampling, game, etc.
%
% Copyright Kardi Teknomo(c) 2005
% (http://people.revoledu.com/kardi/)
%
% example: A = [ 2, 1, 5, 3]
% RandomPermutation(A) may produce [ 1, 5, 3, 2] or [ 5, 3, 2, 3]
%
% example:
% A=magic(3)
% RandomPermutation(A)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[r,c]=size(A);
b=reshape(A,r*c,1); % convert to column vector
x=randperm(r*c); % make integer permutation of similar array as key
w=[b,x']; % combine matrix and key
d=sortrows(w,2); % sort according to key
y=reshape(d(:,1),r,c); % return back the matrix