%data_scraping_australian_sport

%Written by Owen Churches, June 2014.

% NRL section.
% For every season in `years`, walk the tables on that season's page at
% stats.rleague.com and record the proportion of matches in which the home
% side won. The result is proportionOfHomeWinsNRL, with one entry per season.
%
% Relies on the external helper getTableFromWeb_mod(url, n).
% NOTE(review): presumably it returns the nth table on the page as a cell
% array and throws when there is no such table - confirm against the helper.

clearvars
close all

years = 1908:2013;

maxTablesPerPage = 300; % Upper bound on the table indices probed on each page.
abbrLength = 5;         % Leading characters compared between home team and result text.

% Preallocate; NaN marks a season for which no match table was recognised.
proportionOfHomeWinsNRL = nan(1, numel(years));

for yearCount = 1:numel(years)

    % Progress indicator so you can see on the screen how far along you are.
    fprintf('NRL season %d\n', years(yearCount));

    webPage = strcat('http://stats.rleague.com/rl/seas/', num2str(years(yearCount)), '.html');

    % Start each season from a clean slate so that nothing recorded for a
    % previous season can leak into this one. NaN = "not a match table".
    HomeWon = nan(1, maxTablesPerPage);

    for r = 1:maxTablesPerPage

        % Fetch each table once. A failed fetch is treated as "no rth
        % table" (best effort, as before) and the index is skipped.
        try
            currentTable = getTableFromWeb_mod(webPage, r);
        catch
            continue
        end

        % Non-match tables (such as the ones that say the round number)
        % have different dimensions.
        if ~isequal(size(currentTable), [2 4])
            continue
        end

        HomeTeam = currentTable{1,1}; % Name of the home team.
        Result = currentTable{2,4};   % Result text.

        % Only compare when both cells hold text long enough to abbreviate;
        % anything else is left as NaN instead of raising an index error.
        canCompare = ischar(HomeTeam) && ischar(Result) ...
            && numel(HomeTeam) >= abbrLength && numel(Result) >= abbrLength;

        if canCompare
            % The home side won when the result text starts with the same
            % characters as the home team's name.
            % NOTE(review): teams sharing their first five characters cannot
            % be told apart by this test - confirm that is acceptable.
            HomeWon(r) = strcmp(HomeTeam(1:abbrLength), Result(1:abbrLength));
        end

    end

    % Count the proportion of homeside wins for the year.
    numOfMatches = sum(~isnan(HomeWon));  % Number of matches regardless of the outcome.
    numberOfHomeWins = sum(HomeWon == 1); % Number of matches the home side won (no toolbox needed).

    % 0/0 yields NaN, which flags a season with no recognised matches.
    proportionOfHomeWinsNRL(yearCount) = numberOfHomeWins/numOfMatches;

end
%%
% AFL section.
% For every season in `years`, walk the tables on that season's page at
% afltables.com and record the proportion of matches in which the home side
% won. The result is proportionOfHomeWinsAFL, with one entry per season.
%
% Relies on the external helper getTableFromWeb_mod(url, n).
% NOTE(review): presumably it returns the nth table on the page as a cell
% array and throws when there is no such table - confirm against the helper.

clearvars -except proportionOfHomeWinsNRL

years = 1908:2013;

maxTablesPerPage = 300; % Upper bound on the table indices probed on each page.
abbrLength = 5;         % Leading characters compared between home team and result text.

% Preallocate; NaN marks a season for which no match table was recognised.
proportionOfHomeWinsAFL = nan(1, numel(years));

for yearCount = 1:numel(years)

    webPage = strcat('http://afltables.com/afl/seas/', num2str(years(yearCount)), '.html');

    % Progress indicator so you can see on the screen how far along you are.
    fprintf('%s\n', webPage);

    % Start each season from a clean slate so that nothing recorded for a
    % previous season can leak into this one. NaN = "not a match table".
    HomeWon = nan(1, maxTablesPerPage);

    % Note that the tables on the page are not all matches; this loop
    % visits every table and keeps only the ones that look like a match.
    for t = 1:maxTablesPerPage

        % Fetch each table once. A failed fetch is treated as "no tth
        % table" (best effort, as before) and the index is skipped.
        try
            currentTable = getTableFromWeb_mod(webPage, t);
        catch
            continue
        end

        % Non-match tables (such as the ones that say the round number)
        % have different dimensions.
        if ~isequal(size(currentTable), [2 4])
            continue
        end

        HomeTeam = currentTable{1,1}; % Name of the home team.
        Result = currentTable{2,4};   % Result text.

        % Only compare when both cells hold text long enough to abbreviate;
        % anything else is left as NaN instead of raising an index error.
        canCompare = ischar(HomeTeam) && ischar(Result) ...
            && numel(HomeTeam) >= abbrLength && numel(Result) >= abbrLength;

        if canCompare
            % The home side won when the result text starts with the same
            % characters as the home team's name.
            % NOTE(review): teams sharing their first five characters cannot
            % be told apart by this test - confirm that is acceptable.
            HomeWon(t) = strcmp(HomeTeam(1:abbrLength), Result(1:abbrLength));
        end

    end

    % Count the proportion of homeside wins for the year.
    numOfMatches = sum(~isnan(HomeWon));  % Number of matches regardless of the outcome.
    numberOfHomeWins = sum(HomeWon == 1); % Number of matches the home side won (no toolbox needed).

    % 0/0 yields NaN, which flags a season with no recognised matches.
    proportionOfHomeWinsAFL(yearCount) = numberOfHomeWins/numOfMatches;

end

% Plotting section.
% Draws the per-season proportion of home wins for both codes, plus a dashed
% horizontal line at each code's mean. Expects years,
% proportionOfHomeWinsNRL and proportionOfHomeWinsAFL in the workspace.

close all

figure
hNRL = plot(years, proportionOfHomeWinsNRL, 'r');
hold on
hAFL = plot(years, proportionOfHomeWinsAFL, 'b');

% Seasons with no recognised matches are NaN; leave them out of the mean so
% that a single empty season does not turn the whole average into NaN.
% Left unsemicoloned on purpose so the means are shown on the screen.
meanNRL = mean(proportionOfHomeWinsNRL(~isnan(proportionOfHomeWinsNRL)))
plot([years(1) years(end)], [meanNRL meanNRL], '--r')

meanAFL = mean(proportionOfHomeWinsAFL(~isnan(proportionOfHomeWinsAFL)))
plot([years(1) years(end)], [meanAFL meanAFL], '--b')

% Attach the legend to the two season curves only, after everything has been
% drawn, so the dashed mean lines do not get legend entries of their own.
legend([hNRL hAFL], 'NRL', 'AFL')

title('Proportion of home wins by season for the AFL and NRL')
text(1910, meanNRL, 'Mean NRL home wins')
text(1910, meanAFL, 'Mean AFL home wins')
xlabel('Year')
ylabel('Proportion of home wins')

hold off

% End of script. (The text that followed here on the original web page was
% the blog's advertisement and comment-form boilerplate, not part of the program.)