% Grant words

% Start from a clean state: no leftover workspace variables, no open figures,
% and an empty command window. clearvars removes workspace variables only,
% whereas "clear all" additionally flushes compiled functions and breakpoints.
clearvars
close all
clc

% Two-digit year suffixes. Each one selects the input file
% DP<yy>_Listing_by_State_and_Org.txt and the '20<yy>' marker searched for in it.
years = {'10', '11', '12', '13', '14', '15'};

% One value per entry of years. Not referenced by the rest of the visible
% script; presumably kept for normalising word counts per grant -- TODO confirm.
successfulGrantsPerYear = [925, 931, 778, 732, 703, 665];

% Build allYears: one cell per entry of years, each holding the cleaned,
% lower-case words taken from that year's project summaries.

% The stop-word list is the same for every year, so it is downloaded once here
% rather than once per loop iteration.
stopwordsURL = 'http://www.textfixer.com/resources/common-english-words.txt';
stopWordsString = urlread(stopwordsURL);
% strtrim guards against white space or a line break around an entry.
stopWords = strtrim(strsplit(stopWordsString, ','));

% Corpus-specific words to discard as well. The '' entry removes tokens that
% became empty once their non-letter characters were stripped.
extraNoWords = {'prof', 'dr', 'aprof', 'dp', '', 'approved', 'university', 'title', 'south', 'melbourne', 'sydney', 'projects', 'proposals', 'discovery', 'page', 'research', 'project'};

allYears = cell(1, length(years));

for i = 1:length(years)

    % Download the original file in Word format from
    % http://www.arc.gov.au/ncgp/dp/dp_outcomes.htm then save it as a .txt file.
    listing = fileread(['DP', years{i}, '_Listing_by_State_and_Org.txt']);

    % Every summary starts at a 'Project Summary' heading; the next occurrence
    % of the four-digit year after that heading is used as its end marker.
    startProjectSumary = strfind(listing, 'Project Summary');
    tempEndProjectSumary = strfind(listing, ['20', years{i}]);
    numSummaries = numel(startProjectSumary);

    if numSummaries == 0
        % Nothing to analyse for this year; keep an empty word list so the
        % later per-year code still has one entry per year.
        warning('No ''Project Summary'' headings found in the 20%s listing; skipping that year.', years{i});
        allYears{i} = cell(1, 0);
        continue
    end

    projectSummaries = cell(1, numSummaries);

    for a = 1:numSummaries

        % Skip the 15-character heading plus the 4 characters that follow it
        % (presumably the hard returns after the heading -- TODO confirm).
        firstChar = startProjectSumary(a) + 19;

        if a == numSummaries
            % The last summary runs to the end of the file.
            projectSummaries{a} = listing(firstChar:end);
        else
            % The slice ends on the first digit of the year marker; that digit
            % is removed later together with all other non-letters. If no
            % marker follows this heading, newEnd is empty and so is the slice.
            ix = find(tempEndProjectSumary > startProjectSumary(a), 1);
            newEnd = tempEndProjectSumary(ix);
            projectSummaries{a} = listing(firstChar:newEnd);
        end

    end

    lowerText = lower(projectSummaries);

    % Split every summary on white space and pool the tokens into one cell row.
    splitText = cellfun(@strsplit, lowerText, 'UniformOutput', false);
    flatWords = [splitText{:}];

    % Keep letters only (drops digits and punctuation).
    justAlpha = regexprep(flatWords, '[^a-zA-Z]', '');

    % Successive filters: common English stop words, corpus-specific words,
    % then anything shorter than two characters.
    wantedWords1 = justAlpha(~ismember(justAlpha, stopWords));
    wantedWords2 = wantedWords1(~ismember(wantedWords1, extraNoWords));
    wantedWords3 = wantedWords2(cellfun(@numel, wantedWords2) > 1);

    allYears{i} = wantedWords3;

end

%%
% Pool the words of all years, count each distinct word, plot the counts, and
% build the per-year count matrix occurance (rows: years, columns: words).

% Each allYears{k} is a cell row, so horizontal concatenation flattens them.
combinedYears = [allYears{:}];

figure
% unique returns the words sorted; idxAll maps every pooled word to its
% position in uniqueWordsAll, so accumulating ones gives the count per word.
[uniqueWordsAll,~,idxAll] = unique(combinedYears);
countsAll = accumarray(idxAll(:), 1)';

x = 1:length(uniqueWordsAll);
plot(x, countsAll, 'b.')
xlabel('Words used aranged alphabetically', 'FontSize', 20)
ylabel('Count for each word', 'FontSize', 20)
set(gca, 'XTickLabel', '')
title('Most frequently used words in succesful Discovery Projects')
%ylim([0 100])
axis tight

% Label only the words that occur more than 500 times in total.
for p = find(countsAll > 500)
    text(x(p), countsAll(p), uniqueWordsAll{p}, 'FontSize', 22)
end

% Row 1: the word, row 2: its total count.
allTogether = cell(2, size(uniqueWordsAll,2));
allTogether(1,:) = uniqueWordsAll;
allTogether(2,:) = num2cell(countsAll);

% occurance(y,w) is the number of times uniqueWordsAll{w} appears in year y.
% ismember maps each word of the year to its column in one pass, instead of
% comparing every unique word against the whole year's word list.
numWords = numel(uniqueWordsAll);
occurance = zeros(numel(allYears), numWords);

for y = 1:numel(allYears)

    [~, wordIdx] = ismember(allYears{y}, uniqueWordsAll);
    wordIdx = wordIdx(wordIdx > 0);
    occurance(y,:) = accumarray(wordIdx(:), 1, [numWords, 1])';

end
%%
% Plot how often a hand-picked set of words appears in each year.
interestingWords1 = {'climate', 'health', 'disease', 'aim', 'understanding', 'new'};

% Look up the column of every chosen word in occurance. A word that never
% occurs gets an all-zero column instead of aborting the script.
[isPresent, indexInterestingWords] = ismember(interestingWords1, uniqueWordsAll);
if ~all(isPresent)
    warning('Not found in any project summary (plotted as zero): %s', strjoin(interestingWords1(~isPresent), ', '));
end

occuranceInterestingWords = zeros(size(occurance,1), numel(interestingWords1));
occuranceInterestingWords(:, isPresent) = occurance(:, indexInterestingWords(isPresent));

figure
% One line per word; legend supplies the display name of each line.
plot(occuranceInterestingWords, 'LineWidth', 2)
legend(interestingWords1)

xlabel('Years', 'FontSize', 20)
ylabel('Count for each word', 'FontSize', 20)
% Tick positions and labels follow the years list, so they stay correct when
% years are added or removed.
labels = strcat('20', years);
set(gca, 'XTick', 1:numel(labels))
set(gca, 'XTickLabel', labels)
title('Change in the frequency of words used in succesful Discovery Projects')
axis tight

figure
% Per-word change in count between the first and the last analysed year.
% Indexing with end keeps this correct if the number of years changes.
gainsAndFalls = occurance(end,:) - occurance(1,:);
plot(gainsAndFalls)

% Label only the words with a large rise (> 50) or a large fall (< -100).
% The x position of a word is its column index.
for p = find(gainsAndFalls > 50 | gainsAndFalls < -100)
    text(p, gainsAndFalls(p), uniqueWordsAll{p}, 'FontSize', 22)
end

xlabel('Words used aranged alphabetically', 'FontSize', 20)
ylabel(sprintf('Change in word count (20%s-20%s)', years{end}, years{1}), 'FontSize', 20)
set(gca, 'XTickLabel', '')
title('Change in the frequency of words used in succesful Discovery Projects')
axis tight

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s