-
Notifications
You must be signed in to change notification settings - Fork 1
/
redditPRAW_TD.m
96 lines (70 loc) · 2.65 KB
/
redditPRAW_TD.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
function [] = redditPRAW_TD()
%REDDITPRAW_TD Reduce The_Donald submission URLs to site names and save them with their scores.
%
%   Loads SubmissionURL and SubmissionScore from The_Donald.mat, reduces
%   every URL to a bare site name (scheme, short subdomain prefix, top-level
%   domain and path removed), drops the first two submissions and every
%   submission whose site name is in the exclusion list below, sorts the
%   remainder alphabetically by site name and saves the result as
%   variables URL and Karma in TD_<stamp>.mat inside dirpath, where <stamp>
%   is the first 10 characters of the last row of The_Donald.csv.
%
%   Takes no inputs and returns nothing.
%
%   Side effects: clears the command window, closes all figures, changes
%   the current folder to dirpath and writes the MAT-file described above.
%
%   Errors:
%     redditPRAW_TD:missingVariable  - MAT-file lacks SubmissionURL/SubmissionScore
%     redditPRAW_TD:sizeMismatch     - URL and score counts differ
%     redditPRAW_TD:emptyCsv         - CSV has no data rows
%     redditPRAW_TD:badTimestamp     - last CSV entry is shorter than 10 characters

clc; close all;

dirpath  = '/path/to/directory/with/statit/bot/output/data/';
filepath = '/path/to/directory/with/statit/bot/output/data/The_Donald.mat';
csvpath  = '/path/to/directory/with/statit/bot/output/data/The_Donald.csv';

% Kept for backward compatibility: the original left the caller in dirpath.
cd(dirpath)

%---------------------------
% Load only the two variables we need, into a struct, so that nothing
% stored in the MAT-file can overwrite local variables such as csvpath.
data = load(filepath, 'SubmissionURL', 'SubmissionScore');
if ~isfield(data, 'SubmissionURL') || ~isfield(data, 'SubmissionScore')
    error('redditPRAW_TD:missingVariable', ...
          'Expected variables SubmissionURL and SubmissionScore in %s.', filepath);
end
scores = data.SubmissionScore;
sites  = extractSiteNames(cellstr(data.SubmissionURL));

if numel(scores) ~= numel(sites)
    error('redditPRAW_TD:sizeMismatch', ...
          'SubmissionURL has %d entries but SubmissionScore has %d.', ...
          numel(sites), numel(scores));
end

%---------------------------
% Drop the first two submissions (presumably pinned/header entries -
% TODO confirm). Clamp so that shorter inputs do not raise an index error.
nSkip = min(2, numel(sites));
sites(1:nSkip)  = [];
scores(1:nSkip) = [];

%---------------------------
% Remove submissions that point at these hosts. ismember works for both
% row and column cell arrays, unlike a loop over size(...,1).
excludedSites = {'reddit','redd','twitter','youtube','youtu', ...
                 'facebook','imgur','reddituploads'};
isExcluded = ismember(sites, excludedSites);
sites(isExcluded)  = [];
scores(isExcluded) = [];

%---------------------------
% Sort alphabetically by site name and keep the scores aligned.
[URL, order] = sort(sites);
Karma = scores(order);

%---------------------------
% The output file name is taken from the last row of the CSV.
T = readtable(csvpath,'Format','%s','ReadVariableNames',false,'HeaderLines',2);
if height(T) == 0
    error('redditPRAW_TD:emptyCsv', 'No data rows found in %s.', csvpath);
end
dateTime = T{end,1}{1};
if numel(dateTime) < 10
    error('redditPRAW_TD:badTimestamp', ...
          'Last entry of %s is shorter than 10 characters: ''%s''.', csvpath, dateTime);
end

% First 10 characters are used as the stamp (presumably yyyy-mm-dd - TODO confirm).
% Explicit full path: same location as before, but no longer dependent on cd.
save(fullfile(dirpath, ['TD_' dateTime(1:10) '.mat']),'URL','Karma')
end


function sites = extractSiteNames(urls)
%EXTRACTSITENAMES Reduce each URL in a cellstr to its bare site name.
%   sites = extractSiteNames(urls) returns a cell array of the same size as
%   urls, e.g. 'https://www.example.com/a/b' -> 'example'.

% Strip a 1-3 character subdomain prefix such as 'www.', 'en.' or 'i.'.
% The lookahead requires another dot later in the host part, so short
% domain names themselves ('cnn.com', 't.co') are no longer removed.
sites = regexprep(urls, '\/\/.{1,3}\.(?=[^\/?#]*\.)', '');

% Strip longer, known subdomain prefixes.
sites = regexprep(sites, '(mobile\.)|(upload\.)', '');

% Strip the scheme and any remaining '//'.
sites = regexprep(sites, '(https?:)|(\/\/)|(mobile\.)', '');

% Cut everything from the first '.<tld>/' onwards. A single pass with '.*'
% covers both "something after the slash" and "nothing after the slash".
tlds = {'com','net','org','gov','uk','be','it','is','st','us','online', ...
        'pw','info','in','land','edu','ga','rs','kr','tmz','media', ...
        'jp','se','ca','au','gop','co','biz','io','nl'};
sites = regexprep(sites, ['\.(?:' strjoin(tlds, '|') ')\/.*'], '');

% URLs without a trailing slash: strip a bare TLD at the end of the string.
sites = regexprep(sites, '\.(?:com|net|co)$', '');
end