# this code was created by TingYu Yang
# generate a demo video that shows the RGB frame, the optical flow frame, and the prediction scores on a shared timeline
import os
import cv2
import pickle
import numpy as np
import argparse

parser = argparse.ArgumentParser(description='Make demo video')
parser.add_argument('--jpg-path', default='cropped_jpeg_1080', type=str, metavar='PATH', help='path to jpeg folder')
parser.add_argument('--opt-path', default='cropped_flowNet_1080', type=str, metavar='PATH', help='path to optical flow folder')
parser.add_argument('--participant', default='g03', type=str, help='participant name')
parser.add_argument('--numberOfVideo', default=35, metavar='N', type=int, help='number of test videos')
parser.add_argument('--rgbPred', default='record/spatial/evel_spatial_video_preds.pickle', type=str, metavar='PATH', help='path to RGB prediction pickle')
parser.add_argument('--optPred', default='record/motion/evel_motion_video_preds.pickle', type=str, metavar='PATH', help='path to optical flow prediction pickle')
parser.add_argument('--classes', default='NDA_list/classInd.txt', type=str, metavar='PATH', help='path to class index file')

arg = parser.parse_args()
print(arg)
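# Assumed inputs (inferred from how they are used below, not documented elsewhere):
# each prediction pickle is a dict that maps a clip name to a 1-D NumPy array of
# per-class scores, e.g. {'drinking_g03_v01': array([1.7, -0.3, ...]), ...}, where
# the clip name contains the participant id (e.g. 'g03'). The clip name shown here
# is only illustrative.
# Example run with the default paths:
#   python make_opt_rgb_video_with_predict_level.py --participant g03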
with open(arg.rgbPred, 'rb') as file:
    rgb_dic_frame = pickle.load(file)
with open(arg.optPred, 'rb') as file:
    opt_dic_frame = pickle.load(file)
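# classInd.txt is assumed to follow the usual UCF101-style "index name" layout,
# one class per line (e.g. "1 drinking"), so the class name is the second
# whitespace-separated token on each line.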
classes = []
with open(arg.classes, 'r') as class_file:
    for line in class_file:
        if line[-1] == '\n':
            name = line.split(' ')[1]
            name = name.replace('\n', '')
            classes.append(name)
print(classes)
participant_name = arg.participant

# calculate prediction percentage matrix
percentage_cate_dic = {}
fuse_highest_percentage_cate_dic = {}
rgb_highest_percentage_cate_dic = {}
opt_highest_percentage_cate_dic = {}
cate_image_path = []
for name in sorted(rgb_dic_frame.keys()):
    if arg.participant in name:
        cate_image_path.append(name)
        r = rgb_dic_frame[name]
        r = r / sum(abs(r))
        o = opt_dic_frame[name]
        o = o / sum(abs(o))
        fuse = r + o
        fuse = fuse - min(fuse)
        fuse = fuse / sum(fuse) * 100
        percentage_cate_dic[name] = fuse
        rgb_predict_class = classes[r.tolist().index(max(r))]
        optical_flow_predict_class = classes[o.tolist().index(max(o))]
        fuse_predict_class = classes[fuse.tolist().index(max(fuse))]
        fuse_highest_percentage_cate_dic[name] = fuse_predict_class
        rgb_highest_percentage_cate_dic[name] = rgb_predict_class
        opt_highest_percentage_cate_dic[name] = optical_flow_predict_class
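# Fusion recap: each stream's scores are L1-normalised (divided by the sum of their
# absolute values), summed, shifted so the minimum class is 0, and rescaled to sum
# to 100 so they can be printed as percentages. A toy example with two classes,
# r = [0.6, 0.4] and o = [0.8, 0.2]:
#   fuse = [1.4, 0.6] -> subtract min -> [0.8, 0.0] -> rescale -> [100.0, 0.0] (%)
# Note that the lowest-scoring class therefore always shows 0.000%.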
# print(cate_image_path)
opt_image = []
rgb_image = []
height = 500
width = 500
size = (height, width)
rgb_root = arg.jpg_path
opt_root = arg.opt_path
font = cv2.FONT_HERSHEY_SIMPLEX
num_video = arg.numberOfVideo
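# Each output frame written at the bottom is three 500 x 500 panels side by side:
# RGB frame | FlowNet 2.0 flow frame | fused prediction panel, i.e. 1500 x 500 in
# total, which is why the VideoWriter uses width * 3.
# (cv2.resize expects dsize as (width, height); with both set to 500 the order is moot.)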
for ram, cate in enumerate(cate_image_path):
    if ram > num_video:
        break
    ground_truth = cate.split('_g')[0]
    # black title frame shown before the first optical flow frame of each clip
    opt_first_img = np.zeros([height, width, 3], dtype=np.uint8)
    cv2.putText(opt_first_img, 'FlowNet 2.0 optical flow', (10, 30), font, 1, (0, 255, 255), 1, cv2.LINE_AA)
    predict_frame = np.zeros([height, width, 3], dtype=np.uint8)
    cv2.putText(predict_frame, 'Percentage after fuse', (10, 30), font, 1, (0, 255, 255), 1, cv2.LINE_AA)
    for position, className in enumerate(classes):
        cv2.putText(predict_frame, className + ': ', (10, (position + 2) * 30), font, 1, (255, 255, 255), 1, cv2.LINE_AA)
        cv2.putText(predict_frame, '0.000%', (250, (position + 2) * 30), font, 1, (0, 255, 0), 1, cv2.LINE_AA)
    cv2.putText(predict_frame, 'RGB stream: ', (10, (position + 3) * 30), font, 1, (255, 255, 0), 1, cv2.LINE_AA)
    cv2.putText(predict_frame, 'Optical flow: ', (10, (position + 4) * 30), font, 1, (255, 255, 0), 1, cv2.LINE_AA)
    cv2.putText(predict_frame, 'Fused: ', (10, (position + 5) * 30), font, 1, (255, 255, 0), 1, cv2.LINE_AA)
    cv2.putText(predict_frame, 'Ground Truth: ' + ground_truth, (10, (position + 6) * 30), font, 1, (255, 255, 0), 1, cv2.LINE_AA)
    combined = np.concatenate((opt_first_img, predict_frame), axis=1)
    opt_image.append(combined)
    ind = 1
    rgb_cate_root = os.path.join(rgb_root, cate)
    opt_cate_root = os.path.join(opt_root, cate)
    for file in os.listdir(opt_cate_root):
        if '.' in file:
            if file.split('.')[1] == 'jpg':
                opt_frame_name = os.path.join(opt_cate_root, 'frame_{:06d}.jpg'.format(ind))
                opt = cv2.imread(opt_frame_name)
                opt = cv2.resize(opt, size)
                cv2.putText(opt, 'FlowNet 2.0 optical flow', (10, 30), font, 1, (0, 255, 255), 1, cv2.LINE_AA)
                predict_frame = np.zeros([height, width, 3], dtype=np.uint8)
                cv2.putText(predict_frame, 'Percentage after fuse', (10, 30), font, 1, (0, 255, 255), 1, cv2.LINE_AA)
                for position, className in enumerate(classes):
                    if className == fuse_highest_percentage_cate_dic[cate]:
                        color = (0, 0, 255)
                    else:
                        color = (255, 255, 255)
                    cv2.putText(predict_frame, className, (10, (position + 2) * 30), font, 1, color, 1, cv2.LINE_AA)
                    cv2.putText(predict_frame, str(round((percentage_cate_dic[cate])[position], 3)) + '%', (250, (position + 2) * 30), font, 1, (0, 255, 0), 1, cv2.LINE_AA)
                cv2.putText(predict_frame, 'RGB stream: ' + rgb_highest_percentage_cate_dic[cate], (10, (position + 3) * 30), font, 1, (255, 255, 0), 1, cv2.LINE_AA)
                cv2.putText(predict_frame, 'Optical flow: ' + opt_highest_percentage_cate_dic[cate], (10, (position + 4) * 30), font, 1, (255, 255, 0), 1, cv2.LINE_AA)
                cv2.putText(predict_frame, 'Fused: ' + fuse_highest_percentage_cate_dic[cate], (10, (position + 5) * 30), font, 1, (255, 255, 0), 1, cv2.LINE_AA)
                cv2.putText(predict_frame, 'Ground Truth: ' + ground_truth, (10, (position + 6) * 30), font, 1, (255, 255, 0), 1, cv2.LINE_AA)
                combined = np.concatenate((opt, predict_frame), axis=1)
                opt_image.append(combined)
                rgb_frame_name = os.path.join(rgb_cate_root, 'frame_{:06d}.jpg'.format(ind))
                rgb = cv2.imread(rgb_frame_name)
                rgb = cv2.resize(rgb, size)
                cv2.putText(rgb, 'RGB frame', (10, 30), font, 1, (255, 255, 255), 1, cv2.LINE_AA)
                rgb_image.append(rgb)
                ind += 1
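    # The flow folder is assumed to hold one frame fewer than the RGB folder (flow is
    # computed between consecutive frames), and opt_image already received an extra
    # black title frame above, so one more RGB frame is appended here to keep the two
    # lists the same length.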
    rgb_frame_name = os.path.join(rgb_cate_root, 'frame_{:06d}.jpg'.format(ind))
    rgb = cv2.imread(rgb_frame_name)
    rgb = cv2.resize(rgb, size)
    cv2.putText(rgb, 'RGB frame', (10, 30), font, 1, (255, 255, 255), 1, cv2.LINE_AA)
    rgb_image.append(rgb)
    print("current frames: ", len(rgb_image))
# opt_last_img = np.zeros([height, width, 3], dtype=np.uint8)
# opt_image.append(opt_last_img)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
h, w = rgb_image[0].shape[:2]  # frames were resized to 500 x 500 above
print(w, h)
out = cv2.VideoWriter('demo_{}.mp4'.format(participant_name), fourcc, 24, (int(w) * 3, int(h)))
print('writing video ...')
for i in range(len(rgb_image)):
    vis = np.concatenate((rgb_image[i], opt_image[i]), axis=1)
    # cv2.imshow('a', vis)
    # cv2.waitKey(10)
    out.write(vis)
out.release()
print('Done.')