-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
186 lines (149 loc) · 5.94 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# -*- coding: utf-8 -*-
'''
Created on Mon Oct 25 11:35:11 2021
@author: shangfr
'''
import pandas as pd
import streamlit as st
from ui import data_exploration, data_analysis
from ui import data_modeling, result_display
from ui import model_prediction
from utils import describe, load_pickle
@st.cache_data(show_spinner=False)
def get_file_content_as_string(path):
    '''Return the whole content of a UTF-8 text file as one string.

    The function is wrapped in ``st.cache_data`` with the spinner disabled.

    Args:
        path: Location of the file to read.

    Returns:
        The decoded text of the file.

    Raises:
        OSError: If the file cannot be opened for reading.
    '''
    # Open read-only: the previous 'r+' mode also demanded write permission,
    # which fails on read-only files even though nothing is ever written.
    with open(path, 'r', encoding='utf-8') as fo:
        return fo.read()
def init_state():
    '''Fill in the default session_state entries that are still missing.

    Keys that already exist in ``st.session_state`` keep their current value;
    only absent keys are created.
    '''
    # Built on every call so the nested dicts are never shared between resets.
    defaults = {
        'cache_data': {
            'origin': {},
            'machine_learning': {
                'parm': {},
                'model_pipe': {}
            },
            'output': {}
        },
        'loaded': False,
        'disabled': False,
        'file_id': 0,
        'ml_step': 0,
    }
    for name, initial in defaults.items():
        if name not in st.session_state:
            st.session_state[name] = initial
def reset_state():
    '''Delete every session_state entry, then recreate the defaults.

    After the call ``st.session_state`` holds exactly what ``init_state()``
    writes into an empty state.
    '''
    # Iterate over a snapshot of the keys so that deleting entries can never
    # invalidate the iterator, regardless of how keys() is implemented.
    for key in list(st.session_state.keys()):
        del st.session_state[key]
    init_state()
def load_state():
    '''Reset the session and install the object returned by load_pickle().

    The pickle is loaded before the reset. Afterwards ``cache_data`` holds
    the loaded object, ``ml_step`` is 3 and both ``loaded`` and ``disabled``
    are True.
    '''
    snapshot = load_pickle()
    reset_state()
    restored = {
        'cache_data': snapshot,
        'ml_step': 3,
        'loaded': True,
        'disabled': True,
    }
    for name, value in restored.items():
        st.session_state[name] = value
def show_ml_step():
    '''Render the two introductory sections and then stop the script run.

    Each section is a header followed by a bilingual markdown blurb. The
    function ends with ``st.stop()``, so code after the call does not run.
    '''
    eda_blurb = '''
`EDA`
> Data exploration, also known as exploratory data analysis (EDA), is a process where users look at and understand their data with statistical and visualization methods.
> 数据探索EDA,即探索性数据分析,是用户使用统计和可视化方法查看和理解数据的过程。
'''
    ml_blurb = '''
`Clustering, Classification and Regression`
> Machine learning is a branch of artificial intelligence. It focuses on using data and algorithms to imitate human learning methods and gradually improve the accuracy of simulation.
> 机器学习是人工智能的一个分支,其重点是使用数据和算法模仿人类的学习方式,并逐步提高模仿的准确性。
'''
    sections = (
        ('1. Methods of data exploration', eda_blurb),
        ('2. Train a machine learning model', ml_blurb),
    )
    for title, blurb in sections:
        st.header(title)
        st.markdown(blurb)
    st.stop()
def read_uploaded_file():
    '''Read the file uploaded in the sidebar and refresh the session cache.

    Without an upload the session is reset and the introduction page is
    shown. When the uploaded file's id differs from the stored ``file_id``
    the file is parsed with pandas, ``describe(df)`` is stored under
    ``cache_data['origin']``, and ``file_id`` / ``ml_step`` are updated.

    Returns:
        The ``st.session_state['cache_data']`` mapping.
    '''
    uploaded_file = st.sidebar.file_uploader(
        '上传数据', type=['xlsx', 'csv'], key='dfa')
    if uploaded_file is None:
        st.sidebar.warning('请先上传数据集', icon='👆')
        reset_state()
        # show_ml_step() finishes with st.stop(), so nothing below runs
        # while no file has been uploaded.
        show_ml_step()

    file_id = uploaded_file.id
    if file_id != st.session_state['file_id']:
        # Compare the extension case-insensitively; previously a name such as
        # "DATA.CSV" matched neither branch and left `df` unbound.
        extension = uploaded_file.name.rsplit('.', 1)[-1].lower()
        if extension in ('csv', 'txt'):
            df = pd.read_csv(uploaded_file)
        elif extension in ('xls', 'xlsx'):
            df = pd.read_excel(uploaded_file)
        else:
            # Report the problem instead of crashing on an undefined `df`.
            st.sidebar.error(f'不支持的文件类型: {uploaded_file.name}')
            st.stop()

        st.session_state['cache_data']['origin'] = describe(df)
        st.session_state['file_id'] = file_id
        st.session_state['ml_step'] = 1
        st.sidebar.success('数据更换成功', icon="✅")
    else:
        st.sidebar.success('数据上传成功', icon="✅")

    return st.session_state['cache_data']
def model_training():
    '''Run the training page from top to bottom.

    Initialises the session state, obtains the cached data for the uploaded
    file, and passes that same object to the exploration, analysis, modeling
    and result-display views in that order.
    '''
    init_state()

    # Read the uploaded data, or refresh it when the file changed.
    state = read_uploaded_file()

    # Data exploration and preprocessing.
    data_exploration(state)

    # Feature analysis, modeling and result display, each on the same cache.
    st.info('3. 模型特征分析(Features Analysis)', icon='👇')
    for stage in (data_analysis, data_modeling, result_display):
        stage(state)
if __name__ == '__main__':
    # Script entry point: configure the page, let the user pick a task in the
    # sidebar, and dispatch to the matching view.
    st.set_page_config(
        page_title='Data Follower',
        page_icon='🦄',
        layout='wide',
        initial_sidebar_state='expanded',
        menu_items={
            'About': '# A Machine Learning Web App with Streamlit and Python'
        })

    st.sidebar.title('An Interactive ML WebApp')
    app_mode = st.sidebar.selectbox("Select a task above.", [
        "---", "Training", "Application"])

    # Style: inline the stylesheet. The encoding is explicit so decoding does
    # not depend on the platform default (the other file read in this module
    # already uses UTF-8).
    with open('style.css', encoding='utf-8') as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

    if app_mode == "---":
        # Landing page: show the instructions document.
        st.sidebar.success('Choose a task')
        st.markdown(get_file_content_as_string('instructions.md'))
    elif app_mode == "Training":
        st.header('Clustering, Classification and Regression')
        model_training()
    elif app_mode == "Application":
        # load_state() resets the session and installs the pickled cache.
        load_state()
        st.sidebar.success("Model loaded successfully. ", icon="👆")
        tool_mode = st.sidebar.selectbox("Model", [
            "Checking", "Prediction"])
        if tool_mode == "Prediction":
            model_prediction(st.session_state['cache_data'])
        elif tool_mode == "Checking":
            st.info('1. 模型特征分析(Features Analysis)', icon='👇')
            data_analysis(st.session_state['cache_data'])
            st.info('2. 模型展示(Model Display)', icon='👇')
            result_display(st.session_state['cache_data'])

    # Sidebar footer with credits.
    # NOTE(review): placed at the top level of the entry block, i.e. not tied
    # to a particular task branch — confirm against the original layout.
    with st.sidebar:
        st.markdown("---")
        st.markdown(
            '<h6>Made in  <img src="https://streamlit.io/images/brand/streamlit-mark-color.png" alt="Streamlit logo" height="16">  by <a href="https://twitter.com/kuake2022">@kuake2022</a></h6>',
            unsafe_allow_html=True,
        )
        st.markdown(
            '<div style="margin-top: 0.75em;"><a href="https://www.buymeacoffee.com/kuake" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="41" width="174"></a></div>',
            unsafe_allow_html=True,
        )