-
Notifications
You must be signed in to change notification settings - Fork 0
/
streamlit_dataframe_editor.py
187 lines (157 loc) · 10.1 KB
/
streamlit_dataframe_editor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# Import relevant libraries
import streamlit as st
import random
import pandas as pd
# from st_pages import get_pages, get_script_run_ctx
def get_random_integer(stop=1000000):
'''
Use this to create a new data editor key which is the only way to reset
an editable dataframe to its original contents, per
https://discuss.streamlit.io/t/reset-experimental-data-editor-to-original-values/41618.
We likely haven't run into this before because in our editable dataframes
we generally have new contents within the dataframe which forces this.
But to reset to the exact same contents, you need a different key.
'''
return '{:06d}'.format(random.randrange(start=0, stop=stop))
def reconstruct_edited_dataframe(df, changes_dict):
'''
Retaining information in a data editor upon switching pages
'''
# Make a copy of the original dataframe with a reset, range index
df_edited = df.reset_index(drop=True) # this resetting of the index is probably important because of how the indexing works below, which from https://docs.streamlit.io/library/advanced-features/dataframes#access-edited-data appears to be zero-based range indexing. Note this does not affect the indexing of the original dataframe, only the version of it (df_edited) that is input into the data_editor
# Update edited rows
for idx in changes_dict['edited_rows']:
for col in changes_dict['edited_rows'][idx]:
df_edited.loc[idx, col] = changes_dict['edited_rows'][idx][col]
# Update added rows
df_edited = pd.concat([df_edited, pd.DataFrame(changes_dict['added_rows'])]).reset_index(drop=True)
# Update deleted rows
df_edited = df_edited.drop(index=changes_dict['deleted_rows']).reset_index(drop=True)
# Return the updated dataframe
return df_edited
def save_data_editor_changes(key_for_changes_dict, key_for_data_editor_widget, additional_callback):
'''
From the data editor key, which is a dictionary of changes to the input dataframe, save the changes
dictionary to the session state
This should probably be renamed as I've generalized it to include optional additional callbacks
'''
st.session_state[key_for_changes_dict] = st.session_state[key_for_data_editor_widget]
if additional_callback is not None:
additional_callback()
def get_current_page_name():
'''This is a snippet from Zachary Blackwood using his st_pages package per
https://discuss.streamlit.io/t/how-can-i-learn-what-page-i-am-looking-at/56980
for getting the page name without having the script run twice as it does using
the st_javascript package with curr_url = st_javascript("await fetch('').then(r => window.parent.location.href)").
# Note that you can also import these from streamlit directly
'''
pages = get_pages("")
ctx = get_script_run_ctx()
try:
current_page = pages[ctx.page_script_hash]
except KeyError:
current_page = [
p for p in pages.values() if p["relative_page_hash"] == ctx.page_script_hash
][0]
return current_page['page_name']
def load_session_state_from_previous_page(session_state):
'''
Reload everything in session_state except for widgets that cannot be so saved,
which are manually ignore by appending "__do_not_persist" to the widget's key
'''
for key, val in session_state.items():
if (not key.endswith('__do_not_persist')) and (not key.startswith('FormSubmitter:')):
session_state[key] = val
return session_state
def initialize_previous_page_name(current_page_name, session_state):
'''
Define the previous page name if it hasn't already been initialized
'''
if 'previous_page_name' not in session_state:
session_state['previous_page_name'] = current_page_name
return session_state
def initialize_session_state(session_state):
'''
Run common things at the top of the page in a single function
'''
session_state = load_session_state_from_previous_page(session_state) # load the st.session_state from the previous page
session_state['current_page_name'] = get_current_page_name() # get the current page name, saving it to the session state
session_state = initialize_previous_page_name(session_state['current_page_name'], session_state) # initialize the previous page name to the current page name if it's not already defined. This is unnecessary to do on pages not containing any dataframe editor objects, but for uniformity in "decorating" all pages, it shouldn't hurt to include this here and to call top_matter() on all pages
return session_state
def finalize_session_state(session_state):
'''
Set the old page name to the current page name
'''
session_state['previous_page_name'] = session_state['current_page_name']
return session_state
def cast_column_labels_to_strings(df):
'''
Run a check on dataframe columns labels and convert them to strings if they're not already strings
'''
cols = df.columns
non_str_cols = [col for col in cols if not isinstance(col, str)]
non_str_cols_string_versions = [str(col) for col in non_str_cols]
mapper = dict(zip(non_str_cols, non_str_cols_string_versions))
if len(non_str_cols) > 0:
st.warning('Columns with non-string labels have been detected, and Streamlit internally casts [columns names to strings](https://docs.streamlit.io/library/advanced-features/dataframes#limitations), a known limitation. This causes problems when data is edited in such columns, so we\'re now casting these column names to strings: {}.'.format(non_str_cols), icon='⚠️')
return df.rename(columns=mapper)
class DataframeEditor:
'''
Define the DataframeEditor class
'''
def __init__(self, df_name, default_df_contents):
'''
Object instantiation
'''
self.df_name = df_name
self.default_df_contents = cast_column_labels_to_strings(default_df_contents)
self.reset_dataframe_content()
def reset_dataframe_content(self, additional_callback=None):
'''
Initialize the editor contents to its default dataframe
'''
self.update_editor_contents(new_df_contents=self.default_df_contents, additional_callback=additional_callback) # reset_key must = True (the default) here or else the resetting will not happen per nuances of Streamlit, due to the desired dataframe contents being the same as they once were with the same key I believe
def update_editor_contents(self, new_df_contents, reset_key=True, additional_callback=None):
'''
Set the dataframe in the data editor to specific values robustly
Note reset_key=True has some flicker but is necessary in case the data editor contents are ever the same, in which case they wouldn't update. So reset_key=False may lead to non-updates, but no flicker, and is the right option for e.g. leaving and coming back to the page containing the data editor as long as the contents were not expected to have changed in the interim, so there's no need to force a refresh using reset_key=True.
'''
df_name = self.df_name
st.session_state[df_name] = cast_column_labels_to_strings(new_df_contents)
st.session_state[df_name + '_changes_dict'] = {'edited_rows': {}, 'added_rows': [], 'deleted_rows': []}
# This step causes brief flashing of the dataframe so it's best to not run this unless required. A good time to not run this is when a dataframe is reloaded after leaving and coming back to the page it's on
if reset_key:
st.session_state[df_name + '_key'] = df_name + '_' + get_random_integer() + '__do_not_persist' # not strictly necessary to do every time, though it is in the case where the new data is exactly the same as the original data, so we may as well do it every time
# Allow extra functionality to be run if the data editor contents are programmatically changed
if additional_callback is not None:
additional_callback()
def reconstruct_edited_dataframe(self):
'''
Method version of same-named function
'''
df_name = self.df_name
return reconstruct_edited_dataframe(st.session_state[df_name], st.session_state[df_name + '_changes_dict'])
def dataframe_editor(self, current_page_key='current_page_name', previous_page_key='previous_page_name', dynamic_rows=True, reset_data_editor_button=True, reset_data_editor_button_text='Reset data editor', on_change=None, hide_index=None, column_config=None, debug=False):
'''
Function to perform all data editor functionalities for a dataframe that users should be able to manipulate
'''
# Shortcuts to object attributes
df_name = self.df_name
# Shortcuts to session state values
current_page_name = st.session_state[current_page_key]
previous_page_name = st.session_state[previous_page_key]
key_for_data_editor_widget = st.session_state[df_name + '_key']
# If the user switches to this page, then
# have the data editor input be the previously saved data editor "output".
# Note doing this provide a smooth and hiccup-free experience
# e.g., no scrollbar snapping back to the topmost location
if current_page_name != previous_page_name:
self.update_editor_contents(new_df_contents=self.reconstruct_edited_dataframe(), reset_key=False)
# Output a data editor for a dataframe of interest
st.data_editor(st.session_state[df_name], key=key_for_data_editor_widget, on_change=save_data_editor_changes, args=(df_name + '_changes_dict', key_for_data_editor_widget, on_change), num_rows=('dynamic' if dynamic_rows else 'fixed'), hide_index=hide_index, column_config=column_config)
# Debugging information
if debug:
st.write(f'st.data_editor() key for the dataframe {df_name}: {key_for_data_editor_widget}')
# Create a button to reset the data in the data editor
if reset_data_editor_button:
st.button(reset_data_editor_button_text, on_click=self.reset_dataframe_content, key=(df_name + '_button__do_not_persist'), kwargs={'additional_callback': on_change})