-
- Downloads
change script for recursive path
%% Cell type:code id: tags:
``` python
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
import matplotlib.dates as mdates | ||
import plotly.express as px | ||
import glob | ||
from pathlib import PureWindowsPath

# Root folder of the experiment to evaluate. Replace the
# <cur_experiment_folder> placeholder with the real folder name before running.
directory_path = r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\<cur_experiment_folder>'

# Columns a query log must provide so that it can be plotted below.
REQUIRED_COLUMNS = ('Start Time', 'Total Time (s)', 'Query Key')


def database_name(filepath):
    """Return the name of the folder that directly contains ``filepath``.

    The plot title uses this folder name as the database label; this assumes
    the logs are stored one folder per database (e.g. ``...\\neo4j\\x.csv``).
    ``PureWindowsPath`` is used because it accepts both ``\\`` and ``/`` as
    separators, so the result does not depend on how the path was written.
    Returns an empty string when the path has no parent folder.
    """
    return PureWindowsPath(filepath).parent.name


def missing_columns(df, required=REQUIRED_COLUMNS):
    """Return the names in ``required`` that are not columns of ``df``."""
    return [column for column in required if column not in df.columns]


# Collect every CSV file below the experiment folder, at any depth. Sorting
# makes the order of the figures reproducible between runs.
filepaths = sorted(glob.glob(directory_path + '/**/*.csv', recursive=True))
if not filepaths:
    print(f'No CSV files found below {directory_path}')

for filepath in filepaths:
    df = pd.read_csv(filepath)

    # The recursive search picks up every CSV, including files that are not
    # query logs or that use other column names. Skip those with a message
    # instead of aborting the whole loop with a KeyError.
    missing = missing_columns(df)
    if missing:
        print(f'Skipping {filepath}: missing column(s) {missing}')
        continue

    # 'Start Time' is interpreted as seconds since the Unix epoch.
    df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')

    db_name = database_name(filepath)

    # One line per query key, total execution time over wall-clock time.
    fig = px.line(df, x='Start Time', y='Total Time (s)', color='Query Key',
                  title=f'Total Time per Query Key Over Time for {db_name}')

    # Tick format of the time axis. Alternatives:
    #   '%Y-%m-%d %H:%M:%S'  full date and time
    #   '%H:%M:%S'           time of day only
    #   '%Y-%m-%d'           date only
    fig.update_xaxes(tickformat='%d.%m. %H:%M:%S', title_text='Time')
    fig.update_yaxes(title_text='Total Time (s)')

    fig.show()
```
%% Output
%% Cell type:code id: tags:
``` python
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
import matplotlib.dates as mdates | ||
import plotly.express as px | ||
from pathlib import PureWindowsPath

# Query logs of one experiment run, one file per database system.
filepaths = [
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\memgraph\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\neo4j\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\ongdb\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\pg\2024-02-11_11-58-08_query_execution_logs.csv',
]

# Columns a query log must provide so that it can be plotted below.
required_columns = ('Start Time', 'Total Time (s)', 'Query Key')

for filepath in filepaths:
    df = pd.read_csv(filepath)

    # A previous run of this cell stopped with KeyError: 'Total Time (s)',
    # i.e. at least one of the logs does not have this column. Skip such a
    # file with a message so the remaining databases are still plotted.
    missing = [column for column in required_columns if column not in df.columns]
    if missing:
        print(f'Skipping {filepath}: missing column(s) {missing}')
        continue

    # 'Start Time' is interpreted as seconds since the Unix epoch.
    df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')

    # The folder that contains the log is used as the database label, so the
    # figures can be told apart. PureWindowsPath accepts both '\' and '/'.
    db_name = PureWindowsPath(filepath).parent.name

    # One line per query key, total execution time over wall-clock time.
    fig = px.line(df, x='Start Time', y='Total Time (s)', color='Query Key',
                  title=f'Total Time per Query Key Over Time for {db_name}')

    # NOTE(review): dtick="M1" asks for one tick per month, labelled
    # "<month>\n<year>". If a log only covers a single run of minutes or
    # hours, a time-of-day format such as '%d.%m. %H:%M:%S' without dtick is
    # probably the better choice -- confirm against the data.
    fig.update_xaxes(
        dtick="M1",
        tickformat="%b\n%Y",
        title_text='Time'
    )
    fig.update_yaxes(title_text='Total Time (s)')

    fig.show()
```
%% Output
--------------------------------------------------------------------------- | ||
KeyError Traceback (most recent call last) | ||
c:\Python38\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance) | ||
3360 try: | ||
-> 3361 return self._engine.get_loc(casted_key) | ||
3362 except KeyError as err: | ||
c:\Python38\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc() | ||
c:\Python38\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc() | ||
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() | ||
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() | ||
KeyError: 'Total Time (s)' | ||
The above exception was the direct cause of the following exception: | ||
KeyError Traceback (most recent call last) | ||
C:\Users\SVEN-O~1\AppData\Local\Temp/ipykernel_7836/3302337684.py in <module> | ||
22 for key in df['Query Key'].unique(): | ||
23 subset = df[df['Query Key'] == key] | ||
---> 24 ax.plot(subset['Start Time'], subset['Total Time (s)'], label=key) | ||
25 | ||
26 # Extracting the database name from the filepath for the title | ||
c:\Python38\lib\site-packages\pandas\core\frame.py in __getitem__(self, key) | ||
3456 if self.columns.nlevels > 1: | ||
3457 return self._getitem_multilevel(key) | ||
-> 3458 indexer = self.columns.get_loc(key) | ||
3459 if is_integer(indexer): | ||
3460 indexer = [indexer] | ||
c:\Python38\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance) | ||
3361 return self._engine.get_loc(casted_key) | ||
3362 except KeyError as err: | ||
-> 3363 raise KeyError(key) from err | ||
3364 | ||
3365 if is_scalar(key) and isna(key) and not self.hasnans: | ||
KeyError: 'Total Time (s)' | ||
... | ... |
Please register or sign in to comment