Skip to content
Snippets Groups Projects
Commit 7914d6d9 authored by Sven-Ove Hänsel's avatar Sven-Ove Hänsel
Browse files

change script for recursive path

parent 3c83fe14
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
 
``` python
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.express as px
import glob
 
# Plot 'Total Time (s)' per 'Query Key' over time for every query-log CSV
# found (recursively) below the current experiment folder, one interactive
# Plotly figure per file.

# Columns a CSV must provide to be plotted. Not every CSV below the experiment
# folder is guaranteed to have them, so files lacking any of these are skipped
# instead of aborting the whole loop with a KeyError.
REQUIRED_COLUMNS = ('Start Time', 'Total Time (s)', 'Query Key')


def db_name_from_path(filepath):
    """Return the name of the directory that directly contains *filepath*.

    The log files are laid out as ``.../<database>/<timestamp>_query_logs.csv``,
    so the parent directory name is used as the database name. Both ``\\`` and
    ``/`` are accepted as separators, because the glob pattern below mixes
    them. Returns ``'unknown'`` when the path has no parent component.
    """
    parts = filepath.replace('\\', '/').split('/')
    return parts[-2] if len(parts) >= 2 else 'unknown'


# Replace <cur_experiment_folder> with the experiment to evaluate.
directory_path = r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\<cur_experiment_folder>'

# Find all CSV files in the folder and its subdirectories; sorted so the
# figures always appear in the same order.
filepaths = sorted(glob.glob(directory_path + '/**/*.csv', recursive=True))

for filepath in filepaths:
    df = pd.read_csv(filepath)

    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        print(f'Skipping {filepath}: missing column(s) {missing}')
        continue

    # 'Start Time' is assumed to hold numeric epoch seconds (unit='s').
    df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')

    db_name = db_name_from_path(filepath)

    fig = px.line(
        df,
        x='Start Time',
        y='Total Time (s)',
        color='Query Key',
        title=f'Total Time per Query Key Over Time for {db_name}',
    )

    # Tick format for the x-axis; adjust as needed, e.g.
    #   '%Y-%m-%d %H:%M:%S'  -> Year-Month-Day Hour:Minute:Second
    #   '%H:%M:%S'           -> time of day only
    #   '%Y-%m-%d'           -> date only
    fig.update_xaxes(tickformat='%d.%m. %H:%M:%S', title_text='Time')
    fig.update_yaxes(title_text='Total Time (s)')

    fig.show()
```
 
%% Output
 
 
 
 
%% Cell type:code id: tags:
``` python
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.express as px
# Plot 'Total Time (s)' per 'Query Key' over time for a fixed list of
# query-log CSVs (one per database), one interactive Plotly figure per file,
# with month-granularity ticks on the x-axis.

# Columns a CSV must provide to be plotted. Logs lacking any of them are
# skipped instead of raising KeyError and aborting the remaining plots.
REQUIRED_COLUMNS = ('Start Time', 'Total Time (s)', 'Query Key')


def db_name_from_path(filepath):
    """Return the name of the directory that directly contains *filepath*.

    The log files are laid out as ``.../<database>/<timestamp>_query_logs.csv``,
    so the parent directory name is used as the database name. Both ``\\`` and
    ``/`` are accepted as separators. Returns ``'unknown'`` when the path has
    no parent component.
    """
    parts = filepath.replace('\\', '/').split('/')
    return parts[-2] if len(parts) >= 2 else 'unknown'


filepaths = [
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\memgraph\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\neo4j\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\ongdb\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\pg\2024-02-11_11-58-08_query_execution_logs.csv',
]

for filepath in filepaths:
    # One missing log must not prevent the other databases from being plotted.
    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        print(f'Skipping {filepath}: file not found')
        continue

    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        print(f'Skipping {filepath}: missing column(s) {missing}')
        continue

    # 'Start Time' is assumed to hold numeric epoch seconds (unit='s').
    df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')

    # Put the database name in the title so the figures can be told apart.
    db_name = db_name_from_path(filepath)

    fig = px.line(
        df,
        x='Start Time',
        y='Total Time (s)',
        color='Query Key',
        title=f'Total Time per Query Key Over Time for {db_name}',
    )
    # One tick per month, labelled "<abbreviated month>\n<year>".
    fig.update_xaxes(
        dtick="M1",
        tickformat="%b\n%Y",
        title_text='Time',
    )
    fig.update_yaxes(title_text='Total Time (s)')
    fig.show()
```
%% Output
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
c:\Python38\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
c:\Python38\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
c:\Python38\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Total Time (s)'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
C:\Users\SVEN-O~1\AppData\Local\Temp/ipykernel_7836/3302337684.py in <module>
22 for key in df['Query Key'].unique():
23 subset = df[df['Query Key'] == key]
---> 24 ax.plot(subset['Start Time'], subset['Total Time (s)'], label=key)
25
26 # Extracting the database name from the filepath for the title
c:\Python38\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
3456 if self.columns.nlevels > 1:
3457 return self._getitem_multilevel(key)
-> 3458 indexer = self.columns.get_loc(key)
3459 if is_integer(indexer):
3460 indexer = [indexer]
c:\Python38\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 'Total Time (s)'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment