Skip to content
Snippets Groups Projects
Commit 1aff93cd authored by Sven-Ove Hänsel
Browse files

update chart generation

parent fafe1804
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
 
``` python
import pandas as pd
import plotly.express as px
import glob
from datetime import datetime
import os
 
# Experiment run to chart. It is assigned exactly once so that the chart
# output folder and the CSV input folder always refer to the same run.
# Other run seen in this notebook: "240212_window_size_2500"
cur_folder = "240212_window_size_1500"

chart_directory = f'C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\eval\\charts\\{cur_folder}'
directory_path = f'C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\eval\\experiments\\{cur_folder}'

# Columns the plotting code below reads from every CSV.
REQUIRED_COLUMNS = ('Start Time', 'Total Time (s)', 'Query Key')


def missing_columns(columns):
    """Return the names in REQUIRED_COLUMNS that are absent from *columns*.

    The result keeps the order of REQUIRED_COLUMNS and is empty when every
    required column is present.
    """
    present = set(columns)
    return [name for name in REQUIRED_COLUMNS if name not in present]


def db_name_from_path(filepath):
    """Return the name of the folder that directly contains *filepath*.

    The charts are titled and named after this folder; the code assumes it
    is the database name (e.g. ``.../memgraph/<file>.csv`` -> ``memgraph``).
    Uses os.path so the platform's own path separators are honoured.
    """
    return os.path.basename(os.path.dirname(filepath))


# exist_ok avoids the separate exists() check.
os.makedirs(chart_directory, exist_ok=True)

# Find every CSV file below the experiment folder, at any depth.
filepaths = glob.glob(directory_path + '/**/*.csv', recursive=True)

for filepath in filepaths:
    df = pd.read_csv(filepath)

    # The recursive glob also matches CSV files that are not query logs.
    # Skip those instead of aborting the whole run with a KeyError.
    missing = missing_columns(df.columns)
    if missing:
        print(f"Skipping {filepath}: missing column(s) {missing}")
        continue

    # 'Start Time' values are converted with unit='s' (seconds).
    df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')

    db_name = db_name_from_path(filepath)

    # One line per query key, total time over start time.
    fig = px.line(
        df,
        x='Start Time',
        y='Total Time (s)',
        color='Query Key',
        title=f'Total Time per Query Key Over Time for {db_name}',
    )
    fig.update_xaxes(tickformat='%d.%m. %H:%M:%S', title_text='Time')
    fig.update_yaxes(title_text='Total Time (s)')

    # Output base name is the database name only, so several CSV files in
    # the same folder write to the same chart files.
    filename = os.path.join(chart_directory, db_name)
    print(filename)

    # HTML keeps the plot interactive; JPG and PDF are static exports.
    fig.write_html(file=f"{filename}.html")
    fig.write_image(file=f"{filename}.jpg")
    fig.write_image(file=f"{filename}.pdf")
```
 
%% Output
 
C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/memgraph
C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/neo4j
C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/ongdb
C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/pg
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
c:\Python38\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
c:\Python38\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
c:\Python38\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Start Time'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
C:\Users\SVEN-O~1\AppData\Local\Temp/ipykernel_12360/3835503201.py in <module>
25
26 # Ensure 'Start Time' is converted to a recognizable datetime format
---> 27 df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')
28
29 # Extracting the database name from the filepath for the title
c:\Python38\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
3456 if self.columns.nlevels > 1:
3457 return self._getitem_multilevel(key)
-> 3458 indexer = self.columns.get_loc(key)
3459 if is_integer(indexer):
3460 indexer = [indexer]
c:\Python38\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 'Start Time'
 
%% Cell type:code id: tags:
 
``` python
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.express as px
import glob
from datetime import datetime
 
import os  # this cell does not import os above; needed for path handling

# Experiment run to display. This replaces the literal
# "<cur_experiment_folder>" placeholder, which could never match a folder,
# so the glob found no files and nothing was plotted.
cur_experiment_folder = '240212_first_try'

# Files seen for this run sit at
#   <experiment>\query_logs\<database>\<timestamp>_query_logs.csv
# (pg uses <timestamp>_query_execution_logs.csv); the recursive glob below
# discovers them, so no hand-maintained file list is needed.
directory_path = os.path.join(
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments',
    cur_experiment_folder,
)

# Columns the plotting code below reads from every CSV.
REQUIRED_COLUMNS = ('Start Time', 'Total Time (s)', 'Query Key')


def missing_columns(columns):
    """Return the names in REQUIRED_COLUMNS that are absent from *columns*.

    The result keeps the order of REQUIRED_COLUMNS and is empty when every
    required column is present.
    """
    present = set(columns)
    return [name for name in REQUIRED_COLUMNS if name not in present]


def db_name_from_path(filepath):
    """Return the name of the folder that directly contains *filepath*.

    The chart title uses this folder name; the code assumes it is the
    database name (e.g. ``.../memgraph/<file>.csv`` -> ``memgraph``).
    Uses os.path so the platform's own path separators are honoured.
    """
    return os.path.basename(os.path.dirname(filepath))


# Find every CSV file below the experiment folder, at any depth.
filepaths = glob.glob(directory_path + '/**/*.csv', recursive=True)

if not filepaths:
    # Report an empty result instead of silently producing no output.
    print(f"No CSV files found below {directory_path}")

for filepath in filepaths:
    df = pd.read_csv(filepath)

    # Skip CSV files that are not query logs rather than failing with a
    # KeyError on the first missing column.
    missing = missing_columns(df.columns)
    if missing:
        print(f"Skipping {filepath}: missing column(s) {missing}")
        continue

    # 'Start Time' values are converted with unit='s' (seconds).
    df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')

    db_name = db_name_from_path(filepath)

    # One line per query key, total time over start time.
    fig = px.line(
        df,
        x='Start Time',
        y='Total Time (s)',
        color='Query Key',
        title=f'Total Time per Query Key Over Time for {db_name}',
    )

    # Tick format shows day, month and time of day. Alternatives:
    #   '%Y-%m-%d %H:%M:%S'  full date and time
    #   '%H:%M:%S'           time of day only
    #   '%Y-%m-%d'           date only
    fig.update_xaxes(tickformat='%d.%m. %H:%M:%S', title_text='Time')
    fig.update_yaxes(title_text='Total Time (s)')

    fig.show()
```
 
%% Output
 
 
 
 
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment