Skip to content
Snippets Groups Projects
Commit 1465f48e authored by Sven-Ove Hänsel
Browse files

update chart generation

parent bc2d38b3
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags: %% Cell type:code id: tags:
   
``` python ``` python
import pandas as pd
import plotly.express as px
import glob
from datetime import datetime
import os

# Export one line chart per query-log CSV of an experiment run as
# HTML, JPG and PDF.

# Name of the experiment run. It selects both the input folder below
# `experiments` and the output folder below `charts`.
cur_folder = "240212_window_size_1500"
# cur_folder = "240212_window_size_2500"

chart_directory = f'C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\eval\\charts\\{cur_folder}'
# exist_ok avoids the separate existence check and the race between check and creation.
os.makedirs(chart_directory, exist_ok=True)

# Directory that contains the CSV files of the selected experiment run.
directory_path = f'C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\eval\\experiments\\{cur_folder}'
# directory_path = r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try'

# Use glob to find all csv files in subdirectories.
filepaths = glob.glob(directory_path + '/**/*.csv', recursive=True)

# Columns the plot below reads. The recursive glob can also match CSV files
# that are not query logs; indexing those raised KeyError: 'Start Time'.
required_columns = ('Start Time', 'Total Time (s)', 'Query Key')

for filepath in filepaths:
    df = pd.read_csv(filepath)

    # Skip files without the expected columns instead of aborting the whole run.
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Skipping {filepath}: missing columns {missing_columns}")
        continue

    # 'Start Time' is interpreted as seconds since the Unix epoch.
    df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')

    # The database name is taken from the directory that directly contains the
    # CSV file. os.path handles the path separators of the running platform,
    # unlike a literal split on '\\'.
    db_name = os.path.basename(os.path.dirname(filepath))

    # Use Plotly Express to plot one line per query key.
    fig = px.line(df, x='Start Time', y='Total Time (s)', color='Query Key',
                  title=f'Total Time per Query Key Over Time for {db_name}')

    # Customize the tick format for the x-axis (day.month. hour:minute:second).
    fig.update_xaxes(tickformat='%d.%m. %H:%M:%S', title_text='Time')

    # Update y-axis label.
    fig.update_yaxes(title_text='Total Time (s)')

    # Output path without extension, built from the chart directory and db_name.
    # NOTE(review): several CSV files in the same database folder would
    # overwrite each other's charts - confirm there is one file per folder.
    filename = f"{chart_directory}/{db_name}"

    print(filename)
    # The HTML export keeps the plot interactive; JPG and PDF are static.
    # NOTE(review): write_image presumably needs plotly's static image export
    # backend to be installed - confirm in the target environment.
    fig.write_html(file=f"{filename}.html")
    fig.write_image(file=f"{filename}.jpg")
    fig.write_image(file=f"{filename}.pdf")
``` ```
   
%% Output %% Output
   
C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/memgraph ---------------------------------------------------------------------------
C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/neo4j KeyError Traceback (most recent call last)
C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/ongdb c:\Python38\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/pg 3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
c:\Python38\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
c:\Python38\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Start Time'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
C:\Users\SVEN-O~1\AppData\Local\Temp/ipykernel_12360/3835503201.py in <module>
25
26 # Ensure 'Start Time' is converted to a recognizable datetime format
---> 27 df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')
28
29 # Extracting the database name from the filepath for the title
c:\Python38\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
3456 if self.columns.nlevels > 1:
3457 return self._getitem_multilevel(key)
-> 3458 indexer = self.columns.get_loc(key)
3459 if is_integer(indexer):
3460 indexer = [indexer]
c:\Python38\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 'Start Time'
   
%% Cell type:code id: tags: %% Cell type:code id: tags:
   
``` python ``` python
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.express as px
import glob
import os
from datetime import datetime

# Show one interactive line chart per query-log CSV of an experiment run.

# NOTE: this hand-written list is replaced by the glob result below; it is
# kept as a reference for the expected folder layout.
filepaths = [
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\memgraph\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\neo4j\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\ongdb\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\pg\2024-02-11_11-58-08_query_execution_logs.csv'
]
# '<cur_experiment_folder>' is a literal placeholder in this path.
# NOTE(review): presumably it has to be replaced with a real folder name
# before the glob can match anything - confirm.
directory_path = r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\<cur_experiment_folder>'
# Use glob to find all csv files in subdirectories.
filepaths = glob.glob(directory_path + '/**/*.csv', recursive=True)

# Columns the plot below reads; CSV files without them are skipped instead of
# raising KeyError when 'Start Time' is indexed.
required_columns = ('Start Time', 'Total Time (s)', 'Query Key')

for filepath in filepaths:
    df = pd.read_csv(filepath)

    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Skipping {filepath}: missing columns {missing_columns}")
        continue

    # 'Start Time' is interpreted as seconds since the Unix epoch.
    df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')

    # The database name is taken from the directory that directly contains the
    # CSV file. os.path handles the path separators of the running platform,
    # unlike a literal split on '\\'.
    db_name = os.path.basename(os.path.dirname(filepath))

    # Use Plotly Express to plot one line per query key.
    fig = px.line(df, x='Start Time', y='Total Time (s)', color='Query Key',
                  title=f'Total Time per Query Key Over Time for {db_name}')

    # Customize the tick format for the x-axis to show date and time.
    # Other possible formats:
    #   '%Y-%m-%d %H:%M:%S' for "Year-Month-Day Hour:Minute:Second"
    #   '%H:%M:%S' for "Hour:Minute:Second" to focus on the time of day
    #   '%Y-%m-%d' for "Year-Month-Day" if the date is more important
    fig.update_xaxes(tickformat='%d.%m. %H:%M:%S', title_text='Time')

    # Update y-axis label.
    fig.update_yaxes(title_text='Total Time (s)')

    # Show the plot.
    fig.show()
``` ```
   
%% Output %% Output
   
   
   
   
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment