update chart generation

1465f48e · Sven-Ove Hänsel · bc2d38b3 · 1465f48e
Commit 1465f48e authored 1 year ago by Sven-Ove Hänsel
--- a/code/eval/charts/charts.ipynb
+++ b/code/eval/charts/charts.ipynb
@@ -2,17 +2,28 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
+     "ename": "KeyError",
-     "output_type": "stream",
+     "evalue": "'Start Time'",
-     "text": [
+     "output_type": "error",
-      "C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\eval\\charts\\240212_window_size_2500/memgraph\n",
+     "traceback": [
-      "C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\eval\\charts\\240212_window_size_2500/neo4j\n",
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\eval\\charts\\240212_window_size_2500/ongdb\n",
+      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
-      "C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\eval\\charts\\240212_window_size_2500/pg\n"
+      "\u001b[1;32mc:\\Python38\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m   3360\u001b[0m             \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3361\u001b[1;33m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3362\u001b[0m             \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mc:\\Python38\\lib\\site-packages\\pandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
+      "\u001b[1;32mc:\\Python38\\lib\\site-packages\\pandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
+      "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
+      "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
+      "\u001b[1;31mKeyError\u001b[0m: 'Start Time'",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[1;32mC:\\Users\\SVEN-O~1\\AppData\\Local\\Temp/ipykernel_12360/3835503201.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     25\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     26\u001b[0m     \u001b[1;31m# Ensure 'Start Time' is converted to a recognizable datetime format\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 27\u001b[1;33m     \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Start Time'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_datetime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Start Time'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0munit\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m's'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     28\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     29\u001b[0m     \u001b[1;31m# Extracting the database name from the filepath for the title\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mc:\\Python38\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   3456\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3457\u001b[0m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3458\u001b[1;33m             \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3459\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3460\u001b[0m                 \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mc:\\Python38\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m   3361\u001b[0m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3362\u001b[0m             \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3363\u001b[1;33m                 \u001b[1;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3364\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3365\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0misna\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhasnans\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mKeyError\u001b[0m: 'Start Time'"
     ]
    }
   ],
@@ -23,14 +34,14 @@
    "from datetime import datetime\n",
    "import os\n",
    "\n",
-    "cur_folder = \"240212_window_size_2500\"\n",
+    "cur_folder = \"240212_window_size_1500\"\n",
    "# cur_folder = \n",
    "\n",
    "chart_directory = f'C:\\\\Studium_MIN\\\\05_Masterarbeit\\\\thesis\\\\ma_code\\\\code\\\\eval\\\\charts\\\\{cur_folder}'\n",
    "if not os.path.exists(chart_directory):\n",
    "    os.makedirs(chart_directory)\n",
    "\n",
-    "cur_folder = \"240212_window_size_1500\"\n",
+    "# cur_folder = \"240212_window_size_1500\"\n",
    "\n",
    "# Assuming the `directory_path` variable is correctly set to the directory containing your CSV files\n",
    "directory_path = f'C:\\\\Studium_MIN\\\\05_Masterarbeit\\\\thesis\\\\ma_code\\\\code\\\\eval\\\\experiments\\\\{cur_folder}'\n",

 %% Cell type:code id: tags:
 ``` python
 import pandas as pd
 import plotly.express as px
 import glob
 from datetime import datetime
 import os
-cur_folder = "240212_window_size_2500"
+cur_folder = "240212_window_size_1500"
 # cur_folder =
 chart_directory = f'C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\eval\\charts\\{cur_folder}'
 if not os.path.exists(chart_directory):
    os.makedirs(chart_directory)
-cur_folder = "240212_window_size_1500"
+# cur_folder = "240212_window_size_1500"
 # Assuming the `directory_path` variable is correctly set to the directory containing your CSV files
 directory_path = f'C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\eval\\experiments\\{cur_folder}'
 # directory_path = r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try'
 filepaths = glob.glob(directory_path + '/**/*.csv', recursive=True)  # Use glob to find all csv files in subdirectories
 # Columns the chart below needs; a CSV lacking any of them cannot be plotted.
 required_columns = {'Start Time', 'Total Time (s)', 'Query Key'}
 for filepath in filepaths:
    df = pd.read_csv(filepath)
    # Skip CSVs without the expected columns instead of letting a KeyError
    # abort the loop and leave the remaining files without charts.
    missing_columns = required_columns.difference(df.columns)
    if missing_columns:
        print(f"Skipping {filepath}: missing columns {sorted(missing_columns)}")
        continue
    # 'Start Time' is parsed as a number of seconds since the Unix epoch
    df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')
    # The name of the CSV's parent folder labels the chart and names the output files.
    # os.path is used so the lookup does not depend on the path separator.
    # NOTE(review): presumably one folder per database (memgraph, neo4j, ...) - confirm
    db_name = os.path.basename(os.path.dirname(filepath))
    # Use Plotly Express to plot one line per query key
    fig = px.line(df, x='Start Time', y='Total Time (s)', color='Query Key',
                  title=f'Total Time per Query Key Over Time for {db_name}')
    # Customize the tick format for the x-axis (day.month. hour:minute:second)
    fig.update_xaxes(tickformat='%d.%m. %H:%M:%S', title_text='Time')
    # Update y-axis label
    fig.update_yaxes(title_text='Total Time (s)')
    # Output path without extension, derived from the chart directory and folder name only.
    # NOTE(review): CSVs that share a parent folder map to the same file names,
    # so a later CSV overwrites the charts of an earlier one.
    filename = f"{chart_directory}/{db_name}"
    print(filename)
    # Save an interactive HTML version plus static JPG and PDF exports
    fig.write_html(file=f"{filename}.html")
    fig.write_image(file=f"{filename}.jpg")
    fig.write_image(file=f"{filename}.pdf")
 # Note: Ensure that the working directory is writable or specify an absolute path for 'filename'
 ```
 %% Output
-    C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/memgraph
+    ---------------------------------------------------------------------------
-    C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/neo4j
+    KeyError                                  Traceback (most recent call last)
-    C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/ongdb
+    c:\Python38\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
-    C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\charts\240212_window_size_2500/pg
+       3360             try:
+    -> 3361                 return self._engine.get_loc(casted_key)
+       3362             except KeyError as err:
+    c:\Python38\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
+    c:\Python38\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
+    pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
+    pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
+    KeyError: 'Start Time'
+The above exception was the direct cause of the following exception:
+    KeyError                                  Traceback (most recent call last)
+    C:\Users\SVEN-O~1\AppData\Local\Temp/ipykernel_12360/3835503201.py in <module>
+         25
+         26     # Ensure 'Start Time' is converted to a recognizable datetime format
+    ---> 27     df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')
+         28
+         29     # Extracting the database name from the filepath for the title
+    c:\Python38\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
+       3456             if self.columns.nlevels > 1:
+       3457                 return self._getitem_multilevel(key)
+    -> 3458             indexer = self.columns.get_loc(key)
+       3459             if is_integer(indexer):
+       3460                 indexer = [indexer]
+    c:\Python38\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
+       3361                 return self._engine.get_loc(casted_key)
+       3362             except KeyError as err:
+    -> 3363                 raise KeyError(key) from err
+       3364
+       3365         if is_scalar(key) and isna(key) and not self.hasnans:
+    KeyError: 'Start Time'
 %% Cell type:code id: tags:
 ``` python
 import pandas as pd
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 import plotly.express as px
 import glob
 from datetime import datetime
 # Explicit list of query-log CSV files from the '240212_first_try' experiment.
 # NOTE(review): this list is never used - `filepaths` is reassigned by the
 # glob call a few lines below before anything reads it.
 filepaths = [
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\memgraph\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\neo4j\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\ongdb\2024-02-11_11-58-08_query_logs.csv',
    r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\240212_first_try\query_logs\pg\2024-02-11_11-58-08_query_execution_logs.csv'
 ]
 # NOTE(review): '<cur_experiment_folder>' looks like a placeholder that must be
 # replaced with a real experiment folder name - confirm before running.
 directory_path = r'C:\Studium_MIN\05_Masterarbeit\thesis\ma_code\code\eval\experiments\<cur_experiment_folder>'
 filepaths = glob.glob(directory_path + '/**/*.csv', recursive=True)  # Use glob to find all csv files in subdirectories
 # Columns the chart below needs; a CSV lacking any of them cannot be plotted.
 required_columns = {'Start Time', 'Total Time (s)', 'Query Key'}
 for filepath in filepaths:
    df = pd.read_csv(filepath)
    # Skip CSVs without the expected columns instead of letting a KeyError
    # abort the loop before the remaining files are shown.
    missing_columns = required_columns.difference(df.columns)
    if missing_columns:
        print(f"Skipping {filepath}: missing columns {sorted(missing_columns)}")
        continue
    # 'Start Time' is parsed as a number of seconds since the Unix epoch
    df['Start Time'] = pd.to_datetime(df['Start Time'], unit='s')
    # The name of the CSV's parent folder is used in the chart title.
    # Separators are normalised first so both '\\' and '/' paths work.
    db_name = filepath.replace('\\', '/').split('/')[-2]
    # Use Plotly Express to plot one line per query key
    fig = px.line(df, x='Start Time', y='Total Time (s)', color='Query Key',
                  title=f'Total Time per Query Key Over Time for {db_name}')
    # Customize the tick format for the x-axis to show date and time
    # You can adjust the format as per your needs. Here are some examples:
    # '%Y-%m-%d %H:%M:%S' for "Year-Month-Day Hour:Minute:Second"
    # '%H:%M:%S' for "Hour:Minute:Second" if you want to focus on time of day
    # '%Y-%m-%d' for "Year-Month-Day" if the date is more important
    fig.update_xaxes(tickformat='%d.%m. %H:%M:%S', title_text='Time')
    # Update y-axis label
    fig.update_yaxes(title_text='Total Time (s)')
    # Show the plot
    fig.show()
 ```
 %% Output