diff --git a/code/eval/queries/cypher/driver_1_7/Dockerfile b/code/eval/queries/cypher/driver_1_7/Dockerfile
deleted file mode 100644
index 8eb44b6dc4486dc801d1e4ff6e30fd6d14e8ea2d..0000000000000000000000000000000000000000
--- a/code/eval/queries/cypher/driver_1_7/Dockerfile
+++ /dev/null
@@ -1,21 +0,0 @@
-# Use an official Python runtime as a base image
-FROM python:3.8
-
-# Set working directory in the container
-WORKDIR /app
-
-# install dependencies
-RUN pip install neo4j==1.7.0
-RUN pip install schedule
-
-ENV DOCKERIZE_VERSION v0.6.1
-
-# Copy the Python script into the container
-COPY cdm_cypher_queries.py /app/
-
-# Set environment variable 'chunk_size' and 'number_of_insertions'
-ENV db_port='7687'
-ENV db_host='localhost'
-ENV interval=15
-
-CMD [ "python","cdm_cypher_queries.py" ]
\ No newline at end of file
diff --git a/code/eval/queries/cypher/driver_1_7/cypher_queries.py b/code/eval/queries/cypher/driver_1_7/cypher_queries.py
deleted file mode 100644
index 484ae421942cde9ede61ac8adb017ca3ca7e61cc..0000000000000000000000000000000000000000
--- a/code/eval/queries/cypher/driver_1_7/cypher_queries.py
+++ /dev/null
@@ -1,122 +0,0 @@
-from neo4j import GraphDatabase
-import schedule
-import time
-import logging
-import os
-import csv
-
-PORT = int(os.getenv('db_port',default=7687))
-HOST = str(os.getenv('db_host',default='localhost'))
-INTERVAL = int(os.getenv('interval',default=15))
-log_directory = "log"
-queries_file_path = str(os.getenv('query_file_path',default="C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\infrastructure\\eval\\query\\neo4j\\queries.txt"))
-
-# Connection details
-uri = f"bolt://{HOST}:{PORT}"
-auth = ("","") # Use this for no authentication, or (USERNAME, PASSWORD) tuple for authentication
-
-# create log directory
-if not os.path.exists(log_directory):
-    os.makedirs(log_directory)
-
-
-# Setup logging to file
-formatted_time = time.strftime("%Y-%m-%d_%H-%M-%S")
-logging.basicConfig(level=logging.INFO,
-                    filename=f"{log_directory}/{formatted_time}_query_log.log",
-                    filemode='a', # Append to the log file if it exists, 'w' to overwrite
-                    format='%(asctime)s - %(levelname)s - %(message)s')
-
-csv_log_file = open(f'{log_directory}/{formatted_time}_query_logs.csv', mode='a', newline='')
-
-cypher_queries = {
-    "anc": "MATCH path=(node:Host)-[*]->(ancestor) RETURN ancestor, length(path) AS distance_upstream ORDER BY distance_upstream;",
-    "desc": "MATCH path=(node:Host)<-[*]-(descendant) RETURN descendant, length(path) AS distance_downstream ORDER BY distance_downstream;",
-    "path": "MATCH path = shortestPath((a:Subject)-[*]-(b:Event)) where a._uuid='0CF2BB3E-36B8-11E8-BF66-D9AA8AFF4A69' RETURN path;",
-    "2-hop": "MATCH (startNode: Host)-[*1..2]-(neighborhood) RETURN neighborhood;"
-}
-def load_queries_from_file(file_path):
-    cypher_queries = {}
-    with open(file_path, 'r') as file:
-        for line in file:
-            if '|' in line: # Simple validation to ignore malformed lines
-                key, query = line.strip().split('|', 1)
-                cypher_queries[key] = query
-    return cypher_queries
-
-# Function to execute a specific Cypher query and log response time
-# def execute_and_log_query(query_key, query):
-#     start_time = time.time()
-#     with driver.session() as session:
-#         result = session.run(query)
-#         summary = result.consume()
-#     end_time = time.time()
-#     # Check if the attributes are None and set them to 0 if they are
-#     result_available_after = summary.result_available_after if summary.result_available_after is not None else 0
-#     result_consumed_after = summary.result_consumed_after if summary.result_consumed_after is not None else 0
-#     execution_time = result_available_after + result_consumed_after
-
-#     logging.info(f"Results for Query {query_key}:")
-#     logging.info(f"Python executed in: {end_time - start_time}s")
-#     if execution_time == 0:
-#         logging.warning(f"No internal DBMS metric available")
-#     else:
-#         logging.info(f"DBMS answered in: {execution_time} ms\n")
-
-def execute_and_log_query(query_key, query):
-    start_time = time.time()
-    with driver.session() as session:
-        result = session.run(query)
-        summary = result.consume()
-    end_time = time.time()
-
-    # Check if the attributes are None and set them to 0 if they are
-    result_available_after = summary.result_available_after if summary.result_available_after is not None else 0
-    result_consumed_after = summary.result_consumed_after if summary.result_consumed_after is not None else 0
-    execution_time = result_available_after + result_consumed_after
-
-    logging.info(f"Results for Query {query_key}:")
-    logging.info(f"Python executed in: {end_time - start_time}s")
-    if execution_time == 0:
-        logging.warning(f"No internal DBMS metric available")
-    else:
-        logging.info(f"DBMS answered in: {execution_time} ms\n")
-
-    # Open the CSV file in append mode and write the log information
-    writer = csv.writer(csv_log_file)
-    # Write a header if the file is newly created or empty, else append the data
-    csv_log_file.seek(0, 2) # Move the cursor to the end of the file
-    if csv_log_file.tell() == 0: # If file is empty, write a header
-        writer.writerow(['Query Key', 'Start Time', 'End Time', 'Execution Time (ms)', 'Total Time (s)'])
-    writer.writerow([query_key, start_time, end_time, execution_time, end_time - start_time])
-
-# Function to schedule and execute all queries
-def schedule_and_execute_queries():
-    for query_key, query in cypher_queries.items():
-
-        execute_and_log_query(query_key=query_key, query=query)
-        # Example scheduling: execute immediately then every minute for demonstration
-        # TODO: Create the schedule here
-        schedule.every(INTERVAL).minutes.do(execute_and_log_query, query_key=query_key, query=query)
-        # schedule.every(15).minutes.do(execute_and_log_query, query_key=query_key, query=query)
-        # schedule.every(30).minutes.do()
-
-    while True:
-        schedule.run_pending()
-        time.sleep(1)
-
-
-cypher_queries = load_queries_from_file(queries_file_path)
-# Initialize the Neo4j connection
-driver = GraphDatabase.driver(uri=uri, auth=auth,encrypted=False)
-logging.info(f"Connected to db... {uri}")
{uri}") - -# Main execution block with graceful shutdown -if __name__ == "__main__": - try: - schedule_and_execute_queries() - except KeyboardInterrupt: - logging.info("Script interrupted by user") - finally: - driver.close() - logging.info("Database connection closed.") diff --git a/code/eval/queries/cypher/ongdb/cypher_queries.py b/code/eval/queries/cypher/ongdb/cypher_queries.py index cd4741ff07225b3e88ede71bbafe3ef022fa60db..d48bcdb2b419fc373ced5e91243af549b4e9d244 100644 --- a/code/eval/queries/cypher/ongdb/cypher_queries.py +++ b/code/eval/queries/cypher/ongdb/cypher_queries.py @@ -67,16 +67,23 @@ def execute_and_log_query(query_key, query): start_time = time.time() with driver.session() as session: result = session.run(query) + row_count= len(result.data()) summary = result.consume() end_time = time.time() # Check if the attributes are None and set them to 0 if they are result_available_after = summary.result_available_after if summary.result_available_after is not None else 0 result_consumed_after = summary.result_consumed_after if summary.result_consumed_after is not None else 0 + row_count = row_count if row_count is not None else 0 execution_time = result_available_after + result_consumed_after logging.info(f"Results for Query {query_key}:") logging.info(f"Python executed in: {end_time - start_time}s") + + if row_count == 0: + logging.info(f"Number of rows not available") + else: + logging.info(f"Number of rows {row_count}") if execution_time == 0: logging.warning(f"No internal DBMS metric available") else: @@ -87,8 +94,8 @@ def execute_and_log_query(query_key, query): # Write a header if the file is newly created or empty, else append the data csv_log_file.seek(0, 2) # Move the cursor to the end of the file if csv_log_file.tell() == 0: # If file is empty, write a header - writer.writerow(['Query Key', 'Start Time', 'End Time', 'Execution Time (ms)', 'Total Time (s)']) - writer.writerow([query_key, start_time, end_time, execution_time, end_time - start_time]) + writer.writerow(['Query Key', 'Start Time', 'End Time','Fetched nodes', 'Execution Time (ms)', 'Total Time (s)']) + writer.writerow([query_key, start_time, end_time, row_count, execution_time, end_time - start_time]) # Function to schedule and execute all queries def schedule_and_execute_queries():