From bed8d6e370f372efb3bc1a641a940e253e12bee6 Mon Sep 17 00:00:00 2001 From: cwy-p8d-u1 <sven-ove.haensel@stud.hs-hannover.de> Date: Thu, 8 Feb 2024 16:55:20 +0100 Subject: [PATCH] update docker for queries from txt --- code/build_push_container.sh | 2 +- .../cypher/{ => driver_1_7}/Dockerfile | 0 .../{ => driver_1_7}/cdm_cypher_queries.py | 2 +- .../queries/cypher/driver_latest/Dockerfile | 22 +++++ .../driver_latest/cdm_cypher_queries.py | 86 +++++++++++++++++++ code/eval/queries/sql/Dockerfile | 1 + code/eval/queries/sql/cdm_sql_queries.py | 13 +++ code/infrastructure/docker-compose.yml | 12 ++- 8 files changed, 134 insertions(+), 4 deletions(-) rename code/eval/queries/cypher/{ => driver_1_7}/Dockerfile (100%) rename code/eval/queries/cypher/{ => driver_1_7}/cdm_cypher_queries.py (97%) create mode 100644 code/eval/queries/cypher/driver_latest/Dockerfile create mode 100644 code/eval/queries/cypher/driver_latest/cdm_cypher_queries.py diff --git a/code/build_push_container.sh b/code/build_push_container.sh index e6b6597..f9bb09f 100644 --- a/code/build_push_container.sh +++ b/code/build_push_container.sh @@ -11,7 +11,7 @@ declare -A paths=( ["sub_pg"]="./infrastructure/streaming/clients/sub/postgres" ["sub_neo4j"]="./infrastructure/streaming/clients/sub/neo4j" ["pub_cdm"]="./infrastructure/streaming/clients/pub" - ["cypher_queries"]="./eval/queries/cypher" + ["cypher_queries"]="./eval/queries/cypher/driver_latest" ["sql_queries"]="./eval/queries/sql" ) diff --git a/code/eval/queries/cypher/Dockerfile b/code/eval/queries/cypher/driver_1_7/Dockerfile similarity index 100% rename from code/eval/queries/cypher/Dockerfile rename to code/eval/queries/cypher/driver_1_7/Dockerfile diff --git a/code/eval/queries/cypher/cdm_cypher_queries.py b/code/eval/queries/cypher/driver_1_7/cdm_cypher_queries.py similarity index 97% rename from code/eval/queries/cypher/cdm_cypher_queries.py rename to code/eval/queries/cypher/driver_1_7/cdm_cypher_queries.py index 05f1cc2..5fce58e 100644 --- a/code/eval/queries/cypher/cdm_cypher_queries.py +++ b/code/eval/queries/cypher/driver_1_7/cdm_cypher_queries.py @@ -65,7 +65,7 @@ logging.basicConfig(level=logging.INFO, # Initialize the Neo4j connection -driver = GraphDatabase.driver(uri=uri, auth=auth, encrypted=False) +driver = GraphDatabase.driver(uri=uri, auth=auth,encrypted=False) logging.info(f"Connected to db... {uri}") # Main execution block with graceful shutdown diff --git a/code/eval/queries/cypher/driver_latest/Dockerfile b/code/eval/queries/cypher/driver_latest/Dockerfile new file mode 100644 index 0000000..5fefbd1 --- /dev/null +++ b/code/eval/queries/cypher/driver_latest/Dockerfile @@ -0,0 +1,22 @@ +# Use an official Python runtime as a base image +FROM python:3.8 + +# Set working directory in the container +WORKDIR /app + +# install dependencies +RUN pip install neo4j +RUN pip install schedule + +ENV DOCKERIZE_VERSION v0.6.1 + +# Copy the Python script into the container +COPY cdm_cypher_queries.py /app/ + +# Set environment variable 'chunk_size' and 'number_of_insertions' +ENV db_port='7687' +ENV db_host='localhost' +ENV interval=15 +ENV query_file_path='/app/queries/queries.txt' + +CMD [ "python","cdm_cypher_queries.py" ] \ No newline at end of file diff --git a/code/eval/queries/cypher/driver_latest/cdm_cypher_queries.py b/code/eval/queries/cypher/driver_latest/cdm_cypher_queries.py new file mode 100644 index 0000000..1f3a269 --- /dev/null +++ b/code/eval/queries/cypher/driver_latest/cdm_cypher_queries.py @@ -0,0 +1,86 @@ +from neo4j import GraphDatabase +import schedule +import time +import logging +import os + +PORT = int(os.getenv('db_port',default=7687)) +HOST = str(os.getenv('db_host',default='localhost')) +INTERVAL = int(os.getenv('interval',default=15)) +log_directory = "log" +queries_file_path = str(os.getenv('query_file_path',default="C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\infrastructure\\eval\\query\\neo4j\\queries.txt")) + +# Connection details +uri = f"bolt://{HOST}:{PORT}" +auth = ("","") # Use this for no authentication, or (USERNAME, PASSWORD) tuple for authentication + +cypher_queries = { + "anc": "MATCH path=(node:Host)-[*]->(ancestor) RETURN ancestor, length(path) AS distance_upstream ORDER BY distance_upstream;", + "desc": "MATCH path=(node:Host)<-[*]-(descendant) RETURN descendant, length(path) AS distance_downstream ORDER BY distance_downstream;", + "path": "MATCH path = shortestPath((a:Subject)-[*]-(b:Event)) where a._uuid='0CF2BB3E-36B8-11E8-BF66-D9AA8AFF4A69' RETURN path;", + "2-hop": "MATCH (startNode: Host)-[*1..2]-(neighborhood) RETURN neighborhood;" +} +def load_queries_from_file(file_path): + cypher_queries = {} + with open(file_path, 'r') as file: + for line in file: + if '|' in line: # Simple validation to ignore malformed lines + key, query = line.strip().split('|', 1) + cypher_queries[key] = query + return cypher_queries + +# Function to execute a specific Cypher query and log response time +def execute_and_log_query(query_key, query): + start_time = time.time() + with driver.session() as session: + result = session.run(query) + summary = result.consume() + end_time = time.time() + # Depending on the driver version, you might directly get the execution time from the summary + execution_time = summary.result_available_after + summary.result_consumed_after + + + logging.info(f"Results for Query {query_key}:") + logging.info(f"Python executed in: {end_time - start_time}s") + logging.info(f"DBMS answered in: {execution_time} ms\n") + +# Function to schedule and execute all queries +def schedule_and_execute_queries(): + for query_key, query in cypher_queries.items(): + + execute_and_log_query(query_key=query_key, query=query) + # Example scheduling: execute immediately then every minute for demonstration + # TODO: Create the schedule here + schedule.every(INTERVAL).minutes.do(execute_and_log_query, query_key=query_key, query=query) + # schedule.every(15).minutes.do(execute_and_log_query, query_key=query_key, query=query) + # schedule.every(30).minutes.do() + + while True: + schedule.run_pending() + time.sleep(1) + +# create log directory +if not os.path.exists(log_directory): + os.makedirs(log_directory) + +# Setup logging to file +formatted_time = time.strftime("%Y-%m-%d_%H-%M-%S") +logging.basicConfig(level=logging.INFO, + filename=f"{log_directory}/{formatted_time}_query_log.log", + filemode='a', # Append to the log file if it exists, 'w' to overwrite + format='%(asctime)s - %(levelname)s - %(message)s') + +cypher_queries = load_queries_from_file(queries_file_path) +# Initialize the Neo4j connection +driver = GraphDatabase.driver(uri=uri, auth=auth,encrypted=False) +logging.info(f"Connected to db... {uri}") + +# Main execution block with graceful shutdown +if __name__ == "__main__": + try: + schedule_and_execute_queries() + except KeyboardInterrupt: + logging.info("Script interrupted by user") + finally: + driver.close() + logging.info("Database connection closed.") diff --git a/code/eval/queries/sql/Dockerfile b/code/eval/queries/sql/Dockerfile index 8117f90..6b6ae5c 100644 --- a/code/eval/queries/sql/Dockerfile +++ b/code/eval/queries/sql/Dockerfile @@ -20,5 +20,6 @@ ENV PGUSER='localhost' ENV PGPASSWORD='localhost' ENV PGPORT='localhost' ENV interval=15 +ENV query_file_path=/app/queries CMD [ "python","cdm_sql_queries.py" ] \ No newline at end of file diff --git a/code/eval/queries/sql/cdm_sql_queries.py b/code/eval/queries/sql/cdm_sql_queries.py index 41efd00..520a6fa 100644 --- a/code/eval/queries/sql/cdm_sql_queries.py +++ b/code/eval/queries/sql/cdm_sql_queries.py @@ -11,6 +11,7 @@ username = os.getenv('PGUSER', 'postgres') password = os.getenv('PGPASSWORD', 'postgres') port_id = os.getenv('PGPORT', '5432') # Default to 5432 if not set interval = int(os.getenv('interval',default=15)) +queries_file_path = str(os.getenv('query_file_path',default="C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\infrastructure\\eval\\query\\pg\\queries.txt")) def connect_to_db(): try: @@ -27,9 +28,21 @@ def connect_to_db(): except psycopg2.Error as e: logging.error(f"Error connecting to PostgreSQL database: {e}") return None + +def load_queries_from_file(file_path): + queries = {} + with open(file_path, 'r') as file: + content = file.read() + query_blocks = content.split('---') # Assuming '---' is used as separator between queries + for block in query_blocks: + if '|' in block: + key, query = block.strip().split('|', 1) + queries[key.strip()] = query.strip() + return queries def execute_queries(conn): try: + queries = load_queries_from_file(queries_file_path) # Create a cursor object cur = conn.cursor() diff --git a/code/infrastructure/docker-compose.yml b/code/infrastructure/docker-compose.yml index a9c38d3..a49c1a2 100644 --- a/code/infrastructure/docker-compose.yml +++ b/code/infrastructure/docker-compose.yml @@ -175,7 +175,7 @@ services: test: ["CMD","ls"] interval: 30s timeout: 10s - retries: 2 + retries: 5 start_period: 10s depends_on: ongdb: @@ -217,7 +217,7 @@ services: test: ["CMD","ls"] interval: 30s timeout: 10s - retries: 2 + retries: 5 start_period: 10s depends_on: neo4j: @@ -268,8 +268,10 @@ services: - PGPASSWORD=postgres - PGPORT=5432 - interval=1 + - query_file_path=/app/queries/queries.txt volumes: - ./eval/query/pg/log:/app/log + - ./eval/query/pg/queries:/app/queries depends_on: - pub @@ -280,8 +282,10 @@ services: - interval=1 - db_host=neo4j - db_port=7687 + - query_file_path=/app/queries/queries.txt volumes: - ./eval/query/neo4j/log:/app/log + - ./eval/query/neo4j/queries:/app/queries depends_on: - pub @@ -292,8 +296,10 @@ services: - interval=1 - db_host=ongdb - db_port=7687 + - query_file_path='/app/queries/queries.txt' volumes: - ./eval/query/ongdb/log:/app/log + - ./eval/query/ongdb/queries:/app/queries depends_on: - pub @@ -304,8 +310,10 @@ services: - interval=1 - db_host=memgraph - db_port=7687 + - query_file_path=/app/queries/queries.txt volumes: - ./eval/query/memgraph/log:/app/log + - ./eval/query/memgraph/queries:/app/queries depends_on: - pub -- GitLab