diff --git a/code/build_push_container.sh b/code/build_push_container.sh index e6b6597bd537a859bf81b329bec0dcd8ce96461e..f9bb09fa295ee5b6d5c2419bb397ff5250d29d0f 100644 --- a/code/build_push_container.sh +++ b/code/build_push_container.sh @@ -11,7 +11,7 @@ declare -A paths=( ["sub_pg"]="./infrastructure/streaming/clients/sub/postgres" ["sub_neo4j"]="./infrastructure/streaming/clients/sub/neo4j" ["pub_cdm"]="./infrastructure/streaming/clients/pub" - ["cypher_queries"]="./eval/queries/cypher" + ["cypher_queries"]="./eval/queries/cypher/driver_latest" ["sql_queries"]="./eval/queries/sql" ) diff --git a/code/eval/queries/cypher/Dockerfile b/code/eval/queries/cypher/driver_1_7/Dockerfile similarity index 100% rename from code/eval/queries/cypher/Dockerfile rename to code/eval/queries/cypher/driver_1_7/Dockerfile diff --git a/code/eval/queries/cypher/cdm_cypher_queries.py b/code/eval/queries/cypher/driver_1_7/cdm_cypher_queries.py similarity index 97% rename from code/eval/queries/cypher/cdm_cypher_queries.py rename to code/eval/queries/cypher/driver_1_7/cdm_cypher_queries.py index 05f1cc295586a9f448e28dc771934e49bfea34c8..5fce58e7032332fd8a27c88274700bd3e0c2b1e1 100644 --- a/code/eval/queries/cypher/cdm_cypher_queries.py +++ b/code/eval/queries/cypher/driver_1_7/cdm_cypher_queries.py @@ -65,7 +65,7 @@ logging.basicConfig(level=logging.INFO, # Initialize the Neo4j connection -driver = GraphDatabase.driver(uri=uri, auth=auth, encrypted=False) +driver = GraphDatabase.driver(uri=uri, auth=auth,encrypted=False) logging.info(f"Connected to db... {uri}") # Main execution block with graceful shutdown diff --git a/code/eval/queries/cypher/driver_latest/Dockerfile b/code/eval/queries/cypher/driver_latest/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..5fefbd1c3a953846b01acd43698670c825d2e24e --- /dev/null +++ b/code/eval/queries/cypher/driver_latest/Dockerfile @@ -0,0 +1,22 @@ +# Use an official Python runtime as a base image +FROM python:3.8 + +# Set working directory in the container +WORKDIR /app + +# install dependencies +RUN pip install neo4j +RUN pip install schedule + +ENV DOCKERIZE_VERSION v0.6.1 + +# Copy the Python script into the container +COPY cdm_cypher_queries.py /app/ + +# Set environment variable 'chunk_size' and 'number_of_insertions' +ENV db_port='7687' +ENV db_host='localhost' +ENV interval=15 +ENV query_file_path='/app/queries/queries.txt' + +CMD [ "python","cdm_cypher_queries.py" ] \ No newline at end of file diff --git a/code/eval/queries/cypher/driver_latest/cdm_cypher_queries.py b/code/eval/queries/cypher/driver_latest/cdm_cypher_queries.py new file mode 100644 index 0000000000000000000000000000000000000000..1f3a269643ef64ded83a8f4159d4c2586f9fee1c --- /dev/null +++ b/code/eval/queries/cypher/driver_latest/cdm_cypher_queries.py @@ -0,0 +1,86 @@ +from neo4j import GraphDatabase +import schedule +import time +import logging +import os + +PORT = int(os.getenv('db_port',default=7687)) +HOST = str(os.getenv('db_host',default='localhost')) +INTERVAL = int(os.getenv('interval',default=15)) +log_directory = "log" +queries_file_path = str(os.getenv('query_file_path',default="C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\infrastructure\\eval\\query\\neo4j\\queries.txt")) + +# Connection details +uri = f"bolt://{HOST}:{PORT}" +auth = ("","") # Use this for no authentication, or (USERNAME, PASSWORD) tuple for authentication + +cypher_queries = { + "anc": "MATCH path=(node:Host)-[*]->(ancestor) RETURN ancestor, length(path) AS distance_upstream ORDER BY distance_upstream;", + "desc": "MATCH path=(node:Host)<-[*]-(descendant) RETURN descendant, length(path) AS distance_downstream ORDER BY distance_downstream;", + "path": "MATCH path = shortestPath((a:Subject)-[*]-(b:Event)) where a._uuid='0CF2BB3E-36B8-11E8-BF66-D9AA8AFF4A69' RETURN path;", + "2-hop": "MATCH (startNode: Host)-[*1..2]-(neighborhood) RETURN neighborhood;" +} +def load_queries_from_file(file_path): + cypher_queries = {} + with open(file_path, 'r') as file: + for line in file: + if '|' in line: # Simple validation to ignore malformed lines + key, query = line.strip().split('|', 1) + cypher_queries[key] = query + return cypher_queries + +# Function to execute a specific Cypher query and log response time +def execute_and_log_query(query_key, query): + start_time = time.time() + with driver.session() as session: + result = session.run(query) + summary = result.consume() + end_time = time.time() + # Depending on the driver version, you might directly get the execution time from the summary + execution_time = summary.result_available_after + summary.result_consumed_after + + + logging.info(f"Results for Query {query_key}:") + logging.info(f"Python executed in: {end_time - start_time}s") + logging.info(f"DBMS answered in: {execution_time} ms\n") + +# Function to schedule and execute all queries +def schedule_and_execute_queries(): + for query_key, query in cypher_queries.items(): + + execute_and_log_query(query_key=query_key, query=query) + # Example scheduling: execute immediately then every minute for demonstration + # TODO: Create the schedule here + schedule.every(INTERVAL).minutes.do(execute_and_log_query, query_key=query_key, query=query) + # schedule.every(15).minutes.do(execute_and_log_query, query_key=query_key, query=query) + # schedule.every(30).minutes.do() + + while True: + schedule.run_pending() + time.sleep(1) + +# create log directory +if not os.path.exists(log_directory): + os.makedirs(log_directory) + +# Setup logging to file +formatted_time = time.strftime("%Y-%m-%d_%H-%M-%S") +logging.basicConfig(level=logging.INFO, + filename=f"{log_directory}/{formatted_time}_query_log.log", + filemode='a', # Append to the log file if it exists, 'w' to overwrite + format='%(asctime)s - %(levelname)s - %(message)s') + +cypher_queries = load_queries_from_file(queries_file_path) +# Initialize the Neo4j connection +driver = GraphDatabase.driver(uri=uri, auth=auth,encrypted=False) +logging.info(f"Connected to db... {uri}") + +# Main execution block with graceful shutdown +if __name__ == "__main__": + try: + schedule_and_execute_queries() + except KeyboardInterrupt: + logging.info("Script interrupted by user") + finally: + driver.close() + logging.info("Database connection closed.") diff --git a/code/eval/queries/sql/Dockerfile b/code/eval/queries/sql/Dockerfile index 8117f90f329a7580edac5ea855e3623cd0e5a06f..6b6ae5cd2f743eaaa955a9b1100575dca912954a 100644 --- a/code/eval/queries/sql/Dockerfile +++ b/code/eval/queries/sql/Dockerfile @@ -20,5 +20,6 @@ ENV PGUSER='localhost' ENV PGPASSWORD='localhost' ENV PGPORT='localhost' ENV interval=15 +ENV query_file_path=/app/queries CMD [ "python","cdm_sql_queries.py" ] \ No newline at end of file diff --git a/code/eval/queries/sql/cdm_sql_queries.py b/code/eval/queries/sql/cdm_sql_queries.py index 41efd008f1d80f1311b17fce06d43b4c9d7f34ee..520a6faba0bc4e59ade002985d4a3ee173f4bf64 100644 --- a/code/eval/queries/sql/cdm_sql_queries.py +++ b/code/eval/queries/sql/cdm_sql_queries.py @@ -11,6 +11,7 @@ username = os.getenv('PGUSER', 'postgres') password = os.getenv('PGPASSWORD', 'postgres') port_id = os.getenv('PGPORT', '5432') # Default to 5432 if not set interval = int(os.getenv('interval',default=15)) +queries_file_path = str(os.getenv('query_file_path',default="C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\infrastructure\\eval\\query\\pg\\queries.txt")) def connect_to_db(): try: @@ -27,9 +28,21 @@ def connect_to_db(): except psycopg2.Error as e: logging.error(f"Error connecting to PostgreSQL database: {e}") return None + +def load_queries_from_file(file_path): + queries = {} + with open(file_path, 'r') as file: + content = file.read() + query_blocks = content.split('---') # Assuming '---' is used as separator between queries + for block in query_blocks: + if '|' in block: + key, query = block.strip().split('|', 1) + queries[key.strip()] = query.strip() + return queries def execute_queries(conn): try: + queries = load_queries_from_file(queries_file_path) # Create a cursor object cur = conn.cursor() diff --git a/code/infrastructure/docker-compose.yml b/code/infrastructure/docker-compose.yml index a9c38d3b7867f52be8aa2a726dac5f0b69ad71fb..a49c1a2e46dc5819fcffceb38eafcfa629eb0f45 100644 --- a/code/infrastructure/docker-compose.yml +++ b/code/infrastructure/docker-compose.yml @@ -175,7 +175,7 @@ services: test: ["CMD","ls"] interval: 30s timeout: 10s - retries: 2 + retries: 5 start_period: 10s depends_on: ongdb: @@ -217,7 +217,7 @@ services: test: ["CMD","ls"] interval: 30s timeout: 10s - retries: 2 + retries: 5 start_period: 10s depends_on: neo4j: @@ -268,8 +268,10 @@ services: - PGPASSWORD=postgres - PGPORT=5432 - interval=1 + - query_file_path=/app/queries/queries.txt volumes: - ./eval/query/pg/log:/app/log + - ./eval/query/pg/queries:/app/queries depends_on: - pub @@ -280,8 +282,10 @@ services: - interval=1 - db_host=neo4j - db_port=7687 + - query_file_path=/app/queries/queries.txt volumes: - ./eval/query/neo4j/log:/app/log + - ./eval/query/neo4j/queries:/app/queries depends_on: - pub @@ -292,8 +296,10 @@ services: - interval=1 - db_host=ongdb - db_port=7687 + - query_file_path='/app/queries/queries.txt' volumes: - ./eval/query/ongdb/log:/app/log + - ./eval/query/ongdb/queries:/app/queries depends_on: - pub @@ -304,8 +310,10 @@ services: - interval=1 - db_host=memgraph - db_port=7687 + - query_file_path=/app/queries/queries.txt volumes: - ./eval/query/memgraph/log:/app/log + - ./eval/query/memgraph/queries:/app/queries depends_on: - pub