Skip to content
Snippets Groups Projects
Commit 08ead8c4 authored by Sven-Ove Hänsel
Browse files

add theia for paper results

parent 8117d40e
No related branches found
No related tags found
No related merge requests found
kolloquium kolloquium
ma ma
# This is a commen .venv
data/* data/*
.vscode .vscode
code/infrastructure/streaming/clients/pub/data/* code/infrastructure/streaming/clients/pub/data/*
......
...@@ -2,29 +2,29 @@ ...@@ -2,29 +2,29 @@
# Docker login # Docker login
echo 'Logging into docker...' echo 'Logging into docker...'
docker login lab.it.hs-hannover.de:4567 # docker login lab.it.hs-hannover.de:4567
docker login docker.gitlab.gwdg.de
# Docker build and push for each name and path # Docker build and push for each name and path
declare -A paths=( declare -A paths=(
["sub_ongdb"]="./infrastructure/streaming/clients/sub/ongdb" # ["sub_ongdb"]="./infrastructure/streaming/clients/sub/ongdb"
["sub_mem"]="./infrastructure/streaming/clients/sub/memgraph" # ["sub_mem"]="./infrastructure/streaming/clients/sub/memgraph"
["sub_pg"]="./infrastructure/streaming/clients/sub/postgres" # ["sub_pg"]="./infrastructure/streaming/clients/sub/postgres"
["sub_neo4j"]="./infrastructure/streaming/clients/sub/neo4j" # ["sub_neo4j"]="./infrastructure/streaming/clients/sub/neo4j"
["pub_cdm"]="./infrastructure/streaming/clients/pub" ["pub_cdm"]="./infrastructure/streaming/clients/pub"
["neo4j_queries"]="./eval/queries/cypher/neo4j" # ["neo4j_queries"]="./eval/queries/cypher/neo4j"
["memgraph_queries"]="./eval/queries/cypher/memgraph" # ["memgraph_queries"]="./eval/queries/cypher/memgraph"
["ongdb_queries"]="./eval/queries/cypher/ongdb" # ["ongdb_queries"]="./eval/queries/cypher/ongdb"
["sql_queries"]="./eval/queries/sql" # ["sql_queries"]="./eval/queries/sql"
) )
for name in "${!paths[@]}"; do for name in "${!paths[@]}"; do
path=${paths[$name]} path=${paths[$name]}
echo '' echo ''
echo 'Build docker image for '${name}'...' echo 'Build docker image for '${name}'...'
docker build -t lab.it.hs-hannover.de:4567/cwy-p8d-u1/ma_code/${name} ${path} docker build -t docker.gitlab.gwdg.de/sven-ove.haensel/ma_haensel/${name} ${path}
echo '' echo ''
echo 'Push docker image for '${name}'...' echo 'Push docker image for '${name}'...'
docker push lab.it.hs-hannover.de:4567/cwy-p8d-u1/ma_code/${name} docker push docker.gitlab.gwdg.de/sven-ove.haensel/ma_haensel/${name}
done done
echo "Finished Building..." echo "Finished Building..."
\ No newline at end of file
...@@ -95,31 +95,32 @@ services: ...@@ -95,31 +95,32 @@ services:
- ./streaming/broker/log/:/mosquitto/log/ - ./streaming/broker/log/:/mosquitto/log/
- ./streaming/broker/data/:/mosquitto/data/ - ./streaming/broker/data/:/mosquitto/data/
# sub_mem: # don't start sub sub_mem: # don't start sub
# container_name: sub_mem container_name: sub_mem
# image: lab.it.hs-hannover.de:4567/cwy-p8d-u1/ma_code/sub_mem image: lab.it.hs-hannover.de:4567/cwy-p8d-u1/ma_code/sub_mem
# environment: environment:
# - abort_time_limit=999999 - abort_time_limit=999999
# - mem_host=bolt://memgraph:7687 - mem_host=bolt://memgraph:7687
# - mos_host=mos1 - mos_host=mos1
# - mos_port=1883 - mos_port=1883
# depends_on: depends_on:
# memgraph: memgraph:
# condition: service_healthy condition: service_healthy
#restart: true restart: true
pub: pub:
container_name: pub_cdm container_name: pub_cdm
image: lab.it.hs-hannover.de:4567/cwy-p8d-u1/ma_code/pub_cdm image: docker.gitlab.gwdg.de/sven-ove.haensel/ma_haensel/0:latest
# image: lab.it.hs-hannover.de:4567/cwy-p8d-u1/ma_code/pub_cdm
environment: environment:
- lines_per_window= 45 #${WINDOW_SIZE} - lines_per_window= 45 #${WINDOW_SIZE}
# $Window from python start # $Window from python start
- path_to_firstK=/var/lib/import/first1k.json - path_to_firstK=/var/lib/import/first1k.json
- path_data=/var/lib/import/ - path_data=/var/lib/import/theia/
- sleeptime=0.04 - sleeptime=1
- original=false - original=false
volumes: volumes:
- ./streaming/clients/pub/data/theia:/var/lib/import - ./streaming/clients/pub/data/theia/:/var/lib/import/theia/
# depends_on: #don't start sub # depends_on: #don't start sub
# sub_mem: # sub_mem:
# condition: service_started # condition: service_started
......
...@@ -8,24 +8,29 @@ Original_version = os.getenv('original', 'False').lower() in ['true', '1', 't', ...@@ -8,24 +8,29 @@ Original_version = os.getenv('original', 'False').lower() in ['true', '1', 't',
broker_hostname = str(os.getenv('mos_host',default="localhost")) broker_hostname = str(os.getenv('mos_host',default="localhost"))
port = int(os.getenv('mos_port',default="1883")) port = int(os.getenv('mos_port',default="1883"))
lines_per_window = int(os.getenv('lines_per_window',default=1000)) lines_per_window = int(os.getenv('lines_per_window',default=1000))
path = str(os.getenv('path_data',default='C:\\Studium_MIN\\05_Masterarbeit\\thesis\\ma_code\\code\\infrastructure\\streaming\\clients\\pub\\data\\')) path = str(os.getenv('path_data',default='./data/theia/'))
sleep_time = float(os.getenv('sleeptime',default=1)) sleep_time = float(os.getenv('sleeptime',default=1))
files = [ # files = [
'ta1-cadets-e3-official_0.json', # 'ta1-cadets-e3-official_0.json',
'ta1-cadets-e3-official_1.json', # 'ta1-cadets-e3-official_1.json',
'ta1-cadets-e3-official_2.json', # 'ta1-cadets-e3-official_2.json',
'ta1-cadets-e3-official-1_0.json', # 'ta1-cadets-e3-official-1_0.json',
'ta1-cadets-e3-official-1_1.json', # 'ta1-cadets-e3-official-1_1.json',
'ta1-cadets-e3-official-1_2.json', # 'ta1-cadets-e3-official-1_2.json',
'ta1-cadets-e3-official-1_3.json', # 'ta1-cadets-e3-official-1_3.json',
'ta1-cadets-e3-official-1_4.json', # 'ta1-cadets-e3-official-1_4.json',
'ta1-cadets-e3-official-2_0.json', # 'ta1-cadets-e3-official-2_0.json',
'ta1-cadets-e3-official-2_1.json' # 'ta1-cadets-e3-official-2_1.json'
] # ]
files = os.listdir(path)
files.sort() # Sort the files to ensure consistent order
print("Files in directory: ", files)
#files = ['ta1-cadets-e3-official_0.json']# Values to limit the amount of Nodes #files = ['ta1-cadets-e3-official_0.json']# Values to limit the amount of Nodes
#line_count = [100000]# -> 100000 + lines_per_window #line_count = [100000]# -> 100000 + lines_per_window
line_count = [4999999,4999999,3911712,4999999,4999999,4999999,4999999,4999999,2059063,4999999,3433561] # line_count corresponding to each file # line_count = [] # This will be dynamically calculated for each file
client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION1,"Client1") client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION1,"Client1")
topic = "neo4j" topic = "neo4j"
...@@ -47,8 +52,8 @@ def read_moving_window_and_send_data(file_path, lines_per_window, line_count): ...@@ -47,8 +52,8 @@ def read_moving_window_and_send_data(file_path, lines_per_window, line_count):
if i >= line_count: if i >= line_count:
break break
# Read the next lines_per_window lines # Read the next lines_per_window lines
if i + lines_per_window >= line_count: # only interesting when amount limited not whole file if index + lines_per_window > stop_index: # Adjust for the last chunk
lines_per_window = lines_per_window - (i+lines_per_window- line_count) # seems not to work (it's +1 reading so maybe break) lines_per_window = stop_index - index
window_data = [next(file) for _ in range(lines_per_window)] window_data = [next(file) for _ in range(lines_per_window)]
# If no more data is left, break the loop # If no more data is left, break the loop
...@@ -120,10 +125,13 @@ index_pick_json = 0 ...@@ -120,10 +125,13 @@ index_pick_json = 0
try: try:
i = 0 i = 0
for file in files: for file in files:
stop_index = line_count[i] # Dynamically calculate the total number of lines in the file
with open(path + file, 'r') as f:
total_lines = sum(1 for _ in f)
stop_index = total_lines
file_path = path+file file_path = path+file
print("Reading file: ", file) print("Reading file: ", file)
print("Max number of batches: ",line_count[i]/lines_per_window) print("Max number of batches: ",stop_index/lines_per_window)
read_moving_window_and_send_data(file_path, lines_per_window,stop_index) read_moving_window_and_send_data(file_path, lines_per_window,stop_index)
i += 1 i += 1
finally: finally:
......
paho-mqtt
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment