Skip to content

Commit 6a200fb

Browse files
authored
Merge pull request #118 from ClickHouse/revert-117-mongo-snappy
Revert "Remove MongoDB snappy config"
2 parents 145fbad + 21ff393 commit 6a200fb

18 files changed

+1390
-30
lines changed

clickhouse/ddl.sql

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
CREATE TABLE bluesky
22
(
33
`data` JSON(
4-
max_dynamic_paths = 0, -- will become the default for large uses in future
4+
max_dynamic_paths = 0,
55
kind LowCardinality(String),
66
commit.operation LowCardinality(String),
77
commit.collection LowCardinality(String),

mongodb/create_and_load.sh

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,29 @@
11
#!/bin/bash
22

33
# Check if the required arguments are provided
4-
if [[ $# -lt 6 ]]; then
5-
echo "Usage: $0 <DB_NAME> <COLLECTION_NAME> <DATA_DIRECTORY> <NUM_FILES> <SUCCESS_LOG> <ERROR_LOG>"
4+
if [[ $# -lt 7 ]]; then
5+
echo "Usage: $0 <DB_NAME> <COLLECTION_NAME> <DDL_FILE> <DATA_DIRECTORY> <NUM_FILES> <SUCCESS_LOG> <ERROR_LOG>"
66
exit 1
77
fi
88

99
# Arguments
1010
DB_NAME="$1"
1111
COLLECTION_NAME="$2"
12-
DATA_DIRECTORY="$3"
13-
NUM_FILES="$4"
14-
SUCCESS_LOG="$5"
15-
ERROR_LOG="$6"
12+
DDL_FILE="$3"
13+
DATA_DIRECTORY="$4"
14+
NUM_FILES="$5"
15+
SUCCESS_LOG="$6"
16+
ERROR_LOG="$7"
1617

1718
# Validate arguments
19+
[[ ! -f "$DDL_FILE" ]] && { echo "Error: DDL file '$DDL_FILE' does not exist."; exit 1; }
1820
[[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; }
1921
[[ ! "$NUM_FILES" =~ ^[0-9]+$ ]] && { echo "Error: NUM_FILES must be a positive integer."; exit 1; }
2022

2123
# Create database and execute DDL file
2224
mongosh --quiet --eval "
2325
db = db.getSiblingDB('$DB_NAME');
24-
load('ddl.js');
26+
load('$DDL_FILE');
2527
"
2628

2729
echo "Loading data"

mongodb/ddl_snappy.js

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
db.createCollection(
2+
"bluesky",
3+
{ storageEngine: { wiredTiger: { configString: "block_compressor=snappy" } } }
4+
);
5+
6+
db.bluesky.createIndex({"kind": 1, "commit.operation": 1, "commit.collection": 1, "did": 1, "time_us": 1});
File renamed without changes.

mongodb/main.sh

Lines changed: 26 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -36,42 +36,50 @@ fi
3636

3737
benchmark() {
3838
local size=$1
39-
local compression=x
39+
local compression=$2
4040
# Check DATA_DIRECTORY contains the required number of files to run the benchmark
4141
file_count=$(find "$DATA_DIRECTORY" -type f | wc -l)
4242
if (( file_count < size )); then
4343
echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count."
4444
exit 1
4545
fi
46-
./create_and_load.sh "bluesky_${size}m" bluesky "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG"
47-
./total_size.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.total_size"
48-
./data_size.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.data_size"
49-
./index_size.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.index_size"
50-
./count.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.count"
51-
#./query_results.sh "bluesky_${size}m" | tee "${OUTPUT_PREFIX}_bluesky_${size}m.query_results"
52-
./index_usage.sh "bluesky_${size}m" | tee "${OUTPUT_PREFIX}_bluesky_${size}m.index_usage"
53-
./benchmark.sh "bluesky_${size}m" "${OUTPUT_PREFIX}_bluesky_${size}m.results_runtime"
54-
./drop_table.sh "bluesky_${size}m"
46+
./create_and_load.sh "bluesky_${size}m_${compression}" bluesky "ddl_${compression}.js" "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG"
47+
./total_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.total_size"
48+
./data_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.data_size"
49+
./index_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.index_size"
50+
./count.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.count"
51+
#./query_results.sh "bluesky_${size}m_${compression}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.query_results"
52+
./index_usage.sh "bluesky_${size}m_${compression}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.index_usage"
53+
./benchmark.sh "bluesky_${size}m_${compression}" "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.results_runtime"
54+
./drop_table.sh "bluesky_${size}m_${compression}"
5555
}
5656

5757
case $CHOICE in
5858
2)
59-
benchmark 10
59+
benchmark 10 snappy
60+
benchmark 10 zstd
6061
;;
6162
3)
62-
benchmark 100
63+
benchmark 100 snappy
64+
benchmark 100 zstd
6365
;;
6466
4)
65-
benchmark 1000
67+
benchmark 1000 snappy
68+
benchmark 1000 zstd
6669
;;
6770
5)
68-
benchmark 1
69-
benchmark 10
70-
benchmark 100
71-
benchmark 1000
71+
benchmark 1 snappy
72+
benchmark 1 zstd
73+
benchmark 10 snappy
74+
benchmark 10 zstd
75+
benchmark 100 snappy
76+
benchmark 100 zstd
77+
benchmark 1000 snappy
78+
benchmark 1000 zstd
7279
;;
7380
*)
74-
benchmark 1
81+
benchmark 1 snappy
82+
benchmark 1 zstd
7583
;;
7684
esac
7785

0 commit comments

Comments
 (0)