#!/usr/bin/env bash
# Bulk copy one SQL Server table to ClickHouse on Ubuntu (or Debian-based).
#
# Prerequisites (or run with --install-deps):
#   - sqlcmd: Microsoft mssql-tools18 (/opt/mssql-tools18/bin/sqlcmd) or mssql-tools (/opt/mssql-tools/bin)
#   - clickhouse-client
#   Optional: SQLCMD_PATH=/full/path/to/sqlcmd if installed outside PATH
#
# Environment (required for export-import):
#   MSSQL_HOST                e.g. db.example.com
#   MSSQL_PORT                default 1433
#   MSSQL_USER
#   MSSQL_PASSWORD
#   MSSQL_DATABASE
#   MSSQL_TRUST_SERVER_CERT   set to 1 to pass -C (trust self-signed server cert)
#   MSSQL_ENCRYPT             optional; passed to sqlcmd as -N (e.g. optional|mandatory|strict)
#
#   CH_HOST                   default localhost
#   CH_PORT                   native TCP port, default 9000
#   CH_USER                   default default
#   CH_PASSWORD               optional
#   CH_SECURE                 set to 1 to use TLS (--secure)
#   SQLCMD_PATH               optional full path to sqlcmd if not on PATH
#
# Example:
#   export MSSQL_HOST=sql.mycompany.internal MSSQL_USER=ro MSSQL_PASSWORD='***' MSSQL_DATABASE=sales
#   export CH_HOST=ch.mycompany.internal CH_PASSWORD='***'
#   ./mssql_to_clickhouse.sh export-import \
#     --mssql-table "dbo.Orders" \
#     --ch-database analytics \
#     --ch-table orders_raw \
#     --out /tmp/orders.tsv
#   ./mssql_to_clickhouse.sh migrate-db \
#     --ch-database cpm \
#     --out-dir /tmp/mssql_full_export
#
# NOTE(review): passwords are handed to sqlcmd / clickhouse-client on the
# command line (-P / --password) and may be visible in process listings.

set -euo pipefail

SCRIPT_NAME=$(basename "$0")

# Print "<script>: <message>" on stderr and terminate the script with status 1.
die() {
  echo "$SCRIPT_NAME: $*" >&2
  exit 1
}

# Print the command-line help on stdout.
usage() {
  cat <<EOF
Usage:
  $SCRIPT_NAME --install-deps
  $SCRIPT_NAME export        (--mssql-table TABLE | --mssql-query SQL) [--out PATH]
  $SCRIPT_NAME import        --ch-table TABLE [--ch-database DB] [--out PATH]
  $SCRIPT_NAME export-import (--mssql-table TABLE | --mssql-query SQL) --ch-table TABLE
                             [--ch-database DB] [--out PATH]
  $SCRIPT_NAME list-tables   [--mssql-schema SCHEMA]
  $SCRIPT_NAME migrate-db    [--ch-database DB] [--out-dir PATH] [--mssql-schema SCHEMA]
                             [--tables-file PATH]

Options:
  --mssql-table TABLE    SQL Server table to export (runs SELECT * FROM TABLE)
  --mssql-query SQL      Custom query to export instead of --mssql-table
  --mssql-schema SCHEMA  Only consider this schema for list-tables / migrate-db discovery
  --tables-file PATH     migrate-db: file with one SCHEMA.TABLE or SCHEMA<TAB>TABLE per line
                         (# comments allowed). Use when MSSQL catalogs hide most tables
                         unless VIEW DEFINITION is granted.
  --ch-database DB       ClickHouse database (default: \$CH_DATABASE or default)
  --ch-table TABLE       ClickHouse table name
  --out PATH             File path (default: ./mssql_export.tsv)
  --out-dir PATH         Directory for migrate-db TSV files (default: ./mssql_export_all)

Environment:
  MSSQL_HOST, MSSQL_PORT, MSSQL_USER, MSSQL_PASSWORD, MSSQL_DATABASE
  MSSQL_TRUST_SERVER_CERT=1 (optional; trust self-signed SQL Server cert with sqlcmd -C)
  MSSQL_ENCRYPT (optional; forwarded to sqlcmd -N)
  MSSQL_TABLES_FILE optional explicit table list path for migrate-db
  CH_HOST, CH_PORT, CH_USER, CH_PASSWORD, CH_SECURE=1 for TLS
  CH_DOCKER_CONTAINER optional; run clickhouse-client inside this container
  SQLCMD_PATH optional; full path to sqlcmd when not on PATH

Notes:
  - ClickHouse columns must match export order and types.
  - TabSeparated fails if fields contain tab characters; use --mssql-query to sanitize or export CSV elsewhere.
  - Very large tables: run multiple exports with --mssql-query and ranges, then import each file.
EOF
}

# Make sure `sqlcmd` is callable, extending PATH from SQLCMD_PATH or the
# standard Microsoft install directories when necessary. Dies if not found.
ensure_sqlcmd() {
  local dir
  if [[ -n "${SQLCMD_PATH:-}" ]]; then
    [[ -x "$SQLCMD_PATH" ]] || die "SQLCMD_PATH is set but not executable: $SQLCMD_PATH"
    dir="$(dirname "$SQLCMD_PATH")"
    export PATH="${dir}:${PATH}"
  fi
  if command -v sqlcmd >/dev/null 2>&1; then
    return 0
  fi
  for dir in /opt/mssql-tools18/bin /opt/mssql-tools/bin; do
    if [[ -x "$dir/sqlcmd" ]]; then
      export PATH="${dir}:${PATH}"
      return 0
    fi
  done
  die "sqlcmd not found. On this host run once (needs sudo), then open a new shell or re-login:
  $0 --install-deps
If sqlcmd is already installed, add it to PATH or set SQLCMD_PATH=/path/to/sqlcmd"
}

# Make sure a ClickHouse client is reachable: `docker` when
# CH_DOCKER_CONTAINER is set, otherwise a local `clickhouse-client`.
ensure_clickhouse_client() {
  if [[ -n "${CH_DOCKER_CONTAINER:-}" ]]; then
    command -v docker >/dev/null 2>&1 || die "docker not found, but CH_DOCKER_CONTAINER is set"
    return 0
  fi
  command -v clickhouse-client >/dev/null 2>&1 \
    || die "clickhouse-client not found. Run: $0 --install-deps or set CH_DOCKER_CONTAINER"
}

# Die unless every mandatory MSSQL_* connection variable is set and non-empty.
ensure_mssql_env() {
  [[ -n "${MSSQL_HOST:-}" ]] || die "Set MSSQL_HOST"
  [[ -n "${MSSQL_USER:-}" ]] || die "Set MSSQL_USER"
  [[ -n "${MSSQL_PASSWORD:-}" ]] || die "Set MSSQL_PASSWORD"
  [[ -n "${MSSQL_DATABASE:-}" ]] || die "Set MSSQL_DATABASE"
}

# Print the sqlcmd connection arguments, one argument per line, so callers
# can load them with `mapfile -t`. Values containing a newline would be split
# into several arguments by that protocol.
sqlcmd_conn_args() {
  local port="${MSSQL_PORT:-1433}"
  local args=(
    -S "${MSSQL_HOST},${port}"
    -d "$MSSQL_DATABASE"
    -U "$MSSQL_USER"
    -P "$MSSQL_PASSWORD"
  )
  # ODBC 18 validates cert chains by default; -C is commonly needed for self-signed certs.
  if [[ "${MSSQL_TRUST_SERVER_CERT:-0}" == "1" ]]; then
    args+=(-C)
  fi
  if [[ -n "${MSSQL_ENCRYPT:-}" ]]; then
    args+=(-N "$MSSQL_ENCRYPT")
  fi
  printf '%s\n' "${args[@]}"
}

# Run one SQL batch ($1) through sqlcmd and write the rows to stdout as
# headerless, tab-separated UTF-8 text. Returns sqlcmd's exit status (-b makes
# SQL errors non-zero).
sqlcmd_query() {
  ensure_sqlcmd
  ensure_mssql_env
  local sql="${1:-}"
  local conn_args=()
  mapfile -t conn_args < <(sqlcmd_conn_args)
  # stdin is detached so sqlcmd can never eat the input of a caller's
  # `while read` loop (e.g. the --tables-file loop in migrate-db).
  sqlcmd "${conn_args[@]}" \
    -Q "SET NOCOUNT ON; ${sql}" \
    -h -1 -W -s $'\t' -w 65535 -f i:65001,o:65001 -b </dev/null
}

# Fill the array named by $1 with clickhouse-client arguments for query $2,
# built from CH_HOST / CH_PORT / CH_USER / CH_PASSWORD / CH_SECURE.
_clickhouse_client_args() {
  local -n _out="$1"
  local query="${2:-}"
  _out=(
    --host "${CH_HOST:-localhost}"
    --port "${CH_PORT:-9000}"
    --user "${CH_USER:-default}"
    --query "$query"
  )
  if [[ -n "${CH_PASSWORD:-}" ]]; then
    _out+=(--password "$CH_PASSWORD")
  fi
  if [[ "${CH_SECURE:-0}" == "1" ]]; then
    _out+=(--secure)
  fi
}

# Execute a ClickHouse statement ($1) that needs no input data.
# Returns the client's exit status.
run_clickhouse_query() {
  ensure_clickhouse_client
  local ch_args=()
  _clickhouse_client_args ch_args "${1:-}"
  if [[ -n "${CH_DOCKER_CONTAINER:-}" ]]; then
    # Important: do not pass -i here, otherwise docker can consume caller stdin
    # (e.g. migrate-db table loop) and stop after the first row.
    docker exec "$CH_DOCKER_CONTAINER" clickhouse-client "${ch_args[@]}" </dev/null
  else
    clickhouse-client "${ch_args[@]}" </dev/null
  fi
}

# Execute an INSERT statement ($1), streaming file $2 to the client's stdin.
# Returns the client's exit status.
run_clickhouse_insert() {
  ensure_clickhouse_client
  local in_file="${2:-}"
  local ch_args=()
  _clickhouse_client_args ch_args "${1:-}"
  if [[ -n "${CH_DOCKER_CONTAINER:-}" ]]; then
    # -i is required here so the container process receives the file on stdin.
    docker exec -i "$CH_DOCKER_CONTAINER" clickhouse-client "${ch_args[@]}" <"$in_file"
  else
    clickhouse-client "${ch_args[@]}" <"$in_file"
  fi
}

# Double every backtick in $1 so the result can be wrapped in backticks.
escape_ch_ident() {
  local ident="${1:-}"
  local bt='`'
  printf '%s' "${ident//$bt/$bt$bt}"
}

# Print $1 as a backtick-quoted ClickHouse identifier.
_ch_quoted_ident() {
  local bt='`'
  printf '%s%s%s' "$bt" "$(escape_ch_ident "${1:-}")" "$bt"
}

# Double every "]" in $1 so the result can be wrapped in [brackets].
escape_mssql_ident() {
  local ident="${1:-}"
  printf '%s' "${ident//]/]]}"
}

# Escape single quotes for literals embedded in MSSQL dynamically built strings.
escape_mssql_sql_literal() {
  local s="${1:-}"
  local q="'"
  printf '%s' "${s//$q/$q$q}"
}

# Print $1 with every carriage return removed.
strip_cr() {
  local s="${1:-}"
  local cr=$'\r'
  printf '%s' "${s//$cr/}"
}

# Print $1 without leading and trailing whitespace.
_trim() {
  local s="${1:-}"
  s="${s#"${s%%[![:space:]]*}"}"
  s="${s%"${s##*[![:space:]]}"}"
  printf '%s' "$s"
}

# Print the table-discovery query: the de-duplicated union of sys.tables,
# INFORMATION_SCHEMA.TABLES and sys.objects, optionally limited to schema $1.
# Result columns are TABLE_SCHEMA and TABLE_NAME.
mssql_discovery_sql_union() {
  local schema_filter="${1:-}"
  local where_schema_sys=""
  local where_schema_is=""
  local where_schema_obj=""
  local esc=""
  if [[ -n "$schema_filter" ]]; then
    esc="$(escape_mssql_sql_literal "$schema_filter")"
    where_schema_sys="AND s.name = N'${esc}'"
    where_schema_is="AND TABLE_SCHEMA = N'${esc}'"
    where_schema_obj="AND SCHEMA_NAME(o.schema_id) = N'${esc}'"
  fi
  printf '%s' "
SELECT DISTINCT
  CAST(x.TABLE_SCHEMA AS nvarchar(256)) COLLATE DATABASE_DEFAULT AS TABLE_SCHEMA,
  CAST(x.TABLE_NAME AS nvarchar(256)) COLLATE DATABASE_DEFAULT AS TABLE_NAME
FROM (
  SELECT s.name AS TABLE_SCHEMA, t.name AS TABLE_NAME
  FROM sys.tables t
  INNER JOIN sys.schemas s ON s.schema_id = t.schema_id
  WHERE t.is_ms_shipped = 0 ${where_schema_sys}
  UNION ALL
  SELECT TABLE_SCHEMA, TABLE_NAME
  FROM INFORMATION_SCHEMA.TABLES
  WHERE TABLE_TYPE = N'BASE TABLE' ${where_schema_is}
  UNION ALL
  SELECT SCHEMA_NAME(o.schema_id) AS TABLE_SCHEMA, o.name AS TABLE_NAME
  FROM sys.objects o
  WHERE o.type = N'U' AND o.is_ms_shipped = 0 ${where_schema_obj}
) AS x
WHERE NULLIF(LTRIM(RTRIM(TABLE_SCHEMA)), N'') IS NOT NULL
  AND NULLIF(LTRIM(RTRIM(TABLE_NAME)), N'') IS NOT NULL
ORDER BY TABLE_SCHEMA, TABLE_NAME;
"
}

# Print the visible user tables as SCHEMA<TAB>TABLE rows, optionally limited
# to schema $1.
list_mssql_tables() {
  ensure_sqlcmd
  ensure_mssql_env
  local schema_filter="${1:-}"
  sqlcmd_query "$(mssql_discovery_sql_union "$schema_filter")"
}

# Explain on stderr which grants are needed when discovery finds too few
# tables. Reads MSSQL_DATABASE and MSSQL_USER.
migrate_permission_hint() {
  cat <<EOF >&2
$SCRIPT_NAME: MSSQL catalogs only expose tables your login may see metadata for.
If migrate-db discovers too few rows, grant read + metadata visibility (run as dbo/sa):
  USE [${MSSQL_DATABASE}];
  GRANT VIEW DEFINITION ON DATABASE::[${MSSQL_DATABASE}] TO [${MSSQL_USER}];
  EXEC sp_addrolemember N'db_datareader', N'${MSSQL_USER}';
Or migrate using an explicit list from a dbo export:
  $SCRIPT_NAME migrate-db --tables-file ./all_tables.txt --ch-database ...
all_tables.txt format (one table per line):
  dbo.MyTable
  sales.Orders
EOF
}

# Print the SELECT-list expression that exports column $1 of SQL Server type
# $2 as one TabSeparated-safe text field:
#   - NULL becomes the \N marker;
#   - binary-like types are rendered as a hex string;
#   - everything else is converted to nvarchar(max), backslashes are doubled
#     (TabSeparated treats "\" as an escape character) and TAB/LF/CR are
#     replaced by a space.
mssql_column_expr() {
  local col_name="${1:-}"
  local data_type="${2:-}"
  local col_bracketed
  col_bracketed="[$(escape_mssql_ident "$col_name")]"

  # SQL string literals, kept in variables so no backslash ever appears in a
  # printf format string.
  local null_lit="'\\N'"   # '\N'
  local bs_lit="'\\'"      # '\'
  local bs2_lit="'\\\\'"   # '\\'
  local space_lit="' '"

  case "${data_type,,}" in
    binary | varbinary | image | rowversion | timestamp)
      printf 'CASE WHEN %s IS NULL THEN %s ELSE master.dbo.fn_varbintohexstr(%s) END AS %s' \
        "$col_bracketed" "$null_lit" "$col_bracketed" "$col_bracketed"
      ;;
    *)
      printf 'CASE WHEN %s IS NULL THEN %s ELSE REPLACE(REPLACE(REPLACE(REPLACE(CONVERT(nvarchar(max), %s), %s, %s), CHAR(9), %s), CHAR(10), %s), CHAR(13), %s) END AS %s' \
        "$col_bracketed" "$null_lit" "$col_bracketed" \
        "$bs_lit" "$bs2_lit" "$space_lit" "$space_lit" "$space_lit" \
        "$col_bracketed"
      ;;
  esac
}

# Print a SELECT over [schema $1].[table $2] that wraps every column with
# mssql_column_expr, in ORDINAL_POSITION order. Dies when no column metadata
# is readable.
build_mssql_export_query() {
  local schema_name="${1:-}"
  local table_name="${2:-}"
  local tab=$'\t'
  local exprs=()
  local line col_name data_type

  while IFS= read -r line || [[ -n "$line" ]]; do
    # Strip CR like every other metadata reader; a trailing CR on DATA_TYPE
    # would otherwise defeat the type dispatch in mssql_column_expr.
    line="$(strip_cr "$line")"
    [[ -n "$line" ]] || continue
    col_name="${line%%"$tab"*}"
    data_type="${line#*"$tab"}"
    exprs+=("$(mssql_column_expr "$col_name" "$data_type")")
  done < <(
    sqlcmd_query "
SELECT COLUMN_NAME, DATA_TYPE
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = '$(escape_mssql_sql_literal "$schema_name")'
  AND TABLE_NAME = '$(escape_mssql_sql_literal "$table_name")'
ORDER BY ORDINAL_POSITION;
"
  )

  [[ ${#exprs[@]} -gt 0 ]] || die "No columns found for ${schema_name}.${table_name}"

  local select_list="" expr
  for expr in "${exprs[@]}"; do
    select_list+="${select_list:+, }${expr}"
  done
  printf 'SELECT %s FROM [%s].[%s]' \
    "$select_list" \
    "$(escape_mssql_ident "$schema_name")" \
    "$(escape_mssql_ident "$table_name")"
}

# Parse one line from --tables-file: SCHEMA.TABLE, SCHEMA<TAB>TABLE, or bare
# name (dbo default).
# Arguments: $1 raw line, $2 name of the variable receiving the schema,
#            $3 name of the variable receiving the table.
# Returns:   0 when both parts are non-empty; 1 for blank lines, # comments
#            and unparsable input.
parse_mssql_table_line() {
  local raw="$1"
  local -n _sch="$2"
  local -n _tbl="$3"
  local tab=$'\t'

  raw="$(strip_cr "$raw")"
  if [[ -z "$raw" ]] || [[ "$raw" =~ ^[[:space:]]*# ]]; then
    return 1
  fi
  raw="$(_trim "$raw")"
  if [[ -z "$raw" ]]; then
    return 1
  fi

  if [[ "$raw" == *"$tab"* ]]; then
    _sch="${raw%%"$tab"*}"
    _tbl="${raw#*"$tab"}"
  elif [[ "$raw" == *.* ]]; then
    # Split on the first dot only, so the table part may itself contain dots.
    _sch="${raw%%.*}"
    _tbl="${raw#*.}"
  else
    _sch="dbo"
    _tbl="$raw"
  fi
  _sch="$(_trim "$_sch")"
  _tbl="$(_trim "$_tbl")"

  if [[ -z "$_sch" || -z "$_tbl" ]]; then
    return 1
  fi
  return 0
}

# Migrate one SQL Server table: create an all-Nullable(String) MergeTree table
# in ClickHouse, export the rows to <out_dir>/<schema>.<table>.tsv and import
# that file.
# Arguments: $1 ClickHouse database, $2 output directory, $3 schema, $4 table.
# Returns:   0 on success, 1 when any step fails.
#
# The caller invokes this inside `if ...; then`, where bash ignores `set -e`
# for the whole function body, so every step is checked explicitly; otherwise
# a failed export/import would still be reported as "Migrated".
migrate_one_mssql_table() {
  local ch_db="${1:?}"
  local out_dir="${2:?}"
  local schema_name="${3:?}"
  local table_name="${4:?}"
  local ch_table columns_ddl column_line exported_file export_query target

  # Use the same table name in ClickHouse as in SQL Server (no schema__ prefix).
  # If two schemas contain the same table name, the second migrate would target the same CH table.
  ch_table="$table_name"

  columns_ddl=""
  while IFS= read -r column_line || [[ -n "$column_line" ]]; do
    column_line="$(strip_cr "$column_line")"
    [[ -n "$column_line" ]] || continue
    columns_ddl+="${columns_ddl:+, }$(_ch_quoted_ident "$column_line") Nullable(String)"
  done < <(
    sqlcmd_query "
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = '$(escape_mssql_sql_literal "$schema_name")'
  AND TABLE_NAME = '$(escape_mssql_sql_literal "$table_name")'
ORDER BY ORDINAL_POSITION;
"
  )

  if [[ -z "$columns_ddl" ]]; then
    echo "Skipping ${schema_name}.${table_name}: no columns readable in INFORMATION_SCHEMA"
    return 1
  fi

  target="$(_ch_quoted_ident "$ch_db").$(_ch_quoted_ident "$ch_table")"
  if ! run_clickhouse_query "CREATE TABLE IF NOT EXISTS ${target} (${columns_ddl}) ENGINE = MergeTree ORDER BY tuple()"; then
    echo "$SCRIPT_NAME: CREATE TABLE failed for ${ch_db}.${ch_table}" >&2
    return 1
  fi

  if ! export_query="$(build_mssql_export_query "$schema_name" "$table_name")"; then
    echo "$SCRIPT_NAME: could not build export query for ${schema_name}.${table_name}" >&2
    return 1
  fi

  exported_file="${out_dir}/${schema_name}.${table_name}.tsv"
  if ! run_export "" "$export_query" "$exported_file"; then
    echo "$SCRIPT_NAME: export failed for ${schema_name}.${table_name}" >&2
    return 1
  fi
  if ! run_import "$ch_db" "$ch_table" "$exported_file"; then
    echo "$SCRIPT_NAME: import failed for ${schema_name}.${table_name}" >&2
    return 1
  fi

  echo "Migrated ${schema_name}.${table_name} -> ${ch_db}.${ch_table}"
  return 0
}

# Migrate every table from an explicit list file or from catalog discovery.
# Arguments: $1 ClickHouse database (default: $CH_DATABASE or "default"),
#            $2 output directory (default: ./mssql_export_all),
#            $3 optional schema filter for discovery,
#            $4 optional tables file (takes precedence over discovery).
# Dies when nothing is discovered or nothing could be migrated.
run_migrate_db() {
  ensure_sqlcmd
  ensure_clickhouse_client
  ensure_mssql_env
  local ch_db="${1:-${CH_DATABASE:-default}}"
  local out_dir="${2:-./mssql_export_all}"
  local schema_filter="${3:-}"
  local tables_file="${4:-}"
  local tab=$'\t'
  local line schema_name table_name tables_rows
  local discovered=0
  local migrated=0
  local skipped=0
  local use_file=0

  [[ -n "$ch_db" ]] || die "Provide --ch-database or CH_DATABASE"
  mkdir -p -- "$out_dir"
  run_clickhouse_query "CREATE DATABASE IF NOT EXISTS $(_ch_quoted_ident "$ch_db")"

  if [[ -n "$tables_file" ]]; then
    [[ -f "$tables_file" ]] || die "Tables list file not found: $tables_file"
    use_file=1
    while IFS= read -r line || [[ -n "$line" ]]; do
      schema_name=""
      table_name=""
      parse_mssql_table_line "$line" schema_name table_name || continue
      discovered=$((discovered + 1))
      if migrate_one_mssql_table "$ch_db" "$out_dir" "$schema_name" "$table_name"; then
        migrated=$((migrated + 1))
      else
        skipped=$((skipped + 1))
      fi
    done <"$tables_file"
  else
    tables_rows="$(sqlcmd_query "$(mssql_discovery_sql_union "$schema_filter")")"
    while IFS= read -r line || [[ -n "$line" ]]; do
      line="$(strip_cr "$line")"
      [[ -n "$line" ]] || continue
      # Discovery rows are SCHEMA<TAB>TABLE; anything without a tab is not a row.
      [[ "$line" == *"$tab"* ]] || continue
      schema_name="$(_trim "${line%%"$tab"*}")"
      table_name="$(_trim "${line#*"$tab"}")"
      [[ -n "$schema_name" && -n "$table_name" ]] || continue
      discovered=$((discovered + 1))
      if migrate_one_mssql_table "$ch_db" "$out_dir" "$schema_name" "$table_name"; then
        migrated=$((migrated + 1))
      else
        skipped=$((skipped + 1))
      fi
    done <<<"$tables_rows"
  fi

  if [[ "$discovered" -le 0 ]]; then
    if [[ "$use_file" -eq 1 ]]; then
      die "No valid SCHEMA.TABLE rows in tables file (after skipping blanks/comments): ${tables_file}"
    else
      die "No SQL Server tables discovered in ${MSSQL_DATABASE}. Run: ${SCRIPT_NAME} list-tables (or grant VIEW DEFINITION + db_datareader)."
    fi
  fi

  if [[ "$migrated" -eq 0 ]]; then
    migrate_permission_hint
    die "No tables migrated successfully (had $discovered candidate(s), $skipped skipped)."
  fi

  # A single discovered table usually means the login cannot see the rest.
  if [[ "$use_file" -eq 0 && "$discovered" -eq 1 ]]; then
    migrate_permission_hint
  fi

  echo "Done. Queued $discovered table(s), migrated $migrated, skipped/failed-metadata $skipped into ClickHouse database ${ch_db}."
}

# Run a command as root: directly when already root, through sudo otherwise.
_as_root() {
  if [[ "$(id -u)" -eq 0 ]]; then
    "$@"
  else
    sudo "$@"
  fi
}

# Install msodbcsql18 + mssql-tools18 and clickhouse-client from the vendor
# apt repositories (Ubuntu/Debian only) and add sqlcmd to PATH in the shell
# start-up files. Needs root or sudo.
install_deps() {
  [[ "$(id -u)" -eq 0 ]] || command -v sudo >/dev/null 2>&1 || die "Need sudo for --install-deps"

  # shellcheck disable=SC1091
  . /etc/os-release || die "Cannot read /etc/os-release"
  [[ "${ID:-}" == "ubuntu" || "${ID:-}" == "debian" || "${ID_LIKE:-}" == *"debian"* ]] \
    || die "This installer expects Ubuntu or Debian."

  # sudo resets the environment by default, so apt's variables are passed via
  # `env` on the privileged side instead of being exported here.
  local apt_env=(env DEBIAN_FRONTEND=noninteractive)

  _as_root "${apt_env[@]}" apt-get update
  _as_root "${apt_env[@]}" apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release

  # Microsoft ODBC + sqlcmd (Ubuntu codename: use current release; fall back if MS has no prod.list yet)
  # --batch --yes lets a re-run overwrite an existing keyring without prompting.
  curl -fsSL https://packages.microsoft.com/keys/microsoft.asc \
    | _as_root gpg --batch --yes --dearmor -o /etc/apt/trusted.gpg.d/microsoft.gpg

  local ms_ub ms_url ms_list
  ms_ub="$(lsb_release -rs)"
  ms_url="https://packages.microsoft.com/config/ubuntu/${ms_ub}/prod.list"
  ms_list="$(mktemp)" || die "mktemp failed"
  if ! curl -fsSL "$ms_url" -o "$ms_list" 2>/dev/null; then
    echo "$SCRIPT_NAME: No Microsoft prod.list for Ubuntu ${ms_ub}; trying 22.04 repo (packages often install on newer LTS)." >&2
    ms_ub="22.04"
    ms_url="https://packages.microsoft.com/config/ubuntu/${ms_ub}/prod.list"
    if ! curl -fsSL "$ms_url" -o "$ms_list"; then
      rm -f -- "$ms_list"
      die "Cannot download $ms_url"
    fi
  fi
  # install (not mv) so the list ends up root-owned and world-readable.
  _as_root install -m 0644 "$ms_list" /etc/apt/sources.list.d/mssql-release.list
  rm -f -- "$ms_list"

  _as_root "${apt_env[@]}" apt-get update
  _as_root "${apt_env[@]}" ACCEPT_EULA=Y apt-get install -y msodbcsql18 mssql-tools18 unixodbc-dev

  # Single quotes are intentional: $PATH must expand when the rc file is read.
  # shellcheck disable=SC2016
  local path_line='export PATH="$PATH:/opt/mssql-tools18/bin"'
  # Best effort: a missing or read-only start-up file must not abort the install.
  grep -qF 'mssql-tools18/bin' "$HOME/.bashrc" 2>/dev/null || echo "$path_line" >>"$HOME/.bashrc" || true
  grep -qF 'mssql-tools18/bin' "$HOME/.profile" 2>/dev/null || echo "$path_line" >>"$HOME/.profile" || true
  export PATH="$PATH:/opt/mssql-tools18/bin"

  # ClickHouse client
  _as_root mkdir -p /etc/apt/keyrings
  # NOTE(review): this request discards its output and ignores failure; it has
  # no effect on the installation and is kept only to preserve behavior.
  curl -fsSL https://packages.clickhouse.com/rpm/lifecyclePolicies/policy.json >/dev/null 2>&1 || true
  curl -fsSL https://packages.clickhouse.com/deb/pubkey.gpg \
    | _as_root gpg --batch --yes --dearmor -o /etc/apt/keyrings/clickhouse.gpg
  echo "deb [signed-by=/etc/apt/keyrings/clickhouse.gpg] https://packages.clickhouse.com/deb stable main" \
    | _as_root tee /etc/apt/sources.list.d/clickhouse.list >/dev/null
  _as_root "${apt_env[@]}" apt-get update
  _as_root "${apt_env[@]}" apt-get install -y clickhouse-client

  echo "Installed. Open a new shell or run: export PATH=\"\$PATH:/opt/mssql-tools18/bin\""
}

# Export a table ($1) or an explicit query ($2, takes precedence) to the TSV
# file $3 (default ./mssql_export.tsv).
# Returns sqlcmd's exit status, so callers that run with errexit suppressed
# still see failures.
run_export() {
  ensure_sqlcmd
  ensure_mssql_env
  local mssql_table="${1:-}"
  local mssql_query="${2:-}"
  local out_file="${3:-./mssql_export.tsv}"
  local conn_args=()
  mapfile -t conn_args < <(sqlcmd_conn_args)

  local sql
  if [[ -n "$mssql_query" ]]; then
    sql="$mssql_query"
  else
    [[ -n "$mssql_table" ]] || die "Provide --mssql-table or --mssql-query"
    sql="SELECT * FROM ${mssql_table}"
  fi

  mkdir -p -- "$(dirname -- "$out_file")"

  # UTF-8 in/out; tab separator; no column headers (-h -1); trim trailing spaces (-W).
  local rc=0
  sqlcmd "${conn_args[@]}" \
    -Q "SET NOCOUNT ON; ${sql}" \
    -h -1 -W -s $'\t' -w 65535 -f i:65001,o:65001 -o "$out_file" -b </dev/null || rc=$?
  if [[ "$rc" -ne 0 ]]; then
    echo "$SCRIPT_NAME: sqlcmd export failed with status $rc (see $out_file for server messages)" >&2
    return "$rc"
  fi

  echo "Exported to $out_file ($(wc -l <"$out_file") lines)"
}

# Load TSV file $3 (default ./mssql_export.tsv) into ClickHouse table $2 of
# database $1 (default: $CH_DATABASE or "default") using FORMAT TabSeparated.
# Returns the client's exit status.
run_import() {
  local ch_db="${1:-${CH_DATABASE:-default}}"
  local ch_table="${2:-}"
  local in_file="${3:-./mssql_export.tsv}"

  [[ -n "$ch_table" ]] || die "Provide --ch-table"
  [[ -f "$in_file" ]] || die "File not found: $in_file"

  local rc=0
  run_clickhouse_insert \
    "INSERT INTO $(_ch_quoted_ident "$ch_db").$(_ch_quoted_ident "$ch_table") FORMAT TabSeparated" \
    "$in_file" || rc=$?
  if [[ "$rc" -ne 0 ]]; then
    echo "$SCRIPT_NAME: ClickHouse import of $in_file failed with status $rc" >&2
    return "$rc"
  fi

  echo "Imported $in_file into ${ch_db}.${ch_table}"
}

# Entry point: parse the command and its options, then dispatch.
main() {
  if [[ $# -lt 1 ]]; then
    usage
    exit 1
  fi

  local cmd="$1"
  shift || true

  case "$cmd" in
    -h | --help)
      usage
      exit 0
      ;;
    --install-deps)
      install_deps
      exit 0
      ;;
  esac

  local mssql_table=""
  local mssql_query=""
  local mssql_schema=""
  local ch_database=""
  local ch_table=""
  local out_file="./mssql_export.tsv"
  local out_dir="./mssql_export_all"
  local tables_file="${MSSQL_TABLES_FILE:-}"

  while [[ $# -gt 0 ]]; do
    # Every option takes exactly one value; check it exists before reading $2
    # so a trailing option reports a clear error instead of an unbound variable.
    case "$1" in
      --mssql-table | --mssql-query | --mssql-schema | --ch-database | --ch-table | --out | --out-dir | --tables-file)
        [[ $# -ge 2 ]] || die "Option $1 requires a value"
        ;;
      *)
        die "Unknown option: $1"
        ;;
    esac
    case "$1" in
      --mssql-table) mssql_table="$2" ;;
      --mssql-query) mssql_query="$2" ;;
      --mssql-schema) mssql_schema="$2" ;;
      --ch-database) ch_database="$2" ;;
      --ch-table) ch_table="$2" ;;
      --out) out_file="$2" ;;
      --out-dir) out_dir="$2" ;;
      --tables-file) tables_file="$2" ;;
    esac
    shift 2
  done

  local target_db="${ch_database:-${CH_DATABASE:-default}}"

  case "$cmd" in
    export)
      run_export "$mssql_table" "$mssql_query" "$out_file"
      ;;
    import)
      run_import "$target_db" "$ch_table" "$out_file"
      ;;
    export-import)
      [[ -n "$ch_table" ]] || die "export-import requires --ch-table"
      run_export "$mssql_table" "$mssql_query" "$out_file"
      run_import "$target_db" "$ch_table" "$out_file"
      ;;
    list-tables)
      list_mssql_tables "$mssql_schema"
      ;;
    migrate-db)
      run_migrate_db "$target_db" "$out_dir" "$mssql_schema" "$tables_file"
      ;;
    *)
      usage
      die "Unknown command: $cmd"
      ;;
  esac
}

main "$@"