script
RN APK Build / build (push) Has been cancelled

This commit is contained in:
NishantRajputRN
2026-05-13 17:23:01 +05:30
parent 5d3ed02ef5
commit 99ddc3de84
+553 -216
View File
@@ -1,37 +1,40 @@
#!/usr/bin/env bash
# Migrate SQL Server tables into ClickHouse using sqlcmd + clickhouse-client.
# Bulk copy one SQL Server table to ClickHouse on Ubuntu (or Debian-based).
#
# Recommended paths:
# - Full database (creates Nullable(String) columns, safe TSV): migrate-db
# - One table via temp file (same safety as migrate-db): migrate-table
# Prerequisites (or run with --install-deps):
# - sqlcmd: Microsoft mssql-tools18 (/opt/mssql-tools18/bin/sqlcmd)
# - clickhouse-client
#
# Optional "stream" mode pipes sqlcmd straight into ClickHouse (like a manual
# one-liner). Use only when data has no tabs/newlines and NULL handling matches
# your expectations; prefer migrate-table otherwise.
# Environment (required for export-import):
# MSSQL_HOST e.g. db.example.com
# MSSQL_PORT default 1433
# MSSQL_USER
# MSSQL_PASSWORD
# MSSQL_DATABASE
# MSSQL_TRUST_SERVER_CERT set to 1 to pass -C (trust self-signed server cert)
# MSSQL_ENCRYPT optional; passed to sqlcmd as -N (e.g. optional|mandatory|strict)
#
# Environment (examples — set in your shell or a sourced file):
# export MSSQL_HOST="sql.example.internal"
# export MSSQL_PORT="1433"
# export MSSQL_USER="readonly_user"
# export MSSQL_PASSWORD='<your-password>'   # never commit real credentials
# export MSSQL_DATABASE="my_database"
# export MSSQL_TRUST_SERVER_CERT="1"
# # export MSSQL_ENCRYPT="optional"
# CH_HOST default localhost
# CH_PORT native TCP port, default 9000
# CH_USER default default
# CH_PASSWORD optional
# CH_SECURE set to 1 to use TLS (--secure)
#
# export CH_DOCKER_CONTAINER="clickhouse"
# export CH_USER="default"
# export CH_PASSWORD=""
# export CH_PORT="9000"
# export CH_HOST="127.0.0.1"
# export CH_DATABASE="cpm"
#
# export MSSQL_CH_EXPORT_DIR="./mssql_export_all"
# Example:
# export MSSQL_HOST=sql.mycompany.internal MSSQL_USER=ro MSSQL_PASSWORD='***' MSSQL_DATABASE=sales
# export CH_HOST=ch.mycompany.internal CH_PASSWORD='***'
# ./mssql_to_clickhouse.sh export-import \
# --mssql-table "dbo.Orders" \
# --ch-database analytics \
# --ch-table orders_raw \
# --out /tmp/orders.tsv
# ./mssql_to_clickhouse.sh migrate-db \
# --ch-database cpm \
# --out-dir /tmp/mssql_full_export
#
set -euo pipefail
SCRIPT_NAME=$(basename "$0")
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CORE="${ROOT_DIR}/mssql_clickhouse_migrate.sh"
die() {
echo "$SCRIPT_NAME: $*" >&2
@@ -40,90 +43,40 @@ die() {
usage() {
cat <<EOF
$SCRIPT_NAME — MSSQL → ClickHouse migration (wraps ${CORE##*/}).
$SCRIPT_NAME — export SQL Server data to TSV and load into ClickHouse (Ubuntu/Debian).
Commands:
migrate-db [--mssql-schema SCHEMA] [--tables-file PATH] [--out-dir DIR]
Create ClickHouse database/tables and copy all discoverable MSSQL base tables.
Uses safe export (NULL as \\N, tabs/newlines stripped in text).
--install-deps Install mssql-tools18 + clickhouse-client (needs sudo)
export Export MSSQL table/query to TSV (TabSeparated)
import Load TSV into ClickHouse (INSERT FORMAT TabSeparated)
export-import Run export then import
migrate-db Export and import all MSSQL base tables
list-tables Print table names MSSQL discovery returns (for debugging metadata permissions)
migrate-table --mssql-table SCHEMA.TABLE [--ch-database DB] [--out-dir DIR]
Create table if needed and copy one MSSQL table (delegates to migrate-db with a one-line list).
Options:
--mssql-table SCHEMA.TABLE Table for SELECT * (export / export-import)
--mssql-query 'SQL' Raw SELECT (overrides --mssql-table)
--mssql-schema SCHEMA Limit migrate-db to one MSSQL schema
--tables-file PATH Migrate only these SCHEMA.TABLE rows (also env MSSQL_TABLES_FILE).
One per line: SCHEMA.TABLE or SCHEMA<TAB>TABLE (# comments allowed).
Use when MSSQL catalogs hide most tables unless VIEW DEFINITION is granted.
--ch-database DB ClickHouse database (default: \$CH_DATABASE or default)
--ch-table TABLE ClickHouse table name
--out PATH File path (default: ./mssql_export.tsv)
--out-dir PATH Directory for migrate-db TSV files (default: ./mssql_export_all)
export-import --mssql-table SCHEMA.TABLE [--ch-database DB] [--ch-table NAME] [--out FILE]
Export then import one table (ClickHouse table must already exist with matching columns).
Environment:
MSSQL_HOST, MSSQL_PORT, MSSQL_USER, MSSQL_PASSWORD, MSSQL_DATABASE
MSSQL_TRUST_SERVER_CERT=1 (optional; trust self-signed SQL Server cert with sqlcmd -C)
MSSQL_ENCRYPT (optional; forwarded to sqlcmd -N)
MSSQL_TABLES_FILE optional explicit table list path for migrate-db
CH_HOST, CH_PORT, CH_USER, CH_PASSWORD, CH_SECURE=1 for TLS
CH_DOCKER_CONTAINER optional; run clickhouse-client inside this container
list-tables [--mssql-schema SCHEMA]
Print SCHEMA<TAB>TABLE rows from SQL Server discovery.
stream-table --mssql-table SCHEMA.TABLE [--ch-database DB] [--ch-table NAME]
Pipe: sqlcmd SELECT * | clickhouse-client INSERT FORMAT TabSeparated.
Less safe than migrate-table; for quick tests or clean numeric/text data.
print-env
Print current MSSQL/CH-related environment (passwords masked).
Environment defaults this wrapper applies before delegating:
MSSQL_PORT=\${MSSQL_PORT:-1433}
CH_PORT=\${CH_PORT:-9000}
CH_USER=\${CH_USER:-default}
CH_HOST=\${CH_HOST:-127.0.0.1}
CH_DATABASE (or pass --ch-database) — ClickHouse database name, e.g. cpm
Examples:
export CH_DATABASE=cpm CH_DOCKER_CONTAINER=clickhouse MSSQL_TRUST_SERVER_CERT=1
$SCRIPT_NAME migrate-table --mssql-table dbo.Category_Execution
$SCRIPT_NAME migrate-db --ch-database cpm --out-dir ./ch_export
EOF
}
# Fill in connection defaults for every setting that is unset or empty, then
# export them so child processes inherit the resolved values.
# Globals: MSSQL_PORT, CH_PORT, CH_USER, CH_HOST (read, written, exported)
apply_defaults() {
  : "${MSSQL_PORT:=1433}"
  : "${CH_PORT:=9000}"
  : "${CH_USER:=default}"
  : "${CH_HOST:=127.0.0.1}"
  export MSSQL_PORT CH_PORT CH_USER CH_HOST
}
# Abort unless the core script exists as a readable regular file.
# The script is launched with `bash "$CORE"`, so the execute bit is not
# required, but a directory (which passes a bare -x test) must be rejected.
# Globals: CORE (read)
ensure_core() {
  [[ -f "$CORE" && -r "$CORE" ]] || die "Missing core script: $CORE"
}
# Run the core script with the given arguments after applying environment
# defaults and confirming the core script is present.
# Arguments: forwarded verbatim to the core script
# Returns:   exit status of the core script
delegate() {
  apply_defaults
  ensure_core
  local -a core_cmd=(bash "$CORE")
  "${core_cmd[@]}" "$@"
}
# Print a placeholder for a secret so it can be shown safely:
# "(empty)" when the value is missing or empty, "***" otherwise.
# Arguments: $1 - the secret value (optional)
mask() {
  case "${1:-}" in
    "") printf "(empty)" ;;
    *) printf "***" ;;
  esac
}
cmd_print_env() {
apply_defaults
cat <<EOF
MSSQL_HOST=${MSSQL_HOST:-}
MSSQL_PORT=${MSSQL_PORT:-}
MSSQL_USER=${MSSQL_USER:-}
MSSQL_PASSWORD=$(mask "${MSSQL_PASSWORD:-}")
MSSQL_DATABASE=${MSSQL_DATABASE:-}
MSSQL_TRUST_SERVER_CERT=${MSSQL_TRUST_SERVER_CERT:-}
MSSQL_ENCRYPT=${MSSQL_ENCRYPT:-}
MSSQL_TABLES_FILE=${MSSQL_TABLES_FILE:-}
CH_DOCKER_CONTAINER=${CH_DOCKER_CONTAINER:-}
CH_HOST=${CH_HOST:-}
CH_PORT=${CH_PORT:-}
CH_USER=${CH_USER:-}
CH_PASSWORD=$(mask "${CH_PASSWORD:-}")
CH_SECURE=${CH_SECURE:-}
CH_DATABASE=${CH_DATABASE:-}
MSSQL_CH_EXPORT_DIR=${MSSQL_CH_EXPORT_DIR:-}
Notes:
- ClickHouse columns must match export order and types.
- TabSeparated fails if fields contain tab characters; use --mssql-query to sanitize or export CSV elsewhere.
- Very large tables: run multiple exports with --mssql-query and ranges, then import each file.
EOF
}
@@ -135,7 +88,15 @@ ensure_sqlcmd() {
export PATH="/opt/mssql-tools18/bin:$PATH"
return 0
fi
die "sqlcmd not found (install mssql-tools18 or add to PATH)"
die "sqlcmd not found. Install mssql-tools18 or run: $0 --install-deps"
}
# Verify that a way to run clickhouse-client is available.
# When CH_DOCKER_CONTAINER is set the client runs via docker, so only docker
# itself has to be on PATH; otherwise a local clickhouse-client is required.
# Globals: CH_DOCKER_CONTAINER (read)
ensure_clickhouse_client() {
  if [[ -z "${CH_DOCKER_CONTAINER:-}" ]]; then
    if ! command -v clickhouse-client >/dev/null 2>&1; then
      die "clickhouse-client not found. Run: $0 --install-deps or set CH_DOCKER_CONTAINER"
    fi
    return 0
  fi

  if ! command -v docker >/dev/null 2>&1; then
    die "docker not found, but CH_DOCKER_CONTAINER is set"
  fi
  return 0
}
ensure_mssql_env() {
@@ -154,17 +115,77 @@ sqlcmd_conn_args() {
-U "$MSSQL_USER"
-P "$MSSQL_PASSWORD"
)
# ODBC 18 validates cert chains by default; -C is commonly needed for self-signed certs.
if [[ "${MSSQL_TRUST_SERVER_CERT:-0}" == "1" ]]; then
args+=(-C)
fi
if [[ -n "${MSSQL_ENCRYPT:-}" ]]; then
args+=(-N "$MSSQL_ENCRYPT")
fi
printf '%s\n' "${args[@]}"
}
ch_default_database() {
printf '%s' "${CH_DATABASE:-default}"
# Run one T-SQL batch through sqlcmd and print the result as headerless,
# tab-separated UTF-8 text on stdout.
# Arguments: $1 - SQL text (prefixed with SET NOCOUNT ON)
# Returns:   sqlcmd's exit status (-b makes sqlcmd fail on SQL errors)
sqlcmd_query() {
  ensure_sqlcmd
  ensure_mssql_env
  local sql="${1:-}"
  # Keep the connection arguments function-local; mapfile would otherwise
  # create/overwrite a global array named conn_args.
  local -a conn_args=()
  mapfile -t conn_args < <(sqlcmd_conn_args)
  # -h -1: no headers; -W: trim trailing spaces; -s TAB: column separator;
  # -w 65535: avoid line wrapping; -f: UTF-8 in and out.
  sqlcmd "${conn_args[@]}" \
    -Q "SET NOCOUNT ON; ${sql}" \
    -h -1 -W -s $'\t' -w 65535 -f i:65001,o:65001 -b
}
# Execute a single ClickHouse statement with clickhouse-client, either
# locally or inside the container named by CH_DOCKER_CONTAINER.
# Arguments: $1 - query text
# Globals:   CH_HOST, CH_PORT, CH_USER, CH_PASSWORD, CH_SECURE,
#            CH_DOCKER_CONTAINER (all read)
# Returns:   exit status of clickhouse-client / docker exec
run_clickhouse_query() {
  ensure_clickhouse_client
  local sql_text="${1:-}"
  local -a client_args=()

  client_args+=(--host "${CH_HOST:-localhost}")
  client_args+=(--port "${CH_PORT:-9000}")
  client_args+=(--user "${CH_USER:-default}")
  client_args+=(--query "$sql_text")
  if [[ -n "${CH_PASSWORD:-}" ]]; then
    client_args+=(--password "$CH_PASSWORD")
  fi
  if [[ "${CH_SECURE:-0}" == "1" ]]; then
    client_args+=(--secure)
  fi

  if [[ -z "${CH_DOCKER_CONTAINER:-}" ]]; then
    clickhouse-client "${client_args[@]}"
    return
  fi

  # Deliberately no -i: an interactive docker exec would read the caller's
  # stdin (for example the table list driving a migrate-db loop) and the
  # loop would stop after its first row.
  docker exec "$CH_DOCKER_CONTAINER" clickhouse-client "${client_args[@]}"
}
# Run an INSERT-style ClickHouse query, feeding it the contents of a file on
# stdin, either locally or inside the container named by CH_DOCKER_CONTAINER.
# Arguments: $1 - query text (e.g. INSERT INTO ... FORMAT TabSeparated)
#            $2 - path of the data file to stream into the client
# Globals:   CH_HOST, CH_PORT, CH_USER, CH_PASSWORD, CH_SECURE,
#            CH_DOCKER_CONTAINER (all read)
# Returns:   exit status of clickhouse-client / docker exec
run_clickhouse_insert() {
  ensure_clickhouse_client
  local query="${1:-}"
  local in_file="${2:-}"
  local host="${CH_HOST:-localhost}"
  local port="${CH_PORT:-9000}"
  local user="${CH_USER:-default}"

  # Fail with a clear message instead of a raw redirection error when the
  # data file is missing, empty-named, or unreadable.
  [[ -n "$in_file" && -f "$in_file" && -r "$in_file" ]] \
    || die "Input file not found or unreadable: ${in_file:-<none>}"

  local -a args=(
    --host "$host"
    --port "$port"
    --user "$user"
    --query "$query"
  )
  if [[ -n "${CH_PASSWORD:-}" ]]; then
    args+=(--password "$CH_PASSWORD")
  fi
  if [[ "${CH_SECURE:-0}" == "1" ]]; then
    args+=(--secure)
  fi

  if [[ -n "${CH_DOCKER_CONTAINER:-}" ]]; then
    # -i is required here so the container process receives the file on stdin.
    docker exec -i "$CH_DOCKER_CONTAINER" clickhouse-client "${args[@]}" <"$in_file"
  else
    clickhouse-client "${args[@]}" <"$in_file"
  fi
}
escape_ch_ident() {
@@ -173,111 +194,379 @@ escape_ch_ident() {
printf "%s" "$ident"
}
cmd_stream_table() {
apply_defaults
# Escape a SQL Server identifier for use inside [brackets] by doubling every
# closing bracket. The surrounding brackets are added by the caller.
# Arguments: $1 - raw identifier
# Outputs:   escaped identifier on stdout (no trailing newline)
escape_mssql_ident() {
  local name="${1:-}"
  printf '%s' "${name//\]/]]}"
}
# Escape a value for embedding inside a single-quoted T-SQL string literal by
# doubling every single quote.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
escape_mssql_sql_literal() {
  local s="${1:-}"
  # Hold the quote in a variable: writing \'\' as the replacement text inside
  # a double-quoted expansion depends on version-specific quote handling and
  # can leave literal backslashes in the result.
  local q="'"
  printf '%s' "${s//$q/$q$q}"
}
# Print the argument with every carriage return removed.
# Arguments: $1 - text that may contain CR characters
strip_cr() {
  local text="$1"
  local cr=$'\r'
  printf '%s' "${text//"$cr"/}"
}
# Emit the T-SQL that lists user tables as (TABLE_SCHEMA, TABLE_NAME) rows.
# Three catalog sources (sys.tables, INFORMATION_SCHEMA.TABLES, sys.objects)
# are unioned and de-duplicated by the outer SELECT DISTINCT.
# Arguments: $1 - optional schema name; when given, each source is filtered
#                 to that schema
# Outputs:   SQL text on stdout
mssql_discovery_sql_union() {
  local only_schema="${1:-}"
  local sys_clause="" is_clause="" obj_clause="" quoted=""

  if [[ -n "$only_schema" ]]; then
    quoted="$(escape_mssql_sql_literal "$only_schema")"
    sys_clause="AND s.name = N'${quoted}'"
    is_clause="AND TABLE_SCHEMA = N'${quoted}'"
    obj_clause="AND SCHEMA_NAME(o.schema_id) = N'${quoted}'"
  fi

  # One argument per output line; the empty first argument produces the
  # leading blank line of the statement.
  printf '%s\n' \
    "" \
    "SELECT DISTINCT" \
    "CAST(x.TABLE_SCHEMA AS nvarchar(256)) COLLATE DATABASE_DEFAULT AS TABLE_SCHEMA," \
    "CAST(x.TABLE_NAME AS nvarchar(256)) COLLATE DATABASE_DEFAULT AS TABLE_NAME" \
    "FROM (" \
    "SELECT s.name AS TABLE_SCHEMA, t.name AS TABLE_NAME" \
    "FROM sys.tables t" \
    "INNER JOIN sys.schemas s ON s.schema_id = t.schema_id" \
    "WHERE t.is_ms_shipped = 0 ${sys_clause}" \
    "UNION ALL" \
    "SELECT TABLE_SCHEMA, TABLE_NAME" \
    "FROM INFORMATION_SCHEMA.TABLES" \
    "WHERE TABLE_TYPE = N'BASE TABLE' ${is_clause}" \
    "UNION ALL" \
    "SELECT SCHEMA_NAME(o.schema_id) AS TABLE_SCHEMA, o.name AS TABLE_NAME" \
    "FROM sys.objects o" \
    "WHERE o.type = N'U' AND o.is_ms_shipped = 0 ${obj_clause}" \
    ") AS x" \
    "WHERE NULLIF(LTRIM(RTRIM(TABLE_SCHEMA)), N'') IS NOT NULL" \
    "AND NULLIF(LTRIM(RTRIM(TABLE_NAME)), N'') IS NOT NULL" \
    "ORDER BY TABLE_SCHEMA, TABLE_NAME;"
}
# Print the tables SQL Server discovery returns, one SCHEMA<TAB>TABLE row per
# line, optionally limited to a single schema.
# Arguments: $1 - optional schema filter
list_mssql_tables() {
  ensure_sqlcmd
  ensure_mssql_env
  local -a query_args=("$(mssql_discovery_sql_union "${1:-}")")
  sqlcmd_query "${query_args[@]}"
}
# Print guidance on stderr for the case where the SQL Server login cannot see
# table metadata: the grants to request, and the --tables-file alternative.
# Globals: SCRIPT_NAME, MSSQL_DATABASE, MSSQL_USER (read)
migrate_permission_hint() {
  printf '%s\n' \
    "${SCRIPT_NAME}: MSSQL catalogs only expose tables your login may see metadata for." \
    "If migrate-db discovers too few rows, grant read + metadata visibility (run as dbo/sa):" \
    "USE [${MSSQL_DATABASE}];" \
    "GRANT VIEW DEFINITION ON DATABASE::[${MSSQL_DATABASE}] TO [${MSSQL_USER}];" \
    "EXEC sp_addrolemember N'db_datareader', N'${MSSQL_USER}';" \
    "Or migrate using an explicit list from a dbo export:" \
    "${SCRIPT_NAME} migrate-db --tables-file ./all_tables.txt --ch-database ..." \
    "all_tables.txt format (one table per line):" \
    "dbo.MyTable" \
    "sales.Orders" >&2
}
# Build the SELECT-list expression that exports one column as TabSeparated-safe
# text, aliased back to the original column name.
#   - NULL becomes the TabSeparated NULL marker \N.
#   - Binary-like types are rendered as hex via fn_varbintohexstr.
#   - datetime/smalldatetime use CONVERT style 121 (yyyy-mm-dd hh:mi:ss.mmm);
#     the default style prints "mon dd yyyy hh:miAM" and drops the seconds.
#   - money/smallmoney use style 2 (four decimals); the default keeps two.
#   - Every other type is converted with CONVERT(nvarchar(max), col).
#   - In text output, literal backslashes are doubled so the TabSeparated
#     reader does not treat them as escape sequences (or a literal \N as NULL);
#     tab, LF and CR are each replaced by a single space.
# Arguments: $1 - column name (unescaped)
#            $2 - SQL Server data type name (case-insensitive)
# Outputs:   the expression on stdout (no trailing newline)
mssql_column_expr() {
  local col_name="${1:-}"
  local data_type="${2:-}"
  local col_escaped col_bracketed value_expr
  local null_marker="'\\N'"

  col_escaped="$(escape_mssql_ident "$col_name")"
  col_bracketed="[${col_escaped}]"
  # Type names arrive from sqlcmd output; drop any stray CR or padding.
  data_type="${data_type//[[:space:]]/}"

  case "${data_type,,}" in
    binary | varbinary | image | rowversion | timestamp)
      # Hex output contains no characters that need TabSeparated escaping.
      printf 'CASE WHEN %s IS NULL THEN %s ELSE master.dbo.fn_varbintohexstr(%s) END AS [%s]' \
        "$col_bracketed" "$null_marker" "$col_bracketed" "$col_escaped"
      return 0
      ;;
    datetime | smalldatetime)
      value_expr="CONVERT(nvarchar(30), ${col_bracketed}, 121)"
      ;;
    money | smallmoney)
      value_expr="CONVERT(nvarchar(30), ${col_bracketed}, 2)"
      ;;
    *)
      value_expr="CONVERT(nvarchar(max), ${col_bracketed})"
      ;;
  esac

  # CHAR(92) is the backslash; it is escaped first, then the separators are
  # neutralised.
  printf 'CASE WHEN %s IS NULL THEN %s ELSE REPLACE(REPLACE(REPLACE(REPLACE(%s, CHAR(92), CHAR(92) + CHAR(92)), CHAR(9), '"' '"'), CHAR(10), '"' '"'), CHAR(13), '"' '"') END AS [%s]' \
    "$col_bracketed" "$null_marker" "$value_expr" "$col_escaped"
}
# Build the SELECT statement that exports every column of one table as
# TabSeparated-safe text, in ordinal order.
# Column names and types are read from INFORMATION_SCHEMA.COLUMNS and each
# one is turned into an expression by mssql_column_expr.
# Arguments: $1 - schema name (unescaped)
#            $2 - table name (unescaped)
# Outputs:   "SELECT <expr>, <expr>, ... FROM [schema].[table]" on stdout
# Exits (via die) when no column metadata is returned for the table.
build_mssql_export_query() {
  local schema_name="${1:-}"
  local table_name="${2:-}"
  local schema_escaped table_escaped schema_literal table_literal
  local line col_name data_type select_list
  local -a exprs=()

  schema_escaped="$(escape_mssql_ident "$schema_name")"
  table_escaped="$(escape_mssql_ident "$table_name")"
  schema_literal="$(escape_mssql_sql_literal "$schema_name")"
  table_literal="$(escape_mssql_sql_literal "$table_name")"

  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line//$'\r'/}"
    # A metadata row is always NAME<TAB>TYPE; anything else is not a column.
    [[ "$line" == *$'\t'* ]] || continue
    col_name="${line%%$'\t'*}"
    data_type="${line#*$'\t'}"
    [[ -n "$col_name" ]] || continue
    exprs+=("$(mssql_column_expr "$col_name" "$data_type")")
  done < <(
    # N'...' keeps non-ASCII schema/table names intact when compared with the
    # nvarchar catalog columns.
    sqlcmd_query "
SELECT COLUMN_NAME, DATA_TYPE
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = N'${schema_literal}' AND TABLE_NAME = N'${table_literal}'
ORDER BY ORDINAL_POSITION;
"
  )

  ((${#exprs[@]} > 0)) || die "No columns found for ${schema_name}.${table_name}"

  # Join with ", " explicitly; "${array[*]}" would only use the first
  # character of IFS as the separator.
  printf -v select_list '%s, ' "${exprs[@]}"
  select_list="${select_list%, }"
  printf 'SELECT %s FROM [%s].[%s]' "$select_list" "$schema_escaped" "$table_escaped"
}
# Split one --tables-file line into schema and table.
# Accepted forms: SCHEMA.TABLE, SCHEMA<TAB>TABLE, or a bare table name (the
# schema then defaults to dbo). Blank lines and lines whose first
# non-whitespace character is '#' are rejected.
# Arguments: $1 - the raw line
#            $2 - name of the caller's variable that receives the schema
#            $3 - name of the caller's variable that receives the table
# Returns:   0 when both parts are non-empty, 1 otherwise
parse_mssql_table_line() {
  local raw="$1"
  local -n _sch="$2"
  local -n _tbl="$3"

  raw="$(strip_cr "$raw")"
  if [[ -z "$raw" || "$raw" =~ ^[[:space:]]*# ]]; then
    return 1
  fi

  # Trim leading, then trailing, whitespace.
  raw="${raw#"${raw%%[![:space:]]*}"}"
  raw="${raw%"${raw##*[![:space:]]}"}"
  if [[ -z "$raw" ]]; then
    return 1
  fi

  # A tab separator takes precedence over a dot; in both cases the split is
  # at the first occurrence, so the table part may itself contain dots.
  case "$raw" in
    *$'\t'*)
      _sch="${raw%%$'\t'*}"
      _tbl="${raw#*$'\t'}"
      ;;
    *.*)
      _sch="${raw%%.*}"
      _tbl="${raw#*.}"
      ;;
    *)
      _sch="dbo"
      _tbl="$raw"
      ;;
  esac

  # Trim each part individually (e.g. "dbo . Orders").
  _sch="${_sch#"${_sch%%[![:space:]]*}"}"
  _sch="${_sch%"${_sch##*[![:space:]]}"}"
  _tbl="${_tbl#"${_tbl%%[![:space:]]*}"}"
  _tbl="${_tbl%"${_tbl##*[![:space:]]}"}"

  if [[ -z "$_sch" || -z "$_tbl" ]]; then
    return 1
  fi
  return 0
}
# Copy one SQL Server table into ClickHouse: create the target table (all
# columns Nullable(String)) if needed, export the source rows to a TSV file,
# then import that file.
# Arguments: $1 - ClickHouse database
#            $2 - directory for the exported TSV file
#            $3 - SQL Server schema name
#            $4 - SQL Server table name
# Outputs:   progress on stdout, step failures on stderr
# Returns:   0 when every step succeeded, 1 when the table was skipped or a
#            step reported failure
#
# This function is called from an `if` condition, where `set -e` is suspended
# for the whole call, so every step's status is checked explicitly; otherwise
# a failed step would fall through to the "Migrated" message and a 0 status.
migrate_one_mssql_table() {
  local ch_db="${1:?}"
  local out_dir="${2:?}"
  local schema_name="${3:?}"
  local table_name="${4:?}"
  local ch_table columns_ddl column_line exported_file export_query create_sql
  local schema_literal table_literal
  local label="${schema_name}.${table_name}"

  # Use the same table name in ClickHouse as in SQL Server (no schema__ prefix).
  # If two schemas contain the same table name, the second migrate would target
  # the same CH table.
  ch_table="${table_name}"

  schema_literal="$(escape_mssql_sql_literal "$schema_name")"
  table_literal="$(escape_mssql_sql_literal "$table_name")"

  columns_ddl=""
  while IFS= read -r column_line || [[ -n "$column_line" ]]; do
    column_line="$(strip_cr "$column_line")"
    [[ -n "$column_line" ]] || continue
    [[ -z "$columns_ddl" ]] || columns_ddl+=", "
    columns_ddl+="\`$(escape_ch_ident "$column_line")\` Nullable(String)"
  done < <(
    # N'...' keeps non-ASCII names intact when compared with catalog columns.
    sqlcmd_query "
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = N'${schema_literal}' AND TABLE_NAME = N'${table_literal}'
ORDER BY ORDINAL_POSITION;
"
  )

  if [[ -z "$columns_ddl" ]]; then
    echo "Skipping ${label}: no columns readable in INFORMATION_SCHEMA"
    return 1
  fi

  create_sql="CREATE TABLE IF NOT EXISTS \`$(escape_ch_ident "$ch_db")\`.\`$(escape_ch_ident "$ch_table")\` (${columns_ddl}) ENGINE = MergeTree ORDER BY tuple()"
  if ! run_clickhouse_query "$create_sql"; then
    echo "Skipping ${label}: could not create ClickHouse table ${ch_db}.${ch_table}" >&2
    return 1
  fi

  if ! export_query="$(build_mssql_export_query "$schema_name" "$table_name")"; then
    echo "Skipping ${label}: could not build the export query" >&2
    return 1
  fi

  exported_file="${out_dir}/${schema_name}.${table_name}.tsv"
  if ! run_export "" "$export_query" "$exported_file"; then
    echo "Skipping ${label}: export to ${exported_file} failed" >&2
    return 1
  fi
  if ! run_import "$ch_db" "$ch_table" "$exported_file"; then
    echo "Skipping ${label}: import into ${ch_db}.${ch_table} failed" >&2
    return 1
  fi

  echo "Migrated ${label} -> ${ch_db}.${ch_table}"
  return 0
}
# Migrate a set of SQL Server tables into one ClickHouse database.
# The table set comes either from an explicit list file or from catalog
# discovery (optionally limited to one schema).
# Arguments: $1 - ClickHouse database (default: $CH_DATABASE, then "default")
#            $2 - directory for exported TSV files (default ./mssql_export_all)
#            $3 - optional SQL Server schema filter (discovery mode only)
#            $4 - optional path of a tables list file
# Outputs:   per-table progress and a final summary on stdout
# Exits (via die) when no candidate table is found or none could be migrated.
run_migrate_db() {
  ensure_sqlcmd
  ensure_clickhouse_client
  ensure_mssql_env

  local ch_db="${1:-${CH_DATABASE:-default}}"
  local out_dir="${2:-./mssql_export_all}"
  local schema_filter="${3:-}"
  local tables_file="${4:-}"
  local line schema_name table_name tables_rows idx
  local -a queued_schemas=() queued_tables=()
  local discovered=0
  local migrated=0
  local skipped=0
  local use_file=0

  [[ -n "$ch_db" ]] || die "Provide --ch-database or CH_DATABASE"
  mkdir -p "$out_dir"
  run_clickhouse_query "CREATE DATABASE IF NOT EXISTS \`$(escape_ch_ident "$ch_db")\`"

  # Phase 1: collect the candidate tables. Nothing is migrated while a list
  # is being read, so a child process (sqlcmd, docker exec, ...) can never
  # consume the remaining lines of the list from the loop's stdin.
  if [[ -n "$tables_file" ]]; then
    [[ -f "$tables_file" ]] || die "Tables list file not found: $tables_file"
    use_file=1
    while IFS= read -r line || [[ -n "$line" ]]; do
      schema_name=""
      table_name=""
      parse_mssql_table_line "$line" schema_name table_name || continue
      queued_schemas+=("$schema_name")
      queued_tables+=("$table_name")
    done <"$tables_file"
  else
    tables_rows="$(sqlcmd_query "$(mssql_discovery_sql_union "$schema_filter")")"
    while IFS= read -r line || [[ -n "$line" ]]; do
      line="$(strip_cr "$line")"
      # Discovery rows are SCHEMA<TAB>TABLE; ignore anything else.
      [[ "$line" == *$'\t'* ]] || continue
      schema_name="${line%%$'\t'*}"
      table_name="${line#*$'\t'}"
      schema_name="${schema_name#"${schema_name%%[![:space:]]*}"}"
      schema_name="${schema_name%"${schema_name##*[![:space:]]}"}"
      table_name="${table_name#"${table_name%%[![:space:]]*}"}"
      table_name="${table_name%"${table_name##*[![:space:]]}"}"
      [[ -n "$schema_name" && -n "$table_name" ]] || continue
      queued_schemas+=("$schema_name")
      queued_tables+=("$table_name")
    done <<<"$tables_rows"
  fi

  discovered=${#queued_tables[@]}
  if [[ "$discovered" -le 0 ]]; then
    if [[ "$use_file" -eq 1 ]]; then
      die "No valid SCHEMA.TABLE rows in tables file (after skipping blanks/comments): ${tables_file}"
    else
      die "No SQL Server tables discovered in ${MSSQL_DATABASE}. Run: ${SCRIPT_NAME} list-tables (or grant VIEW DEFINITION + db_datareader)."
    fi
  fi

  # Phase 2: migrate each queued table and tally the outcome.
  for idx in "${!queued_tables[@]}"; do
    if migrate_one_mssql_table "$ch_db" "$out_dir" "${queued_schemas[$idx]}" "${queued_tables[$idx]}"; then
      migrated=$((migrated + 1))
    else
      skipped=$((skipped + 1))
    fi
  done

  if [[ "$migrated" -eq 0 ]]; then
    migrate_permission_hint
    die "No tables migrated successfully (had $discovered candidate(s), $skipped skipped)."
  fi

  # A single discovered table usually means the login cannot see the rest.
  if [[ "$use_file" -eq 0 && "$discovered" -eq 1 ]]; then
    migrate_permission_hint
  fi

  echo "Done. Queued $discovered table(s), migrated $migrated, skipped/failed-metadata $skipped into ClickHouse database ${ch_db}."
}
# Install sqlcmd (mssql-tools18 + msodbcsql18) and clickhouse-client from the
# vendors' apt repositories. Supports Ubuntu and Debian; needs root or sudo.
# Side effects: adds apt keyrings and source lists, installs packages, and
# appends the mssql-tools18 bin directory to PATH in ~/.bashrc (once).
install_deps() {
  local sudo_prefix="" ms_distro="ubuntu"
  local path_line='export PATH="$PATH:/opt/mssql-tools18/bin"'

  if [[ "$(id -u)" -ne 0 ]]; then
    command -v sudo >/dev/null 2>&1 || die "Need sudo for --install-deps"
    sudo_prefix="sudo"
  fi

  # shellcheck source=/dev/null
  . /etc/os-release || die "Cannot read /etc/os-release"
  [[ "${ID:-}" == "ubuntu" || "${ID:-}" == "debian" || "${ID_LIKE:-}" == *"debian"* ]] \
    || die "This installer expects Ubuntu or Debian."
  # Microsoft publishes separate repository configs per distribution; using
  # the ubuntu path with a Debian release number does not resolve.
  if [[ "${ID:-}" == "debian" ]]; then
    ms_distro="debian"
  fi

  # $sudo_prefix is intentionally unquoted so it vanishes when running as root.
  $sudo_prefix apt-get update
  $sudo_prefix apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release

  # Microsoft ODBC + sqlcmd
  # --batch --yes lets a re-run overwrite the existing keyring instead of
  # stopping at gpg's overwrite prompt.
  curl -fsSL https://packages.microsoft.com/keys/microsoft.asc \
    | $sudo_prefix gpg --batch --yes --dearmor -o /etc/apt/trusted.gpg.d/microsoft.gpg
  curl -fsSL "https://packages.microsoft.com/config/${ms_distro}/$(lsb_release -rs)/prod.list" \
    | $sudo_prefix tee /etc/apt/sources.list.d/mssql-release.list >/dev/null
  $sudo_prefix apt-get update
  # ACCEPT_EULA must be set inside the privileged command: sudo normally
  # resets the environment, so a VAR=... prefix before sudo is not passed on.
  $sudo_prefix env ACCEPT_EULA=Y apt-get install -y msodbcsql18 mssql-tools18 unixodbc-dev
  # Best effort, and only once: do not grow ~/.bashrc on every run.
  if ! grep -qsF -- "$path_line" "$HOME/.bashrc"; then
    printf '%s\n' "$path_line" >>"$HOME/.bashrc" || true
  fi
  export PATH="$PATH:/opt/mssql-tools18/bin"

  # ClickHouse client
  $sudo_prefix mkdir -p /etc/apt/keyrings
  curl -fsSL https://packages.clickhouse.com/deb/pubkey.gpg \
    | $sudo_prefix gpg --batch --yes --dearmor -o /etc/apt/keyrings/clickhouse.gpg
  echo "deb [signed-by=/etc/apt/keyrings/clickhouse.gpg] https://packages.clickhouse.com/deb stable main" \
    | $sudo_prefix tee /etc/apt/sources.list.d/clickhouse.list >/dev/null
  $sudo_prefix apt-get update
  $sudo_prefix apt-get install -y clickhouse-client

  echo "Installed. Open a new shell or run: export PATH=\"\$PATH:/opt/mssql-tools18/bin\""
}
run_export() {
ensure_sqlcmd
ensure_mssql_env
local mssql_table=""
local ch_db=""
local ch_table=""
while [[ $# -gt 0 ]]; do
case "$1" in
--mssql-table)
mssql_table="$2"
shift 2
;;
--ch-database)
ch_db="$2"
shift 2
;;
--ch-table)
ch_table="$2"
shift 2
;;
*)
die "Unknown option: $1"
;;
esac
done
[[ -n "$mssql_table" ]] || die "stream-table requires --mssql-table SCHEMA.TABLE"
ch_db="${ch_db:-$(ch_default_database)}"
if [[ -z "$ch_table" ]]; then
if [[ "$mssql_table" == *.* ]]; then
ch_table="${mssql_table#*.}"
else
ch_table="$mssql_table"
fi
fi
[[ -n "${CH_DOCKER_CONTAINER:-}" ]] || die "stream-table expects CH_DOCKER_CONTAINER for docker exec -i"
command -v docker >/dev/null 2>&1 || die "docker not found"
local mssql_table="${1:-}"
local mssql_query="${2:-}"
local out_file="${3:-./mssql_export.tsv}"
local conn_args=()
mapfile -t conn_args < <(sqlcmd_conn_args)
local insert_q
insert_q="INSERT INTO \`$(escape_ch_ident "$ch_db")\`.\`$(escape_ch_ident "$ch_table")\` FORMAT TabSeparated"
local sql
if [[ -n "$mssql_query" ]]; then
sql="$mssql_query"
else
[[ -n "$mssql_table" ]] || die "Provide --mssql-table or --mssql-query"
sql="SELECT * FROM ${mssql_table}"
fi
echo "$SCRIPT_NAME: streaming SELECT * from $mssql_table${ch_db}.${ch_table} (ensure table exists and types match)." >&2
local ch_args=(
--host "${CH_HOST:-127.0.0.1}"
--port "${CH_PORT:-9000}"
--user "${CH_USER:-default}"
--query "$insert_q"
)
[[ -n "${CH_PASSWORD:-}" ]] && ch_args+=(--password "$CH_PASSWORD")
[[ "${CH_SECURE:-0}" == "1" ]] && ch_args+=(--secure)
mkdir -p "$(dirname "$out_file")"
# UTF-8 in/out; tab separator; no column headers (-h -1); trim trailing spaces (-W).
sqlcmd "${conn_args[@]}" \
-Q "SET NOCOUNT ON; SELECT * FROM ${mssql_table}" \
-h -1 -W -s "$(printf '\t')" -w 65535 -f i:65001,o:65001 -b \
| docker exec -i "$CH_DOCKER_CONTAINER" clickhouse-client "${ch_args[@]}"
-Q "SET NOCOUNT ON; ${sql}" \
-h -1 -W -s "$(printf '\t')" -w 65535 -f i:65001,o:65001 -o "$out_file" -b
echo "Exported to $out_file ($(wc -l <"$out_file") lines)"
}
cmd_migrate_table() {
apply_defaults
ensure_core
run_import() {
local ch_db="${1:-${CH_DATABASE:-default}}"
local ch_table="${2:-}"
local in_file="${3:-./mssql_export.tsv}"
local mssql_table=""
local ch_db=""
local out_dir=""
while [[ $# -gt 0 ]]; do
case "$1" in
--mssql-table)
mssql_table="$2"
shift 2
;;
--ch-database)
ch_db="$2"
shift 2
;;
--out-dir)
out_dir="$2"
shift 2
;;
*)
die "Unknown option: $1"
;;
esac
done
[[ -n "$ch_table" ]] || die "Provide --ch-table"
[[ -f "$in_file" ]] || die "File not found: $in_file"
[[ -n "$mssql_table" ]] || die "migrate-table requires --mssql-table SCHEMA.TABLE"
ch_db="${ch_db:-$(ch_default_database)}"
out_dir="${out_dir:-${MSSQL_CH_EXPORT_DIR:-./mssql_export_all}}"
local tmp
tmp="$(mktemp "${TMPDIR:-/tmp}/${SCRIPT_NAME}.tables.XXXXXX")"
trap 'rm -f "$tmp"' RETURN
printf '%s\n' "$mssql_table" >"$tmp"
bash "$CORE" migrate-db \
--ch-database "$ch_db" \
--out-dir "$out_dir" \
--tables-file "$tmp"
run_clickhouse_insert "INSERT INTO \`$(escape_ch_ident "$ch_db")\`.\`$(escape_ch_ident "$ch_table")\` FORMAT TabSeparated" "$in_file"
echo "Imported $in_file into ${ch_db}.${ch_table}"
}
main() {
@@ -294,29 +583,77 @@ main() {
exit 0
fi
if [[ "$cmd" == "--install-deps" ]]; then
install_deps
exit 0
fi
local mssql_table=""
local mssql_query=""
local mssql_schema=""
local ch_database=""
local ch_table=""
local out_file="./mssql_export.tsv"
local out_dir="./mssql_export_all"
local tables_file="${MSSQL_TABLES_FILE:-}"
while [[ $# -gt 0 ]]; do
case "$1" in
--mssql-table)
mssql_table="$2"
shift 2
;;
--mssql-query)
mssql_query="$2"
shift 2
;;
--ch-database)
ch_database="$2"
shift 2
;;
--mssql-schema)
mssql_schema="$2"
shift 2
;;
--ch-table)
ch_table="$2"
shift 2
;;
--out)
out_file="$2"
shift 2
;;
--out-dir)
out_dir="$2"
shift 2
;;
--tables-file)
tables_file="$2"
shift 2
;;
*)
die "Unknown option: $1"
;;
esac
done
case "$cmd" in
print-env)
cmd_print_env
export)
run_export "$mssql_table" "$mssql_query" "$out_file"
;;
migrate-db)
delegate migrate-db \
--ch-database "$(ch_default_database)" \
--out-dir "${MSSQL_CH_EXPORT_DIR:-./mssql_export_all}" \
"$@"
;;
list-tables)
delegate list-tables "$@"
;;
migrate-table)
cmd_migrate_table "$@"
import)
run_import "${ch_database:-${CH_DATABASE:-default}}" "$ch_table" "$out_file"
;;
export-import)
delegate export-import \
--ch-database "$(ch_default_database)" \
"$@"
[[ -n "$ch_table" ]] || die "export-import requires --ch-table"
run_export "$mssql_table" "$mssql_query" "$out_file"
run_import "${ch_database:-${CH_DATABASE:-default}}" "$ch_table" "$out_file"
;;
stream-table)
cmd_stream_table "$@"
list-tables)
list_mssql_tables "$mssql_schema"
;;
migrate-db)
run_migrate_db "${ch_database:-${CH_DATABASE:-default}}" "$out_dir" "$mssql_schema" "$tables_file"
;;
*)
usage