diff --git a/scripts/mssql_clickhouse_migrate.sh b/scripts/mssql_clickhouse_migrate.sh index bf9a750..dd429a6 100755 --- a/scripts/mssql_clickhouse_migrate.sh +++ b/scripts/mssql_clickhouse_migrate.sh @@ -1,37 +1,40 @@ #!/usr/bin/env bash -# Migrate SQL Server tables into ClickHouse using sqlcmd + clickhouse-client. +# Bulk copy one SQL Server table to ClickHouse on Ubuntu (or Debian-based). # -# Recommended paths: -# - Full database (creates Nullable(String) columns, safe TSV): migrate-db -# - One table via temp file (same safety as migrate-db): migrate-table +# Prerequisites (or run with --install-deps): +# - sqlcmd: Microsoft mssql-tools18 (/opt/mssql-tools18/bin/sqlcmd) +# - clickhouse-client # -# Optional "stream" mode pipes sqlcmd straight into ClickHouse (like a manual -# one-liner). Use only when data has no tabs/newlines and NULL handling matches -# your expectations; prefer migrate-table otherwise. +# Environment (required for export-import): +# MSSQL_HOST e.g. db.example.com +# MSSQL_PORT default 1433 +# MSSQL_USER +# MSSQL_PASSWORD +# MSSQL_DATABASE +# MSSQL_TRUST_SERVER_CERT set to 1 to pass -C (trust self-signed server cert) +# MSSQL_ENCRYPT optional; passed to sqlcmd as -N (e.g. 
optional|mandatory|strict) # -# Environment (examples — set in your shell or a sourced file): -# export MSSQL_HOST="43.242.212.54" -# export MSSQL_PORT="21443" -# export MSSQL_USER="Nishant_Dev" -# export MSSQL_PASSWORD='nishant@dev' -# export MSSQL_DATABASE="CPMIndiaBusinessInsight_test" -# export MSSQL_TRUST_SERVER_CERT="1" -# # export MSSQL_ENCRYPT="optional" +# CH_HOST default localhost +# CH_PORT native TCP port, default 9000 +# CH_USER default default +# CH_PASSWORD optional +# CH_SECURE set to 1 to use TLS (--secure) # -# export CH_DOCKER_CONTAINER="clickhouse" -# export CH_USER="default" -# export CH_PASSWORD="" -# export CH_PORT="9000" -# export CH_HOST="127.0.0.1" -# export CH_DATABASE="cpm" -# -# export MSSQL_CH_EXPORT_DIR="./mssql_export_all" +# Example: +# export MSSQL_HOST=sql.mycompany.internal MSSQL_USER=ro MSSQL_PASSWORD='***' MSSQL_DATABASE=sales +# export CH_HOST=ch.mycompany.internal CH_PASSWORD='***' +# ./mssql_to_clickhouse.sh export-import \ +# --mssql-table "dbo.Orders" \ +# --ch-database analytics \ +# --ch-table orders_raw \ +# --out /tmp/orders.tsv +# ./mssql_to_clickhouse.sh migrate-db \ +# --ch-database cpm \ +# --out-dir /tmp/mssql_full_export # set -euo pipefail SCRIPT_NAME=$(basename "$0") -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -CORE="${ROOT_DIR}/mssql_clickhouse_migrate.sh" die() { echo "$SCRIPT_NAME: $*" >&2 @@ -40,90 +43,40 @@ die() { usage() { cat <TABLE (# comments allowed). + Use when MSSQL catalogs hide most tables unless VIEW DEFINITION is granted. + --ch-database DB ClickHouse database (default: \$CH_DATABASE or default) + --ch-table TABLE ClickHouse table name + --out PATH File path (default: ./mssql_export.tsv) + --out-dir PATH Directory for migrate-db TSV files (default: ./mssql_export_all) - export-import --mssql-table SCHEMA.TABLE [--ch-database DB] [--ch-table NAME] [--out FILE] - Export then import one table (ClickHouse table must already exist with matching columns). 
+Environment: + MSSQL_HOST, MSSQL_PORT, MSSQL_USER, MSSQL_PASSWORD, MSSQL_DATABASE + MSSQL_TRUST_SERVER_CERT=1 (optional; trust self-signed SQL Server cert with sqlcmd -C) + MSSQL_ENCRYPT (optional; forwarded to sqlcmd -N) + MSSQL_TABLES_FILE optional explicit table list path for migrate-db + CH_HOST, CH_PORT, CH_USER, CH_PASSWORD, CH_SECURE=1 for TLS + CH_DOCKER_CONTAINER optional; run clickhouse-client inside this container - list-tables [--mssql-schema SCHEMA] - Print SCHEMATABLE rows from SQL Server discovery. - - stream-table --mssql-table SCHEMA.TABLE [--ch-database DB] [--ch-table NAME] - Pipe: sqlcmd SELECT * | clickhouse-client INSERT FORMAT TabSeparated. - Less safe than migrate-table; for quick tests or clean numeric/text data. - - print-env - Print current MSSQL/CH-related environment (passwords masked). - -Environment defaults this wrapper applies before delegating: - MSSQL_PORT=\${MSSQL_PORT:-1433} - CH_PORT=\${CH_PORT:-9000} - CH_USER=\${CH_USER:-default} - CH_HOST=\${CH_HOST:-127.0.0.1} - CH_DATABASE (or pass --ch-database) — ClickHouse database name, e.g. cpm - -Examples: - export CH_DATABASE=cpm CH_DOCKER_CONTAINER=clickhouse MSSQL_TRUST_SERVER_CERT=1 - $SCRIPT_NAME migrate-table --mssql-table dbo.Category_Execution - $SCRIPT_NAME migrate-db --ch-database cpm --out-dir ./ch_export -EOF -} - -apply_defaults() { - export MSSQL_PORT="${MSSQL_PORT:-1433}" - export CH_PORT="${CH_PORT:-9000}" - export CH_USER="${CH_USER:-default}" - export CH_HOST="${CH_HOST:-127.0.0.1}" -} - -ensure_core() { - [[ -x "$CORE" || -f "$CORE" ]] || die "Missing core script: $CORE" -} - -delegate() { - apply_defaults - ensure_core - bash "$CORE" "$@" -} - -mask() { - local v="${1:-}" - if [[ -z "$v" ]]; then - printf "(empty)" - else - printf "***" - fi -} - -cmd_print_env() { - apply_defaults - cat </dev/null 2>&1 || die "docker not found, but CH_DOCKER_CONTAINER is set" + return 0 + fi + command -v clickhouse-client >/dev/null 2>&1 || die "clickhouse-client not found. 
#######################################
# Run one T-SQL batch through sqlcmd and print the result as headerless,
# tab-separated UTF-8 text.
# Arguments: $1 - T-SQL to run (prefixed with SET NOCOUNT ON)
# Outputs:   query rows on stdout
# Returns:   sqlcmd's exit status (-b makes SQL errors non-zero)
#######################################
sqlcmd_query() {
  ensure_sqlcmd
  ensure_mssql_env

  local sql="${1:-}"
  # Keep the connection flags local so they never leak into (or clobber) a
  # caller's variable of the same name.
  local conn_args=()
  mapfile -t conn_args < <(sqlcmd_conn_args)

  sqlcmd "${conn_args[@]}" \
    -Q "SET NOCOUNT ON; ${sql}" \
    -h -1 -W -s $'\t' -w 65535 -f i:65001,o:65001 -b
}

#######################################
# Fill the array named by $1 with the clickhouse-client flags shared by every
# invocation: connection settings taken from CH_* plus --query.
# Globals:   CH_HOST, CH_PORT, CH_USER, CH_PASSWORD, CH_SECURE (read)
# Arguments: $1 - name of the caller's array to overwrite
#            $2 - query text
#######################################
_clickhouse_client_args() {
  local -n _ch_args_out="$1"
  local query="${2:-}"

  _ch_args_out=(
    --host "${CH_HOST:-localhost}"
    --port "${CH_PORT:-9000}"
    --user "${CH_USER:-default}"
    --query "$query"
  )
  if [[ -n "${CH_PASSWORD:-}" ]]; then
    _ch_args_out+=(--password "$CH_PASSWORD")
  fi
  if [[ "${CH_SECURE:-0}" == "1" ]]; then
    _ch_args_out+=(--secure)
  fi
}

#######################################
# Run a ClickHouse statement that needs no input data.
# Globals:   CH_DOCKER_CONTAINER (read) - when set, the client runs inside
#            that container via docker exec
# Arguments: $1 - query text
# Returns:   exit status of clickhouse-client
#######################################
run_clickhouse_query() {
  ensure_clickhouse_client

  local args=()
  _clickhouse_client_args args "${1:-}"

  if [[ -n "${CH_DOCKER_CONTAINER:-}" ]]; then
    # Important: do not pass -i here, otherwise docker can consume caller stdin
    # (e.g. a table-list loop) and stop after the first row.
    docker exec "$CH_DOCKER_CONTAINER" clickhouse-client "${args[@]}"
  else
    clickhouse-client "${args[@]}"
  fi
}

#######################################
# Run a ClickHouse INSERT whose data is read from a file.
# Globals:   CH_DOCKER_CONTAINER (read)
# Arguments: $1 - INSERT ... FORMAT ... statement
#            $2 - path of the file streamed to the client's stdin
# Returns:   exit status of clickhouse-client
#######################################
run_clickhouse_insert() {
  ensure_clickhouse_client

  local in_file="${2:-}"
  local args=()
  _clickhouse_client_args args "${1:-}"

  if [[ -n "${CH_DOCKER_CONTAINER:-}" ]]; then
    # -i is required here: the data file is forwarded through docker's stdin.
    docker exec -i "$CH_DOCKER_CONTAINER" clickhouse-client "${args[@]}" <"$in_file"
  else
    clickhouse-client "${args[@]}" <"$in_file"
  fi
}

# Escape an identifier for use inside [brackets] in T-SQL by doubling every
# closing bracket. Prints the result without a trailing newline.
escape_mssql_ident() {
  local ident="${1:-}"
  ident="${ident//]/]]}"
  printf '%s' "$ident"
}

# Escape single quotes for literals embedded in MSSQL dynamically built strings.
# Make a value safe inside a single-quoted T-SQL literal by doubling each
# apostrophe. Prints the result without a trailing newline.
escape_mssql_sql_literal() {
  local value="${1:-}"
  local -r apostrophe="'"
  printf '%s' "${value//${apostrophe}/${apostrophe}${apostrophe}}"
}

# Print $1 with every carriage return removed (sqlcmd output may carry CRLF).
strip_cr() {
  local text="$1"
  text=${text//$'\r'/}
  printf '%s' "$text"
}

#######################################
# Print the T-SQL that lists user tables as (TABLE_SCHEMA, TABLE_NAME) rows.
# The statement unions three catalog sources (sys.tables,
# INFORMATION_SCHEMA.TABLES, sys.objects) and de-duplicates them.
# Arguments: $1 - optional schema name; when given, each source is filtered
#                 to that schema
# Outputs:   the SQL text on stdout
#######################################
mssql_discovery_sql_union() {
  local wanted_schema="${1:-}"
  local sys_clause="" is_clause="" obj_clause=""
  local literal sql

  if [[ -n "$wanted_schema" ]]; then
    literal="$(escape_mssql_sql_literal "$wanted_schema")"
    sys_clause="AND s.name = N'${literal}'"
    is_clause="AND TABLE_SCHEMA = N'${literal}'"
    obj_clause="AND SCHEMA_NAME(o.schema_id) = N'${literal}'"
  fi

  sql="
        SELECT DISTINCT
            CAST(x.TABLE_SCHEMA AS nvarchar(256)) COLLATE DATABASE_DEFAULT AS TABLE_SCHEMA,
            CAST(x.TABLE_NAME AS nvarchar(256)) COLLATE DATABASE_DEFAULT AS TABLE_NAME
        FROM (
            SELECT s.name AS TABLE_SCHEMA, t.name AS TABLE_NAME
            FROM sys.tables t
            INNER JOIN sys.schemas s ON s.schema_id = t.schema_id
            WHERE t.is_ms_shipped = 0 ${sys_clause}

            UNION ALL

            SELECT TABLE_SCHEMA, TABLE_NAME
            FROM INFORMATION_SCHEMA.TABLES
            WHERE TABLE_TYPE = N'BASE TABLE' ${is_clause}

            UNION ALL

            SELECT SCHEMA_NAME(o.schema_id) AS TABLE_SCHEMA, o.name AS TABLE_NAME
            FROM sys.objects o
            WHERE o.type = N'U' AND o.is_ms_shipped = 0 ${obj_clause}
        ) AS x
        WHERE NULLIF(LTRIM(RTRIM(TABLE_SCHEMA)), N'') IS NOT NULL
          AND NULLIF(LTRIM(RTRIM(TABLE_NAME)), N'') IS NOT NULL
        ORDER BY TABLE_SCHEMA, TABLE_NAME;
    "
  printf '%s' "$sql"
}

# Print discovered tables, one "SCHEMA<TAB>TABLE" row per line.
# Arguments: $1 - optional schema filter
list_mssql_tables() {
  local discovery_sql

  ensure_sqlcmd
  ensure_mssql_env
  discovery_sql="$(mssql_discovery_sql_union "${1:-}")"
  sqlcmd_query "$discovery_sql"
}
# Print $1 with leading and trailing whitespace removed (no trailing newline).
_trim_ws() {
  local s="${1-}"
  s="${s#"${s%%[![:space:]]*}"}"
  s="${s%"${s##*[![:space:]]}"}"
  printf '%s' "$s"
}

# Run "$@" as root: directly when already root, through sudo otherwise.
_as_root() {
  if [[ "$(id -u)" -eq 0 ]]; then
    "$@"
  else
    sudo "$@"
  fi
}

# Explain on stderr why discovery may return few tables and how to fix it
# (grant metadata visibility, or pass an explicit --tables-file).
# Globals: SCRIPT_NAME, MSSQL_DATABASE, MSSQL_USER (read)
migrate_permission_hint() {
  local db="${MSSQL_DATABASE:-<database>}"
  local login="${MSSQL_USER:-<login>}"

  cat <<EOF >&2

$SCRIPT_NAME: MSSQL catalogs only expose tables your login may see metadata for.
If migrate-db discovers too few rows, grant read + metadata visibility (run as dbo/sa):

  USE [$db];
  GRANT VIEW DEFINITION ON DATABASE::[$db] TO [$login];
  EXEC sp_addrolemember N'db_datareader', N'$login';

Or migrate using an explicit list from a dbo export:

  $SCRIPT_NAME migrate-db --tables-file ./all_tables.txt --ch-database ...

  all_tables.txt format (one table per line):
    dbo.MyTable
    sales.Orders

EOF
}

#######################################
# Print the SELECT-list expression that exports one column as a single
# TabSeparated field.
#   - NULL becomes the \N marker.
#   - binary-like types are rendered as a hex string.
#   - everything else is converted to text, backslashes are doubled (a bare
#     backslash would otherwise start an escape sequence when ClickHouse parses
#     TabSeparated), and TAB/LF/CR are replaced by spaces so a value can never
#     break the row/field framing.
#   - datetime/smalldatetime use style 121 and money/smallmoney style 2,
#     because the default CONVERT styles drop seconds / two decimal places.
# NOTE(review): float/real still use the default style, which keeps at most
# six significant digits; a wider style depends on the server version.
# Arguments: $1 - column name, $2 - INFORMATION_SCHEMA DATA_TYPE
# Outputs:   the SQL expression on stdout (no trailing newline)
#######################################
mssql_column_expr() {
  local col_name="${1:-}"
  local data_type="${2:-}"
  local -r null_marker="'\\N'"
  local col_ref text_sql value_sql

  # Metadata rows may still carry a CR; it must not defeat the type match.
  data_type=${data_type//$'\r'/}
  col_ref="[$(escape_mssql_ident "$col_name")]"

  case "${data_type,,}" in
    binary | varbinary | image | rowversion | timestamp)
      value_sql="master.dbo.fn_varbintohexstr(${col_ref})"
      ;;
    *)
      case "${data_type,,}" in
        datetime | smalldatetime)
          text_sql="CONVERT(nvarchar(max), ${col_ref}, 121)"
          ;;
        money | smallmoney)
          text_sql="CONVERT(nvarchar(max), ${col_ref}, 2)"
          ;;
        *)
          text_sql="CONVERT(nvarchar(max), ${col_ref})"
          ;;
      esac
      # CHAR(92) is the backslash; it is escaped first so the later
      # replacements cannot introduce new ones.
      value_sql="REPLACE(REPLACE(REPLACE(REPLACE(${text_sql}, CHAR(92), CHAR(92) + CHAR(92)), CHAR(9), ' '), CHAR(10), ' '), CHAR(13), ' ')"
      ;;
  esac

  printf 'CASE WHEN %s IS NULL THEN %s ELSE %s END AS %s' \
    "$col_ref" "$null_marker" "$value_sql" "$col_ref"
}

#######################################
# Print a SELECT statement that exports every column of one table through
# mssql_column_expr, in ordinal order.
# Arguments: $1 - schema name, $2 - table name
# Outputs:   the SQL text on stdout
# Exits (via die) when the metadata query fails or returns no columns.
#######################################
build_mssql_export_query() {
  local schema_name="${1:-}"
  local table_name="${2:-}"
  local schema_lit table_lit columns_tsv line col_name data_type
  local select_list=""

  schema_lit="$(escape_mssql_sql_literal "$schema_name")"
  table_lit="$(escape_mssql_sql_literal "$table_name")"

  # Capture first so a failed query is detected instead of having its error
  # text parsed as column rows.
  columns_tsv="$(sqlcmd_query "
    SELECT COLUMN_NAME, DATA_TYPE
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_SCHEMA = N'${schema_lit}' AND TABLE_NAME = N'${table_lit}'
    ORDER BY ORDINAL_POSITION;
  ")" || die "Could not read column metadata for ${schema_name}.${table_name}"

  while IFS= read -r line || [[ -n "$line" ]]; do
    line=${line//$'\r'/}
    [[ -n "$line" ]] || continue
    col_name="${line%%$'\t'*}"
    data_type="${line#*$'\t'}"
    select_list+="${select_list:+, }$(mssql_column_expr "$col_name" "$data_type")"
  done <<<"$columns_tsv"

  [[ -n "$select_list" ]] || die "No columns found for ${schema_name}.${table_name}"

  printf 'SELECT %s FROM [%s].[%s]' "$select_list" \
    "$(escape_mssql_ident "$schema_name")" "$(escape_mssql_ident "$table_name")"
}

#######################################
# Parse one line from --tables-file: SCHEMA.TABLE, SCHEMA<TAB>TABLE, or a bare
# table name (schema defaults to dbo).
# Arguments: $1 - raw line
#            $2 - name of the variable that receives the schema
#            $3 - name of the variable that receives the table
# Returns:   0 when both parts are non-empty; 1 for blank lines, # comments
#            and lines with an empty schema or table part
#######################################
parse_mssql_table_line() {
  local raw="$1"
  local -n _sch="$2"
  local -n _tbl="$3"

  raw=${raw//$'\r'/}
  raw="$(_trim_ws "$raw")"
  [[ -n "$raw" ]] || return 1
  [[ "$raw" != '#'* ]] || return 1

  if [[ "$raw" == *$'\t'* ]]; then
    _sch="${raw%%$'\t'*}"
    _tbl="${raw#*$'\t'}"
  elif [[ "$raw" == *.* ]]; then
    # Split on the first dot only; the table part may itself contain dots.
    _sch="${raw%%.*}"
    _tbl="${raw#*.}"
  else
    _sch="dbo"
    _tbl="$raw"
  fi

  _sch="$(_trim_ws "$_sch")"
  _tbl="$(_trim_ws "$_tbl")"

  [[ -n "$_sch" && -n "$_tbl" ]]
}

#######################################
# Copy one SQL Server table into ClickHouse: create the target table with all
# columns as Nullable(String), export to TSV, then import.
# Every step is checked explicitly: callers invoke this function as an `if`
# condition, which disables `set -e` for everything it runs.
# Arguments: $1 - ClickHouse database, $2 - output directory,
#            $3 - source schema, $4 - source table
# Returns:   0 when the table was migrated, 1 when any step failed
#######################################
migrate_one_mssql_table() {
  local ch_db="${1:?}"
  local out_dir="${2:?}"
  local schema_name="${3:?}"
  local table_name="${4:?}"

  # Use the same table name in ClickHouse as in SQL Server (no schema__ prefix).
  # If two schemas contain the same table name, the second migrate would target
  # the same ClickHouse table.
  local ch_table="$table_name"
  local source_label="${schema_name}.${table_name}"
  local columns_tsv column_name export_query exported_file
  local columns_ddl=""

  if ! columns_tsv="$(sqlcmd_query "
    SELECT COLUMN_NAME
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_SCHEMA = N'$(escape_mssql_sql_literal "$schema_name")' AND TABLE_NAME = N'$(escape_mssql_sql_literal "$table_name")'
    ORDER BY ORDINAL_POSITION;
  ")"; then
    echo "Skipping ${source_label}: could not query INFORMATION_SCHEMA" >&2
    return 1
  fi

  while IFS= read -r column_name || [[ -n "$column_name" ]]; do
    column_name=${column_name//$'\r'/}
    [[ -n "$column_name" ]] || continue
    columns_ddl+="${columns_ddl:+, }\`$(escape_ch_ident "$column_name")\` Nullable(String)"
  done <<<"$columns_tsv"

  if [[ -z "$columns_ddl" ]]; then
    echo "Skipping ${source_label}: no columns readable in INFORMATION_SCHEMA"
    return 1
  fi

  if ! run_clickhouse_query "CREATE TABLE IF NOT EXISTS \`$(escape_ch_ident "$ch_db")\`.\`$(escape_ch_ident "$ch_table")\` (${columns_ddl}) ENGINE = MergeTree ORDER BY tuple()"; then
    echo "Skipping ${source_label}: could not create ${ch_db}.${ch_table}" >&2
    return 1
  fi

  export_query="$(build_mssql_export_query "$schema_name" "$table_name")" || return 1
  [[ -n "$export_query" ]] || return 1

  # A slash in a table name must not be read as a directory separator.
  exported_file="${out_dir}/${source_label//\//_}.tsv"
  run_export "" "$export_query" "$exported_file" || return 1
  run_import "$ch_db" "$ch_table" "$exported_file" || return 1

  echo "Migrated ${source_label} -> ${ch_db}.${ch_table}"
}

#######################################
# Migrate every table from an explicit list file or from catalog discovery.
# Arguments: $1 - ClickHouse database (default: $CH_DATABASE or "default")
#            $2 - directory for the per-table TSV files
#            $3 - optional schema filter for discovery
#            $4 - optional tables file; when set, discovery is skipped
# Exits (via die) when no candidate table exists or none could be migrated.
#######################################
run_migrate_db() {
  ensure_sqlcmd
  ensure_clickhouse_client
  ensure_mssql_env

  local ch_db="${1:-${CH_DATABASE:-default}}"
  local out_dir="${2:-./mssql_export_all}"
  local schema_filter="${3:-}"
  local tables_file="${4:-}"

  local line schema_name table_name tables_rows i
  local -a schemas=() tables=()
  local discovered=0 migrated=0 skipped=0 use_file=0

  [[ -n "$ch_db" ]] || die "Provide --ch-database or CH_DATABASE"
  mkdir -p -- "$out_dir"

  run_clickhouse_query "CREATE DATABASE IF NOT EXISTS \`$(escape_ch_ident "$ch_db")\`"

  # Collect the whole candidate list before migrating anything, so the list is
  # never on stdin while sqlcmd / docker / clickhouse-client run and could
  # consume it.
  if [[ -n "$tables_file" ]]; then
    [[ -f "$tables_file" ]] || die "Tables list file not found: $tables_file"
    use_file=1
    while IFS= read -r line || [[ -n "$line" ]]; do
      schema_name=""
      table_name=""
      parse_mssql_table_line "$line" schema_name table_name || continue
      schemas+=("$schema_name")
      tables+=("$table_name")
    done <"$tables_file"
  else
    tables_rows="$(sqlcmd_query "$(mssql_discovery_sql_union "$schema_filter")")"
    while IFS= read -r line || [[ -n "$line" ]]; do
      line=${line//$'\r'/}
      # Discovery rows are SCHEMA<TAB>TABLE; anything else is not a table row.
      [[ "$line" == *$'\t'* ]] || continue
      schema_name="$(_trim_ws "${line%%$'\t'*}")"
      table_name="$(_trim_ws "${line#*$'\t'}")"
      [[ -n "$schema_name" && -n "$table_name" ]] || continue
      schemas+=("$schema_name")
      tables+=("$table_name")
    done <<<"$tables_rows"
  fi

  discovered=${#schemas[@]}
  if [[ "$discovered" -le 0 ]]; then
    if [[ "$use_file" -eq 1 ]]; then
      die "No valid SCHEMA.TABLE rows in tables file (after skipping blanks/comments): ${tables_file}"
    else
      die "No SQL Server tables discovered in ${MSSQL_DATABASE}. Run: ${SCRIPT_NAME} list-tables (or grant VIEW DEFINITION + db_datareader)."
    fi
  fi

  for ((i = 0; i < discovered; i++)); do
    if migrate_one_mssql_table "$ch_db" "$out_dir" "${schemas[i]}" "${tables[i]}"; then
      migrated=$((migrated + 1))
    else
      skipped=$((skipped + 1))
    fi
  done

  if [[ "$migrated" -eq 0 ]]; then
    migrate_permission_hint
    die "No tables migrated successfully (had $discovered candidate(s), $skipped skipped)."
  fi

  # A single discovered table usually means the login cannot see the rest.
  if [[ "$use_file" -eq 0 && "$discovered" -eq 1 ]]; then
    migrate_permission_hint
  fi

  echo "Done. Queued $discovered table(s), migrated $migrated, skipped/failed-metadata $skipped into ClickHouse database ${ch_db}."
}

#######################################
# Install sqlcmd (mssql-tools18 + msodbcsql18) and clickhouse-client from the
# vendors' apt repositories. Needs root or sudo, and network access.
# Safe to re-run: keyrings are overwritten and the PATH line is added once.
#######################################
install_deps() {
  local distro_id distro_like distro_version ms_distro bashrc
  # Literal $PATH on purpose: the line is evaluated later by the user's shell.
  # shellcheck disable=SC2016
  local -r path_line='export PATH="$PATH:/opt/mssql-tools18/bin"'

  if [[ "$(id -u)" -ne 0 ]]; then
    command -v sudo >/dev/null 2>&1 || die "Need sudo for --install-deps"
  fi

  [[ -r /etc/os-release ]] || die "Cannot read /etc/os-release"
  # Source in subshells so os-release variables do not leak into this script.
  distro_id="$(. /etc/os-release && printf '%s' "${ID:-}")"
  distro_like="$(. /etc/os-release && printf '%s' "${ID_LIKE:-}")"
  distro_version="$(. /etc/os-release && printf '%s' "${VERSION_ID:-}")"
  [[ "$distro_id" == "ubuntu" || "$distro_id" == "debian" || "$distro_like" == *"debian"* ]] \
    || die "This installer expects Ubuntu or Debian."

  _as_root apt-get update
  _as_root apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release

  # Microsoft ODBC + sqlcmd. Debian has its own repository path; everything
  # else keeps using the Ubuntu path.
  if [[ "$distro_id" == "debian" ]]; then
    ms_distro="debian"
  else
    ms_distro="ubuntu"
  fi
  [[ -n "$distro_version" ]] || distro_version="$(lsb_release -rs)"

  # --batch --yes: overwrite an existing keyring instead of failing on re-run.
  curl -fsSL https://packages.microsoft.com/keys/microsoft.asc \
    | _as_root gpg --batch --yes --dearmor -o /etc/apt/trusted.gpg.d/microsoft.gpg
  # NOTE(review): newer prod.list files appear to reference
  # signed-by=/usr/share/keyrings/microsoft-prod.gpg - provide the key there
  # too when nothing else has installed it. Confirm against the served list.
  if [[ ! -e /usr/share/keyrings/microsoft-prod.gpg ]]; then
    _as_root install -D -m 0644 /etc/apt/trusted.gpg.d/microsoft.gpg \
      /usr/share/keyrings/microsoft-prod.gpg
  fi
  curl -fsSL "https://packages.microsoft.com/config/${ms_distro}/${distro_version}/prod.list" \
    | _as_root tee /etc/apt/sources.list.d/mssql-release.list >/dev/null
  _as_root apt-get update
  # ACCEPT_EULA must be set on the far side of sudo: sudo's default env_reset
  # drops variables assigned in front of it.
  _as_root env ACCEPT_EULA=Y apt-get install -y msodbcsql18 mssql-tools18 unixodbc-dev

  bashrc="${HOME}/.bashrc"
  if ! grep -qsF -- "$path_line" "$bashrc"; then
    # Best effort: an unwritable .bashrc must not fail the installation.
    printf '%s\n' "$path_line" >>"$bashrc" || true
  fi
  export PATH="$PATH:/opt/mssql-tools18/bin"

  # ClickHouse client
  # NOTE(review): key URL and keyring path are kept as they were; confirm them
  # against the current ClickHouse install instructions.
  _as_root mkdir -p /etc/apt/keyrings
  curl -fsSL https://packages.clickhouse.com/deb/pubkey.gpg \
    | _as_root gpg --batch --yes --dearmor -o /etc/apt/keyrings/clickhouse.gpg
  echo "deb [signed-by=/etc/apt/keyrings/clickhouse.gpg] https://packages.clickhouse.com/deb stable main" \
    | _as_root tee /etc/apt/sources.list.d/clickhouse.list >/dev/null
  _as_root apt-get update
  _as_root apt-get install -y clickhouse-client

  echo "Installed. Open a new shell or run: export PATH=\"\$PATH:/opt/mssql-tools18/bin\""
}

#######################################
# Export a table or an arbitrary query to a headerless tab-separated file.
# Arguments: $1 - table reference, used as "SELECT * FROM $1" when $2 is empty
#                 (inserted verbatim - pass trusted input only)
#            $2 - full query; takes precedence over $1
#            $3 - output path (default ./mssql_export.tsv)
# Returns:   0 on success; sqlcmd's exit status when the export fails (the
#            status is returned explicitly so it survives callers that run
#            with `set -e` suppressed)
#######################################
run_export() {
  ensure_sqlcmd
  ensure_mssql_env

  local mssql_table="${1:-}"
  local mssql_query="${2:-}"
  local out_file="${3:-./mssql_export.tsv}"
  local conn_args=()
  local sql line_count
  local rc=0

  mapfile -t conn_args < <(sqlcmd_conn_args)

  if [[ -n "$mssql_query" ]]; then
    sql="$mssql_query"
  else
    [[ -n "$mssql_table" ]] || die "Provide --mssql-table or --mssql-query"
    sql="SELECT * FROM ${mssql_table}"
  fi

  mkdir -p -- "$(dirname -- "$out_file")"

  # UTF-8 in/out; tab separator; no column headers (-h -1); trim trailing spaces (-W).
  sqlcmd "${conn_args[@]}" \
    -Q "SET NOCOUNT ON; ${sql}" \
    -h -1 -W -s $'\t' -w 65535 -f i:65001,o:65001 -o "$out_file" -b || rc=$?
  if ((rc != 0)); then
    echo "${SCRIPT_NAME:-${0##*/}}: sqlcmd export failed (exit $rc); check $out_file for the server message" >&2
    return "$rc"
  fi

  line_count="$(wc -l <"$out_file")"
  echo "Exported to $out_file ($line_count lines)"
}

#######################################
# Load a TabSeparated file into an existing ClickHouse table.
# Arguments: $1 - ClickHouse database (default: $CH_DATABASE or "default")
#            $2 - ClickHouse table (required)
#            $3 - input file (default ./mssql_export.tsv)
# Returns:   0 on success; the client's exit status when the insert fails
#######################################
run_import() {
  local ch_db="${1:-${CH_DATABASE:-default}}"
  local ch_table="${2:-}"
  local in_file="${3:-./mssql_export.tsv}"
  local rc=0

  [[ -n "$ch_table" ]] || die "Provide --ch-table"
  [[ -f "$in_file" ]] || die "File not found: $in_file"

  run_clickhouse_insert "INSERT INTO \`$(escape_ch_ident "$ch_db")\`.\`$(escape_ch_ident "$ch_table")\` FORMAT TabSeparated" "$in_file" || rc=$?
  if ((rc != 0)); then
    echo "${SCRIPT_NAME:-${0##*/}}: import of $in_file into ${ch_db}.${ch_table} failed (exit $rc)" >&2
    return "$rc"
  fi

  echo "Imported $in_file into ${ch_db}.${ch_table}"
}
]]; do + case "$1" in + --mssql-table) + mssql_table="$2" + shift 2 + ;; + --mssql-query) + mssql_query="$2" + shift 2 + ;; + --ch-database) + ch_database="$2" + shift 2 + ;; + --mssql-schema) + mssql_schema="$2" + shift 2 + ;; + --ch-table) + ch_table="$2" + shift 2 + ;; + --out) + out_file="$2" + shift 2 + ;; + --out-dir) + out_dir="$2" + shift 2 + ;; + --tables-file) + tables_file="$2" + shift 2 + ;; + *) + die "Unknown option: $1" + ;; + esac + done + case "$cmd" in - print-env) - cmd_print_env + export) + run_export "$mssql_table" "$mssql_query" "$out_file" ;; - migrate-db) - delegate migrate-db \ - --ch-database "$(ch_default_database)" \ - --out-dir "${MSSQL_CH_EXPORT_DIR:-./mssql_export_all}" \ - "$@" - ;; - list-tables) - delegate list-tables "$@" - ;; - migrate-table) - cmd_migrate_table "$@" + import) + run_import "${ch_database:-${CH_DATABASE:-default}}" "$ch_table" "$out_file" ;; export-import) - delegate export-import \ - --ch-database "$(ch_default_database)" \ - "$@" + [[ -n "$ch_table" ]] || die "export-import requires --ch-table" + run_export "$mssql_table" "$mssql_query" "$out_file" + run_import "${ch_database:-${CH_DATABASE:-default}}" "$ch_table" "$out_file" ;; - stream-table) - cmd_stream_table "$@" + list-tables) + list_mssql_tables "$mssql_schema" + ;; + migrate-db) + run_migrate_db "${ch_database:-${CH_DATABASE:-default}}" "$out_dir" "$mssql_schema" "$tables_file" ;; *) usage