17 min read

Clickhouse 物理部署记录

Clickhouse 物理部署记录

一、部署信息

  • 版本: 25.8.12.129
  • 操作系统: OpenEuler 24.03 LTS
  • 平台:arm64

数据目录规划

keeper:

  • /data/clickhouse/keeper
  • /data/clickhouse/keeper/coordination/logs
  • /data/clickhouse/keeper/coordination/snapshots

clickhouse:

  • /data/clickhouse/server/data
  • /data/clickhouse/server/log

二、系统前置优化配置

调整内核最大线程数量

sysctl -w kernel.threads-max=262144
sysctl -w kernel.task_delayacct=1
sysctl -w vm.swappiness=1

cat >/etc/sysctl.d/99-clickhouse.conf<<EOF
vm.swappiness=1
kernel.task_delayacct = 1
kernel.threads-max = 262144
EOF

编辑 /etc/default/grub,在 GRUB_CMDLINE_LINUX 参数中添加:

GRUB_CMDLINE_LINUX="... transparent_hugepage=never"
grub2-mkconfig -o /boot/grub2/grub.cfg  # CentOS/RHEL
update-grub                             # Ubuntu/Debian

三、配置步骤

下载以下软件包

 clickhouse-client-25.8.12.129.aarch64.rpm
 clickhouse-common-static-25.8.12.129.aarch64.rpm
 clickhouse-keeper-25.8.12.129.aarch64.rpm
 clickhouse-server-25.8.12.129.aarch64.rpm

3.1 clickhouse-keeper

配置keeper: /etc/clickhouse-keeper/keeper_config.xml

默认目录版本:

<clickhouse>
  <logger>
    <level>information</level>
    <log>/var/log/clickhouse-keeper/clickhouse-keeper.log</log>
    <errorlog>/var/log/clickhouse-keeper/clickhouse-keeper.err.log</errorlog>
    <size>1000M</size>
    <count>10</count>
  </logger>

  <max_connections>4096</max_connections>
  <listen_host>0.0.0.0</listen_host>

  <keeper_server>
    <tcp_port>9181</tcp_port>
    <enable_ipv6>false</enable_ipv6>

    <server_id>41</server_id>

    <log_storage_path>/var/lib/clickhouse/coordination/logs</log_storage_path>
    <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>

    <coordination_settings>
        <operation_timeout_ms>10000</operation_timeout_ms>
        <min_session_timeout_ms>10000</min_session_timeout_ms>
        <session_timeout_ms>100000</session_timeout_ms>
        <raft_logs_level>information</raft_logs_level>
        <compress_logs>false</compress_logs>
    </coordination_settings>

    <hostname_checks_enabled>true</hostname_checks_enabled>
    <raft_configuration>
        <server>
            <id>41</id>
            <hostname>ck-01</hostname>
            <port>9234</port>
        </server>
        <server>
            <id>42</id>
            <hostname>ck-02</hostname>
            <port>9234</port>
        </server>
        <server>
            <id>43</id>
            <hostname>ck-03</hostname>
            <port>9234</port>
        </server>
    </raft_configuration>
  </keeper_server>
  <openSSL>
    <server>
          <verificationMode>none</verificationMode>
          <loadDefaultCAFile>false</loadDefaultCAFile>
          <cacheSessions>true</cacheSessions>
          <disableProtocols>sslv2,sslv3</disableProtocols>
          <preferServerCiphers>true</preferServerCiphers>
      </server>
  </openSSL>
</clickhouse>

自定义目录:

<clickhouse>
  <logger>
    <level>information</level>
    <log>/data/clickhouse/keeper/clickhouse-keeper.log</log>
    <errorlog>/data/clickhouse/keeper/clickhouse-keeper.err.log</errorlog>
    <size>1000M</size>
    <count>10</count>
  </logger>

  <max_connections>4096</max_connections>
  <listen_host>0.0.0.0</listen_host>

  <keeper_server>
    <tcp_port>9181</tcp_port>
    <enable_ipv6>false</enable_ipv6>

    <server_id>41</server_id>

    <log_storage_path>/data/clickhouse/keeper/coordination/logs</log_storage_path>
    <snapshot_storage_path>/data/clickhouse/keeper/coordination/snapshots</snapshot_storage_path>

    <coordination_settings>
        <operation_timeout_ms>10000</operation_timeout_ms>
        <min_session_timeout_ms>10000</min_session_timeout_ms>
        <session_timeout_ms>100000</session_timeout_ms>
        <raft_logs_level>information</raft_logs_level>
        <compress_logs>false</compress_logs>
    </coordination_settings>

    <hostname_checks_enabled>true</hostname_checks_enabled>
    <raft_configuration>
        <server>
            <id>41</id>
            <hostname>ck-01</hostname>
            <port>9234</port>
        </server>
        <server>
            <id>42</id>
            <hostname>ck-02</hostname>
            <port>9234</port>
        </server>
        <server>
            <id>43</id>
            <hostname>ck-03</hostname>
            <port>9234</port>
        </server>
    </raft_configuration>
  </keeper_server>
  <openSSL>
    <server>
          <verificationMode>none</verificationMode>
          <loadDefaultCAFile>true</loadDefaultCAFile>
          <cacheSessions>true</cacheSessions>
          <disableProtocols>sslv2,sslv3</disableProtocols>
          <preferServerCiphers>true</preferServerCiphers>
      </server>
  </openSSL>
</clickhouse>

可以使用以下命令来启动

clickhouse-keeper --config  /etc/clickhouse-keeper/keeper_config.xml

检测服务是否正常

echo mntr | nc localhost 9181

3.2 clickhouse-server

备份配置文件

# cp /etc/clickhouse-server/config.xml /etc/clickhouse-server/config.xml.origin

内容更新如下:

sed -i -e 's@<path>/var/lib/clickhouse/</path>@<path>/data/clickhouse/clickhouse-server/</path>@' \
       -e 's@<level>trace</level>@<level>information</level>@' \
       -e 's@<!-- <max_connections>4096</max_connections> -->@<max_connections>40960</max_connections>@' \
       -e 's@<!-- <listen_host>::</listen_host> -->@<listen_host>0.0.0.0</listen_host>@' \
       -e 's@/var/log@/data/clickhouse/logs@' \
       /etc/clickhouse-server/config.xml

创建集群拓扑文件

cat > /etc/clickhouse-server/config.d/remote_servers.xml<<EOF
<clickhouse>
    <remote_servers>
        <VQIU_CK_1S_3R>
            <secret>Abcd12345</secret>
            <shard>
                <internal_replication>true</internal_replication>
                <replica><host>ck-01</host><port>9000</port></replica>
                <replica><host>ck-02</host><port>9000</port></replica>
                <replica><host>ck-03</host><port>9000</port></replica>
            </shard>
        </VQIU_CK_1S_3R>
    </remote_servers>
</clickhouse>
EOF

引用keeper

cat > /etc/clickhouse-server/config.d/use-keeper.xml<<EOF
<clickhouse>
    <zookeeper>
        <node><host>ck-01</host><port>9181</port></node>
        <node><host>ck-02</host><port>9181</port></node>
        <node><host>ck-03</host><port>9181</port></node>
    </zookeeper>
</clickhouse>
EOF

定义宏文件

cat >/etc/clickhouse-server/config.d/macros.xml<<EOF
<clickhouse>
    <macros>
        <shard>1</shard>
        <replica>1</replica>
        <cluster>VQIU_CK_1S_3R</cluster>
    </macros>
</clickhouse>
EOF

生成密码[可选]

echo -n 'supersecret' | sha256sum | tr -d '-'

追加到密码文件中

vi /etc/clickhouse-server/users.d/default-password.xml
<clickhouse>
    <users>
        <default>
            <password remove='1' />
            <password_sha256_hex>SUPER_SECRET_HASH</password_sha256_hex>
        </default>
    </users>
</clickhouse>

3.3 其它配置

3.3.1 增加最大文件描述符

LimitNOFILE=500000
LimitNPROC=500000

3.3.2 trace_log

trace_log 表记录了查询执行的详细跟踪信息,主要用于性能分析和调试。以下是处理 trace_log 表占用大量空间的方法:

立即清理

-- 查看 trace_log 当前占用空间
SELECT 
    formatReadableSize(sum(bytes)) AS size,
    min(event_time) AS oldest_log,
    max(event_time) AS newest_log,
    count() AS entries
FROM system.parts
WHERE database = 'system' AND table = 'trace_log';

-- 删除所有 trace_log 数据(紧急清理)
ALTER TABLE system.trace_log DELETE WHERE 1=1;

-- 或者删除指定时间前的数据
ALTER TABLE system.trace_log DELETE 
WHERE event_time < now() - INTERVAL 7 DAY;

-- 强制合并,立即释放磁盘空间(删除操作是异步的,可能需要一些时间,请注意,这可能会增加IO负担)
OPTIMIZE TABLE system.trace_log FINAL;

优化手段

限制 trace_log 采样率

<trace_log>
    <database>system</database>
    <table>trace_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    <!-- 只记录 1% 的查询 -->
    <sampling>0.01</sampling>
</trace_log>

增加TTL(数据过期时间)

<trace_log>
    <database>system</database>
    <table>trace_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <ttl>event_date + INTERVAL 7 DAY</ttl>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</trace_log>

完全禁用trace_log

<!--
<trace_log>
    <database>system</database>
    <table>trace_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</trace_log>
-->

或者使用的是在配置目录下(如/etc/clickhouse-server/config.d/)的覆盖文件,可以在新文件中将trace_log设置为空元素以禁用:

<yandex>
  <trace_log remove="1" />
</yandex>

重启服务

systemctl restart clickhouse-server

如果配置文件中没有设置TTL,你也可以通过ALTER TABLE语句为trace_log表添加TTL:

ALTER TABLE system.trace_log MODIFY TTL event_date + INTERVAL 7 DAY;
这样,超过7天的数据会自动被删除。

最佳实践

  1. 生产环境:建议完全禁用或设置极低的采样率(如0.01)
  2. 开发/测试环境:可保留但设置较短的TTL(3-7天)
  3. 紧急情况:立即执行 ALTER TABLE DELETE 清理数据
  4. 长期方案:修改配置文件并重启ClickHouse服务
<!-- /etc/clickhouse-server/config.d/trace_log_config.xml -->
<yandex>
    <trace_log>
        <database>system</database>
        <table>trace_log</table>
        <partition_by>toYYYYMM(event_date)</partition_by>
        <!-- 只对执行时间超过1秒的查询记录trace -->
        <min_query_duration_ms>1000</min_query_duration_ms>
        <!-- 10% 采样率 -->
        <sampling>0.1</sampling>
        <!-- 3天TTL -->
        <ttl>event_date + INTERVAL 3 DAY</ttl>
        <!-- 降低刷新频率 -->
        <flush_interval_milliseconds>30000</flush_interval_milliseconds>
    </trace_log>
    
    <!-- 同时调整相关设置 -->
    <log_queries>1</log_queries>
    <log_query_threads>0</log_query_threads>  <!-- 关闭线程日志,减少trace数据 -->
    <allow_introspection_functions>0</allow_introspection_functions>
</yandex>

验证与查看

-- 查看trace_log设置是否生效
SELECT 
    name, 
    value, 
    changed,
    description
FROM system.settings 
WHERE name IN (
    'allow_introspection_functions',
    'log_query_threads',
    'trace_profile_events',
    'query_profiler_real_time_period_ns',
    'query_profiler_cpu_time_period_ns'
);

-- 查看trace_log表属性
SELECT 
    name,
    engine,
    partition_key,
    sorting_key,
    ttl_expression,
    total_rows,
    formatReadableSize(total_bytes) as size
FROM system.tables 
WHERE database = 'system' AND name = 'trace_log';

-- 监控trace_log写入频率
SELECT 
    toStartOfMinute(event_time) AS minute,
    count() AS queries_traced
FROM system.trace_log 
WHERE event_time > now() - INTERVAL 1 HOUR
GROUP BY minute
ORDER BY minute DESC;

3.3.3 text_log

ClickHouse 的 text_log 表记录了服务器的文本日志,如果占用大量空间,可以通过以下方法清理和优化:

-- 查看 text_log 当前占用空间
SELECT 
    formatReadableSize(sum(bytes_on_disk)) AS size,
    min(event_time) AS oldest_log,
    max(event_time) AS newest_log,
    count() AS total_entries,
    uniqExact(thread_id) AS unique_threads
FROM system.parts
WHERE database = 'system' AND table = 'text_log' AND active;

-- 按日志级别统计
SELECT 
    level,
    count() AS count,
    formatReadableSize(sum(length(message))) AS message_size
FROM system.text_log
WHERE event_time > now() - INTERVAL 1 DAY
GROUP BY level
ORDER BY count DESC;

-- 清理特定时间前的数据
ALTER TABLE system.text_log DELETE 
WHERE event_time < now() - INTERVAL 7 DAY;

-- 只保留 ERROR 级别的日志,清理其他级别的旧日志
ALTER TABLE system.text_log DELETE 
WHERE event_time < now() - INTERVAL 3 DAY AND level NOT IN ('Error', 'Fatal');

-- 紧急情况:清理所有 text_log 数据
ALTER TABLE system.text_log DELETE WHERE 1=1;

-- 强制合并,立即释放空间
OPTIMIZE TABLE system.text_log FINAL;

最佳实践

  1. 生产环境:设置 level="warning" 或 level="error",TTL为1-3天
  2. 开发环境:可以适当放宽,但也要设置TTL和大小限制
  3. 紧急处理:使用 ALTER TABLE DELETE 立即清理
  4. 长期方案:修改配置文件并重启服务
  5. 监控告警:设置磁盘空间和表大小的监控

建议配置

<!-- /etc/clickhouse-server/config.d/text_log_optimized.xml -->
<yandex>
    <text_log>
        <database>system</database>
        <table>text_log</table>
        <partition_by>toYYYYMMDD(event_time)</partition_by>  <!-- 按天分区 -->
        <level>warning</level>  <!-- 生产环境建议warning或error -->
        <ttl>event_time + INTERVAL 2 DAY</ttl>
        <max_log_message_length>2048</max_log_message_length>  <!-- 限制消息长度 -->
        <flush_interval_milliseconds>10000</flush_interval_milliseconds>  <!-- 10秒刷盘 -->
        
        <!-- 以下字段可以禁用,减少存储 -->
        <!-- <log_thread_names>0</log_thread_names> -->
        <!-- <log_query_threads>0</log_query_threads> -->
    </text_log>
    
    <!-- 调整日志相关全局设置 -->
    <logger>
        <level>warning</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
        <size>100M</size>  <!-- 日志文件大小限制 -->
        <count>10</count>  <!-- 保留的日志文件数量 -->
    </logger>
    
    <!-- 降低日志详细程度 -->
    <log_queries>0</log_queries>
    <log_query_threads>0</log_query_threads>
    <allow_introspection_functions>0</allow_introspection_functions>
</yandex>

验证和监控

-- 创建监控视图
CREATE VIEW system.text_log_monitor AS
SELECT 
    toDate(event_time) AS log_date,
    level,
    count() AS entry_count,
    formatReadableSize(sum(length(message))) AS total_message_size,
    avg(length(message)) AS avg_message_length,
    max(event_time) AS last_entry
FROM system.text_log
WHERE event_time > today() - INTERVAL 7 DAY
GROUP BY log_date, level
ORDER BY log_date DESC, entry_count DESC;

-- 查看日志增长趋势
SELECT 
    toStartOfHour(event_time) AS hour,
    level,
    count() AS entries_per_hour,
    formatReadableSize(sum(length(message))) AS size_per_hour
FROM system.text_log
WHERE event_time > now() - INTERVAL 24 HOUR
GROUP BY hour, level
ORDER BY hour DESC, entries_per_hour DESC;

-- 查看最频繁的日志消息
SELECT 
    substring(message, 1, 100) AS message_preview,
    count() AS frequency,
    level,
    min(event_time) AS first_seen,
    max(event_time) AS last_seen
FROM system.text_log
WHERE event_time > now() - INTERVAL 1 DAY
GROUP BY substring(message, 1, 100), level
HAVING frequency > 10
ORDER BY frequency DESC
LIMIT 20;

3.3.4 query_log

记录所有执行的查询,是分析慢查询、性能瓶颈和查询模式的最重要依据。

如果磁盘空间告急,可以立即执行以下操作:

-- 1. 清理历史数据(例如删除30天前的日志)
ALTER TABLE system.query_log DELETE WHERE event_time < now() - INTERVAL 30 DAY;

-- 2. 清理特定类型的日志(例如只清理`SELECT`查询)
ALTER TABLE system.query_log DELETE WHERE type = 'QueryFinish' AND event_time < now() - INTERVAL 14 DAY;

-- 3. 强制合并数据部分,立即回收空间
OPTIMIZE TABLE system.query_log FINAL;

长期优化及最佳实践

  • 生产环境:建议采用采样(如10%-20%)+ 较短TTL(如30天) 的组合。务必保留 session_log 用于安全审计。
  • 开发/测试环境:可以设置更短的TTL(如7天)和更低的采样率(如1%),甚至完全关闭 query_thread_log。
  • 分析与审计环境:如果需要全量日志进行分析,不应设置采样,但必须配置足够大的专用磁盘和严格的TTL,并重点优化 query_settings、thread_ids 等大字段的存储。
<!-- /etc/clickhouse-server/config.d/query_log_config.xml -->
<yandex>
    <query_log>
        <database>system</database>
        <table>query_log</table>
        <!-- 按月份分区,便于管理 -->
        <partition_by>toYYYYMM(event_date)</partition_by>
        <!-- 核心1:设置30天自动过期 -->
        <ttl>event_date + INTERVAL 30 DAY</ttl>
        <!-- 核心2:设置采样率,仅记录10%的查询 -->
        <sampling>0.10</sampling>
        <!-- 核心3:不记录查询设置和线程列表,大幅节省空间 -->
        <log_query_settings>0</log_query_settings>
        <log_query_threads>0</log_query_threads>
        <!-- 刷新间隔调整为15秒 -->
        <flush_interval_milliseconds>15000</flush_interval_milliseconds>
    </query_log>
</yandex>

3.3.5 query_thread_log

优化 query_thread_log(查询线程日志)的核心思路是:在保留足够线程级诊断信息与控制存储成本之间取得平衡。以下是具体的优化方案。

如果磁盘空间紧张,可立即执行以下命令:

-- 1. 清理历史数据(例如删除30天前的日志)
ALTER TABLE system.query_thread_log DELETE WHERE event_time < now() - INTERVAL 30 DAY;

-- 2. 可选:与query_log保持一致,清理已不存在的查询记录
-- 注意:此操作较慢,建议在低峰期执行
ALTER TABLE system.query_thread_log DELETE WHERE query_id NOT IN (SELECT query_id FROM system.query_log);

-- 3. 强制合并数据部分,立即回收磁盘空间
OPTIMIZE TABLE system.query_thread_log FINAL;

最佳实践与优化

  • 通用生产环境:建议采用 sampling=0.05(5%采样) + ttl='30 DAY' 的组合,并关闭 log_profile_events。这能在保留关键线程信息的同时,将存储消耗降至极低。
  • 深度性能调优环境:如果需要分析所有线程的详细性能指标(ProfileEvents),则不应降低采样率或关闭相关字段,但必须配置更短的TTL(如7天),并确保有足够磁盘空间。
  • 资源极度受限或仅需审计的环境:可以考虑完全关闭 query_thread_log(在配置文件中将其整个配置块注释或设置为 <query_thread_log remove="1" />),仅依赖 query_log 进行基本查询审计。
<!-- /etc/clickhouse-server/config.d/query_thread_log_config.xml -->
<yandex>
    <query_thread_log>
        <database>system</database>
        <table>query_thread_log</table>
        <partition_by>toYYYYMM(event_date)</partition_by>
        <!-- 核心1:设置自动过期时间(TTL),例如30天 -->
        <ttl>event_date + INTERVAL 30 DAY</ttl>
        <!-- 核心2:设置采样率,通常应<= query_log的采样率 -->
        <sampling>0.05</sampling>
        <!-- 核心3:关闭非关键字段记录,大幅减少单条日志体积 -->
        <log_profile_events>0</log_profile_events>
        <log_settings>0</log_settings>
    </query_thread_log>
</yandex>

3.3.6 part_log

针对 part_log(分区操作日志)的优化,核心思路与之前不同:它不仅是空间管理问题,更是监控MergeTree引擎健康状况的关键工具。优化重点在于保留足够诊断信息以排查写入/合并问题,同时控制其增长。

如果磁盘空间紧张,可立即执行:

-- 1. 清理历史数据(例如删除90天前的日志,因其诊断价值周期较长)
ALTER TABLE system.part_log DELETE WHERE event_time < now() - INTERVAL 90 DAY;

-- 2. 针对性地清理特定类型的事件(例如仅保留`MERGE_PARTS`和`DOWNLOAD_PART`等关键操作)
ALTER TABLE system.part_log DELETE
WHERE event_time < now() - INTERVAL 30 DAY
AND event_type NOT IN ('NewPart', 'MergeParts', 'MutatePart', 'DownloadPart');

-- 3. 强制合并,立即回收空间
OPTIMIZE TABLE system.part_log FINAL;

优化与最佳实践

part_log 的优化更侧重于字段粒度的控制,而非采样(通常不推荐对 part_log 采样,可能导致关键事件丢失):

  • <ttl>:建议设置为 90 DAY 或更长。分区问题可能需要回溯较长时间。
  • <log_queries>:设为 0。除非你需要明确知道是哪条SQL触发了分区操作,否则关闭此字段能显著减少记录体积。
  • <log_server_timezone>:设为 0。事件时间通常足够用于分析。

修改 /etc/clickhouse-server/config.xml 或其 config.d/ 下的文件是根本方法。以下是兼顾监控与存储的配置示例:

<!-- /etc/clickhouse-server/config.d/part_log_config.xml -->
<yandex>
    <part_log>
        <database>system</database>
        <table>part_log</table>
        <partition_by>toYYYYMM(event_date)</partition_by>
        <!-- 保留较长时间,因其用于排查历史数据问题 -->
        <ttl>event_date + INTERVAL 90 DAY</ttl>
        <!-- 核心:关闭非关键或冗余字段的记录 -->
        <log_queries>0</log_queries>           <!-- 通常无需记录触发分区操作的查询 -->
        <log_server_timezone>0</log_server_timezone> <!-- 节省空间 -->
    </part_log>
</yandex>

四、验证测试

  • 查看集群
SHOW clusters;
  • 查看集群拓扑
select cluster,host_name,host_address,user, errors_count, is_active from system.clusters;
  • 查看volume_type 类型
SELECT
    policy_name,
    volume_name,
    volume_type,
    load_balancing,
    disks
FROM system.storage_policies;
  • 查看所有数据库占用的空间大小
-- 查看所有数据库的大小(按数据库分组)
SELECT 
    database,
    formatReadableSize(sum(bytes_on_disk)) AS size,
    formatReadableSize(sum(rows_count) * avg(row_size)) AS estimated_size
FROM system.parts
WHERE active
GROUP BY database
ORDER BY sum(bytes_on_disk) DESC;
  • 查看指定数据库中所有表的大小
-- 查看某个数据库(例如my_db)中所有表的大小
SELECT 
    table AS table_name,
    formatReadableSize(sum(bytes)) AS size,
    formatReadableSize(sum(data_compressed_bytes)) AS compressed_size,
    formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size,
    sum(rows) AS total_rows,
    count() AS parts_count
FROM system.parts
WHERE database = 'my_db' AND active
GROUP BY table
ORDER BY sum(bytes) DESC;
  • 查看特定表详细信息
-- 查看特定表的详细空间使用情况
SELECT 
    partition,
    name AS part_name,
    rows,
    formatReadableSize(bytes_on_disk) AS size_on_disk,
    formatReadableSize(data_compressed_bytes) AS compressed_size,
    formatReadableSize(data_uncompressed_bytes) AS uncompressed_size,
    formatReadableSize(marks_bytes) AS marks_size,
    compression_codec,
    active
FROM system.parts
WHERE database = 'my_db' AND table = 'my_table'
ORDER BY partition, name;
  • 查看每个列的存储大小
-- 查看表中每个列的存储大小
SELECT 
    table,
    column,
    formatReadableSize(sum(column_data_compressed_bytes)) AS compressed_size,
    formatReadableSize(sum(column_data_uncompressed_bytes)) AS uncompressed_size,
    formatReadableSize(sum(column_data_uncompressed_bytes) - 
                      sum(column_data_compressed_bytes)) AS saved_size,
    round((sum(column_data_uncompressed_bytes) - 
           sum(column_data_compressed_bytes)) / 
           sum(column_data_uncompressed_bytes) * 100, 2) AS compression_ratio_percent
FROM system.parts_columns
WHERE database = 'my_db' AND table = 'my_table'
GROUP BY table, column
ORDER BY sum(column_data_compressed_bytes) DESC;
  • 查看分布式表的大小
-- 对于分布式表,查看每个分片的大小
SELECT 
    shard_num,
    formatReadableSize(sum(bytes)) AS size,
    sum(rows) AS rows
FROM clusterAllReplicas('cluster_name', system.parts)
WHERE database = 'my_db' AND table = 'my_table' AND active
GROUP BY shard_num
ORDER BY shard_num;
  • 查看system 库表的大小
SELECT 
    table,
    formatReadableSize(sum(bytes_on_disk)) AS size,
    formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size,
    sum(rows) AS rows,
    count() AS parts_count
FROM system.parts
WHERE database = 'system' AND active
GROUP BY table
ORDER BY sum(bytes_on_disk) DESC
LIMIT 20;
  • 紧急处理所有日志信息
-- 紧急清理所有旧的日志数据
ALTER TABLE system.query_log DELETE WHERE 1=1;
ALTER TABLE system.query_thread_log DELETE WHERE 1=1;
ALTER TABLE system.trace_log DELETE WHERE 1=1;

-- 强制合并parts(回收空间)
OPTIMIZE TABLE system.query_log FINAL;
OPTIMIZE TABLE system.query_thread_log FINAL;

五、参考引用