PgSql亿级数据测试
约 1422 字大约 5 分钟
2025-05-25
直接执行
create database big_data;
comment on database big_data is '亿级数据库';
drop table if exists customer;
create table customer
(
id bigserial not null primary key,
name varchar(100),
tenant_id varchar(100),
user_id varchar(100),
external_user_id varchar(100),
age int,
tag_name varchar(10000),
tag_id text,
tag_id_var varchar(10000),
create_time timestamp,
update_time timestamp
);
-- 生成随机汉字
CREATE OR REPLACE FUNCTION random_chinese_string(start_codepoint INT DEFAULT 20000, codepoint_count INT DEFAULT 3755,
max_length INT DEFAULT 10)
RETURNS TEXT AS
$$
SELECT string_agg(chr(start_codepoint + (random() * codepoint_count)::int), '')
FROM generate_series(1, FLOOR(random() * max_length + 1)::INT);
$$ LANGUAGE sql;
-- 随机选取固定字符串
CREATE OR REPLACE FUNCTION random_fixed_string()
RETURNS TEXT AS
$$
DECLARE
str_array TEXT[] := ARRAY [
'aaaaaaaaaaaaaaaa','bbbbbbbbbbbbbbbb','cccccccccccccccc','dddddddddddddddd',
'eeeeeeeeeeeeeeee','ffffffffffffffff','gggggggggggggggg','hhhhhhhhhhhhhhhh',
'iiiiiiiiiiiiiiii','jjjjjjjjjjjjjjjj','kkkkkkkkkkkkkkkk','llllllllllllllll',
'mmmmmmmmmmmmmmmm','nnnnnnnnnnnnnnnn','oooooooooooooooo','pppppppppppppppp',
'qqqqqqqqqqqqqqqq','rrrrrrrrrrrrrrrr','ssssssssssssssss','tttttttttttttttt',
'uuuuuuuuuuuuuuuu','vvvvvvvvvvvvvvvv','wwwwwwwwwwwwwwww','xxxxxxxxxxxxxxxx',
'yyyyyyyyyyyyyyyy','zzzzzzzzzzzzzzzz'
];
BEGIN
RETURN str_array[floor(random() * array_length(str_array, 1))::INT + 1];
END;
$$ LANGUAGE plpgsql VOLATILE;
-- 生成随机字符串
CREATE OR REPLACE FUNCTION random_string(max_length INTEGER,
char_set TEXT DEFAULT 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz')
RETURNS TEXT AS
$$
DECLARE
result TEXT := '';
actual_length INT := FLOOR(RANDOM() * max_length + 1)::INT;
i INT;
BEGIN
IF max_length < 1 THEN
RAISE EXCEPTION 'Length must be at least 1';
END IF;
FOR i IN 1..actual_length
LOOP
result := result || substr(char_set, FLOOR(RANDOM() * length(char_set))::INT + 1, 1);
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql;
-- 生产固定字符串
CREATE OR REPLACE FUNCTION random_complex_string(length INTEGER)
RETURNS TEXT AS
$$
DECLARE
chars TEXT := 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
result TEXT := '';
i INT;
BEGIN
FOR i IN 1..length
LOOP
result := result || substring(chars FROM (random() * length(chars))::int + 1 FOR 1);
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql VOLATILE;
-- 生成随机整数
CREATE OR REPLACE FUNCTION random_int(min_val INT, max_val INT)
RETURNS INT AS
$$
BEGIN
RETURN FLOOR(random() * (max_val - min_val + 1) + min_val)::INT;
END;
$$ LANGUAGE plpgsql;
-- 生成随机字符(汉字)数组
CREATE OR REPLACE FUNCTION random_chinese_string_array(max_length INT, count INT)
RETURNS TEXT[] AS
$$
DECLARE
result TEXT[] := '{}';
i INT;
str TEXT;
BEGIN
FOR i IN 1..count
LOOP
SELECT STRING_AGG(CHR((RANDOM() * (20902 - 19968) + 19968)::INT), '')
INTO str
FROM GENERATE_SERIES(1, (RANDOM() * (max_length - 1) + 1)::INT);
result := array_append(result, str);
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION random_string_array(max_length INT, count INT)
RETURNS TEXT[] AS
$$
SELECT ARRAY_AGG(random_string((random() * max_length)::INT + 1))
FROM GENERATE_SERIES(1, count);
$$ LANGUAGE SQL;
-- 生成时间
CREATE OR REPLACE FUNCTION random_timestamp_between(start_date TIMESTAMP, end_date TIMESTAMP)
RETURNS TIMESTAMP AS
$$
DECLARE
time_diff INTERVAL := end_date - start_date;
BEGIN
RETURN start_date + (RANDOM() * time_diff);
END;
$$ LANGUAGE plpgsql;
-- 插入表
INSERT INTO customer(name, tenant_id, user_id, external_user_id, age, tag_name, tag_id, tag_id_var, create_time)
SELECT
random_chinese_string(19968, 20901, 4),
random_fixed_string(),
random_string(8, 'abcdefghijklmnopqrstuvwxyz'),
random_complex_string(32),
random_int(1, 100),
random_chinese_string_array(4, 5),
random_string_array(16, 10),
random_string_array(16, 10),
random_timestamp_between('2020-01-01', '2025-05-01')
FROM generate_series(1, 2000000);
SELECT version();
truncate table customer;
select *
from customer
where name like '%余%';
select *
from customer
where tag_id like '%iad%';
select count(1)
from customer;
SELECT relname AS Table,
pg_size_pretty(pg_total_relation_size(relid)) AS Size
FROM pg_catalog.pg_statio_user_tables
ORDER BY pg_total_relation_size(relid) DESC;1.创建表
drop table if exists customer;
create table customer
(
id bigserial not null primary key,
name varchar(100),
tenant_id varchar(100),
user_id varchar(100),
external_user_id varchar(100),
age int,
tag_name varchar(10000),
tag_id text,
tag_id_var varchar(10000),
create_time timestamp,
update_time timestamp
);2.创建函数
2.1 生成随机整数
CREATE OR REPLACE FUNCTION random_int(min_val INT, max_val INT)
RETURNS INT AS
$$
BEGIN
RETURN FLOOR(random() * (max_val - min_val + 1) + min_val)::INT;
END;
$$ LANGUAGE plpgsql;
-- 输出 10 ~ 50 的随机整数
SELECT random_int(10, 50);2.2 生成随机汉字
CREATE OR REPLACE FUNCTION random_chinese(count int)
RETURNS TEXT AS $$
SELECT string_agg(chr(20000 + (random() * 3755)::int), '')
FROM generate_series(1, count);
$$ LANGUAGE sql STRICT;
-- 生成随机的2位数汉字,例如: 学校
select random_chinese(2);
CREATE OR REPLACE FUNCTION random_chinese_string(start_codepoint INT DEFAULT 20000, codepoint_count INT DEFAULT 3755, max_length INT DEFAULT 10)
RETURNS TEXT AS $$
SELECT string_agg(chr(start_codepoint + (random() * codepoint_count)::int), '')
FROM generate_series(1, FLOOR(random() * max_length + 1)::INT);
$$ LANGUAGE sql STRICT;
-- 使用全部常用汉字范围,最长 8 个字符
SELECT random_chinese_string(19968, 20901, 8);2.3 生成随机字符串
CREATE OR REPLACE FUNCTION random_string(max_length INTEGER,
char_set TEXT DEFAULT 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz')
RETURNS TEXT AS
$$
DECLARE
result TEXT := '';
actual_length INT := FLOOR(RANDOM() * max_length + 1)::INT;
i INT;
BEGIN
IF max_length < 1 THEN
RAISE EXCEPTION 'Length must be at least 1';
END IF;
FOR i IN 1..actual_length
LOOP
result := result || substr(char_set, FLOOR(RANDOM() * length(char_set))::INT + 1, 1);
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql;
-- 在默认数据集里生成1-8位字符串,例如: XdZStYVL
SELECT random_string(8);
-- 生成 1~8 位数字字符串,第二个参数限定范围,例如: 3178
SELECT random_string(8, '0123456789');
-- 生产固定字符串
CREATE OR REPLACE FUNCTION random_complex_string(length INTEGER)
RETURNS TEXT AS $$
DECLARE
chars TEXT := 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
result TEXT := '';
i INT;
BEGIN
FOR i IN 1..length LOOP
result := result || substring(chars FROM (random() * length(chars))::int + 1 FOR 1);
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql VOLATILE;
SELECT random_complex_string(12); -- 输出类似: 'T8gF3sLmK2jP'2.4 生成随机数字数组
CREATE OR REPLACE FUNCTION random_int_array(min_value INT, max_value INT, count INT)
RETURNS INT[] AS
$$
SELECT ARRAY_AGG((FLOOR(RANDOM() * ($2 - $1 + 1) + $1))::INT)
FROM GENERATE_SERIES(1, $3);
$$ LANGUAGE SQL;
-- 生成 5 个在 [10, 50] 范围内的整数,例如: {49,34,26,43,29,35}
SELECT random_int_array(10, 50, 6);2.5 生成随机字符(汉字)数组
CREATE OR REPLACE FUNCTION random_string_array(max_length INT, count INT)
RETURNS TEXT[] AS
$$
SELECT ARRAY_AGG(random_string((random() * max_length)::INT + 1))
FROM GENERATE_SERIES(1, count);
$$ LANGUAGE SQL;
-- 10代表字符串最大长度,5代表数组大小,例如: {xvRDNOteK,Xr,jv,l,Q}
SELECT random_string_array(10, 5);
CREATE OR REPLACE FUNCTION random_chinese_string_array(max_length INT, count INT)
RETURNS TEXT[] AS $$
DECLARE
result TEXT[] := '{}';
i INT;
str TEXT;
BEGIN
FOR i IN 1..count LOOP
SELECT STRING_AGG(CHR((RANDOM() * (20902 - 19968) + 19968)::INT), '')
INTO str
FROM GENERATE_SERIES(1, (RANDOM() * (max_length - 1) + 1)::INT);
result := array_append(result, str);
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql;
-- 10代表字符串最大长度,5代表数组大小,例如: {"你好世界","北京欢迎你","春暖花开","山高水长","风雨无阻"}
SELECT random_chinese_string_array(10, 5);2.6 生成时间
CREATE OR REPLACE FUNCTION random_timestamp_between(start_date TIMESTAMP, end_date TIMESTAMP)
RETURNS TIMESTAMP AS $$
DECLARE
time_diff INTERVAL := end_date - start_date;
BEGIN
RETURN start_date + (RANDOM() * time_diff);
END;
$$ LANGUAGE plpgsql;
-- 输出:例如 '2024-06-15 12:34:56'
SELECT random_timestamp_between('2020-01-01', '2025-01-01');2.7 固定数组选
CREATE OR REPLACE FUNCTION random_fixed_string()
RETURNS TEXT AS $$
DECLARE
str_array TEXT[] := ARRAY[
'aaaaaaaaaaaaaaaa','bbbbbbbbbbbbbbbb','cccccccccccccccc','dddddddddddddddd',
'eeeeeeeeeeeeeeee','ffffffffffffffff','gggggggggggggggg','hhhhhhhhhhhhhhhh',
'iiiiiiiiiiiiiiii','jjjjjjjjjjjjjjjj','kkkkkkkkkkkkkkkk','llllllllllllllll',
'mmmmmmmmmmmmmmmm','nnnnnnnnnnnnnnnn','oooooooooooooooo','pppppppppppppppp',
'qqqqqqqqqqqqqqqq','rrrrrrrrrrrrrrrr','ssssssssssssssss','tttttttttttttttt',
'uuuuuuuuuuuuuuuu','vvvvvvvvvvvvvvvv','wwwwwwwwwwwwwwww','xxxxxxxxxxxxxxxx',
'yyyyyyyyyyyyyyyy','zzzzzzzzzzzzzzzz'
];
BEGIN
RETURN str_array[floor(random() * array_length(str_array, 1))::INT + 1];
END;
$$ LANGUAGE plpgsql VOLATILE;
SELECT DISTINCT tenant_id
FROM (
SELECT random_fixed_string() AS tenant_id
FROM generate_series(1, 100)
) AS tmp;插入数据
INSERT INTO customer(name, tenant_id, user_id, external_user_id, age, tag_name, tag_id, tag_id_var, create_time)
SELECT
random_chinese_string(19968, 20901, 8),
random_fixed_string(),
random_string(16),
random_complex_string(32),
random_int(1, 120),
random_chinese_string_array(8, 10),
random_string_array(16, 10),
random_string_array(16, 10),
random_timestamp_between('2015-01-01', '2025-05-01')
FROM generate_series(1, 2000000);
-- 获取当前主键的最大值并更新序列
SELECT setval('customer_id_seq', (SELECT MAX(id) FROM customer));
-- 分批插入
DO $$
DECLARE
batch_size INT := 100000;
total INT := 5000000;
i INT := 0;
BEGIN
WHILE i < total LOOP
INSERT INTO customer(name, tenant_id, user_id, external_user_id, age, tag_name, tag_id, tag_id_var, create_time)
SELECT
random_chinese_string(19968, 20901, 8),
random_fixed_string(),
random_string(16),
random_complex_string(32),
random_int(1, 120),
random_chinese_string_array(8, 10),
random_string_array(16, 10),
random_string_array(16, 10),
random_timestamp_between('2015-01-01', '2025-05-01')
FROM generate_series(1, batch_size);
i := i + batch_size;
RAISE NOTICE 'Inserted % records', i;
END LOOP;
END;
$$ LANGUAGE plpgsql;