diff --git a/COPYRIGHT b/COPYRIGHT index 0fc523af94605..655a3c59d60f5 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,7 +1,7 @@ PostgreSQL Database Management System (formerly known as Postgres, then as Postgres95) -Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group Portions Copyright (c) 1994, The Regents of the University of California diff --git a/config/thread_test.c b/config/thread_test.c index ff2eace87d841..784f4fe8ce3cc 100644 --- a/config/thread_test.c +++ b/config/thread_test.c @@ -3,7 +3,7 @@ * thread_test.c * libc threading test program * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * config/thread_test.c diff --git a/configure b/configure index 11a4284e5bd75..ce9ea3699938a 100755 --- a/configure +++ b/configure @@ -11,7 +11,7 @@ # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. # -# Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Copyright (c) 1996-2021, PostgreSQL Global Development Group ## -------------------- ## ## M4sh Initialization. 
## ## -------------------- ## @@ -653,6 +653,7 @@ LIBOBJS UUID_LIBS LDAP_LIBS_BE LDAP_LIBS_FE +with_ssl PTHREAD_CFLAGS PTHREAD_LIBS PTHREAD_CC @@ -705,12 +706,10 @@ XML2_LIBS XML2_CFLAGS XML2_CONFIG with_libxml -UUID_EXTRA_OBJS with_uuid with_readline with_systemd with_selinux -with_openssl with_ldap with_krb_srvnam krb_srvtab @@ -855,7 +854,6 @@ with_pam with_bsd_auth with_ldap with_bonjour -with_openssl with_selinux with_systemd with_readline @@ -867,6 +865,8 @@ with_libxslt with_system_tzdata with_zlib with_gnu_ld +with_ssl +with_openssl enable_largefile ' ac_precious_vars='build_alias @@ -1557,7 +1557,6 @@ Optional Packages: --with-bsd-auth build with BSD Authentication support --with-ldap build with LDAP support --with-bonjour build with Bonjour support - --with-openssl build with OpenSSL support --with-selinux build with SELinux support --with-systemd build with systemd support --without-readline do not use GNU Readline nor BSD Libedit for editing @@ -1571,6 +1570,8 @@ Optional Packages: use system time zone data in DIR --without-zlib do not use Zlib --with-gnu-ld assume the C compiler uses GNU ld [default=no] + --with-ssl=LIB use LIB for SSL/TLS support (openssl) + --with-openssl obsolete spelling of --with-ssl=openssl Some influential environment variables: CC C compiler command @@ -1676,7 +1677,7 @@ Copyright (C) 2012 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. -Copyright (c) 1996-2020, PostgreSQL Global Development Group +Copyright (c) 1996-2021, PostgreSQL Global Development Group _ACEOF exit fi @@ -8071,41 +8072,6 @@ fi $as_echo "$with_bonjour" >&6; } -# -# OpenSSL -# -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with OpenSSL support" >&5 -$as_echo_n "checking whether to build with OpenSSL support... " >&6; } - - - -# Check whether --with-openssl was given. 
-if test "${with_openssl+set}" = set; then : - withval=$with_openssl; - case $withval in - yes) - -$as_echo "#define USE_OPENSSL 1" >>confdefs.h - - ;; - no) - : - ;; - *) - as_fn_error $? "no argument expected for --with-openssl option" "$LINENO" 5 - ;; - esac - -else - with_openssl=no - -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_openssl" >&5 -$as_echo "$with_openssl" >&6; } - - # # SELinux # @@ -8303,30 +8269,26 @@ if test "$with_ossp_uuid" = yes ; then with_uuid=ossp fi -if test "$with_uuid" = bsd ; then +if test "$with_uuid" != no ; then + if test "$with_uuid" = bsd ; then $as_echo "#define HAVE_UUID_BSD 1" >>confdefs.h - UUID_EXTRA_OBJS="sha1.o" -elif test "$with_uuid" = e2fs ; then + elif test "$with_uuid" = e2fs ; then $as_echo "#define HAVE_UUID_E2FS 1" >>confdefs.h - UUID_EXTRA_OBJS="sha1.o" -elif test "$with_uuid" = ossp ; then + elif test "$with_uuid" = ossp ; then $as_echo "#define HAVE_UUID_OSSP 1" >>confdefs.h - UUID_EXTRA_OBJS="" -elif test "$with_uuid" = no ; then - UUID_EXTRA_OBJS="" -else - as_fn_error $? "--with-uuid must specify one of bsd, e2fs, or ossp" "$LINENO" 5 + else + as_fn_error $? "--with-uuid must specify one of bsd, e2fs, or ossp" "$LINENO" 5 + fi fi - # # XML # @@ -12179,7 +12141,64 @@ fi fi fi +# +# SSL Library +# +# There is currently only one supported SSL/TLS library: OpenSSL. +# + + + +# Check whether --with-ssl was given. +if test "${with_ssl+set}" = set; then : + withval=$with_ssl; + case $withval in + yes) + as_fn_error $? "argument required for --with-ssl option" "$LINENO" 5 + ;; + no) + as_fn_error $? "argument required for --with-ssl option" "$LINENO" 5 + ;; + *) + + ;; + esac + +fi + + +if test x"$with_ssl" = x"" ; then + with_ssl=no +fi + + + +# Check whether --with-openssl was given. +if test "${with_openssl+set}" = set; then : + withval=$with_openssl; + case $withval in + yes) + : + ;; + no) + : + ;; + *) + as_fn_error $? 
"no argument expected for --with-openssl option" "$LINENO" 5 + ;; + esac + +else + with_openssl=no + +fi + + if test "$with_openssl" = yes ; then + with_ssl=openssl +fi + +if test "$with_ssl" = openssl ; then # Minimum required OpenSSL version is 1.0.1 $as_echo "#define OPENSSL_API_COMPAT 0x10001000L" >>confdefs.h @@ -12440,8 +12459,14 @@ _ACEOF fi done + +$as_echo "#define USE_OPENSSL 1" >>confdefs.h + +elif test "$with_ssl" != no ; then + as_fn_error $? "--with-ssl must specify openssl" "$LINENO" 5 fi + if test "$with_pam" = yes ; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pam_start in -lpam" >&5 $as_echo_n "checking for pam_start in -lpam... " >&6; } @@ -13061,7 +13086,7 @@ $as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h fi -for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/sockio.h sys/tas.h sys/un.h termios.h ucred.h wctype.h +for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/sockio.h sys/tas.h sys/uio.h sys/un.h termios.h ucred.h wctype.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" @@ -13327,7 +13352,7 @@ done fi -if test "$with_openssl" = yes ; then +if test "$with_ssl" = openssl ; then ac_fn_c_check_header_mongrel "$LINENO" "openssl/ssl.h" "ac_cv_header_openssl_ssl_h" "$ac_includes_default" if test "x$ac_cv_header_openssl_ssl_h" = xyes; then : @@ -15155,7 +15180,7 @@ fi LIBS_including_readline="$LIBS" LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` -for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll 
posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink sync_file_range uselocale wcstombs_l +for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink sync_file_range uselocale wcstombs_l writev do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" @@ -15845,6 +15870,19 @@ esac fi +ac_fn_c_check_func "$LINENO" "preadv" "ac_cv_func_preadv" +if test "x$ac_cv_func_preadv" = xyes; then : + $as_echo "#define HAVE_PREADV 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" preadv.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS preadv.$ac_objext" + ;; +esac + +fi + ac_fn_c_check_func "$LINENO" "pwrite" "ac_cv_func_pwrite" if test "x$ac_cv_func_pwrite" = xyes; then : $as_echo "#define HAVE_PWRITE 1" >>confdefs.h @@ -15858,6 +15896,19 @@ esac fi +ac_fn_c_check_func "$LINENO" "pwritev" "ac_cv_func_pwritev" +if test "x$ac_cv_func_pwritev" = xyes; then : + $as_echo "#define HAVE_PWRITEV 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" pwritev.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS pwritev.$ac_objext" + ;; +esac + +fi + ac_fn_c_check_func "$LINENO" "random" "ac_cv_func_random" if test "x$ac_cv_func_random" = xyes; then : $as_echo "#define HAVE_RANDOM 1" >>confdefs.h @@ -15959,12 +16010,29 @@ case $host_os in # Windows uses a specialised env handler mingw*) +$as_echo "#define HAVE_SETENV 1" >>confdefs.h + + $as_echo "#define HAVE_UNSETENV 1" >>confdefs.h + ac_cv_func_setenv=yes ac_cv_func_unsetenv=yes ;; *) - ac_fn_c_check_func "$LINENO" "unsetenv" "ac_cv_func_unsetenv" + ac_fn_c_check_func "$LINENO" "setenv" "ac_cv_func_setenv" +if test "x$ac_cv_func_setenv" = xyes; then : + $as_echo "#define HAVE_SETENV 1" >>confdefs.h + +else + 
case " $LIBOBJS " in + *" setenv.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS setenv.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "unsetenv" "ac_cv_func_unsetenv" if test "x$ac_cv_func_unsetenv" = xyes; then : $as_echo "#define HAVE_UNSETENV 1" >>confdefs.h @@ -18060,7 +18128,7 @@ fi # will be used. { $as_echo "$as_me:${as_lineno-$LINENO}: checking which random number source to use" >&5 $as_echo_n "checking which random number source to use... " >&6; } -if test x"$with_openssl" = x"yes" ; then +if test x"$with_ssl" = x"openssl" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: OpenSSL" >&5 $as_echo "OpenSSL" >&6; } elif test x"$PORTNAME" = x"win32" ; then diff --git a/configure.ac b/configure.ac index fc523c6aeb413..07da84d40172d 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ m4_if(m4_defn([m4_PACKAGE_VERSION]), [2.69], [], [m4_fatal([Autoconf version 2.6 Untested combinations of 'autoconf' and PostgreSQL versions are not recommended. You can remove the check from 'configure.ac' but it is then your responsibility whether the result works or not.])]) -AC_COPYRIGHT([Copyright (c) 1996-2020, PostgreSQL Global Development Group]) +AC_COPYRIGHT([Copyright (c) 1996-2021, PostgreSQL Global Development Group]) AC_CONFIG_SRCDIR([src/backend/access/common/heaptuple.c]) AC_CONFIG_AUX_DIR(config) AC_PREFIX_DEFAULT(/usr/local/pgsql) @@ -852,15 +852,6 @@ PGAC_ARG_BOOL(with, bonjour, no, AC_MSG_RESULT([$with_bonjour]) -# -# OpenSSL -# -AC_MSG_CHECKING([whether to build with OpenSSL support]) -PGAC_ARG_BOOL(with, openssl, no, [build with OpenSSL support], - [AC_DEFINE([USE_OPENSSL], 1, [Define to build with OpenSSL support. 
(--with-openssl)])]) -AC_MSG_RESULT([$with_openssl]) -AC_SUBST(with_openssl) - # # SELinux # @@ -919,22 +910,18 @@ if test "$with_ossp_uuid" = yes ; then with_uuid=ossp fi -if test "$with_uuid" = bsd ; then - AC_DEFINE([HAVE_UUID_BSD], 1, [Define to 1 if you have BSD UUID support.]) - UUID_EXTRA_OBJS="sha1.o" -elif test "$with_uuid" = e2fs ; then - AC_DEFINE([HAVE_UUID_E2FS], 1, [Define to 1 if you have E2FS UUID support.]) - UUID_EXTRA_OBJS="sha1.o" -elif test "$with_uuid" = ossp ; then - AC_DEFINE([HAVE_UUID_OSSP], 1, [Define to 1 if you have OSSP UUID support.]) - UUID_EXTRA_OBJS="" -elif test "$with_uuid" = no ; then - UUID_EXTRA_OBJS="" -else - AC_MSG_ERROR([--with-uuid must specify one of bsd, e2fs, or ossp]) +if test "$with_uuid" != no ; then + if test "$with_uuid" = bsd ; then + AC_DEFINE([HAVE_UUID_BSD], 1, [Define to 1 if you have BSD UUID support.]) + elif test "$with_uuid" = e2fs ; then + AC_DEFINE([HAVE_UUID_E2FS], 1, [Define to 1 if you have E2FS UUID support.]) + elif test "$with_uuid" = ossp ; then + AC_DEFINE([HAVE_UUID_OSSP], 1, [Define to 1 if you have OSSP UUID support.]) + else + AC_MSG_ERROR([--with-uuid must specify one of bsd, e2fs, or ossp]) + fi fi AC_SUBST(with_uuid) -AC_SUBST(UUID_EXTRA_OBJS) # @@ -1209,7 +1196,21 @@ if test "$with_gssapi" = yes ; then fi fi +# +# SSL Library +# +# There is currently only one supported SSL/TLS library: OpenSSL. +# +PGAC_ARG_REQ(with, ssl, [LIB], [use LIB for SSL/TLS support (openssl)]) +if test x"$with_ssl" = x"" ; then + with_ssl=no +fi +PGAC_ARG_BOOL(with, openssl, no, [obsolete spelling of --with-ssl=openssl]) if test "$with_openssl" = yes ; then + with_ssl=openssl +fi + +if test "$with_ssl" = openssl ; then dnl Order matters! # Minimum required OpenSSL version is 1.0.1 AC_DEFINE(OPENSSL_API_COMPAT, [0x10001000L], @@ -1233,7 +1234,11 @@ if test "$with_openssl" = yes ; then # thread-safety. In 1.1.0, it's no longer required, and CRYPTO_lock() # function was removed. 
AC_CHECK_FUNCS([CRYPTO_lock]) + AC_DEFINE([USE_OPENSSL], 1, [Define to 1 if you have OpenSSL support.]) +elif test "$with_ssl" != no ; then + AC_MSG_ERROR([--with-ssl must specify openssl]) fi +AC_SUBST(with_ssl) if test "$with_pam" = yes ; then AC_CHECK_LIB(pam, pam_start, [], [AC_MSG_ERROR([library 'pam' is required for PAM])]) @@ -1331,6 +1336,7 @@ AC_CHECK_HEADERS(m4_normalize([ sys/shm.h sys/sockio.h sys/tas.h + sys/uio.h sys/un.h termios.h ucred.h @@ -1405,7 +1411,7 @@ if test "$with_gssapi" = yes ; then [AC_CHECK_HEADERS(gssapi.h, [], [AC_MSG_ERROR([gssapi.h header file is required for GSSAPI])])]) fi -if test "$with_openssl" = yes ; then +if test "$with_ssl" = openssl ; then AC_CHECK_HEADER(openssl/ssl.h, [], [AC_MSG_ERROR([header file is required for OpenSSL])]) AC_CHECK_HEADER(openssl/err.h, [], [AC_MSG_ERROR([header file is required for OpenSSL])]) fi @@ -1663,6 +1669,7 @@ AC_CHECK_FUNCS(m4_normalize([ pstat pthread_is_threaded_np readlink + readv setproctitle setproctitle_fast setsid @@ -1673,6 +1680,7 @@ AC_CHECK_FUNCS(m4_normalize([ sync_file_range uselocale wcstombs_l + writev ])) # These typically are compiler builtins, for which AC_CHECK_FUNCS fails. @@ -1734,7 +1742,9 @@ AC_REPLACE_FUNCS(m4_normalize([ link mkdtemp pread + preadv pwrite + pwritev random srandom strlcat @@ -1757,11 +1767,13 @@ fi case $host_os in # Windows uses a specialised env handler mingw*) + AC_DEFINE(HAVE_SETENV, 1, [Define to 1 because replacement version used.]) AC_DEFINE(HAVE_UNSETENV, 1, [Define to 1 because replacement version used.]) + ac_cv_func_setenv=yes ac_cv_func_unsetenv=yes ;; *) - AC_REPLACE_FUNCS([unsetenv]) + AC_REPLACE_FUNCS([setenv unsetenv]) ;; esac @@ -2156,7 +2168,7 @@ fi # first choice, else the native platform sources (Windows API or /dev/urandom) # will be used. 
AC_MSG_CHECKING([which random number source to use]) -if test x"$with_openssl" = x"yes" ; then +if test x"$with_ssl" = x"openssl" ; then AC_MSG_RESULT([OpenSSL]) elif test x"$PORTNAME" = x"win32" ; then AC_MSG_RESULT([Windows native]) diff --git a/contrib/Makefile b/contrib/Makefile index 7a4866e338db0..f27e458482e09 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -33,7 +33,6 @@ SUBDIRS = \ pg_buffercache \ pg_freespacemap \ pg_prewarm \ - pg_standby \ pg_stat_statements \ pg_surgery \ pg_trgm \ @@ -52,7 +51,7 @@ SUBDIRS = \ unaccent \ vacuumlo -ifeq ($(with_openssl),yes) +ifeq ($(with_ssl),openssl) SUBDIRS += sslinfo else ALWAYS_SUBDIRS += sslinfo diff --git a/contrib/adminpack/Makefile b/contrib/adminpack/Makefile index 630fea7726c7b..851504f4aefba 100644 --- a/contrib/adminpack/Makefile +++ b/contrib/adminpack/Makefile @@ -4,7 +4,6 @@ MODULE_big = adminpack OBJS = \ $(WIN32RES) \ adminpack.o -PG_CPPFLAGS = -I$(libpq_srcdir) EXTENSION = adminpack DATA = adminpack--1.0.sql adminpack--1.0--1.1.sql adminpack--1.1--2.0.sql\ diff --git a/contrib/adminpack/adminpack.c b/contrib/adminpack/adminpack.c index d064b5a0806df..c3c5e03945de9 100644 --- a/contrib/adminpack/adminpack.c +++ b/contrib/adminpack/adminpack.c @@ -3,7 +3,7 @@ * adminpack.c * * - * Copyright (c) 2002-2020, PostgreSQL Global Development Group + * Copyright (c) 2002-2021, PostgreSQL Global Development Group * * Author: Andreas Pflug * diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out index 882f853d56ac3..1fb3823142902 100644 --- a/contrib/amcheck/expected/check_heap.out +++ b/contrib/amcheck/expected/check_heap.out @@ -109,7 +109,7 @@ ERROR: ending block number must be between 0 and 0 SELECT * FROM verify_heapam(relation := 'heaptest', startblock := 10000, endblock := 11000); ERROR: starting block number must be between 0 and 0 -- Vacuum freeze to change the xids encountered in subsequent tests -VACUUM FREEZE heaptest; +VACUUM (FREEZE, 
DISABLE_PAGE_SKIPPING) heaptest; -- Check that valid options are not rejected nor corruption reported -- for a non-empty frozen table SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none'); diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql index c10a25f21cb89..298de6886afd8 100644 --- a/contrib/amcheck/sql/check_heap.sql +++ b/contrib/amcheck/sql/check_heap.sql @@ -51,7 +51,7 @@ SELECT * FROM verify_heapam(relation := 'heaptest', startblock := 0, endblock := SELECT * FROM verify_heapam(relation := 'heaptest', startblock := 10000, endblock := 11000); -- Vacuum freeze to change the xids encountered in subsequent tests -VACUUM FREEZE heaptest; +VACUUM (FREEZE, DISABLE_PAGE_SKIPPING) heaptest; -- Check that valid options are not rejected nor corruption reported -- for a non-empty frozen table diff --git a/contrib/amcheck/t/001_verify_heapam.pl b/contrib/amcheck/t/001_verify_heapam.pl index 1581e51f3ca7f..a2f65b826d375 100644 --- a/contrib/amcheck/t/001_verify_heapam.pl +++ b/contrib/amcheck/t/001_verify_heapam.pl @@ -46,7 +46,7 @@ # Check a corrupt table with all-frozen data # fresh_test_table('test'); -$node->safe_psql('postgres', q(VACUUM FREEZE test)); +$node->safe_psql('postgres', q(VACUUM (FREEZE, DISABLE_PAGE_SKIPPING) test)); corrupt_first_page('test'); detects_heap_corruption("verify_heapam('test')", "all-frozen corrupted table"); diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index bc34bb0c839cd..88ab32490c0da 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -3,7 +3,7 @@ * verify_heapam.c * Functions to check postgresql heap relations for corruption * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * contrib/amcheck/verify_heapam.c *------------------------------------------------------------------------- diff --git a/contrib/amcheck/verify_nbtree.c 
b/contrib/amcheck/verify_nbtree.c index 6d86e3ccdacfb..b8c7793d9e06a 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -14,7 +14,7 @@ * that every visible heap tuple has a matching index tuple. * * - * Copyright (c) 2017-2020, PostgreSQL Global Development Group + * Copyright (c) 2017-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/amcheck/verify_nbtree.c diff --git a/contrib/auth_delay/auth_delay.c b/contrib/auth_delay/auth_delay.c index 11c2f059e4c59..5820ac328db15 100644 --- a/contrib/auth_delay/auth_delay.c +++ b/contrib/auth_delay/auth_delay.c @@ -2,7 +2,7 @@ * * auth_delay.c * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/auth_delay/auth_delay.c diff --git a/contrib/auto_explain/.gitignore b/contrib/auto_explain/.gitignore new file mode 100644 index 0000000000000..5dcb3ff972350 --- /dev/null +++ b/contrib/auto_explain/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/contrib/auto_explain/Makefile b/contrib/auto_explain/Makefile index 54d6d45d4004b..efd127d3cae64 100644 --- a/contrib/auto_explain/Makefile +++ b/contrib/auto_explain/Makefile @@ -6,6 +6,8 @@ OBJS = \ auto_explain.o PGFILEDESC = "auto_explain - logging facility for execution plans" +TAP_TESTS = 1 + ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/contrib/auto_explain/auto_explain.c b/contrib/auto_explain/auto_explain.c index 56c549d84c1d3..445bb37191217 100644 --- a/contrib/auto_explain/auto_explain.c +++ b/contrib/auto_explain/auto_explain.c @@ -3,7 +3,7 @@ * auto_explain.c * * - * Copyright (c) 2008-2020, PostgreSQL Global Development Group + * Copyright (c) 2008-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/auto_explain/auto_explain.c @@ -371,8 +371,15 @@ explain_ExecutorEnd(QueryDesc *queryDesc) { if 
(queryDesc->totaltime && auto_explain_enabled()) { + MemoryContext oldcxt; double msec; + /* + * Make sure we operate in the per-query context, so any cruft will be + * discarded later during ExecutorEnd. + */ + oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); + /* * Make sure stats accumulation is done. (Note: it's okay if several * levels of hook all do this.) @@ -424,9 +431,9 @@ explain_ExecutorEnd(QueryDesc *queryDesc) (errmsg("duration: %.3f ms plan:\n%s", msec, es->str->data), errhidestmt(true))); - - pfree(es->str->data); } + + MemoryContextSwitchTo(oldcxt); } if (prev_ExecutorEnd) diff --git a/contrib/auto_explain/t/001_auto_explain.pl b/contrib/auto_explain/t/001_auto_explain.pl new file mode 100644 index 0000000000000..7968be963b125 --- /dev/null +++ b/contrib/auto_explain/t/001_auto_explain.pl @@ -0,0 +1,52 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 4; + +my $node = get_new_node('main'); +$node->init; +$node->append_conf('postgresql.conf', + "shared_preload_libraries = 'auto_explain'"); +$node->append_conf('postgresql.conf', "auto_explain.log_min_duration = 0"); +$node->append_conf('postgresql.conf', "auto_explain.log_analyze = on"); +$node->start; + +# run a couple of queries +$node->safe_psql("postgres", "SELECT * FROM pg_class;"); +$node->safe_psql("postgres", + "SELECT * FROM pg_proc WHERE proname = 'int4pl';"); + +# emit some json too +$node->append_conf('postgresql.conf', "auto_explain.log_format = json"); +$node->reload; +$node->safe_psql("postgres", "SELECT * FROM pg_proc;"); +$node->safe_psql("postgres", + "SELECT * FROM pg_class WHERE relname = 'pg_class';"); + +$node->stop('fast'); + +my $log = $node->logfile(); + +my $log_contents = slurp_file($log); + +like( + $log_contents, + qr/Seq Scan on pg_class/, + "sequential scan logged, text mode"); + +like( + $log_contents, + qr/Index Scan using pg_proc_proname_args_nsp_index on pg_proc/, + "index scan logged, text mode"); + +like( + 
$log_contents, + qr/"Node Type": "Seq Scan"[^}]*"Relation Name": "pg_proc"/s, + "sequential scan logged, json mode"); + +like( + $log_contents, + qr/"Node Type": "Index Scan"[^}]*"Index Name": "pg_class_relname_nsp_index"/s, + "index scan logged, json mode"); diff --git a/contrib/bloom/blcost.c b/contrib/bloom/blcost.c index 54f954dce8c9a..4af1fc9e1cc0b 100644 --- a/contrib/bloom/blcost.c +++ b/contrib/bloom/blcost.c @@ -3,7 +3,7 @@ * blcost.c * Cost estimate function for bloom indexes. * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/bloom/blcost.c diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c index 6d3fd5c432cd6..d37ceef753ab0 100644 --- a/contrib/bloom/blinsert.c +++ b/contrib/bloom/blinsert.c @@ -3,7 +3,7 @@ * blinsert.c * Bloom index build and insert functions. * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/bloom/blinsert.c @@ -198,6 +198,7 @@ bool blinsert(Relation index, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, + bool indexUnchanged, IndexInfo *indexInfo) { BloomState blstate; diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h index 23aa7ac4416cc..a22a6dfa40400 100644 --- a/contrib/bloom/bloom.h +++ b/contrib/bloom/bloom.h @@ -3,7 +3,7 @@ * bloom.h * Header for bloom index. 
* - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/bloom/bloom.h @@ -192,6 +192,7 @@ extern bool blvalidate(Oid opclassoid); extern bool blinsert(Relation index, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, + bool indexUnchanged, struct IndexInfo *indexInfo); extern IndexScanDesc blbeginscan(Relation r, int nkeys, int norderbys); extern int64 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c index 3c71401c8c6f4..6ae3710d9f785 100644 --- a/contrib/bloom/blscan.c +++ b/contrib/bloom/blscan.c @@ -3,7 +3,7 @@ * blscan.c * Bloom index scan functions. * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/bloom/blscan.c diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 26b9927c3aaf9..1e505b1da5424 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -3,7 +3,7 @@ * blutils.c * Bloom index utilities. * - * Portions Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2016-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1990-1993, Regents of the University of California * * IDENTIFICATION diff --git a/contrib/bloom/blvacuum.c b/contrib/bloom/blvacuum.c index 3282adde03b1d..88b0a6d29002d 100644 --- a/contrib/bloom/blvacuum.c +++ b/contrib/bloom/blvacuum.c @@ -3,7 +3,7 @@ * blvacuum.c * Bloom VACUUM functions. 
* - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/bloom/blvacuum.c diff --git a/contrib/bloom/blvalidate.c b/contrib/bloom/blvalidate.c index 3c05e5b01c99d..aa8c87c077275 100644 --- a/contrib/bloom/blvalidate.c +++ b/contrib/bloom/blvalidate.c @@ -3,7 +3,7 @@ * blvalidate.c * Opclass validator for bloom. * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/bloom/blvalidate.c diff --git a/contrib/bool_plperl/expected/bool_plperl.out b/contrib/bool_plperl/expected/bool_plperl.out index 84c25acdb4f83..187df8db96f9e 100644 --- a/contrib/bool_plperl/expected/bool_plperl.out +++ b/contrib/bool_plperl/expected/bool_plperl.out @@ -52,7 +52,7 @@ SELECT perl2undef() IS NULL AS p; --- test transforming to perl CREATE FUNCTION bool2perl(bool, bool, bool) RETURNS void LANGUAGE plperl -TRANSFORM FOR TYPE bool +TRANSFORM FOR TYPE bool, for type boolean -- duplicate to test ruleutils AS $$ my ($x, $y, $z) = @_; @@ -68,6 +68,21 @@ SELECT bool2perl (true, false, NULL); (1 row) +--- test ruleutils +\sf bool2perl +CREATE OR REPLACE FUNCTION public.bool2perl(boolean, boolean, boolean) + RETURNS void + TRANSFORM FOR TYPE boolean, FOR TYPE boolean + LANGUAGE plperl +AS $function$ +my ($x, $y, $z) = @_; + +die("NULL mistransformed") if (defined($z)); +die("TRUE mistransformed to UNDEF") if (!defined($x)); +die("FALSE mistransformed to UNDEF") if (!defined($y)); +die("TRUE mistransformed") if (!$x); +die("FALSE mistransformed") if ($y); +$function$ --- test selecting bool through SPI CREATE FUNCTION spi_test() RETURNS void LANGUAGE plperl diff --git a/contrib/bool_plperl/expected/bool_plperlu.out b/contrib/bool_plperl/expected/bool_plperlu.out index 745ba98933862..8337d337e992e 100644 --- a/contrib/bool_plperl/expected/bool_plperlu.out +++ 
b/contrib/bool_plperl/expected/bool_plperlu.out @@ -52,7 +52,7 @@ SELECT perl2undef() IS NULL AS p; --- test transforming to perl CREATE FUNCTION bool2perl(bool, bool, bool) RETURNS void LANGUAGE plperlu -TRANSFORM FOR TYPE bool +TRANSFORM FOR TYPE bool, for type boolean -- duplicate to test ruleutils AS $$ my ($x, $y, $z) = @_; @@ -68,6 +68,21 @@ SELECT bool2perl (true, false, NULL); (1 row) +--- test ruleutils +\sf bool2perl +CREATE OR REPLACE FUNCTION public.bool2perl(boolean, boolean, boolean) + RETURNS void + TRANSFORM FOR TYPE boolean, FOR TYPE boolean + LANGUAGE plperlu +AS $function$ +my ($x, $y, $z) = @_; + +die("NULL mistransformed") if (defined($z)); +die("TRUE mistransformed to UNDEF") if (!defined($x)); +die("FALSE mistransformed to UNDEF") if (!defined($y)); +die("TRUE mistransformed") if (!$x); +die("FALSE mistransformed") if ($y); +$function$ --- test selecting bool through SPI CREATE FUNCTION spi_test() RETURNS void LANGUAGE plperlu diff --git a/contrib/bool_plperl/sql/bool_plperl.sql b/contrib/bool_plperl/sql/bool_plperl.sql index dd99f545ea98e..b7f570862cee5 100644 --- a/contrib/bool_plperl/sql/bool_plperl.sql +++ b/contrib/bool_plperl/sql/bool_plperl.sql @@ -33,7 +33,7 @@ SELECT perl2undef() IS NULL AS p; CREATE FUNCTION bool2perl(bool, bool, bool) RETURNS void LANGUAGE plperl -TRANSFORM FOR TYPE bool +TRANSFORM FOR TYPE bool, for type boolean -- duplicate to test ruleutils AS $$ my ($x, $y, $z) = @_; @@ -46,6 +46,10 @@ $$; SELECT bool2perl (true, false, NULL); +--- test ruleutils + +\sf bool2perl + --- test selecting bool through SPI CREATE FUNCTION spi_test() RETURNS void diff --git a/contrib/bool_plperl/sql/bool_plperlu.sql b/contrib/bool_plperl/sql/bool_plperlu.sql index b756b0be67685..1480a0433067a 100644 --- a/contrib/bool_plperl/sql/bool_plperlu.sql +++ b/contrib/bool_plperl/sql/bool_plperlu.sql @@ -33,7 +33,7 @@ SELECT perl2undef() IS NULL AS p; CREATE FUNCTION bool2perl(bool, bool, bool) RETURNS void LANGUAGE plperlu -TRANSFORM FOR TYPE 
bool +TRANSFORM FOR TYPE bool, for type boolean -- duplicate to test ruleutils AS $$ my ($x, $y, $z) = @_; @@ -46,6 +46,10 @@ $$; SELECT bool2perl (true, false, NULL); +--- test ruleutils + +\sf bool2perl + --- test selecting bool through SPI CREATE FUNCTION spi_test() RETURNS void diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c index 651227f510e9a..3a0beaa88e7b5 100644 --- a/contrib/dblink/dblink.c +++ b/contrib/dblink/dblink.c @@ -9,7 +9,7 @@ * Shridhar Daithankar * * contrib/dblink/dblink.c - * Copyright (c) 2001-2020, PostgreSQL Global Development Group + * Copyright (c) 2001-2021, PostgreSQL Global Development Group * ALL RIGHTS RESERVED; * * Permission to use, copy, modify, and distribute this software and its diff --git a/contrib/dblink/input/paths.source b/contrib/dblink/input/paths.source index aab3a3b2bfb42..881a65314f34e 100644 --- a/contrib/dblink/input/paths.source +++ b/contrib/dblink/input/paths.source @@ -1,8 +1,8 @@ -- Initialization that requires path substitution. -CREATE FUNCTION putenv(text) +CREATE FUNCTION setenv(text, text) RETURNS void - AS '@libdir@/regress@DLSUFFIX@', 'regress_putenv' + AS '@libdir@/regress@DLSUFFIX@', 'regress_setenv' LANGUAGE C STRICT; CREATE FUNCTION wait_pid(int) @@ -11,4 +11,4 @@ CREATE FUNCTION wait_pid(int) LANGUAGE C STRICT; CREATE FUNCTION set_pgservicefile(text) RETURNS void LANGUAGE SQL - AS $$SELECT putenv('PGSERVICEFILE=@abs_srcdir@/' || $1)$$; + AS $$SELECT setenv('PGSERVICEFILE', '@abs_srcdir@/' || $1)$$; diff --git a/contrib/dblink/output/paths.source b/contrib/dblink/output/paths.source index e1097f0996fea..8ed95e1f78259 100644 --- a/contrib/dblink/output/paths.source +++ b/contrib/dblink/output/paths.source @@ -1,11 +1,11 @@ -- Initialization that requires path substitution. 
-CREATE FUNCTION putenv(text) +CREATE FUNCTION setenv(text, text) RETURNS void - AS '@libdir@/regress@DLSUFFIX@', 'regress_putenv' + AS '@libdir@/regress@DLSUFFIX@', 'regress_setenv' LANGUAGE C STRICT; CREATE FUNCTION wait_pid(int) RETURNS void AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT; CREATE FUNCTION set_pgservicefile(text) RETURNS void LANGUAGE SQL - AS $$SELECT putenv('PGSERVICEFILE=@abs_srcdir@/' || $1)$$; + AS $$SELECT setenv('PGSERVICEFILE', '@abs_srcdir@/' || $1)$$; diff --git a/contrib/dict_int/dict_int.c b/contrib/dict_int/dict_int.c index a7e9890fcc4f8..3c84208b11e3b 100644 --- a/contrib/dict_int/dict_int.c +++ b/contrib/dict_int/dict_int.c @@ -3,7 +3,7 @@ * dict_int.c * Text search dictionary for integers * - * Copyright (c) 2007-2020, PostgreSQL Global Development Group + * Copyright (c) 2007-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/dict_int/dict_int.c diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c index 1065d64ccb0ad..79c4f18f409c7 100644 --- a/contrib/dict_xsyn/dict_xsyn.c +++ b/contrib/dict_xsyn/dict_xsyn.c @@ -3,7 +3,7 @@ * dict_xsyn.c * Extended synonym dictionary * - * Copyright (c) 2007-2020, PostgreSQL Global Development Group + * Copyright (c) 2007-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/dict_xsyn/dict_xsyn.c diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 9863e3274807b..2059c07349bf7 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -3,7 +3,7 @@ * file_fdw.c * foreign-data wrapper for server-side flat files (or programs). 
* - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/file_fdw/file_fdw.c diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c index ccbb84b481ba3..d237772a3b212 100644 --- a/contrib/fuzzystrmatch/fuzzystrmatch.c +++ b/contrib/fuzzystrmatch/fuzzystrmatch.c @@ -6,7 +6,7 @@ * Joe Conway * * contrib/fuzzystrmatch/fuzzystrmatch.c - * Copyright (c) 2001-2020, PostgreSQL Global Development Group + * Copyright (c) 2001-2021, PostgreSQL Global Development Group * ALL RIGHTS RESERVED; * * metaphone() diff --git a/contrib/hstore/expected/hstore.out b/contrib/hstore/expected/hstore.out index fdcc3920cecd8..64a3272b9cf45 100644 --- a/contrib/hstore/expected/hstore.out +++ b/contrib/hstore/expected/hstore.out @@ -1583,6 +1583,10 @@ select f2 from test_json_agg; "d"=>NULL, "x"=>"xyzzy" (3 rows) +-- Test subscripting in plpgsql +do $$ declare h hstore; +begin h['a'] := 'b'; raise notice 'h = %, h[a] = %', h, h['a']; end $$; +NOTICE: h = "a"=>"b", h[a] = b -- Check the hstore_hash() and hstore_hash_extended() function explicitly. SELECT v as value, hstore_hash(v)::bit(32) as standard, hstore_hash_extended(v, 0)::bit(32) as extended0, diff --git a/contrib/hstore/hstore_subs.c b/contrib/hstore/hstore_subs.c index e52de04f1a6b2..ca4c174a51501 100644 --- a/contrib/hstore/hstore_subs.c +++ b/contrib/hstore/hstore_subs.c @@ -12,7 +12,7 @@ * check_subscripts function and just let the fetch and assign functions * do everything. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/contrib/hstore/sql/hstore.sql b/contrib/hstore/sql/hstore.sql index 8d96e30403042..a59db66b0aee8 100644 --- a/contrib/hstore/sql/hstore.sql +++ b/contrib/hstore/sql/hstore.sql @@ -372,6 +372,10 @@ select f2['d':'e'] from test_json_agg; -- error update test_json_agg set f2['d'] = f2['e'], f2['x'] = 'xyzzy'; select f2 from test_json_agg; +-- Test subscripting in plpgsql +do $$ declare h hstore; +begin h['a'] := 'b'; raise notice 'h = %, h[a] = %', h, h['a']; end $$; + -- Check the hstore_hash() and hstore_hash_extended() function explicitly. SELECT v as value, hstore_hash(v)::bit(32) as standard, hstore_hash_extended(v, 0)::bit(32) as extended0, diff --git a/contrib/hstore_plpython/expected/hstore_plpython.out b/contrib/hstore_plpython/expected/hstore_plpython.out index 1ab5feea93d79..ecf1dd61bc17f 100644 --- a/contrib/hstore_plpython/expected/hstore_plpython.out +++ b/contrib/hstore_plpython/expected/hstore_plpython.out @@ -47,19 +47,29 @@ SELECT test1arr(array['aa=>bb, cc=>NULL'::hstore, 'dd=>ee']); (1 row) -- test python -> hstore -CREATE FUNCTION test2() RETURNS hstore +CREATE FUNCTION test2(a int, b text) RETURNS hstore LANGUAGE plpythonu TRANSFORM FOR TYPE hstore AS $$ -val = {'a': 1, 'b': 'boo', 'c': None} +val = {'a': a, 'b': b, 'c': None} return val $$; -SELECT test2(); +SELECT test2(1, 'boo'); test2 --------------------------------- "a"=>"1", "b"=>"boo", "c"=>NULL (1 row) +--- test ruleutils +\sf test2 +CREATE OR REPLACE FUNCTION public.test2(a integer, b text) + RETURNS hstore + TRANSFORM FOR TYPE hstore + LANGUAGE plpythonu +AS $function$ +val = {'a': a, 'b': b, 'c': None} +return val +$function$ -- test python -> hstore[] CREATE FUNCTION test2arr() RETURNS hstore[] LANGUAGE plpythonu diff --git 
a/contrib/hstore_plpython/sql/hstore_plpython.sql b/contrib/hstore_plpython/sql/hstore_plpython.sql index 2c54ee6aaad26..b6d98b7dd5371 100644 --- a/contrib/hstore_plpython/sql/hstore_plpython.sql +++ b/contrib/hstore_plpython/sql/hstore_plpython.sql @@ -40,15 +40,18 @@ SELECT test1arr(array['aa=>bb, cc=>NULL'::hstore, 'dd=>ee']); -- test python -> hstore -CREATE FUNCTION test2() RETURNS hstore +CREATE FUNCTION test2(a int, b text) RETURNS hstore LANGUAGE plpythonu TRANSFORM FOR TYPE hstore AS $$ -val = {'a': 1, 'b': 'boo', 'c': None} +val = {'a': a, 'b': b, 'c': None} return val $$; -SELECT test2(); +SELECT test2(1, 'boo'); + +--- test ruleutils +\sf test2 -- test python -> hstore[] diff --git a/contrib/intarray/_int_selfuncs.c b/contrib/intarray/_int_selfuncs.c index a3a538a20d96e..38d68c12d231d 100644 --- a/contrib/intarray/_int_selfuncs.c +++ b/contrib/intarray/_int_selfuncs.c @@ -3,7 +3,7 @@ * _int_selfuncs.c * Functions for selectivity estimation of intarray operators * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/contrib/isn/isn.c b/contrib/isn/isn.c index cf36bb69d4d5a..1cf1669f25cac 100644 --- a/contrib/isn/isn.c +++ b/contrib/isn/isn.c @@ -4,7 +4,7 @@ * PostgreSQL type definitions for ISNs (ISBN, ISMN, ISSN, EAN13, UPC) * * Author: German Mendez Bravo (Kronuz) - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/isn/isn.c diff --git a/contrib/isn/isn.h b/contrib/isn/isn.h index 017f5974db568..4f4935f80d855 100644 --- a/contrib/isn/isn.h +++ b/contrib/isn/isn.h @@ -4,7 +4,7 @@ * PostgreSQL type definitions for ISNs (ISBN, ISMN, ISSN, EAN13, UPC) * * Author: German Mendez Bravo (Kronuz) - * Portions Copyright (c) 1996-2020, PostgreSQL Global 
Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/isn/isn.h diff --git a/contrib/oid2name/oid2name.c b/contrib/oid2name/oid2name.c index 5a884e29049f8..65cce4999366b 100644 --- a/contrib/oid2name/oid2name.c +++ b/contrib/oid2name/oid2name.c @@ -347,8 +347,7 @@ sql_conn(struct options *my_opts) /* check to see that the backend connection was successfully made */ if (PQstatus(conn) == CONNECTION_BAD) { - pg_log_error("could not connect to database %s: %s", - my_opts->dbname, PQerrorMessage(conn)); + pg_log_error("%s", PQerrorMessage(conn)); PQfinish(conn); exit(1); } diff --git a/contrib/old_snapshot/Makefile b/contrib/old_snapshot/Makefile index 77c85df3225d6..adb557532fc1c 100644 --- a/contrib/old_snapshot/Makefile +++ b/contrib/old_snapshot/Makefile @@ -4,7 +4,6 @@ MODULE_big = old_snapshot OBJS = \ $(WIN32RES) \ time_mapping.o -PG_CPPFLAGS = -I$(libpq_srcdir) EXTENSION = old_snapshot DATA = old_snapshot--1.0.sql diff --git a/contrib/old_snapshot/time_mapping.c b/contrib/old_snapshot/time_mapping.c index 37e0055a00860..3df07177ed661 100644 --- a/contrib/old_snapshot/time_mapping.c +++ b/contrib/old_snapshot/time_mapping.c @@ -3,7 +3,7 @@ * time_mapping.c * time to XID mapping information * - * Copyright (c) 2020, PostgreSQL Global Development Group + * Copyright (c) 2020-2021, PostgreSQL Global Development Group * * contrib/old_snapshot/time_mapping.c *------------------------------------------------------------------------- diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile index d9d8177116b03..2d330ddb2857d 100644 --- a/contrib/pageinspect/Makefile +++ b/contrib/pageinspect/Makefile @@ -7,19 +7,21 @@ OBJS = \ btreefuncs.o \ fsmfuncs.o \ ginfuncs.o \ + gistfuncs.o \ hashfuncs.o \ heapfuncs.o \ rawpage.o EXTENSION = pageinspect -DATA = pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ +DATA = pageinspect--1.8--1.9.sql \ + pageinspect--1.7--1.8.sql 
pageinspect--1.6--1.7.sql \ pageinspect--1.5.sql pageinspect--1.5--1.6.sql \ pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \ pageinspect--1.2--1.3.sql pageinspect--1.1--1.2.sql \ pageinspect--1.0--1.1.sql PGFILEDESC = "pageinspect - functions to inspect contents of database pages" -REGRESS = page btree brin gin hash checksum +REGRESS = page btree brin gin gist hash checksum oldextversions ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/pageinspect/brinfuncs.c b/contrib/pageinspect/brinfuncs.c index fb32d74a66a98..0e3c2deb66c02 100644 --- a/contrib/pageinspect/brinfuncs.c +++ b/contrib/pageinspect/brinfuncs.c @@ -2,7 +2,7 @@ * brinfuncs.c * Functions to investigate BRIN indexes * - * Copyright (c) 2014-2020, PostgreSQL Global Development Group + * Copyright (c) 2014-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pageinspect/brinfuncs.c @@ -252,7 +252,18 @@ brin_page_items(PG_FUNCTION_ARGS) int att = attno - 1; values[0] = UInt16GetDatum(offset); - values[1] = UInt32GetDatum(dtup->bt_blkno); + switch (TupleDescAttr(tupdesc, 1)->atttypid) + { + case INT8OID: + values[1] = Int64GetDatum((int64) dtup->bt_blkno); + break; + case INT4OID: + /* support for old extension version */ + values[1] = UInt32GetDatum(dtup->bt_blkno); + break; + default: + elog(ERROR, "incorrect output types"); + } values[2] = UInt16GetDatum(attno); values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls); values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls); diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c index 445605db58af5..8bb180bbbe0ef 100644 --- a/contrib/pageinspect/btreefuncs.c +++ b/contrib/pageinspect/btreefuncs.c @@ -41,8 +41,10 @@ #include "utils/varlena.h" PG_FUNCTION_INFO_V1(bt_metap); +PG_FUNCTION_INFO_V1(bt_page_items_1_9); PG_FUNCTION_INFO_V1(bt_page_items); PG_FUNCTION_INFO_V1(bt_page_items_bytea); +PG_FUNCTION_INFO_V1(bt_page_stats_1_9); PG_FUNCTION_INFO_V1(bt_page_stats); #define IS_INDEX(r) 
((r)->rd_rel->relkind == RELKIND_INDEX) @@ -160,11 +162,11 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1); * ----------------------------------------------- */ -Datum -bt_page_stats(PG_FUNCTION_ARGS) +static Datum +bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) { text *relname = PG_GETARG_TEXT_PP(0); - uint32 blkno = PG_GETARG_UINT32(1); + int64 blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1)); Buffer buffer; Relation rel; RangeVar *relrv; @@ -197,8 +199,15 @@ bt_page_stats(PG_FUNCTION_ARGS) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); + if (blkno < 0 || blkno > MaxBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid block number"))); + if (blkno == 0) - elog(ERROR, "block 0 is a meta page"); + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("block 0 is a meta page"))); CHECK_RELATION_BLOCK_RANGE(rel, blkno); @@ -219,16 +228,16 @@ bt_page_stats(PG_FUNCTION_ARGS) elog(ERROR, "return type must be a row type"); j = 0; - values[j++] = psprintf("%d", stat.blkno); + values[j++] = psprintf("%u", stat.blkno); values[j++] = psprintf("%c", stat.type); - values[j++] = psprintf("%d", stat.live_items); - values[j++] = psprintf("%d", stat.dead_items); - values[j++] = psprintf("%d", stat.avg_item_size); - values[j++] = psprintf("%d", stat.page_size); - values[j++] = psprintf("%d", stat.free_size); - values[j++] = psprintf("%d", stat.btpo_prev); - values[j++] = psprintf("%d", stat.btpo_next); - values[j++] = psprintf("%d", (stat.type == 'd') ? 
stat.btpo.xact : stat.btpo.level); + values[j++] = psprintf("%u", stat.live_items); + values[j++] = psprintf("%u", stat.dead_items); + values[j++] = psprintf("%u", stat.avg_item_size); + values[j++] = psprintf("%u", stat.page_size); + values[j++] = psprintf("%u", stat.free_size); + values[j++] = psprintf("%u", stat.btpo_prev); + values[j++] = psprintf("%u", stat.btpo_next); + values[j++] = psprintf("%u", (stat.type == 'd') ? stat.btpo.xact : stat.btpo.level); values[j++] = psprintf("%d", stat.btpo_flags); tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), @@ -239,6 +248,19 @@ bt_page_stats(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } +Datum +bt_page_stats_1_9(PG_FUNCTION_ARGS) +{ + return bt_page_stats_internal(fcinfo, PAGEINSPECT_V1_9); +} + +/* entry point for old extension version */ +Datum +bt_page_stats(PG_FUNCTION_ARGS) +{ + return bt_page_stats_internal(fcinfo, PAGEINSPECT_V1_8); +} + /* * cross-call data structure for SRF @@ -405,11 +427,11 @@ bt_page_print_tuples(struct user_args *uargs) * Usage: SELECT * FROM bt_page_items('t1_pkey', 1); *------------------------------------------------------- */ -Datum -bt_page_items(PG_FUNCTION_ARGS) +static Datum +bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) { text *relname = PG_GETARG_TEXT_PP(0); - uint32 blkno = PG_GETARG_UINT32(1); + int64 blkno = (ext_version == PAGEINSPECT_V1_8 ? 
PG_GETARG_UINT32(1) : PG_GETARG_INT64(1)); Datum result; FuncCallContext *fctx; MemoryContext mctx; @@ -447,8 +469,15 @@ bt_page_items(PG_FUNCTION_ARGS) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); + if (blkno < 0 || blkno > MaxBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid block number"))); + if (blkno == 0) - elog(ERROR, "block 0 is a meta page"); + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("block 0 is a meta page"))); CHECK_RELATION_BLOCK_RANGE(rel, blkno); @@ -506,6 +535,19 @@ bt_page_items(PG_FUNCTION_ARGS) SRF_RETURN_DONE(fctx); } +Datum +bt_page_items_1_9(PG_FUNCTION_ARGS) +{ + return bt_page_items_internal(fcinfo, PAGEINSPECT_V1_9); +} + +/* entry point for old extension version */ +Datum +bt_page_items(PG_FUNCTION_ARGS) +{ + return bt_page_items_internal(fcinfo, PAGEINSPECT_V1_8); +} + /*------------------------------------------------------- * bt_page_items_bytea() * diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out index 17bf0c5470825..a7632be36a116 100644 --- a/contrib/pageinspect/expected/btree.out +++ b/contrib/pageinspect/expected/btree.out @@ -14,6 +14,8 @@ oldest_xact | 0 last_cleanup_num_tuples | -1 allequalimage | t +SELECT * FROM bt_page_stats('test1_a_idx', -1); +ERROR: invalid block number SELECT * FROM bt_page_stats('test1_a_idx', 0); ERROR: block 0 is a meta page SELECT * FROM bt_page_stats('test1_a_idx', 1); @@ -32,6 +34,8 @@ btpo_flags | 3 SELECT * FROM bt_page_stats('test1_a_idx', 2); ERROR: block number out of range +SELECT * FROM bt_page_items('test1_a_idx', -1); +ERROR: invalid block number SELECT * FROM bt_page_items('test1_a_idx', 0); ERROR: block 0 is a meta page SELECT * FROM bt_page_items('test1_a_idx', 1); @@ -48,6 +52,8 @@ tids | SELECT * FROM bt_page_items('test1_a_idx', 2); ERROR: block number out of range +SELECT * FROM 
bt_page_items(get_raw_page('test1_a_idx', -1)); +ERROR: invalid block number SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 0)); ERROR: block is a meta page SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 1)); diff --git a/contrib/pageinspect/expected/gin.out b/contrib/pageinspect/expected/gin.out index 82f63b23b19d7..ef7570b9723be 100644 --- a/contrib/pageinspect/expected/gin.out +++ b/contrib/pageinspect/expected/gin.out @@ -35,3 +35,4 @@ FROM gin_leafpage_items(get_raw_page('test1_y_idx', -[ RECORD 1 ] ?column? | t +DROP TABLE test1; diff --git a/contrib/pageinspect/expected/gist.out b/contrib/pageinspect/expected/gist.out new file mode 100644 index 0000000000000..5f7d8cea71b83 --- /dev/null +++ b/contrib/pageinspect/expected/gist.out @@ -0,0 +1,69 @@ +-- The gist_page_opaque_info() function prints the page's LSN. Normally, +-- that's constant 1 (GistBuildLSN) on every page of a freshly built GiST +-- index. But with wal_level=minimal, the whole relation is dumped to WAL at +-- the end of the transaction if it's smaller than wal_skip_threshold, which +-- updates the LSNs. Wrap the tests on gist_page_opaque_info() in the +-- same transaction with the CREATE INDEX so that we see the LSNs before +-- they are possibly overwritten at end of transaction. +BEGIN; +-- Create a test table and GiST index. 
+CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM + generate_series(1,1000) i; +CREATE INDEX test_gist_idx ON test_gist USING gist (p); +-- Page 0 is the root, the rest are leaf pages +SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0)); + lsn | nsn | rightlink | flags +-----+-----+------------+------- + 0/1 | 0/0 | 4294967295 | {} +(1 row) + +SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1)); + lsn | nsn | rightlink | flags +-----+-----+------------+-------- + 0/1 | 0/0 | 4294967295 | {leaf} +(1 row) + +SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2)); + lsn | nsn | rightlink | flags +-----+-----+-----------+-------- + 0/1 | 0/0 | 1 | {leaf} +(1 row) + +COMMIT; +SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx'); + itemoffset | ctid | itemlen | keys +------------+-----------+---------+------------------- + 1 | (1,65535) | 40 | (p)=((166,166)) + 2 | (2,65535) | 40 | (p)=((332,332)) + 3 | (3,65535) | 40 | (p)=((498,498)) + 4 | (4,65535) | 40 | (p)=((664,664)) + 5 | (5,65535) | 40 | (p)=((830,830)) + 6 | (6,65535) | 40 | (p)=((996,996)) + 7 | (7,65535) | 40 | (p)=((1000,1000)) +(7 rows) + +SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 1), 'test_gist_idx') LIMIT 5; + itemoffset | ctid | itemlen | keys +------------+-------+---------+------------- + 1 | (0,1) | 40 | (p)=((1,1)) + 2 | (0,2) | 40 | (p)=((2,2)) + 3 | (0,3) | 40 | (p)=((3,3)) + 4 | (0,4) | 40 | (p)=((4,4)) + 5 | (0,5) | 40 | (p)=((5,5)) +(5 rows) + +-- gist_page_items_bytea prints the raw key data as a bytea. The output of that is +-- platform-dependent (endianess), so omit the actual key data from the output. 
+SELECT itemoffset, ctid, itemlen FROM gist_page_items_bytea(get_raw_page('test_gist_idx', 0)); + itemoffset | ctid | itemlen +------------+-----------+--------- + 1 | (1,65535) | 40 + 2 | (2,65535) | 40 + 3 | (3,65535) | 40 + 4 | (4,65535) | 40 + 5 | (5,65535) | 40 + 6 | (6,65535) | 40 + 7 | (7,65535) | 40 +(7 rows) + +DROP TABLE test_gist; diff --git a/contrib/pageinspect/expected/hash.out b/contrib/pageinspect/expected/hash.out index 75d7bcfad5f74..bd0628d01369a 100644 --- a/contrib/pageinspect/expected/hash.out +++ b/contrib/pageinspect/expected/hash.out @@ -28,6 +28,8 @@ hash_page_type | bitmap SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6)); ERROR: block number 6 is out of range for relation "test_hash_a_idx" +SELECT * FROM hash_bitmap_info('test_hash_a_idx', -1); +ERROR: invalid block number SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0); ERROR: invalid overflow block number 0 SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1); @@ -40,6 +42,8 @@ SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4); ERROR: invalid overflow block number 4 SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5); ERROR: invalid overflow block number 5 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 6); +ERROR: block number 6 is out of range for relation "test_hash_a_idx" SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 0)); diff --git a/contrib/pageinspect/expected/oldextversions.out b/contrib/pageinspect/expected/oldextversions.out new file mode 100644 index 0000000000000..04dc7f8640eba --- /dev/null +++ b/contrib/pageinspect/expected/oldextversions.out @@ -0,0 +1,40 @@ +-- test old extension version entry points +DROP EXTENSION pageinspect; +CREATE EXTENSION pageinspect VERSION '1.8'; +CREATE TABLE test1 (a int8, b text); +INSERT INTO test1 VALUES (72057594037927937, 'text'); +CREATE INDEX test1_a_idx ON test1 USING 
btree (a); +-- from page.sql +SELECT octet_length(get_raw_page('test1', 0)) AS main_0; + main_0 +-------- + 8192 +(1 row) + +SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0; + main_0 +-------- + 8192 +(1 row) + +SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_test; + silly_checksum_test +--------------------- + t +(1 row) + +-- from btree.sql +SELECT * FROM bt_page_stats('test1_a_idx', 1); + blkno | type | live_items | dead_items | avg_item_size | page_size | free_size | btpo_prev | btpo_next | btpo | btpo_flags +-------+------+------------+------------+---------------+-----------+-----------+-----------+-----------+------+------------ + 1 | l | 1 | 0 | 16 | 8192 | 8128 | 0 | 0 | 0 | 3 +(1 row) + +SELECT * FROM bt_page_items('test1_a_idx', 1); + itemoffset | ctid | itemlen | nulls | vars | data | dead | htid | tids +------------+-------+---------+-------+------+-------------------------+------+-------+------ + 1 | (0,1) | 16 | f | f | 01 00 00 00 00 00 00 01 | f | (0,1) | +(1 row) + +DROP TABLE test1; +DROP EXTENSION pageinspect; diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out index b6aea0124bbc1..4da28f0a1db58 100644 --- a/contrib/pageinspect/expected/page.out +++ b/contrib/pageinspect/expected/page.out @@ -1,7 +1,7 @@ CREATE EXTENSION pageinspect; CREATE TABLE test1 (a int, b int); INSERT INTO test1 VALUES (16777217, 131584); -VACUUM test1; -- set up FSM +VACUUM (DISABLE_PAGE_SKIPPING) test1; -- set up FSM -- The page contents can vary, so just test that it can be read -- successfully, but don't keep the output. 
SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0; @@ -32,6 +32,8 @@ SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0; SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1; ERROR: block number 1 is out of range for relation "test1" +SELECT octet_length(get_raw_page('test1', 'main', -1)); +ERROR: invalid block number SELECT octet_length(get_raw_page('xxx', 'main', 0)); ERROR: relation "xxx" does not exist SELECT octet_length(get_raw_page('test1', 'xxx', 0)); @@ -55,6 +57,8 @@ SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_ t (1 row) +SELECT page_checksum(get_raw_page('test1', 0), -1); +ERROR: invalid block number SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bits) FROM heap_page_items(get_raw_page('test1', 0)); tuple_data_split @@ -83,18 +87,8 @@ SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0)); (1 row) -- If we freeze the only tuple on test1, the infomask should --- always be the same in all test runs. we show raw flags by --- default: HEAP_XMIN_COMMITTED and HEAP_XMIN_INVALID. -VACUUM FREEZE test1; -SELECT t_infomask, t_infomask2, raw_flags, combined_flags -FROM heap_page_items(get_raw_page('test1', 0)), - LATERAL heap_tuple_infomask_flags(t_infomask, t_infomask2); - t_infomask | t_infomask2 | raw_flags | combined_flags -------------+-------------+-----------------------------------------------------------+-------------------- - 2816 | 2 | {HEAP_XMIN_COMMITTED,HEAP_XMIN_INVALID,HEAP_XMAX_INVALID} | {HEAP_XMIN_FROZEN} -(1 row) - --- output the decoded flag HEAP_XMIN_FROZEN instead +-- always be the same in all test runs. 
+VACUUM (FREEZE, DISABLE_PAGE_SKIPPING) test1; SELECT t_infomask, t_infomask2, raw_flags, combined_flags FROM heap_page_items(get_raw_page('test1', 0)), LATERAL heap_tuple_infomask_flags(t_infomask, t_infomask2); diff --git a/contrib/pageinspect/fsmfuncs.c b/contrib/pageinspect/fsmfuncs.c index 099acbb2fe4b0..930f1df339000 100644 --- a/contrib/pageinspect/fsmfuncs.c +++ b/contrib/pageinspect/fsmfuncs.c @@ -9,7 +9,7 @@ * there's hardly any use case for using these without superuser-rights * anyway. * - * Copyright (c) 2007-2020, PostgreSQL Global Development Group + * Copyright (c) 2007-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pageinspect/fsmfuncs.c diff --git a/contrib/pageinspect/ginfuncs.c b/contrib/pageinspect/ginfuncs.c index 711473579a86f..e425cbcdb8e19 100644 --- a/contrib/pageinspect/ginfuncs.c +++ b/contrib/pageinspect/ginfuncs.c @@ -2,7 +2,7 @@ * ginfuncs.c * Functions to investigate the content of GIN indexes * - * Copyright (c) 2014-2020, PostgreSQL Global Development Group + * Copyright (c) 2014-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pageinspect/ginfuncs.c diff --git a/contrib/pageinspect/gistfuncs.c b/contrib/pageinspect/gistfuncs.c new file mode 100644 index 0000000000000..d5da1ea839a57 --- /dev/null +++ b/contrib/pageinspect/gistfuncs.c @@ -0,0 +1,271 @@ +/* + * gistfuncs.c + * Functions to investigate the content of GiST indexes + * + * Copyright (c) 2014-2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pageinspect/gistfuncs.c + */ +#include "postgres.h" + +#include "access/gist.h" +#include "access/gist_private.h" +#include "access/htup.h" +#include "access/relation.h" +#include "catalog/namespace.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "pageinspect.h" +#include "storage/itemptr.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/rel.h" +#include "utils/pg_lsn.h" +#include "utils/varlena.h" + 
+PG_FUNCTION_INFO_V1(gist_page_opaque_info); +PG_FUNCTION_INFO_V1(gist_page_items); +PG_FUNCTION_INFO_V1(gist_page_items_bytea); + +#define ItemPointerGetDatum(X) PointerGetDatum(X) + + +Datum +gist_page_opaque_info(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + TupleDesc tupdesc; + Page page; + GISTPageOpaque opaq; + HeapTuple resultTuple; + Datum values[4]; + bool nulls[4]; + Datum flags[16]; + int nflags = 0; + uint16 flagbits; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + page = get_page_from_raw(raw_page); + + opaq = (GISTPageOpaque) PageGetSpecialPointer(page); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* Convert the flags bitmask to an array of human-readable names */ + flagbits = opaq->flags; + if (flagbits & F_LEAF) + flags[nflags++] = CStringGetTextDatum("leaf"); + if (flagbits & F_DELETED) + flags[nflags++] = CStringGetTextDatum("deleted"); + if (flagbits & F_TUPLES_DELETED) + flags[nflags++] = CStringGetTextDatum("tuples_deleted"); + if (flagbits & F_FOLLOW_RIGHT) + flags[nflags++] = CStringGetTextDatum("follow_right"); + if (flagbits & F_HAS_GARBAGE) + flags[nflags++] = CStringGetTextDatum("has_garbage"); + flagbits &= ~(F_LEAF | F_DELETED | F_TUPLES_DELETED | F_FOLLOW_RIGHT | F_HAS_GARBAGE); + if (flagbits) + { + /* any flags we don't recognize are printed in hex */ + flags[nflags++] = DirectFunctionCall1(to_hex32, Int32GetDatum(flagbits)); + } + + memset(nulls, 0, sizeof(nulls)); + + values[0] = LSNGetDatum(PageGetLSN(page)); + values[1] = LSNGetDatum(GistPageGetNSN(page)); + values[2] = Int64GetDatum(opaq->rightlink); + values[3] = PointerGetDatum(construct_array(flags, nflags, + TEXTOID, + -1, false, TYPALIGN_INT)); + + /* Build and return the result tuple. 
*/ + resultTuple = heap_form_tuple(tupdesc, values, nulls); + + return HeapTupleGetDatum(resultTuple); +} + +Datum +gist_page_items_bytea(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + bool randomAccess; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + MemoryContext oldcontext; + Page page; + OffsetNumber offset; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */ + oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0; + tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + + page = get_page_from_raw(raw_page); + + if (GistPageIsDeleted(page)) + elog(NOTICE, "page is deleted"); + + for (offset = FirstOffsetNumber; + offset <= PageGetMaxOffsetNumber(page); + offset++) + { + Datum values[4]; + bool nulls[4]; + ItemId id; + IndexTuple itup; + bytea *tuple_bytea; + int tuple_len; + + id = PageGetItemId(page, offset); + + if (!ItemIdIsValid(id)) + elog(ERROR, "invalid ItemId"); + + itup = (IndexTuple) PageGetItem(page, 
id); + tuple_len = IndexTupleSize(itup); + + memset(nulls, 0, sizeof(nulls)); + + values[0] = DatumGetInt16(offset); + values[1] = ItemPointerGetDatum(&itup->t_tid); + values[2] = Int32GetDatum((int) IndexTupleSize(itup)); + + tuple_bytea = (bytea *) palloc(tuple_len + VARHDRSZ); + SET_VARSIZE(tuple_bytea, tuple_len + VARHDRSZ); + memcpy(VARDATA(tuple_bytea), itup, tuple_len); + values[3] = PointerGetDatum(tuple_bytea); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + return (Datum) 0; +} + +Datum +gist_page_items(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + Oid indexRelid = PG_GETARG_OID(1); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + bool randomAccess; + Relation indexRel; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + MemoryContext oldcontext; + Page page; + OffsetNumber offset; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */ + oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0; + tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + 
MemoryContextSwitchTo(oldcontext); + + /* Open the relation */ + indexRel = index_open(indexRelid, AccessShareLock); + + page = get_page_from_raw(raw_page); + + if (GistPageIsDeleted(page)) + elog(NOTICE, "page is deleted"); + + for (offset = FirstOffsetNumber; + offset <= PageGetMaxOffsetNumber(page); + offset++) + { + Datum values[4]; + bool nulls[4]; + ItemId id; + IndexTuple itup; + Datum itup_values[INDEX_MAX_KEYS]; + bool itup_isnull[INDEX_MAX_KEYS]; + char *key_desc; + + id = PageGetItemId(page, offset); + + if (!ItemIdIsValid(id)) + elog(ERROR, "invalid ItemId"); + + itup = (IndexTuple) PageGetItem(page, id); + + index_deform_tuple(itup, RelationGetDescr(indexRel), + itup_values, itup_isnull); + + memset(nulls, 0, sizeof(nulls)); + + values[0] = DatumGetInt16(offset); + values[1] = ItemPointerGetDatum(&itup->t_tid); + values[2] = Int32GetDatum((int) IndexTupleSize(itup)); + + key_desc = BuildIndexValueDescription(indexRel, itup_values, itup_isnull); + if (key_desc) + values[3] = CStringGetTextDatum(key_desc); + else + { + values[3] = (Datum) 0; + nulls[3] = true; + } + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + relation_close(indexRel, AccessShareLock); + + return (Datum) 0; +} diff --git a/contrib/pageinspect/hashfuncs.c b/contrib/pageinspect/hashfuncs.c index 3b2f0339cfe09..ff01119474a47 100644 --- a/contrib/pageinspect/hashfuncs.c +++ b/contrib/pageinspect/hashfuncs.c @@ -2,7 +2,7 @@ * hashfuncs.c * Functions to investigate the content of HASH indexes * - * Copyright (c) 2017-2020, PostgreSQL Global Development Group + * Copyright (c) 2017-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pageinspect/hashfuncs.c @@ -390,7 +390,7 @@ Datum hash_bitmap_info(PG_FUNCTION_ARGS) { Oid indexRelid = PG_GETARG_OID(0); - uint64 ovflblkno = PG_GETARG_INT64(1); + int64 ovflblkno = PG_GETARG_INT64(1); HashMetaPage metap; Buffer metabuf, mapbuf; @@ -425,11 +425,16 @@ hash_bitmap_info(PG_FUNCTION_ARGS) 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); + if (ovflblkno < 0 || ovflblkno > MaxBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid block number"))); + if (ovflblkno >= RelationGetNumberOfBlocks(indexRel)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("block number " UINT64_FORMAT " is out of range for relation \"%s\"", - ovflblkno, RelationGetRelationName(indexRel)))); + errmsg("block number %lld is out of range for relation \"%s\"", + (long long int) ovflblkno, RelationGetRelationName(indexRel)))); /* Read the metapage so we can determine which bitmap page to use */ metabuf = _hash_getbuf(indexRel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index f04455da127c0..9abcee32afbcf 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ -15,7 +15,7 @@ * there's hardly any use case for using these without superuser-rights * anyway. * - * Copyright (c) 2007-2020, PostgreSQL Global Development Group + * Copyright (c) 2007-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pageinspect/heapfuncs.c diff --git a/contrib/pageinspect/pageinspect--1.8--1.9.sql b/contrib/pageinspect/pageinspect--1.8--1.9.sql new file mode 100644 index 0000000000000..b4248d791f0d1 --- /dev/null +++ b/contrib/pageinspect/pageinspect--1.8--1.9.sql @@ -0,0 +1,118 @@ +/* contrib/pageinspect/pageinspect--1.8--1.9.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.9'" to load this file. 
\quit + +-- +-- gist_page_opaque_info() +-- +CREATE FUNCTION gist_page_opaque_info(IN page bytea, + OUT lsn pg_lsn, + OUT nsn pg_lsn, + OUT rightlink bigint, + OUT flags text[]) +AS 'MODULE_PATHNAME', 'gist_page_opaque_info' +LANGUAGE C STRICT PARALLEL SAFE; + + +-- +-- gist_page_items_bytea() +-- +CREATE FUNCTION gist_page_items_bytea(IN page bytea, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT key_data bytea) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gist_page_items_bytea' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- gist_page_items() +-- +CREATE FUNCTION gist_page_items(IN page bytea, + IN index_oid regclass, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT keys text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gist_page_items' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- get_raw_page() +-- +DROP FUNCTION get_raw_page(text, int4); +CREATE FUNCTION get_raw_page(text, int8) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +DROP FUNCTION get_raw_page(text, text, int4); +CREATE FUNCTION get_raw_page(text, text, int8) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_fork_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- page_checksum() +-- +DROP FUNCTION page_checksum(IN page bytea, IN blkno int4); +CREATE FUNCTION page_checksum(IN page bytea, IN blkno int8) +RETURNS smallint +AS 'MODULE_PATHNAME', 'page_checksum_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_page_stats() +-- +DROP FUNCTION bt_page_stats(text, int4); +CREATE FUNCTION bt_page_stats(IN relname text, IN blkno int8, + OUT blkno int8, + OUT type "char", + OUT live_items int4, + OUT dead_items int4, + OUT avg_item_size int4, + OUT page_size int4, + OUT free_size int4, + OUT btpo_prev int8, + OUT btpo_next int8, + OUT btpo int4, + OUT btpo_flags int4) +AS 'MODULE_PATHNAME', 'bt_page_stats_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_page_items() +-- +DROP FUNCTION bt_page_items(text, int4); 
+CREATE FUNCTION bt_page_items(IN relname text, IN blkno int8, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT nulls bool, + OUT vars bool, + OUT data text, + OUT dead boolean, + OUT htid tid, + OUT tids tid[]) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'bt_page_items_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- brin_page_items() +-- +DROP FUNCTION brin_page_items(IN page bytea, IN index_oid regclass); +CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass, + OUT itemoffset int, + OUT blknum int8, + OUT attnum int, + OUT allnulls bool, + OUT hasnulls bool, + OUT placeholder bool, + OUT value text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'brin_page_items' +LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control index f8cdf526c6515..bd716769a174c 100644 --- a/contrib/pageinspect/pageinspect.control +++ b/contrib/pageinspect/pageinspect.control @@ -1,5 +1,5 @@ # pageinspect extension comment = 'inspect the contents of database pages at a low level' -default_version = '1.8' +default_version = '1.9' module_pathname = '$libdir/pageinspect' relocatable = true diff --git a/contrib/pageinspect/pageinspect.h b/contrib/pageinspect/pageinspect.h index 478e0d2d20d88..3812a3c23397a 100644 --- a/contrib/pageinspect/pageinspect.h +++ b/contrib/pageinspect/pageinspect.h @@ -3,7 +3,7 @@ * pageinspect.h * Common functions for pageinspect. 
* - * Copyright (c) 2017-2020, PostgreSQL Global Development Group + * Copyright (c) 2017-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pageinspect/pageinspect.h @@ -15,6 +15,15 @@ #include "storage/bufpage.h" +/* + * Extension version number, for supporting older extension versions' objects + */ +enum pageinspect_version +{ + PAGEINSPECT_V1_8, + PAGEINSPECT_V1_9, +}; + /* in rawpage.c */ extern Page get_page_from_raw(bytea *raw_page); diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c index c0181506a5d0b..9e9ee8a493f87 100644 --- a/contrib/pageinspect/rawpage.c +++ b/contrib/pageinspect/rawpage.c @@ -5,7 +5,7 @@ * * Access-method specific inspection functions are in separate files. * - * Copyright (c) 2007-2020, PostgreSQL Global Development Group + * Copyright (c) 2007-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pageinspect/rawpage.c @@ -40,6 +40,28 @@ static bytea *get_raw_page_internal(text *relname, ForkNumber forknum, * * Returns a copy of a page from shared buffers as a bytea */ +PG_FUNCTION_INFO_V1(get_raw_page_1_9); + +Datum +get_raw_page_1_9(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + int64 blkno = PG_GETARG_INT64(1); + bytea *raw_page; + + if (blkno < 0 || blkno > MaxBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid block number"))); + + raw_page = get_raw_page_internal(relname, MAIN_FORKNUM, blkno); + + PG_RETURN_BYTEA_P(raw_page); +} + +/* + * entry point for old extension version + */ PG_FUNCTION_INFO_V1(get_raw_page); Datum @@ -69,6 +91,32 @@ get_raw_page(PG_FUNCTION_ARGS) * * Same, for any fork */ +PG_FUNCTION_INFO_V1(get_raw_page_fork_1_9); + +Datum +get_raw_page_fork_1_9(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + text *forkname = PG_GETARG_TEXT_PP(1); + int64 blkno = PG_GETARG_INT64(2); + bytea *raw_page; + ForkNumber forknum; + + forknum = forkname_to_number(text_to_cstring(forkname)); + + 
if (blkno < 0 || blkno > MaxBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid block number"))); + + raw_page = get_raw_page_internal(relname, forknum, blkno); + + PG_RETURN_BYTEA_P(raw_page); +} + +/* + * Entry point for old extension version + */ PG_FUNCTION_INFO_V1(get_raw_page_fork); Datum @@ -292,13 +340,14 @@ page_header(PG_FUNCTION_ARGS) * Compute checksum of a raw page */ +PG_FUNCTION_INFO_V1(page_checksum_1_9); PG_FUNCTION_INFO_V1(page_checksum); -Datum -page_checksum(PG_FUNCTION_ARGS) +static Datum +page_checksum_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) { bytea *raw_page = PG_GETARG_BYTEA_P(0); - uint32 blkno = PG_GETARG_INT32(1); + int64 blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1)); int raw_page_size; PageHeader page; @@ -307,6 +356,11 @@ page_checksum(PG_FUNCTION_ARGS) (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to use raw page functions"))); + if (blkno < 0 || blkno > MaxBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid block number"))); + raw_page_size = VARSIZE(raw_page) - VARHDRSZ; /* @@ -321,3 +375,18 @@ page_checksum(PG_FUNCTION_ARGS) PG_RETURN_INT16(pg_checksum_page((char *) page, blkno)); } + +Datum +page_checksum_1_9(PG_FUNCTION_ARGS) +{ + return page_checksum_internal(fcinfo, PAGEINSPECT_V1_9); +} + +/* + * Entry point for old extension version + */ +Datum +page_checksum(PG_FUNCTION_ARGS) +{ + return page_checksum_internal(fcinfo, PAGEINSPECT_V1_8); +} diff --git a/contrib/pageinspect/sql/btree.sql b/contrib/pageinspect/sql/btree.sql index 8eac64c7b3cb7..963591795973e 100644 --- a/contrib/pageinspect/sql/btree.sql +++ b/contrib/pageinspect/sql/btree.sql @@ -6,14 +6,17 @@ CREATE INDEX test1_a_idx ON test1 USING btree (a); SELECT * FROM bt_metap('test1_a_idx'); +SELECT * FROM bt_page_stats('test1_a_idx', -1); SELECT * FROM bt_page_stats('test1_a_idx', 0); SELECT * FROM 
bt_page_stats('test1_a_idx', 1); SELECT * FROM bt_page_stats('test1_a_idx', 2); +SELECT * FROM bt_page_items('test1_a_idx', -1); SELECT * FROM bt_page_items('test1_a_idx', 0); SELECT * FROM bt_page_items('test1_a_idx', 1); SELECT * FROM bt_page_items('test1_a_idx', 2); +SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', -1)); SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 0)); SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 1)); SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 2)); diff --git a/contrib/pageinspect/sql/gin.sql b/contrib/pageinspect/sql/gin.sql index d516ed3cbd441..423f5c574999b 100644 --- a/contrib/pageinspect/sql/gin.sql +++ b/contrib/pageinspect/sql/gin.sql @@ -17,3 +17,5 @@ SELECT COUNT(*) > 0 FROM gin_leafpage_items(get_raw_page('test1_y_idx', (pg_relation_size('test1_y_idx') / current_setting('block_size')::bigint)::int - 1)); + +DROP TABLE test1; diff --git a/contrib/pageinspect/sql/gist.sql b/contrib/pageinspect/sql/gist.sql new file mode 100644 index 0000000000000..1560d1e15c315 --- /dev/null +++ b/contrib/pageinspect/sql/gist.sql @@ -0,0 +1,29 @@ +-- The gist_page_opaque_info() function prints the page's LSN. Normally, +-- that's constant 1 (GistBuildLSN) on every page of a freshly built GiST +-- index. But with wal_level=minimal, the whole relation is dumped to WAL at +-- the end of the transaction if it's smaller than wal_skip_threshold, which +-- updates the LSNs. Wrap the tests on gist_page_opaque_info() in the +-- same transaction with the CREATE INDEX so that we see the LSNs before +-- they are possibly overwritten at end of transaction. +BEGIN; + +-- Create a test table and GiST index. 
+CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM + generate_series(1,1000) i; +CREATE INDEX test_gist_idx ON test_gist USING gist (p); + +-- Page 0 is the root, the rest are leaf pages +SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0)); +SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1)); +SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2)); + +COMMIT; + +SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx'); +SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 1), 'test_gist_idx') LIMIT 5; + +-- gist_page_items_bytea prints the raw key data as a bytea. The output of that is +-- platform-dependent (endianess), so omit the actual key data from the output. +SELECT itemoffset, ctid, itemlen FROM gist_page_items_bytea(get_raw_page('test_gist_idx', 0)); + +DROP TABLE test_gist; diff --git a/contrib/pageinspect/sql/hash.sql b/contrib/pageinspect/sql/hash.sql index 87ee549a7b4f5..64f33f1d52fd1 100644 --- a/contrib/pageinspect/sql/hash.sql +++ b/contrib/pageinspect/sql/hash.sql @@ -13,12 +13,14 @@ SELECT hash_page_type(get_raw_page('test_hash_a_idx', 5)); SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6)); +SELECT * FROM hash_bitmap_info('test_hash_a_idx', -1); SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0); SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1); SELECT * FROM hash_bitmap_info('test_hash_a_idx', 2); SELECT * FROM hash_bitmap_info('test_hash_a_idx', 3); SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4); SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5); +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 6); SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, diff --git a/contrib/pageinspect/sql/oldextversions.sql b/contrib/pageinspect/sql/oldextversions.sql new file mode 100644 index 0000000000000..78e08f40e8247 --- /dev/null +++ b/contrib/pageinspect/sql/oldextversions.sql @@ -0,0 +1,20 @@ +-- test old 
extension version entry points + +DROP EXTENSION pageinspect; +CREATE EXTENSION pageinspect VERSION '1.8'; + +CREATE TABLE test1 (a int8, b text); +INSERT INTO test1 VALUES (72057594037927937, 'text'); +CREATE INDEX test1_a_idx ON test1 USING btree (a); + +-- from page.sql +SELECT octet_length(get_raw_page('test1', 0)) AS main_0; +SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0; +SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_test; + +-- from btree.sql +SELECT * FROM bt_page_stats('test1_a_idx', 1); +SELECT * FROM bt_page_items('test1_a_idx', 1); + +DROP TABLE test1; +DROP EXTENSION pageinspect; diff --git a/contrib/pageinspect/sql/page.sql b/contrib/pageinspect/sql/page.sql index bd049aeb247fc..d333b763d709a 100644 --- a/contrib/pageinspect/sql/page.sql +++ b/contrib/pageinspect/sql/page.sql @@ -3,7 +3,7 @@ CREATE EXTENSION pageinspect; CREATE TABLE test1 (a int, b int); INSERT INTO test1 VALUES (16777217, 131584); -VACUUM test1; -- set up FSM +VACUUM (DISABLE_PAGE_SKIPPING) test1; -- set up FSM -- The page contents can vary, so just test that it can be read -- successfully, but don't keep the output. 
@@ -17,6 +17,7 @@ SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1; SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0; SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1; +SELECT octet_length(get_raw_page('test1', 'main', -1)); SELECT octet_length(get_raw_page('xxx', 'main', 0)); SELECT octet_length(get_raw_page('test1', 'xxx', 0)); @@ -25,6 +26,7 @@ SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0); SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_test; +SELECT page_checksum(get_raw_page('test1', 0), -1); SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bits) FROM heap_page_items(get_raw_page('test1', 0)); @@ -32,15 +34,9 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0)); -- If we freeze the only tuple on test1, the infomask should --- always be the same in all test runs. we show raw flags by --- default: HEAP_XMIN_COMMITTED and HEAP_XMIN_INVALID. -VACUUM FREEZE test1; +-- always be the same in all test runs. 
+VACUUM (FREEZE, DISABLE_PAGE_SKIPPING) test1; -SELECT t_infomask, t_infomask2, raw_flags, combined_flags -FROM heap_page_items(get_raw_page('test1', 0)), - LATERAL heap_tuple_infomask_flags(t_infomask, t_infomask2); - --- output the decoded flag HEAP_XMIN_FROZEN instead SELECT t_infomask, t_infomask2, raw_flags, combined_flags FROM heap_page_items(get_raw_page('test1', 0)), LATERAL heap_tuple_infomask_flags(t_infomask, t_infomask2); diff --git a/contrib/passwordcheck/passwordcheck.c b/contrib/passwordcheck/passwordcheck.c index 70f056232fe72..3d644be8dd558 100644 --- a/contrib/passwordcheck/passwordcheck.c +++ b/contrib/passwordcheck/passwordcheck.c @@ -3,7 +3,7 @@ * passwordcheck.c * * - * Copyright (c) 2009-2020, PostgreSQL Global Development Group + * Copyright (c) 2009-2021, PostgreSQL Global Development Group * * Author: Laurenz Albe * diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index a85ee0450ec33..4c08d2b386b9c 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -16,7 +16,7 @@ * relevant database in turn. The former keeps running after the * initial prewarm is complete to update the dump file periodically. 
* - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pg_prewarm/autoprewarm.c diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c index 33e2d28b2767e..a8554529361e6 100644 --- a/contrib/pg_prewarm/pg_prewarm.c +++ b/contrib/pg_prewarm/pg_prewarm.c @@ -3,7 +3,7 @@ * pg_prewarm.c * prewarming utilities * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pg_prewarm/pg_prewarm.c diff --git a/contrib/pg_standby/.gitignore b/contrib/pg_standby/.gitignore deleted file mode 100644 index a401b085a895d..0000000000000 --- a/contrib/pg_standby/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/pg_standby diff --git a/contrib/pg_standby/Makefile b/contrib/pg_standby/Makefile deleted file mode 100644 index 87732bedf185f..0000000000000 --- a/contrib/pg_standby/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# contrib/pg_standby/Makefile - -PGFILEDESC = "pg_standby - supports creation of a warm standby" -PGAPPICON = win32 - -PROGRAM = pg_standby -OBJS = \ - $(WIN32RES) \ - pg_standby.o - -ifdef USE_PGXS -PG_CONFIG = pg_config -PGXS := $(shell $(PG_CONFIG) --pgxs) -include $(PGXS) -else -subdir = contrib/pg_standby -top_builddir = ../.. 
-include $(top_builddir)/src/Makefile.global -include $(top_srcdir)/contrib/contrib-global.mk -endif diff --git a/contrib/pg_standby/pg_standby.c b/contrib/pg_standby/pg_standby.c deleted file mode 100644 index c9f33e4254c4b..0000000000000 --- a/contrib/pg_standby/pg_standby.c +++ /dev/null @@ -1,907 +0,0 @@ -/* - * contrib/pg_standby/pg_standby.c - * - * - * pg_standby.c - * - * Production-ready example of how to create a Warm Standby - * database server using continuous archiving as a - * replication mechanism - * - * We separate the parameters for archive and nextWALfile - * so that we can check the archive exists, even if the - * WAL file doesn't (yet). - * - * This program will be executed once in full for each file - * requested by the warm standby server. - * - * It is designed to cater to a variety of needs, as well - * providing a customizable section. - * - * Original author: Simon Riggs simon@2ndquadrant.com - * Current maintainer: Simon Riggs - */ -#include "postgres_fe.h" - -#include -#include -#include -#include -#include -#include - -#include "access/xlog_internal.h" -#include "pg_getopt.h" - -const char *progname; - -int WalSegSz = -1; - -/* Options and defaults */ -int sleeptime = 5; /* amount of time to sleep between file checks */ -int waittime = -1; /* how long we have been waiting, -1 no wait - * yet */ -int maxwaittime = 0; /* how long are we prepared to wait for? */ -int keepfiles = 0; /* number of WAL files to keep, 0 keep all */ -int maxretries = 3; /* number of retries on restore command */ -bool debug = false; /* are we debugging? */ -bool need_cleanup = false; /* do we need to remove files from - * archive? */ - -#ifndef WIN32 -static volatile sig_atomic_t signaled = false; -#endif - -char *archiveLocation; /* where to find the archive? */ -char *triggerPath; /* where to find the trigger file? 
*/ -char *xlogFilePath; /* where we are going to restore to */ -char *nextWALFileName; /* the file we need to get from archive */ -char *restartWALFileName; /* the file from which we can restart restore */ -char WALFilePath[MAXPGPATH * 2]; /* the file path including archive */ -char restoreCommand[MAXPGPATH]; /* run this to restore */ -char exclusiveCleanupFileName[MAXFNAMELEN]; /* the file we need to get - * from archive */ - -/* - * Two types of failover are supported (smart and fast failover). - * - * The content of the trigger file determines the type of failover. If the - * trigger file contains the word "smart" (or the file is empty), smart - * failover is chosen: pg_standby acts as cp or ln command itself, on - * successful completion all the available WAL records will be applied - * resulting in zero data loss. But, it might take a long time to finish - * recovery if there's a lot of unapplied WAL. - * - * On the other hand, if the trigger file contains the word "fast", the - * recovery is finished immediately even if unapplied WAL files remain. Any - * transactions in the unapplied WAL files are lost. - * - * An empty trigger file performs smart failover. SIGUSR or SIGINT triggers - * fast failover. A timeout causes fast failover (smart failover would have - * the same effect, since if the timeout is reached there is no unapplied WAL). 
- */ -#define NoFailover 0 -#define SmartFailover 1 -#define FastFailover 2 - -static int Failover = NoFailover; - -#define RESTORE_COMMAND_COPY 0 -#define RESTORE_COMMAND_LINK 1 -int restoreCommandType; - -#define XLOG_DATA 0 -#define XLOG_HISTORY 1 -int nextWALFileType; - -#define SET_RESTORE_COMMAND(cmd, arg1, arg2) \ - snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", arg1, arg2) - -struct stat stat_buf; - -static bool SetWALFileNameForCleanup(void); -static bool SetWALSegSize(void); - - -/* ===================================================================== - * - * Customizable section - * - * ===================================================================== - * - * Currently, this section assumes that the Archive is a locally - * accessible directory. If you want to make other assumptions, - * such as using a vendor-specific archive and access API, these - * routines are the ones you'll need to change. You're - * encouraged to submit any changes to pgsql-hackers@lists.postgresql.org - * or personally to the current maintainer. Those changes may be - * folded in to later versions of this program. - */ - -/* - * Initialize allows customized commands into the warm standby program. - * - * As an example, and probably the common case, we use either - * cp/ln commands on *nix, or copy/move command on Windows. 
- */ -static void -CustomizableInitialize(void) -{ -#ifdef WIN32 - snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName); - switch (restoreCommandType) - { - case RESTORE_COMMAND_LINK: - SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath); - break; - case RESTORE_COMMAND_COPY: - default: - SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath); - break; - } -#else - snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName); - switch (restoreCommandType) - { - case RESTORE_COMMAND_LINK: - SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath); - break; - case RESTORE_COMMAND_COPY: - default: - SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath); - break; - } -#endif - - /* - * This code assumes that archiveLocation is a directory You may wish to - * add code to check for tape libraries, etc.. So, since it is a - * directory, we use stat to test if it's accessible - */ - if (stat(archiveLocation, &stat_buf) != 0) - { - fprintf(stderr, "%s: archive location \"%s\" does not exist\n", progname, archiveLocation); - fflush(stderr); - exit(2); - } -} - -/* - * CustomizableNextWALFileReady() - * - * Is the requested file ready yet? - */ -static bool -CustomizableNextWALFileReady(void) -{ - if (stat(WALFilePath, &stat_buf) == 0) - { - /* - * If we've not seen any WAL segments, we don't know the WAL segment - * size, which we need. If it looks like a WAL segment, determine size - * of segments for the cluster. - */ - if (WalSegSz == -1 && IsXLogFileName(nextWALFileName)) - { - if (SetWALSegSize()) - { - /* - * Successfully determined WAL segment size. Can compute - * cleanup cutoff now. 
- */ - need_cleanup = SetWALFileNameForCleanup(); - if (debug) - { - fprintf(stderr, - _("WAL segment size: %d \n"), WalSegSz); - fprintf(stderr, "Keep archive history: "); - - if (need_cleanup) - fprintf(stderr, "%s and later\n", - exclusiveCleanupFileName); - else - fprintf(stderr, "no cleanup required\n"); - } - } - } - - /* - * Return only if it's the right size already. - */ - if (WalSegSz > 0 && stat_buf.st_size == WalSegSz) - { -#ifdef WIN32 - - /* - * Windows 'cp' sets the final file size before the copy is - * complete, and not yet ready to be opened by pg_standby. So we - * wait for sleeptime secs before attempting to restore. If that - * is not enough, we will rely on the retry/holdoff mechanism. - * GNUWin32's cp does not have this problem. - */ - pg_usleep(sleeptime * 1000000L); -#endif - nextWALFileType = XLOG_DATA; - return true; - } - - /* - * If still too small, wait until it is the correct size - */ - if (WalSegSz > 0 && stat_buf.st_size > WalSegSz) - { - if (debug) - { - fprintf(stderr, "file size greater than expected\n"); - fflush(stderr); - } - exit(3); - } - } - - return false; -} - -static void -CustomizableCleanupPriorWALFiles(void) -{ - /* - * Work out name of prior file from current filename - */ - if (nextWALFileType == XLOG_DATA) - { - int rc; - DIR *xldir; - struct dirent *xlde; - - /* - * Assume it's OK to keep failing. The failure situation may change - * over time, so we'd rather keep going on the main processing than - * fail because we couldn't clean up yet. - */ - if ((xldir = opendir(archiveLocation)) != NULL) - { - while (errno = 0, (xlde = readdir(xldir)) != NULL) - { - /* - * We ignore the timeline part of the XLOG segment identifiers - * in deciding whether a segment is still needed. This - * ensures that we won't prematurely remove a segment from a - * parent timeline. We could probably be a little more - * proactive about removing segments of non-parent timelines, - * but that would be a whole lot more complicated. 
- * - * We use the alphanumeric sorting property of the filenames - * to decide which ones are earlier than the - * exclusiveCleanupFileName file. Note that this means files - * are not removed in the order they were originally written, - * in case this worries you. - */ - if (IsXLogFileName(xlde->d_name) && - strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0) - { -#ifdef WIN32 - snprintf(WALFilePath, sizeof(WALFilePath), "%s\\%s", archiveLocation, xlde->d_name); -#else - snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s", archiveLocation, xlde->d_name); -#endif - - if (debug) - fprintf(stderr, "\nremoving file \"%s\"", WALFilePath); - - rc = unlink(WALFilePath); - if (rc != 0) - { - fprintf(stderr, "\n%s: ERROR: could not remove file \"%s\": %s\n", - progname, WALFilePath, strerror(errno)); - break; - } - } - } - - if (errno) - fprintf(stderr, "%s: could not read archive location \"%s\": %s\n", - progname, archiveLocation, strerror(errno)); - if (debug) - fprintf(stderr, "\n"); - } - else - fprintf(stderr, "%s: could not open archive location \"%s\": %s\n", - progname, archiveLocation, strerror(errno)); - - if (closedir(xldir)) - fprintf(stderr, "%s: could not close archive location \"%s\": %s\n", - progname, archiveLocation, strerror(errno)); - - fflush(stderr); - } -} - -/* ===================================================================== - * End of Customizable section - * ===================================================================== - */ - -/* - * SetWALFileNameForCleanup() - * - * Set the earliest WAL filename that we want to keep on the archive - * and decide whether we need_cleanup - */ -static bool -SetWALFileNameForCleanup(void) -{ - uint32 tli = 1, - log = 0, - seg = 0; - uint32 log_diff = 0, - seg_diff = 0; - bool cleanup = false; - int max_segments_per_logfile = (0xFFFFFFFF / WalSegSz); - - if (restartWALFileName) - { - /* - * Don't do cleanup if the restartWALFileName provided is later than - * the xlog file requested. 
This is an error and we must not remove - * these files from archive. This shouldn't happen, but better safe - * than sorry. - */ - if (strcmp(restartWALFileName, nextWALFileName) > 0) - return false; - - strlcpy(exclusiveCleanupFileName, restartWALFileName, sizeof(exclusiveCleanupFileName)); - return true; - } - - if (keepfiles > 0) - { - sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg); - if (tli > 0 && seg > 0) - { - log_diff = keepfiles / max_segments_per_logfile; - seg_diff = keepfiles % max_segments_per_logfile; - if (seg_diff > seg) - { - log_diff++; - seg = max_segments_per_logfile - (seg_diff - seg); - } - else - seg -= seg_diff; - - if (log >= log_diff) - { - log -= log_diff; - cleanup = true; - } - else - { - log = 0; - seg = 0; - } - } - } - - XLogFileNameById(exclusiveCleanupFileName, tli, log, seg); - - return cleanup; -} - -/* - * Try to set the wal segment size from the WAL file specified by WALFilePath. - * - * Return true if size could be determined, false otherwise. - */ -static bool -SetWALSegSize(void) -{ - bool ret_val = false; - int fd; - PGAlignedXLogBlock buf; - - Assert(WalSegSz == -1); - - if ((fd = open(WALFilePath, O_RDWR, 0)) < 0) - { - fprintf(stderr, "%s: could not open WAL file \"%s\": %s\n", - progname, WALFilePath, strerror(errno)); - return false; - } - - errno = 0; - if (read(fd, buf.data, XLOG_BLCKSZ) == XLOG_BLCKSZ) - { - XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data; - - WalSegSz = longhdr->xlp_seg_size; - - if (IsValidWalSegSize(WalSegSz)) - { - /* successfully retrieved WAL segment size */ - ret_val = true; - } - else - fprintf(stderr, - "%s: WAL segment size must be a power of two between 1MB and 1GB, but the WAL file header specifies %d bytes\n", - progname, WalSegSz); - } - else - { - /* - * Don't complain loudly, this is to be expected for segments being - * created. 
- */ - if (errno != 0) - { - if (debug) - fprintf(stderr, "could not read file \"%s\": %s\n", - WALFilePath, strerror(errno)); - } - else - { - if (debug) - fprintf(stderr, "not enough data in file \"%s\"\n", - WALFilePath); - } - } - - fflush(stderr); - - close(fd); - return ret_val; -} - -/* - * CheckForExternalTrigger() - * - * Is there a trigger file? Sets global 'Failover' variable to indicate - * what kind of a trigger file it was. A "fast" trigger file is turned - * into a "smart" file as a side-effect. - */ -static void -CheckForExternalTrigger(void) -{ - char buf[32]; - int fd; - int len; - - /* - * Look for a trigger file, if that option has been selected - * - * We use stat() here because triggerPath is always a file rather than - * potentially being in an archive - */ - if (!triggerPath || stat(triggerPath, &stat_buf) != 0) - return; - - /* - * An empty trigger file performs smart failover. There's a little race - * condition here: if the writer of the trigger file has just created the - * file, but not yet written anything to it, we'll treat that as smart - * shutdown even if the other process was just about to write "fast" to - * it. But that's fine: we'll restore one more WAL file, and when we're - * invoked next time, we'll see the word "fast" and fail over immediately. 
- */ - if (stat_buf.st_size == 0) - { - Failover = SmartFailover; - fprintf(stderr, "trigger file found: smart failover\n"); - fflush(stderr); - return; - } - - if ((fd = open(triggerPath, O_RDWR, 0)) < 0) - { - fprintf(stderr, "WARNING: could not open \"%s\": %s\n", - triggerPath, strerror(errno)); - fflush(stderr); - return; - } - - if ((len = read(fd, buf, sizeof(buf) - 1)) < 0) - { - fprintf(stderr, "WARNING: could not read \"%s\": %s\n", - triggerPath, strerror(errno)); - fflush(stderr); - close(fd); - return; - } - buf[len] = '\0'; - - if (strncmp(buf, "smart", 5) == 0) - { - Failover = SmartFailover; - fprintf(stderr, "trigger file found: smart failover\n"); - fflush(stderr); - close(fd); - return; - } - - if (strncmp(buf, "fast", 4) == 0) - { - Failover = FastFailover; - - fprintf(stderr, "trigger file found: fast failover\n"); - fflush(stderr); - - /* - * Turn it into a "smart" trigger by truncating the file. Otherwise if - * the server asks us again to restore a segment that was restored - * already, we would return "not found" and upset the server. 
- */ - if (ftruncate(fd, 0) < 0) - { - fprintf(stderr, "WARNING: could not read \"%s\": %s\n", - triggerPath, strerror(errno)); - fflush(stderr); - } - close(fd); - - return; - } - close(fd); - - fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath); - fflush(stderr); -} - -/* - * RestoreWALFileForRecovery() - * - * Perform the action required to restore the file from archive - */ -static bool -RestoreWALFileForRecovery(void) -{ - int rc = 0; - int numretries = 0; - - if (debug) - { - fprintf(stderr, "running restore: "); - fflush(stderr); - } - - while (numretries <= maxretries) - { - rc = system(restoreCommand); - if (rc == 0) - { - if (debug) - { - fprintf(stderr, "OK\n"); - fflush(stderr); - } - return true; - } - pg_usleep(numretries++ * sleeptime * 1000000L); - } - - /* - * Allow caller to add additional info - */ - if (debug) - fprintf(stderr, "not restored\n"); - return false; -} - -static void -usage(void) -{ - printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname); - printf("Usage:\n"); - printf(" %s [OPTION]... 
ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname); - printf("\nOptions:\n"); - printf(" -c copy file from archive (default)\n"); - printf(" -d generate lots of debugging output (testing only)\n"); - printf(" -k NUMFILESTOKEEP if RESTARTWALFILE is not used, remove files prior to limit\n" - " (0 keeps all)\n"); - printf(" -l does nothing; use of link is now deprecated\n"); - printf(" -r MAXRETRIES max number of times to retry, with progressive wait\n" - " (default=3)\n"); - printf(" -s SLEEPTIME seconds to wait between file checks (min=1, max=60,\n" - " default=5)\n"); - printf(" -t TRIGGERFILE trigger file to initiate failover (no default)\n"); - printf(" -V, --version output version information, then exit\n"); - printf(" -w MAXWAITTIME max seconds to wait for a file (0=no limit) (default=0)\n"); - printf(" -?, --help show this help, then exit\n"); - printf("\n" - "Main intended use as restore_command in postgresql.conf:\n" - " restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n" - "e.g.\n" - " restore_command = 'pg_standby /mnt/server/archiverdir %%f %%p %%r'\n"); - printf("\nReport bugs to <%s>.\n", PACKAGE_BUGREPORT); - printf("%s home page: <%s>\n", PACKAGE_NAME, PACKAGE_URL); -} - -#ifndef WIN32 -static void -sighandler(int sig) -{ - signaled = true; -} - -/* We don't want SIGQUIT to core dump */ -static void -sigquit_handler(int sig) -{ - pqsignal(SIGINT, SIG_DFL); - kill(getpid(), SIGINT); -} -#endif - -/*------------ MAIN ----------------------------------------*/ -int -main(int argc, char **argv) -{ - int c; - - progname = get_progname(argv[0]); - - if (argc > 1) - { - if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) - { - usage(); - exit(0); - } - if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) - { - puts("pg_standby (PostgreSQL) " PG_VERSION); - exit(0); - } - } - -#ifndef WIN32 - - /* - * You can send SIGUSR1 to trigger failover. 
- * - * Postmaster uses SIGQUIT to request immediate shutdown. The default - * action is to core dump, but we don't want that, so trap it and commit - * suicide without core dump. - * - * We used to use SIGINT and SIGQUIT to trigger failover, but that turned - * out to be a bad idea because postmaster uses SIGQUIT to request - * immediate shutdown. We still trap SIGINT, but that may change in a - * future release. - * - * There's no way to trigger failover via signal on Windows. - */ - (void) pqsignal(SIGUSR1, sighandler); - (void) pqsignal(SIGINT, sighandler); /* deprecated, use SIGUSR1 */ - (void) pqsignal(SIGQUIT, sigquit_handler); -#endif - - while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1) - { - switch (c) - { - case 'c': /* Use copy */ - restoreCommandType = RESTORE_COMMAND_COPY; - break; - case 'd': /* Debug mode */ - debug = true; - break; - case 'k': /* keepfiles */ - keepfiles = atoi(optarg); - if (keepfiles < 0) - { - fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname); - exit(2); - } - break; - case 'l': /* Use link */ - - /* - * Link feature disabled, possibly permanently. Linking causes - * a problem after recovery ends that is not currently - * resolved by PostgreSQL. 
25 Jun 2009 - */ -#ifdef NOT_USED - restoreCommandType = RESTORE_COMMAND_LINK; -#endif - break; - case 'r': /* Retries */ - maxretries = atoi(optarg); - if (maxretries < 0) - { - fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname); - exit(2); - } - break; - case 's': /* Sleep time */ - sleeptime = atoi(optarg); - if (sleeptime <= 0 || sleeptime > 60) - { - fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname); - exit(2); - } - break; - case 't': /* Trigger file */ - triggerPath = pg_strdup(optarg); - break; - case 'w': /* Max wait time */ - maxwaittime = atoi(optarg); - if (maxwaittime < 0) - { - fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname); - exit(2); - } - break; - default: - fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); - exit(2); - break; - } - } - - /* - * Parameter checking - after checking to see if trigger file present - */ - if (argc == 1) - { - fprintf(stderr, "%s: not enough command-line arguments\n", progname); - exit(2); - } - - /* - * We will go to the archiveLocation to get nextWALFileName. 
- * nextWALFileName may not exist yet, which would not be an error, so we - * separate the archiveLocation and nextWALFileName so we can check - * separately whether archiveLocation exists, if not that is an error - */ - if (optind < argc) - { - archiveLocation = argv[optind]; - optind++; - } - else - { - fprintf(stderr, "%s: must specify archive location\n", progname); - fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); - exit(2); - } - - if (optind < argc) - { - nextWALFileName = argv[optind]; - optind++; - } - else - { - fprintf(stderr, "%s: must specify WAL file name as second non-option argument (use \"%%f\")\n", progname); - fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); - exit(2); - } - - if (optind < argc) - { - xlogFilePath = argv[optind]; - optind++; - } - else - { - fprintf(stderr, "%s: must specify xlog destination as third non-option argument (use \"%%p\")\n", progname); - fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); - exit(2); - } - - if (optind < argc) - { - restartWALFileName = argv[optind]; - optind++; - } - - CustomizableInitialize(); - - if (debug) - { - fprintf(stderr, "Trigger file: %s\n", triggerPath ? triggerPath : ""); - fprintf(stderr, "Waiting for WAL file: %s\n", nextWALFileName); - fprintf(stderr, "WAL file path: %s\n", WALFilePath); - fprintf(stderr, "Restoring to: %s\n", xlogFilePath); - fprintf(stderr, "Sleep interval: %d second%s\n", - sleeptime, (sleeptime > 1 ? "s" : " ")); - fprintf(stderr, "Max wait interval: %d %s\n", - maxwaittime, (maxwaittime > 0 ? 
"seconds" : "forever")); - fprintf(stderr, "Command for restore: %s\n", restoreCommand); - fflush(stderr); - } - - /* - * Check for initial history file: always the first file to be requested - * It's OK if the file isn't there - all other files need to wait - */ - if (IsTLHistoryFileName(nextWALFileName)) - { - nextWALFileType = XLOG_HISTORY; - if (RestoreWALFileForRecovery()) - exit(0); - else - { - if (debug) - { - fprintf(stderr, "history file not found\n"); - fflush(stderr); - } - exit(1); - } - } - - /* - * Main wait loop - */ - for (;;) - { - /* Check for trigger file or signal first */ - CheckForExternalTrigger(); -#ifndef WIN32 - if (signaled) - { - Failover = FastFailover; - if (debug) - { - fprintf(stderr, "signaled to exit: fast failover\n"); - fflush(stderr); - } - } -#endif - - /* - * Check for fast failover immediately, before checking if the - * requested WAL file is available - */ - if (Failover == FastFailover) - exit(1); - - if (CustomizableNextWALFileReady()) - { - /* - * Once we have restored this file successfully we can remove some - * prior WAL files. If this restore fails we mustn't remove any - * file because some of them will be requested again immediately - * after the failed restore, or when we restart recovery. 
- */ - if (RestoreWALFileForRecovery()) - { - if (need_cleanup) - CustomizableCleanupPriorWALFiles(); - - exit(0); - } - else - { - /* Something went wrong in copying the file */ - exit(1); - } - } - - /* Check for smart failover if the next WAL file was not available */ - if (Failover == SmartFailover) - exit(1); - - if (sleeptime <= 60) - pg_usleep(sleeptime * 1000000L); - - waittime += sleeptime; - if (waittime >= maxwaittime && maxwaittime > 0) - { - Failover = FastFailover; - if (debug) - { - fprintf(stderr, "Timed out after %d seconds: fast failover\n", - waittime); - fflush(stderr); - } - } - if (debug) - { - fprintf(stderr, "WAL file not present yet."); - if (triggerPath) - fprintf(stderr, " Checking for trigger file..."); - fprintf(stderr, "\n"); - fflush(stderr); - } - } -} diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 196e1e214204f..62cccbfa44dbd 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -49,7 +49,7 @@ * in the file to be read or written while holding only shared lock. 
* * - * Copyright (c) 2008-2020, PostgreSQL Global Development Group + * Copyright (c) 2008-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pg_stat_statements/pg_stat_statements.c @@ -1898,6 +1898,11 @@ pg_stat_statements_info(PG_FUNCTION_ARGS) Datum values[PG_STAT_STATEMENTS_INFO_COLS]; bool nulls[PG_STAT_STATEMENTS_INFO_COLS]; + if (!pgss || !pgss_hash) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("pg_stat_statements must be loaded via shared_preload_libraries"))); + /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); diff --git a/contrib/pg_surgery/heap_surgery.c b/contrib/pg_surgery/heap_surgery.c index eb96b4bb36d84..d31e5f31fd42a 100644 --- a/contrib/pg_surgery/heap_surgery.c +++ b/contrib/pg_surgery/heap_surgery.c @@ -3,7 +3,7 @@ * heap_surgery.c * Functions to perform surgery on the damaged heap table. * - * Copyright (c) 2020, PostgreSQL Global Development Group + * Copyright (c) 2020-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pg_surgery/heap_surgery.c diff --git a/contrib/pg_surgery/pg_surgery--1.0.sql b/contrib/pg_surgery/pg_surgery--1.0.sql index 2ae7f228c74b8..d1e53a07bc9d5 100644 --- a/contrib/pg_surgery/pg_surgery--1.0.sql +++ b/contrib/pg_surgery/pg_surgery--1.0.sql @@ -15,4 +15,4 @@ RETURNS VOID AS 'MODULE_PATHNAME', 'heap_force_freeze' LANGUAGE C STRICT; -REVOKE EXECUTE ON FUNCTION heap_force_freeze(regclass, tid[]) FROM PUBLIC; \ No newline at end of file +REVOKE EXECUTE ON FUNCTION heap_force_freeze(regclass, tid[]) FROM PUBLIC; diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c index 21e8a9f343514..1e4f0121f3d97 100644 --- a/contrib/pg_trgm/trgm_regexp.c +++ b/contrib/pg_trgm/trgm_regexp.c @@ -181,7 +181,7 @@ * 7) Mark state 3 final because state 5 of source NFA is marked as final. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/contrib/pg_visibility/expected/pg_visibility.out b/contrib/pg_visibility/expected/pg_visibility.out index ca4b6e186bcaf..315633bfea66c 100644 --- a/contrib/pg_visibility/expected/pg_visibility.out +++ b/contrib/pg_visibility/expected/pg_visibility.out @@ -105,7 +105,7 @@ ERROR: "test_foreign_table" is not a table, materialized view, or TOAST table create table regular_table (a int, b text); alter table regular_table alter column b set storage external; insert into regular_table values (1, repeat('one', 1000)), (2, repeat('two', 1000)); -vacuum regular_table; +vacuum (disable_page_skipping) regular_table; select count(*) > 0 from pg_visibility('regular_table'); ?column? ---------- @@ -132,7 +132,7 @@ select count(*) > 0 from pg_visibility((select reltoastrelid from pg_class where (1 row) create materialized view matview_visibility_test as select * from regular_table; -vacuum matview_visibility_test; +vacuum (disable_page_skipping) matview_visibility_test; select count(*) > 0 from pg_visibility('matview_visibility_test'); ?column? ---------- @@ -149,7 +149,7 @@ select count(*) > 0 from pg_visibility('matview_visibility_test'); -- regular tables which are part of a partition *do* have visibility maps insert into test_partition values (1); -vacuum test_partition; +vacuum (disable_page_skipping) test_partition; select count(*) > 0 from pg_visibility('test_partition', 0); ?column? ---------- @@ -179,6 +179,69 @@ select pg_truncate_visibility_map('test_partition'); (1 row) +-- test copy freeze +create table copyfreeze (a int, b char(1500)); +-- load all rows via COPY FREEZE and ensure that all pages are set all-visible +-- and all-frozen. 
+begin; +truncate copyfreeze; +copy copyfreeze from stdin freeze; +commit; +select * from pg_visibility_map('copyfreeze'); + blkno | all_visible | all_frozen +-------+-------------+------------ + 0 | t | t + 1 | t | t + 2 | t | t +(3 rows) + +select * from pg_check_frozen('copyfreeze'); + t_ctid +-------- +(0 rows) + +-- load half the rows via regular COPY and rest via COPY FREEZE. The pages +-- which are touched by regular COPY must not be set all-visible/all-frozen. On +-- the other hand, pages allocated by COPY FREEZE should be marked +-- all-frozen/all-visible. +begin; +truncate copyfreeze; +copy copyfreeze from stdin; +copy copyfreeze from stdin freeze; +commit; +select * from pg_visibility_map('copyfreeze'); + blkno | all_visible | all_frozen +-------+-------------+------------ + 0 | f | f + 1 | f | f + 2 | t | t +(3 rows) + +select * from pg_check_frozen('copyfreeze'); + t_ctid +-------- +(0 rows) + +-- Try a mix of regular COPY and COPY FREEZE. +begin; +truncate copyfreeze; +copy copyfreeze from stdin freeze; +copy copyfreeze from stdin; +copy copyfreeze from stdin freeze; +commit; +select * from pg_visibility_map('copyfreeze'); + blkno | all_visible | all_frozen +-------+-------------+------------ + 0 | t | t + 1 | f | f + 2 | t | t +(3 rows) + +select * from pg_check_frozen('copyfreeze'); + t_ctid +-------- +(0 rows) + -- cleanup drop table test_partitioned; drop view test_view; @@ -188,3 +251,4 @@ drop server dummy_server; drop foreign data wrapper dummy; drop materialized view matview_visibility_test; drop table regular_table; +drop table copyfreeze; diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 54e47b810fd2d..dd0c124e6255e 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -3,7 +3,7 @@ * pg_visibility.c * display visibility map information and page-level visibility bits * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 
2016-2021, PostgreSQL Global Development Group * * contrib/pg_visibility/pg_visibility.c *------------------------------------------------------------------------- diff --git a/contrib/pg_visibility/sql/pg_visibility.sql b/contrib/pg_visibility/sql/pg_visibility.sql index f79b54480b701..ff3538f9964a1 100644 --- a/contrib/pg_visibility/sql/pg_visibility.sql +++ b/contrib/pg_visibility/sql/pg_visibility.sql @@ -71,7 +71,7 @@ select pg_truncate_visibility_map('test_foreign_table'); create table regular_table (a int, b text); alter table regular_table alter column b set storage external; insert into regular_table values (1, repeat('one', 1000)), (2, repeat('two', 1000)); -vacuum regular_table; +vacuum (disable_page_skipping) regular_table; select count(*) > 0 from pg_visibility('regular_table'); select count(*) > 0 from pg_visibility((select reltoastrelid from pg_class where relname = 'regular_table')); truncate regular_table; @@ -79,7 +79,7 @@ select count(*) > 0 from pg_visibility('regular_table'); select count(*) > 0 from pg_visibility((select reltoastrelid from pg_class where relname = 'regular_table')); create materialized view matview_visibility_test as select * from regular_table; -vacuum matview_visibility_test; +vacuum (disable_page_skipping) matview_visibility_test; select count(*) > 0 from pg_visibility('matview_visibility_test'); insert into regular_table values (1), (2); refresh materialized view matview_visibility_test; @@ -87,13 +87,89 @@ select count(*) > 0 from pg_visibility('matview_visibility_test'); -- regular tables which are part of a partition *do* have visibility maps insert into test_partition values (1); -vacuum test_partition; +vacuum (disable_page_skipping) test_partition; select count(*) > 0 from pg_visibility('test_partition', 0); select count(*) > 0 from pg_visibility_map('test_partition'); select count(*) > 0 from pg_visibility_map_summary('test_partition'); select * from pg_check_frozen('test_partition'); -- hopefully none select 
pg_truncate_visibility_map('test_partition'); +-- test copy freeze +create table copyfreeze (a int, b char(1500)); + +-- load all rows via COPY FREEZE and ensure that all pages are set all-visible +-- and all-frozen. +begin; +truncate copyfreeze; +copy copyfreeze from stdin freeze; +1 '1' +2 '2' +3 '3' +4 '4' +5 '5' +6 '6' +7 '7' +8 '8' +9 '9' +10 '10' +11 '11' +12 '12' +\. +commit; +select * from pg_visibility_map('copyfreeze'); +select * from pg_check_frozen('copyfreeze'); + +-- load half the rows via regular COPY and rest via COPY FREEZE. The pages +-- which are touched by regular COPY must not be set all-visible/all-frozen. On +-- the other hand, pages allocated by COPY FREEZE should be marked +-- all-frozen/all-visible. +begin; +truncate copyfreeze; +copy copyfreeze from stdin; +1 '1' +2 '2' +3 '3' +4 '4' +5 '5' +6 '6' +\. +copy copyfreeze from stdin freeze; +7 '7' +8 '8' +9 '9' +10 '10' +11 '11' +12 '12' +\. +commit; +select * from pg_visibility_map('copyfreeze'); +select * from pg_check_frozen('copyfreeze'); + +-- Try a mix of regular COPY and COPY FREEZE. +begin; +truncate copyfreeze; +copy copyfreeze from stdin freeze; +1 '1' +2 '2' +3 '3' +4 '4' +5 '5' +\. +copy copyfreeze from stdin; +6 '6' +\. +copy copyfreeze from stdin freeze; +7 '7' +8 '8' +9 '9' +10 '10' +11 '11' +12 '12' +\. 
+commit; +select * from pg_visibility_map('copyfreeze'); +select * from pg_check_frozen('copyfreeze'); + -- cleanup drop table test_partitioned; drop view test_view; @@ -103,3 +179,4 @@ drop server dummy_server; drop foreign data wrapper dummy; drop materialized view matview_visibility_test; drop table regular_table; +drop table copyfreeze; diff --git a/contrib/pgcrypto/Makefile b/contrib/pgcrypto/Makefile index d881e85add8c7..c0b4f1fcf68f2 100644 --- a/contrib/pgcrypto/Makefile +++ b/contrib/pgcrypto/Makefile @@ -1,6 +1,6 @@ # contrib/pgcrypto/Makefile -INT_SRCS = sha1.c internal.c internal-sha2.c blf.c rijndael.c \ +INT_SRCS = internal.c internal-sha2.c blf.c rijndael.c \ pgp-mpi-internal.c imath.c INT_TESTS = sha2 @@ -10,8 +10,8 @@ OSSL_TESTS = sha2 des 3des cast5 ZLIB_TST = pgp-compression ZLIB_OFF_TST = pgp-zlib-DISABLED -CF_SRCS = $(if $(subst no,,$(with_openssl)), $(OSSL_SRCS), $(INT_SRCS)) -CF_TESTS = $(if $(subst no,,$(with_openssl)), $(OSSL_TESTS), $(INT_TESTS)) +CF_SRCS = $(if $(subst openssl,,$(with_ssl)), $(INT_SRCS), $(OSSL_SRCS)) +CF_TESTS = $(if $(subst openssl,,$(with_ssl)), $(INT_TESTS), $(OSSL_TESTS)) CF_PGP_TESTS = $(if $(subst no,,$(with_zlib)), $(ZLIB_TST), $(ZLIB_OFF_TST)) SRCS = \ diff --git a/contrib/pgcrypto/imath.c b/contrib/pgcrypto/imath.c index 9deaa797c1a0f..0bfa080fa5502 100644 --- a/contrib/pgcrypto/imath.c +++ b/contrib/pgcrypto/imath.c @@ -29,7 +29,7 @@ * * 4. Update this header comment. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pgcrypto/imath.c diff --git a/contrib/pgcrypto/internal.c b/contrib/pgcrypto/internal.c index ea377bdf83ae9..ef6ce2fb1ef94 100644 --- a/contrib/pgcrypto/internal.c +++ b/contrib/pgcrypto/internal.c @@ -36,18 +36,10 @@ #include "blf.h" #include "px.h" #include "rijndael.h" -#include "sha1.h" #include "common/cryptohash.h" #include "common/md5.h" - -#ifndef SHA1_DIGEST_LENGTH -#ifdef SHA1_RESULTLEN -#define SHA1_DIGEST_LENGTH SHA1_RESULTLEN -#else -#define SHA1_DIGEST_LENGTH 20 -#endif -#endif +#include "common/sha1.h" #define SHA1_BLOCK_SIZE 64 #define MD5_BLOCK_SIZE 64 @@ -96,7 +88,8 @@ int_md5_update(PX_MD *h, const uint8 *data, unsigned dlen) { pg_cryptohash_ctx *ctx = (pg_cryptohash_ctx *) h->p.ptr; - pg_cryptohash_update(ctx, data, dlen); + if (pg_cryptohash_update(ctx, data, dlen) < 0) + elog(ERROR, "could not update %s context", "MD5"); } static void @@ -104,7 +97,8 @@ int_md5_reset(PX_MD *h) { pg_cryptohash_ctx *ctx = (pg_cryptohash_ctx *) h->p.ptr; - pg_cryptohash_init(ctx); + if (pg_cryptohash_init(ctx) < 0) + elog(ERROR, "could not initialize %s context", "MD5"); } static void @@ -112,7 +106,8 @@ int_md5_finish(PX_MD *h, uint8 *dst) { pg_cryptohash_ctx *ctx = (pg_cryptohash_ctx *) h->p.ptr; - pg_cryptohash_final(ctx, dst); + if (pg_cryptohash_final(ctx, dst) < 0) + elog(ERROR, "could not finalize %s context", "MD5"); } static void @@ -141,34 +136,36 @@ int_sha1_block_len(PX_MD *h) static void int_sha1_update(PX_MD *h, const uint8 *data, unsigned dlen) { - SHA1_CTX *ctx = (SHA1_CTX *) h->p.ptr; + pg_cryptohash_ctx *ctx = (pg_cryptohash_ctx *) h->p.ptr; - SHA1Update(ctx, data, dlen); + if (pg_cryptohash_update(ctx, data, dlen) < 0) + elog(ERROR, "could not update %s context", "SHA1"); } static void int_sha1_reset(PX_MD *h) { - SHA1_CTX *ctx = (SHA1_CTX *) h->p.ptr; + 
pg_cryptohash_ctx *ctx = (pg_cryptohash_ctx *) h->p.ptr; - SHA1Init(ctx); + if (pg_cryptohash_init(ctx) < 0) + elog(ERROR, "could not initialize %s context", "SHA1"); } static void int_sha1_finish(PX_MD *h, uint8 *dst) { - SHA1_CTX *ctx = (SHA1_CTX *) h->p.ptr; + pg_cryptohash_ctx *ctx = (pg_cryptohash_ctx *) h->p.ptr; - SHA1Final(dst, ctx); + if (pg_cryptohash_final(ctx, dst) < 0) + elog(ERROR, "could not finalize %s context", "SHA1"); } static void int_sha1_free(PX_MD *h) { - SHA1_CTX *ctx = (SHA1_CTX *) h->p.ptr; + pg_cryptohash_ctx *ctx = (pg_cryptohash_ctx *) h->p.ptr; - px_memset(ctx, 0, sizeof(*ctx)); - pfree(ctx); + pg_cryptohash_free(ctx); pfree(h); } @@ -196,9 +193,9 @@ init_md5(PX_MD *md) static void init_sha1(PX_MD *md) { - SHA1_CTX *ctx; + pg_cryptohash_ctx *ctx; - ctx = palloc0(sizeof(*ctx)); + ctx = pg_cryptohash_create(PG_SHA1); md->p.ptr = ctx; diff --git a/contrib/pgcrypto/sha1.c b/contrib/pgcrypto/sha1.c deleted file mode 100644 index 64671ac64d9a5..0000000000000 --- a/contrib/pgcrypto/sha1.c +++ /dev/null @@ -1,331 +0,0 @@ -/* $KAME: sha1.c,v 1.3 2000/02/22 14:01:18 itojun Exp $ */ - -/* - * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * contrib/pgcrypto/sha1.c - */ -/* - * FIPS pub 180-1: Secure Hash Algorithm (SHA-1) - * based on: http://www.itl.nist.gov/fipspubs/fip180-1.htm - * implemented by Jun-ichiro itojun Itoh - */ - -#include "postgres.h" - -#include - -#include "sha1.h" - -/* constant table */ -static uint32 _K[] = {0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6}; - -#define K(t) _K[(t) / 20] - -#define F0(b, c, d) (((b) & (c)) | ((~(b)) & (d))) -#define F1(b, c, d) (((b) ^ (c)) ^ (d)) -#define F2(b, c, d) (((b) & (c)) | ((b) & (d)) | ((c) & (d))) -#define F3(b, c, d) (((b) ^ (c)) ^ (d)) - -#define S(n, x) (((x) << (n)) | ((x) >> (32 - (n)))) - -#define H(n) (ctxt->h.b32[(n)]) -#define COUNT (ctxt->count) -#define BCOUNT (ctxt->c.b64[0] / 8) -#define W(n) (ctxt->m.b32[(n)]) - -#define PUTPAD(x) \ -do { \ - ctxt->m.b8[(COUNT % 64)] = (x); \ - COUNT++; \ - COUNT %= 64; \ - if (COUNT % 64 == 0) \ - sha1_step(ctxt); \ -} while (0) - -static void sha1_step(struct sha1_ctxt *); - -static void -sha1_step(struct sha1_ctxt *ctxt) -{ - uint32 a, - b, - c, - d, - e; - size_t t, - s; - uint32 tmp; - -#ifndef WORDS_BIGENDIAN - struct sha1_ctxt tctxt; - - memmove(&tctxt.m.b8[0], &ctxt->m.b8[0], 64); - ctxt->m.b8[0] = 
tctxt.m.b8[3]; - ctxt->m.b8[1] = tctxt.m.b8[2]; - ctxt->m.b8[2] = tctxt.m.b8[1]; - ctxt->m.b8[3] = tctxt.m.b8[0]; - ctxt->m.b8[4] = tctxt.m.b8[7]; - ctxt->m.b8[5] = tctxt.m.b8[6]; - ctxt->m.b8[6] = tctxt.m.b8[5]; - ctxt->m.b8[7] = tctxt.m.b8[4]; - ctxt->m.b8[8] = tctxt.m.b8[11]; - ctxt->m.b8[9] = tctxt.m.b8[10]; - ctxt->m.b8[10] = tctxt.m.b8[9]; - ctxt->m.b8[11] = tctxt.m.b8[8]; - ctxt->m.b8[12] = tctxt.m.b8[15]; - ctxt->m.b8[13] = tctxt.m.b8[14]; - ctxt->m.b8[14] = tctxt.m.b8[13]; - ctxt->m.b8[15] = tctxt.m.b8[12]; - ctxt->m.b8[16] = tctxt.m.b8[19]; - ctxt->m.b8[17] = tctxt.m.b8[18]; - ctxt->m.b8[18] = tctxt.m.b8[17]; - ctxt->m.b8[19] = tctxt.m.b8[16]; - ctxt->m.b8[20] = tctxt.m.b8[23]; - ctxt->m.b8[21] = tctxt.m.b8[22]; - ctxt->m.b8[22] = tctxt.m.b8[21]; - ctxt->m.b8[23] = tctxt.m.b8[20]; - ctxt->m.b8[24] = tctxt.m.b8[27]; - ctxt->m.b8[25] = tctxt.m.b8[26]; - ctxt->m.b8[26] = tctxt.m.b8[25]; - ctxt->m.b8[27] = tctxt.m.b8[24]; - ctxt->m.b8[28] = tctxt.m.b8[31]; - ctxt->m.b8[29] = tctxt.m.b8[30]; - ctxt->m.b8[30] = tctxt.m.b8[29]; - ctxt->m.b8[31] = tctxt.m.b8[28]; - ctxt->m.b8[32] = tctxt.m.b8[35]; - ctxt->m.b8[33] = tctxt.m.b8[34]; - ctxt->m.b8[34] = tctxt.m.b8[33]; - ctxt->m.b8[35] = tctxt.m.b8[32]; - ctxt->m.b8[36] = tctxt.m.b8[39]; - ctxt->m.b8[37] = tctxt.m.b8[38]; - ctxt->m.b8[38] = tctxt.m.b8[37]; - ctxt->m.b8[39] = tctxt.m.b8[36]; - ctxt->m.b8[40] = tctxt.m.b8[43]; - ctxt->m.b8[41] = tctxt.m.b8[42]; - ctxt->m.b8[42] = tctxt.m.b8[41]; - ctxt->m.b8[43] = tctxt.m.b8[40]; - ctxt->m.b8[44] = tctxt.m.b8[47]; - ctxt->m.b8[45] = tctxt.m.b8[46]; - ctxt->m.b8[46] = tctxt.m.b8[45]; - ctxt->m.b8[47] = tctxt.m.b8[44]; - ctxt->m.b8[48] = tctxt.m.b8[51]; - ctxt->m.b8[49] = tctxt.m.b8[50]; - ctxt->m.b8[50] = tctxt.m.b8[49]; - ctxt->m.b8[51] = tctxt.m.b8[48]; - ctxt->m.b8[52] = tctxt.m.b8[55]; - ctxt->m.b8[53] = tctxt.m.b8[54]; - ctxt->m.b8[54] = tctxt.m.b8[53]; - ctxt->m.b8[55] = tctxt.m.b8[52]; - ctxt->m.b8[56] = tctxt.m.b8[59]; - ctxt->m.b8[57] = tctxt.m.b8[58]; - 
ctxt->m.b8[58] = tctxt.m.b8[57]; - ctxt->m.b8[59] = tctxt.m.b8[56]; - ctxt->m.b8[60] = tctxt.m.b8[63]; - ctxt->m.b8[61] = tctxt.m.b8[62]; - ctxt->m.b8[62] = tctxt.m.b8[61]; - ctxt->m.b8[63] = tctxt.m.b8[60]; -#endif - - a = H(0); - b = H(1); - c = H(2); - d = H(3); - e = H(4); - - for (t = 0; t < 20; t++) - { - s = t & 0x0f; - if (t >= 16) - W(s) = S(1, W((s + 13) & 0x0f) ^ W((s + 8) & 0x0f) ^ W((s + 2) & 0x0f) ^ W(s)); - tmp = S(5, a) + F0(b, c, d) + e + W(s) + K(t); - e = d; - d = c; - c = S(30, b); - b = a; - a = tmp; - } - for (t = 20; t < 40; t++) - { - s = t & 0x0f; - W(s) = S(1, W((s + 13) & 0x0f) ^ W((s + 8) & 0x0f) ^ W((s + 2) & 0x0f) ^ W(s)); - tmp = S(5, a) + F1(b, c, d) + e + W(s) + K(t); - e = d; - d = c; - c = S(30, b); - b = a; - a = tmp; - } - for (t = 40; t < 60; t++) - { - s = t & 0x0f; - W(s) = S(1, W((s + 13) & 0x0f) ^ W((s + 8) & 0x0f) ^ W((s + 2) & 0x0f) ^ W(s)); - tmp = S(5, a) + F2(b, c, d) + e + W(s) + K(t); - e = d; - d = c; - c = S(30, b); - b = a; - a = tmp; - } - for (t = 60; t < 80; t++) - { - s = t & 0x0f; - W(s) = S(1, W((s + 13) & 0x0f) ^ W((s + 8) & 0x0f) ^ W((s + 2) & 0x0f) ^ W(s)); - tmp = S(5, a) + F3(b, c, d) + e + W(s) + K(t); - e = d; - d = c; - c = S(30, b); - b = a; - a = tmp; - } - - H(0) = H(0) + a; - H(1) = H(1) + b; - H(2) = H(2) + c; - H(3) = H(3) + d; - H(4) = H(4) + e; - - memset(&ctxt->m.b8[0], 0, 64); -} - -/*------------------------------------------------------------*/ - -void -sha1_init(struct sha1_ctxt *ctxt) -{ - memset(ctxt, 0, sizeof(struct sha1_ctxt)); - H(0) = 0x67452301; - H(1) = 0xefcdab89; - H(2) = 0x98badcfe; - H(3) = 0x10325476; - H(4) = 0xc3d2e1f0; -} - -void -sha1_pad(struct sha1_ctxt *ctxt) -{ - size_t padlen; /* pad length in bytes */ - size_t padstart; - - PUTPAD(0x80); - - padstart = COUNT % 64; - padlen = 64 - padstart; - if (padlen < 8) - { - memset(&ctxt->m.b8[padstart], 0, padlen); - COUNT += padlen; - COUNT %= 64; - sha1_step(ctxt); - padstart = COUNT % 64; /* should be 0 */ - padlen = 64 - 
padstart; /* should be 64 */ - } - memset(&ctxt->m.b8[padstart], 0, padlen - 8); - COUNT += (padlen - 8); - COUNT %= 64; -#ifdef WORDS_BIGENDIAN - PUTPAD(ctxt->c.b8[0]); - PUTPAD(ctxt->c.b8[1]); - PUTPAD(ctxt->c.b8[2]); - PUTPAD(ctxt->c.b8[3]); - PUTPAD(ctxt->c.b8[4]); - PUTPAD(ctxt->c.b8[5]); - PUTPAD(ctxt->c.b8[6]); - PUTPAD(ctxt->c.b8[7]); -#else - PUTPAD(ctxt->c.b8[7]); - PUTPAD(ctxt->c.b8[6]); - PUTPAD(ctxt->c.b8[5]); - PUTPAD(ctxt->c.b8[4]); - PUTPAD(ctxt->c.b8[3]); - PUTPAD(ctxt->c.b8[2]); - PUTPAD(ctxt->c.b8[1]); - PUTPAD(ctxt->c.b8[0]); -#endif -} - -void -sha1_loop(struct sha1_ctxt *ctxt, const uint8 *input0, size_t len) -{ - const uint8 *input; - size_t gaplen; - size_t gapstart; - size_t off; - size_t copysiz; - - input = (const uint8 *) input0; - off = 0; - - while (off < len) - { - gapstart = COUNT % 64; - gaplen = 64 - gapstart; - - copysiz = (gaplen < len - off) ? gaplen : len - off; - memmove(&ctxt->m.b8[gapstart], &input[off], copysiz); - COUNT += copysiz; - COUNT %= 64; - ctxt->c.b64[0] += copysiz * 8; - if (COUNT % 64 == 0) - sha1_step(ctxt); - off += copysiz; - } -} - -void -sha1_result(struct sha1_ctxt *ctxt, uint8 *digest0) -{ - uint8 *digest; - - digest = (uint8 *) digest0; - sha1_pad(ctxt); -#ifdef WORDS_BIGENDIAN - memmove(digest, &ctxt->h.b8[0], 20); -#else - digest[0] = ctxt->h.b8[3]; - digest[1] = ctxt->h.b8[2]; - digest[2] = ctxt->h.b8[1]; - digest[3] = ctxt->h.b8[0]; - digest[4] = ctxt->h.b8[7]; - digest[5] = ctxt->h.b8[6]; - digest[6] = ctxt->h.b8[5]; - digest[7] = ctxt->h.b8[4]; - digest[8] = ctxt->h.b8[11]; - digest[9] = ctxt->h.b8[10]; - digest[10] = ctxt->h.b8[9]; - digest[11] = ctxt->h.b8[8]; - digest[12] = ctxt->h.b8[15]; - digest[13] = ctxt->h.b8[14]; - digest[14] = ctxt->h.b8[13]; - digest[15] = ctxt->h.b8[12]; - digest[16] = ctxt->h.b8[19]; - digest[17] = ctxt->h.b8[18]; - digest[18] = ctxt->h.b8[17]; - digest[19] = ctxt->h.b8[16]; -#endif -} diff --git a/contrib/pgstattuple/pgstatapprox.c 
b/contrib/pgstattuple/pgstatapprox.c index 23306e11a78d6..1fe193bb256f2 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ b/contrib/pgstattuple/pgstatapprox.c @@ -3,7 +3,7 @@ * pgstatapprox.c * Bloat estimation functions * - * Copyright (c) 2014-2020, PostgreSQL Global Development Group + * Copyright (c) 2014-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/pgstattuple/pgstatapprox.c diff --git a/contrib/postgres_fdw/Makefile b/contrib/postgres_fdw/Makefile index ee8a80a3921e5..c1b0cad453f25 100644 --- a/contrib/postgres_fdw/Makefile +++ b/contrib/postgres_fdw/Makefile @@ -14,7 +14,7 @@ PG_CPPFLAGS = -I$(libpq_srcdir) SHLIB_LINK_INTERNAL = $(libpq) EXTENSION = postgres_fdw -DATA = postgres_fdw--1.0.sql +DATA = postgres_fdw--1.0.sql postgres_fdw--1.0--1.1.sql REGRESS = postgres_fdw diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index d841cec39b563..ee0b4acf0bad6 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -3,7 +3,7 @@ * connection.c * Connection management functions for postgres_fdw * - * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/postgres_fdw/connection.c @@ -16,12 +16,14 @@ #include "access/xact.h" #include "catalog/pg_user_mapping.h" #include "commands/defrem.h" +#include "funcapi.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "pgstat.h" #include "postgres_fdw.h" #include "storage/fd.h" #include "storage/latch.h" +#include "utils/builtins.h" #include "utils/datetime.h" #include "utils/hsearch.h" #include "utils/inval.h" @@ -57,6 +59,7 @@ typedef struct ConnCacheEntry bool have_error; /* have any subxacts aborted in this xact? 
*/ bool changing_xact_state; /* xact state change in process */ bool invalidated; /* true if reconnect is pending */ + Oid serverid; /* foreign server OID used to get server name */ uint32 server_hashvalue; /* hash value of foreign server OID */ uint32 mapping_hashvalue; /* hash value of user mapping OID */ } ConnCacheEntry; @@ -73,6 +76,13 @@ static unsigned int prep_stmt_number = 0; /* tracks whether any work is needed in callback functions */ static bool xact_got_connection = false; +/* + * SQL functions + */ +PG_FUNCTION_INFO_V1(postgres_fdw_get_connections); +PG_FUNCTION_INFO_V1(postgres_fdw_disconnect); +PG_FUNCTION_INFO_V1(postgres_fdw_disconnect_all); + /* prototypes of private functions */ static void make_new_connection(ConnCacheEntry *entry, UserMapping *user); static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user); @@ -94,6 +104,7 @@ static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query, static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result); static bool UserMappingPasswordRequired(UserMapping *user); +static bool disconnect_cached_connections(Oid serverid); /* * Get a PGconn which can be used to execute queries on the remote PostgreSQL @@ -273,6 +284,7 @@ make_new_connection(ConnCacheEntry *entry, UserMapping *user) entry->have_error = false; entry->changing_xact_state = false; entry->invalidated = false; + entry->serverid = server->serverid; entry->server_hashvalue = GetSysCacheHashValue1(FOREIGNSERVEROID, ObjectIdGetDatum(server->serverid)); @@ -1138,8 +1150,6 @@ pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue) static void pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry) { - HeapTuple tup; - Form_pg_user_mapping umform; ForeignServer *server; /* nothing to do for inactive entries and entries of sane state */ @@ -1150,13 +1160,7 @@ pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry) disconnect_pg_server(entry); /* find server name to be 
shown in the message below */ - tup = SearchSysCache1(USERMAPPINGOID, - ObjectIdGetDatum(entry->key)); - if (!HeapTupleIsValid(tup)) - elog(ERROR, "cache lookup failed for user mapping %u", entry->key); - umform = (Form_pg_user_mapping) GETSTRUCT(tup); - server = GetForeignServer(umform->umserver); - ReleaseSysCache(tup); + server = GetForeignServer(entry->serverid); ereport(ERROR, (errcode(ERRCODE_CONNECTION_EXCEPTION), @@ -1341,3 +1345,257 @@ exit: ; *result = last_res; return timed_out; } + +/* + * List active foreign server connections. + * + * This function takes no input parameter and returns setof record made of + * following values: + * - server_name - server name of active connection. In case the foreign server + * is dropped but still the connection is active, then the server name will + * be NULL in output. + * - valid - true/false representing whether the connection is valid or not. + * Note that the connections can get invalidated in pgfdw_inval_callback. + * + * No records are returned when there are no cached connections at all. 
+ */ +Datum +postgres_fdw_get_connections(PG_FUNCTION_ARGS) +{ +#define POSTGRES_FDW_GET_CONNECTIONS_COLS 2 + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + HASH_SEQ_STATUS scan; + ConnCacheEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* Build tuplestore to hold the result rows */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + + /* If cache doesn't exist, we return no records */ + if (!ConnectionHash) + { + /* clean up and return the tuplestore */ + tuplestore_donestoring(tupstore); + + PG_RETURN_VOID(); + } + + hash_seq_init(&scan, ConnectionHash); + while ((entry = (ConnCacheEntry *) hash_seq_search(&scan))) + { + ForeignServer *server; + Datum values[POSTGRES_FDW_GET_CONNECTIONS_COLS]; + bool nulls[POSTGRES_FDW_GET_CONNECTIONS_COLS]; + + /* We only look for open remote connections */ + if (!entry->conn) + continue; + + server = GetForeignServerExtended(entry->serverid, FSV_MISSING_OK); + + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + /* + * The foreign 
server may have been dropped in current explicit + * transaction. It is not possible to drop the server from another + * session when the connection associated with it is in use in the + * current transaction, if tried so, the drop query in another session + * blocks until the current transaction finishes. + * + * Even though the server is dropped in the current transaction, the + * cache can still have associated active connection entry, say we + * call such connections dangling. Since we can not fetch the server + * name from system catalogs for dangling connections, instead we show + * NULL value for server name in output. + * + * We could have done better by storing the server name in the cache + * entry instead of server oid so that it could be used in the output. + * But the server name in each cache entry requires 64 bytes of + * memory, which is huge, when there are many cached connections and + * the use case i.e. dropping the foreign server within the explicit + * current transaction seems rare. So, we chose to show NULL value for + * server name in output. + * + * Such dangling connections get closed either in next use or at the + * end of current explicit transaction in pgfdw_xact_callback. + */ + if (!server) + { + /* + * If the server has been dropped in the current explicit + * transaction, then this entry would have been invalidated in + * pgfdw_inval_callback at the end of drop server command. Note + * that this connection would not have been closed in + * pgfdw_inval_callback because it is still being used in the + * current explicit transaction. So, assert that here. 
+ */ + Assert(entry->conn && entry->xact_depth > 0 && entry->invalidated); + + /* Show null, if no server name was found */ + nulls[0] = true; + } + else + values[0] = CStringGetTextDatum(server->servername); + + values[1] = BoolGetDatum(!entry->invalidated); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + /* clean up and return the tuplestore */ + tuplestore_donestoring(tupstore); + + PG_RETURN_VOID(); +} + +/* + * Disconnect the specified cached connections. + * + * This function discards the open connections that are established by + * postgres_fdw from the local session to the foreign server with + * the given name. Note that there can be multiple connections to + * the given server using different user mappings. If the connections + * are used in the current local transaction, they are not disconnected + * and warning messages are reported. This function returns true + * if it disconnects at least one connection, otherwise false. If no + * foreign server with the given name is found, an error is reported. + */ +Datum +postgres_fdw_disconnect(PG_FUNCTION_ARGS) +{ + ForeignServer *server; + char *servername; + + servername = text_to_cstring(PG_GETARG_TEXT_PP(0)); + server = GetForeignServerByName(servername, false); + + PG_RETURN_BOOL(disconnect_cached_connections(server->serverid)); +} + +/* + * Disconnect all the cached connections. + * + * This function discards all the open connections that are established by + * postgres_fdw from the local session to the foreign servers. + * If the connections are used in the current local transaction, they are + * not disconnected and warning messages are reported. This function + * returns true if it disconnects at least one connection, otherwise false. + */ +Datum +postgres_fdw_disconnect_all(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(disconnect_cached_connections(InvalidOid)); +} + +/* + * Workhorse to disconnect cached connections. 
+ * + * This function scans all the connection cache entries and disconnects + * the open connections whose foreign server OID matches with + * the specified one. If InvalidOid is specified, it disconnects all + * the cached connections. + * + * This function emits a warning for each connection that's used in + * the current transaction and doesn't close it. It returns true if + * it disconnects at least one connection, otherwise false. + * + * Note that this function disconnects even the connections that are + * established by other users in the same local session using different + * user mappings. This leads even non-superuser to be able to close + * the connections established by superusers in the same local session. + * + * XXX As of now we don't see any security risk doing this. But we should + * set some restrictions on that, for example, prevent non-superuser + * from closing the connections established by superusers even + * in the same session? + */ +static bool +disconnect_cached_connections(Oid serverid) +{ + HASH_SEQ_STATUS scan; + ConnCacheEntry *entry; + bool all = !OidIsValid(serverid); + bool result = false; + + /* + * Connection cache hashtable has not been initialized yet in this + * session, so return false. + */ + if (!ConnectionHash) + return false; + + hash_seq_init(&scan, ConnectionHash); + while ((entry = (ConnCacheEntry *) hash_seq_search(&scan))) + { + /* Ignore cache entry if no open connection right now. */ + if (!entry->conn) + continue; + + if (all || entry->serverid == serverid) + { + /* + * Emit a warning because the connection to close is used in the + * current transaction and cannot be disconnected right now. 
+ */ + if (entry->xact_depth > 0) + { + ForeignServer *server; + + server = GetForeignServerExtended(entry->serverid, + FSV_MISSING_OK); + + if (!server) + { + /* + * If the foreign server was dropped while its connection + * was used in the current transaction, the connection + * must have been marked as invalid by + * pgfdw_inval_callback at the end of DROP SERVER command. + */ + Assert(entry->invalidated); + + ereport(WARNING, + (errmsg("cannot close dropped server connection because it is still in use"))); + } + else + ereport(WARNING, + (errmsg("cannot close connection for server \"%s\" because it is still in use", + server->servername))); + } + else + { + elog(DEBUG3, "discarding connection %p", entry->conn); + disconnect_pg_server(entry); + result = true; + } + } + } + + return result; +} diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index ca2f9f321570f..6faf499f9a670 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -24,7 +24,7 @@ * with collations that match the remote table's columns, which we can * consider to be user error. * - * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/postgres_fdw/deparse.c @@ -1705,13 +1705,16 @@ deparseRangeTblRef(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, * The statement text is appended to buf, and we also create an integer List * of the columns being retrieved by WITH CHECK OPTION or RETURNING (if any), * which is returned to *retrieved_attrs. + * + * This also stores end position of the VALUES clause, so that we can rebuild + * an INSERT for a batch of rows later. 
*/ void deparseInsertSql(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, List *targetAttrs, bool doNothing, List *withCheckOptionList, List *returningList, - List **retrieved_attrs) + List **retrieved_attrs, int *values_end_len) { AttrNumber pindex; bool first; @@ -1754,6 +1757,7 @@ deparseInsertSql(StringInfo buf, RangeTblEntry *rte, } else appendStringInfoString(buf, " DEFAULT VALUES"); + *values_end_len = buf->len; if (doNothing) appendStringInfoString(buf, " ON CONFLICT DO NOTHING"); @@ -1763,6 +1767,54 @@ deparseInsertSql(StringInfo buf, RangeTblEntry *rte, withCheckOptionList, returningList, retrieved_attrs); } +/* + * rebuild remote INSERT statement + * + * Provided a number of rows in a batch, builds INSERT statement with the + * right number of parameters. + */ +void +rebuildInsertSql(StringInfo buf, char *orig_query, + int values_end_len, int num_cols, + int num_rows) +{ + int i, j; + int pindex; + bool first; + + /* Make sure the values_end_len is sensible */ + Assert((values_end_len > 0) && (values_end_len <= strlen(orig_query))); + + /* Copy up to the end of the first record from the original query */ + appendBinaryStringInfo(buf, orig_query, values_end_len); + + /* + * Add records to VALUES clause (we already have parameters for the + * first row, so start at the right offset). 
+ */ + pindex = num_cols + 1; + for (i = 0; i < num_rows; i++) + { + appendStringInfoString(buf, ", ("); + + first = true; + for (j = 0; j < num_cols; j++) + { + if (!first) + appendStringInfoString(buf, ", "); + first = false; + + appendStringInfo(buf, "$%d", pindex); + pindex++; + } + + appendStringInfoChar(buf, ')'); + } + + /* Copy stuff after VALUES clause from the original query */ + appendStringInfoString(buf, orig_query + values_end_len); +} + /* * deparse remote UPDATE statement * diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index c11092f8cc59b..b09dce63f5ffa 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -13,12 +13,17 @@ DO $d$ OPTIONS (dbname '$$||current_database()||$$', port '$$||current_setting('port')||$$' )$$; + EXECUTE $$CREATE SERVER loopback3 FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (dbname '$$||current_database()||$$', + port '$$||current_setting('port')||$$' + )$$; END; $d$; CREATE USER MAPPING FOR public SERVER testserver1 OPTIONS (user 'value', password 'value'); CREATE USER MAPPING FOR CURRENT_USER SERVER loopback; CREATE USER MAPPING FOR CURRENT_USER SERVER loopback2; +CREATE USER MAPPING FOR public SERVER loopback3; -- =================================================================== -- create objects used through FDW loopback server -- =================================================================== @@ -129,6 +134,11 @@ CREATE FOREIGN TABLE ft6 ( c2 int NOT NULL, c3 text ) SERVER loopback2 OPTIONS (schema_name 'S 1', table_name 'T 4'); +CREATE FOREIGN TABLE ft7 ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text +) SERVER loopback3 OPTIONS (schema_name 'S 1', table_name 'T 4'); -- =================================================================== -- tests for validator -- =================================================================== @@ -199,7 +209,8 @@ ALTER FOREIGN TABLE ft2 ALTER COLUMN c1 
OPTIONS (column_name 'C 1'); public | ft4 | loopback | (schema_name 'S 1', table_name 'T 3') | public | ft5 | loopback | (schema_name 'S 1', table_name 'T 4') | public | ft6 | loopback2 | (schema_name 'S 1', table_name 'T 4') | -(5 rows) + public | ft7 | loopback3 | (schema_name 'S 1', table_name 'T 4') | +(6 rows) -- Test that alteration of server options causes reconnection -- Remote's errors might be non-English, so hide them to ensure stable results @@ -3875,9 +3886,10 @@ EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st7; ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Insert on public.ft1 Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + Batch Size: 1 -> Result Output: NULL::integer, 1001, 101, 'foo'::text, NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft1 '::character(10), NULL::user_enum -(4 rows) +(5 rows) ALTER TABLE "S 1"."T 1" RENAME TO "T 0"; ALTER FOREIGN TABLE ft1 OPTIONS (SET table_name 'T 0'); @@ -3908,9 +3920,10 @@ EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st7; ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Insert on public.ft1 Remote SQL: INSERT INTO "S 1"."T 0"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + Batch Size: 1 -> Result Output: NULL::integer, 1001, 101, 'foo'::text, NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft1 '::character(10), NULL::user_enum -(4 rows) +(5 rows) ALTER TABLE "S 1"."T 0" RENAME TO "T 1"; ALTER FOREIGN TABLE ft1 OPTIONS (SET table_name 'T 1'); @@ -4232,12 +4245,13 @@ INSERT INTO ft2 (c1,c2,c3) SELECT c1+1000,c2+100, c3 || c3 FROM ft2 LIMIT 20; 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Insert on public.ft2 Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + Batch Size: 1 -> Subquery Scan on "*SELECT*" Output: "*SELECT*"."?column?", "*SELECT*"."?column?_1", NULL::integer, "*SELECT*"."?column?_2", NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft2 '::character(10), NULL::user_enum -> Foreign Scan on public.ft2 ft2_1 Output: (ft2_1.c1 + 1000), (ft2_1.c2 + 100), (ft2_1.c3 || ft2_1.c3) Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1" LIMIT 20::bigint -(7 rows) +(8 rows) INSERT INTO ft2 (c1,c2,c3) SELECT c1+1000,c2+100, c3 || c3 FROM ft2 LIMIT 20; INSERT INTO ft2 (c1,c2,c3) @@ -5348,9 +5362,10 @@ INSERT INTO ft2 (c1,c2,c3) VALUES (1200,999,'foo') RETURNING tableoid::regclass; Insert on public.ft2 Output: (ft2.tableoid)::regclass Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + Batch Size: 1 -> Result Output: 1200, 999, NULL::integer, 'foo'::text, NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft2 '::character(10), NULL::user_enum -(5 rows) +(6 rows) INSERT INTO ft2 (c1,c2,c3) VALUES (1200,999,'foo') RETURNING tableoid::regclass; tableoid @@ -6200,9 +6215,10 @@ INSERT INTO rw_view VALUES (0, 5); -------------------------------------------------------------------------------- Insert on public.foreign_tbl Remote SQL: INSERT INTO public.base_tbl(a, b) VALUES ($1, $2) RETURNING a, b + Batch Size: 1 -> Result Output: 0, 5 -(4 rows) +(5 rows) INSERT INTO rw_view VALUES (0, 5); -- should fail ERROR: new row violates check option for view "rw_view" @@ -6213,9 +6229,10 @@ INSERT INTO rw_view VALUES (0, 15); 
-------------------------------------------------------------------------------- Insert on public.foreign_tbl Remote SQL: INSERT INTO public.base_tbl(a, b) VALUES ($1, $2) RETURNING a, b + Batch Size: 1 -> Result Output: 0, 15 -(4 rows) +(5 rows) INSERT INTO rw_view VALUES (0, 15); -- ok SELECT * FROM foreign_tbl; @@ -8911,7 +8928,7 @@ DO $d$ END; $d$; ERROR: invalid option "password" -HINT: Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size +HINT: Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size CONTEXT: SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')" PL/pgSQL function inline_code_block line 3 at EXECUTE -- If we add a password for our user mapping instead, we should get a different @@ -9035,9 +9052,22 @@ ERROR: 08006 COMMIT; -- Clean up DROP PROCEDURE terminate_backend_and_wait(text); --- =================================================================== --- test connection invalidation cases --- =================================================================== +-- 
============================================================================= +-- test connection invalidation cases and postgres_fdw_get_connections function +-- ============================================================================= +-- Let's ensure to close all the existing cached connections. +SELECT 1 FROM postgres_fdw_disconnect_all(); + ?column? +---------- + 1 +(1 row) + +-- No cached connections, so no records should be output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + server_name +------------- +(0 rows) + -- This test case is for closing the connection in pgfdw_xact_callback BEGIN; -- Connection xact depth becomes 1 i.e. the connection is in midst of the xact. @@ -9047,9 +9077,324 @@ SELECT 1 FROM ft1 LIMIT 1; 1 (1 row) --- Connection is not closed at the end of the alter statement in --- pgfdw_inval_callback. That's because the connection is in midst of this --- xact, it is just marked as invalid. +SELECT 1 FROM ft7 LIMIT 1; + ?column? +---------- + 1 +(1 row) + +-- List all the existing cached connections. loopback and loopback3 should be +-- output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + server_name +------------- + loopback + loopback3 +(2 rows) + +-- Connections are not closed at the end of the alter and drop statements. +-- That's because the connections are in midst of this xact, +-- they are just marked as invalid in pgfdw_inval_callback. ALTER SERVER loopback OPTIONS (ADD use_remote_estimate 'off'); --- The invalid connection gets closed in pgfdw_xact_callback during commit. +DROP SERVER loopback3 CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to user mapping for public on server loopback3 +drop cascades to foreign table ft7 +-- List all the existing cached connections. loopback and loopback3 +-- should be output as invalid connections. Also the server name for +-- loopback3 should be NULL because the server was dropped. 
+SELECT * FROM postgres_fdw_get_connections() ORDER BY 1; + server_name | valid +-------------+------- + loopback | f + | f +(2 rows) + +-- The invalid connections get closed in pgfdw_xact_callback during commit. +COMMIT; +-- All cached connections were closed while committing above xact, so no +-- records should be output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + server_name +------------- +(0 rows) + +-- ======================================================================= +-- test postgres_fdw_disconnect and postgres_fdw_disconnect_all functions +-- ======================================================================= +BEGIN; +-- Ensure to cache loopback connection. +SELECT 1 FROM ft1 LIMIT 1; + ?column? +---------- + 1 +(1 row) + +-- Ensure to cache loopback2 connection. +SELECT 1 FROM ft6 LIMIT 1; + ?column? +---------- + 1 +(1 row) + +-- List all the existing cached connections. loopback and loopback2 should be +-- output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + server_name +------------- + loopback + loopback2 +(2 rows) + +-- Issue a warning and return false as loopback connection is still in use and +-- can not be closed. +SELECT postgres_fdw_disconnect('loopback'); +WARNING: cannot close connection for server "loopback" because it is still in use + postgres_fdw_disconnect +------------------------- + f +(1 row) + +-- List all the existing cached connections. loopback and loopback2 should be +-- output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + server_name +------------- + loopback + loopback2 +(2 rows) + +-- Return false as connections are still in use, warnings are issued. +-- But disable warnings temporarily because the order of them is not stable. 
+SET client_min_messages = 'ERROR'; +SELECT postgres_fdw_disconnect_all(); + postgres_fdw_disconnect_all +----------------------------- + f +(1 row) + +RESET client_min_messages; COMMIT; +-- Ensure that loopback2 connection is closed. +SELECT 1 FROM postgres_fdw_disconnect('loopback2'); + ?column? +---------- + 1 +(1 row) + +SELECT server_name FROM postgres_fdw_get_connections() WHERE server_name = 'loopback2'; + server_name +------------- +(0 rows) + +-- Return false as loopback2 connection is closed already. +SELECT postgres_fdw_disconnect('loopback2'); + postgres_fdw_disconnect +------------------------- + f +(1 row) + +-- Return an error as there is no foreign server with given name. +SELECT postgres_fdw_disconnect('unknownserver'); +ERROR: server "unknownserver" does not exist +-- Let's ensure to close all the existing cached connections. +SELECT 1 FROM postgres_fdw_disconnect_all(); + ?column? +---------- + 1 +(1 row) + +-- No cached connections, so no records should be output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + server_name +------------- +(0 rows) + +-- ============================================================================= +-- test case for having multiple cached connections for a foreign server +-- ============================================================================= +CREATE ROLE regress_multi_conn_user1 SUPERUSER; +CREATE ROLE regress_multi_conn_user2 SUPERUSER; +CREATE USER MAPPING FOR regress_multi_conn_user1 SERVER loopback; +CREATE USER MAPPING FOR regress_multi_conn_user2 SERVER loopback; +BEGIN; +-- Will cache loopback connection with user mapping for regress_multi_conn_user1 +SET ROLE regress_multi_conn_user1; +SELECT 1 FROM ft1 LIMIT 1; + ?column? +---------- + 1 +(1 row) + +RESET ROLE; +-- Will cache loopback connection with user mapping for regress_multi_conn_user2 +SET ROLE regress_multi_conn_user2; +SELECT 1 FROM ft1 LIMIT 1; + ?column? 
+---------- + 1 +(1 row) + +RESET ROLE; +-- Should output two connections for loopback server +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + server_name +------------- + loopback + loopback +(2 rows) + +COMMIT; +-- Let's ensure to close all the existing cached connections. +SELECT 1 FROM postgres_fdw_disconnect_all(); + ?column? +---------- + 1 +(1 row) + +-- No cached connections, so no records should be output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + server_name +------------- +(0 rows) + +-- Clean up +DROP USER MAPPING FOR regress_multi_conn_user1 SERVER loopback; +DROP USER MAPPING FOR regress_multi_conn_user2 SERVER loopback; +DROP ROLE regress_multi_conn_user1; +DROP ROLE regress_multi_conn_user2; +-- =================================================================== +-- batch insert +-- =================================================================== +BEGIN; +CREATE SERVER batch10 FOREIGN DATA WRAPPER postgres_fdw OPTIONS( batch_size '10' ); +SELECT count(*) +FROM pg_foreign_server +WHERE srvname = 'batch10' +AND srvoptions @> array['batch_size=10']; + count +------- + 1 +(1 row) + +ALTER SERVER batch10 OPTIONS( SET batch_size '20' ); +SELECT count(*) +FROM pg_foreign_server +WHERE srvname = 'batch10' +AND srvoptions @> array['batch_size=10']; + count +------- + 0 +(1 row) + +SELECT count(*) +FROM pg_foreign_server +WHERE srvname = 'batch10' +AND srvoptions @> array['batch_size=20']; + count +------- + 1 +(1 row) + +CREATE FOREIGN TABLE table30 ( x int ) SERVER batch10 OPTIONS ( batch_size '30' ); +SELECT COUNT(*) +FROM pg_foreign_table +WHERE ftrelid = 'table30'::regclass +AND ftoptions @> array['batch_size=30']; + count +------- + 1 +(1 row) + +ALTER FOREIGN TABLE table30 OPTIONS ( SET batch_size '40'); +SELECT COUNT(*) +FROM pg_foreign_table +WHERE ftrelid = 'table30'::regclass +AND ftoptions @> array['batch_size=30']; + count +------- + 0 +(1 row) + +SELECT COUNT(*) +FROM pg_foreign_table +WHERE 
ftrelid = 'table30'::regclass +AND ftoptions @> array['batch_size=40']; + count +------- + 1 +(1 row) + +ROLLBACK; +CREATE TABLE batch_table ( x int ); +CREATE FOREIGN TABLE ftable ( x int ) SERVER loopback OPTIONS ( table_name 'batch_table', batch_size '10' ); +EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable SELECT * FROM generate_series(1, 10) i; + QUERY PLAN +------------------------------------------------------------- + Insert on public.ftable + Remote SQL: INSERT INTO public.batch_table(x) VALUES ($1) + Batch Size: 10 + -> Function Scan on pg_catalog.generate_series i + Output: i.i + Function Call: generate_series(1, 10) +(6 rows) + +INSERT INTO ftable SELECT * FROM generate_series(1, 10) i; +INSERT INTO ftable SELECT * FROM generate_series(11, 31) i; +INSERT INTO ftable VALUES (32); +INSERT INTO ftable VALUES (33), (34); +SELECT COUNT(*) FROM ftable; + count +------- + 34 +(1 row) + +TRUNCATE batch_table; +DROP FOREIGN TABLE ftable; +-- Disable batch insert +CREATE FOREIGN TABLE ftable ( x int ) SERVER loopback OPTIONS ( table_name 'batch_table', batch_size '1' ); +EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable VALUES (1), (2); + QUERY PLAN +------------------------------------------------------------- + Insert on public.ftable + Remote SQL: INSERT INTO public.batch_table(x) VALUES ($1) + Batch Size: 1 + -> Values Scan on "*VALUES*" + Output: "*VALUES*".column1 +(5 rows) + +INSERT INTO ftable VALUES (1), (2); +SELECT COUNT(*) FROM ftable; + count +------- + 2 +(1 row) + +DROP FOREIGN TABLE ftable; +DROP TABLE batch_table; +-- Use partitioning +CREATE TABLE batch_table ( x int ) PARTITION BY HASH (x); +CREATE TABLE batch_table_p0 (LIKE batch_table); +CREATE FOREIGN TABLE batch_table_p0f + PARTITION OF batch_table + FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback + OPTIONS (table_name 'batch_table_p0', batch_size '10'); +CREATE TABLE batch_table_p1 (LIKE batch_table); +CREATE FOREIGN TABLE batch_table_p1f + PARTITION OF batch_table + FOR VALUES 
WITH (MODULUS 3, REMAINDER 1) + SERVER loopback + OPTIONS (table_name 'batch_table_p1', batch_size '1'); +CREATE TABLE batch_table_p2 + PARTITION OF batch_table + FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO batch_table SELECT * FROM generate_series(1, 66) i; +SELECT COUNT(*) FROM batch_table; + count +------- + 66 +(1 row) + +-- Clean up +DROP TABLE batch_table CASCADE; diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c index 1a03e02263eeb..64698c4da3a50 100644 --- a/contrib/postgres_fdw/option.c +++ b/contrib/postgres_fdw/option.c @@ -3,7 +3,7 @@ * option.c * FDW option handling for postgres_fdw * - * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/postgres_fdw/option.c @@ -142,6 +142,17 @@ postgres_fdw_validator(PG_FUNCTION_ARGS) errmsg("%s requires a non-negative integer value", def->defname))); } + else if (strcmp(def->defname, "batch_size") == 0) + { + int batch_size; + + batch_size = strtol(defGetString(def), NULL, 10); + if (batch_size <= 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a non-negative integer value", + def->defname))); + } else if (strcmp(def->defname, "password_required") == 0) { bool pw_required = defGetBoolean(def); @@ -203,6 +214,9 @@ InitPgFdwOptions(void) /* fetch_size is available on both server and table */ {"fetch_size", ForeignServerRelationId, false}, {"fetch_size", ForeignTableRelationId, false}, + /* batch_size is available on both server and table */ + {"batch_size", ForeignServerRelationId, false}, + {"batch_size", ForeignTableRelationId, false}, {"password_required", UserMappingRelationId, false}, /* diff --git a/contrib/postgres_fdw/postgres_fdw--1.0--1.1.sql b/contrib/postgres_fdw/postgres_fdw--1.0--1.1.sql new file mode 100644 index 0000000000000..ed4ca378d4abe --- /dev/null +++ b/contrib/postgres_fdw/postgres_fdw--1.0--1.1.sql @@ -0,0 
+1,20 @@ +/* contrib/postgres_fdw/postgres_fdw--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION postgres_fdw UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION postgres_fdw_get_connections (OUT server_name text, + OUT valid boolean) +RETURNS SETOF record +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT PARALLEL RESTRICTED; + +CREATE FUNCTION postgres_fdw_disconnect (text) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT PARALLEL RESTRICTED; + +CREATE FUNCTION postgres_fdw_disconnect_all () +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT PARALLEL RESTRICTED; diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index b6c72e1d1e642..2ce42ce3f113d 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -3,7 +3,7 @@ * postgres_fdw.c * Foreign-data wrapper for remote PostgreSQL servers * - * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/postgres_fdw/postgres_fdw.c @@ -87,8 +87,10 @@ enum FdwScanPrivateIndex * 1) INSERT/UPDATE/DELETE statement text to be sent to the remote server * 2) Integer list of target attribute numbers for INSERT/UPDATE * (NIL for a DELETE) - * 3) Boolean flag showing if the remote query has a RETURNING clause - * 4) Integer list of attribute numbers retrieved by RETURNING, if any + * 3) Length till the end of VALUES clause for INSERT + * (-1 for a DELETE/UPDATE) + * 4) Boolean flag showing if the remote query has a RETURNING clause + * 5) Integer list of attribute numbers retrieved by RETURNING, if any */ enum FdwModifyPrivateIndex { @@ -96,6 +98,8 @@ enum FdwModifyPrivateIndex FdwModifyPrivateUpdateSql, /* Integer list of target attribute numbers for INSERT/UPDATE */ FdwModifyPrivateTargetAttnums, + /* Length till the end of VALUES clause (as an integer Value node) 
*/ + FdwModifyPrivateLen, /* has-returning flag (as an integer Value node) */ FdwModifyPrivateHasReturning, /* Integer list of attribute numbers retrieved by RETURNING */ @@ -176,7 +180,10 @@ typedef struct PgFdwModifyState /* extracted fdw_private data */ char *query; /* text of INSERT/UPDATE/DELETE command */ + char *orig_query; /* original text of INSERT command */ List *target_attrs; /* list of target attribute numbers */ + int values_end; /* length up to the end of VALUES */ + int batch_size; /* value of FDW option "batch_size" */ bool has_returning; /* is there a RETURNING clause? */ List *retrieved_attrs; /* attr numbers retrieved by RETURNING */ @@ -185,6 +192,9 @@ typedef struct PgFdwModifyState int p_nums; /* number of parameters to transmit */ FmgrInfo *p_flinfo; /* output conversion functions for them */ + /* batch operation stuff */ + int num_slots; /* number of slots to insert */ + /* working memory context */ MemoryContext temp_cxt; /* context for per-tuple temporary data */ @@ -343,6 +353,12 @@ static TupleTableSlot *postgresExecForeignInsert(EState *estate, ResultRelInfo *resultRelInfo, TupleTableSlot *slot, TupleTableSlot *planSlot); +static TupleTableSlot **postgresExecForeignBatchInsert(EState *estate, + ResultRelInfo *resultRelInfo, + TupleTableSlot **slots, + TupleTableSlot **planSlots, + int *numSlots); +static int postgresGetForeignModifyBatchSize(ResultRelInfo *resultRelInfo); static TupleTableSlot *postgresExecForeignUpdate(EState *estate, ResultRelInfo *resultRelInfo, TupleTableSlot *slot, @@ -429,20 +445,24 @@ static PgFdwModifyState *create_foreign_modify(EState *estate, Plan *subplan, char *query, List *target_attrs, + int len, bool has_returning, List *retrieved_attrs); -static TupleTableSlot *execute_foreign_modify(EState *estate, +static TupleTableSlot **execute_foreign_modify(EState *estate, ResultRelInfo *resultRelInfo, CmdType operation, - TupleTableSlot *slot, - TupleTableSlot *planSlot); + TupleTableSlot **slots, + 
TupleTableSlot **planSlots, + int *numSlots); static void prepare_foreign_modify(PgFdwModifyState *fmstate); static const char **convert_prep_stmt_params(PgFdwModifyState *fmstate, ItemPointer tupleid, - TupleTableSlot *slot); + TupleTableSlot **slots, + int numSlots); static void store_returning_result(PgFdwModifyState *fmstate, TupleTableSlot *slot, PGresult *res); static void finish_foreign_modify(PgFdwModifyState *fmstate); +static void deallocate_query(PgFdwModifyState *fmstate); static List *build_remote_returning(Index rtindex, Relation rel, List *returningList); static void rebuild_fdw_scan_tlist(ForeignScan *fscan, List *tlist); @@ -505,6 +525,7 @@ static void apply_table_options(PgFdwRelationInfo *fpinfo); static void merge_fdw_options(PgFdwRelationInfo *fpinfo, const PgFdwRelationInfo *fpinfo_o, const PgFdwRelationInfo *fpinfo_i); +static int get_batch_size_option(Relation rel); /* @@ -530,6 +551,8 @@ postgres_fdw_handler(PG_FUNCTION_ARGS) routine->PlanForeignModify = postgresPlanForeignModify; routine->BeginForeignModify = postgresBeginForeignModify; routine->ExecForeignInsert = postgresExecForeignInsert; + routine->ExecForeignBatchInsert = postgresExecForeignBatchInsert; + routine->GetForeignModifyBatchSize = postgresGetForeignModifyBatchSize; routine->ExecForeignUpdate = postgresExecForeignUpdate; routine->ExecForeignDelete = postgresExecForeignDelete; routine->EndForeignModify = postgresEndForeignModify; @@ -1665,6 +1688,7 @@ postgresPlanForeignModify(PlannerInfo *root, List *returningList = NIL; List *retrieved_attrs = NIL; bool doNothing = false; + int values_end_len = -1; initStringInfo(&sql); @@ -1752,7 +1776,7 @@ postgresPlanForeignModify(PlannerInfo *root, deparseInsertSql(&sql, rte, resultRelation, rel, targetAttrs, doNothing, withCheckOptionList, returningList, - &retrieved_attrs); + &retrieved_attrs, &values_end_len); break; case CMD_UPDATE: deparseUpdateSql(&sql, rte, resultRelation, rel, @@ -1776,8 +1800,9 @@ 
postgresPlanForeignModify(PlannerInfo *root, * Build the fdw_private list that will be available to the executor. * Items in the list must match enum FdwModifyPrivateIndex, above. */ - return list_make4(makeString(sql.data), + return list_make5(makeString(sql.data), targetAttrs, + makeInteger(values_end_len), makeInteger((retrieved_attrs != NIL)), retrieved_attrs); } @@ -1797,6 +1822,7 @@ postgresBeginForeignModify(ModifyTableState *mtstate, char *query; List *target_attrs; bool has_returning; + int values_end_len; List *retrieved_attrs; RangeTblEntry *rte; @@ -1812,6 +1838,8 @@ postgresBeginForeignModify(ModifyTableState *mtstate, FdwModifyPrivateUpdateSql)); target_attrs = (List *) list_nth(fdw_private, FdwModifyPrivateTargetAttnums); + values_end_len = intVal(list_nth(fdw_private, + FdwModifyPrivateLen)); has_returning = intVal(list_nth(fdw_private, FdwModifyPrivateHasReturning)); retrieved_attrs = (List *) list_nth(fdw_private, @@ -1829,6 +1857,7 @@ postgresBeginForeignModify(ModifyTableState *mtstate, mtstate->mt_plans[subplan_index]->plan, query, target_attrs, + values_end_len, has_returning, retrieved_attrs); @@ -1846,7 +1875,37 @@ postgresExecForeignInsert(EState *estate, TupleTableSlot *planSlot) { PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState; - TupleTableSlot *rslot; + TupleTableSlot **rslot; + int numSlots = 1; + + /* + * If the fmstate has aux_fmstate set, use the aux_fmstate (see + * postgresBeginForeignInsert()) + */ + if (fmstate->aux_fmstate) + resultRelInfo->ri_FdwState = fmstate->aux_fmstate; + rslot = execute_foreign_modify(estate, resultRelInfo, CMD_INSERT, + &slot, &planSlot, &numSlots); + /* Revert that change */ + if (fmstate->aux_fmstate) + resultRelInfo->ri_FdwState = fmstate; + + return rslot ? 
*rslot : NULL; +} + +/* + * postgresExecForeignBatchInsert + * Insert multiple rows into a foreign table + */ +static TupleTableSlot ** +postgresExecForeignBatchInsert(EState *estate, + ResultRelInfo *resultRelInfo, + TupleTableSlot **slots, + TupleTableSlot **planSlots, + int *numSlots) +{ + PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState; + TupleTableSlot **rslot; /* * If the fmstate has aux_fmstate set, use the aux_fmstate (see @@ -1855,7 +1914,7 @@ postgresExecForeignInsert(EState *estate, if (fmstate->aux_fmstate) resultRelInfo->ri_FdwState = fmstate->aux_fmstate; rslot = execute_foreign_modify(estate, resultRelInfo, CMD_INSERT, - slot, planSlot); + slots, planSlots, numSlots); /* Revert that change */ if (fmstate->aux_fmstate) resultRelInfo->ri_FdwState = fmstate; @@ -1863,6 +1922,42 @@ postgresExecForeignInsert(EState *estate, return rslot; } +/* + * postgresGetForeignModifyBatchSize + * Determine the maximum number of tuples that can be inserted in bulk + * + * Returns the batch size specified for server or table. When batching is not + * allowed (e.g. for tables with AFTER ROW triggers or with RETURNING clause), + * returns 1. + */ +static int +postgresGetForeignModifyBatchSize(ResultRelInfo *resultRelInfo) +{ + int batch_size; + + /* should be called only once */ + Assert(resultRelInfo->ri_BatchSize == 0); + + /* + * In EXPLAIN without ANALYZE, ri_fdwstate is NULL, so we have to lookup + * the option directly in server/table options. Otherwise just use the + * value we determined earlier. + */ + if (resultRelInfo->ri_FdwState) + batch_size = ((PgFdwModifyState *) resultRelInfo->ri_FdwState)->batch_size; + else + batch_size = get_batch_size_option(resultRelInfo->ri_RelationDesc); + + /* Disable batching when we have to use RETURNING. 
*/ + if (resultRelInfo->ri_projectReturning != NULL || + (resultRelInfo->ri_TrigDesc && + resultRelInfo->ri_TrigDesc->trig_insert_after_row)) + return 1; + + /* Otherwise use the batch size specified for server/table. */ + return batch_size; +} + /* * postgresExecForeignUpdate * Update one row in a foreign table @@ -1873,8 +1968,13 @@ postgresExecForeignUpdate(EState *estate, TupleTableSlot *slot, TupleTableSlot *planSlot) { - return execute_foreign_modify(estate, resultRelInfo, CMD_UPDATE, - slot, planSlot); + TupleTableSlot **rslot; + int numSlots = 1; + + rslot = execute_foreign_modify(estate, resultRelInfo, CMD_UPDATE, + &slot, &planSlot, &numSlots); + + return rslot ? rslot[0] : NULL; } /* @@ -1887,8 +1987,13 @@ postgresExecForeignDelete(EState *estate, TupleTableSlot *slot, TupleTableSlot *planSlot) { - return execute_foreign_modify(estate, resultRelInfo, CMD_DELETE, - slot, planSlot); + TupleTableSlot **rslot; + int numSlots = 1; + + rslot = execute_foreign_modify(estate, resultRelInfo, CMD_DELETE, + &slot, &planSlot, &numSlots); + + return rslot ? rslot[0] : NULL; } /* @@ -1925,6 +2030,7 @@ postgresBeginForeignInsert(ModifyTableState *mtstate, RangeTblEntry *rte; TupleDesc tupdesc = RelationGetDescr(rel); int attnum; + int values_end_len; StringInfoData sql; List *targetAttrs = NIL; List *retrieved_attrs = NIL; @@ -2001,7 +2107,7 @@ postgresBeginForeignInsert(ModifyTableState *mtstate, deparseInsertSql(&sql, rte, resultRelation, rel, targetAttrs, doNothing, resultRelInfo->ri_WithCheckOptions, resultRelInfo->ri_returningList, - &retrieved_attrs); + &retrieved_attrs, &values_end_len); /* Construct an execution state. 
*/ fmstate = create_foreign_modify(mtstate->ps.state, @@ -2011,6 +2117,7 @@ postgresBeginForeignInsert(ModifyTableState *mtstate, NULL, sql.data, targetAttrs, + values_end_len, retrieved_attrs != NIL, retrieved_attrs); @@ -2636,6 +2743,13 @@ postgresExplainForeignModify(ModifyTableState *mtstate, FdwModifyPrivateUpdateSql)); ExplainPropertyText("Remote SQL", sql, es); + + /* + * For INSERT we should always have batch size >= 1, but UPDATE + * and DELETE don't support batching so don't show the property. + */ + if (rinfo->ri_BatchSize > 0) + ExplainPropertyInteger("Batch Size", NULL, rinfo->ri_BatchSize, es); } } @@ -3530,6 +3644,7 @@ create_foreign_modify(EState *estate, Plan *subplan, char *query, List *target_attrs, + int values_end, bool has_returning, List *retrieved_attrs) { @@ -3564,7 +3679,10 @@ create_foreign_modify(EState *estate, /* Set up remote query information. */ fmstate->query = query; + if (operation == CMD_INSERT) + fmstate->orig_query = pstrdup(fmstate->query); fmstate->target_attrs = target_attrs; + fmstate->values_end = values_end; fmstate->has_returning = has_returning; fmstate->retrieved_attrs = retrieved_attrs; @@ -3616,6 +3734,12 @@ create_foreign_modify(EState *estate, Assert(fmstate->p_nums <= n_params); + /* Set batch_size from foreign server/table options. */ + if (operation == CMD_INSERT) + fmstate->batch_size = get_batch_size_option(rel); + + fmstate->num_slots = 1; + /* Initialize auxiliary state */ fmstate->aux_fmstate = NULL; @@ -3626,26 +3750,48 @@ create_foreign_modify(EState *estate, * execute_foreign_modify * Perform foreign-table modification as required, and fetch RETURNING * result if any. (This is the shared guts of postgresExecForeignInsert, - * postgresExecForeignUpdate, and postgresExecForeignDelete.) + * postgresExecForeignBatchInsert, postgresExecForeignUpdate, and + * postgresExecForeignDelete.) 
*/ -static TupleTableSlot * +static TupleTableSlot ** execute_foreign_modify(EState *estate, ResultRelInfo *resultRelInfo, CmdType operation, - TupleTableSlot *slot, - TupleTableSlot *planSlot) + TupleTableSlot **slots, + TupleTableSlot **planSlots, + int *numSlots) { PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState; ItemPointer ctid = NULL; const char **p_values; PGresult *res; int n_rows; + StringInfoData sql; /* The operation should be INSERT, UPDATE, or DELETE */ Assert(operation == CMD_INSERT || operation == CMD_UPDATE || operation == CMD_DELETE); + /* + * If the existing query was deparsed and prepared for a different number + * of rows, rebuild it for the proper number. + */ + if (operation == CMD_INSERT && fmstate->num_slots != *numSlots) + { + /* Destroy the prepared statement created previously */ + if (fmstate->p_name) + deallocate_query(fmstate); + + /* Build INSERT string with numSlots records in its VALUES clause. */ + initStringInfo(&sql); + rebuildInsertSql(&sql, fmstate->orig_query, fmstate->values_end, + fmstate->p_nums, *numSlots - 1); + pfree(fmstate->query); + fmstate->query = sql.data; + fmstate->num_slots = *numSlots; + } + /* Set up the prepared statement on the remote server, if we didn't yet */ if (!fmstate->p_name) prepare_foreign_modify(fmstate); @@ -3658,7 +3804,7 @@ execute_foreign_modify(EState *estate, Datum datum; bool isNull; - datum = ExecGetJunkAttribute(planSlot, + datum = ExecGetJunkAttribute(planSlots[0], fmstate->ctidAttno, &isNull); /* shouldn't ever get a null result... */ @@ -3668,14 +3814,14 @@ execute_foreign_modify(EState *estate, } /* Convert parameters needed by prepared statement to text form */ - p_values = convert_prep_stmt_params(fmstate, ctid, slot); + p_values = convert_prep_stmt_params(fmstate, ctid, slots, *numSlots); /* * Execute the prepared statement. 
*/ if (!PQsendQueryPrepared(fmstate->conn, fmstate->p_name, - fmstate->p_nums, + fmstate->p_nums * (*numSlots), p_values, NULL, NULL, @@ -3696,9 +3842,10 @@ execute_foreign_modify(EState *estate, /* Check number of rows affected, and fetch RETURNING tuple if any */ if (fmstate->has_returning) { + Assert(*numSlots == 1); n_rows = PQntuples(res); if (n_rows > 0) - store_returning_result(fmstate, slot, res); + store_returning_result(fmstate, slots[0], res); } else n_rows = atoi(PQcmdTuples(res)); @@ -3708,10 +3855,12 @@ execute_foreign_modify(EState *estate, MemoryContextReset(fmstate->temp_cxt); + *numSlots = n_rows; + /* * Return NULL if nothing was inserted/updated/deleted on the remote end */ - return (n_rows > 0) ? slot : NULL; + return (n_rows > 0) ? slots : NULL; } /* @@ -3771,52 +3920,64 @@ prepare_foreign_modify(PgFdwModifyState *fmstate) static const char ** convert_prep_stmt_params(PgFdwModifyState *fmstate, ItemPointer tupleid, - TupleTableSlot *slot) + TupleTableSlot **slots, + int numSlots) { const char **p_values; + int i; + int j; int pindex = 0; MemoryContext oldcontext; oldcontext = MemoryContextSwitchTo(fmstate->temp_cxt); - p_values = (const char **) palloc(sizeof(char *) * fmstate->p_nums); + p_values = (const char **) palloc(sizeof(char *) * fmstate->p_nums * numSlots); + + /* ctid is provided only for UPDATE/DELETE, which don't allow batching */ + Assert(!(tupleid != NULL && numSlots > 1)); /* 1st parameter should be ctid, if it's in use */ if (tupleid != NULL) { + Assert(numSlots == 1); /* don't need set_transmission_modes for TID output */ p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex], PointerGetDatum(tupleid)); pindex++; } - /* get following parameters from slot */ - if (slot != NULL && fmstate->target_attrs != NIL) + /* get following parameters from slots */ + if (slots != NULL && fmstate->target_attrs != NIL) { int nestlevel; ListCell *lc; nestlevel = set_transmission_modes(); - foreach(lc, fmstate->target_attrs) + for (i 
= 0; i < numSlots; i++) { - int attnum = lfirst_int(lc); - Datum value; - bool isnull; + j = (tupleid != NULL) ? 1 : 0; + foreach(lc, fmstate->target_attrs) + { + int attnum = lfirst_int(lc); + Datum value; + bool isnull; - value = slot_getattr(slot, attnum, &isnull); - if (isnull) - p_values[pindex] = NULL; - else - p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex], - value); - pindex++; + value = slot_getattr(slots[i], attnum, &isnull); + if (isnull) + p_values[pindex] = NULL; + else + p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[j], + value); + pindex++; + j++; + } } reset_transmission_modes(nestlevel); } - Assert(pindex == fmstate->p_nums); + Assert(pindex == fmstate->p_nums * numSlots); MemoryContextSwitchTo(oldcontext); @@ -3870,29 +4031,42 @@ finish_foreign_modify(PgFdwModifyState *fmstate) Assert(fmstate != NULL); /* If we created a prepared statement, destroy it */ - if (fmstate->p_name) - { - char sql[64]; - PGresult *res; - - snprintf(sql, sizeof(sql), "DEALLOCATE %s", fmstate->p_name); - - /* - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. - */ - res = pgfdw_exec_query(fmstate->conn, sql); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, true, sql); - PQclear(res); - fmstate->p_name = NULL; - } + deallocate_query(fmstate); /* Release remote connection */ ReleaseConnection(fmstate->conn); fmstate->conn = NULL; } +/* + * deallocate_query + * Deallocate a prepared statement for a foreign insert/update/delete + * operation + */ +static void +deallocate_query(PgFdwModifyState *fmstate) +{ + char sql[64]; + PGresult *res; + + /* do nothing if the query is not allocated */ + if (!fmstate->p_name) + return; + + snprintf(sql, sizeof(sql), "DEALLOCATE %s", fmstate->p_name); + + /* + * We don't use a PG_TRY block here, so be careful not to throw error + * without releasing the PGresult. 
+ */ + res = pgfdw_exec_query(fmstate->conn, sql); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pgfdw_report_error(ERROR, res, fmstate->conn, true, sql); + PQclear(res); + pfree(fmstate->p_name); + fmstate->p_name = NULL; +} + /* * build_remote_returning * Build a RETURNING targetlist of a remote query for performing an @@ -5709,7 +5883,8 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel, * RestrictInfos, so we must make our own. */ Assert(!IsA(expr, RestrictInfo)); - rinfo = make_restrictinfo(expr, + rinfo = make_restrictinfo(root, + expr, true, false, false, @@ -6577,3 +6752,45 @@ find_em_expr_for_input_target(PlannerInfo *root, elog(ERROR, "could not find pathkey item to sort"); return NULL; /* keep compiler quiet */ } + +/* + * Determine batch size for a given foreign table. The option specified for + * a table has precedence. + */ +static int +get_batch_size_option(Relation rel) +{ + Oid foreigntableid = RelationGetRelid(rel); + ForeignTable *table; + ForeignServer *server; + List *options; + ListCell *lc; + + /* we use 1 by default, which means "no batching" */ + int batch_size = 1; + + /* + * Load options for table and server. We append server options after + * table options, because table options take precedence. + */ + table = GetForeignTable(foreigntableid); + server = GetForeignServer(table->serverid); + + options = NIL; + options = list_concat(options, table->options); + options = list_concat(options, server->options); + + /* See if either table or server specifies batch_size. 
*/ + foreach(lc, options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "batch_size") == 0) + { + batch_size = strtol(defGetString(def), NULL, 10); + break; + } + } + + return batch_size; +} diff --git a/contrib/postgres_fdw/postgres_fdw.control b/contrib/postgres_fdw/postgres_fdw.control index f9ed490752b0a..d489382064cfb 100644 --- a/contrib/postgres_fdw/postgres_fdw.control +++ b/contrib/postgres_fdw/postgres_fdw.control @@ -1,5 +1,5 @@ # postgres_fdw extension comment = 'foreign-data wrapper for remote PostgreSQL servers' -default_version = '1.0' +default_version = '1.1' module_pathname = '$libdir/postgres_fdw' relocatable = true diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index eef410db3921f..1f67b4d9fd27f 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -3,7 +3,7 @@ * postgres_fdw.h * Foreign-data wrapper for remote PostgreSQL servers * - * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/postgres_fdw/postgres_fdw.h @@ -77,7 +77,7 @@ typedef struct PgFdwRelationInfo bool use_remote_estimate; Cost fdw_startup_cost; Cost fdw_tuple_cost; - List *shippable_extensions; /* OIDs of whitelisted extensions */ + List *shippable_extensions; /* OIDs of shippable extensions */ /* Cached catalog information. 
*/ ForeignTable *table; @@ -161,7 +161,10 @@ extern void deparseInsertSql(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, List *targetAttrs, bool doNothing, List *withCheckOptionList, List *returningList, - List **retrieved_attrs); + List **retrieved_attrs, int *values_end_len); +extern void rebuildInsertSql(StringInfo buf, char *orig_query, + int values_end_len, int num_cols, + int num_rows); extern void deparseUpdateSql(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, List *targetAttrs, diff --git a/contrib/postgres_fdw/shippable.c b/contrib/postgres_fdw/shippable.c index b4766dc5ff807..b27f82e015595 100644 --- a/contrib/postgres_fdw/shippable.c +++ b/contrib/postgres_fdw/shippable.c @@ -7,13 +7,13 @@ * data types are shippable to a remote server for execution --- that is, * do they exist and have the same behavior remotely as they do locally? * Built-in objects are generally considered shippable. Other objects can - * be shipped if they are white-listed by the user. + * be shipped if they are declared as such by the user. * * Note: there are additional filter rules that prevent shipping mutable * functions or functions using nonportable collations. Those considerations * need not be accounted for here. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/postgres_fdw/shippable.c @@ -110,7 +110,7 @@ InitializeShippableCache(void) * * Right now "shippability" is exclusively a function of whether the object * belongs to an extension declared by the user. In the future we could - * additionally have a whitelist of functions/operators declared one at a time. + * additionally have a list of functions/operators declared one at a time. 
*/ static bool lookup_shippable(Oid objectId, Oid classId, PgFdwRelationInfo *fpinfo) diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 25dbc08b988f4..319c15d635c06 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -15,6 +15,10 @@ DO $d$ OPTIONS (dbname '$$||current_database()||$$', port '$$||current_setting('port')||$$' )$$; + EXECUTE $$CREATE SERVER loopback3 FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (dbname '$$||current_database()||$$', + port '$$||current_setting('port')||$$' + )$$; END; $d$; @@ -22,6 +26,7 @@ CREATE USER MAPPING FOR public SERVER testserver1 OPTIONS (user 'value', password 'value'); CREATE USER MAPPING FOR CURRENT_USER SERVER loopback; CREATE USER MAPPING FOR CURRENT_USER SERVER loopback2; +CREATE USER MAPPING FOR public SERVER loopback3; -- =================================================================== -- create objects used through FDW loopback server @@ -142,6 +147,12 @@ CREATE FOREIGN TABLE ft6 ( c3 text ) SERVER loopback2 OPTIONS (schema_name 'S 1', table_name 'T 4'); +CREATE FOREIGN TABLE ft7 ( + c1 int NOT NULL, + c2 int NOT NULL, + c3 text +) SERVER loopback3 OPTIONS (schema_name 'S 1', table_name 'T 4'); + -- =================================================================== -- tests for validator -- =================================================================== @@ -2698,16 +2709,193 @@ COMMIT; -- Clean up DROP PROCEDURE terminate_backend_and_wait(text); --- =================================================================== --- test connection invalidation cases --- =================================================================== +-- ============================================================================= +-- test connection invalidation cases and postgres_fdw_get_connections function +-- ============================================================================= +-- Let's ensure to close all the 
existing cached connections. +SELECT 1 FROM postgres_fdw_disconnect_all(); +-- No cached connections, so no records should be output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; -- This test case is for closing the connection in pgfdw_xact_callback BEGIN; -- Connection xact depth becomes 1 i.e. the connection is in midst of the xact. SELECT 1 FROM ft1 LIMIT 1; --- Connection is not closed at the end of the alter statement in --- pgfdw_inval_callback. That's because the connection is in midst of this --- xact, it is just marked as invalid. +SELECT 1 FROM ft7 LIMIT 1; +-- List all the existing cached connections. loopback and loopback3 should be +-- output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; +-- Connections are not closed at the end of the alter and drop statements. +-- That's because the connections are in midst of this xact, +-- they are just marked as invalid in pgfdw_inval_callback. ALTER SERVER loopback OPTIONS (ADD use_remote_estimate 'off'); --- The invalid connection gets closed in pgfdw_xact_callback during commit. +DROP SERVER loopback3 CASCADE; +-- List all the existing cached connections. loopback and loopback3 +-- should be output as invalid connections. Also the server name for +-- loopback3 should be NULL because the server was dropped. +SELECT * FROM postgres_fdw_get_connections() ORDER BY 1; +-- The invalid connections get closed in pgfdw_xact_callback during commit. +COMMIT; +-- All cached connections were closed while committing above xact, so no +-- records should be output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + +-- ======================================================================= +-- test postgres_fdw_disconnect and postgres_fdw_disconnect_all functions +-- ======================================================================= +BEGIN; +-- Ensure to cache loopback connection. +SELECT 1 FROM ft1 LIMIT 1; +-- Ensure to cache loopback2 connection. 
+SELECT 1 FROM ft6 LIMIT 1; +-- List all the existing cached connections. loopback and loopback2 should be +-- output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; +-- Issue a warning and return false as loopback connection is still in use and +-- can not be closed. +SELECT postgres_fdw_disconnect('loopback'); +-- List all the existing cached connections. loopback and loopback2 should be +-- output. +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; +-- Return false as connections are still in use, warnings are issued. +-- But disable warnings temporarily because the order of them is not stable. +SET client_min_messages = 'ERROR'; +SELECT postgres_fdw_disconnect_all(); +RESET client_min_messages; +COMMIT; +-- Ensure that loopback2 connection is closed. +SELECT 1 FROM postgres_fdw_disconnect('loopback2'); +SELECT server_name FROM postgres_fdw_get_connections() WHERE server_name = 'loopback2'; +-- Return false as loopback2 connection is closed already. +SELECT postgres_fdw_disconnect('loopback2'); +-- Return an error as there is no foreign server with given name. +SELECT postgres_fdw_disconnect('unknownserver'); +-- Let's ensure to close all the existing cached connections. +SELECT 1 FROM postgres_fdw_disconnect_all(); +-- No cached connections, so no records should be output. 
+SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + +-- ============================================================================= +-- test case for having multiple cached connections for a foreign server +-- ============================================================================= +CREATE ROLE regress_multi_conn_user1 SUPERUSER; +CREATE ROLE regress_multi_conn_user2 SUPERUSER; +CREATE USER MAPPING FOR regress_multi_conn_user1 SERVER loopback; +CREATE USER MAPPING FOR regress_multi_conn_user2 SERVER loopback; + +BEGIN; +-- Will cache loopback connection with user mapping for regress_multi_conn_user1 +SET ROLE regress_multi_conn_user1; +SELECT 1 FROM ft1 LIMIT 1; +RESET ROLE; + +-- Will cache loopback connection with user mapping for regress_multi_conn_user2 +SET ROLE regress_multi_conn_user2; +SELECT 1 FROM ft1 LIMIT 1; +RESET ROLE; + +-- Should output two connections for loopback server +SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; COMMIT; +-- Let's ensure to close all the existing cached connections. +SELECT 1 FROM postgres_fdw_disconnect_all(); +-- No cached connections, so no records should be output. 
+SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1; + +-- Clean up +DROP USER MAPPING FOR regress_multi_conn_user1 SERVER loopback; +DROP USER MAPPING FOR regress_multi_conn_user2 SERVER loopback; +DROP ROLE regress_multi_conn_user1; +DROP ROLE regress_multi_conn_user2; + +-- =================================================================== +-- batch insert +-- =================================================================== + +BEGIN; + +CREATE SERVER batch10 FOREIGN DATA WRAPPER postgres_fdw OPTIONS( batch_size '10' ); + +SELECT count(*) +FROM pg_foreign_server +WHERE srvname = 'batch10' +AND srvoptions @> array['batch_size=10']; + +ALTER SERVER batch10 OPTIONS( SET batch_size '20' ); + +SELECT count(*) +FROM pg_foreign_server +WHERE srvname = 'batch10' +AND srvoptions @> array['batch_size=10']; + +SELECT count(*) +FROM pg_foreign_server +WHERE srvname = 'batch10' +AND srvoptions @> array['batch_size=20']; + +CREATE FOREIGN TABLE table30 ( x int ) SERVER batch10 OPTIONS ( batch_size '30' ); + +SELECT COUNT(*) +FROM pg_foreign_table +WHERE ftrelid = 'table30'::regclass +AND ftoptions @> array['batch_size=30']; + +ALTER FOREIGN TABLE table30 OPTIONS ( SET batch_size '40'); + +SELECT COUNT(*) +FROM pg_foreign_table +WHERE ftrelid = 'table30'::regclass +AND ftoptions @> array['batch_size=30']; + +SELECT COUNT(*) +FROM pg_foreign_table +WHERE ftrelid = 'table30'::regclass +AND ftoptions @> array['batch_size=40']; + +ROLLBACK; + +CREATE TABLE batch_table ( x int ); + +CREATE FOREIGN TABLE ftable ( x int ) SERVER loopback OPTIONS ( table_name 'batch_table', batch_size '10' ); +EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable SELECT * FROM generate_series(1, 10) i; +INSERT INTO ftable SELECT * FROM generate_series(1, 10) i; +INSERT INTO ftable SELECT * FROM generate_series(11, 31) i; +INSERT INTO ftable VALUES (32); +INSERT INTO ftable VALUES (33), (34); +SELECT COUNT(*) FROM ftable; +TRUNCATE batch_table; +DROP FOREIGN TABLE ftable; + +-- Disable 
batch insert +CREATE FOREIGN TABLE ftable ( x int ) SERVER loopback OPTIONS ( table_name 'batch_table', batch_size '1' ); +EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable VALUES (1), (2); +INSERT INTO ftable VALUES (1), (2); +SELECT COUNT(*) FROM ftable; +DROP FOREIGN TABLE ftable; +DROP TABLE batch_table; + +-- Use partitioning +CREATE TABLE batch_table ( x int ) PARTITION BY HASH (x); + +CREATE TABLE batch_table_p0 (LIKE batch_table); +CREATE FOREIGN TABLE batch_table_p0f + PARTITION OF batch_table + FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback + OPTIONS (table_name 'batch_table_p0', batch_size '10'); + +CREATE TABLE batch_table_p1 (LIKE batch_table); +CREATE FOREIGN TABLE batch_table_p1f + PARTITION OF batch_table + FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback + OPTIONS (table_name 'batch_table_p1', batch_size '1'); + +CREATE TABLE batch_table_p2 + PARTITION OF batch_table + FOR VALUES WITH (MODULUS 3, REMAINDER 2); + +INSERT INTO batch_table SELECT * FROM generate_series(1, 66) i; +SELECT COUNT(*) FROM batch_table; + +-- Clean up +DROP TABLE batch_table CASCADE; diff --git a/contrib/sepgsql/database.c b/contrib/sepgsql/database.c index 3881a019cba21..14a74fb29503e 100644 --- a/contrib/sepgsql/database.c +++ b/contrib/sepgsql/database.c @@ -4,7 +4,7 @@ * * Routines corresponding to database objects * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * ------------------------------------------------------------------------- */ diff --git a/contrib/sepgsql/dml.c b/contrib/sepgsql/dml.c index 75ee612bcdaea..1f96e8b507a42 100644 --- a/contrib/sepgsql/dml.c +++ b/contrib/sepgsql/dml.c @@ -4,7 +4,7 @@ * * Routines to handle DML permission checks * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * ------------------------------------------------------------------------- */ diff 
--git a/contrib/sepgsql/expected/label.out b/contrib/sepgsql/expected/label.out index 0300bc6fb45e1..b1b7db55f67ab 100644 --- a/contrib/sepgsql/expected/label.out +++ b/contrib/sepgsql/expected/label.out @@ -6,7 +6,7 @@ -- CREATE TABLE t1 (a int, b text); INSERT INTO t1 VALUES (1, 'aaa'), (2, 'bbb'), (3, 'ccc'); -SELECT * INTO t2 FROM t1 WHERE a % 2 = 0; +CREATE TABLE t2 AS SELECT * FROM t1 WHERE a % 2 = 0; CREATE FUNCTION f1 () RETURNS text AS 'SELECT sepgsql_getcon()' LANGUAGE sql; diff --git a/contrib/sepgsql/hooks.c b/contrib/sepgsql/hooks.c index 853b5b04ab8b1..34de6158d6047 100644 --- a/contrib/sepgsql/hooks.c +++ b/contrib/sepgsql/hooks.c @@ -4,7 +4,7 @@ * * Entrypoints of the hooks in PostgreSQL, and dispatches the callbacks. * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * ------------------------------------------------------------------------- */ diff --git a/contrib/sepgsql/label.c b/contrib/sepgsql/label.c index bee4380edcd78..7f23124009d58 100644 --- a/contrib/sepgsql/label.c +++ b/contrib/sepgsql/label.c @@ -4,7 +4,7 @@ * * Routines to support SELinux labels (security context) * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * ------------------------------------------------------------------------- */ diff --git a/contrib/sepgsql/launcher b/contrib/sepgsql/launcher index 0fddaf59634d4..6574eb9ea9e16 100755 --- a/contrib/sepgsql/launcher +++ b/contrib/sepgsql/launcher @@ -2,7 +2,7 @@ # # A wrapper script to launch psql command in regression test # -# Copyright (c) 2010-2020, PostgreSQL Global Development Group +# Copyright (c) 2010-2021, PostgreSQL Global Development Group # # ------------------------------------------------------------------------- diff --git a/contrib/sepgsql/proc.c b/contrib/sepgsql/proc.c index e40d9095ba310..e0ff3f03701a9 100644 --- a/contrib/sepgsql/proc.c 
+++ b/contrib/sepgsql/proc.c @@ -4,7 +4,7 @@ * * Routines corresponding to procedure objects * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * ------------------------------------------------------------------------- */ diff --git a/contrib/sepgsql/relation.c b/contrib/sepgsql/relation.c index a783767f81c9e..31e2ed5b14316 100644 --- a/contrib/sepgsql/relation.c +++ b/contrib/sepgsql/relation.c @@ -4,7 +4,7 @@ * * Routines corresponding to relation/attribute objects * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * ------------------------------------------------------------------------- */ diff --git a/contrib/sepgsql/schema.c b/contrib/sepgsql/schema.c index f7b1a27e1a9ce..0285c57114c19 100644 --- a/contrib/sepgsql/schema.c +++ b/contrib/sepgsql/schema.c @@ -4,7 +4,7 @@ * * Routines corresponding to schema objects * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * ------------------------------------------------------------------------- */ diff --git a/contrib/sepgsql/selinux.c b/contrib/sepgsql/selinux.c index 64ae53e867de0..f11968bcaa294 100644 --- a/contrib/sepgsql/selinux.c +++ b/contrib/sepgsql/selinux.c @@ -5,7 +5,7 @@ * Interactions between userspace and selinux in kernelspace, * using libselinux api. 
* - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * ------------------------------------------------------------------------- */ diff --git a/contrib/sepgsql/sepgsql.h b/contrib/sepgsql/sepgsql.h index 38302b530b133..219373426730b 100644 --- a/contrib/sepgsql/sepgsql.h +++ b/contrib/sepgsql/sepgsql.h @@ -4,7 +4,7 @@ * * Definitions corresponding to SE-PostgreSQL * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * ------------------------------------------------------------------------- */ diff --git a/contrib/sepgsql/sql/label.sql b/contrib/sepgsql/sql/label.sql index d19c6edb4ca8f..76e261bee8037 100644 --- a/contrib/sepgsql/sql/label.sql +++ b/contrib/sepgsql/sql/label.sql @@ -7,7 +7,7 @@ -- CREATE TABLE t1 (a int, b text); INSERT INTO t1 VALUES (1, 'aaa'), (2, 'bbb'), (3, 'ccc'); -SELECT * INTO t2 FROM t1 WHERE a % 2 = 0; +CREATE TABLE t2 AS SELECT * FROM t1 WHERE a % 2 = 0; CREATE FUNCTION f1 () RETURNS text AS 'SELECT sepgsql_getcon()' diff --git a/contrib/sepgsql/uavc.c b/contrib/sepgsql/uavc.c index 97189b7c46f04..4cc48d5f82eb7 100644 --- a/contrib/sepgsql/uavc.c +++ b/contrib/sepgsql/uavc.c @@ -6,7 +6,7 @@ * access control decisions recently used, and reduce number of kernel * invocations to avoid unnecessary performance hit. 
* - * Copyright (c) 2011-2020, PostgreSQL Global Development Group + * Copyright (c) 2011-2021, PostgreSQL Global Development Group * * ------------------------------------------------------------------------- */ diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c index e9a9741154a2e..779bd4415e6ac 100644 --- a/contrib/tablefunc/tablefunc.c +++ b/contrib/tablefunc/tablefunc.c @@ -10,7 +10,7 @@ * And contributors: * Nabil Sayegh * - * Copyright (c) 2002-2020, PostgreSQL Global Development Group + * Copyright (c) 2002-2021, PostgreSQL Global Development Group * * Permission to use, copy, modify, and distribute this software and its * documentation for any purpose, without fee, and without a written agreement diff --git a/contrib/tablefunc/tablefunc.h b/contrib/tablefunc/tablefunc.h index 794957ca21914..918518223d265 100644 --- a/contrib/tablefunc/tablefunc.h +++ b/contrib/tablefunc/tablefunc.h @@ -10,7 +10,7 @@ * And contributors: * Nabil Sayegh * - * Copyright (c) 2002-2020, PostgreSQL Global Development Group + * Copyright (c) 2002-2021, PostgreSQL Global Development Group * * Permission to use, copy, modify, and distribute this software and its * documentation for any purpose, without fee, and without a written agreement diff --git a/contrib/tcn/tcn.c b/contrib/tcn/tcn.c index 552f107bf6b06..06847024a31b0 100644 --- a/contrib/tcn/tcn.c +++ b/contrib/tcn/tcn.c @@ -3,7 +3,7 @@ * tcn.c * triggered change notification support for PostgreSQL * - * Portions Copyright (c) 2011-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2011-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/contrib/test_decoding/Makefile b/contrib/test_decoding/Makefile index 9a4c76f013645..c5e28ce5cca78 100644 --- a/contrib/test_decoding/Makefile +++ b/contrib/test_decoding/Makefile @@ -5,9 +5,10 @@ PGFILEDESC = "test_decoding - example of a logical decoding output plugin" 
REGRESS = ddl xact rewrite toast permissions decoding_in_xact \ decoding_into_rel binary prepared replorigin time messages \ - spill slot truncate stream stats + spill slot truncate stream stats twophase twophase_stream ISOLATION = mxact delayed_startup ondisk_startup concurrent_ddl_dml \ - oldest_xmin snapshot_transfer subxact_without_top concurrent_stream + oldest_xmin snapshot_transfer subxact_without_top concurrent_stream \ + twophase_snapshot REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf ISOLATION_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf diff --git a/contrib/test_decoding/expected/twophase.out b/contrib/test_decoding/expected/twophase.out new file mode 100644 index 0000000000000..f9f6bedd1cffb --- /dev/null +++ b/contrib/test_decoding/expected/twophase.out @@ -0,0 +1,235 @@ +-- Test prepared transactions. When two-phase-commit is enabled, transactions are +-- decoded at PREPARE time rather than at COMMIT PREPARED time. +SET synchronous_commit = on; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +CREATE TABLE test_prepared1(id integer primary key); +CREATE TABLE test_prepared2(id integer primary key); +-- Test that decoding happens at PREPARE time when two-phase-commit is enabled. +-- Decoding after COMMIT PREPARED must have all the commands in the transaction. +BEGIN; +INSERT INTO test_prepared1 VALUES (1); +INSERT INTO test_prepared1 VALUES (2); +-- should show nothing because the xact has not been prepared yet. +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +------ +(0 rows) + +PREPARE TRANSACTION 'test_prepared#1'; +-- should show both the above inserts and the PREPARE TRANSACTION. 
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +---------------------------------------------------- + BEGIN + table public.test_prepared1: INSERT: id[integer]:1 + table public.test_prepared1: INSERT: id[integer]:2 + PREPARE TRANSACTION 'test_prepared#1' +(4 rows) + +COMMIT PREPARED 'test_prepared#1'; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +---------------------------------------------------- + BEGIN + table public.test_prepared1: INSERT: id[integer]:1 + table public.test_prepared1: INSERT: id[integer]:2 + PREPARE TRANSACTION 'test_prepared#1' + COMMIT PREPARED 'test_prepared#1' +(5 rows) + +-- Test that rollback of a prepared xact is decoded. +BEGIN; +INSERT INTO test_prepared1 VALUES (3); +PREPARE TRANSACTION 'test_prepared#2'; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +---------------------------------------------------- + BEGIN + table public.test_prepared1: INSERT: id[integer]:3 + PREPARE TRANSACTION 'test_prepared#2' +(3 rows) + +ROLLBACK PREPARED 'test_prepared#2'; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +------------------------------------- + ROLLBACK PREPARED 'test_prepared#2' +(1 row) + +-- Test prepare of a xact containing ddl. Leaving xact uncommitted for next test. 
+BEGIN; +ALTER TABLE test_prepared1 ADD COLUMN data text; +INSERT INTO test_prepared1 VALUES (4, 'frakbar'); +PREPARE TRANSACTION 'test_prepared#3'; +-- confirm that exclusive lock from the ALTER command is held on test_prepared1 table +SELECT 'test_prepared_1' AS relation, locktype, mode +FROM pg_locks +WHERE locktype = 'relation' + AND relation = 'test_prepared1'::regclass; + relation | locktype | mode +-----------------+----------+--------------------- + test_prepared_1 | relation | RowExclusiveLock + test_prepared_1 | relation | AccessExclusiveLock +(2 rows) + +-- The insert should show the newly altered column but not the DDL. +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +------------------------------------------------------------------------- + BEGIN + table public.test_prepared1: INSERT: id[integer]:4 data[text]:'frakbar' + PREPARE TRANSACTION 'test_prepared#3' +(3 rows) + +-- Test that we decode correctly while an uncommitted prepared xact +-- with ddl exists. +-- +-- Use a separate table for the concurrent transaction because the lock from +-- the ALTER will stop us inserting into the other one. 
+-- +INSERT INTO test_prepared2 VALUES (5); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +---------------------------------------------------- + BEGIN + table public.test_prepared2: INSERT: id[integer]:5 + COMMIT +(3 rows) + +COMMIT PREPARED 'test_prepared#3'; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +------------------------------------------------------------------------- + BEGIN + table public.test_prepared1: INSERT: id[integer]:4 data[text]:'frakbar' + PREPARE TRANSACTION 'test_prepared#3' + COMMIT PREPARED 'test_prepared#3' +(4 rows) + +-- make sure stuff still works +INSERT INTO test_prepared1 VALUES (6); +INSERT INTO test_prepared2 VALUES (7); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +-------------------------------------------------------------------- + BEGIN + table public.test_prepared1: INSERT: id[integer]:6 data[text]:null + COMMIT + BEGIN + table public.test_prepared2: INSERT: id[integer]:7 + COMMIT +(6 rows) + +-- Check 'CLUSTER' (as operation that hold exclusive lock) doesn't block +-- logical decoding. 
+BEGIN; +INSERT INTO test_prepared1 VALUES (8, 'othercol'); +CLUSTER test_prepared1 USING test_prepared1_pkey; +INSERT INTO test_prepared1 VALUES (9, 'othercol2'); +PREPARE TRANSACTION 'test_prepared_lock'; +SELECT 'test_prepared1' AS relation, locktype, mode +FROM pg_locks +WHERE locktype = 'relation' + AND relation = 'test_prepared1'::regclass; + relation | locktype | mode +----------------+----------+--------------------- + test_prepared1 | relation | RowExclusiveLock + test_prepared1 | relation | ShareLock + test_prepared1 | relation | AccessExclusiveLock +(3 rows) + +-- The above CLUSTER command shouldn't cause a timeout on 2pc decoding. The +-- call should return within a second. +SET statement_timeout = '1s'; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +--------------------------------------------------------------------------- + BEGIN + table public.test_prepared1: INSERT: id[integer]:8 data[text]:'othercol' + table public.test_prepared1: INSERT: id[integer]:9 data[text]:'othercol2' + PREPARE TRANSACTION 'test_prepared_lock' +(4 rows) + +RESET statement_timeout; +COMMIT PREPARED 'test_prepared_lock'; +-- consume the commit +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +--------------------------------------------------------------------------- + BEGIN + table public.test_prepared1: INSERT: id[integer]:8 data[text]:'othercol' + table public.test_prepared1: INSERT: id[integer]:9 data[text]:'othercol2' + PREPARE TRANSACTION 'test_prepared_lock' + COMMIT PREPARED 'test_prepared_lock' +(5 rows) + +-- Test savepoints and sub-xacts. Creating savepoints will create +-- sub-xacts implicitly. 
+BEGIN; +CREATE TABLE test_prepared_savepoint (a int); +INSERT INTO test_prepared_savepoint VALUES (1); +SAVEPOINT test_savepoint; +INSERT INTO test_prepared_savepoint VALUES (2); +ROLLBACK TO SAVEPOINT test_savepoint; +PREPARE TRANSACTION 'test_prepared_savepoint'; +-- should show only 1, not 2 +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +------------------------------------------------------------ + BEGIN + table public.test_prepared_savepoint: INSERT: a[integer]:1 + PREPARE TRANSACTION 'test_prepared_savepoint' +(3 rows) + +COMMIT PREPARED 'test_prepared_savepoint'; +-- consume the commit +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +------------------------------------------------------------ + BEGIN + table public.test_prepared_savepoint: INSERT: a[integer]:1 + PREPARE TRANSACTION 'test_prepared_savepoint' + COMMIT PREPARED 'test_prepared_savepoint' +(4 rows) + +-- Test that a GID containing "_nodecode" gets decoded at commit prepared time. 
+BEGIN; +INSERT INTO test_prepared1 VALUES (20); +PREPARE TRANSACTION 'test_prepared_nodecode'; +-- should show nothing +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +------ +(0 rows) + +COMMIT PREPARED 'test_prepared_nodecode'; +-- should be decoded now +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +--------------------------------------------------------------------- + BEGIN + table public.test_prepared1: INSERT: id[integer]:20 data[text]:null + COMMIT +(3 rows) + +-- Test 8: +-- cleanup and make sure results are also empty +DROP TABLE test_prepared1; +DROP TABLE test_prepared2; +-- show results. There should be nothing to show +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + data +------ +(0 rows) + +SELECT pg_drop_replication_slot('regression_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + diff --git a/contrib/test_decoding/expected/twophase_snapshot.out b/contrib/test_decoding/expected/twophase_snapshot.out new file mode 100644 index 0000000000000..14d93876462ed --- /dev/null +++ b/contrib/test_decoding/expected/twophase_snapshot.out @@ -0,0 +1,41 @@ +Parsed test spec with 3 sessions + +starting permutation: s2b s2txid s1init s3b s3txid s2c s2b s2insert s2p s3c s1insert s1start s2cp s1start +step s2b: BEGIN; +step s2txid: SELECT pg_current_xact_id() IS NULL; +?column? + +f +step s1init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +step s3b: BEGIN; +step s3txid: SELECT pg_current_xact_id() IS NULL; +?column? 
+ +f +step s2c: COMMIT; +step s2b: BEGIN; +step s2insert: INSERT INTO do_write DEFAULT VALUES; +step s2p: PREPARE TRANSACTION 'test1'; +step s3c: COMMIT; +step s1init: <... completed> +?column? + +init +step s1insert: INSERT INTO do_write DEFAULT VALUES; +step s1start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false', 'skip-empty-xacts', '1', 'two-phase-commit', '1'); +data + +BEGIN +table public.do_write: INSERT: id[integer]:2 +COMMIT +step s2cp: COMMIT PREPARED 'test1'; +step s1start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false', 'skip-empty-xacts', '1', 'two-phase-commit', '1'); +data + +BEGIN +table public.do_write: INSERT: id[integer]:1 +PREPARE TRANSACTION 'test1' +COMMIT PREPARED 'test1' +?column? + +stop diff --git a/contrib/test_decoding/expected/twophase_stream.out b/contrib/test_decoding/expected/twophase_stream.out new file mode 100644 index 0000000000000..3acc4acd3651d --- /dev/null +++ b/contrib/test_decoding/expected/twophase_stream.out @@ -0,0 +1,147 @@ +-- Test streaming of two-phase commits +SET synchronous_commit = on; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +CREATE TABLE stream_test(data text); +-- consume DDL +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); + data +------ +(0 rows) + +-- streaming test with sub-transaction and PREPARE/COMMIT PREPARED +BEGIN; +SAVEPOINT s1; +SELECT 'msg5' FROM pg_logical_emit_message(true, 'test', repeat('a', 50)); + ?column? 
+---------- + msg5 +(1 row) + +INSERT INTO stream_test SELECT repeat('a', 2000) || g.i FROM generate_series(1, 35) g(i); +TRUNCATE table stream_test; +ROLLBACK TO s1; +INSERT INTO stream_test SELECT repeat('a', 10) || g.i FROM generate_series(1, 20) g(i); +PREPARE TRANSACTION 'test1'; +-- should show the inserts after a ROLLBACK +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL,NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1'); + data +---------------------------------------------------------- + streaming message: transactional: 1 prefix: test, sz: 50 + opening a streamed block for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + streaming change for transaction + closing a streamed block for transaction + preparing streamed transaction 'test1' +(24 rows) + +COMMIT PREPARED 'test1'; +--should show the COMMIT PREPARED and the other changes in the transaction +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL,NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1'); + data +------------------------------------------------------------- + BEGIN + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa1' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa2' + table public.stream_test: INSERT: 
data[text]:'aaaaaaaaaa3' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa4' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa5' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa6' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa7' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa8' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa9' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa10' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa11' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa12' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa13' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa14' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa15' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa16' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa17' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa18' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa19' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa20' + PREPARE TRANSACTION 'test1' + COMMIT PREPARED 'test1' +(23 rows) + +-- streaming test with sub-transaction and PREPARE/COMMIT PREPARED but with +-- filtered gid. gids with '_nodecode' will not be decoded at prepare time. +BEGIN; +SAVEPOINT s1; +SELECT 'msg5' FROM pg_logical_emit_message(true, 'test', repeat('a', 50)); + ?column? 
+---------- + msg5 +(1 row) + +INSERT INTO stream_test SELECT repeat('a', 2000) || g.i FROM generate_series(1, 35) g(i); +TRUNCATE table stream_test; +ROLLBACK to s1; +INSERT INTO stream_test SELECT repeat('a', 10) || g.i FROM generate_series(1, 20) g(i); +PREPARE TRANSACTION 'test1_nodecode'; +-- should NOT show inserts after a ROLLBACK +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL,NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1'); + data +---------------------------------------------------------- + streaming message: transactional: 1 prefix: test, sz: 50 +(1 row) + +COMMIT PREPARED 'test1_nodecode'; +-- should show the inserts but not show a COMMIT PREPARED but a COMMIT +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL,NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1'); + data +------------------------------------------------------------- + BEGIN + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa1' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa2' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa3' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa4' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa5' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa6' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa7' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa8' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa9' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa10' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa11' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa12' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa13' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa14' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa15' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa16' + table 
public.stream_test: INSERT: data[text]:'aaaaaaaaaa17' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa18' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa19' + table public.stream_test: INSERT: data[text]:'aaaaaaaaaa20' + COMMIT +(22 rows) + +DROP TABLE stream_test; +SELECT pg_drop_replication_slot('regression_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + diff --git a/contrib/test_decoding/specs/twophase_snapshot.spec b/contrib/test_decoding/specs/twophase_snapshot.spec new file mode 100644 index 0000000000000..3e700404e0e5f --- /dev/null +++ b/contrib/test_decoding/specs/twophase_snapshot.spec @@ -0,0 +1,53 @@ +# Test decoding of two-phase transactions during the build of a consistent snapshot. +setup +{ + DROP TABLE IF EXISTS do_write; + CREATE TABLE do_write(id serial primary key); +} + +teardown +{ + DROP TABLE do_write; + SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot'); +} + + +session "s1" +setup { SET synchronous_commit=on; } + +step "s1init" {SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');} +step "s1start" {SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false', 'skip-empty-xacts', '1', 'two-phase-commit', '1');} +step "s1insert" { INSERT INTO do_write DEFAULT VALUES; } + +session "s2" +setup { SET synchronous_commit=on; } + +step "s2b" { BEGIN; } +step "s2txid" { SELECT pg_current_xact_id() IS NULL; } +step "s2c" { COMMIT; } +step "s2insert" { INSERT INTO do_write DEFAULT VALUES; } +step "s2p" { PREPARE TRANSACTION 'test1'; } +step "s2cp" { COMMIT PREPARED 'test1'; } + + +session "s3" +setup { SET synchronous_commit=on; } + +step "s3b" { BEGIN; } +step "s3txid" { SELECT pg_current_xact_id() IS NULL; } +step "s3c" { COMMIT; } + +# Force building of a consistent snapshot between a PREPARE and COMMIT PREPARED +# and ensure that the whole transaction is decoded at the time of COMMIT +# PREPARED. 
+# +# 's1init' step will initialize the replication slot and cause logical decoding +# to wait in initial starting point till the in-progress transaction in s2 is +# committed. 's2c' step will cause logical decoding to go to initial consistent +# point and wait for in-progress transaction s3 to commit. 's3c' step will cause +# logical decoding to find a consistent point while the transaction s2 is +# prepared and not yet committed. This will cause the first s1start to skip +# prepared transaction s2 as that will be before consistent point. The second +# s1start will allow decoding of skipped prepare along with commit prepared done +# as part of s2cp. +permutation "s2b" "s2txid" "s1init" "s3b" "s3txid" "s2c" "s2b" "s2insert" "s2p" "s3c" "s1insert" "s1start" "s2cp" "s1start" diff --git a/contrib/test_decoding/sql/twophase.sql b/contrib/test_decoding/sql/twophase.sql new file mode 100644 index 0000000000000..894e4f5baf15e --- /dev/null +++ b/contrib/test_decoding/sql/twophase.sql @@ -0,0 +1,112 @@ +-- Test prepared transactions. When two-phase-commit is enabled, transactions are +-- decoded at PREPARE time rather than at COMMIT PREPARED time. +SET synchronous_commit = on; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + +CREATE TABLE test_prepared1(id integer primary key); +CREATE TABLE test_prepared2(id integer primary key); + +-- Test that decoding happens at PREPARE time when two-phase-commit is enabled. +-- Decoding after COMMIT PREPARED must have all the commands in the transaction. +BEGIN; +INSERT INTO test_prepared1 VALUES (1); +INSERT INTO test_prepared1 VALUES (2); +-- should show nothing because the xact has not been prepared yet. +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); +PREPARE TRANSACTION 'test_prepared#1'; +-- should show both the above inserts and the PREPARE TRANSACTION. 
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); +COMMIT PREPARED 'test_prepared#1'; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + +-- Test that rollback of a prepared xact is decoded. +BEGIN; +INSERT INTO test_prepared1 VALUES (3); +PREPARE TRANSACTION 'test_prepared#2'; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); +ROLLBACK PREPARED 'test_prepared#2'; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + +-- Test prepare of a xact containing ddl. Leaving xact uncommitted for next test. +BEGIN; +ALTER TABLE test_prepared1 ADD COLUMN data text; +INSERT INTO test_prepared1 VALUES (4, 'frakbar'); +PREPARE TRANSACTION 'test_prepared#3'; +-- confirm that exclusive lock from the ALTER command is held on test_prepared1 table +SELECT 'test_prepared_1' AS relation, locktype, mode +FROM pg_locks +WHERE locktype = 'relation' + AND relation = 'test_prepared1'::regclass; +-- The insert should show the newly altered column but not the DDL. +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + +-- Test that we decode correctly while an uncommitted prepared xact +-- with ddl exists. +-- +-- Use a separate table for the concurrent transaction because the lock from +-- the ALTER will stop us inserting into the other one. 
+-- +INSERT INTO test_prepared2 VALUES (5); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + +COMMIT PREPARED 'test_prepared#3'; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); +-- make sure stuff still works +INSERT INTO test_prepared1 VALUES (6); +INSERT INTO test_prepared2 VALUES (7); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + +-- Check 'CLUSTER' (as operation that hold exclusive lock) doesn't block +-- logical decoding. +BEGIN; +INSERT INTO test_prepared1 VALUES (8, 'othercol'); +CLUSTER test_prepared1 USING test_prepared1_pkey; +INSERT INTO test_prepared1 VALUES (9, 'othercol2'); +PREPARE TRANSACTION 'test_prepared_lock'; + +SELECT 'test_prepared1' AS relation, locktype, mode +FROM pg_locks +WHERE locktype = 'relation' + AND relation = 'test_prepared1'::regclass; +-- The above CLUSTER command shouldn't cause a timeout on 2pc decoding. The +-- call should return within a second. +SET statement_timeout = '1s'; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); +RESET statement_timeout; +COMMIT PREPARED 'test_prepared_lock'; +-- consume the commit +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + +-- Test savepoints and sub-xacts. Creating savepoints will create +-- sub-xacts implicitly. 
+BEGIN; +CREATE TABLE test_prepared_savepoint (a int); +INSERT INTO test_prepared_savepoint VALUES (1); +SAVEPOINT test_savepoint; +INSERT INTO test_prepared_savepoint VALUES (2); +ROLLBACK TO SAVEPOINT test_savepoint; +PREPARE TRANSACTION 'test_prepared_savepoint'; +-- should show only 1, not 2 +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); +COMMIT PREPARED 'test_prepared_savepoint'; +-- consume the commit +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + +-- Test that a GID containing "_nodecode" gets decoded at commit prepared time. +BEGIN; +INSERT INTO test_prepared1 VALUES (20); +PREPARE TRANSACTION 'test_prepared_nodecode'; +-- should show nothing +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); +COMMIT PREPARED 'test_prepared_nodecode'; +-- should be decoded now +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + +-- Test 8: +-- cleanup and make sure results are also empty +DROP TABLE test_prepared1; +DROP TABLE test_prepared2; +-- show results. 
There should be nothing to show +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1'); + +SELECT pg_drop_replication_slot('regression_slot'); diff --git a/contrib/test_decoding/sql/twophase_stream.sql b/contrib/test_decoding/sql/twophase_stream.sql new file mode 100644 index 0000000000000..e9dd44fdb37bb --- /dev/null +++ b/contrib/test_decoding/sql/twophase_stream.sql @@ -0,0 +1,45 @@ +-- Test streaming of two-phase commits + +SET synchronous_commit = on; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + +CREATE TABLE stream_test(data text); + +-- consume DDL +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); + +-- streaming test with sub-transaction and PREPARE/COMMIT PREPARED +BEGIN; +SAVEPOINT s1; +SELECT 'msg5' FROM pg_logical_emit_message(true, 'test', repeat('a', 50)); +INSERT INTO stream_test SELECT repeat('a', 2000) || g.i FROM generate_series(1, 35) g(i); +TRUNCATE table stream_test; +ROLLBACK TO s1; +INSERT INTO stream_test SELECT repeat('a', 10) || g.i FROM generate_series(1, 20) g(i); +PREPARE TRANSACTION 'test1'; +-- should show the inserts after a ROLLBACK +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL,NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1'); + +COMMIT PREPARED 'test1'; +--should show the COMMIT PREPARED and the other changes in the transaction +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL,NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1'); + +-- streaming test with sub-transaction and PREPARE/COMMIT PREPARED but with +-- filtered gid. gids with '_nodecode' will not be decoded at prepare time. 
+BEGIN; +SAVEPOINT s1; +SELECT 'msg5' FROM pg_logical_emit_message(true, 'test', repeat('a', 50)); +INSERT INTO stream_test SELECT repeat('a', 2000) || g.i FROM generate_series(1, 35) g(i); +TRUNCATE table stream_test; +ROLLBACK to s1; +INSERT INTO stream_test SELECT repeat('a', 10) || g.i FROM generate_series(1, 20) g(i); +PREPARE TRANSACTION 'test1_nodecode'; +-- should NOT show inserts after a ROLLBACK +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL,NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1'); + +COMMIT PREPARED 'test1_nodecode'; +-- should show the inserts but not show a COMMIT PREPARED but a COMMIT +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL,NULL, 'two-phase-commit', '1', 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1'); + +DROP TABLE stream_test; +SELECT pg_drop_replication_slot('regression_slot'); diff --git a/contrib/test_decoding/test_decoding.c b/contrib/test_decoding/test_decoding.c index e12278beb5817..929255eac7466 100644 --- a/contrib/test_decoding/test_decoding.c +++ b/contrib/test_decoding/test_decoding.c @@ -3,7 +3,7 @@ * test_decoding.c * example logical decoding output plugin * - * Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/test_decoding/test_decoding.c @@ -76,6 +76,20 @@ static void pg_decode_message(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, XLogRecPtr message_lsn, bool transactional, const char *prefix, Size sz, const char *message); +static bool pg_decode_filter_prepare(LogicalDecodingContext *ctx, + const char *gid); +static void pg_decode_begin_prepare_txn(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn); +static void pg_decode_prepare_txn(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + XLogRecPtr prepare_lsn); +static void pg_decode_commit_prepared_txn(LogicalDecodingContext *ctx, + 
ReorderBufferTXN *txn, + XLogRecPtr commit_lsn); +static void pg_decode_rollback_prepared_txn(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + XLogRecPtr prepare_end_lsn, + TimestampTz prepare_time); static void pg_decode_stream_start(LogicalDecodingContext *ctx, ReorderBufferTXN *txn); static void pg_output_stream_start(LogicalDecodingContext *ctx, @@ -87,6 +101,9 @@ static void pg_decode_stream_stop(LogicalDecodingContext *ctx, static void pg_decode_stream_abort(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, XLogRecPtr abort_lsn); +static void pg_decode_stream_prepare(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + XLogRecPtr prepare_lsn); static void pg_decode_stream_commit(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, XLogRecPtr commit_lsn); @@ -123,9 +140,15 @@ _PG_output_plugin_init(OutputPluginCallbacks *cb) cb->filter_by_origin_cb = pg_decode_filter; cb->shutdown_cb = pg_decode_shutdown; cb->message_cb = pg_decode_message; + cb->filter_prepare_cb = pg_decode_filter_prepare; + cb->begin_prepare_cb = pg_decode_begin_prepare_txn; + cb->prepare_cb = pg_decode_prepare_txn; + cb->commit_prepared_cb = pg_decode_commit_prepared_txn; + cb->rollback_prepared_cb = pg_decode_rollback_prepared_txn; cb->stream_start_cb = pg_decode_stream_start; cb->stream_stop_cb = pg_decode_stream_stop; cb->stream_abort_cb = pg_decode_stream_abort; + cb->stream_prepare_cb = pg_decode_stream_prepare; cb->stream_commit_cb = pg_decode_stream_commit; cb->stream_change_cb = pg_decode_stream_change; cb->stream_message_cb = pg_decode_stream_message; @@ -141,6 +164,7 @@ pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, ListCell *option; TestDecodingData *data; bool enable_streaming = false; + bool enable_twophase = false; data = palloc0(sizeof(TestDecodingData)); data->context = AllocSetContextCreate(ctx->context, @@ -241,6 +265,16 @@ pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, errmsg("could not parse value \"%s\" for 
parameter \"%s\"", strVal(elem->arg), elem->defname))); } + else if (strcmp(elem->defname, "two-phase-commit") == 0) + { + if (elem->arg == NULL) + continue; + else if (!parse_bool(strVal(elem->arg), &enable_twophase)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse value \"%s\" for parameter \"%s\"", + strVal(elem->arg), elem->defname))); + } else { ereport(ERROR, @@ -252,6 +286,7 @@ pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, } ctx->streaming &= enable_streaming; + ctx->twophase &= enable_twophase; } /* cleanup this plugin's resources */ @@ -320,6 +355,111 @@ pg_decode_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, OutputPluginWrite(ctx, true); } +/* BEGIN PREPARE callback */ +static void +pg_decode_begin_prepare_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn) +{ + TestDecodingData *data = ctx->output_plugin_private; + TestDecodingTxnData *txndata = + MemoryContextAllocZero(ctx->context, sizeof(TestDecodingTxnData)); + + txndata->xact_wrote_changes = false; + txn->output_plugin_private = txndata; + + if (data->skip_empty_xacts) + return; + + pg_output_begin(ctx, data, txn, true); +} + +/* PREPARE callback */ +static void +pg_decode_prepare_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, + XLogRecPtr prepare_lsn) +{ + TestDecodingData *data = ctx->output_plugin_private; + TestDecodingTxnData *txndata = txn->output_plugin_private; + + if (data->skip_empty_xacts && !txndata->xact_wrote_changes) + return; + + OutputPluginPrepareWrite(ctx, true); + + appendStringInfo(ctx->out, "PREPARE TRANSACTION %s", + quote_literal_cstr(txn->gid)); + + if (data->include_xids) + appendStringInfo(ctx->out, ", txid %u", txn->xid); + + if (data->include_timestamp) + appendStringInfo(ctx->out, " (at %s)", + timestamptz_to_str(txn->commit_time)); + + OutputPluginWrite(ctx, true); +} + +/* COMMIT PREPARED callback */ +static void +pg_decode_commit_prepared_txn(LogicalDecodingContext *ctx, 
ReorderBufferTXN *txn, + XLogRecPtr commit_lsn) +{ + TestDecodingData *data = ctx->output_plugin_private; + + OutputPluginPrepareWrite(ctx, true); + + appendStringInfo(ctx->out, "COMMIT PREPARED %s", + quote_literal_cstr(txn->gid)); + + if (data->include_xids) + appendStringInfo(ctx->out, ", txid %u", txn->xid); + + if (data->include_timestamp) + appendStringInfo(ctx->out, " (at %s)", + timestamptz_to_str(txn->commit_time)); + + OutputPluginWrite(ctx, true); +} + +/* ROLLBACK PREPARED callback */ +static void +pg_decode_rollback_prepared_txn(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + XLogRecPtr prepare_end_lsn, + TimestampTz prepare_time) +{ + TestDecodingData *data = ctx->output_plugin_private; + + OutputPluginPrepareWrite(ctx, true); + + appendStringInfo(ctx->out, "ROLLBACK PREPARED %s", + quote_literal_cstr(txn->gid)); + + if (data->include_xids) + appendStringInfo(ctx->out, ", txid %u", txn->xid); + + if (data->include_timestamp) + appendStringInfo(ctx->out, " (at %s)", + timestamptz_to_str(txn->commit_time)); + + OutputPluginWrite(ctx, true); +} + +/* + * Filter out two-phase transactions. + * + * Each plugin can implement its own filtering logic. Here we demonstrate a + * simple logic by checking the GID. If the GID contains the "_nodecode" + * substring, then we filter it out. 
+ */ +static bool +pg_decode_filter_prepare(LogicalDecodingContext *ctx, const char *gid) +{ + if (strstr(gid, "_nodecode") != NULL) + return true; + + return false; +} + static bool pg_decode_filter(LogicalDecodingContext *ctx, RepOriginId origin_id) @@ -701,6 +841,33 @@ pg_decode_stream_abort(LogicalDecodingContext *ctx, OutputPluginWrite(ctx, true); } +static void +pg_decode_stream_prepare(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + XLogRecPtr prepare_lsn) +{ + TestDecodingData *data = ctx->output_plugin_private; + TestDecodingTxnData *txndata = txn->output_plugin_private; + + if (data->skip_empty_xacts && !txndata->xact_wrote_changes) + return; + + OutputPluginPrepareWrite(ctx, true); + + if (data->include_xids) + appendStringInfo(ctx->out, "preparing streamed transaction TXN %s, txid %u", + quote_literal_cstr(txn->gid), txn->xid); + else + appendStringInfo(ctx->out, "preparing streamed transaction %s", + quote_literal_cstr(txn->gid)); + + if (data->include_timestamp) + appendStringInfo(ctx->out, " (at %s)", + timestamptz_to_str(txn->commit_time)); + + OutputPluginWrite(ctx, true); +} + static void pg_decode_stream_commit(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c index 8bf0b2078ccac..4996612902492 100644 --- a/contrib/tsm_system_rows/tsm_system_rows.c +++ b/contrib/tsm_system_rows/tsm_system_rows.c @@ -17,7 +17,7 @@ * won't visit blocks added after the first scan, but that is fine since * such blocks shouldn't contain any visible tuples anyway. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c index 2fda572e6522d..788d8f9a68d3d 100644 --- a/contrib/tsm_system_time/tsm_system_time.c +++ b/contrib/tsm_system_time/tsm_system_time.c @@ -13,7 +13,7 @@ * However, we do what we can to reduce surprising behavior by selecting * the sampling pattern just once per query, much as in tsm_system_rows. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c index 0047efc075f30..2b3819fb2e827 100644 --- a/contrib/unaccent/unaccent.c +++ b/contrib/unaccent/unaccent.c @@ -3,7 +3,7 @@ * unaccent.c * Text search unaccent dictionary * - * Copyright (c) 2009-2020, PostgreSQL Global Development Group + * Copyright (c) 2009-2021, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/unaccent/unaccent.c diff --git a/contrib/uuid-ossp/.gitignore b/contrib/uuid-ossp/.gitignore index d7260edc610ab..5dcb3ff972350 100644 --- a/contrib/uuid-ossp/.gitignore +++ b/contrib/uuid-ossp/.gitignore @@ -1,4 +1,3 @@ -/sha1.c # Generated subdirectories /log/ /results/ diff --git a/contrib/uuid-ossp/Makefile b/contrib/uuid-ossp/Makefile index 0859a5397c97d..c42edf5f1a0c1 100644 --- a/contrib/uuid-ossp/Makefile +++ b/contrib/uuid-ossp/Makefile @@ -2,7 +2,6 @@ MODULE_big = uuid-ossp OBJS = \ - $(UUID_EXTRA_OBJS) \ $(WIN32RES) \ uuid-ossp.o @@ -19,8 +18,6 @@ pgcrypto_src = $(top_srcdir)/contrib/pgcrypto PG_CPPFLAGS = -I$(pgcrypto_src) -EXTRA_CLEAN = sha1.c - ifdef USE_PGXS PG_CONFIG = pg_config PGXS := 
$(shell $(PG_CONFIG) --pgxs) @@ -31,6 +28,3 @@ top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif - -sha1.c: % : $(pgcrypto_src)/% - rm -f $@ && $(LN_S) $< . diff --git a/contrib/uuid-ossp/uuid-ossp.c b/contrib/uuid-ossp/uuid-ossp.c index 2ff7d9448bcad..49a4a5926455c 100644 --- a/contrib/uuid-ossp/uuid-ossp.c +++ b/contrib/uuid-ossp/uuid-ossp.c @@ -2,7 +2,7 @@ * * UUID generation functions using the BSD, E2FS or OSSP UUID library * - * Copyright (c) 2007-2020, PostgreSQL Global Development Group + * Copyright (c) 2007-2021, PostgreSQL Global Development Group * * Portions Copyright (c) 2009 Andrew Gierth * @@ -15,6 +15,7 @@ #include "fmgr.h" #include "common/cryptohash.h" +#include "common/sha1.h" #include "port/pg_bswap.h" #include "utils/builtins.h" #include "utils/uuid.h" @@ -40,15 +41,6 @@ #undef uuid_hash -/* - * Some BSD variants offer sha1 implementation but Linux does not, so we use - * a copy from pgcrypto. Not needed with OSSP, though. 
- */ -#ifndef HAVE_UUID_OSSP -#include "sha1.h" -#endif - - /* Check our UUID length against OSSP's; better both be 16 */ #if defined(HAVE_UUID_OSSP) && (UUID_LEN != UUID_LEN_BIN) #error UUID length mismatch @@ -338,13 +330,18 @@ uuid_generate_internal(int v, unsigned char *ns, const char *ptr, int len) } else { - SHA1_CTX ctx; - unsigned char sha1result[SHA1_RESULTLEN]; + pg_cryptohash_ctx *ctx = pg_cryptohash_create(PG_SHA1); + unsigned char sha1result[SHA1_DIGEST_LENGTH]; + + if (pg_cryptohash_init(ctx) < 0) + elog(ERROR, "could not initialize %s context", "SHA1"); + if (pg_cryptohash_update(ctx, ns, sizeof(uu)) < 0 || + pg_cryptohash_update(ctx, (unsigned char *) ptr, len) < 0) + elog(ERROR, "could not update %s context", "SHA1"); + if (pg_cryptohash_final(ctx, sha1result) < 0) + elog(ERROR, "could not finalize %s context", "SHA1"); + pg_cryptohash_free(ctx); - SHA1Init(&ctx); - SHA1Update(&ctx, ns, sizeof(uu)); - SHA1Update(&ctx, (unsigned char *) ptr, len); - SHA1Final(sha1result, &ctx); memcpy(&uu, sha1result, sizeof(uu)); } diff --git a/contrib/vacuumlo/vacuumlo.c b/contrib/vacuumlo/vacuumlo.c index 532cc596c4128..dcb95c4320470 100644 --- a/contrib/vacuumlo/vacuumlo.c +++ b/contrib/vacuumlo/vacuumlo.c @@ -3,7 +3,7 @@ * vacuumlo.c * This removes orphaned large objects from a database. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -124,8 +124,7 @@ vacuumlo(const char *database, const struct _param *param) /* check to see that the backend connection was successfully made */ if (PQstatus(conn) == CONNECTION_BAD) { - pg_log_error("connection to database \"%s\" failed: %s", - database, PQerrorMessage(conn)); + pg_log_error("%s", PQerrorMessage(conn)); PQfinish(conn); return -1; } diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml index 8dfb01a77be09..a2571d33ae67d 100644 --- a/doc/src/sgml/amcheck.sgml +++ b/doc/src/sgml/amcheck.sgml @@ -393,7 +393,7 @@ SET client_min_messages = DEBUG1; amcheck can be effective at detecting various types of failure modes that data page + linkend="app-initdb-data-checksums">data checksums will fail to catch. These include: @@ -497,7 +497,7 @@ SET client_min_messages = DEBUG1; Structural corruption can happen due to faulty storage hardware, or relation files being overwritten or modified by unrelated software. This kind of corruption can also be detected with - data page + data page checksums. diff --git a/doc/src/sgml/arch-dev.sgml b/doc/src/sgml/arch-dev.sgml index 7883c3cd827ce..e56a13283fa9e 100644 --- a/doc/src/sgml/arch-dev.sgml +++ b/doc/src/sgml/arch-dev.sgml @@ -7,7 +7,7 @@ Author This chapter originated as part of - , Stefan Simkovics' + Stefan Simkovics' Master's Thesis prepared at Vienna University of Technology under the direction of O.Univ.Prof.Dr. Georg Gottlob and Univ.Ass. Mag. Katrin Seyr. @@ -17,10 +17,7 @@ This chapter gives an overview of the internal structure of the backend of PostgreSQL. After having read the following sections you should have an idea of how a query - is processed. 
This chapter does not aim to provide a detailed - description of the internal operation of - PostgreSQL, as such a document would be - very extensive. Rather, this chapter is intended to help the reader + is processed. This chapter is intended to help the reader understand the general sequence of operations that occur within the backend from the point at which a query is received, to the point at which the results are returned to the client. @@ -30,8 +27,8 @@ The Path of a Query - Here we give a short overview of the stages a query has to pass in - order to obtain a result. + Here we give a short overview of the stages a query has to pass + to obtain a result. @@ -125,10 +122,10 @@ use a supervisor process (also master process) that spawns a new server process every time a connection is requested. This supervisor - process is called postgres and listens at a + process is called postmaster and listens at a specified TCP/IP port for incoming connections. Whenever a request - for a connection is detected the postgres - process spawns a new server process. The server tasks + for a connection is detected the postmaster + process spawns a new server process. The server processes communicate with each other using semaphores and shared memory to ensure data integrity throughout concurrent data access. @@ -230,7 +227,7 @@ A detailed description of bison or the grammar rules given in gram.y would be - beyond the scope of this paper. There are many books and + beyond the scope of this manual. There are many books and documents dealing with flex and bison. You should be familiar with bison before you start to study the @@ -343,8 +340,8 @@ In some situations, examining each possible way in which a query - can be executed would take an excessive amount of time and memory - space. In particular, this occurs when executing queries + can be executed would take an excessive amount of time and memory. 
+ In particular, this occurs when executing queries involving large numbers of join operations. In order to determine a reasonable (not necessarily optimal) query plan in a reasonable amount of time, PostgreSQL uses a Genetic @@ -411,7 +408,7 @@ merge join: Each relation is sorted on the join attributes before the join starts. Then the two relations are scanned in parallel, and matching rows are combined to form - join rows. This kind of join is more + join rows. This kind of join is attractive because each relation has to be scanned only once. The required sorting might be achieved either by an explicit sort step, or by scanning the relation in the proper order using an @@ -442,7 +439,7 @@ If the query uses fewer than relations, a near-exhaustive search is conducted to find the best join sequence. The planner preferentially considers joins between any - two relations for which there exist a corresponding join clause in the + two relations for which there exists a corresponding join clause in the WHERE qualification (i.e., for which a restriction like where rel1.attr1=rel2.attr2 exists). Join pairs with no join clause are considered only when there @@ -529,26 +526,36 @@ - The executor mechanism is used to evaluate all four basic SQL query types: - SELECT, INSERT, UPDATE, and - DELETE. For SELECT, the top-level executor - code only needs to send each row returned by the query plan tree off - to the client. For INSERT, each returned row is inserted - into the target table specified for the INSERT. This is - done in a special top-level plan node called ModifyTable. - (A simple - INSERT ... VALUES command creates a trivial plan tree - consisting of a single Result node, which computes just one - result row, and ModifyTable above it to perform the insertion. - But INSERT ... SELECT can demand the full power - of the executor mechanism.) 
For UPDATE, the planner arranges - that each computed row includes all the updated column values, plus - the TID (tuple ID, or row ID) of the original target row; - this data is fed into a ModifyTable node, which uses the - information to create a new updated row and mark the old row deleted. - For DELETE, the only column that is actually returned by the - plan is the TID, and the ModifyTable node simply uses the TID - to visit each target row and mark it deleted. + The executor mechanism is used to evaluate all four basic SQL query + types: SELECT, INSERT, + UPDATE, and DELETE. + For SELECT, the top-level executor code + only needs to send each row returned by the query plan tree + off to the client. INSERT ... SELECT, + UPDATE, and DELETE + are effectively SELECTs under a special + top-level plan node called ModifyTable. + + + + INSERT ... SELECT feeds the rows up + to ModifyTable for insertion. For + UPDATE, the planner arranges that each + computed row includes all the updated column values, plus the + TID (tuple ID, or row ID) of the original + target row; this data is fed up to the ModifyTable + node, which uses the information to create a new updated row and + mark the old row deleted. For DELETE, the only + column that is actually returned by the plan is the TID, and the + ModifyTable node simply uses the TID to visit each + target row and mark it deleted. + + + + A simple INSERT ... VALUES command creates a + trivial plan tree consisting of a single Result + node, which computes just one result row, feeding that up + toModifyTable to perform the insertion. diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml index 42a8ed328d886..3c8aaed0b6203 100644 --- a/doc/src/sgml/backup.sgml +++ b/doc/src/sgml/backup.sgml @@ -1437,12 +1437,13 @@ restore_command = 'cp /mnt/server/archivedir/%f %p' - The default behavior of recovery is to recover along the same timeline - that was current when the base backup was taken. 
If you wish to recover - into some child timeline (that is, you want to return to some state that - was itself generated after a recovery attempt), you need to specify the - target timeline ID in . You cannot recover into - timelines that branched off earlier than the base backup. + The default behavior of recovery is to recover to the latest timeline found + in the archive. If you wish to recover to the timeline that was current + when the base backup was taken or into a specific child timeline (that + is, you want to return to some state that was itself generated after a + recovery attempt), you need to specify current or the + target timeline ID in . You + cannot recover into timelines that branched off earlier than the base backup. diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml index 036a72c81e9c8..db1b3d5e9a028 100644 --- a/doc/src/sgml/bki.sgml +++ b/doc/src/sgml/bki.sgml @@ -474,10 +474,15 @@ - In such a column, all entries must use the symbolic format except - when writing 0 for InvalidOid. (If the column is + In some catalog columns, it's allowed for entries to be zero instead + of a valid reference. If this is allowed, write + BKI_LOOKUP_OPT instead + of BKI_LOOKUP. Then you can + write 0 for an entry. (If the column is declared regproc, you can optionally write - instead of 0.) + Except for this special case, all entries in + a BKI_LOOKUP column must be symbolic references. genbki.pl will warn about unrecognized names. @@ -535,17 +540,6 @@ expected to be in the pg_catalog schema. - - - - In addition to the generic lookup mechanisms, there is a special - convention that PGNSP is replaced by the OID of - the pg_catalog schema, - and PGUID is replaced by the OID of the bootstrap - superuser role. These usages are somewhat historical but so far - there hasn't been a need to generalize them. - - @@ -554,6 +548,22 @@ therefore no need for the bootstrap backend to deal with symbolic references. 
+ + + It's desirable to mark OID reference columns + with BKI_LOOKUP or BKI_LOOKUP_OPT + even if the catalog has no initial data that requires lookup. This + allows genbki.pl to record the foreign key + relationships that exist in the system catalogs. That information is + used in the regression tests to check for incorrect entries. See also + the macros DECLARE_FOREIGN_KEY, + DECLARE_FOREIGN_KEY_OPT, + DECLARE_ARRAY_FOREIGN_KEY, + and DECLARE_ARRAY_FOREIGN_KEY_OPT, which are + used to declare foreign key relationships that are too complex + for BKI_LOOKUP (typically, multi-column foreign + keys). + diff --git a/doc/src/sgml/btree.sgml b/doc/src/sgml/btree.sgml index bb395e6a85c15..2b716c6443984 100644 --- a/doc/src/sgml/btree.sgml +++ b/doc/src/sgml/btree.sgml @@ -629,6 +629,109 @@ options(relopts local_relopts *) returns + + Bottom-up index deletion + + B-Tree indexes are not directly aware that under MVCC, there might + be multiple extant versions of the same logical table row; to an + index, each tuple is an independent object that needs its own index + entry. Version churn tuples may sometimes + accumulate and adversely affect query latency and throughput. This + typically occurs with UPDATE-heavy workloads + where most individual updates cannot apply the + HOT optimization. Changing the value of only + one column covered by one index during an UPDATE + always necessitates a new set of index tuples + — one for each and every index on the + table. Note in particular that this includes indexes that were not + logically modified by the UPDATE. + All indexes will need a successor physical index tuple that points + to the latest version in the table. Each new tuple within each + index will generally need to coexist with the original + updated tuple for a short period of time (typically + until shortly after the UPDATE transaction + commits). + + + B-Tree indexes incrementally delete version churn index tuples by + performing bottom-up index deletion passes. 
+ Each deletion pass is triggered in reaction to an anticipated + version churn page split. This only happens with + indexes that are not logically modified by + UPDATE statements, where concentrated build up + of obsolete versions in particular pages would occur otherwise. A + page split will usually be avoided, though it's possible that + certain implementation-level heuristics will fail to identify and + delete even one garbage index tuple (in which case a page split or + deduplication pass resolves the issue of an incoming new tuple not + fitting on a leaf page). The worst case number of versions that + any index scan must traverse (for any single logical row) is an + important contributor to overall system responsiveness and + throughput. A bottom-up index deletion pass targets suspected + garbage tuples in a single leaf page based on + qualitative distinctions involving logical + rows and versions. This contrasts with the top-down + index cleanup performed by autovacuum workers, which is triggered + when certain quantitative table-level + thresholds are exceeded (see ). + + + + Not all deletion operations that are performed within B-Tree + indexes are bottom-up deletion operations. There is a distinct + category of index tuple deletion: simple index tuple + deletion. This is a deferred maintenance operation + that deletes index tuples that are known to be safe to delete + (those whose item identifier's LP_DEAD bit is + already set). Like bottom-up index deletion, simple index + deletion takes place at the point that a page split is anticipated + as a way of avoiding the split. + + + Simple deletion is opportunistic in the sense that it can only + take place when recent index scans set the + LP_DEAD bits of affected items in passing. + Prior to PostgreSQL 14, the only + category of B-Tree deletion was simple deletion. 
The main + differences between it and bottom-up deletion are that only the + former is opportunistically driven by the activity of passing + index scans, while only the latter specifically targets version + churn from UPDATEs that do not logically modify + indexed columns. + + + + Bottom-up index deletion performs the vast majority of all garbage + index tuple cleanup for particular indexes with certain workloads. + This is expected with any B-Tree index that is subject to + significant version churn from UPDATEs that + rarely or never logically modify the columns that the index covers. + The average and worst case number of versions per logical row can + be kept low purely through targeted incremental deletion passes. + It's quite possible that the on-disk size of certain indexes will + never increase by even one single page/block despite + constant version churn from + UPDATEs. Even then, an exhaustive clean + sweep by a VACUUM operation (typically + run in an autovacuum worker process) will eventually be required as + a part of collective cleanup of the table and + each of its indexes. + + + Unlike VACUUM, bottom-up index deletion does not + provide any strong guarantees about how old the oldest garbage + index tuple may be. No index can be permitted to retain + floating garbage index tuples that became dead prior + to a conservative cutoff point shared by the table and all of its + indexes collectively. This fundamental table-level invariant makes + it safe to recycle table TIDs. This is how it + is possible for distinct logical rows to reuse the same table + TID over time (though this can never happen with + two logical rows whose lifetimes span the same + VACUUM cycle). + + + Deduplication @@ -666,15 +769,17 @@ options(relopts local_relopts *) returns The deduplication process occurs lazily, when a new item is - inserted that cannot fit on an existing leaf page. This prevents - (or at least delays) leaf page splits. 
Unlike GIN posting list - tuples, B-Tree posting list tuples do not need to expand every time - a new duplicate is inserted; they are merely an alternative - physical representation of the original logical contents of the - leaf page. This design prioritizes consistent performance with - mixed read-write workloads. Most client applications will at least - see a moderate performance benefit from using deduplication. - Deduplication is enabled by default. + inserted that cannot fit on an existing leaf page, though only when + index tuple deletion could not free sufficient space for the new + item (typically deletion is briefly considered and then skipped + over). Unlike GIN posting list tuples, B-Tree posting list tuples + do not need to expand every time a new duplicate is inserted; they + are merely an alternative physical representation of the original + logical contents of the leaf page. This design prioritizes + consistent performance with mixed read-write workloads. Most + client applications will at least see a moderate performance + benefit from using deduplication. Deduplication is enabled by + default. CREATE INDEX and REINDEX @@ -702,25 +807,16 @@ options(relopts local_relopts *) returns deduplication isn't usually helpful. - B-Tree indexes are not directly aware that under MVCC, there might - be multiple extant versions of the same logical table row; to an - index, each tuple is an independent object that needs its own index - entry. Version duplicates may sometimes accumulate - and adversely affect query latency and throughput. This typically - occurs with UPDATE-heavy workloads where most - individual updates cannot apply the HOT - optimization (often because at least one indexed column gets - modified, necessitating a new set of index tuple versions — - one new tuple for each and every index). In - effect, B-Tree deduplication ameliorates index bloat caused by - version churn. 
Note that even the tuples from a unique index are - not necessarily physically unique when stored - on disk due to version churn. The deduplication optimization is - selectively applied within unique indexes. It targets those pages - that appear to have version duplicates. The high level goal is to - give VACUUM more time to run before an - unnecessary page split caused by version churn can - take place. + It is sometimes possible for unique indexes (as well as unique + constraints) to use deduplication. This allows leaf pages to + temporarily absorb extra version churn duplicates. + Deduplication in unique indexes augments bottom-up index deletion, + especially in cases where a long-running transaction holds a + snapshot that blocks garbage collection. The goal is to buy time + for the bottom-up index deletion strategy to become effective + again. Delaying page splits until a single long-running + transaction naturally goes away can allow a bottom-up deletion pass + to succeed where an earlier deletion pass failed. diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 3a2266526c4af..ea222c04640b1 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1150,7 +1150,7 @@ (references pg_type.oid) - The data type of this column + The data type of this column (zero for a dropped column) @@ -1351,7 +1351,7 @@ The defined collation of the column, or zero if the column is - not of a collatable data type. 
+ not of a collatable data type @@ -1899,8 +1899,8 @@ SCRAM-SHA-256$<iteration count>:&l The OID of the data type that corresponds to this table's row type, - if any (zero for indexes, sequences, and toast tables, which have - no pg_type entry) + if any; zero for indexes, sequences, and toast tables, which have + no pg_type entry @@ -1910,7 +1910,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_type.oid) - For typed tables, the OID of the underlying composite type, + For typed tables, the OID of the underlying composite type; zero for all other relations @@ -1932,7 +1932,8 @@ SCRAM-SHA-256$<iteration count>:&l If this is a table or an index, the access method used (heap, - B-tree, hash, etc.) + B-tree, hash, etc.); otherwise zero (zero occurs for sequences, + as well as relations without storage, such as views) @@ -2007,7 +2008,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_class.oid) - OID of the TOAST table associated with this table, 0 if none. The + OID of the TOAST table associated with this table, zero if none. The TOAST table stores large attributes out of line in a secondary table. @@ -2174,8 +2175,8 @@ SCRAM-SHA-256$<iteration count>:&l For new relations being written during a DDL operation that requires a table rewrite, this contains the OID of the original relation; - otherwise 0. That state is only visible internally; this field should - never contain anything other than 0 for a user-visible relation. + otherwise zero. That state is only visible internally; this field should + never contain anything other than zero for a user-visible relation. 
@@ -2507,7 +2508,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_class.oid) - The table this constraint is on; 0 if not a table constraint + The table this constraint is on; zero if not a table constraint @@ -2517,7 +2518,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_type.oid) - The domain this constraint is on; 0 if not a domain constraint + The domain this constraint is on; zero if not a domain constraint @@ -2528,7 +2529,7 @@ SCRAM-SHA-256$<iteration count>:&l The index supporting this constraint, if it's a unique, primary - key, foreign key, or exclusion constraint; else 0 + key, foreign key, or exclusion constraint; else zero @@ -2538,8 +2539,8 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_constraint.oid) - The corresponding constraint in the parent partitioned table, - if this is a constraint in a partition; else 0 + The corresponding constraint of the parent partitioned table, + if this is a constraint on a partition; else zero @@ -2549,7 +2550,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_class.oid) - If a foreign key, the referenced table; else 0 + If a foreign key, the referenced table; else zero @@ -3142,7 +3143,7 @@ SCRAM-SHA-256$<iteration count>:&l The OID of the namespace associated with this entry, - or 0 if none + or zero if none @@ -3176,7 +3177,7 @@ SCRAM-SHA-256$<iteration count>:&l A pg_default_acl entry shows the initial privileges to be assigned to an object belonging to the indicated user. There are currently two types of entry: global entries with - defaclnamespace = 0, and per-schema entries + defaclnamespace = zero, and per-schema entries that reference a particular schema. If a global entry is present then it overrides the normal hard-wired default privileges for the object type. 
A per-schema entry, if present, represents privileges @@ -3236,7 +3237,8 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_class.oid) - The OID of the system catalog the dependent object is in + The OID of the system catalog the dependent object is in, + or zero for a DEPENDENCY_PIN entry @@ -3246,7 +3248,8 @@ SCRAM-SHA-256$<iteration count>:&l (references any OID column) - The OID of the specific dependent object + The OID of the specific dependent object, + or zero for a DEPENDENCY_PIN entry @@ -4687,7 +4690,7 @@ SCRAM-SHA-256$<iteration count>:&l For noninternal languages this references the language handler, which is a special function that is responsible for executing all functions that are written in the particular - language + language. Zero for internal languages. @@ -5189,7 +5192,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_type.oid) - Type of the left operand (0 if none) + Type of the left operand (zero for a prefix operator) @@ -5210,6 +5213,7 @@ SCRAM-SHA-256$<iteration count>:&l Type of the result + (zero for a not-yet-defined shell operator) @@ -5219,7 +5223,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_operator.oid) - Commutator of this operator, if any + Commutator of this operator (zero if none) @@ -5229,7 +5233,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_operator.oid) - Negator of this operator, if any + Negator of this operator (zero if none) @@ -5240,6 +5244,7 @@ SCRAM-SHA-256$<iteration count>:&l Function that implements this operator + (zero for a not-yet-defined shell operator) @@ -5250,6 +5255,7 @@ SCRAM-SHA-256$<iteration count>:&l Restriction selectivity estimation function for this operator + (zero if none) @@ -5260,17 +5266,13 @@ SCRAM-SHA-256$<iteration count>:&l Join selectivity estimation function for this operator + (zero if none) - - Unused columns contain zeroes. For example, oprleft - is zero for a prefix operator. 
- - @@ -5426,7 +5428,7 @@ SCRAM-SHA-256$<iteration count>:&l partnatts int2 - The number of columns in partition key + The number of columns in the partition key @@ -5438,7 +5440,7 @@ SCRAM-SHA-256$<iteration count>:&l The OID of the pg_class entry for the default partition of this partitioned table, or zero if this partitioned table does not - have a default partition. + have a default partition @@ -5588,7 +5590,9 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_authid.oid) - The roles to which the policy is applied + The roles to which the policy is applied; + zero means PUBLIC + (and normally appears alone in the array) @@ -5744,8 +5748,8 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_proc.oid) - Optional planner support function for this function - (see ) + Planner support function for this function + (see ), or zero if none @@ -5874,7 +5878,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_type.oid) - An array with the data types of the function arguments. This includes + An array of the data types of the function arguments. This includes only input arguments (including INOUT and VARIADIC arguments), as well as OUT parameters of procedures, and thus represents @@ -5888,7 +5892,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_type.oid) - An array with the data types of the function arguments. This includes + An array of the data types of the function arguments. This includes all arguments (including OUT and INOUT arguments); however, if all the arguments are IN arguments, this field will be null. @@ -5902,7 +5906,7 @@ SCRAM-SHA-256$<iteration count>:&l proargmodes char[] - An array with the modes of the function arguments, encoded as + An array of the modes of the function arguments, encoded as i for IN arguments, o for OUT arguments, b for INOUT arguments, @@ -5920,7 +5924,7 @@ SCRAM-SHA-256$<iteration count>:&l proargnames text[] - An array with the names of the function arguments. + An array of the names of the function arguments. 
Arguments without a name are set to empty strings in the array. If none of the arguments have a name, this field will be null. Note that subscripts correspond to positions of @@ -5945,9 +5949,12 @@ SCRAM-SHA-256$<iteration count>:&l protrftypes oid[] + (references pg_type.oid) - Data type OIDs for which to apply transforms. + An array of the argument/result data type(s) for which to apply + transforms (from the function's TRANSFORM + clause). Null if none. @@ -6253,7 +6260,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_collation.oid) - OID of the collation used for range comparisons, or 0 if none + OID of the collation used for range comparisons, or zero if none @@ -6274,7 +6281,7 @@ SCRAM-SHA-256$<iteration count>:&l OID of the function to convert a range value into canonical form, - or 0 if none + or zero if none @@ -6285,7 +6292,7 @@ SCRAM-SHA-256$<iteration count>:&l OID of the function to return the difference between two element - values as double precision, or 0 if none + values as double precision, or zero if none @@ -6730,6 +6737,7 @@ SCRAM-SHA-256$<iteration count>:&l The OID of the database the dependent object is in, or zero for a shared object + or a SHARED_DEPENDENCY_PIN entry @@ -6739,7 +6747,8 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_class.oid) - The OID of the system catalog the dependent object is in + The OID of the system catalog the dependent object is in, + or zero for a SHARED_DEPENDENCY_PIN entry @@ -6749,7 +6758,8 @@ SCRAM-SHA-256$<iteration count>:&l (references any OID column) - The OID of the specific dependent object + The OID of the specific dependent object, + or zero for a SHARED_DEPENDENCY_PIN entry @@ -6849,10 +6859,21 @@ SCRAM-SHA-256$<iteration count>:&l + + + SHARED_DEPENDENCY_TABLESPACE (t) + + + The referenced object (which must be a tablespace) is mentioned as + the tablespace for a relation that doesn't have storage. + + + Other dependency flavors might be needed in future. 
Note in particular - that the current definition only supports roles as referenced objects. + that the current definition only supports roles and tablespaces as referenced + objects. @@ -7178,6 +7199,7 @@ SCRAM-SHA-256$<iteration count>:&l Nth slot. For example, a histogram slot would show the < operator that defines the sort order of the data. + Zero if the statistics kind does not require an operator. @@ -7822,7 +7844,7 @@ SCRAM-SHA-256$<iteration count>:&l The OID of the function to use when converting the data type for input to the procedural language (e.g., function parameters). Zero is stored - if this operation is not supported. + if the default behavior should be used. @@ -7834,7 +7856,7 @@ SCRAM-SHA-256$<iteration count>:&l The OID of the function to use when converting output from the procedural language (e.g., return values) to the data type. Zero is - stored if this operation is not supported. + stored if the default behavior should be used. @@ -7897,9 +7919,9 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_trigger.oid) - Parent trigger that this trigger is cloned from, zero if not a clone; - this happens when partitions are created or attached to a partitioned - table. 
+ Parent trigger that this trigger is cloned from (this happens when + partitions are created or attached to a partitioned table); + zero if not a clone @@ -7962,6 +7984,7 @@ SCRAM-SHA-256$<iteration count>:&l The table referenced by a referential integrity constraint + (zero if trigger is not for a referential integrity constraint) @@ -7973,6 +7996,7 @@ SCRAM-SHA-256$<iteration count>:&l The index supporting a unique, primary key, referential integrity, or exclusion constraint + (zero if trigger is not for one of these types of constraint) @@ -7982,7 +8006,8 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_constraint.oid) - The pg_constraint entry associated with the trigger, if any + The pg_constraint entry associated with the trigger + (zero if trigger is not for a constraint) @@ -8464,7 +8489,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_proc.oid) - OID of the parser's headline function + OID of the parser's headline function (zero if none) @@ -8552,7 +8577,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_proc.oid) - OID of the template's initialization function + OID of the template's initialization function (zero if none) @@ -8772,11 +8797,11 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_type.oid) - If typelem is not 0 then it + If typelem is not zero then it identifies another row in pg_type, - defining the type yielded by subscripting. This should be 0 - if typsubscript is 0. However, it can - be 0 when typsubscript isn't 0, if the + defining the type yielded by subscripting. This should be zero + if typsubscript is zero. However, it can + be zero when typsubscript isn't zero, if the handler doesn't need typelem to determine the subscripting result type. 
Note that a typelem dependency is @@ -8792,7 +8817,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_type.oid) - If typarray is not 0 then it + If typarray is not zero then it identifies another row in pg_type, which is the true array type having this type as element @@ -8824,7 +8849,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_proc.oid) - Input conversion function (binary format), or 0 if none + Input conversion function (binary format), or zero if none @@ -8834,7 +8859,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_proc.oid) - Output conversion function (binary format), or 0 if none + Output conversion function (binary format), or zero if none @@ -8844,7 +8869,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_proc.oid) - Type modifier input function, or 0 if type does not support modifiers + Type modifier input function, or zero if type does not support modifiers @@ -8854,7 +8879,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_proc.oid) - Type modifier output function, or 0 to use the standard format + Type modifier output function, or zero to use the standard format @@ -8864,7 +8889,8 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_proc.oid) - Custom function, or 0 to use the standard function + Custom function, + or zero to use the standard function @@ -9184,7 +9210,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_authid.oid) - OID of the local role being mapped, 0 if the user mapping is public + OID of the local role being mapped, or zero if the user mapping is public @@ -13389,7 +13415,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx (references pg_authid.oid) - OID of the local role being mapped, 0 if the user mapping is public + OID of the local role being mapped, or zero if the user mapping is public diff --git a/doc/src/sgml/client-auth.sgml b/doc/src/sgml/client-auth.sgml index 9a5c9318cee42..c4b9971a206cc 100644 --- a/doc/src/sgml/client-auth.sgml +++ b/doc/src/sgml/client-auth.sgml @@ -1265,11 
+1265,7 @@ omicron bryanh guest1 The location of the server's keytab file is specified by the configuration - parameter. The default is - FILE:/usr/local/pgsql/etc/krb5.keytab - (where the directory part is whatever was specified - as sysconfdir at build time). + linkend="guc-krb-server-keyfile"/> configuration parameter. For security reasons, it is recommended to use a separate keytab just for the PostgreSQL server rather than allowing the server to read the system keytab file. diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 048bd6aa08a6b..e17cdcc8167e8 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -263,8 +263,9 @@ shared_buffers = 128MB The SHOW command allows inspection of the - current value of all parameters. The corresponding function is - current_setting(setting_name text). + current value of any parameter. The corresponding SQL function is + current_setting(setting_name text) + (see ). @@ -273,8 +274,9 @@ shared_buffers = 128MB The SET command allows modification of the current value of those parameters that can be set locally to a session; it has no effect on other sessions. - The corresponding function is - set_config(setting_name, new_value, is_local). + The corresponding SQL function is + set_config(setting_name, new_value, is_local) + (see ). @@ -1057,10 +1059,16 @@ include_dir 'conf.d' - Sets the location of the Kerberos server key file. See - - for details. This parameter can only be set in the + Sets the location of the server's Kerberos key file. The default is + FILE:/usr/local/pgsql/etc/krb5.keytab + (where the directory part is whatever was specified + as sysconfdir at build time; use + pg_config --sysconfdir to determine that). + If this parameter is set to an empty string, it is ignored and a + system-dependent default is used. + This parameter can only be set in the postgresql.conf file or on the server command line. + See for more information. 
@@ -2110,7 +2118,7 @@ include_dir 'conf.d' The estimated cost for vacuuming a buffer that has to be read from disk. This represents the effort to lock the buffer pool, lookup the shared hash table, read the desired block in from - the disk and scan its content. The default value is 10. + the disk and scan its content. The default value is 2. @@ -6946,6 +6954,28 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' + + log_recovery_conflict_waits (boolean) + + log_recovery_conflict_waits configuration parameter + + + + + Controls whether a log message is produced when the startup process + waits longer than deadlock_timeout + for recovery conflicts. This is useful in determining if recovery + conflicts prevent the recovery from applying WAL. + + + + The default is off. This parameter can only be set + in the postgresql.conf file or on the server + command line. + + + + log_parameter_max_length (integer) @@ -8304,15 +8334,52 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; - Terminate any session with an open transaction that has been idle for - longer than the specified amount of time. This allows any - locks held by that session to be released and the connection slot to be reused; - it also allows tuples visible only to this transaction to be vacuumed. See - for more details about this. + Terminate any session that has been idle (that is, waiting for a + client query) within an open transaction for longer than the + specified amount of time. + If this value is specified without units, it is taken as milliseconds. + A value of zero (the default) disables the timeout. + - If this value is specified without units, it is taken as milliseconds. - A value of zero (the default) disables the timeout. + This option can be used to ensure that idle sessions do not hold + locks for an unreasonable amount of time. 
Even when no significant + locks are held, an open transaction prevents vacuuming away + recently-dead tuples that may be visible only to this transaction; + so remaining idle for a long time can contribute to table bloat. + See for more details. + + + + + + idle_session_timeout (integer) + + idle_session_timeout configuration parameter + + + + + Terminate any session that has been idle (that is, waiting for a + client query), but not within an open transaction, for longer than + the specified amount of time. + If this value is specified without units, it is taken as milliseconds. + A value of zero (the default) disables the timeout. + + + + Unlike the case with an open transaction, an idle session without a + transaction imposes no large costs on the server, so there is less + need to enable this timeout + than idle_in_transaction_session_timeout. + + + + Be wary of enforcing this timeout on connections made through + connection-pooling software or other middleware, as such a layer + may not react well to unexpected connection closure. It may be + helpful to enable this timeout only for interactive sessions, + perhaps by applying it only to particular users. @@ -9594,13 +9661,14 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' Preset Options - The following parameters are read-only, and are determined - when PostgreSQL is compiled or when it is - installed. As such, they have been excluded from the sample + The following parameters are read-only. + As such, they have been excluded from the sample postgresql.conf file. These options report various aspects of PostgreSQL behavior that might be of interest to certain applications, particularly administrative front-ends. + Most of them are determined when PostgreSQL + is compiled or when it is installed. 
@@ -9645,10 +9713,11 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' - On Unix systems this parameter reports the permissions of the data - directory defined by () at startup. + On Unix systems this parameter reports the permissions the data + directory (defined by ) + had at server startup. (On Microsoft Windows this parameter will always display - 0700). See + 0700.) See for more information. @@ -9689,6 +9758,23 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' + + in_hot_standby (boolean) + + in_hot_standby configuration parameter + + + + + Reports whether the server is currently in hot standby mode. When + this is on, all transactions are forced to be + read-only. Within a session, this can change only if the server is + promoted to be primary. See for more + information. + + + + lc_collate (string) @@ -9964,6 +10050,43 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' + + debug_invalidate_system_caches_always (integer) + + debug_invalidate_system_caches_always configuration parameter + + + + + When set to 1, each cache lookup for a system catalog entry is + invalidated at the first possible opportunity, irrespective of whether + anything that would render it invalid really occurred. Caching of + system catalogs is effectively disabled as a result, so the server + will run extremely slowly. Higher values run the cache invalidation + recursively, which is even slower and only useful for testing + in very specific scenarios. + + + + This option can be very helpful when trying to trigger + hard-to-reproduce bugs involving concurrency and catalog changes but + is otherwise rarely needed. See the source code files + inval.c and + pg_config_manual.h for details. + + + + This setting is supported but off by default (0) when + CLOBBER_CACHE_ENABLED is defined at compile time + (which happens automatically when using the + configure option + ).
In production builds, its value + will always be 0 and attempts to set it to another + value will raise an error. + + + + ignore_system_indexes (boolean) diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index ae2759be55318..d3ca4b6932007 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -192,13 +192,12 @@ pages. Server Applications - This section covers PostgreSQL server-related - applications in contrib. They are typically run on the - host where the database server resides. See also PostgreSQL server + itself. Currently, no such applications are included in the + contrib directory. See also for information about server applications that are part of the core PostgreSQL distribution. - &pgstandby; diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 58d168c763e96..7c341c8e3fa6d 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -4995,8 +4995,8 @@ SELECT * FROM pg_attribute Pseudo-Types - - + + Name diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index c12a32c8c7f02..1e9a4625cc631 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -1740,7 +1740,7 @@ REVOKE ALL ON accounts FROM PUBLIC; TRUNCATE - Allows TRUNCATE on a table, view, etc. + Allows TRUNCATE on a table. @@ -4027,8 +4027,8 @@ ALTER INDEX measurement_city_id_logdate_key - BEFORE ROW triggers cannot change which partition - is the final destination for a new row. + BEFORE ROW triggers on INSERT + cannot change which partition is the final destination for a new row. diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml index 9c9293414c581..854913ae5fc94 100644 --- a/doc/src/sgml/fdwhandler.sgml +++ b/doc/src/sgml/fdwhandler.sgml @@ -523,8 +523,9 @@ BeginForeignModify(ModifyTableState *mtstate, Begin executing a foreign table modification operation. This routine is called during executor startup. It should perform any initialization needed prior to the actual table modifications. 
Subsequently, - ExecForeignInsert, ExecForeignUpdate or - ExecForeignDelete will be called for each tuple to be + ExecForeignInsert/ExecForeignBatchInsert, + ExecForeignUpdate or + ExecForeignDelete will be called for tuple(s) to be inserted, updated, or deleted. @@ -614,6 +615,81 @@ ExecForeignInsert(EState *estate, +TupleTableSlot ** +ExecForeignBatchInsert(EState *estate, + ResultRelInfo *rinfo, + TupleTableSlot **slots, + TupleTableSlot **planSlots, + int *numSlots); + + + Insert multiple tuples in bulk into the foreign table. + The parameters are the same as for ExecForeignInsert + except slots and planSlots contain + multiple tuples and *numSlots specifies the number of + tuples in those arrays. + + + + The return value is an array of slots containing the data that was + actually inserted (this might differ from the data supplied, for + example as a result of trigger actions.) + The passed-in slots can be re-used for this purpose. + The number of successfully inserted tuples is returned in + *numSlots. + + + + The data in the returned slot is used only if the INSERT + statement involves a view + WITH CHECK OPTION; or if the foreign table has + an AFTER ROW trigger. Triggers require all columns, + but the FDW could choose to optimize away returning some or all columns + depending on the contents of the + WITH CHECK OPTION constraints. + + + + If the ExecForeignBatchInsert or + GetForeignModifyBatchSize pointer is set to + NULL, attempts to insert into the foreign table will + use ExecForeignInsert. + This function is not used if the INSERT has the + RETURNING clause. + + + + Note that this function is also called when inserting routed tuples into + a foreign-table partition. See the callback functions + described below that allow the FDW to support that. + + + + +int +GetForeignModifyBatchSize(ResultRelInfo *rinfo); + + + Report the maximum number of tuples that a single + ExecForeignBatchInsert call can handle for + the specified foreign table.
That is, the executor passes at most + the number of tuples that this function returns to + ExecForeignBatchInsert. + rinfo is the ResultRelInfo struct describing + the target foreign table. + The FDW is expected to provide a foreign server and/or foreign + table option for the user to set this value, or some hard-coded value. + + + + If the ExecForeignBatchInsert or + GetForeignModifyBatchSize pointer is set to + NULL, attempts to insert into the foreign table will + use ExecForeignInsert. + + + + TupleTableSlot * ExecForeignUpdate(EState *estate, ResultRelInfo *rinfo, @@ -741,8 +817,9 @@ BeginForeignInsert(ModifyTableState *mtstate, in both cases when it is the partition chosen for tuple routing and the target specified in a COPY FROM command. It should perform any initialization needed prior to the actual insertion. - Subsequently, ExecForeignInsert will be called for - each tuple to be inserted into the foreign table. + Subsequently, ExecForeignInsert or + ExecForeignBatchInsert will be called for + tuple(s) to be inserted into the foreign table. @@ -773,8 +850,8 @@ BeginForeignInsert(ModifyTableState *mtstate, Note that if the FDW does not support routable foreign-table partitions and/or executing COPY FROM on foreign tables, this - function or ExecForeignInsert subsequently called - must throw error as needed. + function or ExecForeignInsert/ExecForeignBatchInsert + subsequently called must throw error as needed.
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 38e8aa0bbf90f..db1d369743e74 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -138,7 +138,6 @@ - diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 93d17e4b558af..b7150510aba46 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -3262,7 +3262,6 @@ repeat('Pg', 4) PgPgPgPg right ( string text, n integer ) - ) text @@ -3284,7 +3283,6 @@ repeat('Pg', 4) PgPgPgPg rpad ( string text, length integer , fill text ) - ) text @@ -3950,15 +3948,16 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); trim - trim ( BOTH + trim ( LEADING | TRAILING | BOTH bytesremoved bytea FROM bytes bytea ) bytea Removes the longest string containing only bytes appearing in - bytesremoved from the start - and end of bytes. + bytesremoved from the start, + end, or both ends (BOTH is the default) + of bytes. trim('\x9012'::bytea from '\x1234567890'::bytea) @@ -3968,7 +3967,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); - trim ( BOTH FROM + trim ( LEADING | TRAILING | BOTH FROM bytes bytea, bytesremoved bytea ) bytea @@ -4111,6 +4110,26 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); + + + + ltrim + + ltrim ( bytes bytea, + bytesremoved bytea ) + bytea + + + Removes the longest string containing only bytes appearing in + bytesremoved from the start of + bytes. + + + ltrim('\x1234567890'::bytea, '\x9012'::bytea) + \x34567890 + + + @@ -4129,6 +4148,26 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); + + + + rtrim + + rtrim ( bytes bytea, + bytesremoved bytea ) + bytea + + + Removes the longest string containing only bytes appearing in + bytesremoved from the end of + bytes. 
+ + + rtrim('\x1234567890'::bytea, '\x9012'::bytea) + \x12345678 + + + @@ -5290,7 +5329,7 @@ substring('foobar' similar '#"o_b#"%' escape '#') NULL - 'thomas' ~ '.*thom.*' + 'thomas' ~ 't.*ma' t @@ -5304,7 +5343,7 @@ substring('foobar' similar '#"o_b#"%' escape '#') NULL - 'thomas' ~* '.*Thom.*' + 'thomas' ~* 'T.*ma' t @@ -5318,8 +5357,8 @@ substring('foobar' similar '#"o_b#"%' escape '#') NULL - 'thomas' !~ '.*thomas.*' - f + 'thomas' !~ 't.*max' + t @@ -5332,8 +5371,8 @@ substring('foobar' similar '#"o_b#"%' escape '#') NULL - 'thomas' !~* '.*vadim.*' - t + 'thomas' !~* 'T.*ma' + f @@ -5367,10 +5406,12 @@ substring('foobar' similar '#"o_b#"%' escape '#') NULL Some examples: -'abc' ~ 'abc' true -'abc' ~ '^a' true -'abc' ~ '(b|d)' true -'abc' ~ '^(b|c)' false +'abcd' ~ 'bc' true +'abcd' ~ 'a.c' true — dot matches any character +'abcd' ~ 'a.*d' true — * repeats the preceding pattern item +'abcd' ~ '(b|x)' true — | means OR, parentheses group +'abcd' ~ '^a' true — ^ anchors to start of string +'abcd' ~ '^(b|c)' false — would match except for anchoring @@ -8190,14 +8231,30 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); linkend="datatype-datetime"/>. + + In addition, the usual comparison operators shown in + are available for the + date/time types. Dates and timestamps (with or without time zone) are + all comparable, while times (with or without time zone) and intervals + can only be compared to other values of the same data type. When + comparing a timestamp without time zone to a timestamp with time zone, + the former value is assumed to be given in the time zone specified by + the configuration parameter, and is + rotated to UTC for comparison to the latter value (which is already + in UTC internally). Similarly, a date value is assumed to represent + midnight in the TimeZone zone when comparing it + to a timestamp. 
+ + All the functions and operators described below that take time or timestamp inputs actually come in two variants: one that takes time with time zone or timestamp with time zone, and one that takes time without time zone or timestamp without time zone. For brevity, these variants are not shown separately. Also, the + and * operators come in commutative pairs (for - example both date + integer and integer + date); we show only one of each - such pair. + example both date + integer + and integer + date); we show + only one of each such pair.
@@ -9338,9 +9395,11 @@ SELECT EXTRACT(DOY FROM TIMESTAMP '2001-02-16 20:38:40'); For timestamp with time zone values, the - number of seconds since 1970-01-01 00:00:00 UTC (can be negative); + number of seconds since 1970-01-01 00:00:00 UTC (negative for + timestamps before that); for date and timestamp values, the - number of seconds since 1970-01-01 00:00:00 local time; + nominal number of seconds since 1970-01-01 00:00:00, + without regard to timezone or daylight-savings rules; for interval values, the total number of seconds in the interval @@ -9349,18 +9408,29 @@ SELECT EXTRACT(DOY FROM TIMESTAMP '2001-02-16 20:38:40'); SELECT EXTRACT(EPOCH FROM TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40.12-08'); Result: 982384720.12 +SELECT EXTRACT(EPOCH FROM TIMESTAMP '2001-02-16 20:38:40.12'); +Result: 982355920.12 + SELECT EXTRACT(EPOCH FROM INTERVAL '5 days 3 hours'); Result: 442800 - You can convert an epoch value back to a time stamp + You can convert an epoch value back to a timestamp with time zone with to_timestamp: SELECT to_timestamp(982384720.12); Result: 2001-02-17 04:38:40.12+00 + + + Beware that applying to_timestamp to an epoch + extracted from a date or timestamp value + could produce a misleading result: the result will effectively + assume that the original value had been given in UTC, which might + not be the case. + @@ -16209,6 +16279,24 @@ strict $.track.segments[*].location + + The .** accessor can lead to surprising results + when using the lax mode. For instance, the following query selects every + HR value twice: + +lax $.**.HR + + This happens because the .** accessor selects both + the segments array and each of its elements, while + the .HR accessor automatically unwraps arrays when + using the lax mode. To avoid surprising results, we recommend using + the .** accessor only in the strict mode. The + following query selects each HR value just once: + +strict $.**.HR + + + @@ -17837,7 +17925,8 @@ SELECT NULLIF(value, '(none)') ... 
setof anyelement - Expands an array to a set of rows. + Expands an array into a set of rows. + The array's elements are read out in storage order. unnest(ARRAY[1,2]) @@ -17845,6 +17934,16 @@ SELECT NULLIF(value, '(none)') ... 1 2 + + + + unnest(ARRAY[['foo','bar'],['baz','quux']]) + + + foo + bar + baz + quux @@ -17855,10 +17954,10 @@ SELECT NULLIF(value, '(none)') ... setof anyelement, anyelement [, ... ] - Expands multiple arrays (possibly of different data types) to a set of + Expands multiple arrays (possibly of different data types) into a set of rows. If the arrays are not all the same length then the shorter ones - are padded with NULLs. This is only allowed in a - query's FROM clause; see . + are padded with NULLs. This form is only allowed + in a query's FROM clause; see . select * from unnest(ARRAY[1,2], ARRAY['foo','bar','baz']) as x(a,b) @@ -18182,6 +18281,20 @@ SELECT NULLIF(value, '(none)') ... + + + anyrange @> anymultirange + boolean + + + Does the range contain the multirange? + + + '[2,4)'::int4range @> '{[2,3)}'::int4multirange + t + + + anymultirange <@ anymultirange @@ -22676,6 +22789,38 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); + + + + pg_get_catalog_foreign_keys + + pg_get_catalog_foreign_keys () + setof record + ( fktable regclass, + fkcols text[], + pktable regclass, + pkcols text[], + is_array boolean, + is_opt boolean ) + + + Returns a set of records describing the foreign key relationships + that exist within the PostgreSQL system + catalogs. + The fktable column contains the name of the + referencing catalog, and the fkcols column + contains the name(s) of the referencing column(s). Similarly, + the pktable column contains the name of the + referenced catalog, and the pkcols column + contains the name(s) of the referenced column(s). + If is_array is true, the last referencing + column is an array, each of whose elements should match some entry + in the referenced catalog. 
+ If is_opt is true, the referencing column(s) + are allowed to contain zeroes instead of a valid reference. + + + @@ -24482,8 +24627,9 @@ SELECT collation for ('foo' COLLATE "de_DE"); setting setting_name. If there is no such setting, current_setting throws an error unless missing_ok is supplied and - is true. This function corresponds to - the SQL command SHOW. + is true (in which case NULL is returned). + This function corresponds to + the SQL command . current_setting('datestyle') @@ -24506,10 +24652,10 @@ SELECT collation for ('foo' COLLATE "de_DE"); Sets the parameter setting_name to new_value, and returns that value. If is_local is true, the new - value will only apply for the current transaction. If you want the new - value to apply for the current session, use false - instead. This function corresponds to the SQL - command SET. + value will only apply during the current transaction. If you want the + new value to apply for the rest of the current session, + use false instead. This function corresponds to + the SQL command . 
set_config('log_statement_stats', 'off', false) diff --git a/doc/src/sgml/generate-errcodes-table.pl b/doc/src/sgml/generate-errcodes-table.pl index 66a3ee00298a1..bbce3762c2913 100644 --- a/doc/src/sgml/generate-errcodes-table.pl +++ b/doc/src/sgml/generate-errcodes-table.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl # # Generate the errcodes-table.sgml file from errcodes.txt -# Copyright (c) 2000-2020, PostgreSQL Global Development Group +# Copyright (c) 2000-2021, PostgreSQL Global Development Group use strict; use warnings; diff --git a/doc/src/sgml/generate-keywords-table.pl b/doc/src/sgml/generate-keywords-table.pl index 6332d65aadc78..30037c773d9b7 100644 --- a/doc/src/sgml/generate-keywords-table.pl +++ b/doc/src/sgml/generate-keywords-table.pl @@ -2,7 +2,7 @@ # # Generate the keywords table for the documentation's SQL Key Words appendix # -# Copyright (c) 2019-2020, PostgreSQL Global Development Group +# Copyright (c) 2019-2021, PostgreSQL Global Development Group use strict; use warnings; diff --git a/doc/src/sgml/gist.sgml b/doc/src/sgml/gist.sgml index d1b6cc9a01a31..f22efd1f6e898 100644 --- a/doc/src/sgml/gist.sgml +++ b/doc/src/sgml/gist.sgml @@ -54,6 +54,9 @@
Built-in <acronym>GiST</acronym> Operator Classes + + + Name @@ -64,114 +67,145 @@ box_ops - << (box,box) - <-> (box,point) + << (box, box) + <-> (box, point) - &< (box,box) - && (box,box) - &> (box,box) - >> (box,box) - ~= (box,box) - @> (box,box) - <@ (box,box) - &<| (box,box) - <<| (box,box) - |>> (box,box) - |&> (box,box) - ~ (box,box) - @ (box,box) + &< (box, box) + && (box, box) + &> (box, box) + >> (box, box) + ~= (box, box) + @> (box, box) + <@ (box, box) + &<| (box, box) + <<| (box, box) + |>> (box, box) + |&> (box, box) + ~ (box, box) + @ (box, box) circle_ops - << (circle,circle) - <-> (circle,point) + << (circle, circle) + <-> (circle, point) - &< (circle,circle) - &> (circle,circle) - >> (circle,circle) - <@ (circle,circle) - @> (circle,circle) - ~= (circle,circle) - && (circle,circle) - |>> (circle,circle) - <<| (circle,circle) - &<| (circle,circle) - |&> (circle,circle) - @ (circle,circle) - ~ (circle,circle) + &< (circle, circle) + &> (circle, circle) + >> (circle, circle) + <@ (circle, circle) + @> (circle, circle) + ~= (circle, circle) + && (circle, circle) + |>> (circle, circle) + <<| (circle, circle) + &<| (circle, circle) + |&> (circle, circle) + @ (circle, circle) + ~ (circle, circle) inet_ops - << (inet,inet) + << (inet, inet) - <<= (inet,inet) - >> (inet,inet) - >>= (inet,inet) - = (inet,inet) - <> (inet,inet) - < (inet,inet) - <= (inet,inet) - > (inet,inet) - >= (inet,inet) - && (inet,inet) + <<= (inet, inet) + >> (inet, inet) + >>= (inet, inet) + = (inet, inet) + <> (inet, inet) + < (inet, inet) + <= (inet, inet) + > (inet, inet) + >= (inet, inet) + && (inet, inet) + + + multirange_ops + = (anymultirange, anymultirange) + + + && (anymultirange, anymultirange) + && (anymultirange, anyrange) + @> (anymultirange, anyelement) + @> (anymultirange, anymultirange) + @> (anymultirange, anyrange) + <@ (anymultirange, anymultirange) + <@ (anymultirange, anyrange) + << (anymultirange, anymultirange) + << (anymultirange, anyrange) + >> (anymultirange, 
anymultirange) + >> (anymultirange, anyrange) + &< (anymultirange, anymultirange) + &< (anymultirange, anyrange) + &> (anymultirange, anymultirange) + &> (anymultirange, anyrange) + -|- (anymultirange, anymultirange) + -|- (anymultirange, anyrange) point_ops - |>> (point,point) - <-> (point,point) + |>> (point, point) + <-> (point, point) - << (point,point) - >> (point,point) - <<| (point,point) - ~= (point,point) - <@ (point,box) - <@ (point,polygon) - <@ (point,circle) + << (point, point) + >> (point, point) + <<| (point, point) + ~= (point, point) + <@ (point, box) + <@ (point, polygon) + <@ (point, circle) poly_ops - << (polygon,polygon) - <-> (polygon,point) + << (polygon, polygon) + <-> (polygon, point) - &< (polygon,polygon) - &> (polygon,polygon) - >> (polygon,polygon) - <@ (polygon,polygon) - @> (polygon,polygon) - ~= (polygon,polygon) - && (polygon,polygon) - <<| (polygon,polygon) - &<| (polygon,polygon) - |&> (polygon,polygon) - |>> (polygon,polygon) - @ (polygon,polygon) - ~ (polygon,polygon) + &< (polygon, polygon) + &> (polygon, polygon) + >> (polygon, polygon) + <@ (polygon, polygon) + @> (polygon, polygon) + ~= (polygon, polygon) + && (polygon, polygon) + <<| (polygon, polygon) + &<| (polygon, polygon) + |&> (polygon, polygon) + |>> (polygon, polygon) + @ (polygon, polygon) + ~ (polygon, polygon) - range_ops - = (anyrange,anyrange) - + range_ops + = (anyrange, anyrange) + - && (anyrange,anyrange) - @> (anyrange,anyelement) - @> (anyrange,anyrange) - <@ (anyrange,anyrange) - << (anyrange,anyrange) - >> (anyrange,anyrange) - &< (anyrange,anyrange) - &> (anyrange,anyrange) - -|- (anyrange,anyrange) + && (anyrange, anyrange) + && (anyrange, anymultirange) + @> (anyrange, anyelement) + @> (anyrange, anyrange) + @> (anyrange, anymultirange) + <@ (anyrange, anyrange) + <@ (anyrange, anymultirange) + << (anyrange, anyrange) + << (anyrange, anymultirange) + >> (anyrange, anyrange) + >> (anyrange, anymultirange) + &< (anyrange, anyrange) + &< (anyrange, 
anymultirange) + &> (anyrange, anyrange) + &> (anyrange, anymultirange) + -|- (anyrange, anyrange) + -|- (anyrange, anymultirange) tsquery_ops - <@ (tsquery,tsquery) + <@ (tsquery, tsquery) - @> (tsquery,tsquery) + @> (tsquery, tsquery) tsvector_ops - @@ (tsvector,tsquery) + @@ (tsvector, tsquery) diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml index 19d7bd2b28faa..f49f5c01081db 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -699,11 +699,9 @@ protocol to make nodes agree on a serializable transactional order. - Do not use pg_standby or similar tools with the built-in standby mode - described here. should return immediately + should return immediately if the file does not exist; the server will retry the command again if - necessary. See - for using tools like pg_standby. + necessary. @@ -1488,152 +1486,6 @@ synchronous_standby_names = 'ANY 2 (s1, s2, s3)' - - Alternative Method for Log Shipping - - - An alternative to the built-in standby mode described in the previous - sections is to use a restore_command that polls the archive location. - This was the only option available in versions 8.4 and below. See the - module for a reference implementation of this. - - - - Note that in this mode, the server will apply WAL one file at a - time, so if you use the standby server for queries (see Hot Standby), - there is a delay between an action in the primary and when the - action becomes visible in the standby, corresponding to the time it takes - to fill up the WAL file. archive_timeout can be used to make that delay - shorter. Also note that you can't combine streaming replication with - this method. - - - - The operations that occur on both primary and standby servers are - normal continuous archiving and recovery tasks. 
The only point of - contact between the two database servers is the archive of WAL files - that both share: primary writing to the archive, standby reading from - the archive. Care must be taken to ensure that WAL archives from separate - primary servers do not become mixed together or confused. The archive - need not be large if it is only required for standby operation. - - - - The magic that makes the two loosely coupled servers work together is - simply a restore_command used on the standby that, - when asked for the next WAL file, waits for it to become available from - the primary. Normal recovery - processing would request a file from the WAL archive, reporting failure - if the file was unavailable. For standby processing it is normal for - the next WAL file to be unavailable, so the standby must wait for - it to appear. For files ending in - .history there is no need to wait, and a non-zero return - code must be returned. A waiting restore_command can be - written as a custom script that loops after polling for the existence of - the next WAL file. There must also be some way to trigger failover, which - should interrupt the restore_command, break the loop and - return a file-not-found error to the standby server. This ends recovery - and the standby will then come up as a normal server. - - - - Pseudocode for a suitable restore_command is: - -triggered = false; -while (!NextWALFileReady() && !triggered) -{ - sleep(100000L); /* wait for ~0.1 sec */ - if (CheckForExternalTrigger()) - triggered = true; -} -if (!triggered) - CopyWALFileForRecovery(); - - - - - A working example of a waiting restore_command is provided - in the module. It - should be used as a reference on how to correctly implement the logic - described above. It can also be extended as needed to support specific - configurations and environments. - - - - The method for triggering failover is an important part of planning - and design. One potential option is the restore_command - command. 
It is executed once for each WAL file, but the process - running the restore_command is created and dies for - each file, so there is no daemon or server process, and - signals or a signal handler cannot be used. Therefore, the - restore_command is not suitable to trigger failover. - It is possible to use a simple timeout facility, especially if - used in conjunction with a known archive_timeout - setting on the primary. However, this is somewhat error prone - since a network problem or busy primary server might be sufficient - to initiate failover. A notification mechanism such as the explicit - creation of a trigger file is ideal, if this can be arranged. - - - - Implementation - - - The short procedure for configuring a standby server using this alternative - method is as follows. For - full details of each step, refer to previous sections as noted. - - - - Set up primary and standby systems as nearly identical as - possible, including two identical copies of - PostgreSQL at the same release level. - - - - - Set up continuous archiving from the primary to a WAL archive - directory on the standby server. Ensure that - , - and - - are set appropriately on the primary - (see ). - - - - - Make a base backup of the primary server (see ), and load this data onto the standby. - - - - - Begin recovery on the standby server from the local WAL - archive, using restore_command that waits - as described previously (see ). - - - - - - - Recovery treats the WAL archive as read-only, so once a WAL file has - been copied to the standby system it can be copied to tape at the same - time as it is being read by the standby database server. - Thus, running a standby server for high availability can be performed at - the same time as files are stored for longer term disaster recovery - purposes. - - - - For testing purposes, it is possible to run both primary and standby - servers on the same system. 
This does not provide any worthwhile - improvement in server robustness, nor would it be described as HA. - - - - Hot Standby @@ -1859,8 +1711,11 @@ if (!triggered) - Users will be able to tell whether their session is read-only by - issuing SHOW transaction_read_only. In addition, a set of + Users can determine whether hot standby is currently active for their + session by issuing SHOW in_hot_standby. + (In server versions before 14, the in_hot_standby + parameter did not exist; a workable substitute method for older servers + is SHOW transaction_read_only.) In addition, a set of functions () allow users to access information about the standby server. These allow you to write programs that are aware of the current state of the database. These @@ -2068,6 +1923,12 @@ if (!triggered) server. The pg_stat_database system view also contains summary information. + + + Users can control whether a log message is produced when WAL replay is waiting + longer than deadlock_timeout for conflicts. This + is controlled by the parameter. + @@ -2120,18 +1981,14 @@ LOG: database system is ready to accept read only connections - The setting of some parameters on the standby will need reconfiguration - if they have been changed on the primary. For these parameters, - the value on the standby must - be equal to or greater than the value on the primary. - Therefore, if you want to increase these values, you should do so on all - standby servers first, before applying the changes to the primary server. - Conversely, if you want to decrease these values, you should do so on the - primary server first, before applying the changes to all standby servers. - If these parameters - are not set high enough then the standby will refuse to start. - Higher values can then be supplied and the server - restarted to begin recovery again. These parameters are: + The settings of some parameters determine the size of shared memory for + tracking transaction IDs, locks, and prepared transactions. 
These shared + memory structures must be no smaller on a standby than on the primary in + order to ensure that the standby does not run out of shared memory during + recovery. For example, if the primary had used a prepared transaction but + the standby had not allocated any shared memory for tracking prepared + transactions, then recovery could not continue until the standby's + configuration is changed. The parameters affected are: @@ -2160,6 +2017,37 @@ LOG: database system is ready to accept read only connections + + The easiest way to ensure this does not become a problem is to have these + parameters set on the standbys to values equal to or greater than on the + primary. Therefore, if you want to increase these values, you should do + so on all standby servers first, before applying the changes to the + primary server. Conversely, if you want to decrease these values, you + should do so on the primary server first, before applying the changes to + all standby servers. Keep in mind that when a standby is promoted, it + becomes the new reference for the required parameter settings for the + standbys that follow it. Therefore, to avoid this becoming a problem + during a switchover or failover, it is recommended to keep these settings + the same on all standby servers. + + + + The WAL tracks changes to these parameters on the + primary. If a hot standby processes WAL that indicates that the current + value on the primary is higher than its own value, it will log a warning + and pause recovery, for example: + +WARNING: hot standby is not possible because of insufficient parameter settings +DETAIL: max_connections = 80 is a lower setting than on the primary server, where its value was 100. +LOG: recovery has paused +DETAIL: If recovery is unpaused, the server will shut down. +HINT: You can then restart the server after making the necessary configuration changes. 
+ + At that point, the settings on the standby need to be updated and the + instance restarted before recovery can continue. If the standby is not a + hot standby, then when it encounters the incompatible parameter change, it + will shut down immediately without pausing, since there is then no value + in keeping it up. diff --git a/doc/src/sgml/hstore.sgml b/doc/src/sgml/hstore.sgml index 080706280e809..e867fcc5aee55 100644 --- a/doc/src/sgml/hstore.sgml +++ b/doc/src/sgml/hstore.sgml @@ -883,7 +883,7 @@ SELECT * FROM each('aaa=>bq, b=>NULL, ""=>1'); Using a table: -SELECT (each(h)).key, (each(h)).value INTO stat FROM testhstore; +CREATE TABLE stat AS SELECT (each(h)).key, (each(h)).value FROM testhstore; diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index f00268d5b51f2..ec5741df6d164 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -293,6 +293,7 @@ aminsert (Relation indexRelation, ItemPointer heap_tid, Relation heapRelation, IndexUniqueCheck checkUnique, + bool indexUnchanged, IndexInfo *indexInfo); Insert a new tuple into an existing index. The values and @@ -308,6 +309,20 @@ aminsert (Relation indexRelation, look into the heap to verify tuple liveness). + + The indexUnchanged boolean value gives a hint + about the nature of the tuple to be indexed. When it is true, + the tuple is a duplicate of some existing tuple in the index. The + new tuple is a logically unchanged successor MVCC tuple version. This + happens when an UPDATE takes place that does not + modify any columns covered by the index, but nevertheless requires a + new version in the index. The index AM may use this hint to decide + to apply bottom-up index deletion in parts of the index where many + versions of the same logical row accumulate. Note that updating a + non-key column does not affect the value of + indexUnchanged. + + The function's Boolean result value is significant only when checkUnique is UNIQUE_CHECK_PARTIAL. 
diff --git a/doc/src/sgml/install-windows.sgml b/doc/src/sgml/install-windows.sgml index 844ef2cbd29af..47e5f7c8ae43b 100644 --- a/doc/src/sgml/install-windows.sgml +++ b/doc/src/sgml/install-windows.sgml @@ -257,7 +257,8 @@ $ENV{MSBFLAGS}="/m"; - The obsolete winflex binaries distributed on the PostgreSQL FTP site + The obsolete winflex binaries distributed in the + downloads section of the PostgreSQL web site and referenced in older documentation will fail with flex: fatal internal error, exec failed on 64-bit Windows hosts. Use Flex from MSYS instead. diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index 0ac1cb999992f..66ad4ba93808f 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -967,7 +967,7 @@ build-postgresql: - + OpenSSL SSL @@ -976,11 +976,22 @@ build-postgresql: Build with support for SSL (encrypted) - connections. This requires the OpenSSL - package to be installed. configure will check - for the required header files and libraries to make sure that - your OpenSSL installation is sufficient - before proceeding. + connections. The only LIBRARY + supported is . This requires the + OpenSSL package to be installed. + configure will check for the required + header files and libraries to make sure that your + OpenSSL installation is sufficient + before proceeding. + + + + + + + + + Obsolete equivalent of --with-ssl=openssl. @@ -2353,6 +2364,17 @@ make MAX_CONNECTIONS=5 check installation on + + To build PostgreSQL from source + on macOS, you will need to install Apple's + command line developer tools, which can be done by issuing + +xcode-select --install + + (note that this will pop up a GUI dialog window for confirmation). + You may or may not wish to also install Xcode. 
+ + On recent macOS releases, it's necessary to embed the sysroot path in the include switches used to @@ -2369,7 +2391,7 @@ make PG_SYSROOT=/desired/path all To find out the appropriate path on your machine, run -xcodebuild -version -sdk macosx Path +xcrun --show-sdk-path Note that building an extension using a different sysroot version than was used to build the core server is not really recommended; in the @@ -2383,6 +2405,19 @@ xcodebuild -version -sdk macosx Path ./configure ... PG_SYSROOT=/desired/path + This would primarily be useful to cross-compile for some other + macOS version. There is no guarantee that the resulting executables + will run on the current host. + + + + To suppress the options altogether, use + +./configure ... PG_SYSROOT=none + + (any nonexistent pathname will work). This might be useful if you wish + to build with a non-Apple compiler, but beware that that case is not + tested or supported by the PostgreSQL developers. diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml index 5b9a5557a40fd..e16dd6973d2c0 100644 --- a/doc/src/sgml/json.sgml +++ b/doc/src/sgml/json.sgml @@ -602,6 +602,104 @@ SELECT jdoc->'guid', jdoc->'name' FROM api WHERE jdoc @> '{"tags": ["qu + + <type>jsonb</type> Subscripting + + The jsonb data type supports array-style subscripting expressions + to extract and modify elements. Nested values can be indicated by chaining + subscripting expressions, following the same rules as the path + argument in the jsonb_set function. If a jsonb + value is an array, numeric subscripts start at zero, and negative integers count + backwards from the last element of the array. Slice expressions are not supported. + The result of a subscripting expression is always of the jsonb data type. + + + + UPDATE statements may use subscripting in the + SET clause to modify jsonb values. Subscript + paths must be traversible for all affected values insofar as they exist. 
For + instance, the path val['a']['b']['c'] can be traversed all + the way to c if every val, + val['a'], and val['a']['b'] is an + object. If any val['a'] or val['a']['b'] + is not defined, it will be created as an empty object and filled as + necessary. However, if any val itself or one of the + intermediary values is defined as a non-object such as a string, number, or + jsonb null, traversal cannot proceed so + an error is raised and the transaction aborted. + + + + An example of subscripting syntax: + + + +-- Extract object value by key +SELECT ('{"a": 1}'::jsonb)['a']; + +-- Extract nested object value by key path +SELECT ('{"a": {"b": {"c": 1}}}'::jsonb)['a']['b']['c']; + +-- Extract array element by index +SELECT ('[1, "2", null]'::jsonb)[1]; + +-- Update object value by key. Note the quotes around '1': the assigned +-- value must be of the jsonb type as well +UPDATE table_name SET jsonb_field['key'] = '1'; + +-- This will raise an error if any record's jsonb_field['a']['b'] is something +-- other than an object. For example, the value {"a": 1} has no 'b' key. +UPDATE table_name SET jsonb_field['a']['b']['c'] = '1'; + +-- Filter records using a WHERE clause with subscripting. Since the result of +-- subscripting is jsonb, the value we compare it against must also be jsonb. +-- The double quotes make "value" also a valid jsonb string. +SELECT * FROM table_name WHERE jsonb_field['key'] = '"value"'; + + + jsonb assignment via subscripting handles a few edge cases + differently from jsonb_set. 
When a source jsonb + value is NULL, assignment via subscripting will proceed + as if it was an empty JSON value of the type (object or array) implied by the + subscript key: + + +-- Where jsonb_field was NULL, it is now {"a": 1} +UPDATE table_name SET jsonb_field['a'] = '1'; + +-- Where jsonb_field was NULL, it is now [1] +UPDATE table_name SET jsonb_field[0] = '1'; + + + If an index is specified for an array containing too few elements, + NULL elements will be appended until the index is reachable + and the value can be set. + + +-- Where jsonb_field was [], it is now [null, null, 2]; +-- where jsonb_field was [0], it is now [0, null, 2] +UPDATE table_name SET jsonb_field[2] = '2'; + + + A jsonb value will accept assignments to nonexistent subscript + paths as long as the last existing element to be traversed is an object or + array, as implied by the corresponding subscript (the element indicated by + the last subscript in the path is not traversed and may be anything). Nested + array and object structures will be created, and in the former case + null-padded, as specified by the subscript path until the + assigned value can be placed. + + +-- Where jsonb_field was {}, it is now {'a': [{'b': 1}]} +UPDATE table_name SET jsonb_field['a'][0]['b'] = '1'; + +-- Where jsonb_field was [], it is now [null, {'a': 1}] +UPDATE table_name SET jsonb_field[1]['a'] = '1'; + + + + + Transforms diff --git a/doc/src/sgml/legal.sgml b/doc/src/sgml/legal.sgml index 904512f9f4ab0..f3d31b002aa0b 100644 --- a/doc/src/sgml/legal.sgml +++ b/doc/src/sgml/legal.sgml @@ -1,9 +1,9 @@ -2020 +2021 - 1996–2020 + 1996–2021 The PostgreSQL Global Development Group @@ -11,7 +11,7 @@ Legal Notice - PostgreSQL is Copyright © 1996–2020 + PostgreSQL is Copyright © 1996–2021 by the PostgreSQL Global Development Group. 
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index a5db58d46806c..b7a82453f0da1 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -645,8 +645,8 @@ void PQreset(PGconn *conn); This function will close the connection - to the server and attempt to reestablish a new - connection to the same server, using all the same + to the server and attempt to establish a new + connection, using all the same parameters previously used. This might be useful for error recovery if a working connection is lost. @@ -669,7 +669,7 @@ PostgresPollingStatusType PQresetPoll(PGconn *conn); These functions will close the connection to the server and attempt to - reestablish a new connection to the same server, using all the same + establish a new connection, using all the same parameters previously used. This can be useful for error recovery if a working connection is lost. They differ from (above) in that they act in a nonblocking manner. These functions suffer from the same @@ -2150,6 +2150,7 @@ const char *PQparameterStatus(const PGconn *conn, const char *paramName); server_encoding, client_encoding, application_name, + in_hot_standby, is_superuser, session_authorization, DateStyle, @@ -2162,7 +2163,10 @@ const char *PQparameterStatus(const PGconn *conn, const char *paramName); standard_conforming_strings was not reported by releases before 8.1; IntervalStyle was not reported by releases before 8.4; - application_name was not reported by releases before 9.0.) + application_name was not reported by releases before + 9.0; + in_hot_standby was not reported by releases before + 14.) 
Note that server_version, server_encoding and @@ -6833,8 +6837,8 @@ main(void) if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "Connection to database failed: %s", - PQerrorMessage(conn)); + /* PQerrorMessage's result includes a trailing newline */ + fprintf(stderr, "%s", PQerrorMessage(conn)); PQfinish(conn); return 1; } @@ -8292,8 +8296,7 @@ main(int argc, char **argv) /* Check to see that the backend connection was successfully made */ if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "Connection to database failed: %s", - PQerrorMessage(conn)); + fprintf(stderr, "%s", PQerrorMessage(conn)); exit_nicely(conn); } @@ -8462,8 +8465,7 @@ main(int argc, char **argv) /* Check to see that the backend connection was successfully made */ if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "Connection to database failed: %s", - PQerrorMessage(conn)); + fprintf(stderr, "%s", PQerrorMessage(conn)); exit_nicely(conn); } @@ -8690,8 +8692,7 @@ main(int argc, char **argv) /* Check to see that the backend connection was successfully made */ if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "Connection to database failed: %s", - PQerrorMessage(conn)); + fprintf(stderr, "%s", PQerrorMessage(conn)); exit_nicely(conn); } diff --git a/doc/src/sgml/lobj.sgml b/doc/src/sgml/lobj.sgml index 6329cf0796beb..6d46da42e27b1 100644 --- a/doc/src/sgml/lobj.sgml +++ b/doc/src/sgml/lobj.sgml @@ -717,7 +717,7 @@ SELECT lo_export(image.raster, '/tmp/motd') FROM image * testlo.c * test using large objects with libpq * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -939,8 +939,7 @@ main(int argc, char **argv) /* check to see that the backend connection was successfully made */ if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "Connection to database failed: %s", - PQerrorMessage(conn)); + 
fprintf(stderr, "%s", PQerrorMessage(conn)); exit_nicely(conn); } diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index ca78a81e9c545..cf705ed9cda36 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml @@ -165,7 +165,58 @@ COMMIT 693 ControlC $ pg_recvlogical -d postgres --slot=test --drop-slot - + + + The following example shows SQL interface that can be used to decode prepared + transactions. Before you use two-phase commit commands, you must set + max_prepared_transactions to at least 1. You must also set + the option 'two-phase-commit' to 1 while calling + pg_logical_slot_get_changes. Note that we will stream + the entire transaction after the commit if it is not already decoded. + + +postgres=# BEGIN; +postgres=*# INSERT INTO data(data) VALUES('5'); +postgres=*# PREPARE TRANSACTION 'test_prepared1'; + +postgres=# SELECT * FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1'); + lsn | xid | data +-----------+-----+--------------------------------------------------------- + 0/1689DC0 | 529 | BEGIN 529 + 0/1689DC0 | 529 | table public.data: INSERT: id[integer]:3 data[text]:'5' + 0/1689FC0 | 529 | PREPARE TRANSACTION 'test_prepared1', txid 529 +(3 rows) + +postgres=# COMMIT PREPARED 'test_prepared1'; +postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1'); + lsn | xid | data +-----------+-----+-------------------------------------------- + 0/1689DC0 | 529 | BEGIN 529 + 0/1689DC0 | 529 | table public.data: INSERT: id[integer]:3 data[text]:'5' + 0/1689FC0 | 529 | PREPARE TRANSACTION 'test_prepared1', txid 529 + 0/168A060 | 529 | COMMIT PREPARED 'test_prepared1', txid 529 +(4 row) + +postgres=#-- you can also rollback a prepared transaction +postgres=# BEGIN; +postgres=*# INSERT INTO data(data) VALUES('6'); +postgres=*# PREPARE TRANSACTION 'test_prepared2'; +postgres=# select * from 
pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1'); + lsn | xid | data +-----------+-----+--------------------------------------------------------- + 0/168A180 | 530 | BEGIN 530 + 0/168A1E8 | 530 | table public.data: INSERT: id[integer]:4 data[text]:'6' + 0/168A430 | 530 | PREPARE TRANSACTION 'test_prepared2', txid 530 +(3 rows) + +postgres=# ROLLBACK PREPARED 'test_prepared2'; +postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'two-phase-commit', '1'); + lsn | xid | data +-----------+-----+---------------------------------------------- + 0/168A4B8 | 530 | ROLLBACK PREPARED 'test_prepared2', txid 530 +(1 row) + + Logical Decoding Concepts @@ -389,9 +440,15 @@ typedef struct OutputPluginCallbacks LogicalDecodeMessageCB message_cb; LogicalDecodeFilterByOriginCB filter_by_origin_cb; LogicalDecodeShutdownCB shutdown_cb; + LogicalDecodeFilterPrepareCB filter_prepare_cb; + LogicalDecodeBeginPrepareCB begin_prepare_cb; + LogicalDecodePrepareCB prepare_cb; + LogicalDecodeCommitPreparedCB commit_prepared_cb; + LogicalDecodeRollbackPreparedCB rollback_prepared_cb; LogicalDecodeStreamStartCB stream_start_cb; LogicalDecodeStreamStopCB stream_stop_cb; LogicalDecodeStreamAbortCB stream_abort_cb; + LogicalDecodeStreamPrepareCB stream_prepare_cb; LogicalDecodeStreamCommitCB stream_commit_cb; LogicalDecodeStreamChangeCB stream_change_cb; LogicalDecodeStreamMessageCB stream_message_cb; @@ -413,10 +470,20 @@ typedef void (*LogicalOutputPluginInit) (struct OutputPluginCallbacks *cb); An output plugin may also define functions to support streaming of large, in-progress transactions. The stream_start_cb, stream_stop_cb, stream_abort_cb, - stream_commit_cb and stream_change_cb + stream_commit_cb, stream_change_cb, + and stream_prepare_cb are required, while stream_message_cb and stream_truncate_cb are optional. 
+ + + An output plugin may also define functions to support two-phase commits, + which allows actions to be decoded on the PREPARE TRANSACTION. + The begin_prepare_cb, prepare_cb, + stream_prepare_cb, + commit_prepared_cb and rollback_prepared_cb + callbacks are required, while filter_prepare_cb is optional. + @@ -477,7 +544,15 @@ CREATE TABLE another_catalog_table(data text) WITH (user_catalog_table = true); never get decoded. Successful savepoints are folded into the transaction containing them in the order they were - executed within that transaction. + executed within that transaction. A transaction that is prepared for + a two-phase commit using PREPARE TRANSACTION will + also be decoded if the output plugin callbacks needed for decoding + them are provided. It is possible that the current transaction which + is being decoded is aborted concurrently via a ROLLBACK PREPARED + command. In that case, the logical decoding of this transaction will + be aborted too. We will skip all the changes of such a transaction once + the abort is detected and abort the transaction when we read WAL for + ROLLBACK PREPARED. @@ -587,7 +662,13 @@ typedef void (*LogicalDecodeCommitCB) (struct LogicalDecodingContext *ctx, an INSERT, UPDATE, or DELETE. Even if the original command modified several rows at once the callback will be called individually for each - row. + row. The change_cb callback may access system or + user catalog tables to aid in the process of outputting the row + modification details. In case of decoding a prepared (but yet + uncommitted) transaction or decoding of an uncommitted transaction, this + change callback might also error out due to simultaneous rollback of + this very same transaction. In that case, the logical decoding of this + aborted transaction is stopped gracefully. 
typedef void (*LogicalDecodeChangeCB) (struct LogicalDecodingContext *ctx, ReorderBufferTXN *txn, @@ -685,7 +766,13 @@ typedef void (*LogicalDecodeMessageCB) (struct LogicalDecodingContext *ctx, non-transactional and the XID was not assigned yet in the transaction which logged the message. The lsn has WAL location of the message. The transactional says - if the message was sent as transactional or not. + if the message was sent as transactional or not. Similar to the change + callback, in case of decoding a prepared (but yet uncommitted) + transaction or decoding of an uncommitted transaction, this message + callback might also error out due to simultaneous rollback of + this very same transaction. In that case, the logical decoding of this + aborted transaction is stopped gracefully. + The prefix is arbitrary null-terminated prefix which can be used for identifying interesting messages for the current plugin. And finally the message parameter holds @@ -698,6 +785,111 @@ typedef void (*LogicalDecodeMessageCB) (struct LogicalDecodingContext *ctx, + + Prepare Filter Callback + + + The optional filter_prepare_cb callback + is called to determine whether data that is part of the current + two-phase commit transaction should be considered for decode + at this prepare stage or as a regular one-phase transaction at + COMMIT PREPARED time later. To signal that + decoding should be skipped, return true; + false otherwise. When the callback is not + defined, false is assumed (i.e. nothing is + filtered). + +typedef bool (*LogicalDecodeFilterPrepareCB) (struct LogicalDecodingContext *ctx, + const char *gid); + + The ctx parameter has the same contents as for the + other callbacks. The gid is the identifier that later + identifies this transaction for COMMIT PREPARED or + ROLLBACK PREPARED. + + + The callback has to provide the same static answer for a given + gid every time it is called. 
+ + + + + Transaction Begin Prepare Callback + + + The required begin_prepare_cb callback is called + whenever the start of a prepared transaction has been decoded. The + gid field, which is part of the + txn parameter can be used in this callback to + check if the plugin has already received this prepare in which case it + can skip the remaining changes of the transaction. This can only happen + if the user restarts the decoding after receiving the prepare for a + transaction but before receiving the commit prepared say because of some + error. + + typedef void (*LogicalDecodeBeginPrepareCB) (struct LogicalDecodingContext *ctx, + ReorderBufferTXN *txn); + + + + + + Transaction Prepare Callback + + + The required prepare_cb callback is called whenever + a transaction which is prepared for two-phase commit has been + decoded. The change_cb callback for all modified + rows will have been called before this, if there have been any modified + rows. The gid field, which is part of the + txn parameter can be used in this callback. + + typedef void (*LogicalDecodePrepareCB) (struct LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + XLogRecPtr prepare_lsn); + + + + + + Transaction Commit Prepared Callback + + + The required commit_prepared_cb callback is called + whenever a transaction commit prepared has been decoded. The + gid field, which is part of the + txn parameter can be used in this callback. + + typedef void (*LogicalDecodeCommitPreparedCB) (struct LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + XLogRecPtr commit_lsn); + + + + + + Transaction Rollback Prepared Callback + + + The required rollback_prepared_cb callback is called + whenever a transaction rollback prepared has been decoded. The + gid field, which is part of the + txn parameter can be used in this callback. 
The + parameters prepare_end_lsn and + prepare_time can be used to check if the plugin + has received this prepare transaction in which case it can apply the + rollback; otherwise, it can skip the rollback operation. The + gid alone is not sufficient because the downstream + node can have a prepared transaction with the same identifier. + + typedef void (*LogicalDecodeRollbackPreparedCB) (struct LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + XLogRecPtr prepare_end_lsn, + TimestampTz prepare_time); + + + + Stream Start Callback @@ -735,6 +927,19 @@ typedef void (*LogicalDecodeStreamAbortCB) (struct LogicalDecodingContext *ctx, + + Stream Prepare Callback + + The stream_prepare_cb callback is called to prepare + a previously streamed transaction as part of a two-phase commit. + +typedef void (*LogicalDecodeStreamPrepareCB) (struct LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + XLogRecPtr prepare_lsn); + + + + Stream Commit Callback @@ -913,9 +1118,13 @@ OutputPluginWrite(ctx, true); When streaming an in-progress transaction, the changes (and messages) are streamed in blocks demarcated by stream_start_cb and stream_stop_cb callbacks. Once all the decoded - changes are transmitted, the transaction is committed using the - stream_commit_cb callback (or possibly aborted using - the stream_abort_cb callback). + changes are transmitted, the transaction can be committed using the + stream_commit_cb callback + (or possibly aborted using the stream_abort_cb callback). + If two-phase commits are supported, the transaction can be prepared using the + stream_prepare_cb callback, commit prepared using the + commit_prepared_cb callback or aborted using the + rollback_prepared_cb. 
@@ -968,4 +1177,55 @@ stream_commit_cb(...); <-- commit of the streamed transaction + + + Two-phase commit support for Logical Decoding + + + With the basic output plugin callbacks (e.g., begin_cb, + change_cb, commit_cb and + message_cb) two-phase commit commands like + PREPARE TRANSACTION, COMMIT PREPARED + and ROLLBACK PREPARED are not decoded. While the + PREPARE TRANSACTION is ignored, + COMMIT PREPARED is decoded as a COMMIT + and ROLLBACK PREPARED is decoded as a + ROLLBACK. + + + + To support the streaming of two-phase commands, an output plugin needs to + provide additional callbacks. There are multiple two-phase commit callbacks + that are required (begin_prepare_cb, + prepare_cb, commit_prepared_cb, + rollback_prepared_cb and + stream_prepare_cb) and an optional callback + (filter_prepare_cb). + + + + If the output plugin callbacks for decoding two-phase commit commands are + provided, then on PREPARE TRANSACTION, the changes of + that transaction are decoded, passed to the output plugin, and the + prepare_cb callback is invoked. This differs from the + basic decoding setup where changes are only passed to the output plugin + when a transaction is committed. The start of a prepared transaction is + indicated by the begin_prepare_cb callback. + + + + When a prepared transaction is rolled back using the + ROLLBACK PREPARED, then the + rollback_prepared_cb callback is invoked and when the + prepared transaction is committed using COMMIT PREPARED, + then the commit_prepared_cb callback is invoked. + + + + Optionally the output plugin can specify a name pattern in the + filter_prepare_cb and transactions with gid containing + that name pattern will not be decoded as a two-phase commit transaction. + + + diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 3d6c901306777..c602ee44277ba 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -317,7 +317,7 @@ postgres 27093 0.0 0.0 30096 2752 ? 
Ss 11:34 0:00 postgres: ser pg_stat_replication_slotspg_stat_replication_slots One row per replication slot, showing statistics about - replication slot usage. + the replication slot's usage. See pg_stat_replication_slots for details. @@ -399,6 +399,12 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser + + pg_stat_progress_copypg_stat_progress_copy + One row for each backend running COPY, showing current progress. + See . + +
@@ -1874,10 +1880,6 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser Waiting to associate a data block with a buffer in the buffer pool. - - Checkpoint - Waiting to begin a checkpoint. - CheckpointerComm Waiting to manage fsync requests. @@ -2602,10 +2604,10 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i spill_txns bigint - Number of transactions spilled to disk after the memory used by - logical decoding of changes from WAL for this slot exceeds + Number of transactions spilled to disk once the memory used by + logical decoding to decode changes from WAL has exceeded logical_decoding_work_mem. The counter gets - incremented both for toplevel transactions and subtransactions. + incremented for both toplevel transactions and subtransactions. @@ -2614,9 +2616,10 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i spill_count bigint - Number of times transactions were spilled to disk while decoding changes - from WAL for this slot. Transactions may get spilled repeatedly, and - this counter gets incremented on every such invocation. + Number of times transactions were spilled to disk while decoding + changes from WAL for this slot. This counter is incremented each time + a transaction is spilled, and the same transaction may be spilled + multiple times. @@ -2637,11 +2640,12 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i stream_txns bigint - Number of in-progress transactions streamed to the decoding output plugin - after the memory used by logical decoding of changes from WAL for this - slot exceeds logical_decoding_work_mem. Streaming only + Number of in-progress transactions streamed to the decoding output + plugin after the memory used by logical decoding to decode changes + from WAL for this slot has exceeded + logical_decoding_work_mem. 
Streaming only works with toplevel transactions (subtransactions can't be streamed - independently), so the counter does not get incremented for subtransactions. + independently), so the counter is not incremented for subtransactions. @@ -2651,9 +2655,9 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i Number of times in-progress transactions were streamed to the decoding - output plugin while decoding changes from WAL for this slot. Transactions - may get streamed repeatedly, and this counter gets incremented on every - such invocation. + output plugin while decoding changes from WAL for this slot. This + counter is incremented each time a transaction is streamed, and the + same transaction may be streamed multiple times. @@ -3731,6 +3735,83 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i + + + session_time double precision + + + Time spent by database sessions in this database, in milliseconds + (note that statistics are only updated when the state of a session + changes, so if sessions have been idle for a long time, this idle time + won't be included) + + + + + + active_time double precision + + + Time spent executing SQL statements in this database, in milliseconds + (this corresponds to the states active and + fastpath function call in + + pg_stat_activity) + + + + + + idle_in_transaction_time double precision + + + Time spent idling while in a transaction in this database, in milliseconds + (this corresponds to the states idle in transaction and + idle in transaction (aborted) in + + pg_stat_activity) + + + + + + sessions bigint + + + Total number of sessions established to this database + + + + + + sessions_abandoned bigint + + + Number of database sessions to this database that were terminated + because connection to the client was lost + + + + + + sessions_fatal bigint + + + Number of database sessions to this database that were terminated + by fatal errors + + + + + + sessions_killed 
bigint + + + Number of database sessions to this database that were terminated + by operator intervention + + + stats_reset timestamp with time zone @@ -4963,7 +5044,7 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i - + pg_stat_reset_replication_slot @@ -4971,11 +5052,9 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i void - Resets statistics to zero for a single replication slot, or for all - replication slots in the cluster. The argument can be either the name - of the slot to reset the stats or NULL. If the argument is NULL, all - counters shown in the pg_stat_replication_slots - view for all replication slots are reset. + Resets statistics of the replication slot defined by the argument. If + the argument is NULL, resets statistics for all + the replication slots. This function is restricted to superusers by default, but other users @@ -5247,6 +5326,7 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, which support progress reporting are ANALYZE, CLUSTER, CREATE INDEX, VACUUM, + COPY, and (i.e., replication command that issues to take a base backup). @@ -6396,6 +6476,106 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
+ + + COPY Progress Reporting + + + pg_stat_progress_copy + + + + Whenever COPY is running, the + pg_stat_progress_copy view will contain one row + for each backend that is currently running a COPY command. + The table below describes the information that will be reported and provides + information about how to interpret it. + + + + <structname>pg_stat_progress_copy</structname> View + + + + + Column Type + + + Description + + + + + + + + pid integer + + + Process ID of backend. + + + + + + datid oid + + + OID of the database to which this backend is connected. + + + + + + datname name + + + Name of the database to which this backend is connected. + + + + + + relid oid + + + OID of the table on which the COPY command is executed. + It is set to 0 if copying from a SELECT query. + + + + + + bytes_processed bigint + + + Number of bytes already processed by COPY command. + + + + + + bytes_total bigint + + + Size of source file for COPY FROM command in bytes. + It is set to 0 if not available. + + + + + + lines_processed bigint + + + Number of lines already processed by COPY command. + + + + +
+
+ diff --git a/doc/src/sgml/nls.sgml b/doc/src/sgml/nls.sgml index 3764d49f627eb..d49f44f3f23bf 100644 --- a/doc/src/sgml/nls.sgml +++ b/doc/src/sgml/nls.sgml @@ -90,7 +90,7 @@ msgstr "another translated" ... - The msgid's are extracted from the program source. (They need not + The msgid lines are extracted from the program source. (They need not be, but this is the most common way.) The msgstr lines are initially empty and are filled in with useful strings by the translator. The strings can contain C-style escape characters and @@ -113,7 +113,7 @@ msgstr "another translated" The #. style comments are extracted from the source file where the message is used. Possibly the programmer has inserted information for the translator, such as about expected alignment. The #: - comment indicates the exact location(s) where the message is used + comments indicate the exact locations where the message is used in the source. The translator need not look at the program source, but can if there is doubt about the correct translation. 
The #, comments contain flags that describe the diff --git a/doc/src/sgml/pageinspect.sgml b/doc/src/sgml/pageinspect.sgml index 687c3606baf55..a0be779940d66 100644 --- a/doc/src/sgml/pageinspect.sgml +++ b/doc/src/sgml/pageinspect.sgml @@ -19,7 +19,7 @@ - get_raw_page(relname text, fork text, blkno int) returns bytea + get_raw_page(relname text, fork text, blkno bigint) returns bytea get_raw_page @@ -40,7 +40,7 @@ - get_raw_page(relname text, blkno int) returns bytea + get_raw_page(relname text, blkno bigint) returns bytea @@ -91,7 +91,7 @@ test=# SELECT * FROM page_header(get_raw_page('pg_class', 0)); - page_checksum(page bytea, blkno int4) returns smallint + page_checksum(page bytea, blkno bigint) returns smallint page_checksum @@ -315,7 +315,7 @@ allequalimage | f - bt_page_stats(relname text, blkno int) returns record + bt_page_stats(relname text, blkno bigint) returns record bt_page_stats @@ -346,7 +346,7 @@ btpo_flags | 3 - bt_page_items(relname text, blkno int) returns setof record + bt_page_items(relname text, blkno bigint) returns setof record bt_page_items @@ -671,6 +671,97 @@ test=# SELECT first_tid, nbytes, tids[0:5] AS some_tids + + GiST Functions + + + + + gist_page_opaque_info(page bytea) returns record + + gist_page_opaque_info + + + + + + gist_page_opaque_info returns information from + a GiST index page's opaque area, such as the NSN, + rightlink and page type. + For example: + +test=# SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2)); + lsn | nsn | rightlink | flags +-----+-----+-----------+-------- + 0/1 | 0/0 | 1 | {leaf} +(1 row) + + + + + + + + gist_page_items(page bytea, index_oid regclass) returns setof record + + gist_page_items + + + + + + gist_page_items returns information about + the data stored in a page of a GiST index. 
For example: + +test=# SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx'); + itemoffset | ctid | itemlen | keys +------------+-----------+---------+------------------- + 1 | (1,65535) | 40 | (p)=((166,166)) + 2 | (2,65535) | 40 | (p)=((332,332)) + 3 | (3,65535) | 40 | (p)=((498,498)) + 4 | (4,65535) | 40 | (p)=((664,664)) + 5 | (5,65535) | 40 | (p)=((830,830)) + 6 | (6,65535) | 40 | (p)=((996,996)) + 7 | (7,65535) | 40 | (p)=((1000,1000)) +(7 rows) + + + + + + + + gist_page_items_bytea(page bytea) returns setof record + + gist_page_items_bytea + + + + + + Same as gist_page_items, but returns the key data + as a raw bytea blob. Since it does not attempt to decode + the key, it does not need to know which index is involved. For + example: + +test=# SELECT * FROM gist_page_items_bytea(get_raw_page('test_gist_idx', 0)); + itemoffset | ctid | itemlen | key_data +------------+-----------+---------+-------------------------------------------&zwsp;----------------------------------------- + 1 | (1,65535) | 40 | \x00000100ffff28000000000000c0644000000000&zwsp;00c06440000000000000f03f000000000000f03f + 2 | (2,65535) | 40 | \x00000200ffff28000000000000c0744000000000&zwsp;00c074400000000000e064400000000000e06440 + 3 | (3,65535) | 40 | \x00000300ffff28000000000000207f4000000000&zwsp;00207f400000000000d074400000000000d07440 + 4 | (4,65535) | 40 | \x00000400ffff28000000000000c0844000000000&zwsp;00c084400000000000307f400000000000307f40 + 5 | (5,65535) | 40 | \x00000500ffff28000000000000f0894000000000&zwsp;00f089400000000000c884400000000000c88440 + 6 | (6,65535) | 40 | \x00000600ffff28000000000000208f4000000000&zwsp;00208f400000000000f889400000000000f88940 + 7 | (7,65535) | 40 | \x00000700ffff28000000000000408f4000000000&zwsp;00408f400000000000288f400000000000288f40 +(7 rows) + + + + + + + Hash Functions @@ -756,7 +847,7 @@ test=# SELECT * FROM hash_page_items(get_raw_page('con_hash_index', 1)) LIMIT 5; - hash_bitmap_info(index oid, blkno int) returns 
record + hash_bitmap_info(index oid, blkno bigint) returns record hash_bitmap_info diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index 117a1f7ff92a6..aca1fe86a7c80 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -773,7 +773,7 @@ EXPLAIN ANALYZE UPDATE tenk1 SET hundred = hundred + 1 WHERE unique1 < 100; QUERY PLAN -------------------------------------------------------------------&zwsp;------------------------------------------------------------- - Update on tenk1 (cost=5.07..229.46 rows=101 width=250) (actual time=14.628..14.628 rows=0 loops=1) + Update on tenk1 (cost=5.07..229.46 rows=0 width=0) (actual time=14.628..14.628 rows=0 loops=1) -> Bitmap Heap Scan on tenk1 (cost=5.07..229.46 rows=101 width=250) (actual time=0.101..0.439 rows=100 loops=1) Recheck Cond: (unique1 < 100) -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=101 width=0) (actual time=0.043..0.043 rows=100 loops=1) @@ -809,7 +809,7 @@ ROLLBACK; EXPLAIN UPDATE parent SET f2 = f2 + 1 WHERE f1 = 101; QUERY PLAN -------------------------------------------------------------------&zwsp;---------------- - Update on parent (cost=0.00..24.53 rows=4 width=14) + Update on parent (cost=0.00..24.53 rows=0 width=0) Update on parent Update on child1 Update on child2 diff --git a/doc/src/sgml/pgcrypto.sgml b/doc/src/sgml/pgcrypto.sgml index 3d74e15ec9b79..b6bb23de0f91d 100644 --- a/doc/src/sgml/pgcrypto.sgml +++ b/doc/src/sgml/pgcrypto.sgml @@ -1154,7 +1154,7 @@ gen_random_uuid() returns uuid pgcrypto configures itself according to the findings of the main PostgreSQL configure script. The options that affect it are --with-zlib and - --with-openssl. + --with-ssl=openssl. 
diff --git a/doc/src/sgml/pgstandby.sgml b/doc/src/sgml/pgstandby.sgml deleted file mode 100644 index 66a62559303fd..0000000000000 --- a/doc/src/sgml/pgstandby.sgml +++ /dev/null @@ -1,394 +0,0 @@ - - - - - pg_standby - - - - pg_standby - 1 - Application - - - - pg_standby - supports the creation of a PostgreSQL warm standby server - - - - - pg_standby - option - archivelocation - nextwalfile - walfilepath - restartwalfile - - - - - Description - - - pg_standby supports creation of a warm standby - database server. It is designed to be a production-ready program, as well - as a customizable template should you require specific modifications. - - - - pg_standby is designed to be a waiting - restore_command, which is needed to turn a standard - archive recovery into a warm standby operation. Other - configuration is required as well, all of which is described in the main - server manual (see ). - - - - To configure a standby - server to use pg_standby, put this into its - postgresql.conf configuration file: - -restore_command = 'pg_standby archiveDir %f %p %r' - - where archiveDir is the directory from which WAL segment - files should be restored. - - - If restartwalfile is specified, normally by using the - %r macro, then all WAL files logically preceding this - file will be removed from archivelocation. This minimizes - the number of files that need to be retained, while preserving - crash-restart capability. Use of this parameter is appropriate if the - archivelocation is a transient staging area for this - particular standby server, but not when the - archivelocation is intended as a long-term WAL archive area. - - - pg_standby assumes that - archivelocation is a directory readable by the - server-owning user. If restartwalfile (or -k) - is specified, - the archivelocation directory must be writable too. 
- - - There are two ways to fail over to a warm standby database server - when the primary server fails: - - - - Smart Failover - - - In smart failover, the server is brought up after applying all WAL - files available in the archive. This results in zero data loss, even if - the standby server has fallen behind, but if there is a lot of - unapplied WAL it can be a long time before the standby server becomes - ready. To trigger a smart failover, create a trigger file containing - the word smart, or just create it and leave it empty. - - - - - Fast Failover - - - In fast failover, the server is brought up immediately. Any WAL files - in the archive that have not yet been applied will be ignored, and - all transactions in those files are lost. To trigger a fast failover, - create a trigger file and write the word fast into it. - pg_standby can also be configured to execute a fast - failover automatically if no new WAL file appears within a defined - interval. - - - - - - - - - - Options - - - pg_standby accepts the following command-line arguments: - - - - - - - - Use cp or copy command to restore WAL files - from archive. This is the only supported behavior so this option is useless. - - - - - - - - - Print lots of debug logging output on stderr. - - - - - - - - - Remove files from archivelocation so that - no more than this many WAL files before the current one are kept in the - archive. Zero (the default) means not to remove any files from - archivelocation. - This parameter will be silently ignored if - restartwalfile is specified, since that - specification method is more accurate in determining the correct - archive cut-off point. - Use of this parameter is deprecated as of - PostgreSQL 8.3; it is safer and more efficient to - specify a restartwalfile parameter. A too - small setting could result in removal of files that are still needed - for a restart of the standby server, while a too large setting wastes - archive space. 
- - - - - - maxretries - - - Set the maximum number of times to retry the copy command if - it fails (default 3). After each failure, we wait for - sleeptime * num_retries - so that the wait time increases progressively. So by default, - we will wait 5 secs, 10 secs, then 15 secs before reporting - the failure back to the standby server. This will be - interpreted as end of recovery and the standby will come - up fully as a result. - - - - - - sleeptime - - - Set the number of seconds (up to 60, default 5) to sleep between - tests to see if the WAL file to be restored is available in - the archive yet. The default setting is not necessarily - recommended; consult for discussion. - - - - - - triggerfile - - - Specify a trigger file whose presence should cause failover. - It is recommended that you use a structured file name to - avoid confusion as to which server is being triggered - when multiple servers exist on the same system; for example - /tmp/pgsql.trigger.5432. - - - - - - - - - - Print the pg_standby version and exit. - - - - - - maxwaittime - - - Set the maximum number of seconds to wait for the next WAL file, - after which a fast failover will be performed. - A setting of zero (the default) means wait forever. - The default setting is not necessarily recommended; - consult for discussion. - - - - - - - - - - Show help about pg_standby command line - arguments, and exit. - - - - - - - - - - Notes - - - pg_standby is designed to work with - PostgreSQL 8.2 and later. - - - PostgreSQL 8.3 provides the %r macro, - which is designed to let pg_standby know the - last file it needs to keep. With PostgreSQL 8.2, the - -k option must be used if archive cleanup is - required. This option remains available in 8.3, but its use is deprecated. - - - PostgreSQL 8.4 provides the - recovery_end_command option. Without this option - a leftover trigger file can be hazardous. 
- - - - pg_standby is written in C and has an - easy-to-modify source code, with specifically designated sections to modify - for your own needs - - - - - Examples - - On Linux or Unix systems, you might use: - - -archive_command = 'cp %p .../archive/%f' - -restore_command = 'pg_standby -d -s 2 -t /tmp/pgsql.trigger.5442 .../archive %f %p %r 2>>standby.log' - -recovery_end_command = 'rm -f /tmp/pgsql.trigger.5442' - - where the archive directory is physically located on the standby server, - so that the archive_command is accessing it across NFS, - but the files are local to the standby (enabling use of ln). - This will: - - - - produce debugging output in standby.log - - - - - sleep for 2 seconds between checks for next WAL file availability - - - - - stop waiting only when a trigger file called - /tmp/pgsql.trigger.5442 appears, - and perform failover according to its content - - - - - remove the trigger file when recovery ends - - - - - remove no-longer-needed files from the archive directory - - - - - - On Windows, you might use: - - -archive_command = 'copy %p ...\\archive\\%f' - -restore_command = 'pg_standby -d -s 5 -t C:\pgsql.trigger.5442 ...\archive %f %p %r 2>>standby.log' - -recovery_end_command = 'del C:\pgsql.trigger.5442' - - Note that backslashes need to be doubled in the - archive_command, but not in the - restore_command or recovery_end_command. - This will: - - - - use the copy command to restore WAL files from archive - - - - - produce debugging output in standby.log - - - - - sleep for 5 seconds between checks for next WAL file availability - - - - - stop waiting only when a trigger file called - C:\pgsql.trigger.5442 appears, - and perform failover according to its content - - - - - remove the trigger file when recovery ends - - - - - remove no-longer-needed files from the archive directory - - - - - - - The copy command on Windows sets the final file size - before the file is completely copied, which would ordinarily confuse - pg_standby. 
Therefore - pg_standby waits sleeptime - seconds once it sees the proper file size. GNUWin32's cp - sets the file size only after the file copy is complete. - - - - Since the Windows example uses copy at both ends, either - or both servers might be accessing the archive directory across the - network. - - - - - - Author - - - Simon Riggs simon@2ndquadrant.com - - - - - See Also - - - - - - diff --git a/doc/src/sgml/plpgsql.sgml b/doc/src/sgml/plpgsql.sgml index 11246aa65348c..9d41967ad3a18 100644 --- a/doc/src/sgml/plpgsql.sgml +++ b/doc/src/sgml/plpgsql.sgml @@ -917,6 +917,24 @@ PREPARE statement_name(integer, integer) AS SELECT $1 they are useful to know when trying to diagnose a problem. More information appears in . + + + Since an expression is converted to a + SELECT command, it can contain the same clauses + that an ordinary SELECT would, except that it + cannot include a top-level UNION, + INTERSECT, or EXCEPT clause. + Thus for example one could test whether a table is non-empty with + +IF count(*) > 0 FROM my_table THEN ... + + since the expression + between IF and THEN is parsed as + though it were SELECT count(*) > 0 FROM my_table. + The SELECT must produce a single column, and not + more than one row. (If it produces no rows, the result is taken as + NULL.) + @@ -946,8 +964,8 @@ PREPARE statement_name(integer, integer) AS SELECT $1 database engine. The expression must yield a single value (possibly a row value, if the variable is a row or record variable). The target variable can be a simple variable (optionally qualified with a block - name), a field of a row or record variable, or an element of an array - that is a simple variable or field. Equal (=) can be + name), a field of a row or record target, or an element or slice of + an array target. Equal (=) can be used instead of PL/SQL-compliant :=. 
@@ -968,6 +986,9 @@ PREPARE statement_name(integer, integer) AS SELECT $1 tax := subtotal * 0.06; my_record.user_id := 20; +my_array[j] := 20; +my_array[1:3] := array[1,2,3]; +complex_array[n].realpart = 12.3; diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml index e6fd2143c1056..8d6abd4c54886 100644 --- a/doc/src/sgml/postgres-fdw.sgml +++ b/doc/src/sgml/postgres-fdw.sgml @@ -354,6 +354,19 @@ OPTIONS (ADD password_required 'false'); + + batch_size + + + This option specifies the number of rows postgres_fdw + should insert in each insert operation. It can be specified for a + foreign table or a foreign server. The option specified on a table + overrides an option specified for the server. + The default is 1. + + + + @@ -479,6 +492,85 @@ OPTIONS (ADD password_required 'false'); + + Functions + + + + postgres_fdw_get_connections(OUT server_name text, OUT valid boolean) returns setof record + + + This function returns the foreign server names of all the open + connections that postgres_fdw established from + the local session to the foreign servers. It also returns whether + each connection is valid or not. false is returned + if the foreign server connection is used in the current local + transaction but its foreign server or user mapping is changed or + dropped (Note that server name of an invalid connection will be + NULL if the server is dropped), + and then such invalid connection will be closed at + the end of that transaction. true is returned + otherwise. If there are no open connections, no record is returned. + Example usage of the function: + +postgres=# SELECT * FROM postgres_fdw_get_connections() ORDER BY 1; + server_name | valid +-------------+------- + loopback1 | t + loopback2 | f + + + + + + + postgres_fdw_disconnect(server_name text) returns boolean + + + This function discards the open connections that are established by + postgres_fdw from the local session to + the foreign server with the given name. 
Note that there can be + multiple connections to the given server using different user mappings. + If the connections are used in the current local transaction, + they are not disconnected and warning messages are reported. + This function returns true if it disconnects + at least one connection, otherwise false. + If no foreign server with the given name is found, an error is reported. + Example usage of the function: + +postgres=# SELECT postgres_fdw_disconnect('loopback1'); + postgres_fdw_disconnect +------------------------- + t + + + + + + + postgres_fdw_disconnect_all() returns boolean + + + This function discards all the open connections that are established by + postgres_fdw from the local session to + the foreign servers. If the connections are used in the current local + transaction, they are not disconnected and warning messages are reported. + This function returns true if it disconnects + at least one connection, otherwise false. + Example usage of the function: + +postgres=# SELECT postgres_fdw_disconnect_all(); + postgres_fdw_disconnect_all +----------------------------- + t + + + + + + + + Connection Management @@ -490,6 +582,26 @@ OPTIONS (ADD password_required 'false'); multiple user identities (user mappings) are used to access the foreign server, a connection is established for each user mapping. + + + When changing the definition of or removing a foreign server or + a user mapping, the corresponding connections are closed. + But note that if the connections are used in the current local transaction + at that moment, they are kept until the end of the transaction. + Closed connections will be established again when they are necessary + by subsequent queries using a foreign table. + + + + Once a connection to a foreign server has been established, + it's usually kept until the local or the corresponding remote + session exits. 
To disconnect a connection explicitly, + postgres_fdw_disconnect and + postgres_fdw_disconnect_all functions + need to be used. For example, these are useful when closing + the connections that are no longer necessary and then preventing them + from consuming the foreign server connections capacity too much. + diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 98b42bb269231..3763b4b995ff5 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1278,6 +1278,7 @@ SELCT 1/0; server_encoding, client_encoding, application_name, + in_hot_standby, is_superuser, session_authorization, DateStyle, @@ -1290,7 +1291,10 @@ SELCT 1/0; standard_conforming_strings was not reported by releases before 8.1; IntervalStyle was not reported by releases before 8.4; - application_name was not reported by releases before 9.0.) + application_name was not reported by releases before + 9.0; + in_hot_standby was not reported by releases before + 14.) Note that server_version, server_encoding and @@ -2061,8 +2065,8 @@ The commands accepted in replication mode are: the history of the server, the server will stream all the WAL on that timeline starting from the requested start point up to the point where the server switched to another timeline. If the client requests - streaming at exactly the end of an old timeline, the server responds - immediately with CommandComplete without entering COPY mode. + streaming at exactly the end of an old timeline, the server skips COPY + mode entirely. @@ -2634,7 +2638,7 @@ The commands accepted in replication mode are: When the backup is started, the server will first send two - ordinary result sets, followed by one or more CopyResponse + ordinary result sets, followed by one or more CopyOutResponse results. 
@@ -2677,15 +2681,15 @@ The commands accepted in replication mode are: - After the second regular result set, one or more CopyResponse results + After the second regular result set, one or more CopyOutResponse results will be sent, one for the main data directory and one for each additional tablespace other than pg_default and pg_global. The data in - the CopyResponse results will be a tar format (following the + the CopyOutResponse results will be a tar format (following the ustar interchange format specified in the POSIX 1003.1-2008 standard) dump of the tablespace contents, except that the two trailing blocks of zeroes specified in the standard are omitted. After the tar data is complete, and if a backup manifest was requested, - another CopyResponse result is sent, containing the manifest data for the + another CopyOutResponse result is sent, containing the manifest data for the current base backup. In any case, a final ordinary result set will be sent, containing the WAL end position of the backup, in the same format as the start position. diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml index ca51204875626..4741506eb564a 100644 --- a/doc/src/sgml/queries.sgml +++ b/doc/src/sgml/queries.sgml @@ -2218,6 +2218,39 @@ SELECT * FROM search_tree ORDER BY depth; in any case. + + + There is built-in syntax to compute a depth- or breadth-first sort column. 
+ For example: + +WITH RECURSIVE search_tree(id, link, data) AS ( + SELECT t.id, t.link, t.data + FROM tree t + UNION ALL + SELECT t.id, t.link, t.data + FROM tree t, search_tree st + WHERE t.id = st.link +) SEARCH DEPTH FIRST BY id SET ordercol +SELECT * FROM search_tree ORDER BY ordercol; + +WITH RECURSIVE search_tree(id, link, data) AS ( + SELECT t.id, t.link, t.data + FROM tree t + UNION ALL + SELECT t.id, t.link, t.data + FROM tree t, search_tree st + WHERE t.id = st.link +) SEARCH BREADTH FIRST BY id SET ordercol +SELECT * FROM search_tree ORDER BY ordercol; + + This syntax is internally expanded to something similar to the above + hand-written forms. The SEARCH clause specifies whether + depth- or breadth-first search is wanted, the list of columns to track for + sorting, and a column name that will contain the result data that can be + used for sorting. That column will implicitly be added to the output rows + of the CTE. + @@ -2305,10 +2338,39 @@ SELECT * FROM search_graph; + + There is built-in syntax to simplify cycle detection. The above query can + also be written like this: + +WITH RECURSIVE search_graph(id, link, data, depth) AS ( + SELECT g.id, g.link, g.data, 1 + FROM graph g + UNION ALL + SELECT g.id, g.link, g.data, sg.depth + 1 + FROM graph g, search_graph sg + WHERE g.id = sg.link +) CYCLE id SET is_cycle TO true DEFAULT false USING path +SELECT * FROM search_graph; + + and it will be internally rewritten to the above form. The + CYCLE clause specifies first the list of columns to + track for cycle detection, then a column name that will show whether a + cycle has been detected, then two values to use in that column for the yes + and no cases, and finally the name of another column that will track the + path. The cycle and path columns will implicitly be added to the output + rows of the CTE. + + The cycle path column is computed in the same way as the depth-first - ordering column show in the previous section. 
+ ordering column shown in the previous section. A query can have both + a SEARCH and a CYCLE clause, but a + depth-first search specification and a cycle detection specification would + create redundant computations, so it's more efficient to just use the + CYCLE clause and order by the path column. If + breadth-first ordering is wanted, then specifying both + SEARCH and CYCLE can be useful. diff --git a/doc/src/sgml/rangetypes.sgml b/doc/src/sgml/rangetypes.sgml index 859079c69cad9..91e353d4fdb91 100644 --- a/doc/src/sgml/rangetypes.sgml +++ b/doc/src/sgml/rangetypes.sgml @@ -469,11 +469,13 @@ SELECT '[11:10, 23:00]'::timerange; GiST and SP-GiST indexes can be created for table columns of range types. + GiST indexes can be also created for table columns of multirange types. For instance, to create a GiST index: CREATE INDEX reservation_idx ON reservation USING GIST (during); - A GiST or SP-GiST index can accelerate queries involving these range operators: + A GiST or SP-GiST index on ranges can accelerate queries involving these + range operators: =, &&, <@, @@ -482,8 +484,21 @@ CREATE INDEX reservation_idx ON reservation USING GIST (during); >>, -|-, &<, and - &> - (see for more information). + &>. + A GiST index on multiranges can accelerate queries involving the same + set of multirange operators. + A GiST index on ranges and GiST index on multiranges can also accelerate + queries involving these cross-type range to multirange and multirange to + range operators correspondingly: + &&, + <@, + @>, + <<, + >>, + -|-, + &<, and + &>. + See for more information. diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml index 0f0064150c42e..bee7d28928941 100644 --- a/doc/src/sgml/ref/allfiles.sgml +++ b/doc/src/sgml/ref/allfiles.sgml @@ -215,7 +215,7 @@ Complete list of usable sgml source files in this directory. 
- + diff --git a/doc/src/sgml/ref/alter_publication.sgml b/doc/src/sgml/ref/alter_publication.sgml index c2946dfe0f8cf..faa114b2c681f 100644 --- a/doc/src/sgml/ref/alter_publication.sgml +++ b/doc/src/sgml/ref/alter_publication.sgml @@ -62,11 +62,12 @@ ALTER PUBLICATION name RENAME TO You must own the publication to use ALTER PUBLICATION. + Adding a table to a publication additionally requires owning that table. To alter the owner, you must also be a direct or indirect member of the new owning role. The new owner must have CREATE privilege on the database. Also, the new owner of a FOR ALL TABLES publication must be a superuser. However, a superuser can change the - ownership of a publication while circumventing these restrictions. + ownership of a publication regardless of these restrictions. diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 2054d5d9436c1..a5271a9f8f2e2 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -386,17 +386,39 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] The fillfactor for an index is a percentage that determines how full the index method will try to pack index pages. For B-trees, leaf pages - are filled to this percentage during initial index build, and also + are filled to this percentage during initial index builds, and also when extending the index at the right (adding new largest key values). If pages subsequently become completely full, they will be split, leading to - gradual degradation in the index's efficiency. B-trees use a default + fragmentation of the on-disk index structure. B-trees use a default fillfactor of 90, but any integer value from 10 to 100 can be selected. - If the table is static then fillfactor 100 is best to minimize the - index's physical size, but for heavily updated tables a smaller - fillfactor is better to minimize the need for page splits. 
The - other index methods use fillfactor in different but roughly analogous - ways; the default fillfactor varies between methods. + + + B-tree indexes on tables where many inserts and/or updates are + anticipated can benefit from lower fillfactor settings at + CREATE INDEX time (following bulk loading into the + table). Values in the range of 50 - 90 can usefully smooth + out the rate of page splits during the + early life of the B-tree index (lowering fillfactor like this may even + lower the absolute number of page splits, though this effect is highly + workload dependent). The B-tree bottom-up index deletion technique + described in is dependent on having + some extra space on pages to store extra + tuple versions, and so can be affected by fillfactor (though the effect + is usually not significant). + + + In other specific cases it might be useful to increase fillfactor to + 100 at CREATE INDEX time as a way of maximizing + space utilization. You should only consider this when you are + completely sure that the table is static (i.e. that it will never be + affected by either inserts or updates). A fillfactor setting of 100 + otherwise risks harming performance: even a few + updates or inserts will cause a sudden flood of page splits. + + + The other index methods use fillfactor in different but roughly + analogous ways; the default fillfactor varies between methods. @@ -829,6 +851,12 @@ Indexes: to remove an index. + + Like any long-running transaction, CREATE INDEX on a + table can affect which tuples can be removed by concurrent + VACUUM on any other table. + + Prior releases of PostgreSQL also had an R-tree index method. 
This method has been removed because diff --git a/doc/src/sgml/ref/grant.sgml b/doc/src/sgml/ref/grant.sgml index c3db393bdea70..a897712de2e5c 100644 --- a/doc/src/sgml/ref/grant.sgml +++ b/doc/src/sgml/ref/grant.sgml @@ -26,58 +26,71 @@ GRANT { { SELECT | INSERT | UPDATE | DELETE | TRUNCATE | REFERENCES | TRIGGER } ON { [ TABLE ] table_name [, ...] | ALL TABLES IN SCHEMA schema_name [, ...] } TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { { SELECT | INSERT | UPDATE | REFERENCES } ( column_name [, ...] ) [, ...] | ALL [ PRIVILEGES ] ( column_name [, ...] ) } ON [ TABLE ] table_name [, ...] TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { { USAGE | SELECT | UPDATE } [, ...] | ALL [ PRIVILEGES ] } ON { SEQUENCE sequence_name [, ...] | ALL SEQUENCES IN SCHEMA schema_name [, ...] } TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { { CREATE | CONNECT | TEMPORARY | TEMP } [, ...] | ALL [ PRIVILEGES ] } ON DATABASE database_name [, ...] TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { USAGE | ALL [ PRIVILEGES ] } ON DOMAIN domain_name [, ...] TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { USAGE | ALL [ PRIVILEGES ] } ON FOREIGN DATA WRAPPER fdw_name [, ...] TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { USAGE | ALL [ PRIVILEGES ] } ON FOREIGN SERVER server_name [, ...] TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { EXECUTE | ALL [ PRIVILEGES ] } ON { { FUNCTION | PROCEDURE | ROUTINE } routine_name [ ( [ [ argmode ] [ arg_name ] arg_type [, ...] ] ) ] [, ...] | ALL { FUNCTIONS | PROCEDURES | ROUTINES } IN SCHEMA schema_name [, ...] } TO role_specification [, ...] 
[ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { USAGE | ALL [ PRIVILEGES ] } ON LANGUAGE lang_name [, ...] TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { { SELECT | UPDATE } [, ...] | ALL [ PRIVILEGES ] } ON LARGE OBJECT loid [, ...] TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { { CREATE | USAGE } [, ...] | ALL [ PRIVILEGES ] } ON SCHEMA schema_name [, ...] TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { CREATE | ALL [ PRIVILEGES ] } ON TABLESPACE tablespace_name [, ...] TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT { USAGE | ALL [ PRIVILEGES ] } ON TYPE type_name [, ...] TO role_specification [, ...] [ WITH GRANT OPTION ] + [ GRANTED BY role_specification ] GRANT role_name [, ...] TO role_specification [, ...] [ WITH ADMIN OPTION ] @@ -133,6 +146,12 @@ GRANT role_name [, ...] TO PUBLIC. + + If GRANTED BY is specified, the specified grantor must + be the current user. This clause is currently present in this form only + for SQL compatibility. + + There is no need to grant privileges to the owner of an object (usually the user that created it), @@ -410,9 +429,9 @@ GRANT admins TO joe; The SQL standard allows the GRANTED BY option to - be used in all forms of GRANT. PostgreSQL only - supports it when granting role membership, and even then only superusers - may use it in nontrivial ways. + specify only CURRENT_USER or + CURRENT_ROLE. The other variants are PostgreSQL + extensions. diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 385ac2515061c..afd344b4c0641 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -219,6 +219,7 @@ PostgreSQL documentation failures will be reported in the pg_stat_database view. + See for details. 
@@ -275,6 +276,19 @@ PostgreSQL documentation + + + + + By default, initdb will write instructions for how + to start the cluster at the end of its output. This option causes + those instructions to be left out. This is primarily intended for use + by tools that wrap initdb in platform specific + behavior, where those instructions are likely to be incorrect. + + + + diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index dcb25dc3cd43f..bcbb7a25fb735 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -176,8 +176,8 @@ PostgreSQL documentation - This option is only meaningful for the plain-text format. For - the archive formats, you can specify the option when you + This option is ignored when emitting an archive (non-text) output + file. For the archive formats, you can specify the option when you call pg_restore. @@ -208,8 +208,8 @@ PostgreSQL documentation - This option is only meaningful for the plain-text format. For - the archive formats, you can specify the option when you + This option is ignored when emitting an archive (non-text) output + file. For the archive formats, you can specify the option when you call pg_restore. @@ -456,8 +456,8 @@ PostgreSQL documentation - This option is only meaningful for the plain-text format. For - the archive formats, you can specify the option when you + This option is ignored when emitting an archive (non-text) output + file. For the archive formats, you can specify the option when you call pg_restore. @@ -693,8 +693,8 @@ PostgreSQL documentation - This option is only meaningful for the plain-text format. For - the archive formats, you can specify the option when you + This option is ignored when emitting an archive (non-text) output + file. For the archive formats, you can specify the option when you call pg_restore. @@ -924,8 +924,8 @@ PostgreSQL documentation - This option is only meaningful for the plain-text format. 
For - the archive formats, you can specify the option when you + This option is ignored when emitting an archive (non-text) output + file. For the archive formats, you can specify the option when you call pg_restore. diff --git a/doc/src/sgml/ref/pgarchivecleanup.sgml b/doc/src/sgml/ref/pgarchivecleanup.sgml index 56f02fc0e62e8..e27db3c077377 100644 --- a/doc/src/sgml/ref/pgarchivecleanup.sgml +++ b/doc/src/sgml/ref/pgarchivecleanup.sgml @@ -205,11 +205,4 @@ archive_cleanup_command = 'pg_archivecleanup -d /mnt/standby/archive %r 2>>clean - - See Also - - - - - diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index b03d0cc50f5b5..faa7c26b0a1fd 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -1008,7 +1008,7 @@ pgbench options d There is a simple variable-substitution facility for script files. Variable names must consist of letters (including non-Latin letters), - digits, and underscores. + digits, and underscores, with the first character not being a digit. Variables can be set by the command-line option, explained above, or by the meta commands explained below. In addition to any variables preset by command-line options, diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index 221a967bfe664..13c1edfa4ddbb 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -1919,6 +1919,27 @@ testdb=> + + \dX [ pattern ] + + + Lists extended statistics. + If pattern + is specified, only those extended statistics whose names match the + pattern are listed. + + + + The column of the kind of extended stats (e.g. Ndistinct) shows its status. + NULL means that it doesn't exist. "defined" means that it was requested + when creating the statistics. + You can use pg_stats_ext if you'd like to know whether + ANALYZE was run and statistics are available to the + planner. 
+ + + + \dy[+] [ pattern ] diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml index 6e1cf067130c9..627b36300c984 100644 --- a/doc/src/sgml/ref/reindex.sgml +++ b/doc/src/sgml/ref/reindex.sgml @@ -436,6 +436,12 @@ Indexes: CONCURRENTLY cannot. + + Like any long-running transaction, REINDEX on a table + can affect which tuples can be removed by concurrent + VACUUM on any other table. + + REINDEX SYSTEM does not support CONCURRENTLY since system catalogs cannot be reindexed diff --git a/doc/src/sgml/ref/revoke.sgml b/doc/src/sgml/ref/revoke.sgml index 35ff87a4f5e26..3014c864ea3ca 100644 --- a/doc/src/sgml/ref/revoke.sgml +++ b/doc/src/sgml/ref/revoke.sgml @@ -27,6 +27,7 @@ REVOKE [ GRANT OPTION FOR ] ON { [ TABLE ] table_name [, ...] | ALL TABLES IN SCHEMA schema_name [, ...] } FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] @@ -34,6 +35,7 @@ REVOKE [ GRANT OPTION FOR ] [, ...] | ALL [ PRIVILEGES ] ( column_name [, ...] ) } ON [ TABLE ] table_name [, ...] FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] @@ -42,30 +44,35 @@ REVOKE [ GRANT OPTION FOR ] ON { SEQUENCE sequence_name [, ...] | ALL SEQUENCES IN SCHEMA schema_name [, ...] } FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] { { CREATE | CONNECT | TEMPORARY | TEMP } [, ...] | ALL [ PRIVILEGES ] } ON DATABASE database_name [, ...] FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] { USAGE | ALL [ PRIVILEGES ] } ON DOMAIN domain_name [, ...] FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] { USAGE | ALL [ PRIVILEGES ] } ON FOREIGN DATA WRAPPER fdw_name [, ...] FROM role_specification [, ...] 
+ [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] { USAGE | ALL [ PRIVILEGES ] } ON FOREIGN SERVER server_name [, ...] FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] @@ -73,36 +80,42 @@ REVOKE [ GRANT OPTION FOR ] ON { { FUNCTION | PROCEDURE | ROUTINE } function_name [ ( [ [ argmode ] [ arg_name ] arg_type [, ...] ] ) ] [, ...] | ALL { FUNCTIONS | PROCEDURES | ROUTINES } IN SCHEMA schema_name [, ...] } FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] { USAGE | ALL [ PRIVILEGES ] } ON LANGUAGE lang_name [, ...] FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] { { SELECT | UPDATE } [, ...] | ALL [ PRIVILEGES ] } ON LARGE OBJECT loid [, ...] FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] { { CREATE | USAGE } [, ...] | ALL [ PRIVILEGES ] } ON SCHEMA schema_name [, ...] FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] { CREATE | ALL [ PRIVILEGES ] } ON TABLESPACE tablespace_name [, ...] FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ GRANT OPTION FOR ] { USAGE | ALL [ PRIVILEGES ] } ON TYPE type_name [, ...] FROM role_specification [, ...] + [ GRANTED BY role_specification ] [ CASCADE | RESTRICT ] REVOKE [ ADMIN OPTION FOR ] diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 6757033e096b8..eb8b52495188a 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -73,6 +73,8 @@ SELECT [ ALL | DISTINCT [ ON ( expressionand with_query is: with_query_name [ ( column_name [, ...] 
) ] AS [ [ NOT ] MATERIALIZED ] ( select | values | insert | update | delete ) + [ SEARCH { BREADTH | DEPTH } FIRST BY column_name [, ...] SET search_seq_col_name ] + [ CYCLE column_name [, ...] SET cycle_mark_col_name TO cycle_mark_value DEFAULT cycle_mark_default USING cycle_path_col_name ] TABLE [ ONLY ] table_name [ * ] @@ -276,6 +278,48 @@ TABLE [ ONLY ] table_name [ * ] queries that do not use recursion or forward references. + + The optional SEARCH clause computes a search + sequence column that can be used for ordering the results of a + recursive query in either breadth-first or depth-first order. The + supplied column name list specifies the row key that is to be used for + keeping track of visited rows. A column named + search_seq_col_name will be added to the result + column list of the WITH query. This column can be + ordered by in the outer query to achieve the respective ordering. See + for examples. + + + + The optional CYCLE clause is used to detect cycles in + recursive queries. The supplied column name list specifies the row key + that is to be used for keeping track of visited rows. A column named + cycle_mark_col_name will be added to the result + column list of the WITH query. This column will be set + to cycle_mark_value when a cycle has been + detected, else to cycle_mark_default. + Furthermore, processing of the recursive union will stop when a cycle has + been detected. cycle_mark_value and + cycle_mark_default must be constants and they + must be coercible to a common data type, and the data type must have an + inequality operator. (The SQL standard requires that they be character + strings, but PostgreSQL does not require that.) Furthermore, a column + named cycle_path_col_name will be added to the + result column list of the WITH query. This column is + used internally for tracking visited rows. See for examples. + + + + Both the SEARCH and the CYCLE clause + are only valid for recursive WITH queries. 
The + with_query must be a UNION + (or UNION ALL) of two SELECT (or + equivalent) commands (no nested UNIONs). If both + clauses are used, the column added by the SEARCH clause + appears before the columns added by the CYCLE clause. + + The primary query and the WITH queries are all (notionally) executed at the same time. This implies that the effects of @@ -476,9 +520,17 @@ TABLE [ ONLY ] table_name [ * ] result sets, but any function can be used.) This acts as though the function's output were created as a temporary table for the duration of this single SELECT command. - When the optional WITH ORDINALITY clause is - added to the function call, a new column is appended after - all the function's output columns with numbering for each row. + If the function's result type is composite (including the case of a + function with multiple OUT parameters), each + attribute becomes a separate column in the implicit table. + + + + When the optional WITH ORDINALITY clause is added + to the function call, an additional column of type bigint + will be appended to the function's result column(s). This column + numbers the rows of the function's result set, starting from 1. + By default, this column is named ordinality. @@ -486,8 +538,7 @@ TABLE [ ONLY ] table_name [ * ] If an alias is written, a column alias list can also be written to provide substitute names for one or more attributes of the function's composite return - type, including the column added by ORDINALITY - if present. + type, including the ordinality column if present. diff --git a/doc/src/sgml/ref/select_into.sgml b/doc/src/sgml/ref/select_into.sgml index 7b327d9eeef32..acc6401485b77 100644 --- a/doc/src/sgml/ref/select_into.sgml +++ b/doc/src/sgml/ref/select_into.sgml @@ -138,9 +138,11 @@ SELECT * INTO films_recent FROM films WHERE date_prod >= '2002-01-01'; in ECPG (see ) and PL/pgSQL (see ). The PostgreSQL usage of SELECT - INTO to represent table creation is historical. 
It is - best to use CREATE TABLE AS for this purpose in - new code. + INTO to represent table creation is historical. Some other SQL + implementations also use SELECT INTO in this way (but + most SQL implementations support CREATE TABLE AS + instead). Apart from such compatibility considerations, it is best to use + CREATE TABLE AS for this purpose in new code. diff --git a/doc/src/sgml/ref/set.sgml b/doc/src/sgml/ref/set.sgml index 63f312e812a87..339ee9eec9480 100644 --- a/doc/src/sgml/ref/set.sgml +++ b/doc/src/sgml/ref/set.sgml @@ -267,7 +267,7 @@ SELECT setseed(value); The function set_config provides equivalent - functionality; see . + functionality; see . Also, it is possible to UPDATE the pg_settings system view to perform the equivalent of SET. diff --git a/doc/src/sgml/ref/show.sgml b/doc/src/sgml/ref/show.sgml index 945b0491b14ec..93789ee0be058 100644 --- a/doc/src/sgml/ref/show.sgml +++ b/doc/src/sgml/ref/show.sgml @@ -129,7 +129,7 @@ SHOW ALL The function current_setting produces - equivalent output; see . + equivalent output; see . Also, the pg_settings system view produces the same information. 
diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml index 083d0bf46b891..cb401a45b35ab 100644 --- a/doc/src/sgml/regress.sgml +++ b/doc/src/sgml/regress.sgml @@ -351,6 +351,32 @@ make check LANG=C ENCODING=EUC_JP + + Custom Server Settings + + + Custom server settings to use when running a regression test suite can be + set in the PGOPTIONS environment variable (for settings + that allow this): + +make check PGOPTIONS="-c log_checkpoints=on -c work_mem=50MB" + + When running against a temporary installation, custom settings can also be + set by supplying a pre-written postgresql.conf: + +echo 'log_checkpoints = on' > test_postgresql.conf +echo 'work_mem = 50MB' >> test_postgresql.conf +make check EXTRA_REGRESS_OPTS="--temp-config=test_postgresql.conf" + + + + + This can be useful to enable additional logging, adjust resource limits, + or enable extra run-time checks such as . + + + Extra Tests diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 283352d3a4abe..bf877c0e0c1a0 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -629,9 +629,8 @@ DETAIL: Failed system call was semget(5440126, 17, 03600). -psql: could not connect to server: Connection refused - Is the server running on host "server.joe.com" and accepting - TCP/IP connections on port 5432? +psql: error: connection to server at "server.joe.com" (123.123.123.123), port 5432 failed: Connection refused + Is the server running on that host and accepting TCP/IP connections? This is the generic I couldn't find a server to talk to failure. 
It looks like the above when TCP/IP @@ -640,19 +639,22 @@ psql: could not connect to server: Connection refused - Alternatively, you'll get this when attempting Unix-domain socket + Alternatively, you might get this when attempting Unix-domain socket communication to a local server: -psql: could not connect to server: No such file or directory - Is the server running locally and accepting - connections on Unix domain socket "/tmp/.s.PGSQL.5432"? +psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: No such file or directory + Is the server running locally and accepting connections on that socket? + If the server is indeed running, check that the client's idea of the + socket path (here /tmp) agrees with the server's + setting. - The last line is useful in verifying that the client is trying to + A connection failure message always shows the server address or socket + path name, which is useful in verifying that the client is trying to connect to the right place. If there is in fact no server - running there, the kernel error message will typically be either + listening there, the kernel error message will typically be either Connection refused or No such file or directory, as illustrated. (It is important to realize that @@ -663,7 +665,7 @@ psql: could not connect to server: No such file or directory linkend="client-authentication-problems"/>.) Other error messages such as Connection timed out might indicate more fundamental problems, like lack of network - connectivity. + connectivity, or a firewall blocking the connection. 
diff --git a/doc/src/sgml/spi.sgml b/doc/src/sgml/spi.sgml index 6e92e15ca3b3f..6543eaa03435b 100644 --- a/doc/src/sgml/spi.sgml +++ b/doc/src/sgml/spi.sgml @@ -632,25 +632,23 @@ int SPI_exec(const char * command, long count< - - SPI_execute_with_args + + SPI_execute_extended - SPI_execute_with_args + SPI_execute_extended 3 - SPI_execute_with_args + SPI_execute_extended execute a command with out-of-line parameters -int SPI_execute_with_args(const char *command, - int nargs, Oid *argtypes, - Datum *values, const char *nulls, - bool read_only, long count) +int SPI_execute_extended(const char *command, + const SPIExecuteOptions * options) @@ -658,30 +656,28 @@ int SPI_execute_with_args(const char *command, Description - SPI_execute_with_args executes a command that might + SPI_execute_extended executes a command that might include references to externally supplied parameters. The command text - refers to a parameter as $n, and - the call specifies data types and values for each such symbol. - read_only and count have - the same interpretation as in SPI_execute. + refers to a parameter as $n, + and the options->params object (if supplied) + provides values and type information for each such symbol. + Various execution options can be specified + in the options struct, too. - The main advantage of this routine compared to - SPI_execute is that data values can be inserted - into the command without tedious quoting/escaping, and thus with much - less risk of SQL-injection attacks. + The options->params object should normally + mark each parameter with the PARAM_FLAG_CONST flag, + since a one-shot plan is always used for the query. - Similar results can be achieved with SPI_prepare followed by - SPI_execute_plan; however, when using this function - the query plan is always customized to the specific parameter values - provided. - For one-time query execution, this function should be preferred. 
- If the same command is to be executed with many different parameters, - either method might be faster, depending on the cost of re-planning - versus the benefit of custom plans. + If options->dest is not NULL, then result + tuples are passed to that object as they are generated by the executor, + instead of being accumulated in SPI_tuptable. Using + a caller-supplied DestReceiver object is particularly + helpful for queries that might generate many tuples, since the data can + be processed on-the-fly instead of being accumulated in memory. @@ -699,69 +695,80 @@ int SPI_execute_with_args(const char *command, - int nargs + const SPIExecuteOptions * options - number of input parameters ($1, $2, etc.) + struct containing optional arguments + + + Callers should always zero out the entire options + struct, then fill whichever fields they want to set. This ensures forward + compatibility of code, since any fields that are added to the struct in + future will be defined to behave backwards-compatibly if they are zero. + The currently available options fields are: + + + - Oid * argtypes + ParamListInfo params - an array of length nargs, containing the - OIDs of the data types of the parameters + data structure containing query parameter types and values; NULL if none - Datum * values + bool read_only - - an array of length nargs, containing the actual - parameter values - + true for read-only execution - const char * nulls + bool no_snapshots - an array of length nargs, describing which - parameters are null + true prevents SPI from managing snapshots for + execution of the query; use with extreme caution + + + + uint64 tcount + - If nulls is NULL then - SPI_execute_with_args assumes that no parameters - are null. Otherwise, each entry of the nulls - array should be ' ' if the corresponding parameter - value is non-null, or 'n' if the corresponding parameter - value is null. (In the latter case, the actual value in the - corresponding values entry doesn't matter.) 
Note - that nulls is not a text string, just an array: - it does not need a '\0' terminator. + maximum number of rows to return, + or 0 for no limit - bool read_only + DestReceiver * dest - true for read-only execution + + DestReceiver object that will receive any tuples + emitted by the query; if NULL, result tuples are accumulated into + a SPI_tuptable structure, as + in SPI_execute + - long count + ResourceOwner owner - maximum number of rows to return, - or 0 for no limit + This field is present for consistency + with SPI_execute_plan_extended, but it is + ignored, since the plan used + by SPI_execute_extended is never saved. @@ -776,35 +783,40 @@ int SPI_execute_with_args(const char *command, + When options->dest is NULL, SPI_processed and SPI_tuptable are set as in - SPI_execute if successful. + SPI_execute. + When options->dest is not NULL, + SPI_processed is set to zero and + SPI_tuptable is set to NULL. If a tuple count + is required, the caller's DestReceiver object must + calculate it. - - SPI_execute_with_receiver + + SPI_execute_with_args - SPI_execute_with_receiver + SPI_execute_with_args 3 - SPI_execute_with_receiver + SPI_execute_with_args execute a command with out-of-line parameters - int SPI_execute_with_receiver(const char *command, - ParamListInfo params, - bool read_only, - long count, - DestReceiver *dest) +int SPI_execute_with_args(const char *command, + int nargs, Oid *argtypes, + Datum *values, const char *nulls, + bool read_only, long count) @@ -812,28 +824,30 @@ int SPI_execute_with_args(const char *command, Description - SPI_execute_with_receiver executes a command that might + SPI_execute_with_args executes a command that might include references to externally supplied parameters. The command text - refers to a parameter as $n, - and the params object provides values and type - information for each such symbol. + refers to a parameter as $n, and + the call specifies data types and values for each such symbol. 
read_only and count have the same interpretation as in SPI_execute. - If dest is not NULL, then result tuples are passed - to that object as they are generated by the executor, instead of being - accumulated in SPI_tuptable. Using a - caller-supplied DestReceiver object is particularly - helpful for queries that might generate many tuples, since the data can - be processed on-the-fly instead of being accumulated in memory. + The main advantage of this routine compared to + SPI_execute is that data values can be inserted + into the command without tedious quoting/escaping, and thus with much + less risk of SQL-injection attacks. - The params object should normally mark each - parameter with the PARAM_FLAG_CONST flag, since - a one-shot plan is always used for the query. + Similar results can be achieved with SPI_prepare followed by + SPI_execute_plan; however, when using this function + the query plan is always customized to the specific parameter values + provided. + For one-time query execution, this function should be preferred. + If the same command is to be executed with many different parameters, + either method might be faster, depending on the cost of re-planning + versus the benefit of custom plans. @@ -851,38 +865,69 @@ int SPI_execute_with_args(const char *command, - ParamListInfo params + int nargs - data structure containing parameter types and values; NULL if none + number of input parameters ($1, $2, etc.) 
- bool read_only + Oid * argtypes - true for read-only execution + + an array of length nargs, containing the + OIDs of the data types of the parameters + - long count + Datum * values - maximum number of rows to return, - or 0 for no limit + an array of length nargs, containing the actual + parameter values - DestReceiver * dest + const char * nulls - DestReceiver object that will receive any tuples - emitted by the query; if NULL, tuples are returned - in SPI_tuptable + an array of length nargs, describing which + parameters are null + + + + If nulls is NULL then + SPI_execute_with_args assumes that no parameters + are null. Otherwise, each entry of the nulls + array should be ' ' if the corresponding parameter + value is non-null, or 'n' if the corresponding parameter + value is null. (In the latter case, the actual value in the + corresponding values entry doesn't matter.) Note + that nulls is not a text string, just an array: + it does not need a '\0' terminator. + + + + + + bool read_only + + true for read-only execution + + + + + long count + + + maximum number of rows to return, + or 0 for no limit @@ -897,15 +942,9 @@ int SPI_execute_with_args(const char *command, - When dest is NULL, SPI_processed and SPI_tuptable are set as in - SPI_execute. - When dest is not NULL, - SPI_processed is set to zero and - SPI_tuptable is set to NULL. If a tuple count - is required, the caller's DestReceiver object must - calculate it. + SPI_execute if successful. @@ -1105,6 +1144,11 @@ SPIPlanPtr SPI_prepare_cursor(const char * command, int < for the options field of DeclareCursorStmt. SPI_prepare always takes the cursor options as zero. + + + This function is now deprecated in favor + of SPI_prepare_extended. 
+ @@ -1176,6 +1220,122 @@ SPIPlanPtr SPI_prepare_cursor(const char * command, int < + + SPI_prepare_extended + + + SPI_prepare_extended + 3 + + + + SPI_prepare_extended + prepare a statement, without executing it yet + + + + +SPIPlanPtr SPI_prepare_extended(const char * command, + const SPIPrepareOptions * options) + + + + + Description + + + SPI_prepare_extended creates and returns a prepared + statement for the specified command, but doesn't execute the command. + This function is equivalent to SPI_prepare, + with the addition that the caller can specify options to control + the parsing of external parameter references, as well as other facets + of query parsing and planning. + + + + + Arguments + + + + const char * command + + + command string + + + + + + const SPIPrepareOptions * options + + + struct containing optional arguments + + + + + + + Callers should always zero out the entire options + struct, then fill whichever fields they want to set. This ensures forward + compatibility of code, since any fields that are added to the struct in + future will be defined to behave backwards-compatibly if they are zero. + The currently available options fields are: + + + + + ParserSetupHook parserSetup + + + Parser hook setup function + + + + + + void * parserSetupArg + + + pass-through argument for parserSetup + + + + + + RawParseMode parseMode + + + mode for raw parsing; RAW_PARSE_DEFAULT (zero) + produces default behavior + + + + + + int cursorOptions + + + integer bit mask of cursor options; zero produces default behavior + + + + + + + + Return Value + + + SPI_prepare_extended has the same return conventions as + SPI_prepare. + + + + + + SPI_prepare_params @@ -1208,6 +1368,11 @@ SPIPlanPtr SPI_prepare_params(const char * command, with the addition that the caller can specify parser hook functions to control the parsing of external parameter references. + + + This function is now deprecated in favor + of SPI_prepare_extended. 
+ @@ -1596,25 +1761,23 @@ int SPI_execute_plan(SPIPlanPtr plan, Datum * - - SPI_execute_plan_with_paramlist + + SPI_execute_plan_extended - SPI_execute_plan_with_paramlist + SPI_execute_plan_extended 3 - SPI_execute_plan_with_paramlist + SPI_execute_plan_extended execute a statement prepared by SPI_prepare -int SPI_execute_plan_with_paramlist(SPIPlanPtr plan, - ParamListInfo params, - bool read_only, - long count) +int SPI_execute_plan_extended(SPIPlanPtr plan, + const SPIExecuteOptions * options) @@ -1622,14 +1785,29 @@ int SPI_execute_plan_with_paramlist(SPIPlanPtr plan, Description - SPI_execute_plan_with_paramlist executes a statement - prepared by SPI_prepare. - This function is equivalent to SPI_execute_plan + SPI_execute_plan_extended executes a statement + prepared by SPI_prepare or one of its siblings. + This function is equivalent to SPI_execute_plan, except that information about the parameter values to be passed to the - query is presented differently. The ParamListInfo - representation can be convenient for passing down values that are - already available in that format. It also supports use of dynamic - parameter sets via hook functions specified in ParamListInfo. + query is presented differently, and additional execution-controlling + options can be passed. + + + + Query parameter values are represented by + a ParamListInfo struct, which is convenient for passing + down values that are already available in that format. Dynamic parameter + sets can also be used, via hook functions specified + in ParamListInfo. + + + + Also, instead of always accumulating the result tuples into a + SPI_tuptable structure, tuples can be passed to a + caller-supplied DestReceiver object as they are + generated by the executor. This is particularly helpful for queries + that might generate many tuples, since the data can be processed + on-the-fly instead of being accumulated in memory. 
@@ -1646,11 +1824,30 @@ int SPI_execute_plan_with_paramlist(SPIPlanPtr plan, + + const SPIExecuteOptions * options + + + struct containing optional arguments + + + + + + + Callers should always zero out the entire options + struct, then fill whichever fields they want to set. This ensures forward + compatibility of code, since any fields that are added to the struct in + future will be defined to behave backwards-compatibly if they are zero. + The currently available options fields are: + + + ParamListInfo params - data structure containing parameter types and values; NULL if none + data structure containing query parameter types and values; NULL if none @@ -1663,7 +1860,17 @@ int SPI_execute_plan_with_paramlist(SPIPlanPtr plan, - long count + bool no_snapshots + + + true prevents SPI from managing snapshots for + execution of the query; use with extreme caution + + + + + + uint64 tcount maximum number of rows to return, @@ -1671,6 +1878,29 @@ int SPI_execute_plan_with_paramlist(SPIPlanPtr plan, + + + DestReceiver * dest + + + DestReceiver object that will receive any tuples + emitted by the query; if NULL, result tuples are accumulated into + a SPI_tuptable structure, as + in SPI_execute_plan + + + + + + ResourceOwner owner + + + The resource owner that will hold a reference count on the plan while + it is executed. If NULL, CurrentResourceOwner is used. Ignored for + non-saved plans, as SPI does not acquire reference counts on those. + + + @@ -1682,35 +1912,40 @@ int SPI_execute_plan_with_paramlist(SPIPlanPtr plan, + When options->dest is NULL, SPI_processed and SPI_tuptable are set as in - SPI_execute_plan if successful. + SPI_execute_plan. + When options->dest is not NULL, + SPI_processed is set to zero and + SPI_tuptable is set to NULL. If a tuple count + is required, the caller's DestReceiver object must + calculate it. 
- - SPI_execute_plan_with_receiver + + SPI_execute_plan_with_paramlist - SPI_execute_plan_with_receiver + SPI_execute_plan_with_paramlist 3 - SPI_execute_plan_with_receiver + SPI_execute_plan_with_paramlist execute a statement prepared by SPI_prepare -int SPI_execute_plan_with_receiver(SPIPlanPtr plan, - ParamListInfo params, - bool read_only, - long count, - DestReceiver *dest) +int SPI_execute_plan_with_paramlist(SPIPlanPtr plan, + ParamListInfo params, + bool read_only, + long count) @@ -1718,15 +1953,19 @@ int SPI_execute_plan_with_receiver(SPIPlanPtr plan, Description - SPI_execute_plan_with_receiver executes a statement - prepared by SPI_prepare. This function is - equivalent to SPI_execute_plan_with_paramlist - except that, instead of always accumulating the result tuples into a - SPI_tuptable structure, tuples can be passed to a - caller-supplied DestReceiver object as they are - generated by the executor. This is particularly helpful for queries - that might generate many tuples, since the data can be processed - on-the-fly instead of being accumulated in memory. + SPI_execute_plan_with_paramlist executes a statement + prepared by SPI_prepare. + This function is equivalent to SPI_execute_plan + except that information about the parameter values to be passed to the + query is presented differently. The ParamListInfo + representation can be convenient for passing down values that are + already available in that format. It also supports use of dynamic + parameter sets via hook functions specified in ParamListInfo. + + + + This function is now deprecated in favor + of SPI_execute_plan_extended. 
@@ -1768,17 +2007,6 @@ int SPI_execute_plan_with_receiver(SPIPlanPtr plan, - - - DestReceiver * dest - - - DestReceiver object that will receive any tuples - emitted by the query; if NULL, this function is exactly equivalent to - SPI_execute_plan_with_paramlist - - - @@ -1790,15 +2018,9 @@ int SPI_execute_plan_with_receiver(SPIPlanPtr plan, - When dest is NULL, SPI_processed and SPI_tuptable are set as in - SPI_execute_plan. - When dest is not NULL, - SPI_processed is set to zero and - SPI_tuptable is set to NULL. If a tuple count - is required, the caller's DestReceiver object must - calculate it. + SPI_execute_plan if successful. @@ -2080,6 +2302,12 @@ Portal SPI_cursor_open_with_args(const char *name, The passed-in parameter data will be copied into the cursor's portal, so it can be freed while the cursor still exists. + + + This function is now deprecated in favor + of SPI_cursor_parse_open, which provides equivalent + functionality using a more modern API for handling query parameters. + @@ -2282,26 +2510,24 @@ Portal SPI_cursor_open_with_paramlist(const char *name, - - SPI_cursor_parse_open_with_paramlist + + SPI_cursor_parse_open - SPI_cursor_parse_open_with_paramlist + SPI_cursor_parse_open 3 - SPI_cursor_parse_open_with_paramlist - set up a cursor using a query and parameters + SPI_cursor_parse_open + set up a cursor using a query string and parameters -Portal SPI_cursor_parse_open_with_paramlist(const char *name, - const char *command, - ParamListInfo params, - bool read_only, - int cursorOptions) +Portal SPI_cursor_parse_open(const char *name, + const char *command, + const SPIParseOpenOptions * options) @@ -2309,17 +2535,27 @@ Portal SPI_cursor_parse_open_with_paramlist(const char *nameDescription - SPI_cursor_parse_open_with_paramlist sets up a cursor - (internally, a portal) that will execute the specified query. 
This - function is equivalent to SPI_cursor_open_with_args - except that any parameters referenced by the query are provided by - a ParamListInfo object, rather than in ad-hoc arrays. + SPI_cursor_parse_open sets up a cursor + (internally, a portal) that will execute the specified query string. + This is comparable to SPI_prepare_cursor followed + by SPI_cursor_open_with_paramlist, except that + parameter references within the query string are handled entirely by + supplying a ParamListInfo object. + + + + For one-time query execution, this function should be preferred + over SPI_prepare_cursor followed by + SPI_cursor_open_with_paramlist. + If the same command is to be executed with many different parameters, + either method might be faster, depending on the cost of re-planning + versus the benefit of custom plans. - The params object should normally mark each - parameter with the PARAM_FLAG_CONST flag, since - a one-shot plan is always used for the query. + The options->params object should normally + mark each parameter with the PARAM_FLAG_CONST flag, + since a one-shot plan is always used for the query. @@ -2352,18 +2588,30 @@ Portal SPI_cursor_parse_open_with_paramlist(const char *name - ParamListInfo params + const SPIParseOpenOptions * options - data structure containing parameter types and values; NULL if none + struct containing optional arguments + + + + Callers should always zero out the entire options + struct, then fill whichever fields they want to set. This ensures forward + compatibility of code, since any fields that are added to the struct in + future will be defined to behave backwards-compatibly if they are zero. 
+ The currently available options fields are: + + - bool read_only + ParamListInfo params - true for read-only execution + + data structure containing query parameter types and values; NULL if none + @@ -2375,6 +2623,13 @@ Portal SPI_cursor_parse_open_with_paramlist(const char *name + + + bool read_only + + true for read-only execution + + diff --git a/doc/src/sgml/sslinfo.sgml b/doc/src/sgml/sslinfo.sgml index 3213c039ca671..2a9c45a111bdf 100644 --- a/doc/src/sgml/sslinfo.sgml +++ b/doc/src/sgml/sslinfo.sgml @@ -22,7 +22,7 @@ This extension won't build at all unless the installation was - configured with --with-openssl. + configured with --with-ssl=openssl. diff --git a/doc/src/sgml/start.sgml b/doc/src/sgml/start.sgml index 9bb5c1a6d5d1f..f4ae1d0fcf781 100644 --- a/doc/src/sgml/start.sgml +++ b/doc/src/sgml/start.sgml @@ -176,19 +176,18 @@ createdb: command not found Another response could be this: -createdb: could not connect to database postgres: could not connect to server: No such file or directory - Is the server running locally and accepting - connections on Unix domain socket "/tmp/.s.PGSQL.5432"? +createdb: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: No such file or directory + Is the server running locally and accepting connections on that socket? - This means that the server was not started, or it was not started - where createdb expected it. Again, check the + This means that the server was not started, or it is not listening + where createdb expects to contact it. Again, check the installation instructions or consult the administrator. Another response could be this: -createdb: could not connect to database postgres: FATAL: role "joe" does not exist +createdb: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: FATAL: role "joe" does not exist where your own login name is mentioned. 
This will happen if the administrator has not created a PostgreSQL user account @@ -208,7 +207,7 @@ createdb: could not connect to database postgres: FATAL: role "joe" does not ex If you have a user account but it does not have the privileges required to create a database, you will see the following: -createdb: database creation failed: ERROR: permission denied to create database +createdb: error: database creation failed: ERROR: permission denied to create database Not every user has authorization to create new databases. If PostgreSQL refuses to create databases diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml index f4bc147b10684..66de1ee2f81cb 100644 --- a/doc/src/sgml/wal.sgml +++ b/doc/src/sgml/wal.sgml @@ -196,7 +196,7 @@ Data pages are not currently checksummed by default, though full page images recorded in WAL records will be protected; see initdb - for details about enabling data page checksums. + for details about enabling data checksums. @@ -230,6 +230,53 @@ + + Data Checksums + + checksums + + + + Data pages are not checksum protected by default, but this can optionally be + enabled for a cluster. When enabled, each data page will be assigned a + checksum that is updated when the page is written and verified every time + the page is read. Only data pages are protected by checksums, internal data + structures and temporary files are not. + + + + Checksums are normally enabled when the cluster is initialized using initdb. + They can also be enabled or disabled at a later time as an offline + operation. Data checksums are enabled or disabled at the full cluster + level, and cannot be specified individually for databases or tables. + + + + The current state of checksums in the cluster can be verified by viewing the + value of the read-only configuration variable by issuing the command SHOW + data_checksums. + + + + When attempting to recover from corrupt data it may be necessary to bypass + the checksum protection in order to recover data. 
To do this, temporarily + set the configuration parameter . + + + + Off-line Enabling of Checksums + + + The pg_checksums + application can be used to enable or disable data checksums, as well as + verify checksums, on an offline cluster. + + + + + Write-Ahead Logging (<acronym>WAL</acronym>) diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 7ca1e9aac5945..74b3a6acd292f 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -183,7 +183,7 @@ with_icu = @with_icu@ with_perl = @with_perl@ with_python = @with_python@ with_tcl = @with_tcl@ -with_openssl = @with_openssl@ +with_ssl = @with_ssl@ with_readline = @with_readline@ with_selinux = @with_selinux@ with_systemd = @with_systemd@ @@ -289,7 +289,6 @@ LIBS = @LIBS@ LDAP_LIBS_FE = @LDAP_LIBS_FE@ LDAP_LIBS_BE = @LDAP_LIBS_BE@ UUID_LIBS = @UUID_LIBS@ -UUID_EXTRA_OBJS = @UUID_EXTRA_OBJS@ LLVM_LIBS=@LLVM_LIBS@ LD = @LD@ with_gnu_ld = @with_gnu_ld@ diff --git a/src/backend/Makefile b/src/backend/Makefile index 9706a95848842..9672e2cb43aa4 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -2,7 +2,7 @@ # # Makefile for the postgres backend # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/backend/Makefile diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 1f72562c60307..27ba596c6e47d 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -4,7 +4,7 @@ * * See src/backend/access/brin/README for details. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -151,6 +151,7 @@ bool brininsert(Relation idxRel, Datum *values, bool *nulls, ItemPointer heaptid, Relation heapRel, IndexUniqueCheck checkUnique, + bool indexUnchanged, IndexInfo *indexInfo) { BlockNumber pagesPerRange; diff --git a/src/backend/access/brin/brin_inclusion.c b/src/backend/access/brin/brin_inclusion.c index 986f76bd9b067..12e5bddd1fc1c 100644 --- a/src/backend/access/brin/brin_inclusion.c +++ b/src/backend/access/brin/brin_inclusion.c @@ -16,7 +16,7 @@ * writing is the INET type, where IPv6 values cannot be merged with IPv4 * values. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/brin/brin_minmax.c b/src/backend/access/brin/brin_minmax.c index 4b5d6a7213520..2ffbd9bf0ddba 100644 --- a/src/backend/access/brin/brin_minmax.c +++ b/src/backend/access/brin/brin_minmax.c @@ -2,7 +2,7 @@ * brin_minmax.c * Implementation of Min/Max opclass for BRIN * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c index 87de0b855b5e3..df9ffc2fb86e9 100644 --- a/src/backend/access/brin/brin_pageops.c +++ b/src/backend/access/brin/brin_pageops.c @@ -2,7 +2,7 @@ * brin_pageops.c * Page-handling routines for BRIN indexes * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, 
PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index 35746714a7c4f..bab2a88ee3f3c 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -12,7 +12,7 @@ * the metapage. When the revmap needs to be expanded, all tuples on the * regular BRIN page at that block (if any) are moved out of the way. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c index 17e50de530982..a7eb1c9473af7 100644 --- a/src/backend/access/brin/brin_tuple.c +++ b/src/backend/access/brin/brin_tuple.c @@ -23,7 +23,7 @@ * Note the size of the null bitmask may not be the same as that of the * datum array. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/brin/brin_validate.c b/src/backend/access/brin/brin_validate.c index fb0615463e0f5..6d4253c05e205 100644 --- a/src/backend/access/brin/brin_validate.c +++ b/src/backend/access/brin/brin_validate.c @@ -3,7 +3,7 @@ * brin_validate.c * Opclass validator for BRIN. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c index da47603e2588a..39dc130e16240 100644 --- a/src/backend/access/brin/brin_xlog.c +++ b/src/backend/access/brin/brin_xlog.c @@ -2,7 +2,7 @@ * brin_xlog.c * XLog replay routines for BRIN indexes * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/common/attmap.c b/src/backend/access/common/attmap.c index 2cd16d7eafb16..32405f8610636 100644 --- a/src/backend/access/common/attmap.c +++ b/src/backend/access/common/attmap.c @@ -10,7 +10,7 @@ * columns in a different order, taking into account dropped columns. * They are also used by the tuple conversion routines in tupconvert.c. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/common/bufmask.c b/src/backend/access/common/bufmask.c index 4bdb1848ad24b..003a0befb25d8 100644 --- a/src/backend/access/common/bufmask.c +++ b/src/backend/access/common/bufmask.c @@ -5,7 +5,7 @@ * in a page which can be different when the WAL is generated * and when the WAL is applied. * - * Portions Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2016-2021, PostgreSQL Global Development Group * * Contains common routines required for masking a page. 
* diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index 44c37edcbb45a..d1cdbaf648614 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -3,7 +3,7 @@ * detoast.c * Retrieve compressed or external variable size attributes. * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/access/common/detoast.c @@ -17,6 +17,7 @@ #include "access/table.h" #include "access/tableam.h" #include "access/toast_internals.h" +#include "common/int.h" #include "common/pg_lzcompress.h" #include "utils/expandeddatum.h" #include "utils/rel.h" @@ -196,7 +197,8 @@ detoast_attr(struct varlena *attr) * Public entry point to get back part of a toasted value * from compression or external storage. * - * Note: When slicelength is negative, return suffix of the value. + * sliceoffset is where to start (zero or more) + * If slicelength < 0, return everything beyond sliceoffset * ---------- */ struct varlena * @@ -206,8 +208,21 @@ detoast_attr_slice(struct varlena *attr, struct varlena *preslice; struct varlena *result; char *attrdata; + int32 slicelimit; int32 attrsize; + if (sliceoffset < 0) + elog(ERROR, "invalid sliceoffset: %d", sliceoffset); + + /* + * Compute slicelimit = offset + length, or -1 if we must fetch all of the + * value. In case of integer overflow, we must fetch all. + */ + if (slicelength < 0) + slicelimit = -1; + else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit)) + slicelength = slicelimit = -1; + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { struct varatt_external toast_pointer; @@ -223,7 +238,7 @@ detoast_attr_slice(struct varlena *attr, * at least the requested part (when a prefix is requested). * Otherwise, just fetch all slices. 
*/ - if (slicelength > 0 && sliceoffset >= 0) + if (slicelimit >= 0) { int32 max_size; @@ -231,7 +246,7 @@ detoast_attr_slice(struct varlena *attr, * Determine maximum amount of compressed data needed for a prefix * of a given length (after decompression). */ - max_size = pglz_maximum_compressed_size(sliceoffset + slicelength, + max_size = pglz_maximum_compressed_size(slicelimit, toast_pointer.va_extsize); /* @@ -270,8 +285,8 @@ detoast_attr_slice(struct varlena *attr, struct varlena *tmp = preslice; /* Decompress enough to encompass the slice and the offset */ - if (slicelength > 0 && sliceoffset >= 0) - preslice = toast_decompress_datum_slice(tmp, slicelength + sliceoffset); + if (slicelimit >= 0) + preslice = toast_decompress_datum_slice(tmp, slicelimit); else preslice = toast_decompress_datum(tmp); @@ -297,8 +312,7 @@ detoast_attr_slice(struct varlena *attr, sliceoffset = 0; slicelength = 0; } - - if (((sliceoffset + slicelength) > attrsize) || slicelength < 0) + else if (slicelength < 0 || slicelimit > attrsize) slicelength = attrsize - sliceoffset; result = (struct varlena *) palloc(slicelength + VARHDRSZ); @@ -410,6 +424,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0) slicelength = slicelength + sizeof(int32); + /* + * Adjust length request if needed. (Note: our sole caller, + * detoast_attr_slice, protects us against sliceoffset + slicelength + * overflowing.) + */ if (((sliceoffset + slicelength) > attrsize) || slicelength < 0) slicelength = attrsize - sliceoffset; diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index f08221eed3c9a..24a27e387de0c 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -45,7 +45,7 @@ * and we'd like to still refer to them via C struct offsets. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c index 634016b9b7c04..b72a1384973d1 100644 --- a/src/backend/access/common/indextuple.c +++ b/src/backend/access/common/indextuple.c @@ -4,7 +4,7 @@ * This file contains index tuple accessor and mutator routines, * as well as various tuple utilities. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/common/printsimple.c b/src/backend/access/common/printsimple.c index df27700df92a2..93c3c4f66a837 100644 --- a/src/backend/access/common/printsimple.c +++ b/src/backend/access/common/printsimple.c @@ -8,7 +8,7 @@ * doesn't handle standalone backends or protocol versions other than * 3.0, because we don't need such handling for current applications. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c index dd1bac0aa9e5b..4468480e9b36a 100644 --- a/src/backend/access/common/printtup.c +++ b/src/backend/access/common/printtup.c @@ -5,7 +5,7 @@ * clients and standalone backends are supported here). 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/common/relation.c b/src/backend/access/common/relation.c index 641da9dbd6cfe..632d13c1eafe4 100644 --- a/src/backend/access/common/relation.c +++ b/src/backend/access/common/relation.c @@ -3,7 +3,7 @@ * relation.c * Generic relation related routines. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 8ccc228a8cc04..c687d3ee9ef42 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -3,7 +3,7 @@ * reloptions.c * Core support for relation options (pg_class.reloptions) * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/common/scankey.c b/src/backend/access/common/scankey.c index 3c4bd53f3f869..bf33c50d959a5 100644 --- a/src/backend/access/common/scankey.c +++ b/src/backend/access/common/scankey.c @@ -3,7 +3,7 @@ * scankey.c * scan key support code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/common/session.c b/src/backend/access/common/session.c index 0ec61d48a2d1f..61b3206befb95 100644 --- a/src/backend/access/common/session.c +++ b/src/backend/access/common/session.c @@ 
-12,7 +12,7 @@ * Currently this infrastructure is used to share: * - typemod registry for ephemeral row-types, i.e. BlessTupleDesc etc. * - * Portions Copyright (c) 2017-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2017-2021, PostgreSQL Global Development Group * * src/backend/access/common/session.c * diff --git a/src/backend/access/common/syncscan.c b/src/backend/access/common/syncscan.c index c1ce156902bef..b7a28af4ad822 100644 --- a/src/backend/access/common/syncscan.c +++ b/src/backend/access/common/syncscan.c @@ -36,7 +36,7 @@ * ss_report_location - update current scan location * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index 25a81e5ec655d..9b9da0f41bcd7 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -3,7 +3,7 @@ * toast_internals.c * Functions for internal use by the TOAST system. * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/access/common/toast_internals.c @@ -328,7 +328,7 @@ toast_save_datum(Relation rel, Datum value, toastrel, toastidxs[i]->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, - NULL); + false, NULL); } /* diff --git a/src/backend/access/common/tupconvert.c b/src/backend/access/common/tupconvert.c index 3cb0cbefaa36c..4229c9bf76425 100644 --- a/src/backend/access/common/tupconvert.c +++ b/src/backend/access/common/tupconvert.c @@ -7,7 +7,7 @@ * equivalent but might have columns in a different order or different sets of * dropped columns. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 30c30cf3a2e44..902f59440cd07 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -3,7 +3,7 @@ * tupdesc.c * POSTGRES tuple descriptor support code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/gin/ginarrayproc.c b/src/backend/access/gin/ginarrayproc.c index 3a6d54b38ce1f..bf73e32932e0c 100644 --- a/src/backend/access/gin/ginarrayproc.c +++ b/src/backend/access/gin/ginarrayproc.c @@ -4,7 +4,7 @@ * support functions for GIN's indexing of any array * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 82788a5c367a9..482cf10877cd2 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c index 9008c125fe998..4c5067ccf96e2 100644 --- a/src/backend/access/gin/ginbulk.c +++ b/src/backend/access/gin/ginbulk.c @@ -4,7 +4,7 @@ * routines for fast build of inverted index * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index 7a2690e97f2ca..06c05865435f0 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -4,7 +4,7 @@ * routines for handling GIN posting tree pages. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index af14baf93cee8..29c36bc0678e1 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -4,7 +4,7 @@ * routines for handling GIN entry tree pages. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index 2e41b34d8d518..e0d99409461c3 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -7,7 +7,7 @@ * transfer pending entries into the regular index structure. This * wins because bulk insertion is much more efficient than retail. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 2cfccdedcf59f..03191e016ce5c 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -4,7 +4,7 @@ * fetch tuples from a GIN scan. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 77433dc8a41e7..0e8672c9e90cc 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -4,7 +4,7 @@ * insert routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -488,6 +488,7 @@ bool gininsert(Relation index, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, + bool indexUnchanged, IndexInfo *indexInfo) { GinState *ginstate = (GinState *) indexInfo->ii_AmCache; diff --git a/src/backend/access/gin/ginlogic.c b/src/backend/access/gin/ginlogic.c index bcbc26efdb674..6bf3288f5b9ed 100644 --- a/src/backend/access/gin/ginlogic.c +++ b/src/backend/access/gin/ginlogic.c @@ -24,7 +24,7 @@ * is used for.) * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginpostinglist.c b/src/backend/access/gin/ginpostinglist.c index 461ec93fdef51..216b2b9a2c3f5 100644 --- a/src/backend/access/gin/ginpostinglist.c +++ b/src/backend/access/gin/ginpostinglist.c @@ -4,7 +4,7 @@ * routines for dealing with posting lists. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 0a685bdbfc653..55e2d49fd7224 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -4,7 +4,7 @@ * routines to manage scans of inverted index relations * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index ef9b56fd363af..6b9b04cf429e3 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -4,7 +4,7 @@ * Utility routines for the Postgres inverted index access method. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 0935a6d9e53d6..35b85a9bff0c0 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -4,7 +4,7 @@ * delete & vacuum routines for the postgres GIN * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginvalidate.c b/src/backend/access/gin/ginvalidate.c index 60ce1ae10663b..d2510daadb38c 100644 --- a/src/backend/access/gin/ginvalidate.c +++ b/src/backend/access/gin/ginvalidate.c @@ -3,7 +3,7 @@ * ginvalidate.c * Opclass validator for GIN. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index 9f8640565bf92..09ce4d6a5ba58 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -4,7 +4,7 @@ * WAL replay logic for inverted index. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 3f2b416ce1cef..0683f42c25883 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -4,7 +4,7 @@ * interface routines for the postgres GiST index access method. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -156,6 +156,7 @@ bool gistinsert(Relation r, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, + bool indexUnchanged, IndexInfo *indexInfo) { GISTSTATE *giststate = (GISTSTATE *) indexInfo->ii_AmCache; @@ -247,6 +248,9 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, if (GistFollowRight(page)) elog(ERROR, "concurrent GiST page split was incomplete"); + /* should never try to insert to a deleted page */ + Assert(!GistPageIsDeleted(page)); + *splitinfo = NIL; /* @@ -862,7 +866,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, */ } else if ((GistFollowRight(stack->page) || - stack->parent->lsn < GistPageGetNSN(stack->page)) && + stack->parent->lsn < GistPageGetNSN(stack->page)) || GistPageIsDeleted(stack->page)) { /* @@ -1641,7 +1645,6 @@ gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel) int ndeletable = 0; OffsetNumber offnum, maxoff; - TransactionId latestRemovedXid = InvalidTransactionId; Assert(GistPageIsLeaf(page)); @@ -1660,13 +1663,15 @@ gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel) deletable[ndeletable++] = offnum; } - if (XLogStandbyInfoActive() && RelationNeedsWAL(rel)) - 
latestRemovedXid = - index_compute_xid_horizon_for_tuples(rel, heapRel, buffer, - deletable, ndeletable); - if (ndeletable > 0) { + TransactionId latestRemovedXid = InvalidTransactionId; + + if (XLogStandbyInfoActive() && RelationNeedsWAL(rel)) + latestRemovedXid = + index_compute_xid_horizon_for_tuples(rel, heapRel, buffer, + deletable, ndeletable); + START_CRIT_SECTION(); PageIndexMultiDelete(page, deletable, ndeletable); diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 9d3fa9c3b75b2..1054f6f1f2e34 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -22,7 +22,7 @@ * tuples (unless buffering mode is disabled). * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gist/gistbuildbuffers.c b/src/backend/access/gist/gistbuildbuffers.c index 217c199a14709..95cc3348442b9 100644 --- a/src/backend/access/gist/gistbuildbuffers.c +++ b/src/backend/access/gist/gistbuildbuffers.c @@ -4,7 +4,7 @@ * node buffer management functions for GiST buffering build algorithm. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index a4edcc77e96f0..c8f7e781c6c36 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -4,7 +4,7 @@ * fetch tuples from a GiST scan. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c index 784807c636ba0..b8a39cd543997 100644 --- a/src/backend/access/gist/gistproc.c +++ b/src/backend/access/gist/gistproc.c @@ -7,7 +7,7 @@ * This gives R-tree behavior, with Guttman's poly-time split algorithm. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index b8aa77f70feac..61e92cf0f5dfb 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -4,7 +4,7 @@ * routines to manage scans on GiST index relations * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gist/gistsplit.c b/src/backend/access/gist/gistsplit.c index 1bf48b4e14721..526ed1218e60f 100644 --- a/src/backend/access/gist/gistsplit.c +++ b/src/backend/access/gist/gistsplit.c @@ -15,7 +15,7 @@ * gistSplitByKey() is the entry point to this file. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 615b5ade23310..cf53dad474372 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -4,7 +4,7 @@ * utilities routines for the postgres GiST index access method. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index a9c616c772454..94a7e12763931 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -4,7 +4,7 @@ * vacuuming routines for the postgres GiST index access method. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c index e600015b12d6e..7d83b1143c665 100644 --- a/src/backend/access/gist/gistvalidate.c +++ b/src/backend/access/gist/gistvalidate.c @@ -3,7 +3,7 @@ * gistvalidate.c * Opclass validator for GiST. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index 91b3e111820d2..c1d4b5d4f2321 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -4,7 +4,7 @@ * WAL replay logic for GiST. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 7c9ccf446c8a4..0752fb38a9248 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -3,7 +3,7 @@ * hash.c * Implementation of Margo Seltzer's Hashing package for postgres. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -247,6 +247,7 @@ bool hashinsert(Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, + bool indexUnchanged, IndexInfo *indexInfo) { Datum index_values[1]; diff --git a/src/backend/access/hash/hash_xlog.c b/src/backend/access/hash/hash_xlog.c index 3c606776624a6..02d9e6cdfd989 100644 --- a/src/backend/access/hash/hash_xlog.c +++ b/src/backend/access/hash/hash_xlog.c @@ -4,7 +4,7 @@ * WAL replay logic for hash index. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index a8498226e32d7..db20d9d1c1456 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -3,7 +3,7 @@ * hashfunc.c * Support functions for hash access method. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index 2ebe671967ba2..d254a00b6ac37 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -3,7 +3,7 @@ * hashinsert.c * Item insertion in hash tables for Postgres. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index 00f0a94011628..1ff2e0c18ee0f 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -3,7 +3,7 @@ * hashovfl.c * Overflow page management code for the Postgres hash access method * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index c77a189907edc..49a9867787689 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -3,7 +3,7 @@ * hashpage.c * Hash table page management code for the Postgres hash access method * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index 995498e48da1e..2ffa28e8f7711 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -3,7 +3,7 @@ * hashsearch.c * search code for postgres hash tables * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c index 2c7b5857b530a..3ce42483ed199 100644 --- a/src/backend/access/hash/hashsort.c +++ 
b/src/backend/access/hash/hashsort.c @@ -14,7 +14,7 @@ * plenty of locality of access. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c index eb510be3324ce..519872850e0b7 100644 --- a/src/backend/access/hash/hashutil.c +++ b/src/backend/access/hash/hashutil.c @@ -3,7 +3,7 @@ * hashutil.c * Utility code for Postgres hash implementation. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/hash/hashvalidate.c b/src/backend/access/hash/hashvalidate.c index 0fe97e8276b63..1e343df0afc55 100644 --- a/src/backend/access/hash/hashvalidate.c +++ b/src/backend/access/hash/hashvalidate.c @@ -3,7 +3,7 @@ * hashvalidate.c * Opclass validator for hash. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -312,7 +312,7 @@ check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype) * that are different from but physically compatible with the opclass * datatype. In some of these cases, even a "binary coercible" check * fails because there's no relevant cast. For the moment, fix it by - * having a whitelist of allowed cases. Test the specific function + * having a list of allowed cases. Test the specific function * identity, not just its input type, because hashvarlena() takes * INTERNAL and allowing any such function seems too scary. 
*/ diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index a9583f3103688..9926e2bd546ae 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -3,7 +3,7 @@ * heapam.c * heap access method code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -55,6 +55,7 @@ #include "miscadmin.h" #include "pgstat.h" #include "port/atomics.h" +#include "port/pg_bitutils.h" #include "storage/bufmgr.h" #include "storage/freespace.h" #include "storage/lmgr.h" @@ -102,6 +103,8 @@ static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 in int *remaining); static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining); +static void index_delete_sort(TM_IndexDeleteOp *delstate); +static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate); static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup); static HeapTuple ExtractReplicaIdentity(Relation rel, HeapTuple tup, bool key_changed, bool *copy); @@ -166,18 +169,33 @@ static const struct #ifdef USE_PREFETCH /* - * heap_compute_xid_horizon_for_tuples and xid_horizon_prefetch_buffer use - * this structure to coordinate prefetching activity. 
+ * heap_index_delete_tuples and index_delete_prefetch_buffer use this + * structure to coordinate prefetching activity */ typedef struct { BlockNumber cur_hblkno; int next_item; - int nitems; - ItemPointerData *tids; -} XidHorizonPrefetchState; + int ndeltids; + TM_IndexDelete *deltids; +} IndexDeletePrefetchState; #endif +/* heap_index_delete_tuples bottom-up index deletion costing constants */ +#define BOTTOMUP_MAX_NBLOCKS 6 +#define BOTTOMUP_TOLERANCE_NBLOCKS 3 + +/* + * heap_index_delete_tuples uses this when determining which heap blocks it + * must visit to help its bottom-up index deletion caller + */ +typedef struct IndexDeleteCounts +{ + int16 npromisingtids; /* Number of "promising" TIDs in group */ + int16 ntids; /* Number of TIDs in group */ + int16 ifirsttid; /* Offset to group's first deltid */ +} IndexDeleteCounts; + /* * This table maps tuple lock strength values for each particular * MultiXactStatus value. @@ -585,8 +603,14 @@ heapgettup(HeapScanDesc scan, * forward scanners. */ scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC; - /* start from last page of the scan */ - if (scan->rs_startblock > 0) + + /* + * Start from last page of the scan. Ensure we take into account + * rs_numblocks if it's been adjusted by heap_setscanlimits(). + */ + if (scan->rs_numblocks != InvalidBlockNumber) + page = (scan->rs_startblock + scan->rs_numblocks - 1) % scan->rs_nblocks; + else if (scan->rs_startblock > 0) page = scan->rs_startblock - 1; else page = scan->rs_nblocks - 1; @@ -900,8 +924,14 @@ heapgettup_pagemode(HeapScanDesc scan, * forward scanners. */ scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC; - /* start from last page of the scan */ - if (scan->rs_startblock > 0) + + /* + * Start from last page of the scan. Ensure we take into account + * rs_numblocks if it's been adjusted by heap_setscanlimits(). 
+ */ + if (scan->rs_numblocks != InvalidBlockNumber) + page = (scan->rs_startblock + scan->rs_numblocks - 1) % scan->rs_nblocks; + else if (scan->rs_startblock > 0) page = scan->rs_startblock - 1; else page = scan->rs_nblocks - 1; @@ -1862,8 +1892,12 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, TransactionId xid = GetCurrentTransactionId(); HeapTuple heaptup; Buffer buffer; + Page page = NULL; Buffer vmbuffer = InvalidBuffer; + bool starting_with_empty_page; bool all_visible_cleared = false; + bool all_frozen_set = false; + uint8 vmstatus = 0; /* * Fill in tuple header fields and toast the tuple if necessary. @@ -1876,11 +1910,36 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, /* * Find buffer to insert this tuple into. If the page is all visible, * this will also pin the requisite visibility map page. + * + * Also pin visibility map page if COPY FREEZE inserts tuples into an + * empty page. See all_frozen_set below. */ buffer = RelationGetBufferForTuple(relation, heaptup->t_len, InvalidBuffer, options, bistate, &vmbuffer, NULL); + + /* + * If we're inserting frozen entry into an empty page, + * set visibility map bits and PageAllVisible() hint. + * + * If we're inserting frozen entry into already all_frozen page, + * preserve this state. + */ + if (options & HEAP_INSERT_FROZEN) + { + page = BufferGetPage(buffer); + + starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0; + + if (visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer)) + vmstatus = visibilitymap_get_status(relation, + BufferGetBlockNumber(buffer), &vmbuffer); + + if ((starting_with_empty_page || vmstatus & VISIBILITYMAP_ALL_FROZEN)) + all_frozen_set = true; + } + /* * We're about to do the actual insert -- but check for conflict first, to * avoid possibly having to roll back work we've just done. 
@@ -1904,7 +1963,14 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, RelationPutHeapTuple(relation, buffer, heaptup, (options & HEAP_INSERT_SPECULATIVE) != 0); - if (PageIsAllVisible(BufferGetPage(buffer))) + /* + * If the page is all visible, need to clear that, unless we're only + * going to add further frozen rows to it. + * + * If we're only adding already frozen rows to a page that was empty or + * marked as all visible, mark it as all-visible. + */ + if (PageIsAllVisible(BufferGetPage(buffer)) && !(options & HEAP_INSERT_FROZEN)) { all_visible_cleared = true; PageClearAllVisible(BufferGetPage(buffer)); @@ -1912,6 +1978,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, ItemPointerGetBlockNumber(&(heaptup->t_self)), vmbuffer, VISIBILITYMAP_VALID_BITS); } + else if (all_frozen_set) + { + /* We only ever set all_frozen_set after reading the page. */ + Assert(page); + + PageSetAllVisible(page); + } /* * XXX Should we set PageSetPrunable on this page ? @@ -1959,6 +2032,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, xlrec.flags = 0; if (all_visible_cleared) xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED; + if (all_frozen_set) + xlrec.flags = XLH_INSERT_ALL_FROZEN_SET; if (options & HEAP_INSERT_SPECULATIVE) xlrec.flags |= XLH_INSERT_IS_SPECULATIVE; Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer)); @@ -2007,6 +2082,29 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, END_CRIT_SECTION(); + /* + * If we've frozen everything on the page, update the visibilitymap. + * We're already holding pin on the vmbuffer. + * + * No need to update the visibilitymap if it had all_frozen bit set + * before this insertion. 
+ */ + if (all_frozen_set && ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0)) + { + Assert(PageIsAllVisible(page)); + Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer)); + + /* + * It's fine to use InvalidTransactionId here - this is only used + * when HEAP_INSERT_FROZEN is specified, which intentionally + * violates visibility rules. + */ + visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer, + InvalidXLogRecPtr, vmbuffer, + InvalidTransactionId, + VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN); + } + UnlockReleaseBuffer(buffer); if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); @@ -2103,6 +2201,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, int ndone; PGAlignedBlock scratch; Page page; + Buffer vmbuffer = InvalidBuffer; bool needwal; Size saveFreeSpace; bool need_tuple_data = RelationIsLogicallyLogged(relation); @@ -2157,8 +2256,9 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, while (ndone < ntuples) { Buffer buffer; - Buffer vmbuffer = InvalidBuffer; + bool starting_with_empty_page; bool all_visible_cleared = false; + bool all_frozen_set = false; int nthispage; CHECK_FOR_INTERRUPTS(); @@ -2166,12 +2266,20 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, /* * Find buffer where at least the next tuple will fit. If the page is * all-visible, this will also pin the requisite visibility map page. + * + * Also pin visibility map page if COPY FREEZE inserts tuples into an + * empty page. See all_frozen_set below. 
*/ buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len, InvalidBuffer, options, bistate, &vmbuffer, NULL); page = BufferGetPage(buffer); + starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0; + + if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN)) + all_frozen_set = true; + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2205,7 +2313,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, log_heap_new_cid(relation, heaptup); } - if (PageIsAllVisible(page)) + /* + * If the page is all visible, need to clear that, unless we're only + * going to add further frozen rows to it. + * + * If we're only adding already frozen rows to a previously empty + * page, mark it as all-visible. + */ + if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN)) { all_visible_cleared = true; PageClearAllVisible(page); @@ -2213,6 +2328,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, BufferGetBlockNumber(buffer), vmbuffer, VISIBILITYMAP_VALID_BITS); } + else if (all_frozen_set) + PageSetAllVisible(page); /* * XXX Should we set PageSetPrunable on this page ? See heap_insert() @@ -2236,8 +2353,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, * If the page was previously empty, we can reinit the page * instead of restoring the whole thing. */ - init = (ItemPointerGetOffsetNumber(&(heaptuples[ndone]->t_self)) == FirstOffsetNumber && - PageGetMaxOffsetNumber(page) == FirstOffsetNumber + nthispage - 1); + init = starting_with_empty_page; /* allocate xl_heap_multi_insert struct from the scratch area */ xlrec = (xl_heap_multi_insert *) scratchptr; @@ -2255,7 +2371,15 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, /* the rest of the scratch space is used for tuple data */ tupledata = scratchptr; - xlrec->flags = all_visible_cleared ? 
XLH_INSERT_ALL_VISIBLE_CLEARED : 0; + /* check that the mutually exclusive flags are not both set */ + Assert (!(all_visible_cleared && all_frozen_set)); + + xlrec->flags = 0; + if (all_visible_cleared) + xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED; + if (all_frozen_set) + xlrec->flags = XLH_INSERT_ALL_FROZEN_SET; + xlrec->ntuples = nthispage; /* @@ -2329,13 +2453,40 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, END_CRIT_SECTION(); - UnlockReleaseBuffer(buffer); - if (vmbuffer != InvalidBuffer) - ReleaseBuffer(vmbuffer); + /* + * If we've frozen everything on the page, update the visibilitymap. + * We're already holding pin on the vmbuffer. + */ + if (all_frozen_set) + { + Assert(PageIsAllVisible(page)); + Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer)); + /* + * It's fine to use InvalidTransactionId here - this is only used + * when HEAP_INSERT_FROZEN is specified, which intentionally + * violates visibility rules. + */ + visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer, + InvalidXLogRecPtr, vmbuffer, + InvalidTransactionId, + VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN); + } + + UnlockReleaseBuffer(buffer); ndone += nthispage; + + /* + * NB: Only release vmbuffer after inserting all tuples - it's fairly + * likely that we'll insert into subsequent heap pages that are likely + * to use the same vm page. + */ } + /* We're done with inserting all tuples, so release the last vmbuffer. */ + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + /* * We're done with the actual inserts. Check for conflicts again, to * ensure that all rw-conflicts in to these inserts are detected. Without @@ -6936,28 +7087,31 @@ HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, #ifdef USE_PREFETCH /* - * Helper function for heap_compute_xid_horizon_for_tuples. Issue prefetch - * requests for the number of buffers indicated by prefetch_count. 
The - * prefetch_state keeps track of all the buffers that we can prefetch and - * which ones have already been prefetched; each call to this function picks - * up where the previous call left off. + * Helper function for heap_index_delete_tuples. Issues prefetch requests for + * prefetch_count buffers. The prefetch_state keeps track of all the buffers + * we can prefetch, and which have already been prefetched; each call to this + * function picks up where the previous call left off. + * + * Note: we expect the deltids array to be sorted in an order that groups TIDs + * by heap block, with all TIDs for each block appearing together in exactly + * one group. */ static void -xid_horizon_prefetch_buffer(Relation rel, - XidHorizonPrefetchState *prefetch_state, - int prefetch_count) +index_delete_prefetch_buffer(Relation rel, + IndexDeletePrefetchState *prefetch_state, + int prefetch_count) { BlockNumber cur_hblkno = prefetch_state->cur_hblkno; int count = 0; int i; - int nitems = prefetch_state->nitems; - ItemPointerData *tids = prefetch_state->tids; + int ndeltids = prefetch_state->ndeltids; + TM_IndexDelete *deltids = prefetch_state->deltids; for (i = prefetch_state->next_item; - i < nitems && count < prefetch_count; + i < ndeltids && count < prefetch_count; i++) { - ItemPointer htid = &tids[i]; + ItemPointer htid = &deltids[i].tid; if (cur_hblkno == InvalidBlockNumber || ItemPointerGetBlockNumber(htid) != cur_hblkno) @@ -6978,52 +7132,67 @@ xid_horizon_prefetch_buffer(Relation rel, #endif /* - * Get the latestRemovedXid from the heap pages pointed at by the index - * tuples being deleted. + * heapam implementation of tableam's index_delete_tuples interface. * - * We used to do this during recovery rather than on the primary, but that - * approach now appears inferior. 
It meant that the primary could generate - * a lot of work for the standby without any back-pressure to slow down the - * primary, and it required the standby to have reached consistency, whereas - * we want to have correct information available even before that point. + * This helper function is called by index AMs during index tuple deletion. + * See tableam header comments for an explanation of the interface implemented + * here and a general theory of operation. Note that each call here is either + * a simple index deletion call, or a bottom-up index deletion call. * * It's possible for this to generate a fair amount of I/O, since we may be * deleting hundreds of tuples from a single index block. To amortize that * cost to some degree, this uses prefetching and combines repeat accesses to - * the same block. + * the same heap block. */ TransactionId -heap_compute_xid_horizon_for_tuples(Relation rel, - ItemPointerData *tids, - int nitems) +heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) { + /* Initial assumption is that earlier pruning took care of conflict */ TransactionId latestRemovedXid = InvalidTransactionId; - BlockNumber hblkno; + BlockNumber blkno = InvalidBlockNumber; Buffer buf = InvalidBuffer; - Page hpage; + Page page = NULL; + OffsetNumber maxoff = InvalidOffsetNumber; + TransactionId priorXmax; #ifdef USE_PREFETCH - XidHorizonPrefetchState prefetch_state; + IndexDeletePrefetchState prefetch_state; int prefetch_distance; #endif + SnapshotData SnapshotNonVacuumable; + int finalndeltids = 0, + nblocksaccessed = 0; + + /* State that's only used in bottom-up index deletion case */ + int nblocksfavorable = 0; + int curtargetfreespace = delstate->bottomupfreespace, + lastfreespace = 0, + actualfreespace = 0; + bool bottomup_final_block = false; + + InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel)); + + /* Sort caller's deltids array by TID for further processing */ + index_delete_sort(delstate); /* - * Sort to 
avoid repeated lookups for the same page, and to make it more - * likely to access items in an efficient order. In particular, this - * ensures that if there are multiple pointers to the same page, they all - * get processed looking up and locking the page just once. + * Bottom-up case: resort deltids array in an order attuned to where the + * greatest number of promising TIDs are to be found, and determine how + * many blocks from the start of sorted array should be considered + * favorable. This will also shrink the deltids array in order to + * eliminate completely unfavorable blocks up front. */ - qsort((void *) tids, nitems, sizeof(ItemPointerData), - (int (*) (const void *, const void *)) ItemPointerCompare); + if (delstate->bottomup) + nblocksfavorable = bottomup_sort_and_shrink(delstate); #ifdef USE_PREFETCH /* Initialize prefetch state. */ prefetch_state.cur_hblkno = InvalidBlockNumber; prefetch_state.next_item = 0; - prefetch_state.nitems = nitems; - prefetch_state.tids = tids; + prefetch_state.ndeltids = delstate->ndeltids; + prefetch_state.deltids = delstate->deltids; /* - * Compute the prefetch distance that we will attempt to maintain. + * Determine the prefetch distance that we will attempt to maintain. * * Since the caller holds a buffer lock somewhere in rel, we'd better make * sure that isn't a catalog relation before we call code that does @@ -7035,36 +7204,111 @@ heap_compute_xid_horizon_for_tuples(Relation rel, prefetch_distance = get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace); + /* Cap initial prefetch distance for bottom-up deletion caller */ + if (delstate->bottomup) + { + Assert(nblocksfavorable >= 1); + Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS); + prefetch_distance = Min(prefetch_distance, nblocksfavorable); + } + /* Start prefetching. 
*/ - xid_horizon_prefetch_buffer(rel, &prefetch_state, prefetch_distance); + index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance); #endif - /* Iterate over all tids, and check their horizon */ - hblkno = InvalidBlockNumber; - hpage = NULL; - for (int i = 0; i < nitems; i++) + /* Iterate over deltids, determine which to delete, check their horizon */ + Assert(delstate->ndeltids > 0); + for (int i = 0; i < delstate->ndeltids; i++) { - ItemPointer htid = &tids[i]; - ItemId hitemid; - OffsetNumber hoffnum; + TM_IndexDelete *ideltid = &delstate->deltids[i]; + TM_IndexStatus *istatus = delstate->status + ideltid->id; + ItemPointer htid = &ideltid->tid; + OffsetNumber offnum; /* - * Read heap buffer, but avoid refetching if it's the same block as - * required for the last tid. + * Read buffer, and perform required extra steps each time a new block + * is encountered. Avoid refetching if it's the same block as the one + * from the last htid. */ - if (hblkno == InvalidBlockNumber || - ItemPointerGetBlockNumber(htid) != hblkno) + if (blkno == InvalidBlockNumber || + ItemPointerGetBlockNumber(htid) != blkno) { - /* release old buffer */ - if (BufferIsValid(buf)) + /* + * Consider giving up early for bottom-up index deletion caller + * first. (Only prefetch next-next block afterwards, when it + * becomes clear that we're at least going to access the next + * block in line.) + * + * Sometimes the first block frees so much space for bottom-up + * caller that the deletion process can end without accessing any + * more blocks. It is usually necessary to access 2 or 3 blocks + * per bottom-up deletion operation, though. + */ + if (delstate->bottomup) { - LockBuffer(buf, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buf); + /* + * We often allow caller to delete a few additional items + * whose entries we reached after the point that space target + * from caller was satisfied. 
The cost of accessing the page + * was already paid at that point, so it made sense to finish + * it off. When that happened, we finalize everything here + * (by finishing off the whole bottom-up deletion operation + * without needlessly paying the cost of accessing any more + * blocks). + */ + if (bottomup_final_block) + break; + + /* + * Give up when we didn't enable our caller to free any + * additional space as a result of processing the page that we + * just finished up with. This rule is the main way in which + * we keep the cost of bottom-up deletion under control. + */ + if (nblocksaccessed >= 1 && actualfreespace == lastfreespace) + break; + lastfreespace = actualfreespace; /* for next time */ + + /* + * Deletion operation (which is bottom-up) will definitely + * access the next block in line. Prepare for that now. + * + * Decay target free space so that we don't hang on for too + * long with a marginal case. (Space target is only truly + * helpful when it allows us to recognize that we don't need + * to access more than 1 or 2 blocks to satisfy caller due to + * agreeable workload characteristics.) + * + * We are a bit more patient when we encounter contiguous + * blocks, though: these are treated as favorable blocks. The + * decay process is only applied when the next block in line + * is not a favorable/contiguous block. This is not an + * exception to the general rule; we still insist on finding + * at least one deletable item per block accessed. See + * bottomup_nblocksfavorable() for full details of the theory + * behind favorable blocks and heap block locality in general. + * + * Note: The first block in line is always treated as a + * favorable block, so the earliest possible point that the + * decay can be applied is just before we access the second + * block in line. The Assert() verifies this for us. 
+ */ + Assert(nblocksaccessed > 0 || nblocksfavorable > 0); + if (nblocksfavorable > 0) + nblocksfavorable--; + else + curtargetfreespace /= 2; } - hblkno = ItemPointerGetBlockNumber(htid); + /* release old buffer */ + if (BufferIsValid(buf)) + UnlockReleaseBuffer(buf); - buf = ReadBuffer(rel, hblkno); + blkno = ItemPointerGetBlockNumber(htid); + buf = ReadBuffer(rel, blkno); + nblocksaccessed++; + Assert(!delstate->bottomup || + nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS); #ifdef USE_PREFETCH @@ -7072,69 +7316,490 @@ heap_compute_xid_horizon_for_tuples(Relation rel, * To maintain the prefetch distance, prefetch one more page for * each page we read. */ - xid_horizon_prefetch_buffer(rel, &prefetch_state, 1); + index_delete_prefetch_buffer(rel, &prefetch_state, 1); #endif - hpage = BufferGetPage(buf); - LockBuffer(buf, BUFFER_LOCK_SHARE); - } - hoffnum = ItemPointerGetOffsetNumber(htid); - hitemid = PageGetItemId(hpage, hoffnum); + page = BufferGetPage(buf); + maxoff = PageGetMaxOffsetNumber(page); + } - /* - * Follow any redirections until we find something useful. - */ - while (ItemIdIsRedirected(hitemid)) + if (istatus->knowndeletable) + Assert(!delstate->bottomup && !istatus->promising); + else { - hoffnum = ItemIdGetRedirect(hitemid); - hitemid = PageGetItemId(hpage, hoffnum); + ItemPointerData tmp = *htid; + HeapTupleData heapTuple; + + /* Are any tuples from this HOT chain non-vacuumable? 
*/ + if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable, + &heapTuple, NULL, true)) + continue; /* can't delete entry */ + + /* Caller will delete, since whole HOT chain is vacuumable */ + istatus->knowndeletable = true; + + /* Maintain index free space info for bottom-up deletion case */ + if (delstate->bottomup) + { + Assert(istatus->freespace > 0); + actualfreespace += istatus->freespace; + if (actualfreespace >= curtargetfreespace) + bottomup_final_block = true; + } } /* - * If the heap item has storage, then read the header and use that to - * set latestRemovedXid. - * - * Some LP_DEAD items may not be accessible, so we ignore them. + * Maintain latestRemovedXid value for deletion operation as a whole + * by advancing current value using heap tuple headers. This is + * loosely based on the logic for pruning a HOT chain. */ - if (ItemIdHasStorage(hitemid)) + offnum = ItemPointerGetOffsetNumber(htid); + priorXmax = InvalidTransactionId; /* cannot check first XMIN */ + for (;;) { - HeapTupleHeader htuphdr; + ItemId lp; + HeapTupleHeader htup; - htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid); + /* Some sanity checks */ + if (offnum < FirstOffsetNumber || offnum > maxoff) + { + Assert(false); + break; + } + + lp = PageGetItemId(page, offnum); + if (ItemIdIsRedirected(lp)) + { + offnum = ItemIdGetRedirect(lp); + continue; + } + + /* + * We'll often encounter LP_DEAD line pointers (especially with an + * entry marked knowndeletable by our caller up front). No heap + * tuple headers get examined for an htid that leads us to an + * LP_DEAD item. This is okay because the earlier pruning + * operation that made the line pointer LP_DEAD in the first place + * must have considered the original tuple header as part of + * generating its own latestRemovedXid value. + * + * Relying on XLOG_HEAP2_CLEAN records like this is the same + * strategy that index vacuuming uses in all cases. 
Index VACUUM + * WAL records don't even have a latestRemovedXid field of their + * own for this reason. + */ + if (!ItemIdIsNormal(lp)) + break; + + htup = (HeapTupleHeader) PageGetItem(page, lp); + + /* + * Check the tuple XMIN against prior XMAX, if any + */ + if (TransactionIdIsValid(priorXmax) && + !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) + break; + + HeapTupleHeaderAdvanceLatestRemovedXid(htup, &latestRemovedXid); - HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid); - } - else if (ItemIdIsDead(hitemid)) - { /* - * Conjecture: if hitemid is dead then it had xids before the xids - * marked on LP_NORMAL items. So we just ignore this item and move - * onto the next, for the purposes of calculating - * latestRemovedXid. + * If the tuple is not HOT-updated, then we are at the end of this + * HOT-chain. No need to visit later tuples from the same update + * chain (they get their own index entries) -- just move on to + * next htid from index AM caller. */ + if (!HeapTupleHeaderIsHotUpdated(htup)) + break; + + /* Advance to next HOT chain member */ + Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno); + offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); + priorXmax = HeapTupleHeaderGetUpdateXid(htup); } - else - Assert(!ItemIdIsUsed(hitemid)); + /* Enable further/final shrinking of deltids for caller */ + finalndeltids = i + 1; } - if (BufferIsValid(buf)) + UnlockReleaseBuffer(buf); + + /* + * Shrink deltids array to exclude non-deletable entries at the end. This + * is not just a minor optimization. Final deltids array size might be + * zero for a bottom-up caller. Index AM is explicitly allowed to rely on + * ndeltids being zero in all cases with zero total deletable entries. 
+ */ + Assert(finalndeltids > 0 || delstate->bottomup); + delstate->ndeltids = finalndeltids; + + return latestRemovedXid; +} + +/* + * Specialized inlineable comparison function for index_delete_sort() + */ +static inline int +index_delete_sort_cmp(TM_IndexDelete *deltid1, TM_IndexDelete *deltid2) +{ + ItemPointer tid1 = &deltid1->tid; + ItemPointer tid2 = &deltid2->tid; + + { + BlockNumber blk1 = ItemPointerGetBlockNumber(tid1); + BlockNumber blk2 = ItemPointerGetBlockNumber(tid2); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } { - LockBuffer(buf, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buf); + OffsetNumber pos1 = ItemPointerGetOffsetNumber(tid1); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(tid2); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; } + pg_unreachable(); + + return 0; +} + +/* + * Sort deltids array from delstate by TID. This prepares it for further + * processing by heap_index_delete_tuples(). + * + * This operation becomes a noticeable consumer of CPU cycles with some + * workloads, so we go to the trouble of specialization/micro optimization. + * We use shellsort for this because it's easy to specialize, compiles to + * relatively few instructions, and is adaptive to presorted inputs/subsets + * (which are typical here). + */ +static void +index_delete_sort(TM_IndexDeleteOp *delstate) +{ + TM_IndexDelete *deltids = delstate->deltids; + int ndeltids = delstate->ndeltids; + int low = 0; + /* - * If all heap tuples were LP_DEAD then we will be returning - * InvalidTransactionId here, which avoids conflicts. This matches - * existing logic which assumes that LP_DEAD tuples must already be older - * than the latestRemovedXid on the cleanup record that set them as - * LP_DEAD, hence must already have generated a conflict. + * Shellsort gap sequence (taken from Sedgewick-Incerpi paper). + * + * This implementation is fast with array sizes up to ~4500. This covers + * all supported BLCKSZ values. 
*/ + const int gaps[9] = {1968, 861, 336, 112, 48, 21, 7, 3, 1}; - return latestRemovedXid; + /* Think carefully before changing anything here -- keep swaps cheap */ + StaticAssertStmt(sizeof(TM_IndexDelete) <= 8, + "element size exceeds 8 bytes"); + + for (int g = 0; g < lengthof(gaps); g++) + { + for (int hi = gaps[g], i = low + hi; i < ndeltids; i++) + { + TM_IndexDelete d = deltids[i]; + int j = i; + + while (j >= hi && index_delete_sort_cmp(&deltids[j - hi], &d) >= 0) + { + deltids[j] = deltids[j - hi]; + j -= hi; + } + deltids[j] = d; + } + } +} + +/* + * Returns how many blocks should be considered favorable/contiguous for a + * bottom-up index deletion pass. This is a number of heap blocks that starts + * from and includes the first block in line. + * + * There is always at least one favorable block during bottom-up index + * deletion. In the worst case (i.e. with totally random heap blocks) the + * first block in line (the only favorable block) can be thought of as a + * degenerate array of contiguous blocks that consists of a single block. + * heap_index_delete_tuples() will expect this. + * + * Caller passes blockgroups, a description of the final order that deltids + * will be sorted in for heap_index_delete_tuples() bottom-up index deletion + * processing. Note that deltids need not actually be sorted just yet (caller + * only passes deltids to us so that we can interpret blockgroups). + * + * You might guess that the existence of contiguous blocks cannot matter much, + * since in general the main factor that determines which blocks we visit is + * the number of promising TIDs, which is a fixed hint from the index AM. + * We're not really targeting the general case, though -- the actual goal is + * to adapt our behavior to a wide variety of naturally occurring conditions. + * The effects of most of the heuristics we apply are only noticeable in the + * aggregate, over time and across many _related_ bottom-up index deletion + * passes. 
+ * + * Deeming certain blocks favorable allows heapam to recognize and adapt to + * workloads where heap blocks visited during bottom-up index deletion can be + * accessed contiguously, in the sense that each newly visited block is the + * neighbor of the block that bottom-up deletion just finished processing (or + * close enough to it). It will likely be cheaper to access more favorable + * blocks sooner rather than later (e.g. in this pass, not across a series of + * related bottom-up passes). Either way it is probably only a matter of time + * (or a matter of further correlated version churn) before all blocks that + * appear together as a single large batch of favorable blocks get accessed by + * _some_ bottom-up pass. Large batches of favorable blocks tend to either + * appear almost constantly or not even once (it all depends on per-index + * workload characteristics). + * + * Note that the blockgroups sort order applies a power-of-two bucketing + * scheme that creates opportunities for contiguous groups of blocks to get + * batched together, at least with workloads that are naturally amenable to + * being driven by heap block locality. This doesn't just enhance the spatial + * locality of bottom-up heap block processing in the obvious way. It also + * enables temporal locality of access, since sorting by heap block number + * naturally tends to make the bottom-up processing order deterministic. + * + * Consider the following example to get a sense of how temporal locality + * might matter: There is a heap relation with several indexes, each of which + * is low to medium cardinality. It is subject to constant non-HOT updates. + * The updates are skewed (in one part of the primary key, perhaps). None of + * the indexes are logically modified by the UPDATE statements (if they were + * then bottom-up index deletion would not be triggered in the first place). 
+ * Naturally, each new round of index tuples (for each heap tuple that gets a + * heap_update() call) will have the same heap TID in each and every index. + * Since these indexes are low cardinality and never get logically modified, + * heapam processing during bottom-up deletion passes will access heap blocks + * in approximately sequential order. Temporal locality of access occurs due + * to bottom-up deletion passes behaving very similarly across each of the + * indexes at any given moment. This keeps the number of buffer misses needed + * to visit heap blocks to a minimum. + */ +static int +bottomup_nblocksfavorable(IndexDeleteCounts *blockgroups, int nblockgroups, + TM_IndexDelete *deltids) +{ + int64 lastblock = -1; + int nblocksfavorable = 0; + + Assert(nblockgroups >= 1); + Assert(nblockgroups <= BOTTOMUP_MAX_NBLOCKS); + + /* + * We tolerate heap blocks that will be accessed only slightly out of + * physical order. Small blips occur when a pair of almost-contiguous + * blocks happen to fall into different buckets (perhaps due only to a + * small difference in npromisingtids that the bucketing scheme didn't + * quite manage to ignore). We effectively ignore these blips by applying + * a small tolerance. The precise tolerance we use is a little arbitrary, + * but it works well enough in practice. 
+ */ + for (int b = 0; b < nblockgroups; b++) + { + IndexDeleteCounts *group = blockgroups + b; + TM_IndexDelete *firstdtid = deltids + group->ifirsttid; + BlockNumber block = ItemPointerGetBlockNumber(&firstdtid->tid); + + if (lastblock != -1 && + ((int64) block < lastblock - BOTTOMUP_TOLERANCE_NBLOCKS || + (int64) block > lastblock + BOTTOMUP_TOLERANCE_NBLOCKS)) + break; + + nblocksfavorable++; + lastblock = block; + } + + /* Always indicate that there is at least 1 favorable block */ + Assert(nblocksfavorable >= 1); + + return nblocksfavorable; +} + +/* + * qsort comparison function for bottomup_sort_and_shrink() + */ +static int +bottomup_sort_and_shrink_cmp(const void *arg1, const void *arg2) +{ + const IndexDeleteCounts *group1 = (const IndexDeleteCounts *) arg1; + const IndexDeleteCounts *group2 = (const IndexDeleteCounts *) arg2; + + /* + * Most significant field is npromisingtids (which we invert the order of + * so as to sort in desc order). + * + * Caller should have already normalized npromisingtids fields into + * power-of-two values (buckets). + */ + if (group1->npromisingtids > group2->npromisingtids) + return -1; + if (group1->npromisingtids < group2->npromisingtids) + return 1; + + /* + * Tiebreak: desc ntids sort order. + * + * We cannot expect power-of-two values for ntids fields. We should + * behave as if they were already rounded up for us instead. + */ + if (group1->ntids != group2->ntids) + { + uint32 ntids1 = pg_nextpower2_32((uint32) group1->ntids); + uint32 ntids2 = pg_nextpower2_32((uint32) group2->ntids); + + if (ntids1 > ntids2) + return -1; + if (ntids1 < ntids2) + return 1; + } + + /* + * Tiebreak: asc offset-into-deltids-for-block (offset to first TID for + * block in deltids array) order. + * + * This is equivalent to sorting in ascending heap block number order + * (among otherwise equal subsets of the array). This approach allows us + * to avoid accessing the out-of-line TID. 
(We rely on the assumption + * that the deltids array was sorted in ascending heap TID order when + * these offsets to the first TID from each heap block group were formed.) + */ + if (group1->ifirsttid > group2->ifirsttid) + return 1; + if (group1->ifirsttid < group2->ifirsttid) + return -1; + + pg_unreachable(); + + return 0; +} + +/* + * heap_index_delete_tuples() helper function for bottom-up deletion callers. + * + * Sorts deltids array in the order needed for useful processing by bottom-up + * deletion. The array should already be sorted in TID order when we're + * called. The sort process groups heap TIDs from deltids into heap block + * groupings. Earlier/more-promising groups/blocks are usually those that are + * known to have the most "promising" TIDs. + * + * Sets new size of deltids array (ndeltids) in state. deltids will only have + * TIDs from the BOTTOMUP_MAX_NBLOCKS most promising heap blocks when we + * return. This often means that deltids will be shrunk to a small fraction + * of its original size (we eliminate many heap blocks from consideration for + * caller up front). + * + * Returns the number of "favorable" blocks. See bottomup_nblocksfavorable() + * for a definition and full details. 
+ */ +static int +bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate) +{ + IndexDeleteCounts *blockgroups; + TM_IndexDelete *reordereddeltids; + BlockNumber curblock = InvalidBlockNumber; + int nblockgroups = 0; + int ncopied = 0; + int nblocksfavorable = 0; + + Assert(delstate->bottomup); + Assert(delstate->ndeltids > 0); + + /* Calculate per-heap-block count of TIDs */ + blockgroups = palloc(sizeof(IndexDeleteCounts) * delstate->ndeltids); + for (int i = 0; i < delstate->ndeltids; i++) + { + TM_IndexDelete *ideltid = &delstate->deltids[i]; + TM_IndexStatus *istatus = delstate->status + ideltid->id; + ItemPointer htid = &ideltid->tid; + bool promising = istatus->promising; + + if (curblock != ItemPointerGetBlockNumber(htid)) + { + /* New block group */ + nblockgroups++; + + Assert(curblock < ItemPointerGetBlockNumber(htid) || + !BlockNumberIsValid(curblock)); + + curblock = ItemPointerGetBlockNumber(htid); + blockgroups[nblockgroups - 1].ifirsttid = i; + blockgroups[nblockgroups - 1].ntids = 1; + blockgroups[nblockgroups - 1].npromisingtids = 0; + } + else + { + blockgroups[nblockgroups - 1].ntids++; + } + + if (promising) + blockgroups[nblockgroups - 1].npromisingtids++; + } + + /* + * We're about ready to sort block groups to determine the optimal order + * for visiting heap blocks. But before we do, round the number of + * promising tuples for each block group up to the nearest power-of-two + * (except for block groups where npromisingtids is already 0). + * + * This scheme divides heap blocks/block groups into buckets. Each bucket + * contains blocks that have _approximately_ the same number of promising + * TIDs as each other. The goal is to ignore relatively small differences + * in the total number of promising entries, so that the whole process can + * give a little weight to heapam factors (like heap block locality) + * instead. This isn't a trade-off, really -- we have nothing to lose. 
+ * It would be foolish to interpret small differences in npromisingtids + * values as anything more than noise. + * + * We tiebreak on nhtids when sorting block group subsets that have the + * same npromisingtids, but this has the same issues as npromisingtids, + * and so nhtids is subject to the same power-of-two bucketing scheme. + * The only reason that we don't fix nhtids in the same way here too is + * that we'll need accurate nhtids values after the sort. We handle + * nhtids bucketization dynamically instead (in the sort comparator). + * + * See bottomup_nblocksfavorable() for a full explanation of when and how + * heap locality/favorable blocks can significantly influence when and how + * heap blocks are accessed. + */ + for (int b = 0; b < nblockgroups; b++) + { + IndexDeleteCounts *group = blockgroups + b; + + /* Better off falling back on nhtids with low npromisingtids */ + if (group->npromisingtids <= 4) + group->npromisingtids = 4; + else + group->npromisingtids = + pg_nextpower2_32((uint32) group->npromisingtids); + } + + /* Sort groups and rearrange caller's deltids array */ + qsort(blockgroups, nblockgroups, sizeof(IndexDeleteCounts), + bottomup_sort_and_shrink_cmp); + reordereddeltids = palloc(delstate->ndeltids * sizeof(TM_IndexDelete)); + + nblockgroups = Min(BOTTOMUP_MAX_NBLOCKS, nblockgroups); + /* Determine number of favorable blocks at the start of final deltids */ + nblocksfavorable = bottomup_nblocksfavorable(blockgroups, nblockgroups, + delstate->deltids); + + for (int b = 0; b < nblockgroups; b++) + { + IndexDeleteCounts *group = blockgroups + b; + TM_IndexDelete *firstdtid = delstate->deltids + group->ifirsttid; + + memcpy(reordereddeltids + ncopied, firstdtid, + sizeof(TM_IndexDelete) * group->ntids); + ncopied += group->ntids; + } + + /* Copy final grouped and sorted TIDs back into start of caller's array */ + memcpy(delstate->deltids, reordereddeltids, + sizeof(TM_IndexDelete) * ncopied); + delstate->ndeltids = ncopied; + + 
pfree(reordereddeltids); + pfree(blockgroups); + + return nblocksfavorable; } /* @@ -8123,6 +8788,10 @@ heap_xlog_insert(XLogReaderState *record) ItemPointerSetBlockNumber(&target_tid, blkno); ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum); + /* check that the mutually exclusive flags are not both set */ + Assert (!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) && + (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET))); + /* * The visibility map may need to be fixed even if the heap page is * already up-to-date. @@ -8193,6 +8862,10 @@ heap_xlog_insert(XLogReaderState *record) if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); + /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */ + if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) + PageSetAllVisible(page); + MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) @@ -8243,6 +8916,10 @@ heap_xlog_multi_insert(XLogReaderState *record) XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno); + /* check that the mutually exclusive flags are not both set */ + Assert (!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) && + (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET))); + /* * The visibility map may need to be fixed even if the heap page is * already up-to-date. 
@@ -8332,6 +9009,10 @@ heap_xlog_multi_insert(XLogReaderState *record) if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); + /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */ + if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) + PageSetAllVisible(page); + MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 3eea215b85274..4a70e20a14308 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -3,7 +3,7 @@ * heapam_handler.c * heap table access method code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1956,6 +1956,7 @@ heapam_index_validate_scan(Relation heapRelation, heapRelation, indexInfo->ii_Unique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, + false, indexInfo); state->tups_inserted += 1; @@ -2562,7 +2563,7 @@ static const TableAmRoutine heapam_methods = { .tuple_get_latest_tid = heap_get_latest_tid, .tuple_tid_valid = heapam_tuple_tid_valid, .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot, - .compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples, + .index_delete_tuples = heap_index_delete_tuples, .relation_set_new_filenode = heapam_relation_set_new_filenode, .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate, diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index 80bd4940769c1..65f91c82599cf 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -52,7 +52,7 @@ * HeapTupleSatisfiesAny() * all tuples are visible * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global 
Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/heap/heaptoast.c b/src/backend/access/heap/heaptoast.c index 584f101dd987b..55bbe1d584760 100644 --- a/src/backend/access/heap/heaptoast.c +++ b/src/backend/access/heap/heaptoast.c @@ -4,7 +4,7 @@ * Heap-specific definitions for external and compressed storage * of variable size attributes. * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index ca357410a293c..fb7ad0bab47ac 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -3,7 +3,7 @@ * hio.c * POSTGRES heap access method input/output code. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -396,19 +396,19 @@ RelationGetBufferForTuple(Relation relation, Size len, * target. */ targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace); + } - /* - * If the FSM knows nothing of the rel, try the last page before we - * give up and extend. This avoids one-tuple-per-page syndrome during - * bootstrapping or in a recently-started system. - */ - if (targetBlock == InvalidBlockNumber) - { - BlockNumber nblocks = RelationGetNumberOfBlocks(relation); + /* + * If the FSM knows nothing of the rel, try the last page before we + * give up and extend. This avoids one-tuple-per-page syndrome during + * bootstrapping or in a recently-started system. 
+ */ + if (targetBlock == InvalidBlockNumber) + { + BlockNumber nblocks = RelationGetNumberOfBlocks(relation); - if (nblocks > 0) - targetBlock = nblocks - 1; - } + if (nblocks > 0) + targetBlock = nblocks - 1; } loop: @@ -433,6 +433,14 @@ RelationGetBufferForTuple(Relation relation, Size len, buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate); if (PageIsAllVisible(BufferGetPage(buffer))) visibilitymap_pin(relation, targetBlock, vmbuffer); + + /* + * If the page is empty, pin vmbuffer to set all_frozen bit later. + */ + if ((options & HEAP_INSERT_FROZEN) && + (PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0)) + visibilitymap_pin(relation, targetBlock, vmbuffer); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); } else if (otherBlock == targetBlock) @@ -619,6 +627,15 @@ RelationGetBufferForTuple(Relation relation, Size len, PageInit(page, BufferGetPageSize(buffer), 0); MarkBufferDirty(buffer); + /* + * The page is empty, pin vmbuffer to set all_frozen bit. + */ + if (options & HEAP_INSERT_FROZEN) + { + Assert(PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0); + visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer); + } + /* * Release the file-extension lock; it's now OK for someone else to extend * the relation some more. 
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 9e04bc712c94f..e3a716a2a2f24 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -3,7 +3,7 @@ * pruneheap.c * heap page pruning and HOT-chain management code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 65942cc4281b3..fcaad9ba0b7f7 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -92,7 +92,7 @@ * heap's TOAST table will go through the normal bufmgr. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994-5, Regents of the University of California * * IDENTIFICATION @@ -1256,8 +1256,8 @@ CheckPointLogicalRewriteHeap(void) /* * The file cannot vanish due to concurrency since this function - * is the only one removing logical mappings and it's run while - * CheckpointLock is held exclusively. + * is the only one removing logical mappings and only one + * checkpoint can be in progress at a time. */ if (fd < 0) ereport(ERROR, diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 25f2d5df1b8c7..f3d2265fad7e4 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -37,7 +37,7 @@ * parallel mode we update the index statistics after exiting from the * parallel mode. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index b1072183bcd6d..e198df65d8276 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -3,7 +3,7 @@ * visibilitymap.c * bitmap for tracking visibility of heap tuples * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/index/amapi.c b/src/backend/access/index/amapi.c index 4e3d7b030ebd3..d30bc435146db 100644 --- a/src/backend/access/index/amapi.c +++ b/src/backend/access/index/amapi.c @@ -3,7 +3,7 @@ * amapi.c * Support routines for API for Postgres index access methods. * - * Copyright (c) 2015-2020, PostgreSQL Global Development Group + * Copyright (c) 2015-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/access/index/amvalidate.c b/src/backend/access/index/amvalidate.c index b58c34aa5f2fd..9dd0ae663ba14 100644 --- a/src/backend/access/index/amvalidate.c +++ b/src/backend/access/index/amvalidate.c @@ -4,7 +4,7 @@ * Support routines for index access methods' amvalidate and * amadjustmembers functions. 
* - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index e3164e674a7bc..1c3e937c61534 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -3,7 +3,7 @@ * genam.c * general index access method routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -276,11 +276,18 @@ BuildIndexValueDescription(Relation indexRelation, /* * Get the latestRemovedXid from the table entries pointed at by the index - * tuples being deleted. - * - * Note: index access methods that don't consistently use the standard - * IndexTuple + heap TID item pointer representation will need to provide - * their own version of this function. + * tuples being deleted using an AM-generic approach. + * + * This is a table_index_delete_tuples() shim used by index AMs that have + * simple requirements. These callers only need to consult the tableam to get + * a latestRemovedXid value, and only expect to delete tuples that are already + * known deletable. When a latestRemovedXid value isn't needed in index AM's + * deletion WAL record, it is safe for it to skip calling here entirely. + * + * We assume that caller index AM uses the standard IndexTuple representation, + * with table TIDs stored in the t_tid field. We also expect (and assert) + * that the line pointers on page for 'itemnos' offsets are already marked + * LP_DEAD. 
*/ TransactionId index_compute_xid_horizon_for_tuples(Relation irel, @@ -289,12 +296,19 @@ index_compute_xid_horizon_for_tuples(Relation irel, OffsetNumber *itemnos, int nitems) { - ItemPointerData *ttids = - (ItemPointerData *) palloc(sizeof(ItemPointerData) * nitems); + TM_IndexDeleteOp delstate; TransactionId latestRemovedXid = InvalidTransactionId; Page ipage = BufferGetPage(ibuf); IndexTuple itup; + Assert(nitems > 0); + + delstate.bottomup = false; + delstate.bottomupfreespace = 0; + delstate.ndeltids = 0; + delstate.deltids = palloc(nitems * sizeof(TM_IndexDelete)); + delstate.status = palloc(nitems * sizeof(TM_IndexStatus)); + /* identify what the index tuples about to be deleted point to */ for (int i = 0; i < nitems; i++) { @@ -303,14 +317,26 @@ index_compute_xid_horizon_for_tuples(Relation irel, iitemid = PageGetItemId(ipage, itemnos[i]); itup = (IndexTuple) PageGetItem(ipage, iitemid); - ItemPointerCopy(&itup->t_tid, &ttids[i]); + Assert(ItemIdIsDead(iitemid)); + + ItemPointerCopy(&itup->t_tid, &delstate.deltids[i].tid); + delstate.deltids[i].id = delstate.ndeltids; + delstate.status[i].idxoffnum = InvalidOffsetNumber; /* unused */ + delstate.status[i].knowndeletable = true; /* LP_DEAD-marked */ + delstate.status[i].promising = false; /* unused */ + delstate.status[i].freespace = 0; /* unused */ + + delstate.ndeltids++; } /* determine the actual xid horizon */ - latestRemovedXid = - table_compute_xid_horizon_for_tuples(hrel, ttids, nitems); + latestRemovedXid = table_index_delete_tuples(hrel, &delstate); + + /* assert tableam agrees that all items are deletable */ + Assert(delstate.ndeltids == nitems); - pfree(ttids); + pfree(delstate.deltids); + pfree(delstate.status); return latestRemovedXid; } diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 3fb8688f8f4c1..3d2dbed708309 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -3,7 +3,7 @@ * indexam.c * general index access 
method routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -179,6 +179,7 @@ index_insert(Relation indexRelation, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique, + bool indexUnchanged, IndexInfo *indexInfo) { RELATION_CHECKS; @@ -191,7 +192,8 @@ index_insert(Relation indexRelation, return indexRelation->rd_indam->aminsert(indexRelation, values, isnull, heap_t_ctid, heapRelation, - checkUnique, indexInfo); + checkUnique, indexUnchanged, + indexInfo); } /* diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index 27f555177eca3..92205325fbe4a 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -82,8 +82,8 @@ page.) A backwards scan has one additional bit of complexity: after following the left-link we must account for the possibility that the left sibling page got split before we could read it. So, we have to move right until we find a page whose right-link matches the page we -came from. (Actually, it's even harder than that; see deletion discussion -below.) +came from. (Actually, it's even harder than that; see page deletion +discussion below.) Page read locks are held only for as long as a scan is examining a page. To minimize lock/unlock traffic, an index scan always searches a leaf page @@ -163,16 +163,16 @@ pages (though suffix truncation is also considered). Note we must include the incoming item in this calculation, otherwise it is possible to find that the incoming item doesn't fit on the split page where it needs to go! 
-The Deletion Algorithm ----------------------- +Deleting index tuples during VACUUM +----------------------------------- Before deleting a leaf item, we get a super-exclusive lock on the target page, so that no other backend has a pin on the page when the deletion starts. This is not necessary for correctness in terms of the btree index operations themselves; as explained above, index scans logically stop "between" pages and so can't lose their place. The reason we do it is to -provide an interlock between non-full VACUUM and indexscans. Since VACUUM -deletes index entries before reclaiming heap tuple line pointers, the +provide an interlock between VACUUM and indexscans. Since VACUUM deletes +index entries before reclaiming heap tuple line pointers, the super-exclusive lock guarantees that VACUUM can't reclaim for re-use a line pointer that an indexscanning process might be about to visit. This guarantee works only for simple indexscans that visit the heap in sync @@ -202,7 +202,8 @@ from the page have been processed. This guarantees that the btbulkdelete call cannot return while any indexscan is still holding a copy of a deleted index tuple if the scan could be confused by that. Note that this requirement does not say that btbulkdelete must visit the pages in any -particular order. (See also on-the-fly deletion, below.) +particular order. (See also simple deletion and bottom-up deletion, +below.) There is no such interlocking for deletion of items in internal pages, since backends keep no lock nor pin on a page they have descended past. @@ -213,8 +214,8 @@ page). Since we hold a lock on the lower page (per L&Y) until we have re-found the parent item that links to it, we can be assured that the parent item does still exist and can't have been deleted. -Page Deletion -------------- +Deleting entire pages during VACUUM +----------------------------------- We consider deleting an entire page from the btree only when it's become completely empty of items. 
(Merging partly-full pages would allow better @@ -419,8 +420,8 @@ without a backend's cached page also being detected as invalidated, but only when we happen to recycle a block that once again gets recycled as the rightmost leaf page. -On-the-Fly Deletion Of Index Tuples ------------------------------------ +Simple deletion +--------------- If a process visits a heap tuple and finds that it's dead and removable (ie, dead to all open transactions, not only that process), then we can @@ -434,24 +435,27 @@ LP_DEAD bits are often set when checking a unique index for conflicts on insert (this is simpler because it takes place when we hold an exclusive lock on the leaf page). -Once an index tuple has been marked LP_DEAD it can actually be removed +Once an index tuple has been marked LP_DEAD it can actually be deleted from the index immediately; since index scans only stop "between" pages, no scan can lose its place from such a deletion. We separate the steps because we allow LP_DEAD to be set with only a share lock (it's exactly like a hint bit for a heap tuple), but physically removing tuples requires -exclusive lock. In the current code we try to remove LP_DEAD tuples when -we are otherwise faced with having to split a page to do an insertion (and -hence have exclusive lock on it already). Deduplication can also prevent -a page split, but removing LP_DEAD tuples is the preferred approach. -(Note that posting list tuples can only have their LP_DEAD bit set when -every table TID within the posting list is known dead.) - -This leaves the index in a state where it has no entry for a dead tuple -that still exists in the heap. This is not a problem for the current -implementation of VACUUM, but it could be a problem for anything that -explicitly tries to find index entries for dead tuples. (However, the -same situation is created by REINDEX, since it doesn't enter dead -tuples into the index.) +exclusive lock. 
Also, delaying the deletion often allows us to pick up +extra index tuples that weren't initially safe for index scans to mark +LP_DEAD. We do this with index tuples whose TIDs point to the same table +blocks as an LP_DEAD-marked tuple. They're practically free to check in +passing, and have a pretty good chance of being safe to delete due to +various locality effects. + +We only try to delete LP_DEAD tuples (and nearby tuples) when we are +otherwise faced with having to split a page to do an insertion (and hence +have exclusive lock on it already). Deduplication and bottom-up index +deletion can also prevent a page split, but simple deletion is always our +preferred approach. (Note that posting list tuples can only have their +LP_DEAD bit set when every table TID within the posting list is known +dead. This isn't much of a problem in practice because LP_DEAD bits are +just a starting point for simple deletion -- we still manage to perform +granular deletes of posting list TIDs quite often.) It's sufficient to have an exclusive lock on the index page, not a super-exclusive lock, to do deletion of LP_DEAD items. It might seem @@ -469,6 +473,70 @@ LSN of the page, and only act to set LP_DEAD bits when the LSN has not changed at all. (Avoiding dropping the pin entirely also makes it safe, of course.) +Bottom-Up deletion +------------------ + +We attempt to delete whatever duplicates happen to be present on the page +when the duplicates are suspected to be caused by version churn from +successive UPDATEs. This only happens when we receive an executor hint +indicating that optimizations like heapam's HOT have not worked out for +the index -- the incoming tuple must be a logically unchanged duplicate +which is needed for MVCC purposes, suggesting that that might well be the +dominant source of new index tuples on the leaf page in question. 
(Also, +bottom-up deletion is triggered within unique indexes in cases with +continual INSERT and DELETE related churn, since that is easy to detect +without any external hint.) + +Simple deletion will already have failed to prevent a page split when a +bottom-up deletion pass takes place (often because no LP_DEAD bits were +ever set on the page). The two mechanisms have closely related +implementations. The same WAL records are used for each operation, and +the same tableam infrastructure is used to determine what TIDs/tuples are +actually safe to delete. The implementations only differ in how they pick +TIDs to consider for deletion, and whether or not the tableam will give up +before accessing all table blocks (bottom-up deletion lives with the +uncertainty of its success by keeping the cost of failure low). Even +still, the two mechanisms are clearly distinct at the conceptual level. + +Bottom-up index deletion is driven entirely by heuristics (whereas simple +deletion is guaranteed to delete at least those index tuples that are +already LP_DEAD marked -- there must be at least one). We have no +certainty that we'll find even one index tuple to delete. That's why we +closely cooperate with the tableam to keep the costs it pays in balance +with the benefits we receive. The interface that we use for this is +described in detail in access/tableam.h. + +Bottom-up index deletion can be thought of as a backstop mechanism against +unnecessary version-driven page splits. It is based in part on an idea +from generational garbage collection: the "generational hypothesis". This +is the empirical observation that "most objects die young". Within +nbtree, new index tuples often quickly appear in the same place, and then +quickly become garbage. There can be intense concentrations of garbage in +relatively few leaf pages with certain workloads (or there could be in +earlier versions of PostgreSQL without bottom-up index deletion, at +least). 
See doc/src/sgml/btree.sgml for a high-level description of the +design principles behind bottom-up index deletion in nbtree, including +details of how it complements VACUUM. + +We expect to find a reasonably large number of tuples that are safe to +delete within each bottom-up pass. If we don't then we won't need to +consider the question of bottom-up deletion for the same leaf page for +quite a while (usually because the page splits, which resolves the +situation for the time being). We expect to perform regular bottom-up +deletion operations against pages that are at constant risk of unnecessary +page splits caused only by version churn. When the mechanism works well +we'll constantly be "on the verge" of having version-churn-driven page +splits, but never actually have even one. + +Our duplicate heuristics work well despite being fairly simple. +Unnecessary page splits only occur when there are truly pathological +levels of version churn (in theory a small amount of version churn could +make a page split occur earlier than strictly necessary, but that's pretty +harmless). We don't have to understand the underlying workload; we only +have to understand the general nature of the pathology that we target. +Version churn is easy to spot when it is truly pathological. Affected +leaf pages are fairly homogeneous. + WAL Considerations ------------------ @@ -767,9 +835,10 @@ into a single physical tuple with a posting list (a simple array of heap TIDs with the standard item pointer format). Deduplication is always applied lazily, at the point where it would otherwise be necessary to perform a page split. It occurs only when LP_DEAD items have been -removed, as our last line of defense against splitting a leaf page. We -can set the LP_DEAD bit with posting list tuples, though only when all -TIDs are known dead. +removed, as our last line of defense against splitting a leaf page +(bottom-up index deletion may be attempted first, as our second last line +of defense). 
We can set the LP_DEAD bit with posting list tuples, though +only when all TIDs are known dead. Our lazy approach to deduplication allows the page space accounting used during page splits to have absolutely minimal special case logic for @@ -788,7 +857,10 @@ page space accounting (see later section), so it's not clear how compression could be integrated with nbtree. Besides, posting list compression does not offer a compelling trade-off for nbtree, since in general nbtree is optimized for consistent performance with many -concurrent readers and writers. +concurrent readers and writers. Compression would also make the deletion +of a subset of TIDs from a posting list slow and complicated, which would +be a big problem for workloads that depend heavily on bottom-up index +deletion. A major goal of our lazy approach to deduplication is to limit the performance impact of deduplication with random updates. Even concurrent @@ -826,6 +898,16 @@ delay a split that is probably inevitable anyway. This allows us to avoid the overhead of attempting to deduplicate with unique indexes that always have few or no duplicates. +Note: Avoiding "unnecessary" page splits driven by version churn is also +the goal of bottom-up index deletion, which was added to PostgreSQL 14. +Bottom-up index deletion is now the preferred way to deal with this +problem (with all kinds of indexes, though especially with unique +indexes). Still, deduplication can sometimes augment bottom-up index +deletion. When deletion cannot free tuples (due to an old snapshot +holding up cleanup), falling back on deduplication provides additional +capacity. Delaying the page split by deduplicating can allow a future +bottom-up deletion pass of the same page to succeed. 
+ Posting list splits ------------------- diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c index fdaa7a335fb98..7ac73cb8c2d53 100644 --- a/src/backend/access/nbtree/nbtcompare.c +++ b/src/backend/access/nbtree/nbtcompare.c @@ -3,7 +3,7 @@ * nbtcompare.c * Comparison functions for btree access method. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/nbtree/nbtdedup.c b/src/backend/access/nbtree/nbtdedup.c index 9e535124c4652..854e3b2cf9acf 100644 --- a/src/backend/access/nbtree/nbtdedup.c +++ b/src/backend/access/nbtree/nbtdedup.c @@ -1,9 +1,9 @@ /*------------------------------------------------------------------------- * * nbtdedup.c - * Deduplicate items in Postgres btrees. + * Deduplicate or bottom-up delete items in Postgres btrees. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -19,6 +19,8 @@ #include "miscadmin.h" #include "utils/rel.h" +static void _bt_bottomupdel_finish_pending(Page page, BTDedupState state, + TM_IndexDeleteOp *delstate); static bool _bt_do_singleval(Relation rel, Page page, BTDedupState state, OffsetNumber minoff, IndexTuple newitem); static void _bt_singleval_fillfactor(Page page, BTDedupState state, @@ -267,6 +269,147 @@ _bt_dedup_pass(Relation rel, Buffer buf, Relation heapRel, IndexTuple newitem, pfree(state); } +/* + * Perform bottom-up index deletion pass. + * + * See if duplicate index tuples (plus certain nearby tuples) are eligible to + * be deleted via bottom-up index deletion. 
The high level goal here is to + * entirely prevent "unnecessary" page splits caused by MVCC version churn + * from UPDATEs (when the UPDATEs don't logically modify any of the columns + * covered by the 'rel' index). This is qualitative, not quantitative -- we + * do not particularly care about once-off opportunities to delete many index + * tuples together. + * + * See nbtree/README for details on the design of nbtree bottom-up deletion. + * See access/tableam.h for a description of how we're expected to cooperate + * with the tableam. + * + * Returns true on success, in which case caller can assume page split will be + * avoided for a reasonable amount of time. Returns false when caller should + * deduplicate the page (if possible at all). + * + * Note: Occasionally we return true despite failing to delete enough items to + * avoid a split. This makes caller skip deduplication and go split the page + * right away. Our return value is always just advisory information. + * + * Note: Caller should have already deleted all existing items with their + * LP_DEAD bits set. 
+ */ +bool +_bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, + Size newitemsz) +{ + OffsetNumber offnum, + minoff, + maxoff; + Page page = BufferGetPage(buf); + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + BTDedupState state; + TM_IndexDeleteOp delstate; + bool neverdedup; + int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); + + /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */ + newitemsz += sizeof(ItemIdData); + + /* Initialize deduplication state */ + state = (BTDedupState) palloc(sizeof(BTDedupStateData)); + state->deduplicate = true; + state->nmaxitems = 0; + state->maxpostingsize = BLCKSZ; /* We're not really deduplicating */ + state->base = NULL; + state->baseoff = InvalidOffsetNumber; + state->basetupsize = 0; + state->htids = palloc(state->maxpostingsize); + state->nhtids = 0; + state->nitems = 0; + state->phystupsize = 0; + state->nintervals = 0; + + /* + * Initialize tableam state that describes bottom-up index deletion + * operation. + * + * We'll go on to ask the tableam to search for TIDs whose index tuples we + * can safely delete. The tableam will search until our leaf page space + * target is satisfied, or until the cost of continuing with the tableam + * operation seems too high. It focuses its efforts on TIDs associated + * with duplicate index tuples that we mark "promising". + * + * This space target is a little arbitrary. The tableam must be able to + * keep the costs and benefits in balance. We provide the tableam with + * exhaustive information about what might work, without directly + * concerning ourselves with avoiding work during the tableam call. Our + * role in costing the bottom-up deletion process is strictly advisory. 
+ */ + delstate.bottomup = true; + delstate.bottomupfreespace = Max(BLCKSZ / 16, newitemsz); + delstate.ndeltids = 0; + delstate.deltids = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexDelete)); + delstate.status = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexStatus)); + + minoff = P_FIRSTDATAKEY(opaque); + maxoff = PageGetMaxOffsetNumber(page); + for (offnum = minoff; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid = PageGetItemId(page, offnum); + IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); + + Assert(!ItemIdIsDead(itemid)); + + if (offnum == minoff) + { + /* itup starts first pending interval */ + _bt_dedup_start_pending(state, itup, offnum); + } + else if (_bt_keep_natts_fast(rel, state->base, itup) > nkeyatts && + _bt_dedup_save_htid(state, itup)) + { + /* Tuple is equal; just added its TIDs to pending interval */ + } + else + { + /* Finalize interval -- move its TIDs to delete state */ + _bt_bottomupdel_finish_pending(page, state, &delstate); + + /* itup starts new pending interval */ + _bt_dedup_start_pending(state, itup, offnum); + } + } + /* Finalize final interval -- move its TIDs to delete state */ + _bt_bottomupdel_finish_pending(page, state, &delstate); + + /* + * We don't give up now in the event of having few (or even zero) + * promising tuples for the tableam because it's not up to us as the index + * AM to manage costs (note that the tableam might have heuristics of its + * own that work out what to do). We should at least avoid having our + * caller do a useless deduplication pass after we return in the event of + * zero promising tuples, though. 
+ */ + neverdedup = false; + if (state->nintervals == 0) + neverdedup = true; + + pfree(state->htids); + pfree(state); + + /* Ask tableam which TIDs are deletable, then physically delete them */ + _bt_delitems_delete_check(rel, buf, heapRel, &delstate); + + pfree(delstate.deltids); + pfree(delstate.status); + + /* Report "success" to caller unconditionally to avoid deduplication */ + if (neverdedup) + return true; + + /* Don't dedup when we won't end up back here any time soon anyway */ + return PageGetExactFreeSpace(page) >= Max(BLCKSZ / 24, newitemsz); +} + /* * Create a new pending posting list tuple based on caller's base tuple. * @@ -452,6 +595,150 @@ _bt_dedup_finish_pending(Page newpage, BTDedupState state) return spacesaving; } +/* + * Finalize interval during bottom-up index deletion. + * + * During a bottom-up pass we expect that TIDs will be recorded in dedup state + * first, and then get moved over to delstate (in variable-sized batches) by + * calling here. Call here happens when the number of TIDs in a dedup + * interval is known, and interval gets finalized (i.e. when caller sees next + * tuple on the page is not a duplicate, or when caller runs out of tuples to + * process from leaf page). + * + * This is where bottom-up deletion determines and remembers which entries are + * duplicates. This will be important information to the tableam delete + * infrastructure later on. Plain index tuple duplicates are marked + * "promising" here, per tableam contract. + * + * Our approach to marking entries whose TIDs come from posting lists is more + * complicated. Posting lists can only be formed by a deduplication pass (or + * during an index build), so recent version churn affecting the pointed-to + * logical rows is not particularly likely. 
We may still give a weak signal + * about posting list tuples' entries (by marking just one of its TIDs/entries + * promising), though this is only a possibility in the event of further + * duplicate index tuples in final interval that covers posting list tuple (as + * in the plain tuple case). A weak signal/hint will be useful to the tableam + * when it has no stronger signal to go with for the deletion operation as a + * whole. + * + * The heuristics we use work well in practice because we only need to give + * the tableam the right _general_ idea about where to look. Garbage tends to + * naturally get concentrated in relatively few table blocks with workloads + * that bottom-up deletion targets. The tableam cannot possibly rank all + * available table blocks sensibly based on the hints we provide, but that's + * okay -- only the extremes matter. The tableam just needs to be able to + * predict which few table blocks will have the most tuples that are safe to + * delete for each deletion operation, with low variance across related + * deletion operations. 
+ */ +static void +_bt_bottomupdel_finish_pending(Page page, BTDedupState state, + TM_IndexDeleteOp *delstate) +{ + bool dupinterval = (state->nitems > 1); + + Assert(state->nitems > 0); + Assert(state->nitems <= state->nhtids); + Assert(state->intervals[state->nintervals].baseoff == state->baseoff); + + for (int i = 0; i < state->nitems; i++) + { + OffsetNumber offnum = state->baseoff + i; + ItemId itemid = PageGetItemId(page, offnum); + IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); + TM_IndexDelete *ideltid = &delstate->deltids[delstate->ndeltids]; + TM_IndexStatus *istatus = &delstate->status[delstate->ndeltids]; + + if (!BTreeTupleIsPosting(itup)) + { + /* Simple case: A plain non-pivot tuple */ + ideltid->tid = itup->t_tid; + ideltid->id = delstate->ndeltids; + istatus->idxoffnum = offnum; + istatus->knowndeletable = false; /* for now */ + istatus->promising = dupinterval; /* simple rule */ + istatus->freespace = ItemIdGetLength(itemid) + sizeof(ItemIdData); + + delstate->ndeltids++; + } + else + { + /* + * Complicated case: A posting list tuple. + * + * We make the conservative assumption that there can only be at + * most one affected logical row per posting list tuple. There + * will be at most one promising entry in deltids to represent + * this presumed lone logical row. Note that this isn't even + * considered unless the posting list tuple is also in an interval + * of duplicates -- this complicated rule is just a variant of the + * simple rule used to decide if plain index tuples are promising. 
+ */ + int nitem = BTreeTupleGetNPosting(itup); + bool firstpromising = false; + bool lastpromising = false; + + Assert(_bt_posting_valid(itup)); + + if (dupinterval) + { + /* + * Complicated rule: either the first or last TID in the + * posting list gets marked promising (if any at all) + */ + BlockNumber minblocklist, + midblocklist, + maxblocklist; + ItemPointer mintid, + midtid, + maxtid; + + mintid = BTreeTupleGetHeapTID(itup); + midtid = BTreeTupleGetPostingN(itup, nitem / 2); + maxtid = BTreeTupleGetMaxHeapTID(itup); + minblocklist = ItemPointerGetBlockNumber(mintid); + midblocklist = ItemPointerGetBlockNumber(midtid); + maxblocklist = ItemPointerGetBlockNumber(maxtid); + + /* Only entry with predominant table block can be promising */ + firstpromising = (minblocklist == midblocklist); + lastpromising = (!firstpromising && + midblocklist == maxblocklist); + } + + for (int p = 0; p < nitem; p++) + { + ItemPointer htid = BTreeTupleGetPostingN(itup, p); + + ideltid->tid = *htid; + ideltid->id = delstate->ndeltids; + istatus->idxoffnum = offnum; + istatus->knowndeletable = false; /* for now */ + istatus->promising = false; + if ((firstpromising && p == 0) || + (lastpromising && p == nitem - 1)) + istatus->promising = true; + istatus->freespace = sizeof(ItemPointerData); /* at worst */ + + ideltid++; + istatus++; + delstate->ndeltids++; + } + } + } + + if (dupinterval) + { + state->intervals[state->nintervals].nitems = state->nitems; + state->nintervals++; + } + + /* Reset state for next interval */ + state->nhtids = 0; + state->nitems = 0; + state->phystupsize = 0; +} + /* * Determine if page non-pivot tuples (data items) are all duplicates of the * same value -- if they are, deduplication's "single value" strategy should @@ -622,8 +909,8 @@ _bt_form_posting(IndexTuple base, ItemPointer htids, int nhtids) * Generate a replacement tuple by "updating" a posting list tuple so that it * no longer has TIDs that need to be deleted. * - * Used by VACUUM. 
Caller's vacposting argument points to the existing - * posting list tuple to be updated. + * Used by both VACUUM and index deletion. Caller's vacposting argument + * points to the existing posting list tuple to be updated. * * On return, caller's vacposting argument will point to final "updated" * tuple, which will be palloc()'d in caller's memory context. diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index dde43b1415ac1..e3336039125c9 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -3,7 +3,7 @@ * nbtinsert.c * Item insertion in Lehman and Yao btrees for Postgres. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -17,9 +17,9 @@ #include "access/nbtree.h" #include "access/nbtxlog.h" -#include "access/tableam.h" #include "access/transam.h" #include "access/xloginsert.h" +#include "lib/qunique.h" #include "miscadmin.h" #include "storage/lmgr.h" #include "storage/predicate.h" @@ -37,6 +37,7 @@ static TransactionId _bt_check_unique(Relation rel, BTInsertState insertstate, static OffsetNumber _bt_findinsertloc(Relation rel, BTInsertState insertstate, bool checkingunique, + bool indexUnchanged, BTStack stack, Relation heapRel); static void _bt_stepright(Relation rel, BTInsertState insertstate, BTStack stack); @@ -60,8 +61,16 @@ static inline bool _bt_pgaddtup(Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off, bool newfirstdataitem); static void _bt_delete_or_dedup_one_page(Relation rel, Relation heapRel, BTInsertState insertstate, - bool lpdeadonly, bool checkingunique, - bool uniquedup); + bool simpleonly, bool checkingunique, + bool uniquedup, bool indexUnchanged); +static void _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, + OffsetNumber *deletable, int ndeletable, + 
IndexTuple newitem, OffsetNumber minoff, + OffsetNumber maxoff); +static BlockNumber *_bt_deadblocks(Page page, OffsetNumber *deletable, + int ndeletable, IndexTuple newitem, + int *nblocks); +static inline int _bt_blk_cmp(const void *arg1, const void *arg2); /* * _bt_doinsert() -- Handle insertion of a single index tuple in the tree. @@ -75,6 +84,11 @@ static void _bt_delete_or_dedup_one_page(Relation rel, Relation heapRel, * For UNIQUE_CHECK_EXISTING we merely run the duplicate check, and * don't actually insert. * + * indexUnchanged executor hint indicates if itup is from an + * UPDATE that didn't logically change the indexed value, but + * must nevertheless have a new entry to point to a successor + * version. + * * The result value is only significant for UNIQUE_CHECK_PARTIAL: * it must be true if the entry is known unique, else false. * (In the current implementation we'll also return true after a @@ -83,7 +97,8 @@ static void _bt_delete_or_dedup_one_page(Relation rel, Relation heapRel, */ bool _bt_doinsert(Relation rel, IndexTuple itup, - IndexUniqueCheck checkUnique, Relation heapRel) + IndexUniqueCheck checkUnique, bool indexUnchanged, + Relation heapRel) { bool is_unique = false; BTInsertStateData insertstate; @@ -238,7 +253,7 @@ _bt_doinsert(Relation rel, IndexTuple itup, * checkingunique. */ newitemoff = _bt_findinsertloc(rel, &insertstate, checkingunique, - stack, heapRel); + indexUnchanged, stack, heapRel); _bt_insertonpg(rel, itup_key, insertstate.buf, InvalidBuffer, stack, itup, insertstate.itemsz, newitemoff, insertstate.postingoff, false); @@ -480,11 +495,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, * items as quickly as we can. We only apply _bt_compare() when * we get to a non-killed item. We could reuse the bounds to * avoid _bt_compare() calls for known equal tuples, but it - * doesn't seem worth it. 
Workloads with heavy update activity - * tend to have many deduplication passes, so we'll often avoid - * most of those comparisons, too (we call _bt_compare() when the - * posting list tuple is initially encountered, though not when - * processing later TIDs from the same tuple). + * doesn't seem worth it. */ if (!inposting) curitemid = PageGetItemId(page, offset); @@ -777,6 +788,17 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, * room for the new tuple, this function moves right, trying to find a * legal page that does.) * + * If 'indexUnchanged' is true, this is for an UPDATE that didn't + * logically change the indexed value, but must nevertheless have a new + * entry to point to a successor version. This hint from the executor + * will influence our behavior when the page might have to be split and + * we must consider our options. Bottom-up index deletion can avoid + * pathological version-driven page splits, but we only want to go to the + * trouble of trying it when we already have moderate confidence that + * it's appropriate. The hint should not significantly affect our + * behavior over time unless practically all inserts on to the leaf page + * get the hint. + * * On exit, insertstate buffer contains the chosen insertion page, and * the offset within that page is returned. 
If _bt_findinsertloc needed * to move right, the lock and pin on the original page are released, and @@ -793,6 +815,7 @@ static OffsetNumber _bt_findinsertloc(Relation rel, BTInsertState insertstate, bool checkingunique, + bool indexUnchanged, BTStack stack, Relation heapRel) { @@ -817,7 +840,7 @@ _bt_findinsertloc(Relation rel, if (itup_key->heapkeyspace) { /* Keep track of whether checkingunique duplicate seen */ - bool uniquedup = false; + bool uniquedup = indexUnchanged; /* * If we're inserting into a unique index, we may have to walk right @@ -874,14 +897,13 @@ _bt_findinsertloc(Relation rel, } /* - * If the target page is full, see if we can obtain enough space using - * one or more strategies (e.g. erasing LP_DEAD items, deduplication). - * Page splits are expensive, and should only go ahead when truly - * necessary. + * If the target page cannot fit newitem, try to avoid splitting the + * page on insert by performing deletion or deduplication now */ if (PageGetFreeSpace(page) < insertstate->itemsz) _bt_delete_or_dedup_one_page(rel, heapRel, insertstate, false, - checkingunique, uniquedup); + checkingunique, uniquedup, + indexUnchanged); } else { @@ -921,9 +943,9 @@ _bt_findinsertloc(Relation rel, */ if (P_HAS_GARBAGE(opaque)) { - /* Erase LP_DEAD items (won't deduplicate) */ + /* Perform simple deletion */ _bt_delete_or_dedup_one_page(rel, heapRel, insertstate, true, - checkingunique, false); + false, false, false); if (PageGetFreeSpace(page) >= insertstate->itemsz) break; /* OK, now we have enough space */ @@ -970,14 +992,11 @@ _bt_findinsertloc(Relation rel, /* * There is an overlapping posting list tuple with its LP_DEAD bit * set. We don't want to unnecessarily unset its LP_DEAD bit while - * performing a posting list split, so delete all LP_DEAD items early. - * This is the only case where LP_DEAD deletes happen even though - * there is space for newitem on the page. - * - * This can only erase LP_DEAD items (it won't deduplicate). 
+ * performing a posting list split, so perform simple index tuple + * deletion early. */ _bt_delete_or_dedup_one_page(rel, heapRel, insertstate, true, - checkingunique, false); + false, false, false); /* * Do new binary search. New insert location cannot overlap with any @@ -2606,21 +2625,19 @@ _bt_pgaddtup(Page page, } /* - * _bt_delete_or_dedup_one_page - Try to avoid a leaf page split by attempting - * a variety of operations. - * - * There are two operations performed here: deleting items already marked - * LP_DEAD, and deduplication. If both operations fail to free enough space - * for the incoming item then caller will go on to split the page. We always - * attempt our preferred strategy (which is to delete items whose LP_DEAD bit - * are set) first. If that doesn't work out we move on to deduplication. + * _bt_delete_or_dedup_one_page - Try to avoid a leaf page split. * - * Caller's checkingunique and uniquedup arguments help us decide if we should - * perform deduplication, which is primarily useful with low cardinality data, - * but can sometimes absorb version churn. + * There are three operations performed here: simple index deletion, bottom-up + * index deletion, and deduplication. If all three operations fail to free + * enough space for the incoming item then caller will go on to split the + * page. We always consider simple deletion first. If that doesn't work out + * we consider alternatives. Callers that only want us to consider simple + * deletion (without any fallback) ask for that using the 'simpleonly' + * argument. * - * Callers that only want us to look for/delete LP_DEAD items can ask for that - * directly by passing true 'lpdeadonly' argument. + * We usually pick only one alternative "complex" operation when simple + * deletion alone won't prevent a page split. The 'checkingunique', + * 'uniquedup', and 'indexUnchanged' arguments are used for that. 
* * Note: We used to only delete LP_DEAD items when the BTP_HAS_GARBAGE page * level flag was found set. The flag was useful back when there wasn't @@ -2638,12 +2655,13 @@ _bt_pgaddtup(Page page, static void _bt_delete_or_dedup_one_page(Relation rel, Relation heapRel, BTInsertState insertstate, - bool lpdeadonly, bool checkingunique, - bool uniquedup) + bool simpleonly, bool checkingunique, + bool uniquedup, bool indexUnchanged) { OffsetNumber deletable[MaxIndexTuplesPerPage]; int ndeletable = 0; OffsetNumber offnum, + minoff, maxoff; Buffer buffer = insertstate->buf; BTScanInsert itup_key = insertstate->itup_key; @@ -2651,14 +2669,19 @@ _bt_delete_or_dedup_one_page(Relation rel, Relation heapRel, BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); Assert(P_ISLEAF(opaque)); - Assert(lpdeadonly || itup_key->heapkeyspace); + Assert(simpleonly || itup_key->heapkeyspace); + Assert(!simpleonly || (!checkingunique && !uniquedup && !indexUnchanged)); /* * Scan over all items to see which ones need to be deleted according to - * LP_DEAD flags. + * LP_DEAD flags. We'll usually manage to delete a few extra items that + * are not marked LP_DEAD in passing. Often the extra items that actually + * end up getting deleted are items that would have had their LP_DEAD bit + * set before long anyway (if we opted not to include them as extras). 
*/ + minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); - for (offnum = P_FIRSTDATAKEY(opaque); + for (offnum = minoff; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) { @@ -2670,7 +2693,8 @@ _bt_delete_or_dedup_one_page(Relation rel, Relation heapRel, if (ndeletable > 0) { - _bt_delitems_delete(rel, buffer, deletable, ndeletable, heapRel); + _bt_simpledel_pass(rel, buffer, heapRel, deletable, ndeletable, + insertstate->itup, minoff, maxoff); insertstate->bounds_valid = false; /* Return when a page split has already been avoided */ @@ -2682,37 +2706,288 @@ _bt_delete_or_dedup_one_page(Relation rel, Relation heapRel, } /* - * Some callers only want to delete LP_DEAD items. Return early for these - * callers. + * We're done with simple deletion. Return early with callers that only + * call here so that simple deletion can be considered. This includes + * callers that explicitly ask for this and checkingunique callers that + * probably don't have any version churn duplicates on the page. * * Note: The page's BTP_HAS_GARBAGE hint flag may still be set when we * return at this point (or when we go on the try either or both of our * other strategies and they also fail). We do not bother expending a * separate write to clear it, however. Caller will definitely clear it - * when it goes on to split the page (plus deduplication knows to clear - * the flag when it actually modifies the page). + * when it goes on to split the page (note also that the deduplication + * process will clear the flag in passing, just to keep things tidy). */ - if (lpdeadonly) - return; - - /* - * We can get called in the checkingunique case when there is no reason to - * believe that there are any duplicates on the page; we should at least - * still check for LP_DEAD items. If that didn't work out, give up and - * let caller split the page. Deduplication cannot be justified given - * there is no reason to think that there are duplicates. 
- */ - if (checkingunique && !uniquedup) + if (simpleonly || (checkingunique && !uniquedup)) + { + Assert(!indexUnchanged); return; + } /* Assume bounds about to be invalidated (this is almost certain now) */ insertstate->bounds_valid = false; /* - * Perform deduplication pass, though only when it is enabled for the - * index and known to be safe (it must be an allequalimage index). + * Perform bottom-up index deletion pass when executor hint indicated that + * incoming item is logically unchanged, or for a unique index that is + * known to have physical duplicates for some other reason. (There is a + * large overlap between these two cases for a unique index. It's worth + * having both triggering conditions in order to apply the optimization in + * the event of successive related INSERT and DELETE statements.) + * + * We'll go on to do a deduplication pass when a bottom-up pass fails to + * delete an acceptable amount of free space (a significant fraction of + * the page, or space for the new item, whichever is greater). + * + * Note: Bottom-up index deletion uses the same equality/equivalence + * routines as deduplication internally. However, it does not merge + * together index tuples, so the same correctness considerations do not + * apply. We deliberately omit an index-is-allequalimage test here. */ + if ((indexUnchanged || uniquedup) && + _bt_bottomupdel_pass(rel, buffer, heapRel, insertstate->itemsz)) + return; + + /* Perform deduplication pass (when enabled and index-is-allequalimage) */ if (BTGetDeduplicateItems(rel) && itup_key->allequalimage) _bt_dedup_pass(rel, buffer, heapRel, insertstate->itup, insertstate->itemsz, checkingunique); } + +/* + * _bt_simpledel_pass - Simple index tuple deletion pass. + * + * We delete all LP_DEAD-set index tuples on a leaf page. The offset numbers + * of all such tuples are determined by caller (caller passes these to us as + * its 'deletable' argument). 
+ * + * We might also delete extra index tuples that turn out to be safe to delete + * in passing (though they must be cheap to check in passing to begin with). + * There is no certainty that any extra tuples will be deleted, though. The + * high level goal of the approach we take is to get the most out of each call + * here (without noticeably increasing the per-call overhead compared to what + * we need to do just to be able to delete the page's LP_DEAD-marked index + * tuples). + * + * The number of extra index tuples that turn out to be deletable might + * greatly exceed the number of LP_DEAD-marked index tuples due to various + * locality related effects. For example, it's possible that the total number + * of table blocks (pointed to by all TIDs on the leaf page) is naturally + * quite low, in which case we might end up checking if it's possible to + * delete _most_ index tuples on the page (without the tableam needing to + * access additional table blocks). The tableam will sometimes stumble upon + * _many_ extra deletable index tuples in indexes where this pattern is + * common. + * + * See nbtree/README for further details on simple index tuple deletion. 
+ */ +static void +_bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, + OffsetNumber *deletable, int ndeletable, IndexTuple newitem, + OffsetNumber minoff, OffsetNumber maxoff) +{ + Page page = BufferGetPage(buffer); + BlockNumber *deadblocks; + int ndeadblocks; + TM_IndexDeleteOp delstate; + OffsetNumber offnum; + + /* Get array of table blocks pointed to by LP_DEAD-set tuples */ + deadblocks = _bt_deadblocks(page, deletable, ndeletable, newitem, + &ndeadblocks); + + /* Initialize tableam state that describes index deletion operation */ + delstate.bottomup = false; + delstate.bottomupfreespace = 0; + delstate.ndeltids = 0; + delstate.deltids = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexDelete)); + delstate.status = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexStatus)); + + for (offnum = minoff; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid = PageGetItemId(page, offnum); + IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); + TM_IndexDelete *odeltid = &delstate.deltids[delstate.ndeltids]; + TM_IndexStatus *ostatus = &delstate.status[delstate.ndeltids]; + BlockNumber tidblock; + void *match; + + if (!BTreeTupleIsPosting(itup)) + { + tidblock = ItemPointerGetBlockNumber(&itup->t_tid); + match = bsearch(&tidblock, deadblocks, ndeadblocks, + sizeof(BlockNumber), _bt_blk_cmp); + + if (!match) + { + Assert(!ItemIdIsDead(itemid)); + continue; + } + + /* + * TID's table block is among those pointed to by the TIDs from + * LP_DEAD-bit set tuples on page -- add TID to deltids + */ + odeltid->tid = itup->t_tid; + odeltid->id = delstate.ndeltids; + ostatus->idxoffnum = offnum; + ostatus->knowndeletable = ItemIdIsDead(itemid); + ostatus->promising = false; /* unused */ + ostatus->freespace = 0; /* unused */ + + delstate.ndeltids++; + } + else + { + int nitem = BTreeTupleGetNPosting(itup); + + for (int p = 0; p < nitem; p++) + { + ItemPointer tid = BTreeTupleGetPostingN(itup, p); + + tidblock = ItemPointerGetBlockNumber(tid); + 
match = bsearch(&tidblock, deadblocks, ndeadblocks, + sizeof(BlockNumber), _bt_blk_cmp); + + if (!match) + { + Assert(!ItemIdIsDead(itemid)); + continue; + } + + /* + * TID's table block is among those pointed to by the TIDs + * from LP_DEAD-bit set tuples on page -- add TID to deltids + */ + odeltid->tid = *tid; + odeltid->id = delstate.ndeltids; + ostatus->idxoffnum = offnum; + ostatus->knowndeletable = ItemIdIsDead(itemid); + ostatus->promising = false; /* unused */ + ostatus->freespace = 0; /* unused */ + + odeltid++; + ostatus++; + delstate.ndeltids++; + } + } + } + + pfree(deadblocks); + + Assert(delstate.ndeltids >= ndeletable); + + /* Physically delete LP_DEAD tuples (plus any delete-safe extra TIDs) */ + _bt_delitems_delete_check(rel, buffer, heapRel, &delstate); + + pfree(delstate.deltids); + pfree(delstate.status); +} + +/* + * _bt_deadblocks() -- Get LP_DEAD related table blocks. + * + * Builds sorted and unique-ified array of table block numbers from index + * tuple TIDs whose line pointers are marked LP_DEAD. Also adds the table + * block from incoming newitem just in case it isn't among the LP_DEAD-related + * table blocks. + * + * Always counting the newitem's table block as an LP_DEAD related block makes + * sense because the cost is consistently low; it is practically certain that + * the table block will not incur a buffer miss in tableam. On the other hand + * the benefit is often quite high. There is a decent chance that there will + * be some deletable items from this block, since in general most garbage + * tuples became garbage in the recent past (in many cases this won't be the + * first logical row that core code added to/modified in table block + * recently). + * + * Returns final array, and sets *nblocks to its final size for caller. 
+ */ +static BlockNumber * +_bt_deadblocks(Page page, OffsetNumber *deletable, int ndeletable, + IndexTuple newitem, int *nblocks) +{ + int spacentids, + ntids; + BlockNumber *tidblocks; + + /* + * Accumulate each TID's block in array whose initial size has space for + * one table block per LP_DEAD-set tuple (plus space for the newitem table + * block). Array will only need to grow when there are LP_DEAD-marked + * posting list tuples (which is not that common). + */ + spacentids = ndeletable + 1; + ntids = 0; + tidblocks = (BlockNumber *) palloc(sizeof(BlockNumber) * spacentids); + + /* + * First add the table block for the incoming newitem. This is the one + * case where simple deletion can visit a table block that doesn't have + * any known deletable items. + */ + Assert(!BTreeTupleIsPosting(newitem) && !BTreeTupleIsPivot(newitem)); + tidblocks[ntids++] = ItemPointerGetBlockNumber(&newitem->t_tid); + + for (int i = 0; i < ndeletable; i++) + { + ItemId itemid = PageGetItemId(page, deletable[i]); + IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); + + Assert(ItemIdIsDead(itemid)); + + if (!BTreeTupleIsPosting(itup)) + { + if (ntids + 1 > spacentids) + { + spacentids *= 2; + tidblocks = (BlockNumber *) + repalloc(tidblocks, sizeof(BlockNumber) * spacentids); + } + + tidblocks[ntids++] = ItemPointerGetBlockNumber(&itup->t_tid); + } + else + { + int nposting = BTreeTupleGetNPosting(itup); + + if (ntids + nposting > spacentids) + { + spacentids = Max(spacentids * 2, ntids + nposting); + tidblocks = (BlockNumber *) + repalloc(tidblocks, sizeof(BlockNumber) * spacentids); + } + + for (int j = 0; j < nposting; j++) + { + ItemPointer tid = BTreeTupleGetPostingN(itup, j); + + tidblocks[ntids++] = ItemPointerGetBlockNumber(tid); + } + } + } + + qsort(tidblocks, ntids, sizeof(BlockNumber), _bt_blk_cmp); + *nblocks = qunique(tidblocks, ntids, sizeof(BlockNumber), _bt_blk_cmp); + + return tidblocks; +} + +/* + * _bt_blk_cmp() -- qsort comparison function for 
_bt_simpledel_pass + */ +static inline int +_bt_blk_cmp(const void *arg1, const void *arg2) +{ + BlockNumber b1 = *((BlockNumber *) arg1); + BlockNumber b2 = *((BlockNumber *) arg2); + + if (b1 < b2) + return -1; + else if (b1 > b2) + return 1; + + return 0; +} diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 793434c026ca0..41dc3f8fdff6c 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -4,7 +4,7 @@ * BTree-specific page management code for the Postgres btree access * method. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -38,8 +38,13 @@ static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf); static void _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid); -static TransactionId _bt_xid_horizon(Relation rel, Relation heapRel, Page page, - OffsetNumber *deletable, int ndeletable); +static void _bt_delitems_delete(Relation rel, Buffer buf, + TransactionId latestRemovedXid, + OffsetNumber *deletable, int ndeletable, + BTVacuumPosting *updatable, int nupdatable); +static char *_bt_delitems_update(BTVacuumPosting *updatable, int nupdatable, + OffsetNumber *updatedoffsets, + Size *updatedbuflen, bool needswal); static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack); static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, @@ -1110,15 +1115,16 @@ _bt_page_recyclable(Page page) * sorted in ascending order. * * Routine deals with deleting TIDs when some (but not all) of the heap TIDs - * in an existing posting list item are to be removed by VACUUM. This works - * by updating/overwriting an existing item with caller's new version of the - * item (a version that lacks the TIDs that are to be deleted). 
+ * in an existing posting list item are to be removed. This works by + * updating/overwriting an existing item with caller's new version of the item + * (a version that lacks the TIDs that are to be deleted). * * We record VACUUMs and b-tree deletes differently in WAL. Deletes must - * generate their own latestRemovedXid by accessing the heap directly, whereas - * VACUUMs rely on the initial heap scan taking care of it indirectly. Also, - * only VACUUM can perform granular deletes of individual TIDs in posting list - * tuples. + * generate their own latestRemovedXid by accessing the table directly, + * whereas VACUUMs rely on the initial VACUUM table scan performing + * WAL-logging that takes care of the issue for the table's indexes + * indirectly. Also, we remove the VACUUM cycle ID from pages, which b-tree + * deletes don't do. */ void _bt_delitems_vacuum(Relation rel, Buffer buf, @@ -1127,7 +1133,7 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, { Page page = BufferGetPage(buf); BTPageOpaque opaque; - Size itemsz; + bool needswal = RelationNeedsWAL(rel); char *updatedbuf = NULL; Size updatedbuflen = 0; OffsetNumber updatedoffsets[MaxIndexTuplesPerPage]; @@ -1135,45 +1141,11 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, /* Shouldn't be called unless there's something to do */ Assert(ndeletable > 0 || nupdatable > 0); - for (int i = 0; i < nupdatable; i++) - { - /* Replace work area IndexTuple with updated version */ - _bt_update_posting(updatable[i]); - - /* Maintain array of updatable page offsets for WAL record */ - updatedoffsets[i] = updatable[i]->updatedoffset; - } - - /* XLOG stuff -- allocate and fill buffer before critical section */ - if (nupdatable > 0 && RelationNeedsWAL(rel)) - { - Size offset = 0; - - for (int i = 0; i < nupdatable; i++) - { - BTVacuumPosting vacposting = updatable[i]; - - itemsz = SizeOfBtreeUpdate + - vacposting->ndeletedtids * sizeof(uint16); - updatedbuflen += itemsz; - } - - updatedbuf = palloc(updatedbuflen); - for (int i 
= 0; i < nupdatable; i++) - { - BTVacuumPosting vacposting = updatable[i]; - xl_btree_update update; - - update.ndeletedtids = vacposting->ndeletedtids; - memcpy(updatedbuf + offset, &update.ndeletedtids, - SizeOfBtreeUpdate); - offset += SizeOfBtreeUpdate; - - itemsz = update.ndeletedtids * sizeof(uint16); - memcpy(updatedbuf + offset, vacposting->deletetids, itemsz); - offset += itemsz; - } - } + /* Generate new version of posting lists without deleted TIDs */ + if (nupdatable > 0) + updatedbuf = _bt_delitems_update(updatable, nupdatable, + updatedoffsets, &updatedbuflen, + needswal); /* No ereport(ERROR) until changes are logged */ START_CRIT_SECTION(); @@ -1194,6 +1166,7 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, { OffsetNumber updatedoffset = updatedoffsets[i]; IndexTuple itup; + Size itemsz; itup = updatable[i]->itup; itemsz = MAXALIGN(IndexTupleSize(itup)); @@ -1218,7 +1191,7 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, * Clear the BTP_HAS_GARBAGE page flag. * * This flag indicates the presence of LP_DEAD items on the page (though - * not reliably). Note that we only trust it with pg_upgrade'd + * not reliably). Note that we only rely on it with pg_upgrade'd * !heapkeyspace indexes. That's why clearing it here won't usually * interfere with _bt_delitems_delete(). */ @@ -1227,7 +1200,7 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, MarkBufferDirty(buf); /* XLOG stuff */ - if (RelationNeedsWAL(rel)) + if (needswal) { XLogRecPtr recptr; xl_btree_vacuum xlrec_vacuum; @@ -1260,7 +1233,7 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, /* can't leak memory here */ if (updatedbuf != NULL) pfree(updatedbuf); - /* free tuples generated by calling _bt_update_posting() */ + /* free tuples allocated within _bt_delitems_update() */ for (int i = 0; i < nupdatable; i++) pfree(updatable[i]->itup); } @@ -1269,40 +1242,65 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, * Delete item(s) from a btree leaf page during single-page cleanup. 
* * This routine assumes that the caller has pinned and write locked the - * buffer. Also, the given deletable array *must* be sorted in ascending - * order. + * buffer. Also, the given deletable and updatable arrays *must* be sorted in + * ascending order. + * + * Routine deals with deleting TIDs when some (but not all) of the heap TIDs + * in an existing posting list item are to be removed. This works by + * updating/overwriting an existing item with caller's new version of the item + * (a version that lacks the TIDs that are to be deleted). * * This is nearly the same as _bt_delitems_vacuum as far as what it does to - * the page, but it needs to generate its own latestRemovedXid by accessing - * the heap. This is used by the REDO routine to generate recovery conflicts. - * Also, it doesn't handle posting list tuples unless the entire tuple can be - * deleted as a whole (since there is only one LP_DEAD bit per line pointer). + * the page, but it needs its own latestRemovedXid from caller (caller gets + * this from tableam). This is used by the REDO routine to generate recovery + * conflicts. The other difference is that only _bt_delitems_vacuum will + * clear page's VACUUM cycle ID. 
*/ -void -_bt_delitems_delete(Relation rel, Buffer buf, +static void +_bt_delitems_delete(Relation rel, Buffer buf, TransactionId latestRemovedXid, OffsetNumber *deletable, int ndeletable, - Relation heapRel) + BTVacuumPosting *updatable, int nupdatable) { Page page = BufferGetPage(buf); BTPageOpaque opaque; - TransactionId latestRemovedXid = InvalidTransactionId; + bool needswal = RelationNeedsWAL(rel); + char *updatedbuf = NULL; + Size updatedbuflen = 0; + OffsetNumber updatedoffsets[MaxIndexTuplesPerPage]; /* Shouldn't be called unless there's something to do */ - Assert(ndeletable > 0); + Assert(ndeletable > 0 || nupdatable > 0); - if (XLogStandbyInfoActive() && RelationNeedsWAL(rel)) - latestRemovedXid = - _bt_xid_horizon(rel, heapRel, page, deletable, ndeletable); + /* Generate new versions of posting lists without deleted TIDs */ + if (nupdatable > 0) + updatedbuf = _bt_delitems_update(updatable, nupdatable, + updatedoffsets, &updatedbuflen, + needswal); /* No ereport(ERROR) until changes are logged */ START_CRIT_SECTION(); - /* Fix the page */ - PageIndexMultiDelete(page, deletable, ndeletable); + /* Handle updates and deletes just like _bt_delitems_vacuum */ + for (int i = 0; i < nupdatable; i++) + { + OffsetNumber updatedoffset = updatedoffsets[i]; + IndexTuple itup; + Size itemsz; + + itup = updatable[i]->itup; + itemsz = MAXALIGN(IndexTupleSize(itup)); + if (!PageIndexTupleOverwrite(page, updatedoffset, (Item) itup, + itemsz)) + elog(PANIC, "failed to update partially dead item in block %u of index \"%s\"", + BufferGetBlockNumber(buf), RelationGetRelationName(rel)); + } + + if (ndeletable > 0) + PageIndexMultiDelete(page, deletable, ndeletable); /* - * Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID, - * because this is not called by VACUUM + * Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID at + * this point. The VACUUM command alone controls vacuum cycle IDs. 
*/ opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -1310,7 +1308,7 @@ _bt_delitems_delete(Relation rel, Buffer buf, * Clear the BTP_HAS_GARBAGE page flag. * * This flag indicates the presence of LP_DEAD items on the page (though - * not reliably). Note that we only trust it with pg_upgrade'd + * not reliably). Note that we only rely on it with pg_upgrade'd * !heapkeyspace indexes. */ opaque->btpo_flags &= ~BTP_HAS_GARBAGE; @@ -1318,25 +1316,29 @@ _bt_delitems_delete(Relation rel, Buffer buf, MarkBufferDirty(buf); /* XLOG stuff */ - if (RelationNeedsWAL(rel)) + if (needswal) { XLogRecPtr recptr; xl_btree_delete xlrec_delete; xlrec_delete.latestRemovedXid = latestRemovedXid; xlrec_delete.ndeleted = ndeletable; + xlrec_delete.nupdated = nupdatable; XLogBeginInsert(); XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete); - /* - * The deletable array is not in the buffer, but pretend that it is. - * When XLogInsert stores the whole buffer, the array need not be - * stored too. - */ - XLogRegisterBufData(0, (char *) deletable, - ndeletable * sizeof(OffsetNumber)); + if (ndeletable > 0) + XLogRegisterBufData(0, (char *) deletable, + ndeletable * sizeof(OffsetNumber)); + + if (nupdatable > 0) + { + XLogRegisterBufData(0, (char *) updatedoffsets, + nupdatable * sizeof(OffsetNumber)); + XLogRegisterBufData(0, updatedbuf, updatedbuflen); + } recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE); @@ -1344,83 +1346,313 @@ _bt_delitems_delete(Relation rel, Buffer buf, } END_CRIT_SECTION(); + + /* can't leak memory here */ + if (updatedbuf != NULL) + pfree(updatedbuf); + /* free tuples allocated within _bt_delitems_update() */ + for (int i = 0; i < nupdatable; i++) + pfree(updatable[i]->itup); } /* - * Get the latestRemovedXid from the table entries pointed to by the non-pivot - * tuples being deleted. + * Set up state needed to delete TIDs from posting list tuples via "updating" + * the tuple. 
Performs steps common to both _bt_delitems_vacuum and + * _bt_delitems_delete. These steps must take place before each function's + * critical section begins. + * + * updatable and nupdatable are inputs, though note that we will use + * _bt_update_posting() to replace the original itup with a pointer to a final + * version in palloc()'d memory. Caller should free the tuples when it's done. + * + * The first nupdatable entries from updatedoffsets are set to the page offset + * number for posting list tuples that caller updates. This is mostly useful + * because caller may need to WAL-log the page offsets (though we always do + * this for caller out of convenience). * - * This is a specialized version of index_compute_xid_horizon_for_tuples(). - * It's needed because btree tuples don't always store table TID using the - * standard index tuple header field. + * Returns buffer consisting of an array of xl_btree_update structs that + * describe the steps we perform here for caller (though only when needswal is + * true). Also sets *updatedbuflen to the final size of the buffer. This + * buffer is used by caller when WAL logging is required. 
*/ -static TransactionId -_bt_xid_horizon(Relation rel, Relation heapRel, Page page, - OffsetNumber *deletable, int ndeletable) +static char * +_bt_delitems_update(BTVacuumPosting *updatable, int nupdatable, + OffsetNumber *updatedoffsets, Size *updatedbuflen, + bool needswal) { - TransactionId latestRemovedXid = InvalidTransactionId; - int spacenhtids; - int nhtids; - ItemPointer htids; - - /* Array will grow iff there are posting list tuples to consider */ - spacenhtids = ndeletable; - nhtids = 0; - htids = (ItemPointer) palloc(sizeof(ItemPointerData) * spacenhtids); - for (int i = 0; i < ndeletable; i++) + char *updatedbuf = NULL; + Size buflen = 0; + + /* Shouldn't be called unless there's something to do */ + Assert(nupdatable > 0); + + for (int i = 0; i < nupdatable; i++) { - ItemId itemid; - IndexTuple itup; + BTVacuumPosting vacposting = updatable[i]; + Size itemsz; - itemid = PageGetItemId(page, deletable[i]); - itup = (IndexTuple) PageGetItem(page, itemid); + /* Replace work area IndexTuple with updated version */ + _bt_update_posting(vacposting); - Assert(ItemIdIsDead(itemid)); - Assert(!BTreeTupleIsPivot(itup)); + /* Keep track of size of xl_btree_update for updatedbuf in passing */ + itemsz = SizeOfBtreeUpdate + vacposting->ndeletedtids * sizeof(uint16); + buflen += itemsz; - if (!BTreeTupleIsPosting(itup)) + /* Build updatedoffsets buffer in passing */ + updatedoffsets[i] = vacposting->updatedoffset; + } + + /* XLOG stuff */ + if (needswal) + { + Size offset = 0; + + /* Allocate, set final size for caller */ + updatedbuf = palloc(buflen); + *updatedbuflen = buflen; + for (int i = 0; i < nupdatable; i++) { - if (nhtids + 1 > spacenhtids) - { - spacenhtids *= 2; - htids = (ItemPointer) - repalloc(htids, sizeof(ItemPointerData) * spacenhtids); - } + BTVacuumPosting vacposting = updatable[i]; + Size itemsz; + xl_btree_update update; + + update.ndeletedtids = vacposting->ndeletedtids; + memcpy(updatedbuf + offset, &update.ndeletedtids, + 
SizeOfBtreeUpdate); + offset += SizeOfBtreeUpdate; - Assert(ItemPointerIsValid(&itup->t_tid)); - ItemPointerCopy(&itup->t_tid, &htids[nhtids]); - nhtids++; + itemsz = update.ndeletedtids * sizeof(uint16); + memcpy(updatedbuf + offset, vacposting->deletetids, itemsz); + offset += itemsz; } - else + } + + return updatedbuf; +} + +/* + * Comparator used by _bt_delitems_delete_check() to restore deltids array + * back to its original leaf-page-wise sort order + */ +static int +_bt_delitems_cmp(const void *a, const void *b) +{ + TM_IndexDelete *indexdelete1 = (TM_IndexDelete *) a; + TM_IndexDelete *indexdelete2 = (TM_IndexDelete *) b; + + if (indexdelete1->id > indexdelete2->id) + return 1; + if (indexdelete1->id < indexdelete2->id) + return -1; + + Assert(false); + + return 0; +} + +/* + * Try to delete item(s) from a btree leaf page during single-page cleanup. + * + * nbtree interface to table_index_delete_tuples(). Deletes a subset of index + * tuples from caller's deltids array: those whose TIDs are found safe to + * delete by the tableam (or already marked LP_DEAD in index, and so already + * known to be deletable by our simple index deletion caller). We physically + * delete index tuples from buf leaf page last of all (for index tuples where + * that is known to be safe following our table_index_delete_tuples() call). + * + * Simple index deletion caller only includes TIDs from index tuples marked + * LP_DEAD, as well as extra TIDs it found on the same leaf page that can be + * included without increasing the total number of distinct table blocks for + * the deletion operation as a whole. This approach often allows us to delete + * some extra index tuples that were practically free for tableam to check in + * passing (when they actually turn out to be safe to delete). It probably + * only makes sense for the tableam to go ahead with these extra checks when + * it is block-orientated (otherwise the checks probably won't be practically + * free, which we rely on). 
The tableam interface requires the tableam side + * to handle the problem, though, so this is okay (we as an index AM are free + * to make the simplifying assumption that all tableams must be block-based). + * + * Bottom-up index deletion caller provides all the TIDs from the leaf page, + * without expecting that tableam will check most of them. The tableam has + * considerable discretion around which entries/blocks it checks. Our role in + * costing the bottom-up deletion operation is strictly advisory. + * + * Note: Caller must have added deltids entries (i.e. entries that go in + * delstate's main array) in leaf-page-wise order: page offset number order, + * TID order among entries taken from the same posting list tuple (tiebreak on + * TID). This order is convenient to work with here. + * + * Note: We also rely on the id field of each deltids element "capturing" this + * original leaf-page-wise order. That is, we expect to be able to get back + * to the original leaf-page-wise order just by sorting deltids on the id + * field (tableam will sort deltids for its own reasons, so we'll need to put + * it back in leaf-page-wise order afterwards). + */ +void +_bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, + TM_IndexDeleteOp *delstate) +{ + Page page = BufferGetPage(buf); + TransactionId latestRemovedXid; + OffsetNumber postingidxoffnum = InvalidOffsetNumber; + int ndeletable = 0, + nupdatable = 0; + OffsetNumber deletable[MaxIndexTuplesPerPage]; + BTVacuumPosting updatable[MaxIndexTuplesPerPage]; + + /* Use tableam interface to determine which tuples to delete first */ + latestRemovedXid = table_index_delete_tuples(heapRel, delstate); + + /* Should not WAL-log latestRemovedXid unless it's required */ + if (!XLogStandbyInfoActive() || !RelationNeedsWAL(rel)) + latestRemovedXid = InvalidTransactionId; + + /* + * Construct a leaf-page-wise description of what _bt_delitems_delete() + * needs to do to physically delete index tuples from the page. 
+ * + * Must sort deltids array to restore leaf-page-wise order (original order + * before call to tableam). This is the order that the loop expects. + * + * Note that deltids array might be a lot smaller now. It might even have + * no entries at all (with bottom-up deletion caller), in which case there + * is nothing left to do. + */ + qsort(delstate->deltids, delstate->ndeltids, sizeof(TM_IndexDelete), + _bt_delitems_cmp); + if (delstate->ndeltids == 0) + { + Assert(delstate->bottomup); + return; + } + + /* We definitely have to delete at least one index tuple (or one TID) */ + for (int i = 0; i < delstate->ndeltids; i++) + { + TM_IndexStatus *dstatus = delstate->status + delstate->deltids[i].id; + OffsetNumber idxoffnum = dstatus->idxoffnum; + ItemId itemid = PageGetItemId(page, idxoffnum); + IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); + int nestedi, + nitem; + BTVacuumPosting vacposting; + + Assert(OffsetNumberIsValid(idxoffnum)); + + if (idxoffnum == postingidxoffnum) + { + /* + * This deltid entry is a TID from a posting list tuple that has + * already been completely processed + */ + Assert(BTreeTupleIsPosting(itup)); + Assert(ItemPointerCompare(BTreeTupleGetHeapTID(itup), + &delstate->deltids[i].tid) < 0); + Assert(ItemPointerCompare(BTreeTupleGetMaxHeapTID(itup), + &delstate->deltids[i].tid) >= 0); + continue; + } + + if (!BTreeTupleIsPosting(itup)) + { + /* Plain non-pivot tuple */ + Assert(ItemPointerEquals(&itup->t_tid, &delstate->deltids[i].tid)); + if (dstatus->knowndeletable) + deletable[ndeletable++] = idxoffnum; + continue; + } + + /* + * itup is a posting list tuple whose lowest deltids entry (which may + * or may not be for the first TID from itup) is considered here now. + * We should process all of the deltids entries for the posting list + * together now, though (not just the lowest). Remember to skip over + * later itup-related entries during later iterations of outermost + * loop. 
+ */ + postingidxoffnum = idxoffnum; /* Remember work in outermost loop */ + nestedi = i; /* Initialize for first itup deltids entry */ + vacposting = NULL; /* Describes final action for itup */ + nitem = BTreeTupleGetNPosting(itup); + for (int p = 0; p < nitem; p++) { - int nposting = BTreeTupleGetNPosting(itup); + ItemPointer ptid = BTreeTupleGetPostingN(itup, p); + int ptidcmp = -1; - if (nhtids + nposting > spacenhtids) + /* + * This nested loop reuses work across ptid TIDs taken from itup. + * We take advantage of the fact that both itup's TIDs and deltids + * entries (within a single itup/posting list grouping) must both + * be in ascending TID order. + */ + for (; nestedi < delstate->ndeltids; nestedi++) { - spacenhtids = Max(spacenhtids * 2, nhtids + nposting); - htids = (ItemPointer) - repalloc(htids, sizeof(ItemPointerData) * spacenhtids); + TM_IndexDelete *tcdeltid = &delstate->deltids[nestedi]; + TM_IndexStatus *tdstatus = (delstate->status + tcdeltid->id); + + /* Stop once we get past all itup related deltids entries */ + Assert(tdstatus->idxoffnum >= idxoffnum); + if (tdstatus->idxoffnum != idxoffnum) + break; + + /* Skip past non-deletable itup related entries up front */ + if (!tdstatus->knowndeletable) + continue; + + /* Entry is first partial ptid match (or an exact match)? */ + ptidcmp = ItemPointerCompare(&tcdeltid->tid, ptid); + if (ptidcmp >= 0) + { + /* Greater than or equal (partial or exact) match... */ + break; + } } - for (int j = 0; j < nposting; j++) - { - ItemPointer htid = BTreeTupleGetPostingN(itup, j); + /* ...exact ptid match to a deletable deltids entry? 
*/ + if (ptidcmp != 0) + continue; - Assert(ItemPointerIsValid(htid)); - ItemPointerCopy(htid, &htids[nhtids]); - nhtids++; + /* Exact match for deletable deltids entry -- ptid gets deleted */ + if (vacposting == NULL) + { + vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) + + nitem * sizeof(uint16)); + vacposting->itup = itup; + vacposting->updatedoffset = idxoffnum; + vacposting->ndeletedtids = 0; } + vacposting->deletetids[vacposting->ndeletedtids++] = p; } - } - Assert(nhtids >= ndeletable); + /* Final decision on itup, a posting list tuple */ - latestRemovedXid = - table_compute_xid_horizon_for_tuples(heapRel, htids, nhtids); + if (vacposting == NULL) + { + /* No TIDs to delete from itup -- do nothing */ + } + else if (vacposting->ndeletedtids == nitem) + { + /* Straight delete of itup (to delete all TIDs) */ + deletable[ndeletable++] = idxoffnum; + /* Turns out we won't need granular information */ + pfree(vacposting); + } + else + { + /* Delete some (but not all) TIDs from itup */ + Assert(vacposting->ndeletedtids > 0 && + vacposting->ndeletedtids < nitem); + updatable[nupdatable++] = vacposting; + } + } - pfree(htids); + /* Physically delete tuples (or TIDs) using deletable (or updatable) */ + _bt_delitems_delete(rel, buf, latestRemovedXid, deletable, ndeletable, + updatable, nupdatable); - return latestRemovedXid; + /* be tidy */ + for (int i = 0; i < nupdatable; i++) + pfree(updatable[i]); } /* diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 0abec1079830d..289bd3c15daa0 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -8,7 +8,7 @@ * This file contains only the public interface routines. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -199,6 +199,7 @@ bool btinsert(Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, + bool indexUnchanged, IndexInfo *indexInfo) { bool result; @@ -208,7 +209,7 @@ btinsert(Relation rel, Datum *values, bool *isnull, itup = index_form_tuple(RelationGetDescr(rel), values, isnull); itup->t_tid = *ht_ctid; - result = _bt_doinsert(rel, itup, checkUnique, heapRel); + result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel); pfree(itup); @@ -1281,10 +1282,10 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno) * as long as the callback function only considers whether the * index tuple refers to pre-cutoff heap tuples that were * certainly already pruned away during VACUUM's initial heap - * scan by the time we get here. (XLOG_HEAP2_CLEANUP_INFO - * records produce conflicts using a latestRemovedXid value - * for the entire VACUUM, so there is no need to produce our - * own conflict now.) + * scan by the time we get here. (heapam's XLOG_HEAP2_CLEAN + * and XLOG_HEAP2_CLEANUP_INFO records produce conflicts using + * a latestRemovedXid value for the pointed-to heap tuples, so + * there is no need to produce our own conflict now.) * * Backends with snapshots acquired after a VACUUM starts but * before it finishes could have visibility cutoff with a diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 8f6575fdf15c2..2e3bda8171d77 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -4,7 +4,7 @@ * Search code for postgres btrees. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 8730de25ed719..5683daa34d3b7 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -34,7 +34,7 @@ * This code isn't concerned about the FSM at all. The caller is responsible * for initializing that. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -49,7 +49,6 @@ #include "access/parallel.h" #include "access/relscan.h" #include "access/table.h" -#include "access/tableam.h" #include "access/xact.h" #include "access/xlog.h" #include "access/xloginsert.h" @@ -487,17 +486,17 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, * values set by table_index_build_scan */ { - const int index[] = { + const int progress_index[] = { PROGRESS_CREATEIDX_TUPLES_TOTAL, PROGRESS_SCAN_BLOCKS_TOTAL, PROGRESS_SCAN_BLOCKS_DONE }; - const int64 val[] = { + const int64 progress_vals[] = { buildstate->indtuples, 0, 0 }; - pgstat_progress_update_multi_param(3, index, val); + pgstat_progress_update_multi_param(3, progress_index, progress_vals); } /* okay, all heap tuples are spooled */ diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c index ef6dd1cf1920b..3485e93ef6470 100644 --- a/src/backend/access/nbtree/nbtsplitloc.c +++ b/src/backend/access/nbtree/nbtsplitloc.c @@ -3,7 +3,7 @@ * nbtsplitloc.c * Choose split point code for Postgres btree implementation. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 2f5f14e527dd0..d5243107239bb 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -3,7 +3,7 @@ * nbtutils.c * Utility code for Postgres btree implementation. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/nbtree/nbtvalidate.c b/src/backend/access/nbtree/nbtvalidate.c index 5be728ad07cff..7acb64ee69895 100644 --- a/src/backend/access/nbtree/nbtvalidate.c +++ b/src/backend/access/nbtree/nbtvalidate.c @@ -3,7 +3,7 @@ * nbtvalidate.c * Opclass validator for btree. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 5135b800af6d3..c1d578cc01609 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -4,7 +4,7 @@ * WAL replay logic for btrees. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -556,6 +556,47 @@ btree_xlog_dedup(XLogReaderState *record) UnlockReleaseBuffer(buf); } +static void +btree_xlog_updates(Page page, OffsetNumber *updatedoffsets, + xl_btree_update *updates, int nupdated) +{ + BTVacuumPosting vacposting; + IndexTuple origtuple; + ItemId itemid; + Size itemsz; + + for (int i = 0; i < nupdated; i++) + { + itemid = PageGetItemId(page, updatedoffsets[i]); + origtuple = (IndexTuple) PageGetItem(page, itemid); + + vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) + + updates->ndeletedtids * sizeof(uint16)); + vacposting->updatedoffset = updatedoffsets[i]; + vacposting->itup = origtuple; + vacposting->ndeletedtids = updates->ndeletedtids; + memcpy(vacposting->deletetids, + (char *) updates + SizeOfBtreeUpdate, + updates->ndeletedtids * sizeof(uint16)); + + _bt_update_posting(vacposting); + + /* Overwrite updated version of tuple */ + itemsz = MAXALIGN(IndexTupleSize(vacposting->itup)); + if (!PageIndexTupleOverwrite(page, updatedoffsets[i], + (Item) vacposting->itup, itemsz)) + elog(PANIC, "failed to update partially dead item"); + + pfree(vacposting->itup); + pfree(vacposting); + + /* advance to next xl_btree_update from array */ + updates = (xl_btree_update *) + ((char *) updates + SizeOfBtreeUpdate + + updates->ndeletedtids * sizeof(uint16)); + } +} + static void btree_xlog_vacuum(XLogReaderState *record) { @@ -589,41 +630,7 @@ btree_xlog_vacuum(XLogReaderState *record) xlrec->nupdated * sizeof(OffsetNumber)); - for (int i = 0; i < xlrec->nupdated; i++) - { - BTVacuumPosting vacposting; - IndexTuple origtuple; - ItemId itemid; - Size itemsz; - - itemid = PageGetItemId(page, updatedoffsets[i]); - origtuple = (IndexTuple) PageGetItem(page, itemid); - - vacposting = 
palloc(offsetof(BTVacuumPostingData, deletetids) + - updates->ndeletedtids * sizeof(uint16)); - vacposting->updatedoffset = updatedoffsets[i]; - vacposting->itup = origtuple; - vacposting->ndeletedtids = updates->ndeletedtids; - memcpy(vacposting->deletetids, - (char *) updates + SizeOfBtreeUpdate, - updates->ndeletedtids * sizeof(uint16)); - - _bt_update_posting(vacposting); - - /* Overwrite updated version of tuple */ - itemsz = MAXALIGN(IndexTupleSize(vacposting->itup)); - if (!PageIndexTupleOverwrite(page, updatedoffsets[i], - (Item) vacposting->itup, itemsz)) - elog(PANIC, "failed to update partially dead item"); - - pfree(vacposting->itup); - pfree(vacposting); - - /* advance to next xl_btree_update from array */ - updates = (xl_btree_update *) - ((char *) updates + SizeOfBtreeUpdate + - updates->ndeletedtids * sizeof(uint16)); - } + btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated); } if (xlrec->ndeleted > 0) @@ -675,7 +682,22 @@ btree_xlog_delete(XLogReaderState *record) page = (Page) BufferGetPage(buffer); - PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted); + if (xlrec->nupdated > 0) + { + OffsetNumber *updatedoffsets; + xl_btree_update *updates; + + updatedoffsets = (OffsetNumber *) + (ptr + xlrec->ndeleted * sizeof(OffsetNumber)); + updates = (xl_btree_update *) ((char *) updatedoffsets + + xlrec->nupdated * + sizeof(OffsetNumber)); + + btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated); + } + + if (xlrec->ndeleted > 0) + PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted); /* Mark the page as not containing any LP_DEAD items */ opaque = (BTPageOpaque) PageGetSpecialPointer(page); diff --git a/src/backend/access/rmgrdesc/brindesc.c b/src/backend/access/rmgrdesc/brindesc.c index 0dc56e5549614..b6265a49bc060 100644 --- a/src/backend/access/rmgrdesc/brindesc.c +++ b/src/backend/access/rmgrdesc/brindesc.c @@ -3,7 +3,7 @@ * brindesc.c * rmgr descriptor routines for BRIN indexes * - * Portions 
Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/clogdesc.c b/src/backend/access/rmgrdesc/clogdesc.c index fb510e4cd191a..b12f43a1bba28 100644 --- a/src/backend/access/rmgrdesc/clogdesc.c +++ b/src/backend/access/rmgrdesc/clogdesc.c @@ -3,7 +3,7 @@ * clogdesc.c * rmgr descriptor routines for access/transam/clog.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/committsdesc.c b/src/backend/access/rmgrdesc/committsdesc.c index c8bdae761abc1..7ebd3d35efd02 100644 --- a/src/backend/access/rmgrdesc/committsdesc.c +++ b/src/backend/access/rmgrdesc/committsdesc.c @@ -3,7 +3,7 @@ * committsdesc.c * rmgr descriptor routines for access/transam/commit_ts.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/dbasedesc.c b/src/backend/access/rmgrdesc/dbasedesc.c index 47580feaeae41..26609845aac65 100644 --- a/src/backend/access/rmgrdesc/dbasedesc.c +++ b/src/backend/access/rmgrdesc/dbasedesc.c @@ -3,7 +3,7 @@ * dbasedesc.c * rmgr descriptor routines for commands/dbcommands.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/genericdesc.c b/src/backend/access/rmgrdesc/genericdesc.c index f0fd4286195e1..7242d0d214173 100644 --- 
a/src/backend/access/rmgrdesc/genericdesc.c +++ b/src/backend/access/rmgrdesc/genericdesc.c @@ -4,7 +4,7 @@ * rmgr descriptor routines for access/transam/generic_xlog.c * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/rmgrdesc/genericdesc.c diff --git a/src/backend/access/rmgrdesc/gindesc.c b/src/backend/access/rmgrdesc/gindesc.c index 9ab0d8e1f7e7d..ee9e69cdd094e 100644 --- a/src/backend/access/rmgrdesc/gindesc.c +++ b/src/backend/access/rmgrdesc/gindesc.c @@ -3,7 +3,7 @@ * gindesc.c * rmgr descriptor routines for access/transam/gin/ginxlog.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index de309fb1227e8..8ae31126ebf99 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -3,7 +3,7 @@ * gistdesc.c * rmgr descriptor routines for access/gist/gistxlog.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/hashdesc.c b/src/backend/access/rmgrdesc/hashdesc.c index f7728850419c0..90ccea08e2c49 100644 --- a/src/backend/access/rmgrdesc/hashdesc.c +++ b/src/backend/access/rmgrdesc/hashdesc.c @@ -3,7 +3,7 @@ * hashdesc.c * rmgr descriptor routines for access/hash/hash.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents 
of the University of California * * @@ -113,7 +113,7 @@ hash_desc(StringInfo buf, XLogReaderState *record) { xl_hash_vacuum_one_page *xlrec = (xl_hash_vacuum_one_page *) rec; - appendStringInfo(buf, "ntuples %d, latest removed xid %u", + appendStringInfo(buf, "ntuples %d, latestRemovedXid %u", xlrec->ntuples, xlrec->latestRemovedXid); break; diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index 3c16e6ef1f211..e60e32b9353de 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -3,7 +3,7 @@ * heapdesc.c * rmgr descriptor routines for access/heap/heapam.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -125,7 +125,7 @@ heap2_desc(StringInfo buf, XLogReaderState *record) { xl_heap_clean *xlrec = (xl_heap_clean *) rec; - appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid); + appendStringInfo(buf, "latestRemovedXid %u", xlrec->latestRemovedXid); } else if (info == XLOG_HEAP2_FREEZE_PAGE) { @@ -138,7 +138,7 @@ heap2_desc(StringInfo buf, XLogReaderState *record) { xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec; - appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid); + appendStringInfo(buf, "latestRemovedXid %u", xlrec->latestRemovedXid); } else if (info == XLOG_HEAP2_VISIBLE) { diff --git a/src/backend/access/rmgrdesc/logicalmsgdesc.c b/src/backend/access/rmgrdesc/logicalmsgdesc.c index 83ab93a24be9e..d64ce2e7eff21 100644 --- a/src/backend/access/rmgrdesc/logicalmsgdesc.c +++ b/src/backend/access/rmgrdesc/logicalmsgdesc.c @@ -3,7 +3,7 @@ * logicalmsgdesc.c * rmgr descriptor routines for replication/logical/message.c * - * Portions Copyright (c) 2015-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2015-2021, PostgreSQL Global Development Group * * * 
IDENTIFICATION diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c index 4dd6d7d1f4f17..8c37690e659d7 100644 --- a/src/backend/access/rmgrdesc/mxactdesc.c +++ b/src/backend/access/rmgrdesc/mxactdesc.c @@ -3,7 +3,7 @@ * mxactdesc.c * rmgr descriptor routines for access/transam/multixact.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index e099107f91ec9..6e0d6a2b729e8 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -3,7 +3,7 @@ * nbtdesc.c * rmgr descriptor routines for access/nbtree/nbtxlog.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -63,8 +63,8 @@ btree_desc(StringInfo buf, XLogReaderState *record) { xl_btree_delete *xlrec = (xl_btree_delete *) rec; - appendStringInfo(buf, "latestRemovedXid %u; ndeleted %u", - xlrec->latestRemovedXid, xlrec->ndeleted); + appendStringInfo(buf, "latestRemovedXid %u; ndeleted %u; nupdated %u", + xlrec->latestRemovedXid, xlrec->ndeleted, xlrec->nupdated); break; } case XLOG_BTREE_MARK_PAGE_HALFDEAD: diff --git a/src/backend/access/rmgrdesc/relmapdesc.c b/src/backend/access/rmgrdesc/relmapdesc.c index 8a8d59495675e..2f9d4f54ba8b4 100644 --- a/src/backend/access/rmgrdesc/relmapdesc.c +++ b/src/backend/access/rmgrdesc/relmapdesc.c @@ -3,7 +3,7 @@ * relmapdesc.c * rmgr descriptor routines for utils/cache/relmapper.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, 
Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/replorigindesc.c b/src/backend/access/rmgrdesc/replorigindesc.c index 19e14f910bafd..2e29ecc6d576c 100644 --- a/src/backend/access/rmgrdesc/replorigindesc.c +++ b/src/backend/access/rmgrdesc/replorigindesc.c @@ -3,7 +3,7 @@ * replorigindesc.c * rmgr descriptor routines for replication/logical/origin.c * - * Portions Copyright (c) 2015-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2015-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/access/rmgrdesc/seqdesc.c b/src/backend/access/rmgrdesc/seqdesc.c index 1cb1e91a3e952..0bd294687b734 100644 --- a/src/backend/access/rmgrdesc/seqdesc.c +++ b/src/backend/access/rmgrdesc/seqdesc.c @@ -3,7 +3,7 @@ * seqdesc.c * rmgr descriptor routines for commands/sequence.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/smgrdesc.c b/src/backend/access/rmgrdesc/smgrdesc.c index a7c0cb1bc3154..7755553d57f5f 100644 --- a/src/backend/access/rmgrdesc/smgrdesc.c +++ b/src/backend/access/rmgrdesc/smgrdesc.c @@ -3,7 +3,7 @@ * smgrdesc.c * rmgr descriptor routines for catalog/storage.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c index a5478e3fb45f1..3610dc1b46c89 100644 --- a/src/backend/access/rmgrdesc/spgdesc.c +++ b/src/backend/access/rmgrdesc/spgdesc.c @@ -3,7 +3,7 @@ * spgdesc.c * rmgr descriptor routines for access/spgist/spgxlog.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * 
Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/standbydesc.c b/src/backend/access/rmgrdesc/standbydesc.c index 1ce2664014630..01ee7ac6d2ca5 100644 --- a/src/backend/access/rmgrdesc/standbydesc.c +++ b/src/backend/access/rmgrdesc/standbydesc.c @@ -3,7 +3,7 @@ * standbydesc.c * rmgr descriptor routines for storage/ipc/standby.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/tblspcdesc.c b/src/backend/access/rmgrdesc/tblspcdesc.c index 2cd361b2c0025..cb356eaa48f15 100644 --- a/src/backend/access/rmgrdesc/tblspcdesc.c +++ b/src/backend/access/rmgrdesc/tblspcdesc.c @@ -3,7 +3,7 @@ * tblspcdesc.c * rmgr descriptor routines for commands/tablespace.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c index addd95faec140..c6fb1ec572794 100644 --- a/src/backend/access/rmgrdesc/xactdesc.c +++ b/src/backend/access/rmgrdesc/xactdesc.c @@ -3,7 +3,7 @@ * xactdesc.c * rmgr descriptor routines for access/transam/xact.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 3200f777f5a37..92cc7ea073517 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -3,7 
+3,7 @@ * xlogdesc.c * rmgr descriptor routines for access/transam/xlog.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c index 934d65b89f2d0..7bd269fd2a0e2 100644 --- a/src/backend/access/spgist/spgdoinsert.c +++ b/src/backend/access/spgist/spgdoinsert.c @@ -4,7 +4,7 @@ * implementation of insert algorithm * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index e4508a2b923a2..0ca621450e647 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -5,7 +5,7 @@ * * All the actual insertion logic is in spgdoinsert.c. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -207,6 +207,7 @@ bool spginsert(Relation index, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, + bool indexUnchanged, IndexInfo *indexInfo) { SpGistState spgstate; diff --git a/src/backend/access/spgist/spgkdtreeproc.c b/src/backend/access/spgist/spgkdtreeproc.c index 6581238cef2f9..d9b3f6a0ea7e6 100644 --- a/src/backend/access/spgist/spgkdtreeproc.c +++ b/src/backend/access/spgist/spgkdtreeproc.c @@ -4,7 +4,7 @@ * implementation of k-d tree over points for SP-GiST * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/spgist/spgproc.c b/src/backend/access/spgist/spgproc.c index 94454f6b70d4f..1bad5d6c06d2e 100644 --- a/src/backend/access/spgist/spgproc.c +++ b/src/backend/access/spgist/spgproc.c @@ -4,7 +4,7 @@ * Common supporting procedures for SP-GiST opclasses. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/spgist/spgquadtreeproc.c b/src/backend/access/spgist/spgquadtreeproc.c index 249b3828fee23..a52d924fdc92c 100644 --- a/src/backend/access/spgist/spgquadtreeproc.c +++ b/src/backend/access/spgist/spgquadtreeproc.c @@ -4,7 +4,7 @@ * implementation of quad tree over points for SP-GiST * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index 4d506bfb9a9b8..20e67c3f7d1eb 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -4,7 +4,7 @@ * routines for scanning SP-GiST indexes * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c index b5ec81937c4b9..f340555336386 100644 --- a/src/backend/access/spgist/spgtextproc.c +++ b/src/backend/access/spgist/spgtextproc.c @@ -29,7 +29,7 @@ * No new entries ever get pushed into a -2-labeled child, either. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 64d3ba82887bd..d8b18150612d0 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -4,7 +4,7 @@ * various support functions for SP-GiST * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index e1c58933f979b..0d02a02222e9e 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -4,7 +4,7 @@ * vacuum for SP-GiST * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/spgist/spgvalidate.c b/src/backend/access/spgist/spgvalidate.c index d4f5841e2656b..8bc3889a4de2a 100644 --- a/src/backend/access/spgist/spgvalidate.c +++ b/src/backend/access/spgist/spgvalidate.c @@ -3,7 +3,7 @@ * spgvalidate.c * Opclass validator for SP-GiST. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c index 999d0ca15d568..d40c7b58776d7 100644 --- a/src/backend/access/spgist/spgxlog.c +++ b/src/backend/access/spgist/spgxlog.c @@ -4,7 +4,7 @@ * WAL replay logic for SP-GiST * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/table/table.c b/src/backend/access/table/table.c index 7c29091e6c193..545007e6ed480 100644 --- a/src/backend/access/table/table.c +++ b/src/backend/access/table/table.c @@ -3,7 +3,7 @@ * table.c * Generic routines for table related code. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 6438c457161ac..5ea5bdd810433 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -3,7 +3,7 @@ * tableam.c * Table access method routines too big to be inline functions. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c index 58de0743ba05d..325ecdc122914 100644 --- a/src/backend/access/table/tableamapi.c +++ b/src/backend/access/table/tableamapi.c @@ -3,7 +3,7 @@ * tableamapi.c * Support routines for API for Postgres table access methods * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/table/tableamapi.c @@ -66,7 +66,7 @@ GetTableAmRoutine(Oid amhandler) Assert(routine->tuple_tid_valid != NULL); Assert(routine->tuple_get_latest_tid != NULL); Assert(routine->tuple_satisfies_snapshot != NULL); - Assert(routine->compute_xid_horizon_for_tuples != NULL); + Assert(routine->index_delete_tuples != NULL); Assert(routine->tuple_insert != NULL); diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c index 739b6ae9900ec..fb36151ce55f2 100644 --- a/src/backend/access/table/toast_helper.c +++ b/src/backend/access/table/toast_helper.c @@ -4,7 +4,7 @@ * Helper functions for table AMs implementing compressed or * out-of-line storage of varlena attributes. 
* - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/access/table/toast_helper.c diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c index 606730d6cbe85..ae6e4f581145a 100644 --- a/src/backend/access/tablesample/bernoulli.c +++ b/src/backend/access/tablesample/bernoulli.c @@ -13,7 +13,7 @@ * cutoff value computed from the selection probability by BeginSampleScan. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c index 29b7c0d3c284d..b0869e5039529 100644 --- a/src/backend/access/tablesample/system.c +++ b/src/backend/access/tablesample/system.c @@ -13,7 +13,7 @@ * cutoff value computed from the selection probability by BeginSampleScan. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c index f0e2f7be44107..02f2a95e84f17 100644 --- a/src/backend/access/tablesample/tablesample.c +++ b/src/backend/access/tablesample/tablesample.c @@ -3,7 +3,7 @@ * tablesample.c * Support functions for TABLESAMPLE feature * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 034349aa7b986..6fa4713fb4d8a 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -23,7 +23,7 @@ * for aborts (whether sync or async), since the post-crash assumption would * be that such transactions failed anyway. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/clog.c @@ -694,6 +694,7 @@ CLOGShmemInit(void) SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER, SYNC_HANDLER_CLOG); + SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE); } /* @@ -772,11 +773,6 @@ TrimCLOG(void) LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); - /* - * Re-Initialize our idea of the latest page number. - */ - XactCtl->shared->latest_page_number = pageno; - /* * Zero out the remainder of the current clog page. 
Under normal * circumstances it should be zeroes already, but it seems at least @@ -912,13 +908,22 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) /* - * Decide which of two CLOG page numbers is "older" for truncation purposes. + * Decide whether a CLOG page number is "older" for truncation purposes. * * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. + * thing with wraparound XID arithmetic. However, TransactionIdPrecedes() + * would get weird about permanent xact IDs. So, offset both such that xid1, + * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset + * is relevant to page 0 and to the page preceding page 0. + * + * The page containing oldestXact-2^31 is the important edge case. The + * portion of that page equaling or following oldestXact-2^31 is expendable, + * but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is + * the first XID of a page and segment, the entire page and segment is + * expendable, and we could truncate the segment. Recognizing that case would + * require making oldestXact, not just the page containing oldestXact, + * available to this callback. The benefit would be rare and small, so we + * don't optimize that edge case. 
*/ static bool CLOGPagePrecedes(int page1, int page2) @@ -927,11 +932,12 @@ CLOGPagePrecedes(int page1, int page2) TransactionId xid2; xid1 = ((TransactionId) page1) * CLOG_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; + xid1 += FirstNormalTransactionId + 1; xid2 = ((TransactionId) page2) * CLOG_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; + xid2 += FirstNormalTransactionId + 1; - return TransactionIdPrecedes(xid1, xid2); + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + CLOG_XACTS_PER_PAGE - 1)); } @@ -1000,12 +1006,6 @@ clog_redo(XLogReaderState *record) memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_clog_truncate)); - /* - * During XLOG replay, latest_page_number isn't set up yet; insert a - * suitable value to bypass the sanity test in SimpleLruTruncate. - */ - XactCtl->shared->latest_page_number = xlrec.pageno; - AdvanceOldestClogXid(xlrec.oldestXact); SimpleLruTruncate(XactCtl, xlrec.pageno); diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 2fe551f17e776..9f42461e12c2c 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -15,7 +15,7 @@ * re-perform the status update on redo; so we need make no additional XLOG * entry here. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/commit_ts.c @@ -557,6 +557,7 @@ CommitTsShmemInit(void) CommitTsSLRULock, "pg_commit_ts", LWTRANCHE_COMMITTS_BUFFER, SYNC_HANDLER_COMMIT_TS); + SlruPagePrecedesUnitTests(CommitTsCtl, COMMIT_TS_XACTS_PER_PAGE); commitTsShared = ShmemInitStruct("CommitTs shared", sizeof(CommitTimestampShared), @@ -927,14 +928,27 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact) /* - * Decide which of two commitTS page numbers is "older" for truncation - * purposes. 
+ * Decide whether a commitTS page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes(). * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. + * At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128. This + * introduces differences compared to CLOG and the other SLRUs having (1 << + * 31) % per_page == 0. This function never tests exactly + * TransactionIdPrecedes(x-2^31, x). When the system reaches xidStopLimit, + * there are two possible counts of page boundaries between oldestXact and the + * latest XID assigned, depending on whether oldestXact is within the first + * 128 entries of its page. Since this function doesn't know the location of + * oldestXact within page2, it returns false for one page that actually is + * expendable. This is a wider (yet still negligible) version of the + * truncation opportunity that CLOGPagePrecedes() cannot recognize. + * + * For the sake of a worked example, number entries with decimal values such + * that page1==1 entries range from 1.0 to 1.999. Let N+0.15 be the number of + * pages that 2^31 entries will span (N is an integer). If oldestXact=N+2.1, + * then the final safe XID assignment leaves newestXact=1.95. We keep page 2, + * because entry=2.85 is the border that toggles whether entries precede the + * last entry of the oldestXact page. While page 2 is expendable at + * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9. 
*/ static bool CommitTsPagePrecedes(int page1, int page2) @@ -943,11 +957,12 @@ CommitTsPagePrecedes(int page1, int page2) TransactionId xid2; xid1 = ((TransactionId) page1) * COMMIT_TS_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; + xid1 += FirstNormalTransactionId + 1; xid2 = ((TransactionId) page2) * COMMIT_TS_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; + xid2 += FirstNormalTransactionId + 1; - return TransactionIdPrecedes(xid1, xid2); + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + COMMIT_TS_XACTS_PER_PAGE - 1)); } diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c index 5164a1c2f30db..63301a1ab1684 100644 --- a/src/backend/access/transam/generic_xlog.c +++ b/src/backend/access/transam/generic_xlog.c @@ -4,7 +4,7 @@ * Implementation of generic xlog records. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/generic_xlog.c diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index eb8de7cf3293c..7dcfa02323698 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -59,7 +59,7 @@ * counter does not fall within the wraparound horizon considering the global * minimum value. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/multixact.c @@ -1852,11 +1852,13 @@ MultiXactShmemInit(void) MultiXactOffsetSLRULock, "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER, SYNC_HANDLER_MULTIXACT_OFFSET); + SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE); SimpleLruInit(MultiXactMemberCtl, "MultiXactMember", NUM_MULTIXACTMEMBER_BUFFERS, 0, MultiXactMemberSLRULock, "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER, SYNC_HANDLER_MULTIXACT_MEMBER); + /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */ /* Initialize our shared state struct */ MultiXactState = ShmemInitStruct("Shared MultiXact State", @@ -2982,6 +2984,14 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) * truncate the members SLRU. So we first scan the directory to determine * the earliest offsets page number that we can read without error. * + * When nextMXact is less than one segment away from multiWrapLimit, + * SlruScanDirCbFindEarliest can find some early segment other than the + * actual earliest. (MultiXactOffsetPagePrecedes(EARLIEST, LATEST) + * returns false, because not all pairs of entries have the same answer.) + * That can also arise when an earlier truncation attempt failed unlink() + * or returned early from this function. The only consequence is + * returning early, which wastes space that we could have liberated. + * * NB: It's also possible that the page that oldestMulti is on has already * been truncated away, and we crashed before updating oldestMulti. */ @@ -3096,15 +3106,11 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) } /* - * Decide which of two MultiXactOffset page numbers is "older" for truncation - * purposes. 
+ * Decide whether a MultiXactOffset page number is "older" for truncation + * purposes. Analogous to CLOGPagePrecedes(). * - * We need to use comparison of MultiXactId here in order to do the right - * thing with wraparound. However, if we are asked about page number zero, we - * don't want to hand InvalidMultiXactId to MultiXactIdPrecedes: it'll get - * weird. So, offset both multis by FirstMultiXactId to avoid that. - * (Actually, the current implementation doesn't do anything weird with - * InvalidMultiXactId, but there's no harm in leaving this code like this.) + * Offsetting the values is optional, because MultiXactIdPrecedes() has + * translational symmetry. */ static bool MultiXactOffsetPagePrecedes(int page1, int page2) @@ -3113,15 +3119,17 @@ MultiXactOffsetPagePrecedes(int page1, int page2) MultiXactId multi2; multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE; - multi1 += FirstMultiXactId; + multi1 += FirstMultiXactId + 1; multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE; - multi2 += FirstMultiXactId; + multi2 += FirstMultiXactId + 1; - return MultiXactIdPrecedes(multi1, multi2); + return (MultiXactIdPrecedes(multi1, multi2) && + MultiXactIdPrecedes(multi1, + multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1)); } /* - * Decide which of two MultiXactMember page numbers is "older" for truncation + * Decide whether a MultiXactMember page number is "older" for truncation * purposes. There is no "invalid offset number" so use the numbers verbatim. 
*/ static bool @@ -3133,7 +3141,9 @@ MultiXactMemberPagePrecedes(int page1, int page2) offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE; offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE; - return MultiXactOffsetPrecedes(offset1, offset2); + return (MultiXactOffsetPrecedes(offset1, offset2) && + MultiXactOffsetPrecedes(offset1, + offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1)); } /* diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c index b0426960c786c..3550ef13baa40 100644 --- a/src/backend/access/transam/parallel.c +++ b/src/backend/access/transam/parallel.c @@ -3,7 +3,7 @@ * parallel.c * Infrastructure for launching parallel workers * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -75,7 +75,7 @@ #define PARALLEL_KEY_PENDING_SYNCS UINT64CONST(0xFFFFFFFFFFFF000B) #define PARALLEL_KEY_REINDEX_STATE UINT64CONST(0xFFFFFFFFFFFF000C) #define PARALLEL_KEY_RELMAPPER_STATE UINT64CONST(0xFFFFFFFFFFFF000D) -#define PARALLEL_KEY_ENUMBLACKLIST UINT64CONST(0xFFFFFFFFFFFF000E) +#define PARALLEL_KEY_UNCOMMITTEDENUMS UINT64CONST(0xFFFFFFFFFFFF000E) /* Fixed-size parallel state. 
*/ typedef struct FixedParallelState @@ -211,7 +211,7 @@ InitializeParallelDSM(ParallelContext *pcxt) Size pendingsyncslen = 0; Size reindexlen = 0; Size relmapperlen = 0; - Size enumblacklistlen = 0; + Size uncommittedenumslen = 0; Size segsize = 0; int i; FixedParallelState *fps; @@ -267,8 +267,8 @@ InitializeParallelDSM(ParallelContext *pcxt) shm_toc_estimate_chunk(&pcxt->estimator, reindexlen); relmapperlen = EstimateRelationMapSpace(); shm_toc_estimate_chunk(&pcxt->estimator, relmapperlen); - enumblacklistlen = EstimateEnumBlacklistSpace(); - shm_toc_estimate_chunk(&pcxt->estimator, enumblacklistlen); + uncommittedenumslen = EstimateUncommittedEnumsSpace(); + shm_toc_estimate_chunk(&pcxt->estimator, uncommittedenumslen); /* If you add more chunks here, you probably need to add keys. */ shm_toc_estimate_keys(&pcxt->estimator, 11); @@ -348,7 +348,7 @@ InitializeParallelDSM(ParallelContext *pcxt) char *error_queue_space; char *session_dsm_handle_space; char *entrypointstate; - char *enumblacklistspace; + char *uncommittedenumsspace; Size lnamelen; /* Serialize shared libraries we have loaded. */ @@ -404,11 +404,12 @@ InitializeParallelDSM(ParallelContext *pcxt) shm_toc_insert(pcxt->toc, PARALLEL_KEY_RELMAPPER_STATE, relmapperspace); - /* Serialize enum blacklist state. */ - enumblacklistspace = shm_toc_allocate(pcxt->toc, enumblacklistlen); - SerializeEnumBlacklist(enumblacklistspace, enumblacklistlen); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_ENUMBLACKLIST, - enumblacklistspace); + /* Serialize uncommitted enum state. */ + uncommittedenumsspace = shm_toc_allocate(pcxt->toc, + uncommittedenumslen); + SerializeUncommittedEnums(uncommittedenumsspace, uncommittedenumslen); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_UNCOMMITTEDENUMS, + uncommittedenumsspace); /* Allocate space for worker information. 
*/ pcxt->worker = palloc0(sizeof(ParallelWorkerInfo) * pcxt->nworkers); @@ -1257,7 +1258,7 @@ ParallelWorkerMain(Datum main_arg) char *pendingsyncsspace; char *reindexspace; char *relmapperspace; - char *enumblacklistspace; + char *uncommittedenumsspace; StringInfoData msgbuf; char *session_dsm_handle_space; @@ -1449,10 +1450,10 @@ ParallelWorkerMain(Datum main_arg) relmapperspace = shm_toc_lookup(toc, PARALLEL_KEY_RELMAPPER_STATE, false); RestoreRelationMap(relmapperspace); - /* Restore enum blacklist. */ - enumblacklistspace = shm_toc_lookup(toc, PARALLEL_KEY_ENUMBLACKLIST, - false); - RestoreEnumBlacklist(enumblacklistspace); + /* Restore uncommitted enums. */ + uncommittedenumsspace = shm_toc_lookup(toc, PARALLEL_KEY_UNCOMMITTEDENUMS, + false); + RestoreUncommittedEnums(uncommittedenumsspace); /* Attach to the leader's serializable transaction, if SERIALIZABLE. */ AttachSerializableXact(fps->serializable_xact_handle); diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index cec17cb2aec0e..e49e06e896414 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -38,7 +38,7 @@ * by re-setting the page's page_dirty flag. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/slru.c @@ -1230,11 +1230,6 @@ SimpleLruTruncate(SlruCtl ctl, int cutoffPage) /* update the stats counter of truncates */ pgstat_count_slru_truncate(shared->slru_stats_idx); - /* - * The cutoff point is the start of the segment containing cutoffPage. - */ - cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; - /* * Scan shared memory and remove any pages preceding the cutoff page, to * ensure we won't rewrite them later. 
(Since this is normally called in @@ -1247,9 +1242,7 @@ restart:; /* * While we are holding the lock, make an important safety check: the - * planned cutoff point must be <= the current endpoint page. Otherwise we - * have already wrapped around, and proceeding with the truncation would - * risk removing the current segment. + * current endpoint page must not be eligible for removal. */ if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage)) { @@ -1281,8 +1274,11 @@ restart:; * Hmm, we have (or may have) I/O operations acting on the page, so * we've got to wait for them to finish and then start again. This is * the same logic as in SlruSelectLRUPage. (XXX if page is dirty, - * wouldn't it be OK to just discard it without writing it? For now, - * keep the logic the same as it was.) + * wouldn't it be OK to just discard it without writing it? + * SlruMayDeleteSegment() uses a stricter qualification, so we might + * not delete this page in the end; even if we don't delete it, we + * won't have cause to read its data again. For now, keep the logic + * the same as it was.) */ if (shared->page_status[slotno] == SLRU_PAGE_VALID) SlruInternalWritePage(ctl, slotno, NULL); @@ -1377,19 +1373,134 @@ SlruDeleteSegment(SlruCtl ctl, int segno) LWLockRelease(shared->ControlLock); } +/* + * Determine whether a segment is okay to delete. + * + * segpage is the first page of the segment, and cutoffPage is the oldest (in + * PagePrecedes order) page in the SLRU containing still-useful data. 
Since + * every core PagePrecedes callback implements "wrap around", check the + * segment's first and last pages: + * + * first=cutoff: no; cutoff falls inside this segment + * first>=cutoff && last=cutoff && last>=cutoff: no; every page of this segment is too young + */ +static bool +SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage) +{ + int seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1; + + Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0); + + return (ctl->PagePrecedes(segpage, cutoffPage) && + ctl->PagePrecedes(seg_last_page, cutoffPage)); +} + +#ifdef USE_ASSERT_CHECKING +static void +SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) +{ + TransactionId lhs, + rhs; + int newestPage, + oldestPage; + TransactionId newestXact, + oldestXact; + + /* + * Compare an XID pair having undefined order (see RFC 1982), a pair at + * "opposite ends" of the XID space. TransactionIdPrecedes() treats each + * as preceding the other. If RHS is oldestXact, LHS is the first XID we + * must not assign. 
+ */ + lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */ + rhs = lhs + (1U << 31); + Assert(TransactionIdPrecedes(lhs, rhs)); + Assert(TransactionIdPrecedes(rhs, lhs)); + Assert(!TransactionIdPrecedes(lhs - 1, rhs)); + Assert(TransactionIdPrecedes(rhs, lhs - 1)); + Assert(TransactionIdPrecedes(lhs + 1, rhs)); + Assert(!TransactionIdPrecedes(rhs, lhs + 1)); + Assert(!TransactionIdFollowsOrEquals(lhs, rhs)); + Assert(!TransactionIdFollowsOrEquals(rhs, lhs)); + Assert(!ctl->PagePrecedes(lhs / per_page, lhs / per_page)); + Assert(!ctl->PagePrecedes(lhs / per_page, rhs / per_page)); + Assert(!ctl->PagePrecedes(rhs / per_page, lhs / per_page)); + Assert(!ctl->PagePrecedes((lhs - per_page) / per_page, rhs / per_page)); + Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page)); + Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page)); + Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page) + || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */ + Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page) + || (1U << 31) % per_page != 0); + Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page)); + Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page)); + Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page)); + + /* + * GetNewTransactionId() has assigned the last XID it can safely use, and + * that XID is in the *LAST* page of the second segment. We must not + * delete that segment. 
+ */ + newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1; + newestXact = newestPage * per_page + offset; + Assert(newestXact / per_page == newestPage); + oldestXact = newestXact + 1; + oldestXact -= 1U << 31; + oldestPage = oldestXact / per_page; + Assert(!SlruMayDeleteSegment(ctl, + (newestPage - + newestPage % SLRU_PAGES_PER_SEGMENT), + oldestPage)); + + /* + * GetNewTransactionId() has assigned the last XID it can safely use, and + * that XID is in the *FIRST* page of the second segment. We must not + * delete that segment. + */ + newestPage = SLRU_PAGES_PER_SEGMENT; + newestXact = newestPage * per_page + offset; + Assert(newestXact / per_page == newestPage); + oldestXact = newestXact + 1; + oldestXact -= 1U << 31; + oldestPage = oldestXact / per_page; + Assert(!SlruMayDeleteSegment(ctl, + (newestPage - + newestPage % SLRU_PAGES_PER_SEGMENT), + oldestPage)); +} + +/* + * Unit-test a PagePrecedes function. + * + * This assumes every uint32 >= FirstNormalTransactionId is a valid key. It + * assumes each value occupies a contiguous, fixed-size region of SLRU bytes. + * (MultiXactMemberCtl separates flags from XIDs. AsyncCtl has + * variable-length entries, no keys, and no random access. These unit tests + * do not apply to them.) + */ +void +SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page) +{ + /* Test first, middle and last entries of a page. */ + SlruPagePrecedesTestOffset(ctl, per_page, 0); + SlruPagePrecedesTestOffset(ctl, per_page, per_page / 2); + SlruPagePrecedesTestOffset(ctl, per_page, per_page - 1); +} +#endif + /* * SlruScanDirectory callback - * This callback reports true if there's any segment prior to the one - * containing the page passed as "data". + * This callback reports true if there's any segment wholly prior to the + * one containing the page passed as "data". 
*/ bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data) { int cutoffPage = *(int *) data; - cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; - - if (ctl->PagePrecedes(segpage, cutoffPage)) + if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) return true; /* found one; don't iterate any more */ return false; /* keep going */ @@ -1404,7 +1515,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) { int cutoffPage = *(int *) data; - if (ctl->PagePrecedes(segpage, cutoffPage)) + if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT); return false; /* keep going */ diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 0111e867c79a2..6a8e521f89405 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -19,7 +19,7 @@ * data across crashes. During database startup, we simply force the * currently-active page of SUBTRANS to zeroes. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/subtrans.c @@ -194,6 +194,7 @@ SUBTRANSShmemInit(void) SimpleLruInit(SubTransCtl, "Subtrans", NUM_SUBTRANS_BUFFERS, 0, SubtransSLRULock, "pg_subtrans", LWTRANCHE_SUBTRANS_BUFFER, SYNC_HANDLER_NONE); + SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE); } /* @@ -354,13 +355,8 @@ TruncateSUBTRANS(TransactionId oldestXact) /* - * Decide which of two SUBTRANS page numbers is "older" for truncation purposes. - * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. 
However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. + * Decide whether a SUBTRANS page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes(). */ static bool SubTransPagePrecedes(int page1, int page2) @@ -369,9 +365,10 @@ SubTransPagePrecedes(int page1, int page2) TransactionId xid2; xid1 = ((TransactionId) page1) * SUBTRANS_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; + xid1 += FirstNormalTransactionId + 1; xid2 = ((TransactionId) page2) * SUBTRANS_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; + xid2 += FirstNormalTransactionId + 1; - return TransactionIdPrecedes(xid1, xid2); + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + SUBTRANS_XACTS_PER_PAGE - 1)); } diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c index e6a29d9a9b7f0..690471ac4ed16 100644 --- a/src/backend/access/transam/timeline.c +++ b/src/backend/access/transam/timeline.c @@ -21,7 +21,7 @@ * The fields are separated by tabs. Lines beginning with # are comments, and * are ignored. Empty lines are also ignored. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/timeline.c diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index a28918657cfb1..1ba4bbead5599 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -3,7 +3,7 @@ * transam.c * postgres transaction (commit) log interface routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 873bf9bad9850..fc18b778324dd 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -3,7 +3,7 @@ * twophase.c * Two-phase commit support functions. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/access/transam/twophase_rmgr.c b/src/backend/access/transam/twophase_rmgr.c index 3a6a2d1fafd25..1fd785567cc83 100644 --- a/src/backend/access/transam/twophase_rmgr.c +++ b/src/backend/access/transam/twophase_rmgr.c @@ -3,7 +3,7 @@ * twophase_rmgr.c * Two-phase-commit resource managers tables * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index a4944faa32e34..2264c2c849cac 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -3,7 +3,7 @@ * varsup.c * postgres OID & XID variables support routines * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/access/transam/varsup.c diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 9cd0b7c11bc9f..a2068e3fd45d8 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -5,7 +5,7 @@ * * See src/backend/access/transam/README for more information. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 9867e1b4039cd..f03bd473e2b30 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4,7 +4,7 @@ * PostgreSQL write-ahead log manager * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/xlog.c @@ -48,6 +48,7 @@ #include "pg_trace.h" #include "pgstat.h" #include "port/atomics.h" +#include "port/pg_iovec.h" #include "postmaster/bgwriter.h" #include "postmaster/startup.h" #include "postmaster/walwriter.h" @@ -429,10 +430,6 @@ static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr; * ControlFileLock: must be held to read/update control file or create * new log file. * - * CheckpointLock: must be held to do a checkpoint or restartpoint (ensures - * only one checkpointer at a time; currently, with all checkpoints done by - * the checkpointer, this is just pro forma). 
- * *---------- */ @@ -929,7 +926,8 @@ static void XLogFileClose(void); static void PreallocXlogFiles(XLogRecPtr endptr); static void RemoveTempXlogFiles(void); static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr); -static void RemoveXlogFile(const char *segname, XLogRecPtr lastredoptr, XLogRecPtr endptr); +static void RemoveXlogFile(const char *segname, XLogSegNo recycleSegNo, + XLogSegNo *endlogSegNo); static void UpdateLastRemovedPtr(char *filename); static void ValidateXLOGDirectoryStructure(void); static void CleanupBackupHistory(void); @@ -3270,7 +3268,6 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) XLogSegNo installed_segno; XLogSegNo max_segno; int fd; - int nbytes; int save_errno; XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size); @@ -3317,6 +3314,9 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) save_errno = 0; if (wal_init_zero) { + struct iovec iov[PG_IOV_MAX]; + int blocks; + /* * Zero-fill the file. With this setting, we do this the hard way to * ensure that all the file space has really been allocated. On @@ -3326,15 +3326,28 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) * indirect blocks are down on disk. Therefore, fdatasync(2) or * O_DSYNC will be sufficient to sync future writes to the log file. */ - for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) + + /* Prepare to write out a lot of copies of our zero buffer at once. */ + for (int i = 0; i < lengthof(iov); ++i) { - errno = 0; - if (write(fd, zbuffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ) + iov[i].iov_base = zbuffer.data; + iov[i].iov_len = XLOG_BLCKSZ; + } + + /* Loop, writing as many blocks as we can for each system call. 
*/ + blocks = wal_segment_size / XLOG_BLCKSZ; + for (int i = 0; i < blocks;) + { + int iovcnt = Min(blocks - i, lengthof(iov)); + off_t offset = i * XLOG_BLCKSZ; + + if (pg_pwritev_with_retry(fd, iov, iovcnt, offset) < 0) { - /* if write didn't set errno, assume no disk space */ - save_errno = errno ? errno : ENOSPC; + save_errno = errno; break; } + + i += iovcnt; } } else @@ -4039,6 +4052,12 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr) DIR *xldir; struct dirent *xlde; char lastoff[MAXFNAMELEN]; + XLogSegNo endlogSegNo; + XLogSegNo recycleSegNo; + + /* Initialize info about where to try to recycle to */ + XLByteToSeg(endptr, endlogSegNo, wal_segment_size); + recycleSegNo = XLOGfileslop(lastredoptr); /* * Construct a filename of the last segment to be kept. The timeline ID @@ -4077,7 +4096,7 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr) /* Update the last removed location in shared memory first */ UpdateLastRemovedPtr(xlde->d_name); - RemoveXlogFile(xlde->d_name, lastredoptr, endptr); + RemoveXlogFile(xlde->d_name, recycleSegNo, &endlogSegNo); } } } @@ -4107,13 +4126,21 @@ RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI) struct dirent *xlde; char switchseg[MAXFNAMELEN]; XLogSegNo endLogSegNo; + XLogSegNo switchLogSegNo; + XLogSegNo recycleSegNo; - XLByteToPrevSeg(switchpoint, endLogSegNo, wal_segment_size); + /* + * Initialize info about where to begin the work. This will recycle, + * somewhat arbitrarily, 10 future segments. + */ + XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size); + XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size); + recycleSegNo = endLogSegNo + 10; /* * Construct a filename of the last segment to be kept. 
*/ - XLogFileName(switchseg, newTLI, endLogSegNo, wal_segment_size); + XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size); elog(DEBUG2, "attempting to remove WAL segments newer than log file %s", switchseg); @@ -4141,7 +4168,7 @@ RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI) * - but seems safer to let them be archived and removed later. */ if (!XLogArchiveIsReady(xlde->d_name)) - RemoveXlogFile(xlde->d_name, InvalidXLogRecPtr, switchpoint); + RemoveXlogFile(xlde->d_name, recycleSegNo, &endLogSegNo); } } @@ -4151,48 +4178,34 @@ RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI) /* * Recycle or remove a log file that's no longer needed. * - * endptr is current (or recent) end of xlog, and lastredoptr is the - * redo pointer of the last checkpoint. These are used to determine - * whether we want to recycle rather than delete no-longer-wanted log files. - * If lastredoptr is not known, pass invalid, and the function will recycle, - * somewhat arbitrarily, 10 future segments. + * segname is the name of the segment to recycle or remove. recycleSegNo + * is the segment number to recycle up to. endlogSegNo is the segment + * number of the current (or recent) end of WAL. + * + * endlogSegNo gets incremented if the segment is recycled so as it is not + * checked again with future callers of this function. */ static void -RemoveXlogFile(const char *segname, XLogRecPtr lastredoptr, XLogRecPtr endptr) +RemoveXlogFile(const char *segname, XLogSegNo recycleSegNo, + XLogSegNo *endlogSegNo) { char path[MAXPGPATH]; #ifdef WIN32 char newpath[MAXPGPATH]; #endif struct stat statbuf; - XLogSegNo endlogSegNo; - XLogSegNo recycleSegNo; - - if (wal_recycle) - { - /* - * Initialize info about where to try to recycle to. 
- */ - XLByteToSeg(endptr, endlogSegNo, wal_segment_size); - if (lastredoptr == InvalidXLogRecPtr) - recycleSegNo = endlogSegNo + 10; - else - recycleSegNo = XLOGfileslop(lastredoptr); - } - else - recycleSegNo = 0; /* keep compiler quiet */ snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname); /* * Before deleting the file, see if it can be recycled as a future log - * segment. Only recycle normal files, pg_standby for example can create + * segment. Only recycle normal files, because we don't want to recycle * symbolic links pointing to a separate archive directory. */ if (wal_recycle && - endlogSegNo <= recycleSegNo && + *endlogSegNo <= recycleSegNo && lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) && - InstallXLogFileSegment(&endlogSegNo, path, + InstallXLogFileSegment(endlogSegNo, path, true, recycleSegNo, true)) { ereport(DEBUG2, @@ -4200,7 +4213,7 @@ RemoveXlogFile(const char *segname, XLogRecPtr lastredoptr, XLogRecPtr endptr) segname))); CheckpointStats.ckpt_segs_recycled++; /* Needn't recheck that slot on future iterations */ - endlogSegNo++; + (*endlogSegNo)++; } else { @@ -6244,12 +6257,61 @@ static void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue) { if (currValue < minValue) - ereport(ERROR, + { + if (LocalHotStandbyActive) + { + bool warned_for_promote = false; + + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("hot standby is not possible because of insufficient parameter settings"), + errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.", + param_name, + currValue, + minValue))); + + SetRecoveryPause(true); + + ereport(LOG, + (errmsg("recovery has paused"), + errdetail("If recovery is unpaused, the server will shut down."), + errhint("You can then restart the server after making the necessary configuration changes."))); + + while (RecoveryIsPaused()) + { + HandleStartupProcInterrupts(); + + if (CheckForStandbyTrigger()) + { + if 
(!warned_for_promote) + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("promotion is not possible because of insufficient parameter settings"), + /* Repeat the detail from above so it's easy to find in the log. */ + errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.", + param_name, + currValue, + minValue), + errhint("Restart the server after making the necessary configuration changes."))); + warned_for_promote = true; + } + + pgstat_report_wait_start(WAIT_EVENT_RECOVERY_PAUSE); + pg_usleep(1000000L); /* 1000 ms */ + pgstat_report_wait_end(); + } + } + + ereport(FATAL, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("hot standby is not possible because %s = %d is a lower setting than on the primary server (its value was %d)", - param_name, - currValue, - minValue))); + errmsg("recovery aborted because of insufficient parameter settings"), + /* Repeat the detail from above so it's easy to find in the log. */ + errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.", + param_name, + currValue, + minValue), + errhint("You can restart the server after making the necessary configuration changes."))); + } } /* @@ -6793,6 +6855,13 @@ StartupXLOG(void) */ StartupReorderBuffer(); + /* + * Startup CLOG. This must be done after ShmemVariableCache->nextXid + * has been initialized and before we accept connections or begin WAL + * replay. + */ + StartupCLOG(); + /* * Startup MultiXact. We need to do this early to be able to replay * truncations. @@ -7063,11 +7132,10 @@ StartupXLOG(void) ProcArrayInitRecovery(XidFromFullTransactionId(ShmemVariableCache->nextXid)); /* - * Startup commit log and subtrans only. MultiXact and commit + * Startup subtrans only. CLOG, MultiXact and commit * timestamp have already been started up and other SLRUs are not * maintained during recovery and need not be started yet. 
*/ - StartupCLOG(); StartupSUBTRANS(oldestActiveXID); /* @@ -7883,14 +7951,11 @@ StartupXLOG(void) LWLockRelease(ProcArrayLock); /* - * Start up the commit log and subtrans, if not already done for hot - * standby. (commit timestamps are started below, if necessary.) + * Start up subtrans, if not already done for hot standby. (commit + * timestamps are started below, if necessary.) */ if (standbyState == STANDBY_DISABLED) - { - StartupCLOG(); StartupSUBTRANS(oldestActiveXID); - } /* * Perform end of recovery actions for any SLRUs that need it. @@ -8798,14 +8863,6 @@ CreateCheckPoint(int flags) */ InitXLogInsert(); - /* - * Acquire CheckpointLock to ensure only one checkpoint happens at a time. - * (This is just pro forma, since in the present system structure there is - * only one process that is allowed to issue checkpoints at any given - * time.) - */ - LWLockAcquire(CheckpointLock, LW_EXCLUSIVE); - /* * Prepare to accumulate statistics. * @@ -8875,7 +8932,6 @@ CreateCheckPoint(int flags) if (last_important_lsn == ControlFile->checkPoint) { WALInsertLockRelease(); - LWLockRelease(CheckpointLock); END_CRIT_SECTION(); ereport(DEBUG1, (errmsg("checkpoint skipped because system is idle"))); @@ -9175,15 +9231,12 @@ CreateCheckPoint(int flags) CheckpointStats.ckpt_segs_added, CheckpointStats.ckpt_segs_removed, CheckpointStats.ckpt_segs_recycled); - - LWLockRelease(CheckpointLock); } /* * Mark the end of recovery in WAL though without running a full checkpoint. * We can expect that a restartpoint is likely to be in progress as we - * do this, though we are unwilling to wait for it to complete. So be - * careful to avoid taking the CheckpointLock anywhere here. + * do this, though we are unwilling to wait for it to complete. * * CreateRestartPoint() allows for the case where recovery may end before * the restartpoint completes so there is no concern of concurrent behaviour. 
@@ -9333,12 +9386,6 @@ CreateRestartPoint(int flags) XLogSegNo _logSegNo; TimestampTz xtime; - /* - * Acquire CheckpointLock to ensure only one restartpoint or checkpoint - * happens at a time. - */ - LWLockAcquire(CheckpointLock, LW_EXCLUSIVE); - /* Get a local copy of the last safe checkpoint record. */ SpinLockAcquire(&XLogCtl->info_lck); lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr; @@ -9354,7 +9401,6 @@ CreateRestartPoint(int flags) { ereport(DEBUG2, (errmsg("skipping restartpoint, recovery has already ended"))); - LWLockRelease(CheckpointLock); return false; } @@ -9389,7 +9435,6 @@ CreateRestartPoint(int flags) UpdateControlFile(); LWLockRelease(ControlFileLock); } - LWLockRelease(CheckpointLock); return false; } @@ -9555,8 +9600,6 @@ CreateRestartPoint(int flags) xtime ? errdetail("Last completed transaction was at log time %s.", timestamptz_to_str(xtime)) : 0)); - LWLockRelease(CheckpointLock); - /* * Finally, execute archive_cleanup_command, if any. */ diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index f39dc4ddf1a55..1c5a4f8b5a548 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -4,7 +4,7 @@ * Functions for archiving WAL files and restoring from the archive. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/xlogarchive.c diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 290658b22c17e..5e1aab319dda8 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -7,7 +7,7 @@ * This file contains WAL control and information functions. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/xlogfuncs.c diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 1f0e4e01e69b1..7052dc245ee02 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -9,7 +9,7 @@ * of XLogRecData structs by a call to XLogRecordAssemble(). See * access/transam/README for details. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/xloginsert.c diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index a63ad8cfd0bfc..bb95e0e527f09 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -3,7 +3,7 @@ * xlogreader.c * Generic XLog reading facility * - * Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/access/transam/xlogreader.c @@ -1545,7 +1545,7 @@ XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len) /* * Restore a full-page image from a backup block attached to an XLOG record. * - * Returns the buffer number containing the page. + * Returns true if a full-page image is restored. 
*/ bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index e0ca3859a9553..e72325329755b 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -8,7 +8,7 @@ * None of this code is used during normal system operation. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/access/transam/xlogutils.c diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y index 6bb0c6ed1ea9c..5fcd004e1b182 100644 --- a/src/backend/bootstrap/bootparse.y +++ b/src/backend/bootstrap/bootparse.y @@ -4,7 +4,7 @@ * bootparse.y * yacc grammar for the "bootstrap" mode (BKI file format) * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/bootstrap/bootscanner.l b/src/backend/bootstrap/bootscanner.l index 62d67a695ec46..7aecd895fba47 100644 --- a/src/backend/bootstrap/bootscanner.l +++ b/src/backend/bootstrap/bootscanner.l @@ -4,7 +4,7 @@ * bootscanner.l * a lexical scanner for the bootstrap parser * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index a7ed93fdc14d1..6f615e66220b3 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -4,7 +4,7 @@ * routines to support running postgres in 'bootstrap' mode * bootstrap mode is used to create the 
initial template database * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/catalog/.gitignore b/src/backend/catalog/.gitignore index 11e2e5202326b..237ff541659b1 100644 --- a/src/backend/catalog/.gitignore +++ b/src/backend/catalog/.gitignore @@ -1,4 +1,6 @@ /postgres.bki /schemapg.h +/system_fk_info.h +/system_constraints.sql /pg_*_d.h /bki-stamp diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm index dd39a086ce4fd..b44d568b54440 100644 --- a/src/backend/catalog/Catalog.pm +++ b/src/backend/catalog/Catalog.pm @@ -4,7 +4,7 @@ # Perl module that extracts info from catalog files into Perl # data structures # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/backend/catalog/Catalog.pm @@ -94,14 +94,26 @@ sub ParseHeader push @{ $catalog{toasting} }, { parent_table => $1, toast_oid => $2, toast_index_oid => $3 }; } - elsif (/^DECLARE_(UNIQUE_)?INDEX\(\s*(\w+),\s*(\d+),\s*(.+)\)/) + elsif (/^DECLARE_(UNIQUE_)?INDEX(_PKEY)?\(\s*(\w+),\s*(\d+),\s*(.+)\)/) { push @{ $catalog{indexing} }, - { + { is_unique => $1 ? 1 : 0, - index_name => $2, - index_oid => $3, - index_decl => $4 + is_pkey => $2 ? 1 : 0, + index_name => $3, + index_oid => $4, + index_decl => $5 + }; + } + elsif (/^DECLARE_(ARRAY_)?FOREIGN_KEY(_OPT)?\(\s*\(([^)]+)\),\s*(\w+),\s*\(([^)]+)\)\)/) + { + push @{ $catalog{foreign_keys} }, + { + is_array => $1 ? 1 : 0, + is_opt => $2 ? 
1 : 0, + fk_cols => $3, + pk_table => $4, + pk_cols => $5 }; } elsif (/^CATALOG\((\w+),(\d+),(\w+)\)/) @@ -196,9 +208,22 @@ sub ParseHeader { $column{array_default} = $1; } - elsif ($attopt =~ /BKI_LOOKUP\((\w+)\)/) + elsif ($attopt =~ /BKI_LOOKUP(_OPT)?\((\w+)\)/) { - $column{lookup} = $1; + $column{lookup} = $2; + $column{lookup_opt} = $1 ? 1 : 0; + # BKI_LOOKUP implicitly makes an FK reference + push @{ $catalog{foreign_keys} }, + { + is_array => + ($atttype eq 'oidvector' || $atttype eq '_oid') + ? 1 + : 0, + is_opt => $column{lookup_opt}, + fk_cols => $attname, + pk_table => $column{lookup}, + pk_cols => 'oid' + }; } else { diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 2519771210800..70bc2123df7bf 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -2,7 +2,7 @@ # # Makefile for backend/catalog # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/backend/catalog/Makefile @@ -70,7 +70,7 @@ CATALOG_HEADERS := \ pg_sequence.h pg_publication.h pg_publication_rel.h pg_subscription.h \ pg_subscription_rel.h -GENERATED_HEADERS := $(CATALOG_HEADERS:%.h=%_d.h) schemapg.h +GENERATED_HEADERS := $(CATALOG_HEADERS:%.h=%_d.h) schemapg.h system_fk_info.h POSTGRES_BKI_SRCS := $(addprefix $(top_srcdir)/src/include/catalog/, $(CATALOG_HEADERS)) @@ -121,6 +121,7 @@ $(top_builddir)/src/include/catalog/header-stamp: bki-stamp .PHONY: install-data install-data: bki-stamp installdirs $(INSTALL_DATA) $(call vpathsearch,postgres.bki) '$(DESTDIR)$(datadir)/postgres.bki' + $(INSTALL_DATA) $(call vpathsearch,system_constraints.sql) '$(DESTDIR)$(datadir)/system_constraints.sql' $(INSTALL_DATA) $(srcdir)/system_views.sql '$(DESTDIR)$(datadir)/system_views.sql' $(INSTALL_DATA) $(srcdir)/information_schema.sql '$(DESTDIR)$(datadir)/information_schema.sql' 
$(INSTALL_DATA) $(srcdir)/sql_features.txt '$(DESTDIR)$(datadir)/sql_features.txt' @@ -130,11 +131,11 @@ installdirs: .PHONY: uninstall-data uninstall-data: - rm -f $(addprefix '$(DESTDIR)$(datadir)'/, postgres.bki system_views.sql information_schema.sql sql_features.txt) + rm -f $(addprefix '$(DESTDIR)$(datadir)'/, postgres.bki system_constraints.sql system_views.sql information_schema.sql sql_features.txt) -# postgres.bki and the generated headers are in the distribution tarball, -# so they are not cleaned here. +# postgres.bki, system_constraints.sql, and the generated headers are +# in the distribution tarball, so they are not cleaned here. clean: maintainer-clean: clean - rm -f bki-stamp postgres.bki $(GENERATED_HEADERS) + rm -f bki-stamp postgres.bki system_constraints.sql $(GENERATED_HEADERS) diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index c4594b0b095ac..add3d147e766c 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -3,7 +3,7 @@ * aclchk.c * Routines to check access control permissions. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -363,6 +363,22 @@ ExecuteGrantStmt(GrantStmt *stmt) const char *errormsg; AclMode all_privileges; + if (stmt->grantor) + { + Oid grantor; + + grantor = get_rolespec_oid(stmt->grantor, false); + + /* + * Currently, this clause is only for SQL compatibility, not very + * interesting otherwise. + */ + if (grantor != GetUserId()) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("grantor must be current user"))); + } + /* * Turn the regular GrantStmt into the InternalGrant form. 
*/ @@ -1365,6 +1381,9 @@ SetDefaultACL(InternalDefaultACL *iacls) ReleaseSysCache(tuple); table_close(rel, RowExclusiveLock); + + /* prevent error when processing duplicate objects */ + CommandCounterIncrement(); } diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index f984514fe062a..e2ed80a5de94d 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -5,7 +5,7 @@ * bits of hard-wired knowledge * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 119006159b60a..132573362497e 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -4,7 +4,7 @@ * Routines to support inter-object dependencies. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -2264,6 +2264,21 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine substructure */ } + else if (IsA(node, CTECycleClause)) + { + CTECycleClause *cc = (CTECycleClause *) node; + + if (OidIsValid(cc->cycle_mark_type)) + add_object_address(OCLASS_TYPE, cc->cycle_mark_type, 0, + context->addrs); + if (OidIsValid(cc->cycle_mark_collation)) + add_object_address(OCLASS_COLLATION, cc->cycle_mark_collation, 0, + context->addrs); + if (OidIsValid(cc->cycle_mark_neop)) + add_object_address(OCLASS_OPERATOR, cc->cycle_mark_neop, 0, + context->addrs); + /* fall through to examine substructure */ + } else if (IsA(node, Query)) { /* Recurse into RTE subquery or not-yet-planned sublink subquery */ diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index 
66fdaf67b135b..b15995811234a 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -6,7 +6,7 @@ # headers from specially formatted header files and data files. # postgres.bki is used to initialize the postgres template database. # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/backend/catalog/genbki.pl @@ -55,6 +55,7 @@ my @toast_decls; my @index_decls; my %oidcounts; +my @system_constraints; foreach my $header (@ARGV) { @@ -137,6 +138,17 @@ $index->{index_name}, $index->{index_oid}, $index->{index_decl}; $oidcounts{ $index->{index_oid} }++; + + if ($index->{is_unique}) + { + $index->{index_decl} =~ /on (\w+) using/; + my $tblname = $1; + push @system_constraints, + sprintf "ALTER TABLE %s ADD %s USING INDEX %s;", + $tblname, + $index->{is_pkey} ? "PRIMARY KEY" : "UNIQUE", + $index->{index_name}; + } } } @@ -172,15 +184,9 @@ # within a given Postgres release, such as fixed OIDs. Do not substitute # anything that could depend on platform or configuration. (The right place # to handle those sorts of things is in initdb.c's bootstrap_template1().) -my $BOOTSTRAP_SUPERUSERID = - Catalog::FindDefinedSymbolFromData($catalog_data{pg_authid}, - 'BOOTSTRAP_SUPERUSERID'); my $C_COLLATION_OID = Catalog::FindDefinedSymbolFromData($catalog_data{pg_collation}, 'C_COLLATION_OID'); -my $PG_CATALOG_NAMESPACE = - Catalog::FindDefinedSymbolFromData($catalog_data{pg_namespace}, - 'PG_CATALOG_NAMESPACE'); # Fill in pg_class.relnatts by looking at the referenced catalog's schema. @@ -201,6 +207,13 @@ $amoids{ $row->{amname} } = $row->{oid}; } +# role OID lookup +my %authidoids; +foreach my $row (@{ $catalog_data{pg_authid} }) +{ + $authidoids{ $row->{rolname} } = $row->{oid}; +} + # class (relation) OID lookup (note this only covers bootstrap catalogs!) 
my %classoids; foreach my $row (@{ $catalog_data{pg_class} }) @@ -222,6 +235,13 @@ $langoids{ $row->{lanname} } = $row->{oid}; } +# namespace (schema) OID lookup +my %namespaceoids; +foreach my $row (@{ $catalog_data{pg_namespace} }) +{ + $namespaceoids{ $row->{nspname} } = $row->{oid}; +} + # opclass OID lookup my %opcoids; foreach my $row (@{ $catalog_data{pg_opclass} }) @@ -364,9 +384,11 @@ # Map lookup name to the corresponding hash table. my %lookup_kind = ( pg_am => \%amoids, + pg_authid => \%authidoids, pg_class => \%classoids, pg_collation => \%collationoids, pg_language => \%langoids, + pg_namespace => \%namespaceoids, pg_opclass => \%opcoids, pg_operator => \%operoids, pg_opfamily => \%opfoids, @@ -388,6 +410,12 @@ my $schemafile = $output_path . 'schemapg.h'; open my $schemapg, '>', $schemafile . $tmpext or die "can't open $schemafile$tmpext: $!"; +my $fk_info_file = $output_path . 'system_fk_info.h'; +open my $fk_info, '>', $fk_info_file . $tmpext + or die "can't open $fk_info_file$tmpext: $!"; +my $constraints_file = $output_path . 'system_constraints.sql'; +open my $constraints, '>', $constraints_file . $tmpext + or die "can't open $constraints_file$tmpext: $!"; # Generate postgres.bki and pg_*_d.h headers. @@ -415,7 +443,7 @@ * %s_d.h * Macro definitions for %s * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * NOTES @@ -539,18 +567,14 @@ $GenbkiNextOid++; } - # Substitute constant values we acquired above. - # (It's intentional that this can apply to parts of a field). - $bki_values{$attname} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g; - $bki_values{$attname} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g; - # Replace OID synonyms with OIDs per the appropriate lookup rule. # # If the column type is oidvector or _oid, we have to replace # each element of the array as per the lookup rule. 
if ($column->{lookup}) { - my $lookup = $lookup_kind{ $column->{lookup} }; + my $lookup = $lookup_kind{ $column->{lookup} }; + my $lookup_opt = $column->{lookup_opt}; my @lookupnames; my @lookupoids; @@ -560,8 +584,9 @@ if ($atttype eq 'oidvector') { @lookupnames = split /\s+/, $bki_values{$attname}; - @lookupoids = lookup_oids($lookup, $catname, \%bki_values, - @lookupnames); + @lookupoids = + lookup_oids($lookup, $catname, $attname, $lookup_opt, + \%bki_values, @lookupnames); $bki_values{$attname} = join(' ', @lookupoids); } elsif ($atttype eq '_oid') @@ -571,8 +596,8 @@ $bki_values{$attname} =~ s/[{}]//g; @lookupnames = split /,/, $bki_values{$attname}; @lookupoids = - lookup_oids($lookup, $catname, \%bki_values, - @lookupnames); + lookup_oids($lookup, $catname, $attname, + $lookup_opt, \%bki_values, @lookupnames); $bki_values{$attname} = sprintf "{%s}", join(',', @lookupoids); } @@ -580,8 +605,9 @@ else { $lookupnames[0] = $bki_values{$attname}; - @lookupoids = lookup_oids($lookup, $catname, \%bki_values, - @lookupnames); + @lookupoids = + lookup_oids($lookup, $catname, $attname, $lookup_opt, + \%bki_values, @lookupnames); $bki_values{$attname} = $lookupoids[0]; } } @@ -648,6 +674,12 @@ "genbki OID counter reached $GenbkiNextOid, overrunning FirstBootstrapObjectId\n" if $GenbkiNextOid > $FirstBootstrapObjectId; +# Now generate system_constraints.sql + +foreach my $c (@system_constraints) +{ + print $constraints $c, "\n"; +} # Now generate schemapg.h @@ -658,7 +690,7 @@ * schemapg.h * Schema_pg_xxx macros for use by relcache.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * NOTES @@ -685,13 +717,79 @@ # Closing boilerplate for schemapg.h print $schemapg "\n#endif\t\t\t\t\t\t\t/* SCHEMAPG_H */\n"; +# Now generate system_fk_info.h + +# Opening boilerplate for system_fk_info.h +print 
$fk_info <{foreign_keys} }) + { + my $pktabname = $fkinfo->{pk_table}; + + # We use BKI_LOOKUP for encodings, but there's no real catalog there + next if $pktabname eq 'encoding'; + + printf $fk_info + "\t{ /* %s */ %s, /* %s */ %s, \"{%s}\", \"{%s}\", %s, %s},\n", + $catname, $catalog->{relation_oid}, + $pktabname, $catalogs{$pktabname}->{relation_oid}, + $fkinfo->{fk_cols}, + $fkinfo->{pk_cols}, + ($fkinfo->{is_array} ? "true" : "false"), + ($fkinfo->{is_opt} ? "true" : "false"); + } +} + +# Closing boilerplate for system_fk_info.h +print $fk_info "};\n\n#endif\t\t\t\t\t\t\t/* SYSTEM_FK_INFO_H */\n"; + # We're done emitting data close $bki; close $schemapg; +close $fk_info; +close $constraints; # Finally, rename the completed files into place. Catalog::RenameTempFile($bkifile, $tmpext); Catalog::RenameTempFile($schemafile, $tmpext); +Catalog::RenameTempFile($fk_info_file, $tmpext); +Catalog::RenameTempFile($constraints_file, $tmpext); exit 0; @@ -925,7 +1023,8 @@ sub morph_row_for_schemapg # within this genbki.pl run.) 
sub lookup_oids { - my ($lookup, $catname, $bki_values, @lookupnames) = @_; + my ($lookup, $catname, $attname, $lookup_opt, $bki_values, @lookupnames) + = @_; my @lookupoids; foreach my $lookupname (@lookupnames) @@ -938,10 +1037,19 @@ sub lookup_oids else { push @lookupoids, $lookupname; - warn sprintf - "unresolved OID reference \"%s\" in %s.dat line %s\n", - $lookupname, $catname, $bki_values->{line_number} - if $lookupname ne '-' and $lookupname ne '0'; + if ($lookupname eq '-' or $lookupname eq '0') + { + warn sprintf + "invalid zero OID reference in %s.dat field %s line %s\n", + $catname, $attname, $bki_values->{line_number} + if !$lookup_opt; + } + else + { + warn sprintf + "unresolved OID reference \"%s\" in %s.dat field %s line %s\n", + $lookupname, $catname, $attname, $bki_values->{line_number}; + } } } return @lookupoids; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 51b5c4f7f682b..9abc4a1f5563d 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -3,7 +3,7 @@ * heap.c * code to create and destroy POSTGRES heap relations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -440,6 +440,15 @@ heap_create(const char *relname, } } + /* + * If a tablespace is specified, removal of that tablespace is normally + * protected by the existence of a physical file; but for relations with + * no files, add a pg_shdepend entry to account for that. 
+ */ + if (!create_storage && reltablespace != InvalidOid) + recordDependencyOnTablespace(RelationRelationId, relid, + reltablespace); + return rel; } diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 731610c70193d..8350c65beb69e 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3,7 +3,7 @@ * index.c * code to create and destroy POSTGRES index relations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -3045,7 +3045,7 @@ index_build(Relation heapRelation, /* Set up initial progress report status */ { - const int index[] = { + const int progress_index[] = { PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_CREATEIDX_TUPLES_DONE, @@ -3053,13 +3053,13 @@ index_build(Relation heapRelation, PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL }; - const int64 val[] = { + const int64 progress_vals[] = { PROGRESS_CREATEIDX_PHASE_BUILD, PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE, 0, 0, 0, 0 }; - pgstat_progress_update_multi_param(6, index, val); + pgstat_progress_update_multi_param(6, progress_index, progress_vals); } /* @@ -3351,19 +3351,19 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) int save_nestlevel; { - const int index[] = { + const int progress_index[] = { PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_TUPLES_DONE, PROGRESS_CREATEIDX_TUPLES_TOTAL, PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL }; - const int64 val[] = { + const int64 progress_vals[] = { PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN, 0, 0, 0, 0 }; - pgstat_progress_update_multi_param(5, index, val); + pgstat_progress_update_multi_param(5, progress_index, progress_vals); } /* Open and lock the parent heap relation */ @@ -3420,17 +3420,17 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) /* Execute the sort */ { - const int index[] 
= { + const int progress_index[] = { PROGRESS_CREATEIDX_PHASE, PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL }; - const int64 val[] = { + const int64 progress_vals[] = { PROGRESS_CREATEIDX_PHASE_VALIDATE_SORT, 0, 0 }; - pgstat_progress_update_multi_param(3, index, val); + pgstat_progress_update_multi_param(3, progress_index, progress_vals); } tuplesort_performsort(state.tuplesort); @@ -3594,7 +3594,7 @@ IndexGetRelation(Oid indexId, bool missing_ok) */ void reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, - int options) + ReindexParams *params) { Relation iRel, heapRelation; @@ -3602,7 +3602,7 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, IndexInfo *indexInfo; volatile bool skipped_constraint = false; PGRUsage ru0; - bool progress = (options & REINDEXOPT_REPORT_PROGRESS) != 0; + bool progress = ((params->options & REINDEXOPT_REPORT_PROGRESS) != 0); pg_rusage_init(&ru0); @@ -3611,12 +3611,12 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, * we only need to be sure no schema or data changes are going on. */ heapId = IndexGetRelation(indexId, - (options & REINDEXOPT_MISSING_OK) != 0); + (params->options & REINDEXOPT_MISSING_OK) != 0); /* if relation is missing, leave */ if (!OidIsValid(heapId)) return; - if ((options & REINDEXOPT_MISSING_OK) != 0) + if ((params->options & REINDEXOPT_MISSING_OK) != 0) heapRelation = try_table_open(heapId, ShareLock); else heapRelation = table_open(heapId, ShareLock); @@ -3792,7 +3792,7 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, } /* Log what we did */ - if (options & REINDEXOPT_VERBOSE) + if ((params->options & REINDEXOPT_VERBOSE) != 0) ereport(INFO, (errmsg("index \"%s\" was reindexed", get_rel_name(indexId)), @@ -3846,7 +3846,7 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, * index rebuild. 
*/ bool -reindex_relation(Oid relid, int flags, int options) +reindex_relation(Oid relid, int flags, ReindexParams *params) { Relation rel; Oid toast_relid; @@ -3861,7 +3861,7 @@ reindex_relation(Oid relid, int flags, int options) * to prevent schema and data changes in it. The lock level used here * should match ReindexTable(). */ - if ((options & REINDEXOPT_MISSING_OK) != 0) + if ((params->options & REINDEXOPT_MISSING_OK) != 0) rel = try_table_open(relid, ShareLock); else rel = table_open(relid, ShareLock); @@ -3935,7 +3935,7 @@ reindex_relation(Oid relid, int flags, int options) } reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS), - persistence, options); + persistence, params); CommandCounterIncrement(); @@ -3965,8 +3965,10 @@ reindex_relation(Oid relid, int flags, int options) * Note that this should fail if the toast relation is missing, so * reset REINDEXOPT_MISSING_OK. */ - result |= reindex_relation(toast_relid, flags, - options & ~(REINDEXOPT_MISSING_OK)); + ReindexParams newparams = *params; + + newparams.options &= ~(REINDEXOPT_MISSING_OK); + result |= reindex_relation(toast_relid, flags, &newparams); } return result; diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index 538f6a06b872d..284ceaa6b9c00 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -4,7 +4,7 @@ * This file contains routines to support indexes defined on system * catalogs. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -162,6 +162,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple) heapRelation, index->rd_index->indisunique ? 
UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, + false, indexInfo); } diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql index 5ab47e774316d..4907855043d87 100644 --- a/src/backend/catalog/information_schema.sql +++ b/src/backend/catalog/information_schema.sql @@ -2,7 +2,7 @@ * SQL Information Schema * as defined in ISO/IEC 9075-11:2016 * - * Copyright (c) 2003-2020, PostgreSQL Global Development Group + * Copyright (c) 2003-2021, PostgreSQL Global Development Group * * src/backend/catalog/information_schema.sql * diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 740570c566dd3..005e029c38265 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -9,7 +9,7 @@ * and implementing search-path-controlled searches. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/catalog/objectaccess.c b/src/backend/catalog/objectaccess.c index 17d7c56198a90..4aa445a077ef8 100644 --- a/src/backend/catalog/objectaccess.c +++ b/src/backend/catalog/objectaccess.c @@ -3,7 +3,7 @@ * objectaccess.c * functions for object_access_hook on various events * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * ------------------------------------------------------------------------- diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index a5eccdffd0cb9..6d88b690d87a8 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -3,7 +3,7 @@ * objectaddress.c * functions for working with ObjectAddresses * - * Portions Copyright (c) 1996-2020, 
PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 4dfac39adfe48..af7754d6ab76a 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -3,7 +3,7 @@ * partition.c * Partitioning related data structures and functions. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_aggregate.c b/src/backend/catalog/pg_aggregate.c index 7664bb62859d1..89f23d0add8f8 100644 --- a/src/backend/catalog/pg_aggregate.c +++ b/src/backend/catalog/pg_aggregate.c @@ -3,7 +3,7 @@ * pg_aggregate.c * routines to support manipulation of the pg_aggregate relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_cast.c b/src/backend/catalog/pg_cast.c index d3f2db41863b9..b9cda3cf50436 100644 --- a/src/backend/catalog/pg_cast.c +++ b/src/backend/catalog/pg_cast.c @@ -3,7 +3,7 @@ * pg_cast.c * routines to support manipulation of the pg_cast relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index 3c84378d0255d..40d98a334a6e9 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -3,7 +3,7 @@ * pg_collation.c * routines to support manipulation 
of the pg_collation relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index 93774c9d21a59..0081558c48ad3 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -3,7 +3,7 @@ * pg_constraint.c * routines to support manipulation of the pg_constraint relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_conversion.c b/src/backend/catalog/pg_conversion.c index 28b676a1fa843..02ac7c4a88552 100644 --- a/src/backend/catalog/pg_conversion.c +++ b/src/backend/catalog/pg_conversion.c @@ -3,7 +3,7 @@ * pg_conversion.c * routines to support manipulation of the pg_conversion relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_db_role_setting.c b/src/backend/catalog/pg_db_role_setting.c index 33fc53af7f90c..0c2b02bb2d089 100644 --- a/src/backend/catalog/pg_db_role_setting.c +++ b/src/backend/catalog/pg_db_role_setting.c @@ -2,7 +2,7 @@ * pg_db_role_setting.c * Routines to support manipulation of the pg_db_role_setting relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/catalog/pg_depend.c b/src/backend/catalog/pg_depend.c index 
429791694f0fe..63da24322d911 100644 --- a/src/backend/catalog/pg_depend.c +++ b/src/backend/catalog/pg_depend.c @@ -3,7 +3,7 @@ * pg_depend.c * routines to support manipulation of the pg_depend relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_enum.c b/src/backend/catalog/pg_enum.c index f2e7bab62a28f..f958f1541dec4 100644 --- a/src/backend/catalog/pg_enum.c +++ b/src/backend/catalog/pg_enum.c @@ -3,7 +3,7 @@ * pg_enum.c * routines to support manipulation of the pg_enum relation * - * Copyright (c) 2006-2020, PostgreSQL Global Development Group + * Copyright (c) 2006-2021, PostgreSQL Global Development Group * * * IDENTIFICATION @@ -41,10 +41,11 @@ Oid binary_upgrade_next_pg_enum_oid = InvalidOid; * committed; otherwise, they might get into indexes where we can't clean * them up, and then if the transaction rolls back we have a broken index. * (See comments for check_safe_enum_use() in enum.c.) Values created by - * EnumValuesCreate are *not* blacklisted; we assume those are created during - * CREATE TYPE, so they can't go away unless the enum type itself does. + * EnumValuesCreate are *not* entered into the table; we assume those are + * created during CREATE TYPE, so they can't go away unless the enum type + * itself does. */ -static HTAB *enum_blacklist = NULL; +static HTAB *uncommitted_enums = NULL; static void RenumberEnumType(Relation pg_enum, HeapTuple *existing, int nelems); static int sort_order_cmp(const void *p1, const void *p2); @@ -181,20 +182,20 @@ EnumValuesDelete(Oid enumTypeOid) } /* - * Initialize the enum blacklist for this transaction. + * Initialize the uncommitted enum table for this transaction. 
*/ static void -init_enum_blacklist(void) +init_uncommitted_enums(void) { HASHCTL hash_ctl; hash_ctl.keysize = sizeof(Oid); hash_ctl.entrysize = sizeof(Oid); hash_ctl.hcxt = TopTransactionContext; - enum_blacklist = hash_create("Enum value blacklist", - 32, - &hash_ctl, - HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + uncommitted_enums = hash_create("Uncommitted enums", + 32, + &hash_ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); } /* @@ -490,12 +491,12 @@ AddEnumLabel(Oid enumTypeOid, table_close(pg_enum, RowExclusiveLock); - /* Set up the blacklist hash if not already done in this transaction */ - if (enum_blacklist == NULL) - init_enum_blacklist(); + /* Set up the uncommitted enum table if not already done in this tx */ + if (uncommitted_enums == NULL) + init_uncommitted_enums(); - /* Add the new value to the blacklist */ - (void) hash_search(enum_blacklist, &newOid, HASH_ENTER, NULL); + /* Add the new value to the table */ + (void) hash_search(uncommitted_enums, &newOid, HASH_ENTER, NULL); } @@ -584,19 +585,19 @@ RenameEnumLabel(Oid enumTypeOid, /* - * Test if the given enum value is on the blacklist + * Test if the given enum value is in the table of uncommitted enums. */ bool -EnumBlacklisted(Oid enum_id) +EnumUncommitted(Oid enum_id) { bool found; - /* If we've made no blacklist table, all values are safe */ - if (enum_blacklist == NULL) + /* If we've made no uncommitted table, all values are safe */ + if (uncommitted_enums == NULL) return false; /* Else, is it in the table? */ - (void) hash_search(enum_blacklist, &enum_id, HASH_FIND, &found); + (void) hash_search(uncommitted_enums, &enum_id, HASH_FIND, &found); return found; } @@ -608,11 +609,11 @@ void AtEOXact_Enum(void) { /* - * Reset the blacklist table, as all our enum values are now committed. + * Reset the uncommitted table, as all our enum values are now committed. * The memory will go away automatically when TopTransactionContext is * freed; it's sufficient to clear our pointer. 
*/ - enum_blacklist = NULL; + uncommitted_enums = NULL; } @@ -691,12 +692,12 @@ sort_order_cmp(const void *p1, const void *p2) } Size -EstimateEnumBlacklistSpace(void) +EstimateUncommittedEnumsSpace(void) { size_t entries; - if (enum_blacklist) - entries = hash_get_num_entries(enum_blacklist); + if (uncommitted_enums) + entries = hash_get_num_entries(uncommitted_enums); else entries = 0; @@ -705,7 +706,7 @@ EstimateEnumBlacklistSpace(void) } void -SerializeEnumBlacklist(void *space, Size size) +SerializeUncommittedEnums(void *space, Size size) { Oid *serialized = (Oid *) space; @@ -713,15 +714,15 @@ SerializeEnumBlacklist(void *space, Size size) * Make sure the hash table hasn't changed in size since the caller * reserved the space. */ - Assert(size == EstimateEnumBlacklistSpace()); + Assert(size == EstimateUncommittedEnumsSpace()); /* Write out all the values from the hash table, if there is one. */ - if (enum_blacklist) + if (uncommitted_enums) { HASH_SEQ_STATUS status; Oid *value; - hash_seq_init(&status, enum_blacklist); + hash_seq_init(&status, uncommitted_enums); while ((value = (Oid *) hash_seq_search(&status))) *serialized++ = *value; } @@ -737,11 +738,11 @@ SerializeEnumBlacklist(void *space, Size size) } void -RestoreEnumBlacklist(void *space) +RestoreUncommittedEnums(void *space) { Oid *serialized = (Oid *) space; - Assert(!enum_blacklist); + Assert(!uncommitted_enums); /* * As a special case, if the list is empty then don't even bother to @@ -752,9 +753,9 @@ RestoreEnumBlacklist(void *space) return; /* Read all the values into a new hash table. 
*/ - init_enum_blacklist(); + init_uncommitted_enums(); do { - hash_search(enum_blacklist, serialized++, HASH_ENTER, NULL); + hash_search(uncommitted_enums, serialized++, HASH_ENTER, NULL); } while (OidIsValid(*serialized)); } diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index 5c3c78a0e6c80..f3783961b7a11 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -8,7 +8,7 @@ * Perhaps someday that code should be moved here, but it'd have to be * disentangled from other stuff such as pg_depend updates. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_largeobject.c b/src/backend/catalog/pg_largeobject.c index ae9365e3a033c..047bc6883cdde 100644 --- a/src/backend/catalog/pg_largeobject.c +++ b/src/backend/catalog/pg_largeobject.c @@ -3,7 +3,7 @@ * pg_largeobject.c * routines to support manipulation of the pg_largeobject relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_namespace.c b/src/backend/catalog/pg_namespace.c index 7d2e26fd359b3..e66e090d4f30b 100644 --- a/src/backend/catalog/pg_namespace.c +++ b/src/backend/catalog/pg_namespace.c @@ -3,7 +3,7 @@ * pg_namespace.c * routines to support manipulation of the pg_namespace relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c index 904cb8ef820c1..6c270f9338957 
100644 --- a/src/backend/catalog/pg_operator.c +++ b/src/backend/catalog/pg_operator.c @@ -3,7 +3,7 @@ * pg_operator.c * routines to support manipulation of the pg_operator relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c index 1dd9ecc0634aa..e14eee5a19e14 100644 --- a/src/backend/catalog/pg_proc.c +++ b/src/backend/catalog/pg_proc.c @@ -3,7 +3,7 @@ * pg_proc.c * routines to support manipulation of the pg_proc relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index 09946be788de3..84d2efcfd2f16 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -3,7 +3,7 @@ * pg_publication.c * publication C API manipulation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -67,7 +67,7 @@ check_publication_add_relation(Relation targetrel) errdetail("System tables cannot be added to publications."))); /* UNLOGGED and TEMP relations cannot be part of publication. 
*/ - if (!RelationNeedsWAL(targetrel)) + if (targetrel->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("table \"%s\" cannot be replicated", diff --git a/src/backend/catalog/pg_range.c b/src/backend/catalog/pg_range.c index 91b0fb0611a78..839b65eb797d7 100644 --- a/src/backend/catalog/pg_range.c +++ b/src/backend/catalog/pg_range.c @@ -3,7 +3,7 @@ * pg_range.c * routines to support manipulation of the pg_range relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c index 3dd7afd343391..90b7a5de29962 100644 --- a/src/backend/catalog/pg_shdepend.c +++ b/src/backend/catalog/pg_shdepend.c @@ -3,7 +3,7 @@ * pg_shdepend.c * routines to support manipulation of the pg_shdepend relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -59,6 +59,7 @@ #include "commands/schemacmds.h" #include "commands/subscriptioncmds.h" #include "commands/tablecmds.h" +#include "commands/tablespace.h" #include "commands/typecmds.h" #include "miscadmin.h" #include "storage/lmgr.h" @@ -186,11 +187,14 @@ recordDependencyOnOwner(Oid classId, Oid objectId, Oid owner) * * There must be no more than one existing entry for the given dependent * object and dependency type! So in practice this can only be used for - * updating SHARED_DEPENDENCY_OWNER entries, which should have that property. + * updating SHARED_DEPENDENCY_OWNER and SHARED_DEPENDENCY_TABLESPACE + * entries, which should have that property. 
* * If there is no previous entry, we assume it was referencing a PINned * object, so we create a new entry. If the new referenced object is * PINned, we don't create an entry (and drop the old one, if any). + * (For tablespaces, we don't record dependencies in certain cases, so + * there are other possible reasons for entries to be missing.) * * sdepRel must be the pg_shdepend relation, already opened and suitably * locked. @@ -344,6 +348,58 @@ changeDependencyOnOwner(Oid classId, Oid objectId, Oid newOwnerId) table_close(sdepRel, RowExclusiveLock); } +/* + * recordDependencyOnTablespace + * + * A convenient wrapper of recordSharedDependencyOn -- register the specified + * tablespace as default for the given object. + * + * Note: it's the caller's responsibility to ensure that there isn't a + * tablespace entry for the object already. + */ +void +recordDependencyOnTablespace(Oid classId, Oid objectId, Oid tablespace) +{ + ObjectAddress myself, + referenced; + + ObjectAddressSet(myself, classId, objectId); + ObjectAddressSet(referenced, TableSpaceRelationId, tablespace); + + recordSharedDependencyOn(&myself, &referenced, + SHARED_DEPENDENCY_TABLESPACE); +} + +/* + * changeDependencyOnTablespace + * + * Update the shared dependencies to account for the new tablespace. + * + * Note: we don't need an objsubid argument because only whole objects + * have tablespaces. 
+ */ +void +changeDependencyOnTablespace(Oid classId, Oid objectId, Oid newTablespaceId) +{ + Relation sdepRel; + + sdepRel = table_open(SharedDependRelationId, RowExclusiveLock); + + if (newTablespaceId != DEFAULTTABLESPACE_OID && + newTablespaceId != InvalidOid) + shdepChangeDep(sdepRel, + classId, objectId, 0, + TableSpaceRelationId, newTablespaceId, + SHARED_DEPENDENCY_TABLESPACE); + else + shdepDropDependency(sdepRel, + classId, objectId, 0, true, + InvalidOid, InvalidOid, + SHARED_DEPENDENCY_INVALID); + + table_close(sdepRel, RowExclusiveLock); +} + /* * getOidListDiff * Helper for updateAclDependencies. @@ -1121,13 +1177,6 @@ shdepLockAndCheckObject(Oid classId, Oid objectId) objectId))); break; - /* - * Currently, this routine need not support any other shared - * object types besides roles. If we wanted to record explicit - * dependencies on databases or tablespaces, we'd need code along - * these lines: - */ -#ifdef NOT_USED case TableSpaceRelationId: { /* For lack of a syscache on pg_tablespace, do this: */ @@ -1141,7 +1190,6 @@ shdepLockAndCheckObject(Oid classId, Oid objectId) pfree(tablespace); break; } -#endif case DatabaseRelationId: { @@ -1201,6 +1249,8 @@ storeObjectDescription(StringInfo descs, appendStringInfo(descs, _("privileges for %s"), objdesc); else if (deptype == SHARED_DEPENDENCY_POLICY) appendStringInfo(descs, _("target of %s"), objdesc); + else if (deptype == SHARED_DEPENDENCY_TABLESPACE) + appendStringInfo(descs, _("tablespace for %s"), objdesc); else elog(ERROR, "unrecognized dependency type: %d", (int) deptype); diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c index ca78d395181a9..44cb285b68650 100644 --- a/src/backend/catalog/pg_subscription.c +++ b/src/backend/catalog/pg_subscription.c @@ -3,7 +3,7 @@ * pg_subscription.c * replication subscriptions * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development 
Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/catalog/pg_type.c b/src/backend/catalog/pg_type.c index 2aa686a640fbd..ec5d1224323cf 100644 --- a/src/backend/catalog/pg_type.c +++ b/src/backend/catalog/pg_type.c @@ -3,7 +3,7 @@ * pg_type.c * routines to support manipulation of the pg_type relation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt index caa971c4356ac..86519ad2974a0 100644 --- a/src/backend/catalog/sql_features.txt +++ b/src/backend/catalog/sql_features.txt @@ -475,7 +475,7 @@ T324 Explicit security for SQL routines NO T325 Qualified SQL parameter references YES T326 Table functions NO T331 Basic roles YES -T332 Extended roles NO mostly supported +T332 Extended roles YES T341 Overloading of SQL-invoked functions and procedures YES T351 Bracketed SQL comments (/*...*/ comments) YES T431 Extended grouping capabilities YES diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index d538f25726f45..cba7a9ada07e1 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -3,7 +3,7 @@ * storage.c * code to create and destroy physical storage for relations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index b140c210bc799..fa58afd9d7814 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1,7 +1,7 @@ /* * PostgreSQL System Views * - * Copyright (c) 1996-2020, PostgreSQL Global 
Development Group + * Copyright (c) 1996-2021, PostgreSQL Global Development Group * * src/backend/catalog/system_views.sql * @@ -924,6 +924,13 @@ CREATE VIEW pg_stat_database AS pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure, pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time, pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time, + pg_stat_get_db_session_time(D.oid) AS session_time, + pg_stat_get_db_active_time(D.oid) AS active_time, + pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time, + pg_stat_get_db_sessions(D.oid) AS sessions, + pg_stat_get_db_sessions_abandoned(D.oid) AS sessions_abandoned, + pg_stat_get_db_sessions_fatal(D.oid) AS sessions_fatal, + pg_stat_get_db_sessions_killed(D.oid) AS sessions_killed, pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset FROM ( SELECT 0 AS oid, NULL::name AS datname @@ -1117,6 +1124,17 @@ CREATE VIEW pg_stat_progress_basebackup AS S.param5 AS tablespaces_streamed FROM pg_stat_get_progress_info('BASEBACKUP') AS S; + +CREATE VIEW pg_stat_progress_copy AS + SELECT + S.pid AS pid, S.datid AS datid, D.datname AS datname, + S.relid AS relid, + S.param1 AS bytes_processed, + S.param2 AS bytes_total, + S.param3 AS lines_processed + FROM pg_stat_get_progress_info('COPY') AS S + LEFT JOIN pg_database D ON S.datid = D.oid; + CREATE VIEW pg_user_mappings AS SELECT U.oid AS umid, diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index f1850436bd745..d7b806020dd28 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -4,7 +4,7 @@ * This file contains routines to support creation of toast tables * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/commands/aggregatecmds.c b/src/backend/commands/aggregatecmds.c index 
6892204a9aaa6..69c50ac087738 100644 --- a/src/backend/commands/aggregatecmds.c +++ b/src/backend/commands/aggregatecmds.c @@ -4,7 +4,7 @@ * * Routines for aggregate-manipulation commands * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index b11ebf0f618bb..29249498a9182 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -3,7 +3,7 @@ * alter.c * Drivers for generic alter commands * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/amcmds.c b/src/backend/commands/amcmds.c index 6f05ee715b480..eff9535ed0eae 100644 --- a/src/backend/commands/amcmds.c +++ b/src/backend/commands/amcmds.c @@ -3,7 +3,7 @@ * amcmds.c * Routines for SQL commands that manipulate access methods. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 8af12b5c6b2b3..7295cf02156e9 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -3,7 +3,7 @@ * analyze.c * the Postgres statistics generator * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index e04afd9963f17..42b232d98b179 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -3,7 +3,7 @@ * async.c * Asynchronous notification: NOTIFY, LISTEN, UNLISTEN * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -490,7 +490,12 @@ asyncQueuePageDiff(int p, int q) return diff; } -/* Is p < q, accounting for wraparound? */ +/* + * Is p < q, accounting for wraparound? + * + * Since asyncQueueIsFull() blocks creation of a page that could precede any + * extant page, we need not assess entries within a page. + */ static bool asyncQueuePagePrecedes(int p, int q) { @@ -1352,8 +1357,8 @@ asyncQueueIsFull(void) * logically precedes the current global tail pointer, ie, the head * pointer would wrap around compared to the tail. We cannot create such * a head page for fear of confusing slru.c. For safety we round the tail - * pointer back to a segment boundary (compare the truncation logic in - * asyncQueueAdvanceTail). 
+ * pointer back to a segment boundary (truncation logic in + * asyncQueueAdvanceTail does not do this, so doing it here is optional). * * Note that this test is *not* dependent on how much space there is on * the current head page. This is necessary because asyncQueueAddEntries diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index fd5a6eec86261..096a06f7b3b8d 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -6,7 +6,7 @@ * There is hardly anything left of Paul Brown's original implementation... * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994-5, Regents of the University of California * * @@ -103,7 +103,7 @@ void cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) { ListCell *lc; - int options = 0; + ClusterParams params = {0}; bool verbose = false; /* Parse option list */ @@ -121,7 +121,7 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) parser_errposition(pstate, opt->location))); } - options = (verbose ? CLUOPT_VERBOSE : 0); + params.options = (verbose ? CLUOPT_VERBOSE : 0); if (stmt->relation != NULL) { @@ -192,7 +192,7 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) table_close(rel, NoLock); /* Do the job. */ - cluster_rel(tableOid, indexOid, options); + cluster_rel(tableOid, indexOid, ¶ms); } else { @@ -234,14 +234,16 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) foreach(rv, rvs) { RelToCluster *rvtc = (RelToCluster *) lfirst(rv); + ClusterParams cluster_params = params; /* Start a new transaction for each relation. */ StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); /* Do the job. 
*/ + cluster_params.options |= CLUOPT_RECHECK; cluster_rel(rvtc->tableOid, rvtc->indexOid, - options | CLUOPT_RECHECK); + &cluster_params); PopActiveSnapshot(); CommitTransactionCommand(); } @@ -272,11 +274,11 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) * and error messages should refer to the operation as VACUUM not CLUSTER. */ void -cluster_rel(Oid tableOid, Oid indexOid, int options) +cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) { Relation OldHeap; - bool verbose = ((options & CLUOPT_VERBOSE) != 0); - bool recheck = ((options & CLUOPT_RECHECK) != 0); + bool verbose = ((params->options & CLUOPT_VERBOSE) != 0); + bool recheck = ((params->options & CLUOPT_RECHECK) != 0); /* Check for user-requested abort. */ CHECK_FOR_INTERRUPTS(); @@ -1355,6 +1357,7 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, ObjectAddress object; Oid mapped_tables[4]; int reindex_flags; + ReindexParams reindex_params = {0}; int i; /* Report that we are now swapping relation files */ @@ -1412,7 +1415,7 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE, PROGRESS_CLUSTER_PHASE_REBUILD_INDEX); - reindex_relation(OIDOldHeap, reindex_flags, 0); + reindex_relation(OIDOldHeap, reindex_flags, &reindex_params); /* Report that we are now doing clean up */ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE, diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index d62c8defbabc9..9634ae6809dab 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -3,7 +3,7 @@ * collationcmds.c * collation-related commands support code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/comment.c b/src/backend/commands/comment.c index 
0ff9ca9f2c8c6..216b8d3068825 100644 --- a/src/backend/commands/comment.c +++ b/src/backend/commands/comment.c @@ -4,7 +4,7 @@ * * PostgreSQL object comments utility code. * - * Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/commands/comment.c diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index fc19307bf2fb3..d0063164a7e4e 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -3,7 +3,7 @@ * constraint.c * PostgreSQL CONSTRAINT support code. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -175,7 +175,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) */ index_insert(indexRel, values, isnull, &checktid, trigdata->tg_relation, UNIQUE_CHECK_EXISTING, - indexInfo); + false, indexInfo); } else { diff --git a/src/backend/commands/conversioncmds.c b/src/backend/commands/conversioncmds.c index 0ee3b6d19a398..f7ff321de71a0 100644 --- a/src/backend/commands/conversioncmds.c +++ b/src/backend/commands/conversioncmds.c @@ -3,7 +3,7 @@ * conversioncmds.c * conversion creation command support code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index b6143b8bf2178..8c712c8737fa9 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -3,7 +3,7 @@ * copy.c * Implements the COPY utility command * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions 
Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 1b14e9a6eb034..c39cc736ed2bc 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -3,7 +3,7 @@ * copyfrom.c * COPY FROM file/program/client * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -25,6 +25,7 @@ #include "access/xlog.h" #include "commands/copy.h" #include "commands/copyfrom_internal.h" +#include "commands/progress.h" #include "commands/trigger.h" #include "executor/execPartition.h" #include "executor/executor.h" @@ -35,6 +36,7 @@ #include "libpq/pqformat.h" #include "miscadmin.h" #include "optimizer/optimizer.h" +#include "pgstat.h" #include "rewrite/rewriteHandler.h" #include "storage/fd.h" #include "tcop/tcopprot.h" @@ -340,8 +342,8 @@ CopyMultiInsertBufferFlush(CopyMultiInsertInfo *miinfo, cstate->cur_lineno = buffer->linenos[i]; recheckIndexes = ExecInsertIndexTuples(resultRelInfo, - buffer->slots[i], estate, false, NULL, - NIL); + buffer->slots[i], estate, false, false, + NULL, NIL); ExecARInsertTriggers(estate, resultRelInfo, slots[i], recheckIndexes, cstate->transition_capture); @@ -1085,6 +1087,7 @@ CopyFrom(CopyFromState cstate) myslot, estate, false, + false, NULL, NIL); } @@ -1100,9 +1103,10 @@ CopyFrom(CopyFromState cstate) /* * We count only tuples not suppressed by a BEFORE INSERT trigger * or FDW; this is the same definition used by nodeModifyTable.c - * for counting tuples inserted by an INSERT command. + * for counting tuples inserted by an INSERT command. Update + * progress of the COPY command as well. 
*/ - processed++; + pgstat_progress_update_param(PROGRESS_COPY_LINES_PROCESSED, ++processed); } } @@ -1415,6 +1419,12 @@ BeginCopyFrom(ParseState *pstate, } } + + /* initialize progress */ + pgstat_progress_start_command(PROGRESS_COMMAND_COPY, + cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid); + cstate->bytes_processed = 0; + /* We keep those variables in cstate. */ cstate->in_functions = in_functions; cstate->typioparams = typioparams; @@ -1479,6 +1489,8 @@ BeginCopyFrom(ParseState *pstate, ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is a directory", cstate->filename))); + + pgstat_progress_update_param(PROGRESS_COPY_BYTES_TOTAL, st.st_size); } } @@ -1522,6 +1534,8 @@ EndCopyFrom(CopyFromState cstate) cstate->filename))); } + pgstat_progress_end_command(); + MemoryContextDelete(cstate->copycontext); pfree(cstate); } diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 34ed3cfcd5b43..4c74067f849cc 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -3,7 +3,7 @@ * copyfromparse.c * Parse CSV/text/binary format for COPY FROM. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -20,11 +20,13 @@ #include "commands/copy.h" #include "commands/copyfrom_internal.h" +#include "commands/progress.h" #include "executor/executor.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "pgstat.h" #include "port/pg_bswap.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -384,6 +386,8 @@ CopyLoadRawBuf(CopyFromState cstate) cstate->raw_buf[nbytes] = '\0'; cstate->raw_buf_index = 0; cstate->raw_buf_len = nbytes; + cstate->bytes_processed += nbytes; + pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed); return (inbytes > 0); } diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index c7e5f04446310..e04ec1e331b4b 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -3,7 +3,7 @@ * copyto.c * COPY
TO file/program/client * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -24,6 +24,7 @@ #include "access/xact.h" #include "access/xlog.h" #include "commands/copy.h" +#include "commands/progress.h" #include "executor/execdesc.h" #include "executor/executor.h" #include "executor/tuptable.h" @@ -32,6 +33,7 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "optimizer/optimizer.h" +#include "pgstat.h" #include "rewrite/rewriteHandler.h" #include "storage/fd.h" #include "tcop/tcopprot.h" @@ -95,6 +97,7 @@ typedef struct CopyToStateData FmgrInfo *out_functions; /* lookup info for output functions */ MemoryContext rowcontext; /* per-row evaluation context */ + uint64 bytes_processed; /* number of bytes processed so far */ } CopyToStateData; @@ -288,6 +291,10 @@ CopySendEndOfRow(CopyToState cstate) break; } + /* Update the progress */ + cstate->bytes_processed += fe_msgbuf->len; + pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed); + resetStringInfo(fe_msgbuf); } @@ -363,6 +370,8 @@ EndCopy(CopyToState cstate) cstate->filename))); } + pgstat_progress_end_command(); + MemoryContextDelete(cstate->copycontext); pfree(cstate); } @@ -760,6 +769,11 @@ BeginCopyTo(ParseState *pstate, } } + /* initialize progress */ + pgstat_progress_start_command(PROGRESS_COMMAND_COPY, + cstate->rel ? 
RelationGetRelid(cstate->rel) : InvalidOid); + cstate->bytes_processed = 0; + MemoryContextSwitchTo(oldcontext); return cstate; @@ -938,7 +952,9 @@ CopyTo(CopyToState cstate) /* Format and send the data */ CopyOneRowTo(cstate, slot); - processed++; + + /* Increment amount of processed tuples and update the progress */ + pgstat_progress_update_param(PROGRESS_COPY_LINES_PROCESSED, ++processed); } ExecDropSingleTupleTableSlot(slot); @@ -1303,7 +1319,9 @@ copy_dest_receive(TupleTableSlot *slot, DestReceiver *self) /* Send the data */ CopyOneRowTo(cstate, slot); - myState->processed++; + + /* Increment amount of processed tuples and update the progress */ + pgstat_progress_update_param(PROGRESS_COPY_LINES_PROCESSED, ++myState->processed); return true; } diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 6bf6c5a3106ff..dce882012e6ff 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -13,7 +13,7 @@ * we must return a tuples-processed count in the QueryCompletion. (We no * longer do that for CTAS ... WITH NO DATA, however.) 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -239,21 +239,9 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, PlannedStmt *plan; QueryDesc *queryDesc; - if (stmt->if_not_exists) - { - Oid nspid; - - nspid = RangeVarGetCreationNamespace(stmt->into->rel); - - if (get_relname_relid(stmt->into->rel->relname, nspid)) - { - ereport(NOTICE, - (errcode(ERRCODE_DUPLICATE_TABLE), - errmsg("relation \"%s\" already exists, skipping", - stmt->into->rel->relname))); - return InvalidObjectAddress; - } - } + /* Check if the relation exists or not */ + if (CreateTableAsRelExists(stmt)) + return InvalidObjectAddress; /* * Create the tuple receiver object and insert info it will need @@ -400,6 +388,41 @@ GetIntoRelEFlags(IntoClause *intoClause) return flags; } +/* + * CreateTableAsRelExists --- check existence of relation for CreateTableAsStmt + * + * Utility wrapper checking if the relation pending for creation in this + * CreateTableAsStmt query already exists or not. Returns true if the + * relation exists, otherwise false. 
+ */ +bool +CreateTableAsRelExists(CreateTableAsStmt *ctas) +{ + Oid nspid; + IntoClause *into = ctas->into; + + nspid = RangeVarGetCreationNamespace(into->rel); + + if (get_relname_relid(into->rel->relname, nspid)) + { + if (!ctas->if_not_exists) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists", + into->rel->relname))); + + /* The relation exists and IF NOT EXISTS has been specified */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists, skipping", + into->rel->relname))); + return true; + } + + /* Relation does not exist, it can be created */ + return false; +} + /* * CreateIntoRelDestReceiver -- create a suitable DestReceiver object * diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index f27c3fe8c1ca2..2b159b60ebb33 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -8,7 +8,7 @@ * stepping on each others' toes. Formerly we used table-level locks * on pg_database, but that's too coarse-grained. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/define.c b/src/backend/commands/define.c index 3a2aff79c2847..84487b7d4b426 100644 --- a/src/backend/commands/define.c +++ b/src/backend/commands/define.c @@ -4,7 +4,7 @@ * Support routines for various kinds of object creation. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/discard.c b/src/backend/commands/discard.c index 90dfc58c24d6c..57d3d7dd9b3d7 100644 --- a/src/backend/commands/discard.c +++ b/src/backend/commands/discard.c @@ -3,7 +3,7 @@ * discard.c * The implementation of the DISCARD command * - * Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c index 81f03801086d9..97e5e9a765ca5 100644 --- a/src/backend/commands/dropcmds.c +++ b/src/backend/commands/dropcmds.c @@ -3,7 +3,7 @@ * dropcmds.c * handle various "DROP" operations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index 3ffba4e63ec27..5bde507c7526a 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -3,7 +3,7 @@ * event_trigger.c * PostgreSQL EVENT TRIGGER support code. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 43f9b01e833b1..5d7eb3574c823 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3,7 +3,7 @@ * explain.c * Explain query execution plans * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994-5, Regents of the University of California * * IDENTIFICATION @@ -435,6 +435,22 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt; List *rewritten; + /* + * Check if the relation exists or not. This is done at this stage to + * avoid query planning or execution. + */ + if (CreateTableAsRelExists(ctas)) + { + if (ctas->objtype == OBJECT_TABLE) + ExplainDummyGroup("CREATE TABLE AS", NULL, es); + else if (ctas->objtype == OBJECT_MATVIEW) + ExplainDummyGroup("CREATE MATERIALIZED VIEW", NULL, es); + else + elog(ERROR, "unexpected object type: %d", + (int) ctas->objtype); + return; + } + rewritten = QueryRewrite(castNode(Query, copyObject(ctas->query))); Assert(list_length(rewritten) == 1); ExplainOneQuery(linitial_node(Query, rewritten), diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index b5630b4c8d981..19db329fe6f08 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -12,7 +12,7 @@ * postgresql.conf. An extension also has an installation script file, * containing SQL commands to create the extension's objects. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -682,7 +682,7 @@ read_extension_script_file(const ExtensionControlFile *control, src_encoding = control->encoding; /* make sure that source string is valid in the expected encoding */ - pg_verify_mbstr_len(src_encoding, src_str, len, false); + (void) pg_verify_mbstr(src_encoding, src_str, len, false); /* * Convert the encoding to the database encoding. read_whole_file diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c index de31ddd1f38cd..eb7103fd3b11f 100644 --- a/src/backend/commands/foreigncmds.c +++ b/src/backend/commands/foreigncmds.c @@ -3,7 +3,7 @@ * foreigncmds.c * foreign-data wrapper/server creation/manipulation commands * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c index c3ce480c8f568..7a4e104623bef 100644 --- a/src/backend/commands/functioncmds.c +++ b/src/backend/commands/functioncmds.c @@ -5,7 +5,7 @@ * Routines for CREATE and DROP FUNCTION commands and CREATE and DROP * CAST commands. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1628,6 +1628,7 @@ CreateCast(CreateCastStmt *stmt) case COERCION_ASSIGNMENT: castcontext = COERCION_CODE_ASSIGNMENT; break; + /* COERCION_PLPGSQL is intentionally not covered here */ case COERCION_EXPLICIT: castcontext = COERCION_CODE_EXPLICIT; break; diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 14d24b3cc4034..f9f3ff3b629b3 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -3,7 +3,7 @@ * indexcmds.c * POSTGRES define and remove index code. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -86,12 +86,21 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId, bool primary, bool isconstraint); static char *ChooseIndexNameAddition(List *colnames); static List *ChooseIndexColumnNames(List *indexElems); +static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params, + bool isTopLevel); static void RangeVarCallbackForReindexIndex(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg); +static Oid ReindexTable(RangeVar *relation, ReindexParams *params, + bool isTopLevel); +static void ReindexMultipleTables(const char *objectName, + ReindexObjectType objectKind, ReindexParams *params); static void reindex_error_callback(void *args); -static void ReindexPartitions(Oid relid, int options, bool isTopLevel); -static void ReindexMultipleInternal(List *relids, int options); -static bool ReindexRelationConcurrently(Oid relationOid, int options); +static void ReindexPartitions(Oid relid, ReindexParams *params, + bool isTopLevel); +static void ReindexMultipleInternal(List 
*relids, + ReindexParams *params); +static bool ReindexRelationConcurrently(Oid relationOid, + ReindexParams *params); static void update_relispartition(Oid relationId, bool newval); static inline void set_indexsafe_procflags(void); @@ -100,7 +109,7 @@ static inline void set_indexsafe_procflags(void); */ struct ReindexIndexCallbackState { - int options; /* options from statement */ + ReindexParams params; /* options from statement */ Oid locked_table_oid; /* tracks previously locked table */ }; @@ -385,7 +394,7 @@ CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts) * lazy VACUUMs, because they won't be fazed by missing index entries * either. (Manual ANALYZEs, however, can't be excluded because they * might be within transactions that are going to do arbitrary operations - * later.) Processes running CREATE INDEX CONCURRENTLY + * later.) Processes running CREATE INDEX CONCURRENTLY or REINDEX CONCURRENTLY * on indexes that are neither expressional nor partial are also safe to * ignore, since we know that those processes won't examine any data * outside the table they're indexing. @@ -2452,14 +2461,17 @@ ChooseIndexColumnNames(List *indexElems) } /* - * ReindexParseOptions - * Parse list of REINDEX options, returning a bitmask of ReindexOption. + * ExecReindex + * + * Primary entry point for manual REINDEX commands. This is mainly a + * preparation wrapper for the real operations that will happen in + * each subroutine of REINDEX. */ -int -ReindexParseOptions(ParseState *pstate, ReindexStmt *stmt) +void +ExecReindex(ParseState *pstate, ReindexStmt *stmt, bool isTopLevel) { + ReindexParams params = {0}; ListCell *lc; - int options = 0; bool concurrently = false; bool verbose = false; @@ -2480,19 +2492,51 @@ ReindexParseOptions(ParseState *pstate, ReindexStmt *stmt) parser_errposition(pstate, opt->location))); } - options = + if (concurrently) + PreventInTransactionBlock(isTopLevel, + "REINDEX CONCURRENTLY"); + + params.options = (verbose ? 
REINDEXOPT_VERBOSE : 0) | (concurrently ? REINDEXOPT_CONCURRENTLY : 0); - return options; + switch (stmt->kind) + { + case REINDEX_OBJECT_INDEX: + ReindexIndex(stmt->relation, ¶ms, isTopLevel); + break; + case REINDEX_OBJECT_TABLE: + ReindexTable(stmt->relation, ¶ms, isTopLevel); + break; + case REINDEX_OBJECT_SCHEMA: + case REINDEX_OBJECT_SYSTEM: + case REINDEX_OBJECT_DATABASE: + + /* + * This cannot run inside a user transaction block; if we were + * inside a transaction, then its commit- and + * start-transaction-command calls would not have the intended + * effect! + */ + PreventInTransactionBlock(isTopLevel, + (stmt->kind == REINDEX_OBJECT_SCHEMA) ? "REINDEX SCHEMA" : + (stmt->kind == REINDEX_OBJECT_SYSTEM) ? "REINDEX SYSTEM" : + "REINDEX DATABASE"); + ReindexMultipleTables(stmt->name, stmt->kind, ¶ms); + break; + default: + elog(ERROR, "unrecognized object type: %d", + (int) stmt->kind); + break; + } } /* * ReindexIndex * Recreate a specific index. */ -void -ReindexIndex(RangeVar *indexRelation, int options, bool isTopLevel) +static void +ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel) { struct ReindexIndexCallbackState state; Oid indOid; @@ -2509,10 +2553,10 @@ ReindexIndex(RangeVar *indexRelation, int options, bool isTopLevel) * upgrade the lock, but that's OK, because other sessions can't hold * locks on our temporary table. */ - state.options = options; + state.params = *params; state.locked_table_oid = InvalidOid; indOid = RangeVarGetRelidExtended(indexRelation, - (options & REINDEXOPT_CONCURRENTLY) != 0 ? + (params->options & REINDEXOPT_CONCURRENTLY) != 0 ? 
ShareUpdateExclusiveLock : AccessExclusiveLock, 0, RangeVarCallbackForReindexIndex, @@ -2526,13 +2570,17 @@ ReindexIndex(RangeVar *indexRelation, int options, bool isTopLevel) relkind = get_rel_relkind(indOid); if (relkind == RELKIND_PARTITIONED_INDEX) - ReindexPartitions(indOid, options, isTopLevel); - else if ((options & REINDEXOPT_CONCURRENTLY) != 0 && + ReindexPartitions(indOid, params, isTopLevel); + else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 && persistence != RELPERSISTENCE_TEMP) - ReindexRelationConcurrently(indOid, options); + ReindexRelationConcurrently(indOid, params); else - reindex_index(indOid, false, persistence, - options | REINDEXOPT_REPORT_PROGRESS); + { + ReindexParams newparams = *params; + + newparams.options |= REINDEXOPT_REPORT_PROGRESS; + reindex_index(indOid, false, persistence, &newparams); + } } /* @@ -2553,7 +2601,7 @@ RangeVarCallbackForReindexIndex(const RangeVar *relation, * non-concurrent case and table locks used by index_concurrently_*() for * concurrent case. */ - table_lockmode = ((state->options & REINDEXOPT_CONCURRENTLY) != 0) ? + table_lockmode = (state->params.options & REINDEXOPT_CONCURRENTLY) != 0 ? ShareUpdateExclusiveLock : ShareLock; /* @@ -2610,8 +2658,8 @@ RangeVarCallbackForReindexIndex(const RangeVar *relation, * ReindexTable * Recreate all indexes of a table (and of its toast table, if any) */ -Oid -ReindexTable(RangeVar *relation, int options, bool isTopLevel) +static Oid +ReindexTable(RangeVar *relation, ReindexParams *params, bool isTopLevel) { Oid heapOid; bool result; @@ -2625,17 +2673,17 @@ ReindexTable(RangeVar *relation, int options, bool isTopLevel) * locks on our temporary table. */ heapOid = RangeVarGetRelidExtended(relation, - (options & REINDEXOPT_CONCURRENTLY) != 0 ? + (params->options & REINDEXOPT_CONCURRENTLY) != 0 ? 
ShareUpdateExclusiveLock : ShareLock, 0, RangeVarCallbackOwnsTable, NULL); if (get_rel_relkind(heapOid) == RELKIND_PARTITIONED_TABLE) - ReindexPartitions(heapOid, options, isTopLevel); - else if ((options & REINDEXOPT_CONCURRENTLY) != 0 && + ReindexPartitions(heapOid, params, isTopLevel); + else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 && get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP) { - result = ReindexRelationConcurrently(heapOid, options); + result = ReindexRelationConcurrently(heapOid, params); if (!result) ereport(NOTICE, @@ -2644,10 +2692,13 @@ ReindexTable(RangeVar *relation, int options, bool isTopLevel) } else { + ReindexParams newparams = *params; + + newparams.options |= REINDEXOPT_REPORT_PROGRESS; result = reindex_relation(heapOid, REINDEX_REL_PROCESS_TOAST | REINDEX_REL_CHECK_CONSTRAINTS, - options | REINDEXOPT_REPORT_PROGRESS); + &newparams); if (!result) ereport(NOTICE, (errmsg("table \"%s\" has no indexes to reindex", @@ -2665,9 +2716,9 @@ ReindexTable(RangeVar *relation, int options, bool isTopLevel) * separate transaction, so we can release the lock on it right away. * That means this must not be called within a user transaction block! */ -void +static void ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, - int options) + ReindexParams *params) { Oid objectOid; Relation relationRelation; @@ -2686,7 +2737,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, objectKind == REINDEX_OBJECT_DATABASE); if (objectKind == REINDEX_OBJECT_SYSTEM && - (options & REINDEXOPT_CONCURRENTLY) != 0) + (params->options & REINDEXOPT_CONCURRENTLY) != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot reindex system catalogs concurrently"))); @@ -2794,7 +2845,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, * Skip system tables, since index_create() would reject indexing them * concurrently (and it would likely fail if we tried). 
*/ - if ((options & REINDEXOPT_CONCURRENTLY) != 0 && + if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 && IsCatalogRelationOid(relid)) { if (!concurrent_warning) @@ -2829,7 +2880,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, * Process each relation listed in a separate transaction. Note that this * commits and then starts a new transaction immediately. */ - ReindexMultipleInternal(relids, options); + ReindexMultipleInternal(relids, params); MemoryContextDelete(private_context); } @@ -2860,7 +2911,7 @@ reindex_error_callback(void *arg) * by the caller. */ static void -ReindexPartitions(Oid relid, int options, bool isTopLevel) +ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel) { List *partitions = NIL; char relkind = get_rel_relkind(relid); @@ -2937,7 +2988,7 @@ ReindexPartitions(Oid relid, int options, bool isTopLevel) * Process each partition listed in a separate transaction. Note that * this commits and then starts a new transaction immediately. */ - ReindexMultipleInternal(partitions, options); + ReindexMultipleInternal(partitions, params); /* * Clean up working storage --- note we must do this after @@ -2955,7 +3006,7 @@ ReindexPartitions(Oid relid, int options, bool isTopLevel) * and starts a new transaction when finished. 
*/ static void -ReindexMultipleInternal(List *relids, int options) +ReindexMultipleInternal(List *relids, ReindexParams *params) { ListCell *l; @@ -2991,35 +3042,38 @@ ReindexMultipleInternal(List *relids, int options) Assert(relkind != RELKIND_PARTITIONED_INDEX && relkind != RELKIND_PARTITIONED_TABLE); - if ((options & REINDEXOPT_CONCURRENTLY) != 0 && + if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 && relpersistence != RELPERSISTENCE_TEMP) { - (void) ReindexRelationConcurrently(relid, - options | - REINDEXOPT_MISSING_OK); + ReindexParams newparams = *params; + + newparams.options |= REINDEXOPT_MISSING_OK; + (void) ReindexRelationConcurrently(relid, &newparams); /* ReindexRelationConcurrently() does the verbose output */ } else if (relkind == RELKIND_INDEX) { - reindex_index(relid, false, relpersistence, - options | - REINDEXOPT_REPORT_PROGRESS | - REINDEXOPT_MISSING_OK); + ReindexParams newparams = *params; + + newparams.options |= + REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK; + reindex_index(relid, false, relpersistence, &newparams); PopActiveSnapshot(); /* reindex_index() does the verbose output */ } else { bool result; + ReindexParams newparams = *params; + newparams.options |= + REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK; result = reindex_relation(relid, REINDEX_REL_PROCESS_TOAST | REINDEX_REL_CHECK_CONSTRAINTS, - options | - REINDEXOPT_REPORT_PROGRESS | - REINDEXOPT_MISSING_OK); + &newparams); - if (result && (options & REINDEXOPT_VERBOSE)) + if (result && (params->options & REINDEXOPT_VERBOSE) != 0) ereport(INFO, (errmsg("table \"%s.%s\" was reindexed", get_namespace_name(get_rel_namespace(relid)), @@ -3059,8 +3113,15 @@ ReindexMultipleInternal(List *relids, int options) * anyway, and a non-concurrent reindex is more efficient. 
*/ static bool -ReindexRelationConcurrently(Oid relationOid, int options) +ReindexRelationConcurrently(Oid relationOid, ReindexParams *params) { + typedef struct ReindexIndexInfo + { + Oid indexId; + Oid tableId; + Oid amId; + bool safe; /* for set_indexsafe_procflags */ + } ReindexIndexInfo; List *heapRelationIds = NIL; List *indexIds = NIL; List *newIndexIds = NIL; @@ -3092,7 +3153,7 @@ ReindexRelationConcurrently(Oid relationOid, int options) "ReindexConcurrent", ALLOCSET_SMALL_SIZES); - if (options & REINDEXOPT_VERBOSE) + if ((params->options & REINDEXOPT_VERBOSE) != 0) { /* Save data needed by REINDEX VERBOSE in private context */ oldcontext = MemoryContextSwitchTo(private_context); @@ -3137,7 +3198,7 @@ ReindexRelationConcurrently(Oid relationOid, int options) errmsg("cannot reindex system catalogs concurrently"))); /* Open relation to get its indexes */ - if ((options & REINDEXOPT_MISSING_OK) != 0) + if ((params->options & REINDEXOPT_MISSING_OK) != 0) { heapRelation = try_table_open(relationOid, ShareUpdateExclusiveLock); @@ -3170,10 +3231,16 @@ ReindexRelationConcurrently(Oid relationOid, int options) get_rel_name(cellOid)))); else { + ReindexIndexInfo *idx; + /* Save the list of relation OIDs in private context */ oldcontext = MemoryContextSwitchTo(private_context); - indexIds = lappend_oid(indexIds, cellOid); + idx = palloc(sizeof(ReindexIndexInfo)); + idx->indexId = cellOid; + /* other fields set later */ + + indexIds = lappend(indexIds, idx); MemoryContextSwitchTo(oldcontext); } @@ -3210,13 +3277,18 @@ ReindexRelationConcurrently(Oid relationOid, int options) get_rel_name(cellOid)))); else { + ReindexIndexInfo *idx; + /* * Save the list of relation OIDs in private * context */ oldcontext = MemoryContextSwitchTo(private_context); - indexIds = lappend_oid(indexIds, cellOid); + idx = palloc(sizeof(ReindexIndexInfo)); + idx->indexId = cellOid; + indexIds = lappend(indexIds, idx); + /* other fields set later */ MemoryContextSwitchTo(oldcontext); } @@ -3233,8 
+3305,9 @@ ReindexRelationConcurrently(Oid relationOid, int options) case RELKIND_INDEX: { Oid heapId = IndexGetRelation(relationOid, - (options & REINDEXOPT_MISSING_OK) != 0); + (params->options & REINDEXOPT_MISSING_OK) != 0); Relation heapRelation; + ReindexIndexInfo *idx; /* if relation is missing, leave */ if (!OidIsValid(heapId)) @@ -3262,7 +3335,7 @@ ReindexRelationConcurrently(Oid relationOid, int options) * to rebuild is not complete yet, and REINDEXOPT_MISSING_OK * should not be used once all the session locks are taken. */ - if ((options & REINDEXOPT_MISSING_OK) != 0) + if ((params->options & REINDEXOPT_MISSING_OK) != 0) { heapRelation = try_table_open(heapId, ShareUpdateExclusiveLock); @@ -3285,7 +3358,10 @@ ReindexRelationConcurrently(Oid relationOid, int options) * Save the list of relation OIDs in private context. Note * that invalid indexes are allowed here. */ - indexIds = lappend_oid(indexIds, relationOid); + idx = palloc(sizeof(ReindexIndexInfo)); + idx->indexId = relationOid; + indexIds = lappend(indexIds, idx); + /* other fields set later */ MemoryContextSwitchTo(oldcontext); break; @@ -3344,31 +3420,39 @@ ReindexRelationConcurrently(Oid relationOid, int options) foreach(lc, indexIds) { char *concurrentName; - Oid indexId = lfirst_oid(lc); + ReindexIndexInfo *idx = lfirst(lc); + ReindexIndexInfo *newidx; Oid newIndexId; Relation indexRel; Relation heapRel; Relation newIndexRel; LockRelId *lockrelid; - indexRel = index_open(indexId, ShareUpdateExclusiveLock); + indexRel = index_open(idx->indexId, ShareUpdateExclusiveLock); heapRel = table_open(indexRel->rd_index->indrelid, ShareUpdateExclusiveLock); + /* determine safety of this index for set_indexsafe_procflags */ + idx->safe = (indexRel->rd_indexprs == NIL && + indexRel->rd_indpred == NIL); + idx->tableId = RelationGetRelid(heapRel); + idx->amId = indexRel->rd_rel->relam; + /* This function shouldn't be called for temporary relations. 
*/ if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) elog(ERROR, "cannot reindex a temporary table concurrently"); pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, - RelationGetRelid(heapRel)); + idx->tableId); + progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY; progress_vals[1] = 0; /* initializing */ - progress_vals[2] = indexId; - progress_vals[3] = indexRel->rd_rel->relam; + progress_vals[2] = idx->indexId; + progress_vals[3] = idx->amId; pgstat_progress_update_multi_param(4, progress_index, progress_vals); /* Choose a temporary relation name for the new index */ - concurrentName = ChooseRelationName(get_rel_name(indexId), + concurrentName = ChooseRelationName(get_rel_name(idx->indexId), NULL, "ccnew", get_rel_namespace(indexRel->rd_index->indrelid), @@ -3376,7 +3460,7 @@ ReindexRelationConcurrently(Oid relationOid, int options) /* Create new index definition based on given index */ newIndexId = index_concurrently_create_copy(heapRel, - indexId, + idx->indexId, concurrentName); /* @@ -3390,7 +3474,13 @@ ReindexRelationConcurrently(Oid relationOid, int options) */ oldcontext = MemoryContextSwitchTo(private_context); - newIndexIds = lappend_oid(newIndexIds, newIndexId); + newidx = palloc(sizeof(ReindexIndexInfo)); + newidx->indexId = newIndexId; + newidx->safe = idx->safe; + newidx->tableId = idx->tableId; + newidx->amId = idx->amId; + + newIndexIds = lappend(newIndexIds, newidx); /* * Save lockrelid to protect each relation from drop then close @@ -3454,6 +3544,11 @@ ReindexRelationConcurrently(Oid relationOid, int options) CommitTransactionCommand(); StartTransactionCommand(); + /* + * Because we don't take a snapshot in this transaction, there's no need + * to set the PROC_IN_SAFE_IC flag here. 
+ */ + /* * Phase 2 of REINDEX CONCURRENTLY * @@ -3471,10 +3566,7 @@ ReindexRelationConcurrently(Oid relationOid, int options) foreach(lc, newIndexIds) { - Relation newIndexRel; - Oid newIndexId = lfirst_oid(lc); - Oid heapId; - Oid indexam; + ReindexIndexInfo *newidx = lfirst(lc); /* Start new transaction for this index's concurrent build */ StartTransactionCommand(); @@ -3486,37 +3578,38 @@ ReindexRelationConcurrently(Oid relationOid, int options) */ CHECK_FOR_INTERRUPTS(); + /* Tell concurrent indexing to ignore us, if index qualifies */ + if (newidx->safe) + set_indexsafe_procflags(); + /* Set ActiveSnapshot since functions in the indexes may need it */ PushActiveSnapshot(GetTransactionSnapshot()); - /* - * Index relation has been closed by previous commit, so reopen it to - * get its information. - */ - newIndexRel = index_open(newIndexId, ShareUpdateExclusiveLock); - heapId = newIndexRel->rd_index->indrelid; - indexam = newIndexRel->rd_rel->relam; - index_close(newIndexRel, NoLock); - /* * Update progress for the index to build, with the correct parent * table involved. */ - pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, heapId); + pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId); progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY; progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD; - progress_vals[2] = newIndexId; - progress_vals[3] = indexam; + progress_vals[2] = newidx->indexId; + progress_vals[3] = newidx->amId; pgstat_progress_update_multi_param(4, progress_index, progress_vals); /* Perform concurrent build of new index */ - index_concurrently_build(heapId, newIndexId); + index_concurrently_build(newidx->tableId, newidx->indexId); PopActiveSnapshot(); CommitTransactionCommand(); } + StartTransactionCommand(); + /* + * Because we don't take a snapshot or Xid in this transaction, there's no + * need to set the PROC_IN_SAFE_IC flag here. 
+ */ + /* * Phase 3 of REINDEX CONCURRENTLY * @@ -3532,12 +3625,9 @@ ReindexRelationConcurrently(Oid relationOid, int options) foreach(lc, newIndexIds) { - Oid newIndexId = lfirst_oid(lc); - Oid heapId; + ReindexIndexInfo *newidx = lfirst(lc); TransactionId limitXmin; Snapshot snapshot; - Relation newIndexRel; - Oid indexam; StartTransactionCommand(); @@ -3548,6 +3638,10 @@ ReindexRelationConcurrently(Oid relationOid, int options) */ CHECK_FOR_INTERRUPTS(); + /* Tell concurrent indexing to ignore us, if index qualifies */ + if (newidx->safe) + set_indexsafe_procflags(); + /* * Take the "reference snapshot" that will be used by validate_index() * to filter candidate tuples. @@ -3555,27 +3649,19 @@ ReindexRelationConcurrently(Oid relationOid, int options) snapshot = RegisterSnapshot(GetTransactionSnapshot()); PushActiveSnapshot(snapshot); - /* - * Index relation has been closed by previous commit, so reopen it to - * get its information. - */ - newIndexRel = index_open(newIndexId, ShareUpdateExclusiveLock); - heapId = newIndexRel->rd_index->indrelid; - indexam = newIndexRel->rd_rel->relam; - index_close(newIndexRel, NoLock); - /* * Update progress for the index to build, with the correct parent * table involved. 
*/ - pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, heapId); + pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, + newidx->tableId); progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY; progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN; - progress_vals[2] = newIndexId; - progress_vals[3] = indexam; + progress_vals[2] = newidx->indexId; + progress_vals[3] = newidx->amId; pgstat_progress_update_multi_param(4, progress_index, progress_vals); - validate_index(heapId, newIndexId, snapshot); + validate_index(newidx->tableId, newidx->indexId, snapshot); /* * We can now do away with our active snapshot, we still need to save @@ -3599,6 +3685,9 @@ ReindexRelationConcurrently(Oid relationOid, int options) * interesting tuples. But since it might not contain tuples deleted * just before the reference snap was taken, we have to wait out any * transactions that might have older snapshots. + * + * Because we don't take a snapshot or Xid in this transaction, + * there's no need to set the PROC_IN_SAFE_IC flag here. */ pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_PHASE_WAIT_3); @@ -3620,12 +3709,18 @@ ReindexRelationConcurrently(Oid relationOid, int options) StartTransactionCommand(); + /* + * Because this transaction only does catalog manipulations and doesn't do + * any index operations, we can set the PROC_IN_SAFE_IC flag here + * unconditionally. + */ + set_indexsafe_procflags(); + forboth(lc, indexIds, lc2, newIndexIds) { + ReindexIndexInfo *oldidx = lfirst(lc); + ReindexIndexInfo *newidx = lfirst(lc2); char *oldName; - Oid oldIndexId = lfirst_oid(lc); - Oid newIndexId = lfirst_oid(lc2); - Oid heapId; /* * Check for user-requested abort. 
This is inside a transaction so as @@ -3634,27 +3729,25 @@ ReindexRelationConcurrently(Oid relationOid, int options) */ CHECK_FOR_INTERRUPTS(); - heapId = IndexGetRelation(oldIndexId, false); - /* Choose a relation name for old index */ - oldName = ChooseRelationName(get_rel_name(oldIndexId), + oldName = ChooseRelationName(get_rel_name(oldidx->indexId), NULL, "ccold", - get_rel_namespace(heapId), + get_rel_namespace(oldidx->tableId), false); /* * Swap old index with the new one. This also marks the new one as * valid and the old one as not valid. */ - index_concurrently_swap(newIndexId, oldIndexId, oldName); + index_concurrently_swap(newidx->indexId, oldidx->indexId, oldName); /* * Invalidate the relcache for the table, so that after this commit * all sessions will refresh any cached plans that might reference the * index. */ - CacheInvalidateRelcacheByRelid(heapId); + CacheInvalidateRelcacheByRelid(oldidx->tableId); /* * CCI here so that subsequent iterations see the oldName in the @@ -3670,6 +3763,12 @@ ReindexRelationConcurrently(Oid relationOid, int options) CommitTransactionCommand(); StartTransactionCommand(); + /* + * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no + * real need for that, because we only acquire an Xid after the wait is + * done, and that lasts for a very short period. + */ + /* * Phase 5 of REINDEX CONCURRENTLY * @@ -3684,8 +3783,7 @@ ReindexRelationConcurrently(Oid relationOid, int options) foreach(lc, indexIds) { - Oid oldIndexId = lfirst_oid(lc); - Oid heapId; + ReindexIndexInfo *oldidx = lfirst(lc); /* * Check for user-requested abort. This is inside a transaction so as @@ -3694,14 +3792,19 @@ ReindexRelationConcurrently(Oid relationOid, int options) */ CHECK_FOR_INTERRUPTS(); - heapId = IndexGetRelation(oldIndexId, false); - index_concurrently_set_dead(heapId, oldIndexId); + index_concurrently_set_dead(oldidx->tableId, oldidx->indexId); } /* Commit this transaction to make the updates visible. 
*/ CommitTransactionCommand(); StartTransactionCommand(); + /* + * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no + * real need for that, because we only acquire an Xid after the wait is + * done, and that lasts for a very short period. + */ + /* * Phase 6 of REINDEX CONCURRENTLY * @@ -3719,11 +3822,11 @@ ReindexRelationConcurrently(Oid relationOid, int options) foreach(lc, indexIds) { - Oid oldIndexId = lfirst_oid(lc); + ReindexIndexInfo *idx = lfirst(lc); ObjectAddress object; object.classId = RelationRelationId; - object.objectId = oldIndexId; + object.objectId = idx->indexId; object.objectSubId = 0; add_exact_object_address(&object, objects); @@ -3754,7 +3857,7 @@ ReindexRelationConcurrently(Oid relationOid, int options) StartTransactionCommand(); /* Log what we did */ - if (options & REINDEXOPT_VERBOSE) + if ((params->options & REINDEXOPT_VERBOSE) != 0) { if (relkind == RELKIND_INDEX) ereport(INFO, @@ -3766,7 +3869,8 @@ ReindexRelationConcurrently(Oid relationOid, int options) { foreach(lc, newIndexIds) { - Oid indOid = lfirst_oid(lc); + ReindexIndexInfo *idx = lfirst(lc); + Oid indOid = idx->indexId; ereport(INFO, (errmsg("index \"%s.%s\" was reindexed", diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c index 098227656a823..34f2270cedffc 100644 --- a/src/backend/commands/lockcmds.c +++ b/src/backend/commands/lockcmds.c @@ -3,7 +3,7 @@ * lockcmds.c * LOCK command support code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index cfc63915f38cd..c5c25ce11d5e8 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -3,7 +3,7 @@ * matview.c * materialized view support * - * Portions Copyright (c) 1996-2020, PostgreSQL Global 
Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c index c46db7d11cb47..fad39e2b75c16 100644 --- a/src/backend/commands/opclasscmds.c +++ b/src/backend/commands/opclasscmds.c @@ -4,7 +4,7 @@ * * Routines for opclass (and opfamily) manipulation commands * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c index a791e99092d5b..809043c5d195d 100644 --- a/src/backend/commands/operatorcmds.c +++ b/src/backend/commands/operatorcmds.c @@ -4,7 +4,7 @@ * * Routines for operator manipulation commands * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/policy.c b/src/backend/commands/policy.c index d3f8e8f06c136..5cacc088cd877 100644 --- a/src/backend/commands/policy.c +++ b/src/backend/commands/policy.c @@ -3,7 +3,7 @@ * policy.c * Commands for manipulating policies. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/commands/policy.c diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index 0b64204975d39..6f2397bd360e6 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -9,7 +9,7 @@ * storage management for portals (but doesn't run any queries in them). 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index 89087a7be3190..f767751c71ae3 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -7,7 +7,7 @@ * accessed via the extended FE/BE query protocol. * * - * Copyright (c) 2002-2020, PostgreSQL Global Development Group + * Copyright (c) 2002-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/commands/prepare.c @@ -230,7 +230,7 @@ ExecuteQuery(ParseState *pstate, entry->plansource->query_string); /* Replan if needed, and increment plan refcount for portal */ - cplan = GetCachedPlan(entry->plansource, paramLI, false, NULL); + cplan = GetCachedPlan(entry->plansource, paramLI, NULL, NULL); plan_list = cplan->stmt_list; /* @@ -651,7 +651,8 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, } /* Replan if needed, and acquire a transient refcount */ - cplan = GetCachedPlan(entry->plansource, paramLI, true, queryEnv); + cplan = GetCachedPlan(entry->plansource, paramLI, + CurrentResourceOwner, queryEnv); INSTR_TIME_SET_CURRENT(planduration); INSTR_TIME_SUBTRACT(planduration, planstart); @@ -687,7 +688,7 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, if (estate) FreeExecutorState(estate); - ReleaseCachedPlan(cplan, true); + ReleaseCachedPlan(cplan, CurrentResourceOwner); } /* diff --git a/src/backend/commands/proclang.c b/src/backend/commands/proclang.c index 8ef60374f590b..81598d3e08e76 100644 --- a/src/backend/commands/proclang.c +++ b/src/backend/commands/proclang.c @@ -3,7 +3,7 @@ * proclang.c * PostgreSQL LANGUAGE support code. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c index eabbc7473bbf1..95c253c8e08ee 100644 --- a/src/backend/commands/publicationcmds.c +++ b/src/backend/commands/publicationcmds.c @@ -3,7 +3,7 @@ * publicationcmds.c * publication manipulation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c index 3013862528885..a60eb161e4aae 100644 --- a/src/backend/commands/schemacmds.c +++ b/src/backend/commands/schemacmds.c @@ -3,7 +3,7 @@ * schemacmds.c * schema creation/manipulation commands * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c index ee036e908795e..6906714298694 100644 --- a/src/backend/commands/seclabel.c +++ b/src/backend/commands/seclabel.c @@ -3,7 +3,7 @@ * seclabel.c * routines to support security label feature. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * ------------------------------------------------------------------------- diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index fa2eea8af2177..0415df9ccb7eb 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -3,7 +3,7 @@ * sequence.c * PostgreSQL sequences support code. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c index 3057d89d50c01..2bae205845992 100644 --- a/src/backend/commands/statscmds.c +++ b/src/backend/commands/statscmds.c @@ -3,7 +3,7 @@ * statscmds.c * Commands for creating and altering extended statistics objects * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -135,6 +135,13 @@ CreateStatistics(CreateStatsStmt *stmt) if (!pg_class_ownercheck(RelationGetRelid(rel), stxowner)) aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind), RelationGetRelationName(rel)); + + /* Creating statistics on system catalogs is not allowed */ + if (!allowSystemTableMods && IsSystemRelation(rel)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + RelationGetRelationName(rel)))); } Assert(rel); diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 1696454c0bbb6..082f7855b89c1 100644 --- 
a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -3,7 +3,7 @@ * subscriptioncmds.c * subscription catalog manipulation functions * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -1267,7 +1267,7 @@ fetch_table_list(WalReceiverConn *wrconn, List *publications) relname = TextDatumGetCString(slot_getattr(slot, 2, &isnull)); Assert(!isnull); - rv = makeRangeVar(pstrdup(nspname), pstrdup(relname), -1); + rv = makeRangeVar(nspname, relname, -1); tablelist = lappend(tablelist, rv); ExecClearTuple(slot); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 1fa9f19f08cbc..420991e31539c 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -3,7 +3,7 @@ * tablecmds.c * Commands for creating and altering table structures and settings * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1854,6 +1854,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, { Oid heap_relid; Oid toast_relid; + ReindexParams reindex_params = {0}; /* * This effectively deletes all rows in the table, and may be done @@ -1891,7 +1892,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, /* * Reconstruct the indexes to match, and we're done. 
*/ - reindex_relation(heap_relid, REINDEX_REL_PROCESS_TOAST, 0); + reindex_relation(heap_relid, REINDEX_REL_PROCESS_TOAST, + &reindex_params); } pgstat_count_truncate(rel); @@ -3035,6 +3037,112 @@ SetRelationHasSubclass(Oid relationId, bool relhassubclass) table_close(relationRelation, RowExclusiveLock); } +/* + * CheckRelationTableSpaceMove + * Check if relation can be moved to new tablespace. + * + * NOTE: The caller must hold AccessExclusiveLock on the relation. + * + * Returns true if the relation can be moved to the new tablespace; raises + * an error if it is not possible to do the move; returns false if the move + * would have no effect. + */ +bool +CheckRelationTableSpaceMove(Relation rel, Oid newTableSpaceId) +{ + Oid oldTableSpaceId; + + /* + * No work if no change in tablespace. Note that MyDatabaseTableSpace is + * stored as 0. + */ + oldTableSpaceId = rel->rd_rel->reltablespace; + if (newTableSpaceId == oldTableSpaceId || + (newTableSpaceId == MyDatabaseTableSpace && oldTableSpaceId == 0)) + return false; + + /* + * We cannot support moving mapped relations into different tablespaces. + * (In particular this eliminates all shared catalogs.) + */ + if (RelationIsMapped(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move system relation \"%s\"", + RelationGetRelationName(rel)))); + + /* Cannot move a non-shared relation into pg_global */ + if (newTableSpaceId == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("only shared relations can be placed in pg_global tablespace"))); + + /* + * Do not allow moving temp tables of other backends ... their local + * buffer manager is not going to cope. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move temporary tables of other sessions"))); + + return true; +} + +/* + * SetRelationTableSpace + * Set new reltablespace and relfilenode in pg_class entry. 
+ * + * newTableSpaceId is the new tablespace for the relation, and + * newRelFileNode its new filenode. If newRelFileNode is InvalidOid, + * this field is not updated. + * + * NOTE: The caller must hold AccessExclusiveLock on the relation. + * + * The caller of this routine had better check if a relation can be + * moved to this new tablespace by calling CheckRelationTableSpaceMove() + * first, and is responsible for making the change visible with + * CommandCounterIncrement(). + */ +void +SetRelationTableSpace(Relation rel, + Oid newTableSpaceId, + Oid newRelFileNode) +{ + Relation pg_class; + HeapTuple tuple; + Form_pg_class rd_rel; + Oid reloid = RelationGetRelid(rel); + + Assert(CheckRelationTableSpaceMove(rel, newTableSpaceId)); + + /* Get a modifiable copy of the relation's pg_class row. */ + pg_class = table_open(RelationRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(reloid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", reloid); + rd_rel = (Form_pg_class) GETSTRUCT(tuple); + + /* Update the pg_class row. */ + rd_rel->reltablespace = (newTableSpaceId == MyDatabaseTableSpace) ? + InvalidOid : newTableSpaceId; + if (OidIsValid(newRelFileNode)) + rd_rel->relfilenode = newRelFileNode; + CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); + + /* + * Record dependency on tablespace. This is only required for relations + * that have no physical storage. + */ + if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind)) + changeDependencyOnTablespace(RelationRelationId, reloid, + rd_rel->reltablespace); + + heap_freetuple(tuple); + table_close(pg_class, RowExclusiveLock); +} + /* * renameatt_check - basic sanity checks before attribute rename */ @@ -12095,7 +12203,7 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd, * parse_analyze() or the rewriter, but instead we need to pass them * through parse_utilcmd.c to make them ready for execution. 
*/ - raw_parsetree_list = raw_parser(cmd); + raw_parsetree_list = raw_parser(cmd, RAW_PARSE_DEFAULT); querytree_list = NIL; foreach(list_item, raw_parsetree_list) { @@ -13158,13 +13266,9 @@ static void ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) { Relation rel; - Oid oldTableSpace; Oid reltoastrelid; Oid newrelfilenode; RelFileNode newrnode; - Relation pg_class; - HeapTuple tuple; - Form_pg_class rd_rel; List *reltoastidxids = NIL; ListCell *lc; @@ -13173,45 +13277,15 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) */ rel = relation_open(tableOid, lockmode); - /* - * No work if no change in tablespace. - */ - oldTableSpace = rel->rd_rel->reltablespace; - if (newTableSpace == oldTableSpace || - (newTableSpace == MyDatabaseTableSpace && oldTableSpace == 0)) + /* Check first if relation can be moved to new tablespace */ + if (!CheckRelationTableSpaceMove(rel, newTableSpace)) { InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0); - relation_close(rel, NoLock); return; } - /* - * We cannot support moving mapped relations into different tablespaces. - * (In particular this eliminates all shared catalogs.) - */ - if (RelationIsMapped(rel)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot move system relation \"%s\"", - RelationGetRelationName(rel)))); - - /* Can't move a non-shared relation into pg_global */ - if (newTableSpace == GLOBALTABLESPACE_OID) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("only shared relations can be placed in pg_global tablespace"))); - - /* - * Don't allow moving temp tables of other backends ... their local buffer - * manager is not going to cope. 
- */ - if (RELATION_IS_OTHER_TEMP(rel)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot move temporary tables of other sessions"))); - reltoastrelid = rel->rd_rel->reltoastrelid; /* Fetch the list of indexes on toast relation if necessary */ if (OidIsValid(reltoastrelid)) @@ -13222,14 +13296,6 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) relation_close(toastRel, lockmode); } - /* Get a modifiable copy of the relation's pg_class row */ - pg_class = table_open(RelationRelationId, RowExclusiveLock); - - tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(tableOid)); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "cache lookup failed for relation %u", tableOid); - rd_rel = (Form_pg_class) GETSTRUCT(tuple); - /* * Relfilenodes are not unique in databases across tablespaces, so we need * to allocate a new one in the new tablespace. @@ -13260,18 +13326,13 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) * * NB: This wouldn't work if ATExecSetTableSpace() were allowed to be * executed on pg_class or its indexes (the above copy wouldn't contain - * the updated pg_class entry), but that's forbidden above. + * the updated pg_class entry), but that's forbidden with + * CheckRelationTableSpaceMove(). */ - rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? 
InvalidOid : newTableSpace; - rd_rel->relfilenode = newrelfilenode; - CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); + SetRelationTableSpace(rel, newTableSpace, newrelfilenode); InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0); - heap_freetuple(tuple); - - table_close(pg_class, RowExclusiveLock); - RelationAssumeNewRelfilenode(rel); relation_close(rel, NoLock); @@ -13299,52 +13360,25 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) static void ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace) { - HeapTuple tuple; - Oid oldTableSpace; - Relation pg_class; - Form_pg_class rd_rel; - Oid reloid = RelationGetRelid(rel); - /* * Shouldn't be called on relations having storage; these are processed in * phase 3. */ Assert(!RELKIND_HAS_STORAGE(rel->rd_rel->relkind)); - /* Can't allow a non-shared relation in pg_global */ - if (newTableSpace == GLOBALTABLESPACE_OID) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("only shared relations can be placed in pg_global tablespace"))); - - /* - * No work if no change in tablespace. - */ - oldTableSpace = rel->rd_rel->reltablespace; - if (newTableSpace == oldTableSpace || - (newTableSpace == MyDatabaseTableSpace && oldTableSpace == 0)) + /* check if relation can be moved to its new tablespace */ + if (!CheckRelationTableSpaceMove(rel, newTableSpace)) { - InvokeObjectPostAlterHook(RelationRelationId, reloid, 0); + InvokeObjectPostAlterHook(RelationRelationId, + RelationGetRelid(rel), + 0); return; } - /* Get a modifiable copy of the relation's pg_class row */ - pg_class = table_open(RelationRelationId, RowExclusiveLock); - - tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(reloid)); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "cache lookup failed for relation %u", reloid); - rd_rel = (Form_pg_class) GETSTRUCT(tuple); - - /* update the pg_class row */ - rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? 
InvalidOid : newTableSpace; - CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); - - InvokeObjectPostAlterHook(RelationRelationId, reloid, 0); + /* Update can be done, so change reltablespace */ + SetRelationTableSpace(rel, newTableSpace, InvalidOid); - heap_freetuple(tuple); - - table_close(pg_class, RowExclusiveLock); + InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0); /* Make sure the reltablespace change is visible */ CommandCounterIncrement(); diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 2c3b9050b2787..69ea155d50278 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -35,7 +35,7 @@ * and munge the system catalogs of the new database. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -420,6 +420,8 @@ DropTableSpace(DropTableSpaceStmt *stmt) Form_pg_tablespace spcform; ScanKeyData entry[1]; Oid tablespaceoid; + char *detail; + char *detail_log; /* * Find the target tuple @@ -468,6 +470,16 @@ DropTableSpace(DropTableSpaceStmt *stmt) aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_TABLESPACE, tablespacename); + /* Check for pg_shdepend entries depending on this tablespace */ + if (checkSharedDependencies(TableSpaceRelationId, tablespaceoid, + &detail, &detail_log)) + ereport(ERROR, + (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST), + errmsg("tablespace \"%s\" cannot be dropped because some objects depend on it", + tablespacename), + errdetail_internal("%s", detail), + errdetail_log("%s", detail_log))); + /* DROP hook for the tablespace being removed */ InvokeObjectDropHook(TableSpaceRelationId, tablespaceoid, 0); diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index c336b238aac05..2d687f6dfb6bb 100644 --- a/src/backend/commands/trigger.c +++ 
b/src/backend/commands/trigger.c @@ -3,7 +3,7 @@ * trigger.c * PostgreSQL TRIGGERs support code. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -71,10 +71,8 @@ int SessionReplicationRole = SESSION_REPLICATION_ROLE_ORIGIN; static int MyTriggerDepth = 0; /* - * Note that similar macros also exist in executor/execMain.c. There does not - * appear to be any good header to put them into, given the structures that - * they use, so we let them be duplicated. Be sure to update all if one needs - * to be changed, however. + * The authoritative version of this macro is in executor/execMain.c. Be sure + * to keep everything in sync. */ #define GetAllUpdatedColumns(relinfo, estate) \ (bms_union(exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->updatedCols, \ @@ -2801,16 +2799,6 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, { ExecForceStoreHeapTuple(newtuple, newslot, false); - if (trigger->tgisclone && - !ExecPartitionCheck(relinfo, newslot, estate, false)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("moving row to another partition during a BEFORE trigger is not supported"), - errdetail("Before executing trigger \"%s\", the row was to be in partition \"%s.%s\".", - trigger->tgname, - get_namespace_name(RelationGetNamespace(relinfo->ri_RelationDesc)), - RelationGetRelationName(relinfo->ri_RelationDesc)))); - /* * If the tuple returned by the trigger / being stored, is the old * row version, and the heap tuple passed to the trigger was diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c index f5d1d137b8141..e06fb32b3d1ec 100644 --- a/src/backend/commands/tsearchcmds.c +++ b/src/backend/commands/tsearchcmds.c @@ -4,7 +4,7 @@ * * Routines for tsearch manipulation commands * - * Portions Copyright (c) 1996-2020, 
PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index a0a8695b1bd08..76218fb47ed4a 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -3,7 +3,7 @@ * typecmds.c * Routines for SQL commands that manipulate types (and domains). * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c index 0e6800bf3e4f3..ed243e3d1415f 100644 --- a/src/backend/commands/user.c +++ b/src/backend/commands/user.c @@ -3,7 +3,7 @@ * user.c * Commands for manipulating roles (formerly called users). * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/commands/user.c diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 98270a10495d7..462f9a0f8225c 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -9,7 +9,7 @@ * in cluster.c. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -530,7 +530,7 @@ vacuum(List *relations, VacuumParams *params, * ANALYZE. 
*/ bool -vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, int options) +vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, bits32 options) { char *relname; @@ -604,7 +604,7 @@ vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, int options) * or locked, a log is emitted if possible. */ Relation -vacuum_open_relation(Oid relid, RangeVar *relation, int options, +vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options, bool verbose, LOCKMODE lmode) { Relation onerel; @@ -1916,17 +1916,17 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params) */ if (params->options & VACOPT_FULL) { - int cluster_options = 0; + ClusterParams cluster_params = {0}; /* close relation before vacuuming, but hold lock until commit */ relation_close(onerel, NoLock); onerel = NULL; if ((params->options & VACOPT_VERBOSE) != 0) - cluster_options |= CLUOPT_VERBOSE; + cluster_params.options |= CLUOPT_VERBOSE; /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */ - cluster_rel(relid, InvalidOid, cluster_options); + cluster_rel(relid, InvalidOid, &cluster_params); } else table_relation_vacuum(onerel, params, vac_strategy); diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c index 484f7ea2c0e6f..c5cf08b423780 100644 --- a/src/backend/commands/variable.c +++ b/src/backend/commands/variable.c @@ -4,7 +4,7 @@ * Routines for handling specialized SET variables. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/commands/view.c b/src/backend/commands/view.c index 6e65103febc60..f2642dba6c982 100644 --- a/src/backend/commands/view.c +++ b/src/backend/commands/view.c @@ -3,7 +3,7 @@ * view.c * use rewrite rules to construct views * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 0c10f1d35c2b4..23bdb53cd1037 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -3,7 +3,7 @@ * execAmi.c * miscellaneous executor access method routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/executor/execAmi.c diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c index f89319fcd89b8..33221a4d6ce1a 100644 --- a/src/backend/executor/execCurrent.c +++ b/src/backend/executor/execCurrent.c @@ -3,7 +3,7 @@ * execCurrent.c * executor support for WHERE CURRENT OF cursor * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/executor/execCurrent.c @@ -299,6 +299,10 @@ fetch_cursor_param_value(ExprContext *econtext, int paramId) * Search through a PlanState tree for a scan node on the specified table. * Return NULL if not found or multiple candidates. 
* + * CAUTION: this function is not charged simply with finding some candidate + * scan, but with ensuring that that scan returned the plan tree's current + * output row. That's why we must reject multiple-match cases. + * * If a candidate is found, set *pending_rescan to true if that candidate * or any node above it has a pending rescan action, i.e. chgParam != NULL. * That indicates that we shouldn't consider the node to be positioned on a @@ -317,7 +321,14 @@ search_plan_tree(PlanState *node, Oid table_oid, switch (nodeTag(node)) { /* - * Relation scan nodes can all be treated alike + * Relation scan nodes can all be treated alike: check to see if + * they are scanning the specified table. + * + * ForeignScan and CustomScan might not have a currentRelation, in + * which case we just ignore them. (We dare not descend to any + * child plan nodes they might have, since we do not know the + * relationship of such a node's current output tuple to the + * children's current outputs.) */ case T_SeqScanState: case T_SampleScanState: @@ -330,14 +341,33 @@ search_plan_tree(PlanState *node, Oid table_oid, { ScanState *sstate = (ScanState *) node; - if (RelationGetRelid(sstate->ss_currentRelation) == table_oid) + if (sstate->ss_currentRelation && + RelationGetRelid(sstate->ss_currentRelation) == table_oid) result = sstate; break; } /* - * For Append, we must look through the members; watch out for - * multiple matches (possible if it was from UNION ALL) + * For Append, we can check each input node. It is safe to + * descend to the inputs because only the input that resulted in + * the Append's current output node could be positioned on a tuple + * at all; the other inputs are either at EOF or not yet started. + * Hence, if the desired table is scanned by some + * currently-inactive input node, we will find that node but then + * our caller will realize that it didn't emit the tuple of + * interest. 
+ * + * We do need to watch out for multiple matches (possible if + * Append was from UNION ALL rather than an inheritance tree). + * + * Note: we can NOT descend through MergeAppend similarly, since + * its inputs are likely all active, and we don't know which one + * returned the current output tuple. (Perhaps that could be + * fixed if we were to let this code know more about MergeAppend's + * internal state, but it does not seem worth the trouble. Users + * should not expect plans for ORDER BY queries to be considered + * simply-updatable, since they won't be if the sorting is + * implemented by a Sort node.) */ case T_AppendState: { @@ -359,29 +389,6 @@ search_plan_tree(PlanState *node, Oid table_oid, break; } - /* - * Similarly for MergeAppend - */ - case T_MergeAppendState: - { - MergeAppendState *mstate = (MergeAppendState *) node; - int i; - - for (i = 0; i < mstate->ms_nplans; i++) - { - ScanState *elem = search_plan_tree(mstate->mergeplans[i], - table_oid, - pending_rescan); - - if (!elem) - continue; - if (result) - return NULL; /* multiple matches */ - result = elem; - } - break; - } - /* * Result and Limit can be descended through (these are safe * because they always return their input's current row) diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 5c3210a9ccadd..2e463f5499052 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -19,7 +19,7 @@ * and "Expression Evaluation" sections. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -3067,8 +3067,10 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, scratch.resnull = &trans_fcinfo->args[argno + 1].isnull; ExprEvalPushStep(state, &scratch); - adjust_bailout = lappend_int(adjust_bailout, - state->steps_len - 1); + /* don't add an adjustment unless the function is strict */ + if (pertrans->deserialfn.fn_strict) + adjust_bailout = lappend_int(adjust_bailout, + state->steps_len - 1); /* restore normal settings of scratch fields */ scratch.resvalue = &state->resvalue; diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 6b9fc38134b8c..6308286d8c35c 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -46,7 +46,7 @@ * exported rather than being "static" in this file.) 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 90d04f9228aa4..5fd0b26cbc16b 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -3,7 +3,7 @@ * execGrouping.c * executor utility routines for grouping, hashing, and aggregation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index c6b5bcba7b47f..1f0fe145ce8eb 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -95,7 +95,7 @@ * with the higher XID backs out. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -124,6 +124,15 @@ typedef enum CEOUC_LIVELOCK_PREVENTING_WAIT } CEOUC_WAIT_MODE; +/* + * The authoritative version of these macro are in executor/execMain.c. Be + * sure to keep everything in sync. 
+ */ +#define GetUpdatedColumns(relinfo, estate) \ + (exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->updatedCols) +#define GetExtraUpdatedColumns(relinfo, estate) \ + (exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->extraUpdatedCols) + static bool check_exclusion_or_unique_constraint(Relation heap, Relation index, IndexInfo *indexInfo, ItemPointer tupleid, @@ -136,6 +145,11 @@ static bool check_exclusion_or_unique_constraint(Relation heap, Relation index, static bool index_recheck_constraint(Relation index, Oid *constr_procs, Datum *existing_values, bool *existing_isnull, Datum *new_values); +static bool index_unchanged_by_update(ResultRelInfo *resultRelInfo, + EState *estate, IndexInfo *indexInfo, + Relation indexRelation); +static bool index_expression_changed_walker(Node *node, + Bitmapset *allUpdatedCols); /* ---------------------------------------------------------------- * ExecOpenIndices @@ -254,6 +268,16 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * into all the relations indexing the result relation * when a heap tuple is inserted into the result relation. * + * When 'update' is true, executor is performing an UPDATE + * that could not use an optimization like heapam's HOT (in + * more general terms a call to table_tuple_update() took + * place and set 'update_indexes' to true). Receiving this + * hint makes us consider if we should pass down the + * 'indexUnchanged' hint in turn. That's something that we + * figure out for each index_insert() call iff 'update' is + * true. (When 'update' is false we already know not to pass + * the hint to any index.) + * * Unique and exclusion constraints are enforced at the same * time. This returns a list of index OIDs for any unique or * exclusion constraints that are deferred and that had @@ -263,16 +287,13 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * * If 'arbiterIndexes' is nonempty, noDupErr applies only to * those indexes. NIL means noDupErr applies to all indexes. 
- * - * CAUTION: this must not be called for a HOT update. - * We can't defend against that here for lack of info. - * Should we change the API to make it safer? * ---------------------------------------------------------------- */ List * ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate, + bool update, bool noDupErr, bool *specConflict, List *arbiterIndexes) @@ -319,6 +340,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, IndexInfo *indexInfo; bool applyNoDupErr; IndexUniqueCheck checkUnique; + bool indexUnchanged; bool satisfiesConstraint; if (indexRelation == NULL) @@ -389,6 +411,16 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, else checkUnique = UNIQUE_CHECK_PARTIAL; + /* + * There's definitely going to be an index_insert() call for this + * index. If we're being called as part of an UPDATE statement, + * consider if the 'indexUnchanged' = true hint should be passed. + */ + indexUnchanged = update && index_unchanged_by_update(resultRelInfo, + estate, + indexInfo, + indexRelation); + satisfiesConstraint = index_insert(indexRelation, /* index relation */ values, /* array of index Datums */ @@ -396,6 +428,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, tupleid, /* tid of heap tuple */ heapRelation, /* heap relation */ checkUnique, /* type of uniqueness check to do */ + indexUnchanged, /* UPDATE without logical change? */ indexInfo); /* index AM may need this */ /* @@ -899,3 +932,122 @@ index_recheck_constraint(Relation index, Oid *constr_procs, return true; } + +/* + * Check if ExecInsertIndexTuples() should pass indexUnchanged hint. + * + * When the executor performs an UPDATE that requires a new round of index + * tuples, determine if we should pass 'indexUnchanged' = true hint for one + * single index. 
+ */ +static bool +index_unchanged_by_update(ResultRelInfo *resultRelInfo, EState *estate, + IndexInfo *indexInfo, Relation indexRelation) +{ + Bitmapset *updatedCols = GetUpdatedColumns(resultRelInfo, estate); + Bitmapset *extraUpdatedCols = GetExtraUpdatedColumns(resultRelInfo, estate); + Bitmapset *allUpdatedCols; + bool hasexpression = false; + List *idxExprs; + + /* + * Check for indexed attribute overlap with updated columns. + * + * Only do this for key columns. A change to a non-key column within an + * INCLUDE index should not be counted here. Non-key column values are + * opaque payload state to the index AM, a little like an extra table TID. + */ + for (int attr = 0; attr < indexInfo->ii_NumIndexKeyAttrs; attr++) + { + int keycol = indexInfo->ii_IndexAttrNumbers[attr]; + + if (keycol <= 0) + { + /* + * Skip expressions for now, but remember to deal with them later + * on + */ + hasexpression = true; + continue; + } + + if (bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber, + updatedCols) || + bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber, + extraUpdatedCols)) + { + /* Changed key column -- don't hint for this index */ + return false; + } + } + + /* + * When we get this far and index has no expressions, return true so that + * index_insert() call will go on to pass 'indexUnchanged' = true hint. + * + * The _absence_ of an indexed key attribute that overlaps with updated + * attributes (in addition to the total absence of indexed expressions) + * shows that the index as a whole is logically unchanged by UPDATE. + */ + if (!hasexpression) + return true; + + /* + * Need to pass only one bms to expression_tree_walker helper function. + * Avoid allocating memory in common case where there are no extra cols. 
+ */ + if (!extraUpdatedCols) + allUpdatedCols = updatedCols; + else + allUpdatedCols = bms_union(updatedCols, extraUpdatedCols); + + /* + * We have to work slightly harder in the event of indexed expressions, + * but the principle is the same as before: try to find columns (Vars, + * actually) that overlap with known-updated columns. + * + * If we find any matching Vars, don't pass hint for index. Otherwise + * pass hint. + */ + idxExprs = RelationGetIndexExpressions(indexRelation); + hasexpression = index_expression_changed_walker((Node *) idxExprs, + allUpdatedCols); + list_free(idxExprs); + if (extraUpdatedCols) + bms_free(allUpdatedCols); + + if (hasexpression) + return false; + + return true; +} + +/* + * Indexed expression helper for index_unchanged_by_update(). + * + * Returns true when Var that appears within allUpdatedCols located. + */ +static bool +index_expression_changed_walker(Node *node, Bitmapset *allUpdatedCols) +{ + if (node == NULL) + return false; + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber, + allUpdatedCols)) + { + /* Var was updated -- indicates that we should not hint */ + return true; + } + + /* Still haven't found a reason to not pass the hint */ + return false; + } + + return expression_tree_walker(node, index_expression_changed_walker, + (void *) allUpdatedCols); +} diff --git a/src/backend/executor/execJunk.c b/src/backend/executor/execJunk.c index 1a822ff24b382..970e1c325e33b 100644 --- a/src/backend/executor/execJunk.c +++ b/src/backend/executor/execJunk.c @@ -3,7 +3,7 @@ * execJunk.c * Junk attribute support stuff.... 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 7179f589f9491..f4dd47acc76ac 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -26,7 +26,7 @@ * before ExecutorEnd. This can be omitted only in case of EXPLAIN, * which should also omit ExecutorRun. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -101,10 +101,10 @@ static char *ExecBuildSlotValueDescription(Oid reloid, static void EvalPlanQualStart(EPQState *epqstate, Plan *planTree); /* - * Note that GetAllUpdatedColumns() also exists in commands/trigger.c. There does - * not appear to be any good header to put it into, given the structures that - * it uses, so we let them be duplicated. Be sure to update both if one needs - * to be changed, however. + * Note that variants of these macros exists in commands/trigger.c and in + * execIndexing.c. There does not appear to be any good header to put it + * into, given the structures that it uses, so we let them be duplicated. Be + * sure to keep everything in sync. */ #define GetInsertedColumns(relinfo, estate) \ (exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->insertedCols) diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index befde526910ae..c95d5170e415a 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -3,7 +3,7 @@ * execParallel.c * Support routines for parallel execution. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * This file contains routines that are intended to support setting up, diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 97bfc8bd71767..746cd1e9d7a1b 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -3,7 +3,7 @@ * execPartition.c * Support routines for partitioning. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -993,6 +993,23 @@ ExecInitRoutingInfo(ModifyTableState *mtstate, partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL) partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo); + /* + * Determine if the FDW supports batch insert and determine the batch + * size (a FDW may support batching, but it may be disabled for the + * server/table or for this particular query). + * + * If the FDW does not support batching, we set the batch size to 1. 
+ */ + if (partRelInfo->ri_FdwRoutine != NULL && + partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize && + partRelInfo->ri_FdwRoutine->ExecForeignBatchInsert) + partRelInfo->ri_BatchSize = + partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo); + else + partRelInfo->ri_BatchSize = 1; + + Assert(partRelInfo->ri_BatchSize >= 1); + partRelInfo->ri_CopyMultiInsertBuffer = NULL; /* @@ -1306,16 +1323,14 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull) { case PARTITION_STRATEGY_HASH: { - int greatest_modulus; uint64 rowHash; - greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); rowHash = compute_partition_hash_value(key->partnatts, key->partsupfunc, key->partcollation, values, isnull); - part_index = boundinfo->indexes[rowHash % greatest_modulus]; + part_index = boundinfo->indexes[rowHash % boundinfo->nindexes]; } break; diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 01b7b926bf705..414df50a0545e 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -7,7 +7,7 @@ * ExecProcNode, or ExecEndNode on its subnodes and do the appropriate * processing. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 01d26881e770c..1e285e0349f46 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -3,7 +3,7 @@ * execReplication.c * miscellaneous executor routines for logical replication * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -444,8 +444,8 @@ ExecSimpleRelationInsert(ResultRelInfo *resultRelInfo, if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(resultRelInfo, - slot, estate, false, NULL, - NIL); + slot, estate, false, false, + NULL, NIL); /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, resultRelInfo, slot, @@ -512,8 +512,8 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, if (resultRelInfo->ri_NumIndices > 0 && update_indexes) recheckIndexes = ExecInsertIndexTuples(resultRelInfo, - slot, estate, false, NULL, - NIL); + slot, estate, true, false, + NULL, NIL); /* AFTER ROW UPDATE Triggers */ ExecARUpdateTriggers(estate, resultRelInfo, diff --git a/src/backend/executor/execSRF.c b/src/backend/executor/execSRF.c index b0ea72de68500..8aec3b549bdea 100644 --- a/src/backend/executor/execSRF.c +++ b/src/backend/executor/execSRF.c @@ -7,7 +7,7 @@ * common code for calling set-returning functions according to the * ReturnSetInfo API. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c index 642805d90cdc4..69ab34573ec59 100644 --- a/src/backend/executor/execScan.c +++ b/src/backend/executor/execScan.c @@ -7,7 +7,7 @@ * stuff - checking the qualification and projecting the tuple * appropriately. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 4c90ac5236fb2..73c35df9c9686 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -46,7 +46,7 @@ * to avoid physically constructing projection tuples in many cases. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 071a0007ebcd8..d84fbaded9619 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -3,7 +3,7 @@ * execUtils.c * miscellaneous executor utility routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index ca8d637e73ab6..7bb752ace3ada 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -3,7 +3,7 @@ * functions.c * Execution of SQL-language functions * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index fbedb5aaf6002..237e13361b5d0 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -4,7 +4,7 @@ * functions for instrumentation of plan execution * * - * Copyright (c) 2001-2020, PostgreSQL Global Development Group + * Copyright (c) 2001-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/executor/instrument.c diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index da483268cf707..601b6dab03f1e 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -228,7 +228,7 @@ * to filter expressions having to be evaluated early, and allows to JIT * the entire expression into one 
native function. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index 88919e62faee0..15e4115bd6df4 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -3,7 +3,7 @@ * nodeAppend.c * routines to handle append nodes. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeBitmapAnd.c b/src/backend/executor/nodeBitmapAnd.c index 276b6f11a6505..a8d7b1e5eddcc 100644 --- a/src/backend/executor/nodeBitmapAnd.c +++ b/src/backend/executor/nodeBitmapAnd.c @@ -3,7 +3,7 @@ * nodeBitmapAnd.c * routines to handle BitmapAnd nodes. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 5a5c410106ac6..2db1914affb3b 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -16,7 +16,7 @@ * required index qual conditions. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c index 81a120815734a..48c2036297cd6 100644 --- a/src/backend/executor/nodeBitmapIndexscan.c +++ b/src/backend/executor/nodeBitmapIndexscan.c @@ -3,7 +3,7 @@ * nodeBitmapIndexscan.c * Routines to support bitmapped index scans of relations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeBitmapOr.c b/src/backend/executor/nodeBitmapOr.c index 32fb72f04fbdf..4a8c01d04f706 100644 --- a/src/backend/executor/nodeBitmapOr.c +++ b/src/backend/executor/nodeBitmapOr.c @@ -3,7 +3,7 @@ * nodeBitmapOr.c * routines to handle BitmapOr nodes. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeCtescan.c b/src/backend/executor/nodeCtescan.c index 3de9b10c1f7b0..9c2b08d1d8838 100644 --- a/src/backend/executor/nodeCtescan.c +++ b/src/backend/executor/nodeCtescan.c @@ -3,7 +3,7 @@ * nodeCtescan.c * routines to handle CteScan nodes. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeCustom.c b/src/backend/executor/nodeCustom.c index cfa9e46e55f15..c82060e6d1a87 100644 --- a/src/backend/executor/nodeCustom.c +++ b/src/backend/executor/nodeCustom.c @@ -3,7 +3,7 @@ * nodeCustom.c * Routines to handle execution of custom scan node * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * ------------------------------------------------------------------------ diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c index 0b20f94035edd..0969e53c3a44d 100644 --- a/src/backend/executor/nodeForeignscan.c +++ b/src/backend/executor/nodeForeignscan.c @@ -3,7 +3,7 @@ * nodeForeignscan.c * Routines to support scans of foreign tables * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeFunctionscan.c b/src/backend/executor/nodeFunctionscan.c index ccb66ce1aab2b..b31b2b2886209 100644 --- a/src/backend/executor/nodeFunctionscan.c +++ b/src/backend/executor/nodeFunctionscan.c @@ -3,7 +3,7 @@ * nodeFunctionscan.c * Support routines for scanning RangeFunctions (functions in rangetable). 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeGather.c b/src/backend/executor/nodeGather.c index a01b46af14802..9e1dc464cb097 100644 --- a/src/backend/executor/nodeGather.c +++ b/src/backend/executor/nodeGather.c @@ -3,7 +3,7 @@ * nodeGather.c * Support routines for scanning a plan via multiple workers. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * A Gather executor launches parallel workers to run multiple copies of a diff --git a/src/backend/executor/nodeGatherMerge.c b/src/backend/executor/nodeGatherMerge.c index 47129344f3276..aa5743cebfc1a 100644 --- a/src/backend/executor/nodeGatherMerge.c +++ b/src/backend/executor/nodeGatherMerge.c @@ -3,7 +3,7 @@ * nodeGatherMerge.c * Scan a plan in multiple workers, and do order-preserving merge. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index c9a846df660a6..1721b2aae48ba 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -3,7 +3,7 @@ * nodeGroup.c * Routines to handle group nodes (used for queries with GROUP BY clause). 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index ea69eeb2a1e4b..c5f2d1d22b16a 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -3,7 +3,7 @@ * nodeHash.c * Routines to hash relations for hashjoin * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 5532b91a71dca..510bdd39adc39 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -3,7 +3,7 @@ * nodeHashjoin.c * Routines to handle hash join nodes * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeIncrementalSort.c b/src/backend/executor/nodeIncrementalSort.c index eb1c1326dea26..73e42d79451aa 100644 --- a/src/backend/executor/nodeIncrementalSort.c +++ b/src/backend/executor/nodeIncrementalSort.c @@ -3,7 +3,7 @@ * nodeIncrementalSort.c * Routines to handle incremental sorting of relations. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 5617ac29e74cb..0754e28a9aac1 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -3,7 +3,7 @@ * nodeIndexonlyscan.c * Routines to support index-only scans * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index d0a96a38e0160..2fffb1b4371eb 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -3,7 +3,7 @@ * nodeIndexscan.c * Routines to support indexed scans of relations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeLimit.c b/src/backend/executor/nodeLimit.c index c5896e579089f..128eb3e57814b 100644 --- a/src/backend/executor/nodeLimit.c +++ b/src/backend/executor/nodeLimit.c @@ -3,7 +3,7 @@ * nodeLimit.c * Routines to handle limiting of query results where appropriate * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index 554c2a5a2c5d4..b2e5c30079e58 100644 --- a/src/backend/executor/nodeLockRows.c +++ 
b/src/backend/executor/nodeLockRows.c @@ -3,7 +3,7 @@ * nodeLockRows.c * Routines to handle FOR UPDATE/FOR SHARE row locking * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeMaterial.c b/src/backend/executor/nodeMaterial.c index dd077f4323032..7c53f8e60f539 100644 --- a/src/backend/executor/nodeMaterial.c +++ b/src/backend/executor/nodeMaterial.c @@ -3,7 +3,7 @@ * nodeMaterial.c * Routines to handle materialization nodes. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index 70090a4906580..617bffb206271 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -3,7 +3,7 @@ * nodeMergeAppend.c * routines to handle MergeAppend nodes. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index c5658f03ced3b..b41454ab6d92f 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -3,7 +3,7 @@ * nodeMergejoin.c * routines supporting merge joins * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index ab3d655e603b7..5d90337498371 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -3,7 +3,7 @@ * nodeModifyTable.c * routines to handle ModifyTable nodes. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -58,6 +58,13 @@ #include "utils/rel.h" +static void ExecBatchInsert(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + TupleTableSlot **slots, + TupleTableSlot **planSlots, + int numSlots, + EState *estate, + bool canSetTag); static bool ExecOnConflictUpdate(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo, ItemPointer conflictTid, @@ -389,6 +396,7 @@ ExecInsert(ModifyTableState *mtstate, ModifyTable *node = (ModifyTable *) mtstate->ps.plan; OnConflictAction onconflict = node->onConflictAction; PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; + MemoryContext oldContext; /* * If the input result relation is a partitioned table, find the leaf @@ -441,6 +449,55 @@ ExecInsert(ModifyTableState *mtstate, ExecComputeStoredGenerated(resultRelInfo, estate, slot, CMD_INSERT); + /* + * If the FDW supports batching, and batching is requested, accumulate + * rows and insert them in batches. Otherwise use the per-row inserts. 
+ */ + if (resultRelInfo->ri_BatchSize > 1) + { + /* + * If a certain number of tuples have already been accumulated, + * or a tuple has come for a different relation than that for + * the accumulated tuples, perform the batch insert + */ + if (resultRelInfo->ri_NumSlots == resultRelInfo->ri_BatchSize) + { + ExecBatchInsert(mtstate, resultRelInfo, + resultRelInfo->ri_Slots, + resultRelInfo->ri_PlanSlots, + resultRelInfo->ri_NumSlots, + estate, canSetTag); + resultRelInfo->ri_NumSlots = 0; + } + + oldContext = MemoryContextSwitchTo(estate->es_query_cxt); + + if (resultRelInfo->ri_Slots == NULL) + { + resultRelInfo->ri_Slots = palloc(sizeof(TupleTableSlot *) * + resultRelInfo->ri_BatchSize); + resultRelInfo->ri_PlanSlots = palloc(sizeof(TupleTableSlot *) * + resultRelInfo->ri_BatchSize); + } + + resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots] = + MakeSingleTupleTableSlot(slot->tts_tupleDescriptor, + slot->tts_ops); + ExecCopySlot(resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots], + slot); + resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots] = + MakeSingleTupleTableSlot(planSlot->tts_tupleDescriptor, + planSlot->tts_ops); + ExecCopySlot(resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots], + planSlot); + + resultRelInfo->ri_NumSlots++; + + MemoryContextSwitchTo(oldContext); + + return NULL; + } + /* * insert into foreign table: let the FDW do it */ @@ -599,7 +656,7 @@ ExecInsert(ModifyTableState *mtstate, /* insert index entries for tuple */ recheckIndexes = ExecInsertIndexTuples(resultRelInfo, - slot, estate, true, + slot, estate, false, true, &specConflict, arbiterIndexes); @@ -640,7 +697,7 @@ ExecInsert(ModifyTableState *mtstate, if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(resultRelInfo, slot, estate, false, - NULL, NIL); + false, NULL, NIL); } } @@ -698,6 +755,70 @@ ExecInsert(ModifyTableState *mtstate, return result; } +/* ---------------------------------------------------------------- + * ExecBatchInsert + * + * 
Insert multiple tuples in an efficient way. + * Currently, this handles inserting into a foreign table without + * RETURNING clause. + * ---------------------------------------------------------------- + */ +static void +ExecBatchInsert(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + TupleTableSlot **slots, + TupleTableSlot **planSlots, + int numSlots, + EState *estate, + bool canSetTag) +{ + int i; + int numInserted = numSlots; + TupleTableSlot *slot = NULL; + TupleTableSlot **rslots; + + /* + * insert into foreign table: let the FDW do it + */ + rslots = resultRelInfo->ri_FdwRoutine->ExecForeignBatchInsert(estate, + resultRelInfo, + slots, + planSlots, + &numInserted); + + for (i = 0; i < numInserted; i++) + { + slot = rslots[i]; + + /* + * AFTER ROW Triggers or RETURNING expressions might reference the + * tableoid column, so (re-)initialize tts_tableOid before evaluating + * them. + */ + slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + + /* AFTER ROW INSERT Triggers */ + ExecARInsertTriggers(estate, resultRelInfo, slot, NIL, + mtstate->mt_transition_capture); + + /* + * Check any WITH CHECK OPTION constraints from parent views. See the + * comment in ExecInsert. 
+ */ + if (resultRelInfo->ri_WithCheckOptions != NIL) + ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate); + } + + if (canSetTag && numInserted > 0) + estate->es_processed += numInserted; + + for (i = 0; i < numSlots; i++) + { + ExecDropSingleTupleTableSlot(slots[i]); + ExecDropSingleTupleTableSlot(planSlots[i]); + } +} + /* ---------------------------------------------------------------- * ExecDelete * @@ -1511,7 +1632,7 @@ lreplace:; /* insert index entries for tuple if necessary */ if (resultRelInfo->ri_NumIndices > 0 && update_indexes) recheckIndexes = ExecInsertIndexTuples(resultRelInfo, - slot, estate, false, + slot, estate, true, false, NULL, NIL); } @@ -1937,6 +2058,9 @@ ExecModifyTable(PlanState *pstate) ItemPointerData tuple_ctid; HeapTupleData oldtupdata; HeapTuple oldtuple; + PartitionTupleRouting *proute = node->mt_partition_tuple_routing; + List *relinfos = NIL; + ListCell *lc; CHECK_FOR_INTERRUPTS(); @@ -2152,6 +2276,25 @@ ExecModifyTable(PlanState *pstate) return slot; } + /* + * Insert remaining tuples for batch insert. + */ + if (proute) + relinfos = estate->es_tuple_routing_result_relations; + else + relinfos = estate->es_opened_result_relations; + + foreach(lc, relinfos) + { + resultRelInfo = lfirst(lc); + if (resultRelInfo->ri_NumSlots > 0) + ExecBatchInsert(node, resultRelInfo, + resultRelInfo->ri_Slots, + resultRelInfo->ri_PlanSlots, + resultRelInfo->ri_NumSlots, + estate, node->canSetTag); + } + /* * We're done, but fire AFTER STATEMENT triggers before exiting. */ @@ -2650,6 +2793,34 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } } + /* + * Determine if the FDW supports batch insert and determine the batch + * size (a FDW may support batching, but it may be disabled for the + * server/table). + * + * We only do this for INSERT, so that for UPDATE/DELETE the batch + * size remains set to 0. 
+ */ + if (operation == CMD_INSERT) + { + resultRelInfo = mtstate->resultRelInfo; + for (i = 0; i < nplans; i++) + { + if (!resultRelInfo->ri_usesFdwDirectModify && + resultRelInfo->ri_FdwRoutine != NULL && + resultRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize && + resultRelInfo->ri_FdwRoutine->ExecForeignBatchInsert) + resultRelInfo->ri_BatchSize = + resultRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(resultRelInfo); + else + resultRelInfo->ri_BatchSize = 1; + + Assert(resultRelInfo->ri_BatchSize >= 1); + + resultRelInfo++; + } + } + /* * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it * to estate->es_auxmodifytables so that it will be run to completion by diff --git a/src/backend/executor/nodeNamedtuplestorescan.c b/src/backend/executor/nodeNamedtuplestorescan.c index 3135c7a27e18a..c0d1069f5985c 100644 --- a/src/backend/executor/nodeNamedtuplestorescan.c +++ b/src/backend/executor/nodeNamedtuplestorescan.c @@ -3,7 +3,7 @@ * nodeNamedtuplestorescan.c * routines to handle NamedTuplestoreScan nodes. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index b07c2996d4c93..41e5ecabc72f7 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -3,7 +3,7 @@ * nodeNestloop.c * routines to support nest-loop joins * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeProjectSet.c b/src/backend/executor/nodeProjectSet.c index e8da6eaec9148..07be814d7b52d 100644 --- a/src/backend/executor/nodeProjectSet.c +++ b/src/backend/executor/nodeProjectSet.c @@ -11,7 +11,7 @@ * can't be inside more-complex expressions. If that'd otherwise be * the case, the planner adds additional ProjectSet nodes. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c index 046242682f017..f9e91fdcfc2bd 100644 --- a/src/backend/executor/nodeRecursiveunion.c +++ b/src/backend/executor/nodeRecursiveunion.c @@ -7,7 +7,7 @@ * already seen. The hash key is computed from the grouping columns. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeResult.c b/src/backend/executor/nodeResult.c index 99f91bca53ba2..1762b87c99936 100644 --- a/src/backend/executor/nodeResult.c +++ b/src/backend/executor/nodeResult.c @@ -34,7 +34,7 @@ * plan normally and pass back the results. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c index 4732c926f7ba7..44232d50d0a32 100644 --- a/src/backend/executor/nodeSamplescan.c +++ b/src/backend/executor/nodeSamplescan.c @@ -3,7 +3,7 @@ * nodeSamplescan.c * Support routines for sample scans of relations (table sampling). * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 1a7c1e919f366..066f9ae37e0b1 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -3,7 +3,7 @@ * nodeSeqscan.c * Support routines for sequential scans of relations. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 8d4ccff19cc62..aad7ac0ea2a5e 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -32,7 +32,7 @@ * input group. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c index 9d2bfd7ed6b54..b99027e0d7f4e 100644 --- a/src/backend/executor/nodeSort.c +++ b/src/backend/executor/nodeSort.c @@ -3,7 +3,7 @@ * nodeSort.c * Routines to handle sorting of relations. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 152c7ae7eb4ea..d46227e65c5a4 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -11,7 +11,7 @@ * subplans, which are re-evaluated every time their result is required. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/executor/nodeSubqueryscan.c b/src/backend/executor/nodeSubqueryscan.c index e82c10981becd..c09f628ded128 100644 --- a/src/backend/executor/nodeSubqueryscan.c +++ b/src/backend/executor/nodeSubqueryscan.c @@ -7,7 +7,7 @@ * we need two sets of code. 
Ought to look at trying to unify the cases. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeTableFuncscan.c b/src/backend/executor/nodeTableFuncscan.c index 06437a469148f..4d7eca4acedfa 100644 --- a/src/backend/executor/nodeTableFuncscan.c +++ b/src/backend/executor/nodeTableFuncscan.c @@ -3,7 +3,7 @@ * nodeTableFuncscan.c * Support routines for scanning RangeTableFunc (XMLTABLE like functions). * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index 8049fdc64eac8..48c3737da2e58 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -3,7 +3,7 @@ * nodeTidscan.c * Routines to support direct tid scans of relations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c index a40d619b0ae6f..9214d6fd28f28 100644 --- a/src/backend/executor/nodeUnique.c +++ b/src/backend/executor/nodeUnique.c @@ -11,7 +11,7 @@ * (It's debatable whether the savings justifies carrying two plan node * types, though.) 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeValuesscan.c b/src/backend/executor/nodeValuesscan.c index 88661217e9c1e..5de1429fdaa21 100644 --- a/src/backend/executor/nodeValuesscan.c +++ b/src/backend/executor/nodeValuesscan.c @@ -4,7 +4,7 @@ * Support routines for scanning Values lists * ("VALUES (...), (...), ..." in rangetable). * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index de58df3d3f733..f8ea9e96d86be 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -23,7 +23,7 @@ * aggregate function over all rows in the current row's window frame. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/executor/nodeWorktablescan.c b/src/backend/executor/nodeWorktablescan.c index e8f5caf34649e..91d3bf376bc46 100644 --- a/src/backend/executor/nodeWorktablescan.c +++ b/src/backend/executor/nodeWorktablescan.c @@ -3,7 +3,7 @@ * nodeWorktablescan.c * routines to handle WorkTableScan nodes. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 055ebb77ae2ae..00aa78ea53993 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -3,7 +3,7 @@ * spi.c * Server Programming Interface * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -51,6 +51,12 @@ static _SPI_connection *_SPI_current = NULL; static int _SPI_stack_depth = 0; /* allocated size of _SPI_stack */ static int _SPI_connected = -1; /* current stack index */ +typedef struct SPICallbackArg +{ + const char *query; + RawParseMode mode; +} SPICallbackArg; + static Portal SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, ParamListInfo paramLI, bool read_only); @@ -60,8 +66,10 @@ static void _SPI_prepare_oneshot_plan(const char *src, SPIPlanPtr plan); static int _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, Snapshot snapshot, Snapshot crosscheck_snapshot, - bool read_only, bool fire_triggers, uint64 tcount, - DestReceiver *caller_dest); + bool read_only, bool no_snapshots, + bool fire_triggers, uint64 tcount, + DestReceiver *caller_dest, + ResourceOwner plan_owner); static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes, Datum *Values, const char *Nulls); @@ -508,13 +516,16 @@ SPI_execute(const char *src, bool read_only, long tcount) memset(&plan, 0, sizeof(_SPI_plan)); plan.magic = _SPI_PLAN_MAGIC; + plan.parse_mode = RAW_PARSE_DEFAULT; plan.cursor_options = CURSOR_OPT_PARALLEL_OK; _SPI_prepare_oneshot_plan(src, &plan); res = _SPI_execute_plan(&plan, NULL, InvalidSnapshot, InvalidSnapshot, - read_only, true, tcount, NULL); + read_only, 
false, + true, tcount, + NULL, NULL); _SPI_end_call(true); return res; @@ -527,6 +538,43 @@ SPI_exec(const char *src, long tcount) return SPI_execute(src, false, tcount); } +/* Parse, plan, and execute a query string, with extensible options */ +int +SPI_execute_extended(const char *src, + const SPIExecuteOptions *options) +{ + int res; + _SPI_plan plan; + + if (src == NULL || options == NULL) + return SPI_ERROR_ARGUMENT; + + res = _SPI_begin_call(true); + if (res < 0) + return res; + + memset(&plan, 0, sizeof(_SPI_plan)); + plan.magic = _SPI_PLAN_MAGIC; + plan.parse_mode = RAW_PARSE_DEFAULT; + plan.cursor_options = CURSOR_OPT_PARALLEL_OK; + if (options->params) + { + plan.parserSetup = options->params->parserSetup; + plan.parserSetupArg = options->params->parserSetupArg; + } + + _SPI_prepare_oneshot_plan(src, &plan); + + res = _SPI_execute_plan(&plan, options->params, + InvalidSnapshot, InvalidSnapshot, + options->read_only, options->no_snapshots, + true, options->tcount, + options->dest, options->owner); + + _SPI_end_call(true); + return res; +} + /* Execute a previously prepared plan */ int SPI_execute_plan(SPIPlanPtr plan, Datum *Values, const char *Nulls, @@ -548,7 +596,9 @@ SPI_execute_plan(SPIPlanPtr plan, Datum *Values, const char *Nulls, _SPI_convert_params(plan->nargs, plan->argtypes, Values, Nulls), InvalidSnapshot, InvalidSnapshot, - read_only, true, tcount, NULL); + read_only, false, + true, tcount, + NULL, NULL); _SPI_end_call(true); return res; @@ -563,37 +613,32 @@ SPI_execp(SPIPlanPtr plan, Datum *Values, const char *Nulls, long tcount) /* Execute a previously prepared plan */ int -SPI_execute_plan_with_paramlist(SPIPlanPtr plan, ParamListInfo params, - bool read_only, long tcount) +SPI_execute_plan_extended(SPIPlanPtr plan, + const SPIExecuteOptions *options) { int res; - if (plan == NULL || plan->magic != _SPI_PLAN_MAGIC || tcount < 0) + if (plan == NULL || plan->magic != _SPI_PLAN_MAGIC || options == NULL) return SPI_ERROR_ARGUMENT; res = 
_SPI_begin_call(true); if (res < 0) return res; - res = _SPI_execute_plan(plan, params, + res = _SPI_execute_plan(plan, options->params, InvalidSnapshot, InvalidSnapshot, - read_only, true, tcount, NULL); + options->read_only, options->no_snapshots, + true, options->tcount, + options->dest, options->owner); _SPI_end_call(true); return res; } -/* - * Execute a previously prepared plan. If dest isn't NULL, we send result - * tuples to the caller-supplied DestReceiver rather than through the usual - * SPI output arrangements. If dest is NULL this is equivalent to - * SPI_execute_plan_with_paramlist. - */ +/* Execute a previously prepared plan */ int -SPI_execute_plan_with_receiver(SPIPlanPtr plan, - ParamListInfo params, - bool read_only, long tcount, - DestReceiver *dest) +SPI_execute_plan_with_paramlist(SPIPlanPtr plan, ParamListInfo params, + bool read_only, long tcount) { int res; @@ -606,7 +651,9 @@ SPI_execute_plan_with_receiver(SPIPlanPtr plan, res = _SPI_execute_plan(plan, params, InvalidSnapshot, InvalidSnapshot, - read_only, true, tcount, dest); + read_only, false, + true, tcount, + NULL, NULL); _SPI_end_call(true); return res; @@ -647,7 +694,9 @@ SPI_execute_snapshot(SPIPlanPtr plan, _SPI_convert_params(plan->nargs, plan->argtypes, Values, Nulls), snapshot, crosscheck_snapshot, - read_only, fire_triggers, tcount, NULL); + read_only, false, + fire_triggers, tcount, + NULL, NULL); _SPI_end_call(true); return res; @@ -681,6 +730,7 @@ SPI_execute_with_args(const char *src, memset(&plan, 0, sizeof(_SPI_plan)); plan.magic = _SPI_PLAN_MAGIC; + plan.parse_mode = RAW_PARSE_DEFAULT; plan.cursor_options = CURSOR_OPT_PARALLEL_OK; plan.nargs = nargs; plan.argtypes = argtypes; @@ -694,50 +744,9 @@ SPI_execute_with_args(const char *src, res = _SPI_execute_plan(&plan, paramLI, InvalidSnapshot, InvalidSnapshot, - read_only, true, tcount, NULL); - - _SPI_end_call(true); - return res; -} - -/* - * SPI_execute_with_receiver -- plan and execute a query with arguments - * - * 
This is the same as SPI_execute_with_args except that parameters are - * supplied through a ParamListInfo, and (if dest isn't NULL) we send - * result tuples to the caller-supplied DestReceiver rather than through - * the usual SPI output arrangements. - */ -int -SPI_execute_with_receiver(const char *src, - ParamListInfo params, - bool read_only, long tcount, - DestReceiver *dest) -{ - int res; - _SPI_plan plan; - - if (src == NULL || tcount < 0) - return SPI_ERROR_ARGUMENT; - - res = _SPI_begin_call(true); - if (res < 0) - return res; - - memset(&plan, 0, sizeof(_SPI_plan)); - plan.magic = _SPI_PLAN_MAGIC; - plan.cursor_options = CURSOR_OPT_PARALLEL_OK; - if (params) - { - plan.parserSetup = params->parserSetup; - plan.parserSetupArg = params->parserSetupArg; - } - - _SPI_prepare_oneshot_plan(src, &plan); - - res = _SPI_execute_plan(&plan, params, - InvalidSnapshot, InvalidSnapshot, - read_only, true, tcount, dest); + read_only, false, + true, tcount, + NULL, NULL); _SPI_end_call(true); return res; @@ -768,6 +777,7 @@ SPI_prepare_cursor(const char *src, int nargs, Oid *argtypes, memset(&plan, 0, sizeof(_SPI_plan)); plan.magic = _SPI_PLAN_MAGIC; + plan.parse_mode = RAW_PARSE_DEFAULT; plan.cursor_options = cursorOptions; plan.nargs = nargs; plan.argtypes = argtypes; @@ -784,6 +794,42 @@ SPI_prepare_cursor(const char *src, int nargs, Oid *argtypes, return result; } +SPIPlanPtr +SPI_prepare_extended(const char *src, + const SPIPrepareOptions *options) +{ + _SPI_plan plan; + SPIPlanPtr result; + + if (src == NULL || options == NULL) + { + SPI_result = SPI_ERROR_ARGUMENT; + return NULL; + } + + SPI_result = _SPI_begin_call(true); + if (SPI_result < 0) + return NULL; + + memset(&plan, 0, sizeof(_SPI_plan)); + plan.magic = _SPI_PLAN_MAGIC; + plan.parse_mode = options->parseMode; + plan.cursor_options = options->cursorOptions; + plan.nargs = 0; + plan.argtypes = NULL; + plan.parserSetup = options->parserSetup; + plan.parserSetupArg = options->parserSetupArg; + + 
_SPI_prepare_plan(src, &plan); + + /* copy plan to procedure context */ + result = _SPI_make_plan_non_temp(&plan); + + _SPI_end_call(true); + + return result; +} + SPIPlanPtr SPI_prepare_params(const char *src, ParserSetupHook parserSetup, @@ -805,6 +851,7 @@ SPI_prepare_params(const char *src, memset(&plan, 0, sizeof(_SPI_plan)); plan.magic = _SPI_PLAN_MAGIC; + plan.parse_mode = RAW_PARSE_DEFAULT; plan.cursor_options = cursorOptions; plan.nargs = 0; plan.argtypes = NULL; @@ -1340,6 +1387,7 @@ SPI_cursor_open_with_args(const char *name, memset(&plan, 0, sizeof(_SPI_plan)); plan.magic = _SPI_PLAN_MAGIC; + plan.parse_mode = RAW_PARSE_DEFAULT; plan.cursor_options = cursorOptions; plan.nargs = nargs; plan.argtypes = argtypes; @@ -1376,42 +1424,38 @@ SPI_cursor_open_with_paramlist(const char *name, SPIPlanPtr plan, return SPI_cursor_open_internal(name, plan, params, read_only); } -/* - * SPI_cursor_parse_open_with_paramlist() - * - * Same as SPI_cursor_open_with_args except that parameters (if any) are passed - * as a ParamListInfo, which supports dynamic parameter set determination - */ +/* Parse a query and open it as a cursor */ Portal -SPI_cursor_parse_open_with_paramlist(const char *name, - const char *src, - ParamListInfo params, - bool read_only, int cursorOptions) +SPI_cursor_parse_open(const char *name, + const char *src, + const SPIParseOpenOptions *options) { Portal result; _SPI_plan plan; - if (src == NULL) - elog(ERROR, "SPI_cursor_parse_open_with_paramlist called with invalid arguments"); + if (src == NULL || options == NULL) + elog(ERROR, "SPI_cursor_parse_open called with invalid arguments"); SPI_result = _SPI_begin_call(true); if (SPI_result < 0) - elog(ERROR, "SPI_cursor_parse_open_with_paramlist called while not connected"); + elog(ERROR, "SPI_cursor_parse_open called while not connected"); memset(&plan, 0, sizeof(_SPI_plan)); plan.magic = _SPI_PLAN_MAGIC; - plan.cursor_options = cursorOptions; - if (params) + plan.parse_mode = RAW_PARSE_DEFAULT; + 
plan.cursor_options = options->cursorOptions; + if (options->params) { - plan.parserSetup = params->parserSetup; - plan.parserSetupArg = params->parserSetupArg; + plan.parserSetup = options->params->parserSetup; + plan.parserSetupArg = options->params->parserSetupArg; } _SPI_prepare_plan(src, &plan); /* We needn't copy the plan; SPI_cursor_open_internal will do so */ - result = SPI_cursor_open_internal(name, &plan, params, read_only); + result = SPI_cursor_open_internal(name, &plan, + options->params, options->read_only); /* And clean up */ _SPI_end_call(true); @@ -1436,6 +1480,7 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, Snapshot snapshot; MemoryContext oldcontext; Portal portal; + SPICallbackArg spicallbackarg; ErrorContextCallback spierrcontext; /* @@ -1490,8 +1535,10 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, * Setup error traceback support for ereport(), in case GetCachedPlan * throws an error. */ + spicallbackarg.query = plansource->query_string; + spicallbackarg.mode = plan->parse_mode; spierrcontext.callback = _SPI_error_callback; - spierrcontext.arg = unconstify(char *, plansource->query_string); + spierrcontext.arg = &spicallbackarg; spierrcontext.previous = error_context_stack; error_context_stack = &spierrcontext; @@ -1502,7 +1549,7 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, */ /* Replan if needed, and increment plan refcount for portal */ - cplan = GetCachedPlan(plansource, paramLI, false, _SPI_current->queryEnv); + cplan = GetCachedPlan(plansource, paramLI, NULL, _SPI_current->queryEnv); stmt_list = cplan->stmt_list; if (!plan->saved) @@ -1516,7 +1563,7 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, oldcontext = MemoryContextSwitchTo(portal->portalContext); stmt_list = copyObject(stmt_list); MemoryContextSwitchTo(oldcontext); - ReleaseCachedPlan(cplan, false); + ReleaseCachedPlan(cplan, NULL); cplan = NULL; /* portal shouldn't depend on cplan */ } @@ -1898,7 +1945,10 @@ 
SPI_plan_get_plan_sources(SPIPlanPtr plan) /* * SPI_plan_get_cached_plan --- get a SPI plan's generic CachedPlan, * if the SPI plan contains exactly one CachedPlanSource. If not, - * return NULL. Caller is responsible for doing ReleaseCachedPlan(). + * return NULL. + * + * The plan's refcount is incremented (and logged in CurrentResourceOwner, + * if it's a saved plan). Caller is responsible for doing ReleaseCachedPlan. * * This is exported so that PL/pgSQL can use it (this beats letting PL/pgSQL * look directly into the SPIPlan for itself). It's not documented in @@ -1909,6 +1959,7 @@ SPI_plan_get_cached_plan(SPIPlanPtr plan) { CachedPlanSource *plansource; CachedPlan *cplan; + SPICallbackArg spicallbackarg; ErrorContextCallback spierrcontext; Assert(plan->magic == _SPI_PLAN_MAGIC); @@ -1923,13 +1974,16 @@ SPI_plan_get_cached_plan(SPIPlanPtr plan) plansource = (CachedPlanSource *) linitial(plan->plancache_list); /* Setup error traceback support for ereport() */ + spicallbackarg.query = plansource->query_string; + spicallbackarg.mode = plan->parse_mode; spierrcontext.callback = _SPI_error_callback; - spierrcontext.arg = unconstify(char *, plansource->query_string); + spierrcontext.arg = &spicallbackarg; spierrcontext.previous = error_context_stack; error_context_stack = &spierrcontext; /* Get the generic plan for the query */ - cplan = GetCachedPlan(plansource, NULL, plan->saved, + cplan = GetCachedPlan(plansource, NULL, + plan->saved ? CurrentResourceOwner : NULL, _SPI_current->queryEnv); Assert(cplan == plansource->gplan); @@ -2036,7 +2090,8 @@ spi_printtup(TupleTableSlot *slot, DestReceiver *self) * Parse and analyze a querystring. * * At entry, plan->argtypes and plan->nargs (or alternatively plan->parserSetup - * and plan->parserSetupArg) must be valid, as must plan->cursor_options. + * and plan->parserSetupArg) must be valid, as must plan->parse_mode and + * plan->cursor_options. * * Results are stored into *plan (specifically, plan->plancache_list). 
* Note that the result data is all in CurrentMemoryContext or child contexts @@ -2050,20 +2105,23 @@ _SPI_prepare_plan(const char *src, SPIPlanPtr plan) List *raw_parsetree_list; List *plancache_list; ListCell *list_item; + SPICallbackArg spicallbackarg; ErrorContextCallback spierrcontext; /* * Setup error traceback support for ereport() */ + spicallbackarg.query = src; + spicallbackarg.mode = plan->parse_mode; spierrcontext.callback = _SPI_error_callback; - spierrcontext.arg = unconstify(char *, src); + spierrcontext.arg = &spicallbackarg; spierrcontext.previous = error_context_stack; error_context_stack = &spierrcontext; /* * Parse the request string into a list of raw parse trees. */ - raw_parsetree_list = pg_parse_query(src); + raw_parsetree_list = raw_parser(src, plan->parse_mode); /* * Do parse analysis and rule rewrite for each raw parsetree, storing the @@ -2155,20 +2213,23 @@ _SPI_prepare_oneshot_plan(const char *src, SPIPlanPtr plan) List *raw_parsetree_list; List *plancache_list; ListCell *list_item; + SPICallbackArg spicallbackarg; ErrorContextCallback spierrcontext; /* * Setup error traceback support for ereport() */ + spicallbackarg.query = src; + spicallbackarg.mode = plan->parse_mode; spierrcontext.callback = _SPI_error_callback; - spierrcontext.arg = unconstify(char *, src); + spierrcontext.arg = &spicallbackarg; spierrcontext.previous = error_context_stack; error_context_stack = &spierrcontext; /* * Parse the request string into a list of raw parse trees. */ - raw_parsetree_list = pg_parse_query(src); + raw_parsetree_list = raw_parser(src, plan->parse_mode); /* * Construct plancache entries, but don't do parse analysis yet. @@ -2203,22 +2264,27 @@ _SPI_prepare_oneshot_plan(const char *src, SPIPlanPtr plan) * behavior of taking a new snapshot for each query. 
* crosscheck_snapshot: for RI use, all others pass InvalidSnapshot * read_only: true for read-only execution (no CommandCounterIncrement) + * no_snapshots: true to skip snapshot management * fire_triggers: true to fire AFTER triggers at end of query (normal case); * false means any AFTER triggers are postponed to end of outer query * tcount: execution tuple-count limit, or 0 for none * caller_dest: DestReceiver to receive output, or NULL for normal SPI output + * plan_owner: ResourceOwner that will be used to hold refcount on plan; + * if NULL, CurrentResourceOwner is used (ignored for non-saved plan) */ static int _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, Snapshot snapshot, Snapshot crosscheck_snapshot, - bool read_only, bool fire_triggers, uint64 tcount, - DestReceiver *caller_dest) + bool read_only, bool no_snapshots, + bool fire_triggers, uint64 tcount, + DestReceiver *caller_dest, ResourceOwner plan_owner) { int my_res = 0; uint64 my_processed = 0; SPITupleTable *my_tuptable = NULL; int res = 0; bool pushed_active_snap = false; + SPICallbackArg spicallbackarg; ErrorContextCallback spierrcontext; CachedPlan *cplan = NULL; ListCell *lc1; @@ -2226,8 +2292,10 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, /* * Setup error traceback support for ereport() */ + spicallbackarg.query = NULL; /* we'll fill this below */ + spicallbackarg.mode = plan->parse_mode; spierrcontext.callback = _SPI_error_callback; - spierrcontext.arg = NULL; /* we'll fill this below */ + spierrcontext.arg = &spicallbackarg; spierrcontext.previous = error_context_stack; error_context_stack = &spierrcontext; @@ -2250,10 +2318,10 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, * In the first two cases, we can just push the snap onto the stack once * for the whole plan list. * - * But if the plan has no_snapshots set to true, then don't manage - * snapshots at all. The caller should then take care of that. 
+ * But if no_snapshots is true, then don't manage snapshots at all here. + * The caller must then take care of that. */ - if (snapshot != InvalidSnapshot && !plan->no_snapshots) + if (snapshot != InvalidSnapshot && !no_snapshots) { if (read_only) { @@ -2268,13 +2336,22 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, } } + /* + * Ensure that we have a resource owner if plan is saved, and not if it + * isn't. + */ + if (!plan->saved) + plan_owner = NULL; + else if (plan_owner == NULL) + plan_owner = CurrentResourceOwner; + foreach(lc1, plan->plancache_list) { CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1); List *stmt_list; ListCell *lc2; - spierrcontext.arg = unconstify(char *, plansource->query_string); + spicallbackarg.query = plansource->query_string; /* * If this is a one-shot plan, we still need to do parse analysis. @@ -2323,16 +2400,18 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, /* * Replan if needed, and increment plan refcount. If it's a saved - * plan, the refcount must be backed by the CurrentResourceOwner. + * plan, the refcount must be backed by the plan_owner. */ - cplan = GetCachedPlan(plansource, paramLI, plan->saved, _SPI_current->queryEnv); + cplan = GetCachedPlan(plansource, paramLI, + plan_owner, _SPI_current->queryEnv); + stmt_list = cplan->stmt_list; /* * In the default non-read-only case, get a new snapshot, replacing * any that we pushed in a previous cycle. */ - if (snapshot == InvalidSnapshot && !read_only && !plan->no_snapshots) + if (snapshot == InvalidSnapshot && !read_only && !no_snapshots) { if (pushed_active_snap) PopActiveSnapshot(); @@ -2385,7 +2464,7 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, * If not read-only mode, advance the command counter before each * command and update the snapshot. 
*/ - if (!read_only && !plan->no_snapshots) + if (!read_only && !no_snapshots) { CommandCounterIncrement(); UpdateActiveSnapshotCommandId(); @@ -2434,7 +2513,7 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, * caller must be in a nonatomic SPI context and manage * snapshots itself. */ - if (_SPI_current->atomic || !plan->no_snapshots) + if (_SPI_current->atomic || !no_snapshots) context = PROCESS_UTILITY_QUERY; else context = PROCESS_UTILITY_QUERY_NONATOMIC; @@ -2521,7 +2600,7 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, } /* Done with this plan, so release refcount */ - ReleaseCachedPlan(cplan, plan->saved); + ReleaseCachedPlan(cplan, plan_owner); cplan = NULL; /* @@ -2541,7 +2620,7 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, /* We no longer need the cached plan refcount, if any */ if (cplan) - ReleaseCachedPlan(cplan, plan->saved); + ReleaseCachedPlan(cplan, plan_owner); /* * Pop the error context stack @@ -2678,7 +2757,8 @@ _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount) static void _SPI_error_callback(void *arg) { - const char *query = (const char *) arg; + SPICallbackArg *carg = (SPICallbackArg *) arg; + const char *query = carg->query; int syntaxerrposition; if (query == NULL) /* in case arg wasn't set yet */ @@ -2696,7 +2776,23 @@ _SPI_error_callback(void *arg) internalerrquery(query); } else - errcontext("SQL statement \"%s\"", query); + { + /* Use the parse mode to decide how to describe the query */ + switch (carg->mode) + { + case RAW_PARSE_PLPGSQL_EXPR: + errcontext("SQL expression \"%s\"", query); + break; + case RAW_PARSE_PLPGSQL_ASSIGN1: + case RAW_PARSE_PLPGSQL_ASSIGN2: + case RAW_PARSE_PLPGSQL_ASSIGN3: + errcontext("PL/pgSQL assignment \"%s\"", query); + break; + default: + errcontext("SQL statement \"%s\"", query); + break; + } + } } /* @@ -2866,6 +2962,7 @@ _SPI_make_plan_non_temp(SPIPlanPtr plan) newplan = (SPIPlanPtr) palloc0(sizeof(_SPI_plan)); newplan->magic = _SPI_PLAN_MAGIC; 
newplan->plancxt = plancxt; + newplan->parse_mode = plan->parse_mode; newplan->cursor_options = plan->cursor_options; newplan->nargs = plan->nargs; if (plan->nargs > 0) @@ -2930,6 +3027,7 @@ _SPI_save_plan(SPIPlanPtr plan) newplan = (SPIPlanPtr) palloc0(sizeof(_SPI_plan)); newplan->magic = _SPI_PLAN_MAGIC; newplan->plancxt = plancxt; + newplan->parse_mode = plan->parse_mode; newplan->cursor_options = plan->cursor_options; newplan->nargs = plan->nargs; if (plan->nargs > 0) diff --git a/src/backend/executor/tqueue.c b/src/backend/executor/tqueue.c index 30a264ebea985..7af9fbe984891 100644 --- a/src/backend/executor/tqueue.c +++ b/src/backend/executor/tqueue.c @@ -8,7 +8,7 @@ * * A TupleQueueReader reads tuples from a shm_mq and returns the tuples. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/executor/tstoreReceiver.c b/src/backend/executor/tstoreReceiver.c index e8172bedd0192..e07664ff7085d 100644 --- a/src/backend/executor/tstoreReceiver.c +++ b/src/backend/executor/tstoreReceiver.c @@ -11,7 +11,7 @@ * Also optionally, we can apply a tuple conversion map before storing. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/foreign/foreign.c b/src/backend/foreign/foreign.c index 3e79c852c1715..5564dc3a1e285 100644 --- a/src/backend/foreign/foreign.c +++ b/src/backend/foreign/foreign.c @@ -3,7 +3,7 @@ * foreign.c * support for foreign-data wrappers, servers and user mappings. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/foreign/foreign.c diff --git a/src/backend/jit/jit.c b/src/backend/jit/jit.c index 5ca3f922fed79..2da300e000d48 100644 --- a/src/backend/jit/jit.c +++ b/src/backend/jit/jit.c @@ -8,7 +8,7 @@ * should end up here. * * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/jit/jit.c diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c index 9c4fc75f6567a..b0789a5fb8012 100644 --- a/src/backend/jit/llvm/llvmjit.c +++ b/src/backend/jit/llvm/llvmjit.c @@ -3,7 +3,7 @@ * llvmjit.c * Core part of the LLVM JIT provider. * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/jit/llvm/llvmjit.c diff --git a/src/backend/jit/llvm/llvmjit_deform.c b/src/backend/jit/llvm/llvmjit_deform.c index 8a3064e6819bf..008cd617f6c1d 100644 --- a/src/backend/jit/llvm/llvmjit_deform.c +++ b/src/backend/jit/llvm/llvmjit_deform.c @@ -7,7 +7,7 @@ * knowledge of the tuple descriptor. Fixed column widths, NOT NULLness, etc * can be taken advantage of. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/jit/llvm/llvmjit_error.cpp b/src/backend/jit/llvm/llvmjit_error.cpp index b36c8d53261be..26bc828875ec7 100644 --- a/src/backend/jit/llvm/llvmjit_error.cpp +++ b/src/backend/jit/llvm/llvmjit_error.cpp @@ -6,7 +6,7 @@ * Unfortunately neither (re)setting the C++ new handler, nor the LLVM OOM * handler are exposed to C. Therefore this file wraps the necessary code. 
* - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/jit/llvm/llvmjit_error.cpp diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index 3aa08a974300e..42bf4754c526f 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -3,7 +3,7 @@ * llvmjit_expr.c * JIT compile expressions. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/jit/llvm/llvmjit_inline.cpp b/src/backend/jit/llvm/llvmjit_inline.cpp index 2617a461caddf..ea90fd5b24f4b 100644 --- a/src/backend/jit/llvm/llvmjit_inline.cpp +++ b/src/backend/jit/llvm/llvmjit_inline.cpp @@ -11,7 +11,7 @@ * so for all external functions, all the referenced functions (and * prerequisites) will be imported. * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/lib/llvmjit/llvmjit_inline.cpp diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c index fb7400e99d4de..8bc58b641cfbe 100644 --- a/src/backend/jit/llvm/llvmjit_types.c +++ b/src/backend/jit/llvm/llvmjit_types.c @@ -16,7 +16,7 @@ * bitcode. * * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/jit/llvm/llvmjit_types.c diff --git a/src/backend/jit/llvm/llvmjit_wrap.cpp b/src/backend/jit/llvm/llvmjit_wrap.cpp index 37c006a1ff50f..692483d3b9384 100644 --- a/src/backend/jit/llvm/llvmjit_wrap.cpp +++ b/src/backend/jit/llvm/llvmjit_wrap.cpp @@ -3,7 +3,7 @@ * llvmjit_wrap.cpp * Parts of the LLVM interface not (yet) exposed to C. 
* - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/lib/llvm/llvmjit_wrap.cpp diff --git a/src/backend/lib/binaryheap.c b/src/backend/lib/binaryheap.c index a1b4f62a71e1a..d54e245299194 100644 --- a/src/backend/lib/binaryheap.c +++ b/src/backend/lib/binaryheap.c @@ -3,7 +3,7 @@ * binaryheap.c * A simple binary heap implementation * - * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/lib/binaryheap.c diff --git a/src/backend/lib/bipartite_match.c b/src/backend/lib/bipartite_match.c index 9372c0c83a1e9..baa1c139100be 100644 --- a/src/backend/lib/bipartite_match.c +++ b/src/backend/lib/bipartite_match.c @@ -7,7 +7,7 @@ * * https://en.wikipedia.org/w/index.php?title=Hopcroft%E2%80%93Karp_algorithm&oldid=593898016 * - * Copyright (c) 2015-2020, PostgreSQL Global Development Group + * Copyright (c) 2015-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/lib/bipartite_match.c diff --git a/src/backend/lib/bloomfilter.c b/src/backend/lib/bloomfilter.c index f040e83c01629..daf2c40ebf549 100644 --- a/src/backend/lib/bloomfilter.c +++ b/src/backend/lib/bloomfilter.c @@ -24,7 +24,7 @@ * caller many authoritative lookups, such as expensive probes of a much larger * on-disk structure. * - * Copyright (c) 2018-2020, PostgreSQL Global Development Group + * Copyright (c) 2018-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/lib/bloomfilter.c diff --git a/src/backend/lib/dshash.c b/src/backend/lib/dshash.c index 78ccf03217fe1..e0c763be3261a 100644 --- a/src/backend/lib/dshash.c +++ b/src/backend/lib/dshash.c @@ -20,7 +20,7 @@ * Future versions may support iterators and incremental resizing; for now * the implementation is minimalist. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/lib/hyperloglog.c b/src/backend/lib/hyperloglog.c index 351fed8186fb2..f4e02410adb97 100644 --- a/src/backend/lib/hyperloglog.c +++ b/src/backend/lib/hyperloglog.c @@ -3,7 +3,7 @@ * hyperloglog.c * HyperLogLog cardinality estimator * - * Portions Copyright (c) 2014-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2014-2021, PostgreSQL Global Development Group * * Based on Hideaki Ohno's C++ implementation. This is probably not ideally * suited to estimating the cardinality of very large sets; in particular, we diff --git a/src/backend/lib/ilist.c b/src/backend/lib/ilist.c index 9b02d54607602..e9a07c14f7b5b 100644 --- a/src/backend/lib/ilist.c +++ b/src/backend/lib/ilist.c @@ -3,7 +3,7 @@ * ilist.c * support for integrated/inline doubly- and singly- linked lists * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/lib/integerset.c b/src/backend/lib/integerset.c index 069a35fb23cd5..278a91bdbf823 100644 --- a/src/backend/lib/integerset.c +++ b/src/backend/lib/integerset.c @@ -61,7 +61,7 @@ * (https://doi.org/10.1002/spe.948) * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/lib/knapsack.c b/src/backend/lib/knapsack.c index 8ab734b445dfe..50c84b4aed18c 100644 --- a/src/backend/lib/knapsack.c +++ b/src/backend/lib/knapsack.c @@ -15,7 +15,7 @@ * allows approximate solutions in 
polynomial time (the general case of the * exact problem is NP-hard). * - * Copyright (c) 2017-2020, PostgreSQL Global Development Group + * Copyright (c) 2017-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/lib/knapsack.c diff --git a/src/backend/lib/pairingheap.c b/src/backend/lib/pairingheap.c index 1e45729fc7501..bed3d2efb499d 100644 --- a/src/backend/lib/pairingheap.c +++ b/src/backend/lib/pairingheap.c @@ -14,7 +14,7 @@ * The pairing heap: a new form of self-adjusting heap. * Algorithmica 1, 1 (January 1986), pages 111-129. DOI: 10.1007/BF01840439 * - * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/lib/pairingheap.c diff --git a/src/backend/lib/rbtree.c b/src/backend/lib/rbtree.c index 28681b8f6118e..536df1f7715b1 100644 --- a/src/backend/lib/rbtree.c +++ b/src/backend/lib/rbtree.c @@ -17,7 +17,7 @@ * longest path from root to leaf is only about twice as long as the shortest, * so lookups are guaranteed to run in O(lg n) time. * - * Copyright (c) 2009-2020, PostgreSQL Global Development Group + * Copyright (c) 2009-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/lib/rbtree.c diff --git a/src/backend/libpq/Makefile b/src/backend/libpq/Makefile index efc5ef760aa93..8d1d16b0fc54b 100644 --- a/src/backend/libpq/Makefile +++ b/src/backend/libpq/Makefile @@ -28,7 +28,7 @@ OBJS = \ pqmq.o \ pqsignal.o -ifeq ($(with_openssl),yes) +ifeq ($(with_ssl),openssl) OBJS += be-secure-openssl.o endif diff --git a/src/backend/libpq/auth-scram.c b/src/backend/libpq/auth-scram.c index 6879a81618326..8d857f39df5a0 100644 --- a/src/backend/libpq/auth-scram.c +++ b/src/backend/libpq/auth-scram.c @@ -80,7 +80,7 @@ * general, after logging in, but let's do what we can here. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/libpq/auth-scram.c diff --git a/src/backend/libpq/auth.c b/src/backend/libpq/auth.c index 515ae95fe109a..545635f41a916 100644 --- a/src/backend/libpq/auth.c +++ b/src/backend/libpq/auth.c @@ -3,7 +3,7 @@ * auth.c * Routines to handle network authentication * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1054,29 +1054,18 @@ pg_GSS_recvauth(Port *port) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("GSSAPI is not supported in protocol version 2"))); - if (pg_krb_server_keyfile && strlen(pg_krb_server_keyfile) > 0) + /* + * Use the configured keytab, if there is one. Unfortunately, Heimdal + * doesn't support the cred store extensions, so use the env var. + */ + if (pg_krb_server_keyfile != NULL && pg_krb_server_keyfile[0] != '\0') { - /* - * Set default Kerberos keytab file for the Krb5 mechanism. - * - * setenv("KRB5_KTNAME", pg_krb_server_keyfile, 0); except setenv() - * not always available. 
- */ - if (getenv("KRB5_KTNAME") == NULL) + if (setenv("KRB5_KTNAME", pg_krb_server_keyfile, 1) != 0) { - size_t kt_len = strlen(pg_krb_server_keyfile) + 14; - char *kt_path = malloc(kt_len); - - if (!kt_path || - snprintf(kt_path, kt_len, "KRB5_KTNAME=%s", - pg_krb_server_keyfile) != kt_len - 2 || - putenv(kt_path) != 0) - { - ereport(LOG, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - return STATUS_ERROR; - } + /* The only likely failure cause is OOM, so use that errcode */ + ereport(FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("could not set environment: %m"))); } } diff --git a/src/backend/libpq/be-fsstubs.c b/src/backend/libpq/be-fsstubs.c index 6073540d8d9ad..70b4111c14bec 100644 --- a/src/backend/libpq/be-fsstubs.c +++ b/src/backend/libpq/be-fsstubs.c @@ -3,7 +3,7 @@ * be-fsstubs.c * Builtin functions for open/close/read/write operations on large objects * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/libpq/be-gssapi-common.c b/src/backend/libpq/be-gssapi-common.c index be5d051c2027a..cb2df0bfb3dff 100644 --- a/src/backend/libpq/be-gssapi-common.c +++ b/src/backend/libpq/be-gssapi-common.c @@ -3,7 +3,7 @@ * be-gssapi-common.c * Common code for GSSAPI authentication and encryption * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/libpq/be-secure-common.c b/src/backend/libpq/be-secure-common.c index 94cdf4c8874da..a212308666a9a 100644 --- a/src/backend/libpq/be-secure-common.c +++ b/src/backend/libpq/be-secure-common.c @@ -8,7 +8,7 @@ * communications code calls, this file contains support routines that are * used by the 
library-specific implementations such as be-secure-openssl.c. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/libpq/be-secure-gssapi.c b/src/backend/libpq/be-secure-gssapi.c index 1747fccb143e2..316ca65db55e9 100644 --- a/src/backend/libpq/be-secure-gssapi.c +++ b/src/backend/libpq/be-secure-gssapi.c @@ -3,7 +3,7 @@ * be-secure-gssapi.c * GSSAPI encryption support * - * Portions Copyright (c) 2018-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2018-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/libpq/be-secure-gssapi.c @@ -525,8 +525,16 @@ secure_open_gssapi(Port *port) * Use the configured keytab, if there is one. Unfortunately, Heimdal * doesn't support the cred store extensions, so use the env var. */ - if (pg_krb_server_keyfile != NULL && strlen(pg_krb_server_keyfile) > 0) - setenv("KRB5_KTNAME", pg_krb_server_keyfile, 1); + if (pg_krb_server_keyfile != NULL && pg_krb_server_keyfile[0] != '\0') + { + if (setenv("KRB5_KTNAME", pg_krb_server_keyfile, 1) != 0) + { + /* The only likely failure cause is OOM, so use that errcode */ + ereport(FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("could not set environment: %m"))); + } + } while (true) { diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c index e10260051f184..1e2ecc6e7ab74 100644 --- a/src/backend/libpq/be-secure-openssl.c +++ b/src/backend/libpq/be-secure-openssl.c @@ -4,7 +4,7 @@ * functions for OpenSSL support in the backend. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -381,6 +381,9 @@ be_tls_open_server(Port *port) return -1; } + /* set up debugging/info callback */ + SSL_CTX_set_info_callback(SSL_context, info_cb); + if (!(port->ssl = SSL_new(SSL_context))) { ereport(COMMERROR, @@ -562,9 +565,6 @@ be_tls_open_server(Port *port) port->peer_cert_valid = true; } - /* set up debugging/info callback */ - SSL_CTX_set_info_callback(SSL_context, info_cb); - return 0; } @@ -999,39 +999,43 @@ verify_cb(int ok, X509_STORE_CTX *ctx) static void info_cb(const SSL *ssl, int type, int args) { + const char *desc; + + desc = SSL_state_string_long(ssl); + switch (type) { case SSL_CB_HANDSHAKE_START: ereport(DEBUG4, - (errmsg_internal("SSL: handshake start"))); + (errmsg_internal("SSL: handshake start: \"%s\"", desc))); break; case SSL_CB_HANDSHAKE_DONE: ereport(DEBUG4, - (errmsg_internal("SSL: handshake done"))); + (errmsg_internal("SSL: handshake done: \"%s\"", desc))); break; case SSL_CB_ACCEPT_LOOP: ereport(DEBUG4, - (errmsg_internal("SSL: accept loop"))); + (errmsg_internal("SSL: accept loop: \"%s\"", desc))); break; case SSL_CB_ACCEPT_EXIT: ereport(DEBUG4, - (errmsg_internal("SSL: accept exit (%d)", args))); + (errmsg_internal("SSL: accept exit (%d): \"%s\"", args, desc))); break; case SSL_CB_CONNECT_LOOP: ereport(DEBUG4, - (errmsg_internal("SSL: connect loop"))); + (errmsg_internal("SSL: connect loop: \"%s\"", desc))); break; case SSL_CB_CONNECT_EXIT: ereport(DEBUG4, - (errmsg_internal("SSL: connect exit (%d)", args))); + (errmsg_internal("SSL: connect exit (%d): \"%s\"", args, desc))); break; case SSL_CB_READ_ALERT: ereport(DEBUG4, - (errmsg_internal("SSL: read alert (0x%04x)", args))); + (errmsg_internal("SSL: read alert (0x%04x): \"%s\"", args, desc))); break; case SSL_CB_WRITE_ALERT: ereport(DEBUG4, - 
(errmsg_internal("SSL: write alert (0x%04x)", args))); + (errmsg_internal("SSL: write alert (0x%04x): \"%s\"", args, desc))); break; } } diff --git a/src/backend/libpq/be-secure.c b/src/backend/libpq/be-secure.c index 59bc02e79c68c..4cf139a223fdc 100644 --- a/src/backend/libpq/be-secure.c +++ b/src/backend/libpq/be-secure.c @@ -6,7 +6,7 @@ * message integrity and endpoint authentication. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/libpq/crypt.c b/src/backend/libpq/crypt.c index 17b91ac9e6052..3fcad991a7e63 100644 --- a/src/backend/libpq/crypt.c +++ b/src/backend/libpq/crypt.c @@ -4,7 +4,7 @@ * Functions for dealing with encrypted passwords stored in * pg_authid.rolpassword. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/libpq/crypt.c diff --git a/src/backend/libpq/hba.c b/src/backend/libpq/hba.c index 99319b273aa9a..20bf1461cef28 100644 --- a/src/backend/libpq/hba.c +++ b/src/backend/libpq/hba.c @@ -5,7 +5,7 @@ * wherein you authenticate a user by seeing what IP address the system * says he comes from and choosing authentication method based on it). 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1041,7 +1041,7 @@ parse_hba_line(TokenizedLine *tok_line, int elevel) ereport(elevel, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("hostssl record cannot match because SSL is not supported by this build"), - errhint("Compile with --with-openssl to use SSL connections."), + errhint("Compile with --with-ssl=openssl to use SSL connections."), errcontext("line %d of configuration file \"%s\"", line_num, HbaFileName))); *err_msg = "hostssl record cannot match because SSL is not supported by this build"; diff --git a/src/backend/libpq/ifaddr.c b/src/backend/libpq/ifaddr.c index 82adecbf06f45..75760f3b1c179 100644 --- a/src/backend/libpq/ifaddr.c +++ b/src/backend/libpq/ifaddr.c @@ -3,7 +3,7 @@ * ifaddr.c * IP netmask calculations, and enumerating network interfaces. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c index 3ea7c6167eb21..1e6b6db54002f 100644 --- a/src/backend/libpq/pqcomm.c +++ b/src/backend/libpq/pqcomm.c @@ -27,7 +27,7 @@ * the backend's "backend/libpq" is quite separate from "interfaces/libpq". * All that remains is similarities of names to trap the unwary... 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/libpq/pqcomm.c diff --git a/src/backend/libpq/pqformat.c b/src/backend/libpq/pqformat.c index a6f990c2d299c..19998988190c9 100644 --- a/src/backend/libpq/pqformat.c +++ b/src/backend/libpq/pqformat.c @@ -21,7 +21,7 @@ * are different. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/libpq/pqformat.c diff --git a/src/backend/libpq/pqmq.c b/src/backend/libpq/pqmq.c index f51d935daf83f..f468441b7a2c7 100644 --- a/src/backend/libpq/pqmq.c +++ b/src/backend/libpq/pqmq.c @@ -3,7 +3,7 @@ * pqmq.c * Use the frontend/backend protocol for communication over a shm_mq * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/libpq/pqmq.c diff --git a/src/backend/libpq/pqsignal.c b/src/backend/libpq/pqsignal.c index 9289493118f9d..b43af220303e7 100644 --- a/src/backend/libpq/pqsignal.c +++ b/src/backend/libpq/pqsignal.c @@ -3,7 +3,7 @@ * pqsignal.c * Backend signal(2) support (see also src/port/pqsignal.c) * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/main/main.c b/src/backend/main/main.c index b6e5128832690..e58e24a6465e1 100644 --- a/src/backend/main/main.c +++ b/src/backend/main/main.c @@ -9,7 +9,7 @@ * proper FooMain() routine for the incarnation. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 2719ea45a3eb5..649478b0d4d39 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -11,7 +11,7 @@ * bms_is_empty() in preference to testing for NULL.) * * - * Copyright (c) 2003-2020, PostgreSQL Global Development Group + * Copyright (c) 2003-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/nodes/bitmapset.c diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 70f8b718e0d6c..65bbc18ecbadb 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -11,7 +11,7 @@ * be handled easily in a simple depth-first traversal. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -2589,6 +2589,38 @@ _copyOnConflictClause(const OnConflictClause *from) return newnode; } +static CTESearchClause * +_copyCTESearchClause(const CTESearchClause *from) +{ + CTESearchClause *newnode = makeNode(CTESearchClause); + + COPY_NODE_FIELD(search_col_list); + COPY_SCALAR_FIELD(search_breadth_first); + COPY_STRING_FIELD(search_seq_column); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static CTECycleClause * +_copyCTECycleClause(const CTECycleClause *from) +{ + CTECycleClause *newnode = makeNode(CTECycleClause); + + COPY_NODE_FIELD(cycle_col_list); + COPY_STRING_FIELD(cycle_mark_column); + COPY_NODE_FIELD(cycle_mark_value); + COPY_NODE_FIELD(cycle_mark_default); + COPY_STRING_FIELD(cycle_path_column); + COPY_LOCATION_FIELD(location); + COPY_SCALAR_FIELD(cycle_mark_type); + 
COPY_SCALAR_FIELD(cycle_mark_typmod); + COPY_SCALAR_FIELD(cycle_mark_collation); + COPY_SCALAR_FIELD(cycle_mark_neop); + + return newnode; +} + static CommonTableExpr * _copyCommonTableExpr(const CommonTableExpr *from) { @@ -2598,6 +2630,8 @@ _copyCommonTableExpr(const CommonTableExpr *from) COPY_NODE_FIELD(aliascolnames); COPY_SCALAR_FIELD(ctematerialized); COPY_NODE_FIELD(ctequery); + COPY_NODE_FIELD(search_clause); + COPY_NODE_FIELD(cycle_clause); COPY_LOCATION_FIELD(location); COPY_SCALAR_FIELD(cterecursive); COPY_SCALAR_FIELD(cterefcount); @@ -3199,6 +3233,20 @@ _copySetOperationStmt(const SetOperationStmt *from) return newnode; } +static PLAssignStmt * +_copyPLAssignStmt(const PLAssignStmt *from) +{ + PLAssignStmt *newnode = makeNode(PLAssignStmt); + + COPY_STRING_FIELD(name); + COPY_NODE_FIELD(indirection); + COPY_SCALAR_FIELD(nnames); + COPY_NODE_FIELD(val); + COPY_LOCATION_FIELD(location); + + return newnode; +} + static AlterTableStmt * _copyAlterTableStmt(const AlterTableStmt *from) { @@ -3256,6 +3304,7 @@ _copyGrantStmt(const GrantStmt *from) COPY_NODE_FIELD(privileges); COPY_NODE_FIELD(grantees); COPY_SCALAR_FIELD(grant_option); + COPY_NODE_FIELD(grantor); COPY_SCALAR_FIELD(behavior); return newnode; @@ -5220,6 +5269,9 @@ copyObjectImpl(const void *from) case T_SetOperationStmt: retval = _copySetOperationStmt(from); break; + case T_PLAssignStmt: + retval = _copyPLAssignStmt(from); + break; case T_AlterTableStmt: retval = _copyAlterTableStmt(from); break; @@ -5664,6 +5716,12 @@ copyObjectImpl(const void *from) case T_OnConflictClause: retval = _copyOnConflictClause(from); break; + case T_CTESearchClause: + retval = _copyCTESearchClause(from); + break; + case T_CTECycleClause: + retval = _copyCTECycleClause(from); + break; case T_CommonTableExpr: retval = _copyCommonTableExpr(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 541e0e6b48522..c2d73626fcc22 100644 --- a/src/backend/nodes/equalfuncs.c +++ 
b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * "x" to be considered equal() to another reference to "x" in the query. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -1085,6 +1085,18 @@ _equalSetOperationStmt(const SetOperationStmt *a, const SetOperationStmt *b) return true; } +static bool +_equalPLAssignStmt(const PLAssignStmt *a, const PLAssignStmt *b) +{ + COMPARE_STRING_FIELD(name); + COMPARE_NODE_FIELD(indirection); + COMPARE_SCALAR_FIELD(nnames); + COMPARE_NODE_FIELD(val); + COMPARE_LOCATION_FIELD(location); + + return true; +} + static bool _equalAlterTableStmt(const AlterTableStmt *a, const AlterTableStmt *b) { @@ -1133,6 +1145,7 @@ _equalGrantStmt(const GrantStmt *a, const GrantStmt *b) COMPARE_NODE_FIELD(privileges); COMPARE_NODE_FIELD(grantees); COMPARE_SCALAR_FIELD(grant_option); + COMPARE_NODE_FIELD(grantor); COMPARE_SCALAR_FIELD(behavior); return true; @@ -2828,6 +2841,34 @@ _equalOnConflictClause(const OnConflictClause *a, const OnConflictClause *b) return true; } +static bool +_equalCTESearchClause(const CTESearchClause *a, const CTESearchClause *b) +{ + COMPARE_NODE_FIELD(search_col_list); + COMPARE_SCALAR_FIELD(search_breadth_first); + COMPARE_STRING_FIELD(search_seq_column); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalCTECycleClause(const CTECycleClause *a, const CTECycleClause *b) +{ + COMPARE_NODE_FIELD(cycle_col_list); + COMPARE_STRING_FIELD(cycle_mark_column); + COMPARE_NODE_FIELD(cycle_mark_value); + COMPARE_NODE_FIELD(cycle_mark_default); + COMPARE_STRING_FIELD(cycle_path_column); + COMPARE_LOCATION_FIELD(location); + COMPARE_SCALAR_FIELD(cycle_mark_type); + COMPARE_SCALAR_FIELD(cycle_mark_typmod); + COMPARE_SCALAR_FIELD(cycle_mark_collation); + COMPARE_SCALAR_FIELD(cycle_mark_neop); + + return true; +} + 
static bool _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b) { @@ -2835,6 +2876,8 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b) COMPARE_NODE_FIELD(aliascolnames); COMPARE_SCALAR_FIELD(ctematerialized); COMPARE_NODE_FIELD(ctequery); + COMPARE_NODE_FIELD(search_clause); + COMPARE_NODE_FIELD(cycle_clause); COMPARE_LOCATION_FIELD(location); COMPARE_SCALAR_FIELD(cterecursive); COMPARE_SCALAR_FIELD(cterefcount); @@ -3275,6 +3318,9 @@ equal(const void *a, const void *b) case T_SetOperationStmt: retval = _equalSetOperationStmt(a, b); break; + case T_PLAssignStmt: + retval = _equalPLAssignStmt(a, b); + break; case T_AlterTableStmt: retval = _equalAlterTableStmt(a, b); break; @@ -3719,6 +3765,12 @@ equal(const void *a, const void *b) case T_OnConflictClause: retval = _equalOnConflictClause(a, b); break; + case T_CTESearchClause: + retval = _equalCTESearchClause(a, b); + break; + case T_CTECycleClause: + retval = _equalCTECycleClause(a, b); + break; case T_CommonTableExpr: retval = _equalCommonTableExpr(a, b); break; diff --git a/src/backend/nodes/extensible.c b/src/backend/nodes/extensible.c index 3a6cfc44d3d72..1489df0729a6f 100644 --- a/src/backend/nodes/extensible.c +++ b/src/backend/nodes/extensible.c @@ -10,7 +10,7 @@ * and GetExtensibleNodeMethods to get information about a previously * registered type of extensible node. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index efa44342c4b8a..dbf6b30233aa9 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -6,7 +6,7 @@ * See comments in pg_list.h. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -277,6 +277,21 @@ list_make4_impl(NodeTag t, ListCell datum1, ListCell datum2, return list; } +List * +list_make5_impl(NodeTag t, ListCell datum1, ListCell datum2, + ListCell datum3, ListCell datum4, ListCell datum5) +{ + List *list = new_list(t, 5); + + list->elements[0] = datum1; + list->elements[1] = datum2; + list->elements[2] = datum3; + list->elements[3] = datum4; + list->elements[4] = datum5; + check_list_invariants(list); + return list; +} + /* * Make room for a new head cell in the given (non-NIL) list. * diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index ee033ae779446..01c110cd2fc17 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -4,7 +4,7 @@ * creator functions for various nodes. The functions here are for the * most frequently created nodes. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 963f71e99d4fd..49357ac5c2da5 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -3,7 +3,7 @@ * nodeFuncs.c * Various general-purpose manipulations of Node trees * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1566,6 +1566,12 @@ exprLocation(const Node *expr) case T_OnConflictClause: loc = ((const OnConflictClause *) expr)->location; break; + case T_CTESearchClause: + loc = ((const CTESearchClause *) expr)->location; + break; + case T_CTECycleClause: + loc = ((const CTECycleClause *) expr)->location; + break; case T_CommonTableExpr: loc = ((const CommonTableExpr *) expr)->location; break; @@ -1909,6 +1915,7 @@ expression_tree_walker(Node *node, case T_NextValueExpr: case T_RangeTblRef: case T_SortGroupClause: + case T_CTESearchClause: /* primitive node types with no expression subnodes */ break; case T_WithCheckOption: @@ -2148,6 +2155,16 @@ expression_tree_walker(Node *node, return true; } break; + case T_CTECycleClause: + { + CTECycleClause *cc = (CTECycleClause *) node; + + if (walker(cc->cycle_mark_value, context)) + return true; + if (walker(cc->cycle_mark_default, context)) + return true; + } + break; case T_CommonTableExpr: { CommonTableExpr *cte = (CommonTableExpr *) node; @@ -2156,7 +2173,13 @@ expression_tree_walker(Node *node, * Invoke the walker on the CTE's Query node, so it can * recurse into the sub-query if it wants to. 
*/ - return walker(cte->ctequery, context); + if (walker(cte->ctequery, context)) + return true; + + if (walker(cte->search_clause, context)) + return true; + if (walker(cte->cycle_clause, context)) + return true; } break; case T_List: @@ -2615,6 +2638,7 @@ expression_tree_mutator(Node *node, case T_NextValueExpr: case T_RangeTblRef: case T_SortGroupClause: + case T_CTESearchClause: return (Node *) copyObject(node); case T_WithCheckOption: { @@ -3019,6 +3043,17 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_CTECycleClause: + { + CTECycleClause *cc = (CTECycleClause *) node; + CTECycleClause *newnode; + + FLATCOPY(newnode, cc, CTECycleClause); + MUTATE(newnode->cycle_mark_value, cc->cycle_mark_value, Node *); + MUTATE(newnode->cycle_mark_default, cc->cycle_mark_default, Node *); + return (Node *) newnode; + } + break; case T_CommonTableExpr: { CommonTableExpr *cte = (CommonTableExpr *) node; @@ -3031,6 +3066,10 @@ expression_tree_mutator(Node *node, * recurse into the sub-query if it wants to. 
*/ MUTATE(newnode->ctequery, cte->ctequery, Node *); + + MUTATE(newnode->search_clause, cte->search_clause, CTESearchClause *); + MUTATE(newnode->cycle_clause, cte->cycle_clause, CTECycleClause *); + return (Node *) newnode; } break; @@ -3669,6 +3708,16 @@ raw_expression_tree_walker(Node *node, return true; } break; + case T_PLAssignStmt: + { + PLAssignStmt *stmt = (PLAssignStmt *) node; + + if (walker(stmt->indirection, context)) + return true; + if (walker(stmt->val, context)) + return true; + } + break; case T_A_Expr: { A_Expr *expr = (A_Expr *) node; @@ -3903,6 +3952,7 @@ raw_expression_tree_walker(Node *node, } break; case T_CommonTableExpr: + /* search_clause and cycle_clause are not interesting here */ return walker(((CommonTableExpr *) node)->ctequery, context); default: elog(ERROR, "unrecognized node type: %d", diff --git a/src/backend/nodes/nodes.c b/src/backend/nodes/nodes.c index e5dcda3f58ca7..a292b412f282e 100644 --- a/src/backend/nodes/nodes.c +++ b/src/backend/nodes/nodes.c @@ -4,7 +4,7 @@ * support code for nodes (now that we have removed the home-brew * inheritance system, our support code for nodes is much simpler) * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index d78b16ed1d95d..f5dcedf6e89ec 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -3,7 +3,7 @@ * outfuncs.c * Output functions for Postgres tree nodes. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1871,7 +1871,6 @@ _outAppendPath(StringInfo str, const AppendPath *node) _outPathInfo(str, (const Path *) node); - WRITE_NODE_FIELD(partitioned_rels); WRITE_NODE_FIELD(subpaths); WRITE_INT_FIELD(first_partial_path); WRITE_FLOAT_FIELD(limit_tuples, "%.0f"); @@ -1884,7 +1883,6 @@ _outMergeAppendPath(StringInfo str, const MergeAppendPath *node) _outPathInfo(str, (const Path *) node); - WRITE_NODE_FIELD(partitioned_rels); WRITE_NODE_FIELD(subpaths); WRITE_FLOAT_FIELD(limit_tuples, "%.0f"); } @@ -2775,6 +2773,18 @@ _outSelectStmt(StringInfo str, const SelectStmt *node) WRITE_NODE_FIELD(rarg); } +static void +_outPLAssignStmt(StringInfo str, const PLAssignStmt *node) +{ + WRITE_NODE_TYPE("PLASSIGN"); + + WRITE_STRING_FIELD(name); + WRITE_NODE_FIELD(indirection); + WRITE_INT_FIELD(nnames); + WRITE_NODE_FIELD(val); + WRITE_LOCATION_FIELD(location); +} + static void _outFuncCall(StringInfo str, const FuncCall *node) { @@ -3065,6 +3075,34 @@ _outWithClause(StringInfo str, const WithClause *node) WRITE_LOCATION_FIELD(location); } +static void +_outCTESearchClause(StringInfo str, const CTESearchClause *node) +{ + WRITE_NODE_TYPE("CTESEARCHCLAUSE"); + + WRITE_NODE_FIELD(search_col_list); + WRITE_BOOL_FIELD(search_breadth_first); + WRITE_STRING_FIELD(search_seq_column); + WRITE_LOCATION_FIELD(location); +} + +static void +_outCTECycleClause(StringInfo str, const CTECycleClause *node) +{ + WRITE_NODE_TYPE("CTECYCLECLAUSE"); + + WRITE_NODE_FIELD(cycle_col_list); + WRITE_STRING_FIELD(cycle_mark_column); + WRITE_NODE_FIELD(cycle_mark_value); + WRITE_NODE_FIELD(cycle_mark_default); + WRITE_STRING_FIELD(cycle_path_column); + WRITE_LOCATION_FIELD(location); + WRITE_OID_FIELD(cycle_mark_type); + WRITE_INT_FIELD(cycle_mark_typmod); + 
WRITE_OID_FIELD(cycle_mark_collation); + WRITE_OID_FIELD(cycle_mark_neop); +} + static void _outCommonTableExpr(StringInfo str, const CommonTableExpr *node) { @@ -3074,6 +3112,8 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node) WRITE_NODE_FIELD(aliascolnames); WRITE_ENUM_FIELD(ctematerialized, CTEMaterialize); WRITE_NODE_FIELD(ctequery); + WRITE_NODE_FIELD(search_clause); + WRITE_NODE_FIELD(cycle_clause); WRITE_LOCATION_FIELD(location); WRITE_BOOL_FIELD(cterecursive); WRITE_INT_FIELD(cterefcount); @@ -4211,6 +4251,9 @@ outNode(StringInfo str, const void *obj) case T_SelectStmt: _outSelectStmt(str, obj); break; + case T_PLAssignStmt: + _outPLAssignStmt(str, obj); + break; case T_ColumnDef: _outColumnDef(str, obj); break; @@ -4247,6 +4290,12 @@ outNode(StringInfo str, const void *obj) case T_WithClause: _outWithClause(str, obj); break; + case T_CTESearchClause: + _outCTESearchClause(str, obj); + break; + case T_CTECycleClause: + _outCTECycleClause(str, obj); + break; case T_CommonTableExpr: _outCommonTableExpr(str, obj); break; diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c index c05f04a259c1b..45ebff5103e0c 100644 --- a/src/backend/nodes/params.c +++ b/src/backend/nodes/params.c @@ -4,7 +4,7 @@ * Support for finding the values associated with Param nodes. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c index 970a2d438402a..25ebc76fc5ef5 100644 --- a/src/backend/nodes/print.c +++ b/src/backend/nodes/print.c @@ -3,7 +3,7 @@ * print.c * various print routines (used mostly for debugging) * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c index 8c1e39044c81a..d281f7db6c3af 100644 --- a/src/backend/nodes/read.c +++ b/src/backend/nodes/read.c @@ -4,7 +4,7 @@ * routines to convert a string (legal ascii representation of node) back * to nodes * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 0f6a77afc4395..4388aae71d258 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -3,7 +3,7 @@ * readfuncs.c * Reader functions for Postgres tree nodes. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -409,6 +409,44 @@ _readRowMarkClause(void) READ_DONE(); } +/* + * _readCTESearchClause + */ +static CTESearchClause * +_readCTESearchClause(void) +{ + READ_LOCALS(CTESearchClause); + + READ_NODE_FIELD(search_col_list); + READ_BOOL_FIELD(search_breadth_first); + READ_STRING_FIELD(search_seq_column); + READ_LOCATION_FIELD(location); + + READ_DONE(); +} + +/* + * _readCTECycleClause + */ +static CTECycleClause * +_readCTECycleClause(void) +{ + READ_LOCALS(CTECycleClause); + + READ_NODE_FIELD(cycle_col_list); + READ_STRING_FIELD(cycle_mark_column); + READ_NODE_FIELD(cycle_mark_value); + READ_NODE_FIELD(cycle_mark_default); + READ_STRING_FIELD(cycle_path_column); + READ_LOCATION_FIELD(location); + READ_OID_FIELD(cycle_mark_type); + READ_INT_FIELD(cycle_mark_typmod); + READ_OID_FIELD(cycle_mark_collation); + READ_OID_FIELD(cycle_mark_neop); + + READ_DONE(); +} + /* * _readCommonTableExpr */ @@ -421,6 +459,8 @@ _readCommonTableExpr(void) READ_NODE_FIELD(aliascolnames); READ_ENUM_FIELD(ctematerialized, CTEMaterialize); READ_NODE_FIELD(ctequery); + READ_NODE_FIELD(search_clause); + READ_NODE_FIELD(cycle_clause); READ_LOCATION_FIELD(location); READ_BOOL_FIELD(cterecursive); READ_INT_FIELD(cterefcount); @@ -2653,6 +2693,10 @@ parseNodeString(void) return_value = _readWindowClause(); else if (MATCH("ROWMARKCLAUSE", 13)) return_value = _readRowMarkClause(); + else if (MATCH("CTESEARCHCLAUSE", 15)) + return_value = _readCTESearchClause(); + else if (MATCH("CTECYCLECLAUSE", 14)) + return_value = _readCTECycleClause(); else if (MATCH("COMMONTABLEEXPR", 15)) return_value = _readCommonTableExpr(); else if (MATCH("SETOPERATIONSTMT", 16)) diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c index 0d5056c3e3dde..c5feacbff48fc 100644 --- 
a/src/backend/nodes/tidbitmap.c +++ b/src/backend/nodes/tidbitmap.c @@ -29,7 +29,7 @@ * and a non-lossy page. * * - * Copyright (c) 2003-2020, PostgreSQL Global Development Group + * Copyright (c) 2003-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/nodes/tidbitmap.c diff --git a/src/backend/nodes/value.c b/src/backend/nodes/value.c index 45b9b8473e01a..15e6d26752186 100644 --- a/src/backend/nodes/value.c +++ b/src/backend/nodes/value.c @@ -4,7 +4,7 @@ * implementation of Value nodes * * - * Copyright (c) 2003-2020, PostgreSQL Global Development Group + * Copyright (c) 2003-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/optimizer/geqo/geqo_copy.c b/src/backend/optimizer/geqo/geqo_copy.c index a4bfb1ef2acc7..4f6226b0287ab 100644 --- a/src/backend/optimizer/geqo/geqo_copy.c +++ b/src/backend/optimizer/geqo/geqo_copy.c @@ -2,7 +2,7 @@ * * geqo_copy.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/optimizer/geqo/geqo_copy.c diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index ff33acc7b66f1..2ecba83490f82 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -3,7 +3,7 @@ * geqo_eval.c * Routines to evaluate query trees * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/optimizer/geqo/geqo_eval.c diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c index 2db490de598cb..09d9e7d4dd61d 100644 --- a/src/backend/optimizer/geqo/geqo_main.c +++ b/src/backend/optimizer/geqo/geqo_main.c @@ -4,7 +4,7 @@ * solution 
to the query optimization problem * by means of a Genetic Algorithm (GA) * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/optimizer/geqo/geqo_main.c diff --git a/src/backend/optimizer/geqo/geqo_misc.c b/src/backend/optimizer/geqo/geqo_misc.c index 06755c0f63a8d..02b5a7015b7fd 100644 --- a/src/backend/optimizer/geqo/geqo_misc.c +++ b/src/backend/optimizer/geqo/geqo_misc.c @@ -3,7 +3,7 @@ * geqo_misc.c * misc. printout and debug stuff * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/optimizer/geqo/geqo_misc.c diff --git a/src/backend/optimizer/geqo/geqo_pool.c b/src/backend/optimizer/geqo/geqo_pool.c index 74dc51091d64c..1fc103ba1132f 100644 --- a/src/backend/optimizer/geqo/geqo_pool.c +++ b/src/backend/optimizer/geqo/geqo_pool.c @@ -3,7 +3,7 @@ * geqo_pool.c * Genetic Algorithm (GA) pool stuff * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/optimizer/geqo/geqo_pool.c diff --git a/src/backend/optimizer/geqo/geqo_random.c b/src/backend/optimizer/geqo/geqo_random.c index d8e6895a6efe7..f21bc047e68b5 100644 --- a/src/backend/optimizer/geqo/geqo_random.c +++ b/src/backend/optimizer/geqo/geqo_random.c @@ -3,7 +3,7 @@ * geqo_random.c * random number generator * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * 
src/backend/optimizer/geqo/geqo_random.c diff --git a/src/backend/optimizer/geqo/geqo_selection.c b/src/backend/optimizer/geqo/geqo_selection.c index 5eecefc6a9768..66b6c8ae38e45 100644 --- a/src/backend/optimizer/geqo/geqo_selection.c +++ b/src/backend/optimizer/geqo/geqo_selection.c @@ -3,7 +3,7 @@ * geqo_selection.c * linear selection scheme for the genetic query optimizer * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/optimizer/geqo/geqo_selection.c diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 627d08b78a48f..cd3fdd259cdcc 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -3,7 +3,7 @@ * allpaths.c * Routines to find possible search paths for processing a query * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -99,18 +99,13 @@ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, List *live_childrels, - List *all_child_pathkeys, - List *partitioned_rels); + List *all_child_pathkeys); static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer); -static List *accumulate_partitioned_rels(List *partitioned_rels, - List *sub_partitioned_rels, - bool flatten_partitioned_rels); static void accumulate_append_subpath(Path *path, - List **subpaths, List **special_subpaths, - List **partitioned_rels, - bool flatten_partitioned_rels); + List **subpaths, + List **special_subpaths); static Path *get_singleton_append_subpath(Path *path); static 
void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, @@ -1299,38 +1294,11 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *all_child_pathkeys = NIL; List *all_child_outers = NIL; ListCell *l; - List *partitioned_rels = NIL; - List *partial_partitioned_rels = NIL; - List *pa_partitioned_rels = NIL; double partial_rows = -1; - bool flatten_partitioned_rels; /* If appropriate, consider parallel append */ pa_subpaths_valid = enable_parallel_append && rel->consider_parallel; - /* What we do with the partitioned_rels list is different for UNION ALL */ - flatten_partitioned_rels = (rel->rtekind != RTE_SUBQUERY); - - /* - * For partitioned tables, we accumulate a list of Relids of each - * partitioned table which has at least one of its subpartitions directly - * present as a subpath in this Append. This is used later for run-time - * partition pruning. We must maintain separate lists for each Append - * Path that we create as some paths that we create here can't flatten - * sub-Appends and sub-MergeAppends into the top-level Append. We needn't - * bother doing this for join rels as no run-time pruning is done on - * those. - */ - if (rel->reloptkind != RELOPT_JOINREL && rel->part_scheme != NULL) - { - partitioned_rels = list_make1(bms_make_singleton(rel->relid)); - partial_partitioned_rels = list_make1(bms_make_singleton(rel->relid)); - - /* skip this one if we're not going to make a Parallel Append path */ - if (pa_subpaths_valid) - pa_partitioned_rels = list_make1(bms_make_singleton(rel->relid)); - } - /* * For every non-dummy child, remember the cheapest path. 
Also, identify * all pathkeys (orderings) and parameterizations (required_outer sets) @@ -1353,8 +1321,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, if (childrel->pathlist != NIL && childrel->cheapest_total_path->param_info == NULL) accumulate_append_subpath(childrel->cheapest_total_path, - &subpaths, NULL, &partitioned_rels, - flatten_partitioned_rels); + &subpaths, NULL); else subpaths_valid = false; @@ -1363,9 +1330,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, { cheapest_partial_path = linitial(childrel->partial_pathlist); accumulate_append_subpath(cheapest_partial_path, - &partial_subpaths, NULL, - &partial_partitioned_rels, - flatten_partitioned_rels); + &partial_subpaths, NULL); } else partial_subpaths_valid = false; @@ -1394,10 +1359,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, Assert(cheapest_partial_path != NULL); accumulate_append_subpath(cheapest_partial_path, &pa_partial_subpaths, - &pa_nonpartial_subpaths, - &pa_partitioned_rels, - flatten_partitioned_rels); - + &pa_nonpartial_subpaths); } else { @@ -1416,9 +1378,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, */ accumulate_append_subpath(nppath, &pa_nonpartial_subpaths, - NULL, - &pa_partitioned_rels, - flatten_partitioned_rels); + NULL); } } @@ -1495,7 +1455,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, if (subpaths_valid) add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, NIL, NULL, 0, false, - partitioned_rels, -1)); + -1)); /* * Consider an append of unordered, unparameterized partial paths. 
Make @@ -1538,7 +1498,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, appendpath = create_append_path(root, rel, NIL, partial_subpaths, NIL, NULL, parallel_workers, enable_parallel_append, - partial_partitioned_rels, -1); + -1); /* * Make sure any subsequent partial paths use the same row count @@ -1587,7 +1547,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, appendpath = create_append_path(root, rel, pa_nonpartial_subpaths, pa_partial_subpaths, NIL, NULL, parallel_workers, true, - pa_partitioned_rels, partial_rows); + partial_rows); add_partial_path(rel, (Path *) appendpath); } @@ -1597,8 +1557,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, */ if (subpaths_valid) generate_orderedappend_paths(root, rel, live_childrels, - all_child_pathkeys, - partitioned_rels); + all_child_pathkeys); /* * Build Append paths for each parameterization seen among the child rels. @@ -1617,10 +1576,6 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, { Relids required_outer = (Relids) lfirst(l); ListCell *lcr; - List *part_rels = NIL; - - if (rel->reloptkind != RELOPT_JOINREL && rel->part_scheme != NULL) - part_rels = list_make1(bms_make_singleton(rel->relid)); /* Select the child paths for an Append with this parameterization */ subpaths = NIL; @@ -1646,15 +1601,14 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, subpaths_valid = false; break; } - accumulate_append_subpath(subpath, &subpaths, NULL, &part_rels, - flatten_partitioned_rels); + accumulate_append_subpath(subpath, &subpaths, NULL); } if (subpaths_valid) add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, NIL, required_outer, 0, false, - part_rels, -1)); + -1)); } /* @@ -1681,7 +1635,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, appendpath = create_append_path(root, rel, NIL, list_make1(path), NIL, NULL, path->parallel_workers, true, - partitioned_rels, partial_rows); + partial_rows); add_partial_path(rel, (Path *) 
appendpath); } } @@ -1717,26 +1671,13 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, List *live_childrels, - List *all_child_pathkeys, - List *partitioned_rels) + List *all_child_pathkeys) { ListCell *lcp; List *partition_pathkeys = NIL; List *partition_pathkeys_desc = NIL; bool partition_pathkeys_partial = true; bool partition_pathkeys_desc_partial = true; - List *startup_partitioned_rels = NIL; - List *total_partitioned_rels = NIL; - bool flatten_partitioned_rels; - - /* Set up the method for building the partitioned rels lists */ - flatten_partitioned_rels = (rel->rtekind != RTE_SUBQUERY); - - if (rel->reloptkind != RELOPT_JOINREL && rel->part_scheme != NULL) - { - startup_partitioned_rels = list_make1(bms_make_singleton(rel->relid)); - total_partitioned_rels = list_make1(bms_make_singleton(rel->relid)); - } /* * Some partitioned table setups may allow us to use an Append node @@ -1878,13 +1819,9 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, * child paths for the MergeAppend. 
*/ accumulate_append_subpath(cheapest_startup, - &startup_subpaths, NULL, - &startup_partitioned_rels, - flatten_partitioned_rels); + &startup_subpaths, NULL); accumulate_append_subpath(cheapest_total, - &total_subpaths, NULL, - &total_partitioned_rels, - flatten_partitioned_rels); + &total_subpaths, NULL); } } @@ -1900,7 +1837,6 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, NULL, 0, false, - startup_partitioned_rels, -1)); if (startup_neq_total) add_path(rel, (Path *) create_append_path(root, @@ -1911,7 +1847,6 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, NULL, 0, false, - total_partitioned_rels, -1)); } else @@ -1921,15 +1856,13 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, rel, startup_subpaths, pathkeys, - NULL, - startup_partitioned_rels)); + NULL)); if (startup_neq_total) add_path(rel, (Path *) create_merge_append_path(root, rel, total_subpaths, pathkeys, - NULL, - total_partitioned_rels)); + NULL)); } } } @@ -2008,54 +1941,6 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, return cheapest; } -/* - * accumulate_partitioned_rels - * Record 'sub_partitioned_rels' in the 'partitioned_rels' list, - * flattening as appropriate. - */ -static List * -accumulate_partitioned_rels(List *partitioned_rels, - List *sub_partitioned_rels, - bool flatten) -{ - if (flatten) - { - /* - * We're only called with flatten == true when the partitioned_rels - * list has at most 1 element. So we can just add the members from - * sub list's first element onto the first element of - * partitioned_rels. Only later in planning when doing UNION ALL - * Append processing will we see flatten == false. partitioned_rels - * may end up with more than 1 element then, but we never expect to be - * called with flatten == true again after that, so we needn't bother - * doing anything here for anything but the initial element. 
- */ - if (partitioned_rels != NIL && sub_partitioned_rels != NIL) - { - Relids partrels = (Relids) linitial(partitioned_rels); - Relids subpartrels = (Relids) linitial(sub_partitioned_rels); - - /* Ensure the above comment holds true */ - Assert(list_length(partitioned_rels) == 1); - Assert(list_length(sub_partitioned_rels) == 1); - - linitial(partitioned_rels) = bms_add_members(partrels, subpartrels); - } - } - else - { - /* - * Handle UNION ALL to partitioned tables. This always occurs after - * we've done the accumulation for sub-partitioned tables, so there's - * no need to consider how adding multiple elements to the top level - * list affects the flatten == true case above. - */ - partitioned_rels = list_concat(partitioned_rels, sub_partitioned_rels); - } - - return partitioned_rels; -} - /* * accumulate_append_subpath * Add a subpath to the list being built for an Append or MergeAppend. @@ -2076,24 +1961,9 @@ accumulate_partitioned_rels(List *partitioned_rels, * children to subpaths and the rest to special_subpaths. If the latter is * NULL, we don't flatten the path at all (unless it contains only partial * paths). - * - * When pulling up sub-Appends and sub-Merge Appends, we also gather the - * path's list of partitioned tables and store in 'partitioned_rels'. The - * exact behavior here depends on the value of 'flatten_partitioned_rels'. - * - * When 'flatten_partitioned_rels' is true, 'partitioned_rels' will contain at - * most one element which is a Relids of the partitioned relations which there - * are subpaths for. In this case, we just add the RT indexes for the - * partitioned tables for the subpath we're pulling up to the single entry in - * 'partitioned_rels'. When 'flatten_partitioned_rels' is false we - * concatenate the path's partitioned rel list onto the top-level list. This - * done for UNION ALLs which could have a partitioned table in each union - * branch. 
*/ static void -accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths, - List **partitioned_rels, - bool flatten_partitioned_rels) +accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths) { if (IsA(path, AppendPath)) { @@ -2102,9 +1972,6 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths, if (!apath->path.parallel_aware || apath->first_partial_path == 0) { *subpaths = list_concat(*subpaths, apath->subpaths); - *partitioned_rels = accumulate_partitioned_rels(*partitioned_rels, - apath->partitioned_rels, - flatten_partitioned_rels); return; } else if (special_subpaths != NULL) @@ -2120,9 +1987,6 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths, apath->first_partial_path); *special_subpaths = list_concat(*special_subpaths, new_special_subpaths); - *partitioned_rels = accumulate_partitioned_rels(*partitioned_rels, - apath->partitioned_rels, - flatten_partitioned_rels); return; } } @@ -2131,9 +1995,6 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths, MergeAppendPath *mpath = (MergeAppendPath *) path; *subpaths = list_concat(*subpaths, mpath->subpaths); - *partitioned_rels = accumulate_partitioned_rels(*partitioned_rels, - mpath->partitioned_rels, - flatten_partitioned_rels); return; } @@ -2195,7 +2056,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel) /* Set up the dummy path */ add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NIL, rel->lateral_relids, - 0, false, NIL, -1)); + 0, false, -1)); /* * We set the cheapest-path fields immediately, just in case they were diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index a7e535c27f5a1..d263ecf08272b 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -3,7 +3,7 @@ * clausesel.c * Routines to compute clause selectivities * - * Portions Copyright (c) 1996-2020, PostgreSQL Global 
Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -227,7 +227,7 @@ clauselist_selectivity_ext(PlannerInfo *root, } else { - ok = (NumRelids(clause) == 1) && + ok = (NumRelids(root, clause) == 1) && (is_pseudo_constant_clause(lsecond(expr->args)) || (varonleft = false, is_pseudo_constant_clause(linitial(expr->args)))); @@ -609,7 +609,7 @@ bms_is_subset_singleton(const Bitmapset *s, int x) * restriction or join estimator. Subroutine for clause_selectivity(). */ static inline bool -treat_as_join_clause(Node *clause, RestrictInfo *rinfo, +treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo, int varRelid, SpecialJoinInfo *sjinfo) { if (varRelid != 0) @@ -643,7 +643,7 @@ treat_as_join_clause(Node *clause, RestrictInfo *rinfo, if (rinfo) return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); else - return (NumRelids(clause) > 1); + return (NumRelids(root, clause) > 1); } } @@ -860,7 +860,7 @@ clause_selectivity_ext(PlannerInfo *root, OpExpr *opclause = (OpExpr *) clause; Oid opno = opclause->opno; - if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo)) + if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo)) { /* Estimate selectivity for a join clause. 
*/ s1 = join_selectivity(root, opno, @@ -896,7 +896,7 @@ clause_selectivity_ext(PlannerInfo *root, funcclause->funcid, funcclause->args, funcclause->inputcollid, - treat_as_join_clause(clause, rinfo, + treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo), varRelid, jointype, @@ -907,7 +907,7 @@ clause_selectivity_ext(PlannerInfo *root, /* Use node specific selectivity calculation function */ s1 = scalararraysel(root, (ScalarArrayOpExpr *) clause, - treat_as_join_clause(clause, rinfo, + treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo), varRelid, jointype, diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 22d6935824a48..aab06c7d213ea 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -60,7 +60,7 @@ * values. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -1858,7 +1858,7 @@ cost_incremental_sort(Path *path, * Check if the expression contains Var with "varno 0" so that we * don't call estimate_num_groups in that case. */ - if (bms_is_member(0, pull_varnos((Node *) member->em_expr))) + if (bms_is_member(0, pull_varnos(root, (Node *) member->em_expr))) { unknown_varno = true; break; diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index c33af06ec0ed6..0188c1e9a1894 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -6,7 +6,7 @@ * See src/backend/optimizer/README for discussion of EquivalenceClasses. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -196,7 +196,8 @@ process_equivalence(PlannerInfo *root, ntest->location = -1; *p_restrictinfo = - make_restrictinfo((Expr *) ntest, + make_restrictinfo(root, + (Expr *) ntest, restrictinfo->is_pushed_down, restrictinfo->outerjoin_delayed, restrictinfo->pseudoconstant, @@ -716,7 +717,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, /* * Get the precise set of nullable relids appearing in the expression. */ - expr_relids = pull_varnos((Node *) expr); + expr_relids = pull_varnos(root, (Node *) expr); nullable_relids = bms_intersect(nullable_relids, expr_relids); newem = add_eq_member(newec, copyObject(expr), expr_relids, @@ -1696,7 +1697,8 @@ create_join_clause(PlannerInfo *root, */ oldcontext = MemoryContextSwitchTo(root->planner_cxt); - rinfo = build_implied_join_equality(opno, + rinfo = build_implied_join_equality(root, + opno, ec->ec_collation, leftem->em_expr, rightem->em_expr, @@ -1996,7 +1998,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, cur_em->em_datatype); if (!OidIsValid(eq_op)) continue; /* can't generate equality */ - newrinfo = build_implied_join_equality(eq_op, + newrinfo = build_implied_join_equality(root, + eq_op, cur_ec->ec_collation, innervar, cur_em->em_expr, @@ -2141,7 +2144,8 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) cur_em->em_datatype); if (OidIsValid(eq_op)) { - newrinfo = build_implied_join_equality(eq_op, + newrinfo = build_implied_join_equality(root, + eq_op, cur_ec->ec_collation, leftvar, cur_em->em_expr, @@ -2156,7 +2160,8 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) cur_em->em_datatype); if (OidIsValid(eq_op)) { - newrinfo = build_implied_join_equality(eq_op, + newrinfo = build_implied_join_equality(root, + eq_op, 
cur_ec->ec_collation, rightvar, cur_em->em_expr, diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index bcb1bc6097d01..ff536e6b24ba4 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -4,7 +4,7 @@ * Routines to determine which indexes are usable for scanning a * given relation, and create Paths accordingly. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -153,7 +153,8 @@ static IndexClause *match_clause_to_indexcol(PlannerInfo *root, RestrictInfo *rinfo, int indexcol, IndexOptInfo *index); -static IndexClause *match_boolean_index_clause(RestrictInfo *rinfo, +static IndexClause *match_boolean_index_clause(PlannerInfo *root, + RestrictInfo *rinfo, int indexcol, IndexOptInfo *index); static IndexClause *match_opclause_to_indexcol(PlannerInfo *root, RestrictInfo *rinfo, @@ -169,13 +170,16 @@ static IndexClause *get_index_clause_from_support(PlannerInfo *root, int indexarg, int indexcol, IndexOptInfo *index); -static IndexClause *match_saopclause_to_indexcol(RestrictInfo *rinfo, +static IndexClause *match_saopclause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, int indexcol, IndexOptInfo *index); -static IndexClause *match_rowcompare_to_indexcol(RestrictInfo *rinfo, +static IndexClause *match_rowcompare_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, int indexcol, IndexOptInfo *index); -static IndexClause *expand_indexqual_rowcompare(RestrictInfo *rinfo, +static IndexClause *expand_indexqual_rowcompare(PlannerInfo *root, + RestrictInfo *rinfo, int indexcol, IndexOptInfo *index, Oid expr_op, @@ -2305,7 +2309,7 @@ match_clause_to_indexcol(PlannerInfo *root, opfamily = index->opfamily[indexcol]; if (IsBooleanOpfamily(opfamily)) { - iclause = match_boolean_index_clause(rinfo, indexcol, index); + 
iclause = match_boolean_index_clause(root, rinfo, indexcol, index); if (iclause) return iclause; } @@ -2325,11 +2329,11 @@ match_clause_to_indexcol(PlannerInfo *root, } else if (IsA(clause, ScalarArrayOpExpr)) { - return match_saopclause_to_indexcol(rinfo, indexcol, index); + return match_saopclause_to_indexcol(root, rinfo, indexcol, index); } else if (IsA(clause, RowCompareExpr)) { - return match_rowcompare_to_indexcol(rinfo, indexcol, index); + return match_rowcompare_to_indexcol(root, rinfo, indexcol, index); } else if (index->amsearchnulls && IsA(clause, NullTest)) { @@ -2368,7 +2372,8 @@ match_clause_to_indexcol(PlannerInfo *root, * index's key, and if so, build a suitable IndexClause. */ static IndexClause * -match_boolean_index_clause(RestrictInfo *rinfo, +match_boolean_index_clause(PlannerInfo *root, + RestrictInfo *rinfo, int indexcol, IndexOptInfo *index) { @@ -2438,7 +2443,7 @@ match_boolean_index_clause(RestrictInfo *rinfo, IndexClause *iclause = makeNode(IndexClause); iclause->rinfo = rinfo; - iclause->indexquals = list_make1(make_simple_restrictinfo(op)); + iclause->indexquals = list_make1(make_simple_restrictinfo(root, op)); iclause->lossy = false; iclause->indexcol = indexcol; iclause->indexcols = NIL; @@ -2663,7 +2668,8 @@ get_index_clause_from_support(PlannerInfo *root, { Expr *clause = (Expr *) lfirst(lc); - indexquals = lappend(indexquals, make_simple_restrictinfo(clause)); + indexquals = lappend(indexquals, + make_simple_restrictinfo(root, clause)); } iclause->rinfo = rinfo; @@ -2684,7 +2690,8 @@ get_index_clause_from_support(PlannerInfo *root, * which see for comments. 
*/ static IndexClause * -match_saopclause_to_indexcol(RestrictInfo *rinfo, +match_saopclause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, int indexcol, IndexOptInfo *index) { @@ -2703,7 +2710,7 @@ match_saopclause_to_indexcol(RestrictInfo *rinfo, return NULL; leftop = (Node *) linitial(saop->args); rightop = (Node *) lsecond(saop->args); - right_relids = pull_varnos(rightop); + right_relids = pull_varnos(root, rightop); expr_op = saop->opno; expr_coll = saop->inputcollid; @@ -2751,7 +2758,8 @@ match_saopclause_to_indexcol(RestrictInfo *rinfo, * is handled by expand_indexqual_rowcompare(). */ static IndexClause * -match_rowcompare_to_indexcol(RestrictInfo *rinfo, +match_rowcompare_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, int indexcol, IndexOptInfo *index) { @@ -2796,14 +2804,14 @@ match_rowcompare_to_indexcol(RestrictInfo *rinfo, * These syntactic tests are the same as in match_opclause_to_indexcol() */ if (match_index_to_operand(leftop, indexcol, index) && - !bms_is_member(index_relid, pull_varnos(rightop)) && + !bms_is_member(index_relid, pull_varnos(root, rightop)) && !contain_volatile_functions(rightop)) { /* OK, indexkey is on left */ var_on_left = true; } else if (match_index_to_operand(rightop, indexcol, index) && - !bms_is_member(index_relid, pull_varnos(leftop)) && + !bms_is_member(index_relid, pull_varnos(root, leftop)) && !contain_volatile_functions(leftop)) { /* indexkey is on right, so commute the operator */ @@ -2822,7 +2830,8 @@ match_rowcompare_to_indexcol(RestrictInfo *rinfo, case BTLessEqualStrategyNumber: case BTGreaterEqualStrategyNumber: case BTGreaterStrategyNumber: - return expand_indexqual_rowcompare(rinfo, + return expand_indexqual_rowcompare(root, + rinfo, indexcol, index, expr_op, @@ -2856,7 +2865,8 @@ match_rowcompare_to_indexcol(RestrictInfo *rinfo, * but we split it out for comprehensibility. 
*/ static IndexClause * -expand_indexqual_rowcompare(RestrictInfo *rinfo, +expand_indexqual_rowcompare(PlannerInfo *root, + RestrictInfo *rinfo, int indexcol, IndexOptInfo *index, Oid expr_op, @@ -2926,7 +2936,7 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo, if (expr_op == InvalidOid) break; /* operator is not usable */ } - if (bms_is_member(index->rel->relid, pull_varnos(constop))) + if (bms_is_member(index->rel->relid, pull_varnos(root, constop))) break; /* no good, Var on wrong side */ if (contain_volatile_functions(constop)) break; /* no good, volatile comparison value */ @@ -3036,7 +3046,8 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo, matching_cols); rc->rargs = list_truncate(copyObject(non_var_args), matching_cols); - iclause->indexquals = list_make1(make_simple_restrictinfo((Expr *) rc)); + iclause->indexquals = list_make1(make_simple_restrictinfo(root, + (Expr *) rc)); } else { @@ -3050,7 +3061,7 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo, copyObject(linitial(non_var_args)), InvalidOid, linitial_oid(clause->inputcollids)); - iclause->indexquals = list_make1(make_simple_restrictinfo(op)); + iclause->indexquals = list_make1(make_simple_restrictinfo(root, op)); } } @@ -3667,7 +3678,9 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, * specified index column matches a boolean restriction clause. 
*/ bool -indexcol_is_bool_constant_for_query(IndexOptInfo *index, int indexcol) +indexcol_is_bool_constant_for_query(PlannerInfo *root, + IndexOptInfo *index, + int indexcol) { ListCell *lc; @@ -3689,7 +3702,7 @@ indexcol_is_bool_constant_for_query(IndexOptInfo *index, int indexcol) continue; /* See if we can match the clause's expression to the index column */ - if (match_boolean_index_clause(rinfo, indexcol, index)) + if (match_boolean_index_clause(root, rinfo, indexcol, index)) return true; } @@ -3801,10 +3814,10 @@ match_index_to_operand(Node *operand, * index: the index of interest */ bool -is_pseudo_constant_for_index(Node *expr, IndexOptInfo *index) +is_pseudo_constant_for_index(PlannerInfo *root, Node *expr, IndexOptInfo *index) { /* pull_varnos is cheaper than volatility check, so do that first */ - if (bms_is_member(index->rel->relid, pull_varnos(expr))) + if (bms_is_member(index->rel->relid, pull_varnos(root, expr))) return false; /* no good, contains Var of table */ if (contain_volatile_functions(expr)) return false; /* no good, volatile comparison value */ diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 4a35903b29f74..57ce97fd53bd1 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -3,7 +3,7 @@ * joinpath.c * Routines to find all possible paths for processing a set of joins * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 2d343cd29346d..0dbe2ac7265ff 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -3,7 +3,7 @@ * joinrels.c * Routines to determine which relations should be joined * - * Portions Copyright (c) 1996-2020, PostgreSQL Global 
Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1279,7 +1279,7 @@ mark_dummy_rel(RelOptInfo *rel) /* Set up the dummy path */ add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NIL, rel->lateral_relids, - 0, false, NIL, -1)); + 0, false, -1)); /* Set or update cheapest_total_path and related fields */ set_cheapest(rel); diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index ce9bf87e9b65a..bd9a176d7d37c 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -7,7 +7,7 @@ * the nature and use of path keys. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -598,7 +598,7 @@ build_index_pathkeys(PlannerInfo *root, * should stop considering index columns; any lower-order sort * keys won't be useful either. */ - if (!indexcol_is_bool_constant_for_query(index, i)) + if (!indexcol_is_bool_constant_for_query(root, index, i)) break; } diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index 1463a82be87a8..0845b460e2c9a 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -24,7 +24,7 @@ * representation all the way through to execution. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -123,7 +123,7 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel) * other side of the clause does. 
*/ static bool -IsTidEqualAnyClause(RestrictInfo *rinfo, RelOptInfo *rel) +IsTidEqualAnyClause(PlannerInfo *root, RestrictInfo *rinfo, RelOptInfo *rel) { ScalarArrayOpExpr *node; Node *arg1, @@ -148,7 +148,7 @@ IsTidEqualAnyClause(RestrictInfo *rinfo, RelOptInfo *rel) IsCTIDVar((Var *) arg1, rel)) { /* The other argument must be a pseudoconstant */ - if (bms_is_member(rel->relid, pull_varnos(arg2)) || + if (bms_is_member(rel->relid, pull_varnos(root, arg2)) || contain_volatile_functions(arg2)) return false; @@ -190,7 +190,7 @@ IsCurrentOfClause(RestrictInfo *rinfo, RelOptInfo *rel) * (Using a List may seem a bit weird, but it simplifies the caller.) */ static List * -TidQualFromRestrictInfo(RestrictInfo *rinfo, RelOptInfo *rel) +TidQualFromRestrictInfo(PlannerInfo *root, RestrictInfo *rinfo, RelOptInfo *rel) { /* * We may ignore pseudoconstant clauses (they can't contain Vars, so could @@ -210,7 +210,7 @@ TidQualFromRestrictInfo(RestrictInfo *rinfo, RelOptInfo *rel) * Check all base cases. If we get a match, return the clause. */ if (IsTidEqualClause(rinfo, rel) || - IsTidEqualAnyClause(rinfo, rel) || + IsTidEqualAnyClause(root, rinfo, rel) || IsCurrentOfClause(rinfo, rel)) return list_make1(rinfo); @@ -227,7 +227,7 @@ TidQualFromRestrictInfo(RestrictInfo *rinfo, RelOptInfo *rel) * This function is just concerned with handling AND/OR recursion. 
*/ static List * -TidQualFromRestrictInfoList(List *rlist, RelOptInfo *rel) +TidQualFromRestrictInfoList(PlannerInfo *root, List *rlist, RelOptInfo *rel) { List *rlst = NIL; ListCell *l; @@ -255,14 +255,14 @@ TidQualFromRestrictInfoList(List *rlist, RelOptInfo *rel) List *andargs = ((BoolExpr *) orarg)->args; /* Recurse in case there are sub-ORs */ - sublist = TidQualFromRestrictInfoList(andargs, rel); + sublist = TidQualFromRestrictInfoList(root, andargs, rel); } else { RestrictInfo *rinfo = castNode(RestrictInfo, orarg); Assert(!restriction_is_or_clause(rinfo)); - sublist = TidQualFromRestrictInfo(rinfo, rel); + sublist = TidQualFromRestrictInfo(root, rinfo, rel); } /* @@ -284,7 +284,7 @@ TidQualFromRestrictInfoList(List *rlist, RelOptInfo *rel) else { /* Not an OR clause, so handle base cases */ - rlst = TidQualFromRestrictInfo(rinfo, rel); + rlst = TidQualFromRestrictInfo(root, rinfo, rel); } /* @@ -390,7 +390,7 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) * If any suitable quals exist in the rel's baserestrict list, generate a * plain (unparameterized) TidPath with them. */ - tidquals = TidQualFromRestrictInfoList(rel->baserestrictinfo, rel); + tidquals = TidQualFromRestrictInfoList(root, rel->baserestrictinfo, rel); if (tidquals) { diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 806629fff210b..37eb64bcef372 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -11,7 +11,7 @@ * is that we have to work harder to clean up after ourselves when we modify * the query, since the derived data structures have to be updated too. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -231,7 +231,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) continue; /* it definitely doesn't reference innerrel */ if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids)) return false; /* there isn't any other place to eval PHV */ - if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr), + if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr), innerrel->relids)) return false; /* it does reference innerrel */ } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index f7a8dae3c6483..6c8305c977e64 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -5,7 +5,7 @@ * Planning is complete, we just need to convert the selected * Path into a Plan. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1227,8 +1227,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) * pruning during execution. Gather information needed by the executor to * do partition pruning. */ - if (enable_partition_pruning && - best_path->partitioned_rels != NIL) + if (enable_partition_pruning) { List *prunequal; @@ -1249,7 +1248,6 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) partpruneinfo = make_partition_pruneinfo(root, rel, best_path->subpaths, - best_path->partitioned_rels, prunequal); } @@ -1393,8 +1391,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, * pruning during execution. Gather information needed by the executor to * do partition pruning. 
*/ - if (enable_partition_pruning && - best_path->partitioned_rels != NIL) + if (enable_partition_pruning) { List *prunequal; @@ -1414,7 +1411,6 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, if (prunequal != NIL) partpruneinfo = make_partition_pruneinfo(root, rel, best_path->subpaths, - best_path->partitioned_rels, prunequal); } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index aae5df09f9b7f..02f813cebdcb4 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -3,7 +3,7 @@ * initsplan.c * Target list, qualification, joininfo initialization routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -60,7 +60,8 @@ static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root, Relids left_rels, Relids right_rels, Relids inner_join_rels, JoinType jointype, List *clause); -static void compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause); +static void compute_semijoin_info(PlannerInfo *root, SpecialJoinInfo *sjinfo, + List *clause); static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, bool below_outer_join, JoinType jointype, @@ -1196,7 +1197,7 @@ make_outerjoininfo(PlannerInfo *root, /* this always starts out false */ sjinfo->delay_upper_joins = false; - compute_semijoin_info(sjinfo, clause); + compute_semijoin_info(root, sjinfo, clause); /* If it's a full join, no need to be very smart */ if (jointype == JOIN_FULL) @@ -1210,7 +1211,7 @@ make_outerjoininfo(PlannerInfo *root, /* * Retrieve all relids mentioned within the join clause. 
*/ - clause_relids = pull_varnos((Node *) clause); + clause_relids = pull_varnos(root, (Node *) clause); /* * For which relids is the clause strict, ie, it cannot succeed if the @@ -1390,7 +1391,7 @@ make_outerjoininfo(PlannerInfo *root, * SpecialJoinInfo; the rest may not be set yet. */ static void -compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause) +compute_semijoin_info(PlannerInfo *root, SpecialJoinInfo *sjinfo, List *clause) { List *semi_operators; List *semi_rhs_exprs; @@ -1454,7 +1455,7 @@ compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause) list_length(op->args) != 2) { /* No, but does it reference both sides? */ - all_varnos = pull_varnos((Node *) op); + all_varnos = pull_varnos(root, (Node *) op); if (!bms_overlap(all_varnos, sjinfo->syn_righthand) || bms_is_subset(all_varnos, sjinfo->syn_righthand)) { @@ -1475,8 +1476,8 @@ compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause) opno = op->opno; left_expr = linitial(op->args); right_expr = lsecond(op->args); - left_varnos = pull_varnos(left_expr); - right_varnos = pull_varnos(right_expr); + left_varnos = pull_varnos(root, left_expr); + right_varnos = pull_varnos(root, right_expr); all_varnos = bms_union(left_varnos, right_varnos); opinputtype = exprType(left_expr); @@ -1621,7 +1622,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, /* * Retrieve all relids mentioned within the clause. */ - relids = pull_varnos(clause); + relids = pull_varnos(root, clause); /* * In ordinary SQL, a WHERE or JOIN/ON clause can't reference any rels @@ -1835,7 +1836,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, /* * Build the RestrictInfo node itself. */ - restrictinfo = make_restrictinfo((Expr *) clause, + restrictinfo = make_restrictinfo(root, + (Expr *) clause, is_pushed_down, outerjoin_delayed, pseudoconstant, @@ -2309,7 +2311,7 @@ process_implied_equality(PlannerInfo *root, * * Retrieve all relids mentioned within the possibly-simplified clause. 
*/ - relids = pull_varnos(clause); + relids = pull_varnos(root, clause); Assert(bms_is_subset(relids, qualscope)); /* @@ -2341,7 +2343,8 @@ process_implied_equality(PlannerInfo *root, /* * Build the RestrictInfo node itself. */ - restrictinfo = make_restrictinfo((Expr *) clause, + restrictinfo = make_restrictinfo(root, + (Expr *) clause, true, /* is_pushed_down */ false, /* outerjoin_delayed */ pseudoconstant, @@ -2407,7 +2410,8 @@ process_implied_equality(PlannerInfo *root, * caller's responsibility that left_ec/right_ec be set as necessary. */ RestrictInfo * -build_implied_join_equality(Oid opno, +build_implied_join_equality(PlannerInfo *root, + Oid opno, Oid collation, Expr *item1, Expr *item2, @@ -2433,7 +2437,8 @@ build_implied_join_equality(Oid opno, /* * Build the RestrictInfo node itself. */ - restrictinfo = make_restrictinfo(clause, + restrictinfo = make_restrictinfo(root, + clause, true, /* is_pushed_down */ false, /* outerjoin_delayed */ false, /* pseudoconstant */ diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index 48c4fee8923b8..c1634d1666942 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -17,7 +17,7 @@ * scan all the rows anyway. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 62dfc6d44a82e..e1a13e20c5a84 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -9,7 +9,7 @@ * shorn of features like subselects, inheritance, aggregates, grouping, * and so on. (Those are the things planner.c deals with.) 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1a94b58f8beee..adf68d8790689 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3,7 +3,7 @@ * planner.c * The query optimizer external interface. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1726,7 +1726,7 @@ inheritance_planner(PlannerInfo *root) /* Make a dummy path, cf set_dummy_rel_pathlist() */ dummy_path = (Path *) create_append_path(NULL, final_rel, NIL, NIL, NIL, NULL, 0, false, - NIL, -1); + -1); /* These lists must be nonempty to make a valid ModifyTable node */ subpaths = list_make1(dummy_path); @@ -4006,7 +4006,6 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, NULL, 0, false, - NIL, -1); } else diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 127ea3d856dcb..c3c36be13e10e 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -4,7 +4,7 @@ * Post-processing of a completed plan tree: fix references to subplan * vars, compute regproc values for operators, etc * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index fcce81926b7d6..54ef61bfb350d 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ 
-6,7 +6,7 @@ * This module deals with SubLinks and CTEs, but not subquery RTEs (i.e., * not sub-SELECT-in-FROM cases). * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -1302,7 +1302,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, * it's not gonna be a join. (Note that it won't have Vars referring to * the subquery, rather Params.) */ - upper_varnos = pull_varnos(sublink->testexpr); + upper_varnos = pull_varnos(root, sublink->testexpr); if (bms_is_empty(upper_varnos)) return NULL; @@ -1486,7 +1486,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, * The ones <= rtoffset belong to the upper query; the ones > rtoffset do * not. */ - clause_varnos = pull_varnos(whereClause); + clause_varnos = pull_varnos(root, whereClause); upper_varnos = NULL; while ((varno = bms_first_member(clause_varnos)) >= 0) { diff --git a/src/backend/optimizer/prep/prepagg.c b/src/backend/optimizer/prep/prepagg.c index 34ac985a6649d..929a8ea13bc04 100644 --- a/src/backend/optimizer/prep/prepagg.c +++ b/src/backend/optimizer/prep/prepagg.c @@ -22,7 +22,7 @@ * at executor startup. The Agg nodes are constructed much later in the * planning, however, so it's not trivial. 
* - * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index ce57dfa7cde72..d961592e015e8 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -14,7 +14,7 @@ * remove_useless_result_rtes * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -82,7 +82,8 @@ static void pull_up_union_leaf_queries(Node *setOp, PlannerInfo *root, int childRToffset); static void make_setop_translation_list(Query *query, Index newvarno, AppendRelInfo *appinfo); -static bool is_simple_subquery(Query *subquery, RangeTblEntry *rte, +static bool is_simple_subquery(PlannerInfo *root, Query *subquery, + RangeTblEntry *rte, JoinExpr *lowest_outer_join); static Node *pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte); @@ -95,7 +96,8 @@ static bool is_simple_union_all(Query *subquery); static bool is_simple_union_all_recurse(Node *setOp, Query *setOpQuery, List *colTypes); static bool is_safe_append_member(Query *subquery); -static bool jointree_contains_lateral_outer_refs(Node *jtnode, bool restricted, +static bool jointree_contains_lateral_outer_refs(PlannerInfo *root, + Node *jtnode, bool restricted, Relids safe_upper_varnos); static void perform_pullup_replace_vars(PlannerInfo *root, pullup_replace_vars_context *rvcontext, @@ -744,7 +746,7 @@ pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode, * unless is_safe_append_member says so. 
*/ if (rte->rtekind == RTE_SUBQUERY && - is_simple_subquery(rte->subquery, rte, lowest_outer_join) && + is_simple_subquery(root, rte->subquery, rte, lowest_outer_join) && (containing_appendrel == NULL || is_safe_append_member(rte->subquery))) return pull_up_simple_subquery(root, jtnode, rte, @@ -973,7 +975,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, * easier just to keep this "if" looking the same as the one in * pull_up_subqueries_recurse. */ - if (is_simple_subquery(subquery, rte, lowest_outer_join) && + if (is_simple_subquery(root, subquery, rte, lowest_outer_join) && (containing_appendrel == NULL || is_safe_append_member(subquery))) { /* good to go */ @@ -1398,7 +1400,7 @@ make_setop_translation_list(Query *query, Index newvarno, * lowest_outer_join is the lowest outer join above the subquery, or NULL. */ static bool -is_simple_subquery(Query *subquery, RangeTblEntry *rte, +is_simple_subquery(PlannerInfo *root, Query *subquery, RangeTblEntry *rte, JoinExpr *lowest_outer_join) { /* @@ -1477,7 +1479,8 @@ is_simple_subquery(Query *subquery, RangeTblEntry *rte, safe_upper_varnos = NULL; /* doesn't matter */ } - if (jointree_contains_lateral_outer_refs((Node *) subquery->jointree, + if (jointree_contains_lateral_outer_refs(root, + (Node *) subquery->jointree, restricted, safe_upper_varnos)) return false; @@ -1496,7 +1499,9 @@ is_simple_subquery(Query *subquery, RangeTblEntry *rte, */ if (lowest_outer_join != NULL) { - Relids lvarnos = pull_varnos_of_level((Node *) subquery->targetList, 1); + Relids lvarnos = pull_varnos_of_level(root, + (Node *) subquery->targetList, + 1); if (!bms_is_subset(lvarnos, safe_upper_varnos)) return false; @@ -1929,7 +1934,8 @@ is_safe_append_member(Query *subquery) * in safe_upper_varnos. 
*/ static bool -jointree_contains_lateral_outer_refs(Node *jtnode, bool restricted, +jointree_contains_lateral_outer_refs(PlannerInfo *root, Node *jtnode, + bool restricted, Relids safe_upper_varnos) { if (jtnode == NULL) @@ -1944,7 +1950,8 @@ jointree_contains_lateral_outer_refs(Node *jtnode, bool restricted, /* First, recurse to check child joins */ foreach(l, f->fromlist) { - if (jointree_contains_lateral_outer_refs(lfirst(l), + if (jointree_contains_lateral_outer_refs(root, + lfirst(l), restricted, safe_upper_varnos)) return true; @@ -1952,7 +1959,7 @@ jointree_contains_lateral_outer_refs(Node *jtnode, bool restricted, /* Then check the top-level quals */ if (restricted && - !bms_is_subset(pull_varnos_of_level(f->quals, 1), + !bms_is_subset(pull_varnos_of_level(root, f->quals, 1), safe_upper_varnos)) return true; } @@ -1971,18 +1978,20 @@ jointree_contains_lateral_outer_refs(Node *jtnode, bool restricted, } /* Check the child joins */ - if (jointree_contains_lateral_outer_refs(j->larg, + if (jointree_contains_lateral_outer_refs(root, + j->larg, restricted, safe_upper_varnos)) return true; - if (jointree_contains_lateral_outer_refs(j->rarg, + if (jointree_contains_lateral_outer_refs(root, + j->rarg, restricted, safe_upper_varnos)) return true; /* Check the JOIN's qual clauses */ if (restricted && - !bms_is_subset(pull_varnos_of_level(j->quals, 1), + !bms_is_subset(pull_varnos_of_level(root, j->quals, 1), safe_upper_varnos)) return true; } @@ -2366,7 +2375,8 @@ pullup_replace_vars_callback(Var *var, * level-zero var must belong to the subquery. */ if ((rcon->target_rte->lateral ? 
- bms_overlap(pull_varnos((Node *) newnode), rcon->relids) : + bms_overlap(pull_varnos(rcon->root, (Node *) newnode), + rcon->relids) : contain_vars_of_level((Node *) newnode, 0)) && !contain_nonstrict_functions((Node *) newnode)) { @@ -2804,7 +2814,7 @@ reduce_outer_joins_pass2(Node *jtnode, overlap = list_intersection(local_nonnullable_vars, forced_null_vars); if (overlap != NIL && - bms_overlap(pull_varnos((Node *) overlap), + bms_overlap(pull_varnos(root, (Node *) overlap), right_state->relids)) jointype = JOIN_ANTI; } diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c index 391bdd659d257..8d4dc9cd10532 100644 --- a/src/backend/optimizer/prep/prepqual.c +++ b/src/backend/optimizer/prep/prepqual.c @@ -19,7 +19,7 @@ * tree after local transformations that might introduce nested AND/ORs. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c index d56d8c6509cde..23f9f861f4428 100644 --- a/src/backend/optimizer/prep/preptlist.c +++ b/src/backend/optimizer/prep/preptlist.c @@ -29,7 +29,7 @@ * that because it's faster in typical non-inherited cases. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 745f443e5c2df..becdcbb872533 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -12,7 +12,7 @@ * case, but most of the heavy lifting for that is done elsewhere, * notably in prepjointree.c and allpaths.c. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -620,7 +620,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, * Append the child results together. */ path = (Path *) create_append_path(root, result_rel, pathlist, NIL, - NIL, NULL, 0, false, NIL, -1); + NIL, NULL, 0, false, -1); /* * For UNION ALL, we just need the Append path. For UNION, need to add @@ -677,7 +677,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, create_append_path(root, result_rel, NIL, partial_pathlist, NIL, NULL, parallel_workers, enable_parallel_append, - NIL, -1); + -1); ppath = (Path *) create_gather_path(root, result_rel, ppath, result_rel->reltarget, NULL, NULL); @@ -787,7 +787,7 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, * Append the child results together. */ path = (Path *) create_append_path(root, result_rel, pathlist, NIL, - NIL, NULL, 0, false, NIL, -1); + NIL, NULL, 0, false, -1); /* Identify the grouping semantics */ groupList = generate_setop_grouplist(op, tlist); diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index d722063cf3b49..86922a273c6eb 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -3,7 +3,7 @@ * appendinfo.c * Routines for mapping between append parent(s) and children * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 8f5cbf99f43e0..f3786dd2b63bc 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -3,7 +3,7 @@ * clauses.c * 
routines to manipulate qualification clauses * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1897,9 +1897,9 @@ is_pseudo_constant_clause_relids(Node *clause, Relids relids) * Returns the number of different relations referenced in 'clause'. */ int -NumRelids(Node *clause) +NumRelids(PlannerInfo *root, Node *clause) { - Relids varnos = pull_varnos(clause); + Relids varnos = pull_varnos(root, clause); int result = bms_num_members(varnos); bms_free(varnos); @@ -3805,9 +3805,9 @@ reorder_function_arguments(List *args, HeapTuple func_tuple) int i; Assert(nargsprovided <= pronargs); - if (pronargs > FUNC_MAX_ARGS) + if (pronargs < 0 || pronargs > FUNC_MAX_ARGS) elog(ERROR, "too many function arguments"); - MemSet(argarray, 0, pronargs * sizeof(Node *)); + memset(argarray, 0, pronargs * sizeof(Node *)); /* Deconstruct the argument list into an array indexed by argnumber */ i = 0; diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index 3132fd35a5185..be1c9ddd96469 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -3,7 +3,7 @@ * inherit.c * Routines to process child relations in inheritance trees * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -748,7 +748,8 @@ apply_child_basequals(PlannerInfo *root, RelOptInfo *parentrel, } /* reconstitute RestrictInfo with appropriate properties */ childquals = lappend(childquals, - make_restrictinfo((Expr *) onecq, + make_restrictinfo(root, + (Expr *) onecq, rinfo->is_pushed_down, rinfo->outerjoin_delayed, pseudoconstant, @@ -785,7 +786,7 @@ apply_child_basequals(PlannerInfo *root, RelOptInfo 
*parentrel, /* not likely that we'd see constants here, so no check */ childquals = lappend(childquals, - make_restrictinfo(qual, + make_restrictinfo(root, qual, true, false, false, security_level, NULL, NULL, NULL)); diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c index ad35f1c4670a7..717808b0377c9 100644 --- a/src/backend/optimizer/util/joininfo.c +++ b/src/backend/optimizer/util/joininfo.c @@ -3,7 +3,7 @@ * joininfo.c * joininfo list manipulation routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c index 002a70e1ed168..d559f33826883 100644 --- a/src/backend/optimizer/util/orclauses.c +++ b/src/backend/optimizer/util/orclauses.c @@ -3,7 +3,7 @@ * orclauses.c * Routines to extract restriction OR clauses from join OR clauses * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -268,7 +268,8 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, * Build a RestrictInfo from the new OR clause. We can assume it's valid * as a base restriction clause. */ - or_rinfo = make_restrictinfo(orclause, + or_rinfo = make_restrictinfo(root, + orclause, true, false, false, diff --git a/src/backend/optimizer/util/paramassign.c b/src/backend/optimizer/util/paramassign.c index 93fae07311c5c..ebb424112b17f 100644 --- a/src/backend/optimizer/util/paramassign.c +++ b/src/backend/optimizer/util/paramassign.c @@ -40,7 +40,7 @@ * doesn't really save much executor work anyway. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 51478957fb353..9be0c4a6af592 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -3,7 +3,7 @@ * pathnode.c * Routines to manipulate pathlists and create path nodes * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1217,7 +1217,7 @@ create_append_path(PlannerInfo *root, List *subpaths, List *partial_subpaths, List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, - List *partitioned_rels, double rows) + double rows) { AppendPath *pathnode = makeNode(AppendPath); ListCell *l; @@ -1230,15 +1230,14 @@ create_append_path(PlannerInfo *root, /* * When generating an Append path for a partitioned table, there may be - * parameters that are useful so we can eliminate certain partitions - * during execution. Here we'll go all the way and fully populate the - * parameter info data as we do for normal base relations. However, we - * need only bother doing this for RELOPT_BASEREL rels, as - * RELOPT_OTHER_MEMBER_REL's Append paths are merged into the base rel's - * Append subpaths. It would do no harm to do this, we just avoid it to - * save wasting effort. + * parameterized quals that are useful for run-time pruning. Hence, + * compute path.param_info the same way as for any other baserel, so that + * such quals will be available for make_partition_pruneinfo(). (This + * would not work right for a non-baserel, ie a scan on a non-leaf child + * partition, and it's not necessary anyway in that case. 
Must skip it if + * we don't have "root", too.) */ - if (partitioned_rels != NIL && root && rel->reloptkind == RELOPT_BASEREL) + if (root && rel->reloptkind == RELOPT_BASEREL && IS_PARTITIONED_REL(rel)) pathnode->path.param_info = get_baserel_parampathinfo(root, rel, required_outer); @@ -1250,7 +1249,6 @@ create_append_path(PlannerInfo *root, pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = parallel_workers; pathnode->path.pathkeys = pathkeys; - pathnode->partitioned_rels = list_copy(partitioned_rels); /* * For parallel append, non-partial paths are sorted by descending total @@ -1378,8 +1376,7 @@ create_merge_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *pathkeys, - Relids required_outer, - List *partitioned_rels) + Relids required_outer) { MergeAppendPath *pathnode = makeNode(MergeAppendPath); Cost input_startup_cost; @@ -1395,7 +1392,6 @@ create_merge_append_path(PlannerInfo *root, pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = 0; pathnode->path.pathkeys = pathkeys; - pathnode->partitioned_rels = list_copy(partitioned_rels); pathnode->subpaths = subpaths; /* @@ -3848,7 +3844,6 @@ reparameterize_path(PlannerInfo *root, Path *path, apath->path.pathkeys, required_outer, apath->path.parallel_workers, apath->path.parallel_aware, - apath->partitioned_rels, -1); } default: diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index bf991018abb68..1c4202d864cef 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -4,7 +4,7 @@ * PlaceHolderVar and PlaceHolderInfo manipulation routines * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -98,7 +98,7 @@ find_placeholder_info(PlannerInfo *root, 
PlaceHolderVar *phv, * ph_eval_at. If no referenced rels are within the syntactic scope, * force evaluation at the syntactic location. */ - rels_used = pull_varnos((Node *) phv->phexpr); + rels_used = pull_varnos(root, (Node *) phv->phexpr); phinfo->ph_lateral = bms_difference(rels_used, phv->phrels); if (bms_is_empty(phinfo->ph_lateral)) phinfo->ph_lateral = NULL; /* make it exactly NULL if empty */ diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index daf17596233f1..177e6e336ab9d 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -4,7 +4,7 @@ * routines for accessing the system catalogs * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -126,7 +126,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, relation = table_open(relationObjectId, NoLock); /* Temporary and unlogged relations are inaccessible during recovery. */ - if (!RelationNeedsWAL(relation) && RecoveryInProgress()) + if (relation->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT && + RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary or unlogged relations during recovery"))); diff --git a/src/backend/optimizer/util/predtest.c b/src/backend/optimizer/util/predtest.c index d6e83e5f8e5fe..f8be7283f7c05 100644 --- a/src/backend/optimizer/util/predtest.c +++ b/src/backend/optimizer/util/predtest.c @@ -4,7 +4,7 @@ * Routines to attempt to prove logical implications between predicate * expressions. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 9c9a738c80355..731ff708b905e 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -3,7 +3,7 @@ * relnode.c * Relation-node lookup/construction routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index 7075e93973de3..eb113d94c1e10 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -3,7 +3,7 @@ * restrictinfo.c * RestrictInfo node manipulation routines. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -21,7 +21,8 @@ #include "optimizer/restrictinfo.h" -static RestrictInfo *make_restrictinfo_internal(Expr *clause, +static RestrictInfo *make_restrictinfo_internal(PlannerInfo *root, + Expr *clause, Expr *orclause, bool is_pushed_down, bool outerjoin_delayed, @@ -30,7 +31,8 @@ static RestrictInfo *make_restrictinfo_internal(Expr *clause, Relids required_relids, Relids outer_relids, Relids nullable_relids); -static Expr *make_sub_restrictinfos(Expr *clause, +static Expr *make_sub_restrictinfos(PlannerInfo *root, + Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, @@ -56,7 +58,8 @@ static Expr *make_sub_restrictinfos(Expr *clause, * later. 
*/ RestrictInfo * -make_restrictinfo(Expr *clause, +make_restrictinfo(PlannerInfo *root, + Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, @@ -70,7 +73,8 @@ make_restrictinfo(Expr *clause, * above each subclause of the top-level AND/OR structure. */ if (is_orclause(clause)) - return (RestrictInfo *) make_sub_restrictinfos(clause, + return (RestrictInfo *) make_sub_restrictinfos(root, + clause, is_pushed_down, outerjoin_delayed, pseudoconstant, @@ -82,7 +86,8 @@ make_restrictinfo(Expr *clause, /* Shouldn't be an AND clause, else AND/OR flattening messed up */ Assert(!is_andclause(clause)); - return make_restrictinfo_internal(clause, + return make_restrictinfo_internal(root, + clause, NULL, is_pushed_down, outerjoin_delayed, @@ -99,7 +104,8 @@ make_restrictinfo(Expr *clause, * Common code for the main entry points and the recursive cases. */ static RestrictInfo * -make_restrictinfo_internal(Expr *clause, +make_restrictinfo_internal(PlannerInfo *root, + Expr *clause, Expr *orclause, bool is_pushed_down, bool outerjoin_delayed, @@ -137,8 +143,8 @@ make_restrictinfo_internal(Expr *clause, */ if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2) { - restrictinfo->left_relids = pull_varnos(get_leftop(clause)); - restrictinfo->right_relids = pull_varnos(get_rightop(clause)); + restrictinfo->left_relids = pull_varnos(root, get_leftop(clause)); + restrictinfo->right_relids = pull_varnos(root, get_rightop(clause)); restrictinfo->clause_relids = bms_union(restrictinfo->left_relids, restrictinfo->right_relids); @@ -165,7 +171,7 @@ make_restrictinfo_internal(Expr *clause, restrictinfo->left_relids = NULL; restrictinfo->right_relids = NULL; /* and get the total relid set the hard way */ - restrictinfo->clause_relids = pull_varnos((Node *) clause); + restrictinfo->clause_relids = pull_varnos(root, (Node *) clause); } /* required_relids defaults to clause_relids */ @@ -225,7 +231,8 @@ make_restrictinfo_internal(Expr *clause, * 
contained rels. */ static Expr * -make_sub_restrictinfos(Expr *clause, +make_sub_restrictinfos(PlannerInfo *root, + Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, @@ -241,7 +248,8 @@ make_sub_restrictinfos(Expr *clause, foreach(temp, ((BoolExpr *) clause)->args) orlist = lappend(orlist, - make_sub_restrictinfos(lfirst(temp), + make_sub_restrictinfos(root, + lfirst(temp), is_pushed_down, outerjoin_delayed, pseudoconstant, @@ -249,7 +257,8 @@ make_sub_restrictinfos(Expr *clause, NULL, outer_relids, nullable_relids)); - return (Expr *) make_restrictinfo_internal(clause, + return (Expr *) make_restrictinfo_internal(root, + clause, make_orclause(orlist), is_pushed_down, outerjoin_delayed, @@ -266,7 +275,8 @@ make_sub_restrictinfos(Expr *clause, foreach(temp, ((BoolExpr *) clause)->args) andlist = lappend(andlist, - make_sub_restrictinfos(lfirst(temp), + make_sub_restrictinfos(root, + lfirst(temp), is_pushed_down, outerjoin_delayed, pseudoconstant, @@ -277,7 +287,8 @@ make_sub_restrictinfos(Expr *clause, return make_andclause(andlist); } else - return (Expr *) make_restrictinfo_internal(clause, + return (Expr *) make_restrictinfo_internal(root, + clause, NULL, is_pushed_down, outerjoin_delayed, diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 01cea102eab1a..89853a0630236 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -3,7 +3,7 @@ * tlist.c * Target list manipulation routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 029d546ab2be4..e307d6fbb0720 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -9,7 +9,7 @@ * contains variables. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -23,6 +23,7 @@ #include "access/sysattr.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" +#include "optimizer/placeholder.h" #include "optimizer/prep.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" @@ -31,6 +32,7 @@ typedef struct { Relids varnos; + PlannerInfo *root; int sublevels_up; } pull_varnos_context; @@ -92,11 +94,12 @@ static Relids alias_relid_set(Query *query, Relids relids); * SubPlan, we only need to look at the parameters passed to the subplan. */ Relids -pull_varnos(Node *node) +pull_varnos(PlannerInfo *root, Node *node) { pull_varnos_context context; context.varnos = NULL; + context.root = root; context.sublevels_up = 0; /* @@ -117,11 +120,12 @@ pull_varnos(Node *node) * Only Vars of the specified level are considered. */ Relids -pull_varnos_of_level(Node *node, int levelsup) +pull_varnos_of_level(PlannerInfo *root, Node *node, int levelsup) { pull_varnos_context context; context.varnos = NULL; + context.root = root; context.sublevels_up = levelsup; /* @@ -159,33 +163,56 @@ pull_varnos_walker(Node *node, pull_varnos_context *context) } if (IsA(node, PlaceHolderVar)) { - /* - * A PlaceHolderVar acts as a variable of its syntactic scope, or - * lower than that if it references only a subset of the rels in its - * syntactic scope. It might also contain lateral references, but we - * should ignore such references when computing the set of varnos in - * an expression tree. Also, if the PHV contains no variables within - * its syntactic scope, it will be forced to be evaluated exactly at - * the syntactic scope, so take that as the relid set. 
- */ PlaceHolderVar *phv = (PlaceHolderVar *) node; - pull_varnos_context subcontext; - subcontext.varnos = NULL; - subcontext.sublevels_up = context->sublevels_up; - (void) pull_varnos_walker((Node *) phv->phexpr, &subcontext); + /* + * If a PlaceHolderVar is not of the target query level, ignore it, + * instead recursing into its expression to see if it contains any + * vars that are of the target level. + */ if (phv->phlevelsup == context->sublevels_up) { - subcontext.varnos = bms_int_members(subcontext.varnos, - phv->phrels); - if (bms_is_empty(subcontext.varnos)) + /* + * Ideally, the PHV's contribution to context->varnos is its + * ph_eval_at set. However, this code can be invoked before + * that's been computed. If we cannot find a PlaceHolderInfo, + * fall back to the conservative assumption that the PHV will be + * evaluated at its syntactic level (phv->phrels). + * + * There is a second hazard: this code is also used to examine + * qual clauses during deconstruct_jointree, when we may have a + * PlaceHolderInfo but its ph_eval_at value is not yet final, so + * that theoretically we could obtain a relid set that's smaller + * than we'd see later on. That should never happen though, + * because we deconstruct the jointree working upwards. Any outer + * join that forces delay of evaluation of a given qual clause + * will be processed before we examine that clause here, so the + * ph_eval_at value should have been updated to include it. 
+ */ + PlaceHolderInfo *phinfo = NULL; + + if (phv->phlevelsup == 0) + { + ListCell *lc; + + foreach(lc, context->root->placeholder_list) + { + phinfo = (PlaceHolderInfo *) lfirst(lc); + if (phinfo->phid == phv->phid) + break; + phinfo = NULL; + } + } + if (phinfo != NULL) + context->varnos = bms_add_members(context->varnos, + phinfo->ph_eval_at); + else context->varnos = bms_add_members(context->varnos, phv->phrels); + return false; /* don't recurse into expression */ } - context->varnos = bms_join(context->varnos, subcontext.varnos); - return false; } - if (IsA(node, Query)) + else if (IsA(node, Query)) { /* Recurse into RTE subquery or not-yet-planned sublink subquery */ bool result; diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 084e00f73d818..0f3a70c49a871 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -14,7 +14,7 @@ * contain optimizable statements, which we should transform. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/parser/analyze.c @@ -42,8 +42,10 @@ #include "parser/parse_param.h" #include "parser/parse_relation.h" #include "parser/parse_target.h" +#include "parser/parse_type.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" +#include "utils/builtins.h" #include "utils/rel.h" @@ -70,6 +72,8 @@ static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt); static List *transformReturningList(ParseState *pstate, List *returningList); static List *transformUpdateTargetList(ParseState *pstate, List *targetList); +static Query *transformPLAssignStmt(ParseState *pstate, + PLAssignStmt *stmt); static Query *transformDeclareCursorStmt(ParseState *pstate, DeclareCursorStmt *stmt); static Query *transformExplainStmt(ParseState *pstate, @@ -304,6 +308,11 @@ transformStmt(ParseState 
*pstate, Node *parseTree) } break; + case T_PLAssignStmt: + result = transformPLAssignStmt(pstate, + (PLAssignStmt *) parseTree); + break; + /* * Special cases */ @@ -367,6 +376,7 @@ analyze_requires_snapshot(RawStmt *parseTree) case T_DeleteStmt: case T_UpdateStmt: case T_SelectStmt: + case T_PLAssignStmt: result = true; break; @@ -1799,6 +1809,33 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) return qry; } +/* + * Make a SortGroupClause node for a SetOperationStmt's groupClauses + */ +SortGroupClause * +makeSortGroupClauseForSetOp(Oid rescoltype) +{ + SortGroupClause *grpcl = makeNode(SortGroupClause); + Oid sortop; + Oid eqop; + bool hashable; + + /* determine the eqop and optional sortop */ + get_sort_group_operators(rescoltype, + false, true, false, + &sortop, &eqop, NULL, + &hashable); + + /* we don't have a tlist yet, so can't assign sortgrouprefs */ + grpcl->tleSortGroupRef = 0; + grpcl->eqop = eqop; + grpcl->sortop = sortop; + grpcl->nulls_first = false; /* OK with or without sortop */ + grpcl->hashable = hashable; + + return grpcl; +} + /* * transformSetOperationTree * Recursively transform leaves and internal nodes of a set-op tree @@ -2099,31 +2136,15 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, */ if (op->op != SETOP_UNION || !op->all) { - SortGroupClause *grpcl = makeNode(SortGroupClause); - Oid sortop; - Oid eqop; - bool hashable; ParseCallbackState pcbstate; setup_parser_errposition_callback(&pcbstate, pstate, bestlocation); - /* determine the eqop and optional sortop */ - get_sort_group_operators(rescoltype, - false, true, false, - &sortop, &eqop, NULL, - &hashable); + op->groupClauses = lappend(op->groupClauses, + makeSortGroupClauseForSetOp(rescoltype)); cancel_parser_errposition_callback(&pcbstate); - - /* we don't have a tlist yet, so can't assign sortgrouprefs */ - grpcl->tleSortGroupRef = 0; - grpcl->eqop = eqop; - grpcl->sortop = sortop; - grpcl->nulls_first = false; /* OK with or without sortop */ - 
grpcl->hashable = hashable; - - op->groupClauses = lappend(op->groupClauses, grpcl); } /* @@ -2393,6 +2414,255 @@ transformReturningList(ParseState *pstate, List *returningList) } +/* + * transformPLAssignStmt - + * transform a PL/pgSQL assignment statement + * + * If there is no opt_indirection, the transformed statement looks like + * "SELECT a_expr ...", except the expression has been cast to the type of + * the target. With indirection, it's still a SELECT, but the expression will + * incorporate FieldStore and/or assignment SubscriptingRef nodes to compute a + * new value for a container-type variable represented by the target. The + * expression references the target as the container source. + */ +static Query * +transformPLAssignStmt(ParseState *pstate, PLAssignStmt *stmt) +{ + Query *qry = makeNode(Query); + ColumnRef *cref = makeNode(ColumnRef); + List *indirection = stmt->indirection; + int nnames = stmt->nnames; + SelectStmt *sstmt = stmt->val; + Node *target; + Oid targettype; + int32 targettypmod; + Oid targetcollation; + List *tlist; + TargetEntry *tle; + Oid type_id; + Node *qual; + ListCell *l; + + /* + * First, construct a ColumnRef for the target variable. If the target + * has more than one dotted name, we have to pull the extra names out of + * the indirection list. + */ + cref->fields = list_make1(makeString(stmt->name)); + cref->location = stmt->location; + if (nnames > 1) + { + /* avoid munging the raw parsetree */ + indirection = list_copy(indirection); + while (--nnames > 0 && indirection != NIL) + { + Node *ind = (Node *) linitial(indirection); + + if (!IsA(ind, String)) + elog(ERROR, "invalid name count in PLAssignStmt"); + cref->fields = lappend(cref->fields, ind); + indirection = list_delete_first(indirection); + } + } + + /* + * Transform the target reference. Typically we will get back a Param + * node, but there's no reason to be too picky about its type. 
+ */ + target = transformExpr(pstate, (Node *) cref, + EXPR_KIND_UPDATE_TARGET); + targettype = exprType(target); + targettypmod = exprTypmod(target); + targetcollation = exprCollation(target); + + /* + * The rest mostly matches transformSelectStmt, except that we needn't + * consider WITH or INTO, and we build a targetlist our own way. + */ + qry->commandType = CMD_SELECT; + pstate->p_is_insert = false; + + /* make FOR UPDATE/FOR SHARE info available to addRangeTableEntry */ + pstate->p_locking_clause = sstmt->lockingClause; + + /* make WINDOW info available for window functions, too */ + pstate->p_windowdefs = sstmt->windowClause; + + /* process the FROM clause */ + transformFromClause(pstate, sstmt->fromClause); + + /* initially transform the targetlist as if in SELECT */ + tlist = transformTargetList(pstate, sstmt->targetList, + EXPR_KIND_SELECT_TARGET); + + /* we should have exactly one targetlist item */ + if (list_length(tlist) != 1) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg_plural("assignment source returned %d column", + "assignment source returned %d columns", + list_length(tlist), + list_length(tlist)))); + + tle = linitial_node(TargetEntry, tlist); + + /* + * This next bit is similar to transformAssignedExpr; the key difference + * is we use COERCION_PLPGSQL not COERCION_ASSIGNMENT. + */ + type_id = exprType((Node *) tle->expr); + + pstate->p_expr_kind = EXPR_KIND_UPDATE_TARGET; + + if (indirection) + { + tle->expr = (Expr *) + transformAssignmentIndirection(pstate, + target, + stmt->name, + false, + targettype, + targettypmod, + targetcollation, + indirection, + list_head(indirection), + (Node *) tle->expr, + COERCION_PLPGSQL, + exprLocation(target)); + } + else if (targettype != type_id && + (targettype == RECORDOID || ISCOMPLEX(targettype)) && + (type_id == RECORDOID || ISCOMPLEX(type_id))) + { + /* + * Hack: do not let coerce_to_target_type() deal with inconsistent + * composite types. 
Just pass the expression result through as-is, + * and let the PL/pgSQL executor do the conversion its way. This is + * rather bogus, but it's needed for backwards compatibility. + */ + } + else + { + /* + * For normal non-qualified target column, do type checking and + * coercion. + */ + Node *orig_expr = (Node *) tle->expr; + + tle->expr = (Expr *) + coerce_to_target_type(pstate, + orig_expr, type_id, + targettype, targettypmod, + COERCION_PLPGSQL, + COERCE_IMPLICIT_CAST, + -1); + /* With COERCION_PLPGSQL, this error is probably unreachable */ + if (tle->expr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("variable \"%s\" is of type %s" + " but expression is of type %s", + stmt->name, + format_type_be(targettype), + format_type_be(type_id)), + errhint("You will need to rewrite or cast the expression."), + parser_errposition(pstate, exprLocation(orig_expr)))); + } + + pstate->p_expr_kind = EXPR_KIND_NONE; + + qry->targetList = list_make1(tle); + + /* transform WHERE */ + qual = transformWhereClause(pstate, sstmt->whereClause, + EXPR_KIND_WHERE, "WHERE"); + + /* initial processing of HAVING clause is much like WHERE clause */ + qry->havingQual = transformWhereClause(pstate, sstmt->havingClause, + EXPR_KIND_HAVING, "HAVING"); + + /* + * Transform sorting/grouping stuff. Do ORDER BY first because both + * transformGroupClause and transformDistinctClause need the results. Note + * that these functions can also change the targetList, so it's passed to + * them by reference. 
+ */ + qry->sortClause = transformSortClause(pstate, + sstmt->sortClause, + &qry->targetList, + EXPR_KIND_ORDER_BY, + false /* allow SQL92 rules */ ); + + qry->groupClause = transformGroupClause(pstate, + sstmt->groupClause, + &qry->groupingSets, + &qry->targetList, + qry->sortClause, + EXPR_KIND_GROUP_BY, + false /* allow SQL92 rules */ ); + + if (sstmt->distinctClause == NIL) + { + qry->distinctClause = NIL; + qry->hasDistinctOn = false; + } + else if (linitial(sstmt->distinctClause) == NULL) + { + /* We had SELECT DISTINCT */ + qry->distinctClause = transformDistinctClause(pstate, + &qry->targetList, + qry->sortClause, + false); + qry->hasDistinctOn = false; + } + else + { + /* We had SELECT DISTINCT ON */ + qry->distinctClause = transformDistinctOnClause(pstate, + sstmt->distinctClause, + &qry->targetList, + qry->sortClause); + qry->hasDistinctOn = true; + } + + /* transform LIMIT */ + qry->limitOffset = transformLimitClause(pstate, sstmt->limitOffset, + EXPR_KIND_OFFSET, "OFFSET", + sstmt->limitOption); + qry->limitCount = transformLimitClause(pstate, sstmt->limitCount, + EXPR_KIND_LIMIT, "LIMIT", + sstmt->limitOption); + qry->limitOption = sstmt->limitOption; + + /* transform window clauses after we have seen all window functions */ + qry->windowClause = transformWindowDefinitions(pstate, + pstate->p_windowdefs, + &qry->targetList); + + qry->rtable = pstate->p_rtable; + qry->jointree = makeFromExpr(pstate->p_joinlist, qual); + + qry->hasSubLinks = pstate->p_hasSubLinks; + qry->hasWindowFuncs = pstate->p_hasWindowFuncs; + qry->hasTargetSRFs = pstate->p_hasTargetSRFs; + qry->hasAggs = pstate->p_hasAggs; + + foreach(l, sstmt->lockingClause) + { + transformLockingClause(pstate, qry, + (LockingClause *) lfirst(l), false); + } + + assign_query_collations(pstate, qry); + + /* this must be done after collations, for reliable comparison of exprs */ + if (pstate->p_hasAggs || qry->groupClause || qry->groupingSets || qry->havingQual) + parseCheckAggregates(pstate, qry); 
+ + return qry; +} + + /* * transformDeclareCursorStmt - * transform a DECLARE CURSOR Statement diff --git a/src/backend/parser/check_keywords.pl b/src/backend/parser/check_keywords.pl index e6c6c98fb5ec7..598f3d20e3573 100644 --- a/src/backend/parser/check_keywords.pl +++ b/src/backend/parser/check_keywords.pl @@ -4,7 +4,7 @@ # Usage: check_keywords.pl gram.y kwlist.h # src/backend/parser/check_keywords.pl -# Copyright (c) 2009-2020, PostgreSQL Global Development Group +# Copyright (c) 2009-2021, PostgreSQL Global Development Group use strict; use warnings; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 8f341ac006126..dd72a9fc3c4bd 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -6,7 +6,7 @@ * gram.y * POSTGRESQL BISON rules/actions * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -294,6 +294,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type select_no_parens select_with_parens select_clause simple_select values_clause + PLpgSQL_Expr PLAssignStmt %type alter_column_default opclass_item opclass_drop alter_using %type add_drop opt_asc_desc opt_nulls_order @@ -384,11 +385,11 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type vacuum_relation %type opt_select_limit select_limit limit_clause -%type stmtblock stmtmulti +%type parse_toplevel stmtmulti OptTableElementList TableElementList OptInherit definition OptTypedTableElementList TypedTableElementList reloptions opt_reloptions - OptWith distinct_clause opt_definition func_args func_args_list + OptWith opt_definition func_args func_args_list func_args_with_defaults func_args_with_defaults_list aggr_args aggr_args_list func_as createfunc_opt_list alterfunc_opt_list @@ -400,6 +401,7 @@ static Node 
*makeRecursiveViewSelect(char *relname, List *aliases, Node *query); name_list role_list from_clause from_list opt_array_bounds qualified_name_list any_name any_name_list type_name_list any_operator expr_list attrs + distinct_clause opt_distinct_clause target_list opt_target_list insert_column_list set_target_list set_clause_list set_clause def_list operator_def_list indirection opt_indirection @@ -492,6 +494,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type row explicit_row implicit_row type_list array_expr_list %type case_expr case_arg when_clause case_default %type when_clause_list +%type opt_search_clause opt_cycle_clause %type sub_type opt_materialized %type NumericOnly %type NumericOnly_list @@ -535,7 +538,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type ColId ColLabel BareColLabel %type NonReservedWord NonReservedWord_or_Sconst %type var_name type_function_name param_name -%type createdb_opt_name +%type createdb_opt_name plassign_target %type var_value zone_value %type auth_ident RoleSpec opt_granted_by @@ -623,7 +626,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); ASSERTION ASSIGNMENT ASYMMETRIC AT ATTACH ATTRIBUTE AUTHORIZATION BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT - BOOLEAN_P BOTH BY + BOOLEAN_P BOTH BREADTH BY CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE @@ -635,7 +638,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS - DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DESC + DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DEPTH DESC DETACH DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP @@ -723,6 +726,19 @@ static Node 
*makeRecursiveViewSelect(char *relname, List *aliases, Node *query); */ %token NOT_LA NULLS_LA WITH_LA +/* + * The grammar likewise thinks these tokens are keywords, but they are never + * generated by the scanner. Rather, they can be injected by parser.c as + * the initial token of the string (using the lookahead-token mechanism + * implemented there). This provides a way to tell the grammar to parse + * something other than the usual list of SQL commands. + */ +%token MODE_TYPE_NAME +%token MODE_PLPGSQL_EXPR +%token MODE_PLPGSQL_ASSIGN1 +%token MODE_PLPGSQL_ASSIGN2 +%token MODE_PLPGSQL_ASSIGN3 + /* Precedence: lowest to highest */ %nonassoc SET /* see relation_expr_opt_alias */ @@ -787,11 +803,46 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); /* * The target production for the whole parse. + * + * Ordinarily we parse a list of statements, but if we see one of the + * special MODE_XXX symbols as first token, we parse something else. + * The options here correspond to enum RawParseMode, which see for details. 
*/ -stmtblock: stmtmulti +parse_toplevel: + stmtmulti { pg_yyget_extra(yyscanner)->parsetree = $1; } + | MODE_TYPE_NAME Typename + { + pg_yyget_extra(yyscanner)->parsetree = list_make1($2); + } + | MODE_PLPGSQL_EXPR PLpgSQL_Expr + { + pg_yyget_extra(yyscanner)->parsetree = + list_make1(makeRawStmt($2, 0)); + } + | MODE_PLPGSQL_ASSIGN1 PLAssignStmt + { + PLAssignStmt *n = (PLAssignStmt *) $2; + n->nnames = 1; + pg_yyget_extra(yyscanner)->parsetree = + list_make1(makeRawStmt((Node *) n, 0)); + } + | MODE_PLPGSQL_ASSIGN2 PLAssignStmt + { + PLAssignStmt *n = (PLAssignStmt *) $2; + n->nnames = 2; + pg_yyget_extra(yyscanner)->parsetree = + list_make1(makeRawStmt((Node *) n, 0)); + } + | MODE_PLPGSQL_ASSIGN3 PLAssignStmt + { + PLAssignStmt *n = (PLAssignStmt *) $2; + n->nnames = 3; + pg_yyget_extra(yyscanner)->parsetree = + list_make1(makeRawStmt((Node *) n, 0)); + } ; /* @@ -6722,7 +6773,7 @@ opt_from_in: from_in *****************************************************************************/ GrantStmt: GRANT privileges ON privilege_target TO grantee_list - opt_grant_grant_option + opt_grant_grant_option opt_granted_by { GrantStmt *n = makeNode(GrantStmt); n->is_grant = true; @@ -6732,13 +6783,14 @@ GrantStmt: GRANT privileges ON privilege_target TO grantee_list n->objects = ($4)->objs; n->grantees = $6; n->grant_option = $7; + n->grantor = $8; $$ = (Node*)n; } ; RevokeStmt: REVOKE privileges ON privilege_target - FROM grantee_list opt_drop_behavior + FROM grantee_list opt_granted_by opt_drop_behavior { GrantStmt *n = makeNode(GrantStmt); n->is_grant = false; @@ -6748,11 +6800,12 @@ RevokeStmt: n->objtype = ($4)->objtype; n->objects = ($4)->objs; n->grantees = $6; - n->behavior = $7; + n->grantor = $7; + n->behavior = $8; $$ = (Node *)n; } | REVOKE GRANT OPTION FOR privileges ON privilege_target - FROM grantee_list opt_drop_behavior + FROM grantee_list opt_granted_by opt_drop_behavior { GrantStmt *n = makeNode(GrantStmt); n->is_grant = false; @@ -6762,7 +6815,8 @@ 
RevokeStmt: n->objtype = ($7)->objtype; n->objects = ($7)->objs; n->grantees = $9; - n->behavior = $10; + n->grantor = $10; + n->behavior = $11; $$ = (Node *)n; } ; @@ -11211,6 +11265,11 @@ select_clause: * As with select_no_parens, simple_select cannot have outer parentheses, * but can have parenthesized subclauses. * + * It might appear that we could fold the first two alternatives into one + * by using opt_distinct_clause. However, that causes a shift/reduce conflict + * against INSERT ... SELECT ... ON CONFLICT. We avoid the ambiguity by + * requiring SELECT DISTINCT [ON] to be followed by a non-empty target_list. + * * Note that sort clauses cannot be included at this level --- SQL requires * SELECT foo UNION SELECT bar ORDER BY baz * to be parsed as @@ -11295,8 +11354,6 @@ simple_select: * WITH [ RECURSIVE ] [ (,...) ] * AS (query) [ SEARCH or CYCLE clause ] * - * We don't currently support the SEARCH or CYCLE clause. - * * Recognizing WITH_LA here allows a CTE to be named TIME or ORDINALITY. 
*/ with_clause: @@ -11328,13 +11385,15 @@ cte_list: | cte_list ',' common_table_expr { $$ = lappend($1, $3); } ; -common_table_expr: name opt_name_list AS opt_materialized '(' PreparableStmt ')' +common_table_expr: name opt_name_list AS opt_materialized '(' PreparableStmt ')' opt_search_clause opt_cycle_clause { CommonTableExpr *n = makeNode(CommonTableExpr); n->ctename = $1; n->aliascolnames = $2; n->ctematerialized = $4; n->ctequery = $6; + n->search_clause = castNode(CTESearchClause, $8); + n->cycle_clause = castNode(CTECycleClause, $9); n->location = @1; $$ = (Node *) n; } @@ -11346,6 +11405,49 @@ opt_materialized: | /*EMPTY*/ { $$ = CTEMaterializeDefault; } ; +opt_search_clause: + SEARCH DEPTH FIRST_P BY columnList SET ColId + { + CTESearchClause *n = makeNode(CTESearchClause); + n->search_col_list = $5; + n->search_breadth_first = false; + n->search_seq_column = $7; + n->location = @1; + $$ = (Node *) n; + } + | SEARCH BREADTH FIRST_P BY columnList SET ColId + { + CTESearchClause *n = makeNode(CTESearchClause); + n->search_col_list = $5; + n->search_breadth_first = true; + n->search_seq_column = $7; + n->location = @1; + $$ = (Node *) n; + } + | /*EMPTY*/ + { + $$ = NULL; + } + ; + +opt_cycle_clause: + CYCLE columnList SET ColId TO AexprConst DEFAULT AexprConst USING ColId + { + CTECycleClause *n = makeNode(CTECycleClause); + n->cycle_col_list = $2; + n->cycle_mark_column = $4; + n->cycle_mark_value = $6; + n->cycle_mark_default = $8; + n->cycle_path_column = $10; + n->location = @1; + $$ = (Node *) n; + } + | /*EMPTY*/ + { + $$ = NULL; + } + ; + opt_with_clause: with_clause { $$ = $1; } | /*EMPTY*/ { $$ = NULL; } @@ -11448,8 +11550,13 @@ opt_all_clause: | /*EMPTY*/ ; +opt_distinct_clause: + distinct_clause { $$ = $1; } + | opt_all_clause { $$ = NIL; } + ; + opt_sort_clause: - sort_clause { $$ = $1;} + sort_clause { $$ = $1; } | /*EMPTY*/ { $$ = NIL; } ; @@ -15006,6 +15113,73 @@ role_list: RoleSpec { $$ = lappend($1, $3); } ; + 
+/***************************************************************************** + * + * PL/pgSQL extensions + * + * You'd think a PL/pgSQL "expression" should be just an a_expr, but + * historically it can include just about anything that can follow SELECT. + * Therefore the returned struct is a SelectStmt. + *****************************************************************************/ + +PLpgSQL_Expr: opt_distinct_clause opt_target_list + from_clause where_clause + group_clause having_clause window_clause + opt_sort_clause opt_select_limit opt_for_locking_clause + { + SelectStmt *n = makeNode(SelectStmt); + + n->distinctClause = $1; + n->targetList = $2; + n->fromClause = $3; + n->whereClause = $4; + n->groupClause = $5; + n->havingClause = $6; + n->windowClause = $7; + n->sortClause = $8; + if ($9) + { + n->limitOffset = $9->limitOffset; + n->limitCount = $9->limitCount; + if (!n->sortClause && + $9->limitOption == LIMIT_OPTION_WITH_TIES) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("WITH TIES cannot be specified without ORDER BY clause"))); + n->limitOption = $9->limitOption; + } + n->lockingClause = $10; + $$ = (Node *) n; + } + ; + +/* + * PL/pgSQL Assignment statement: name opt_indirection := PLpgSQL_Expr + */ + +PLAssignStmt: plassign_target opt_indirection plassign_equals PLpgSQL_Expr + { + PLAssignStmt *n = makeNode(PLAssignStmt); + + n->name = $1; + n->indirection = check_indirection($2, yyscanner); + /* nnames will be filled by calling production */ + n->val = (SelectStmt *) $4; + n->location = @1; + $$ = (Node *) n; + } + ; + +plassign_target: ColId { $$ = $1; } + | PARAM { $$ = psprintf("$%d", $1); } + ; + +plassign_equals: COLON_EQUALS + | '=' + ; + + /* * Name classification hierarchy. 
* @@ -15092,6 +15266,7 @@ unreserved_keyword: | BACKWARD | BEFORE | BEGIN_P + | BREADTH | BY | CACHE | CALL @@ -15136,6 +15311,7 @@ unreserved_keyword: | DELIMITER | DELIMITERS | DEPENDS + | DEPTH | DETACH | DICTIONARY | DISABLE_P @@ -15603,6 +15779,7 @@ bare_label_keyword: | BIT | BOOLEAN_P | BOTH + | BREADTH | BY | CACHE | CALL @@ -15667,6 +15844,7 @@ bare_label_keyword: | DELIMITER | DELIMITERS | DEPENDS + | DEPTH | DESC | DETACH | DICTIONARY diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 783f3fe8f2d1c..fd08b9eeff099 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -3,7 +3,7 @@ * parse_agg.c * handle aggregates and window functions in parser * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -545,6 +545,10 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr) break; + case EXPR_KIND_CYCLE_MARK: + errkind = true; + break; + /* * There is intentionally no default: case here, so that the * compiler will warn if we add a new ParseExprKind without @@ -933,6 +937,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, case EXPR_KIND_GENERATED_COLUMN: err = _("window functions are not allowed in column generation expressions"); break; + case EXPR_KIND_CYCLE_MARK: + errkind = true; + break; /* * There is intentionally no default: case here, so that the diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index ea4a1f5aeb9f5..672245ded7a30 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -3,7 +3,7 @@ * parse_clause.c * handle clauses in parser * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of 
the University of California * * diff --git a/src/backend/parser/parse_coerce.c b/src/backend/parser/parse_coerce.c index e33618f9744d3..d5310f27db1d2 100644 --- a/src/backend/parser/parse_coerce.c +++ b/src/backend/parser/parse_coerce.c @@ -3,7 +3,7 @@ * parse_coerce.c * handle type coercions/conversions for parser * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1541,7 +1541,7 @@ select_common_typmod(ParseState *pstate, List *exprs, Oid common_type) foreach(lc, exprs) { - Node *expr = (Node *) lfirst(lc); + Node *expr = (Node *) lfirst(lc); /* Types must match */ if (exprType(expr) != common_type) @@ -2380,7 +2380,8 @@ enforce_generic_type_consistency(const Oid *actual_arg_types, if (!OidIsValid(elem_typeid)) { /* - * if we don't have an element type yet, use the one we just got + * if we don't have an element type yet, use the one we just + * got */ elem_typeid = range_typelem; } @@ -3097,6 +3098,14 @@ find_coercion_pathway(Oid targetTypeId, Oid sourceTypeId, } } + /* + * When parsing PL/pgSQL assignments, allow an I/O cast to be used + * whenever no normal coercion is available. + */ + if (result == COERCION_PATH_NONE && + ccontext == COERCION_PLPGSQL) + result = COERCION_PATH_COERCEVIAIO; + return result; } diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c index 13e62a201563c..4133526f04601 100644 --- a/src/backend/parser/parse_collate.c +++ b/src/backend/parser/parse_collate.c @@ -29,7 +29,7 @@ * at runtime. If we knew exactly which functions require collation * information, we could throw those errors at parse time instead. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/parser/parse_cte.c b/src/backend/parser/parse_cte.c index 1fca7485ca3ba..f4f7041ead09d 100644 --- a/src/backend/parser/parse_cte.c +++ b/src/backend/parser/parse_cte.c @@ -3,7 +3,7 @@ * parse_cte.c * handle CTEs (common table expressions) in parser * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -18,9 +18,13 @@ #include "catalog/pg_type.h" #include "nodes/nodeFuncs.h" #include "parser/analyze.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" #include "parser/parse_cte.h" +#include "parser/parse_expr.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/typcache.h" /* Enumeration of contexts in which a self-reference is disallowed */ @@ -334,6 +338,195 @@ analyzeCTE(ParseState *pstate, CommonTableExpr *cte) if (lctyp != NULL || lctypmod != NULL || lccoll != NULL) /* shouldn't happen */ elog(ERROR, "wrong number of output columns in WITH"); } + + if (cte->search_clause || cte->cycle_clause) + { + Query *ctequery; + SetOperationStmt *sos; + + if (!cte->cterecursive) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("WITH query is not recursive"), + parser_errposition(pstate, cte->location))); + + /* + * SQL requires a WITH list element (CTE) to be "expandable" in order + * to allow a search or cycle clause. That is a stronger requirement + * than just being recursive. It basically means the query expression + * looks like + * + * non-recursive query UNION [ALL] recursive query + * + * and that the recursive query is not itself a set operation. 
+ * + * As of this writing, most of these criteria are already satisfied by + * all recursive CTEs allowed by PostgreSQL. In the future, if + * further variants recursive CTEs are accepted, there might be + * further checks required here to determine what is "expandable". + */ + + ctequery = castNode(Query, cte->ctequery); + Assert(ctequery->setOperations); + sos = castNode(SetOperationStmt, ctequery->setOperations); + + /* + * This left side check is not required for expandability, but + * rewriteSearchAndCycle() doesn't currently have support for it, so + * we catch it here. + */ + if (!IsA(sos->larg, RangeTblRef)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("with a SEARCH or CYCLE clause, the left side of the UNION must be a SELECT"))); + + if (!IsA(sos->rarg, RangeTblRef)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("with a SEARCH or CYCLE clause, the right side of the UNION must be a SELECT"))); + } + + if (cte->search_clause) + { + ListCell *lc; + List *seen = NIL; + + foreach(lc, cte->search_clause->search_col_list) + { + Value *colname = lfirst(lc); + + if (!list_member(cte->ctecolnames, colname)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("search column \"%s\" not in WITH query column list", + strVal(colname)), + parser_errposition(pstate, cte->search_clause->location))); + + if (list_member(seen, colname)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("search column \"%s\" specified more than once", + strVal(colname)), + parser_errposition(pstate, cte->search_clause->location))); + seen = lappend(seen, colname); + } + + if (list_member(cte->ctecolnames, makeString(cte->search_clause->search_seq_column))) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("search sequence column name \"%s\" already used in WITH query column list", + cte->search_clause->search_seq_column), + parser_errposition(pstate, cte->search_clause->location)); + } + + if (cte->cycle_clause) + { + 
ListCell *lc; + List *seen = NIL; + TypeCacheEntry *typentry; + Oid op; + + foreach(lc, cte->cycle_clause->cycle_col_list) + { + Value *colname = lfirst(lc); + + if (!list_member(cte->ctecolnames, colname)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cycle column \"%s\" not in WITH query column list", + strVal(colname)), + parser_errposition(pstate, cte->cycle_clause->location))); + + if (list_member(seen, colname)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("cycle column \"%s\" specified more than once", + strVal(colname)), + parser_errposition(pstate, cte->cycle_clause->location))); + seen = lappend(seen, colname); + } + + if (list_member(cte->ctecolnames, makeString(cte->cycle_clause->cycle_mark_column))) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cycle mark column name \"%s\" already used in WITH query column list", + cte->cycle_clause->cycle_mark_column), + parser_errposition(pstate, cte->cycle_clause->location)); + + cte->cycle_clause->cycle_mark_value = transformExpr(pstate, cte->cycle_clause->cycle_mark_value, + EXPR_KIND_CYCLE_MARK); + cte->cycle_clause->cycle_mark_default = transformExpr(pstate, cte->cycle_clause->cycle_mark_default, + EXPR_KIND_CYCLE_MARK); + + if (list_member(cte->ctecolnames, makeString(cte->cycle_clause->cycle_path_column))) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cycle path column name \"%s\" already used in WITH query column list", + cte->cycle_clause->cycle_path_column), + parser_errposition(pstate, cte->cycle_clause->location)); + + if (strcmp(cte->cycle_clause->cycle_mark_column, + cte->cycle_clause->cycle_path_column) == 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cycle mark column name and cycle path column name are the same"), + parser_errposition(pstate, cte->cycle_clause->location)); + + cte->cycle_clause->cycle_mark_type = select_common_type(pstate, + list_make2(cte->cycle_clause->cycle_mark_value, + 
cte->cycle_clause->cycle_mark_default), + "CYCLE", NULL); + cte->cycle_clause->cycle_mark_value = coerce_to_common_type(pstate, + cte->cycle_clause->cycle_mark_value, + cte->cycle_clause->cycle_mark_type, + "CYCLE/SET/TO"); + cte->cycle_clause->cycle_mark_default = coerce_to_common_type(pstate, + cte->cycle_clause->cycle_mark_default, + cte->cycle_clause->cycle_mark_type, + "CYCLE/SET/DEFAULT"); + + cte->cycle_clause->cycle_mark_typmod = select_common_typmod(pstate, + list_make2(cte->cycle_clause->cycle_mark_value, + cte->cycle_clause->cycle_mark_default), + cte->cycle_clause->cycle_mark_type); + + cte->cycle_clause->cycle_mark_collation = select_common_collation(pstate, + list_make2(cte->cycle_clause->cycle_mark_value, + cte->cycle_clause->cycle_mark_default), + true); + + typentry = lookup_type_cache(cte->cycle_clause->cycle_mark_type, TYPECACHE_EQ_OPR); + if (!typentry->eq_opr) + ereport(ERROR, + errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(cte->cycle_clause->cycle_mark_type))); + op = get_negator(typentry->eq_opr); + if (!op) + ereport(ERROR, + errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an inequality operator for type %s", + format_type_be(cte->cycle_clause->cycle_mark_type))); + + cte->cycle_clause->cycle_mark_neop = op; + } + + if (cte->search_clause && cte->cycle_clause) + { + if (strcmp(cte->search_clause->search_seq_column, + cte->cycle_clause->cycle_mark_column) == 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("search sequence column name and cycle mark column name are the same"), + parser_errposition(pstate, cte->search_clause->location)); + + if (strcmp(cte->search_clause->search_seq_column, + cte->cycle_clause->cycle_path_column) == 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("search_sequence column name and cycle path column name are the same"), + parser_errposition(pstate, cte->search_clause->location)); + } } /* diff 
--git a/src/backend/parser/parse_enr.c b/src/backend/parser/parse_enr.c index 625ded0707a85..8a4071a819a98 100644 --- a/src/backend/parser/parse_enr.c +++ b/src/backend/parser/parse_enr.c @@ -3,7 +3,7 @@ * parse_enr.c * parser support routines dealing with ephemeral named relations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index ffc96e2a6faff..6c87783b2c788 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -3,7 +3,7 @@ * parse_expr.c * handle expressions in parser * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -507,6 +507,7 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref) case EXPR_KIND_CALL_ARGUMENT: case EXPR_KIND_COPY_WHERE: case EXPR_KIND_GENERATED_COLUMN: + case EXPR_KIND_CYCLE_MARK: /* okay */ break; @@ -1723,6 +1724,7 @@ transformSubLink(ParseState *pstate, SubLink *sublink) case EXPR_KIND_RETURNING: case EXPR_KIND_VALUES: case EXPR_KIND_VALUES_SINGLE: + case EXPR_KIND_CYCLE_MARK: /* okay */ break; case EXPR_KIND_CHECK_CONSTRAINT: @@ -3044,6 +3046,8 @@ ParseExprKindName(ParseExprKind exprKind) return "WHERE"; case EXPR_KIND_GENERATED_COLUMN: return "GENERATED AS"; + case EXPR_KIND_CYCLE_MARK: + return "CYCLE"; /* * There is intentionally no default: case here, so that the diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index 23ac2a2fe6533..37cebc7d829cc 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -3,7 +3,7 @@ * parse_func.c * handle function calls in parser * - * Portions Copyright (c) 1996-2020, PostgreSQL 
Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -2527,6 +2527,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location) case EXPR_KIND_GENERATED_COLUMN: err = _("set-returning functions are not allowed in column generation expressions"); break; + case EXPR_KIND_CYCLE_MARK: + errkind = true; + break; /* * There is intentionally no default: case here, so that the diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c index 3e20dfff2e250..17c900da31b35 100644 --- a/src/backend/parser/parse_node.c +++ b/src/backend/parser/parse_node.c @@ -3,7 +3,7 @@ * parse_node.c * various routines that make nodes for querytrees * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/parser/parse_oper.c b/src/backend/parser/parse_oper.c index e72d3676f16ed..24013bcac9c61 100644 --- a/src/backend/parser/parse_oper.c +++ b/src/backend/parser/parse_oper.c @@ -3,7 +3,7 @@ * parse_oper.c * handle operator things for parser * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/parser/parse_param.c b/src/backend/parser/parse_param.c index 93c9d82d017d5..68a5534393968 100644 --- a/src/backend/parser/parse_param.c +++ b/src/backend/parser/parse_param.c @@ -12,7 +12,7 @@ * Note that other approaches to parameters are possible using the parser * hooks defined in ParseState. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index a56bd86181a70..43db4e9af8bf6 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -3,7 +3,7 @@ * parse_relation.c * parser support routines dealing with relations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -2235,6 +2235,8 @@ addRangeTableEntryForCTE(ParseState *pstate, int numaliases; int varattno; ListCell *lc; + int n_dontexpand_columns = 0; + ParseNamespaceItem *psi; Assert(pstate != NULL); @@ -2267,9 +2269,9 @@ addRangeTableEntryForCTE(ParseState *pstate, parser_errposition(pstate, rv->location))); } - rte->coltypes = cte->ctecoltypes; - rte->coltypmods = cte->ctecoltypmods; - rte->colcollations = cte->ctecolcollations; + rte->coltypes = list_copy(cte->ctecoltypes); + rte->coltypmods = list_copy(cte->ctecoltypmods); + rte->colcollations = list_copy(cte->ctecolcollations); rte->alias = alias; if (alias) @@ -2294,6 +2296,34 @@ addRangeTableEntryForCTE(ParseState *pstate, rte->eref = eref; + if (cte->search_clause) + { + rte->eref->colnames = lappend(rte->eref->colnames, makeString(cte->search_clause->search_seq_column)); + if (cte->search_clause->search_breadth_first) + rte->coltypes = lappend_oid(rte->coltypes, RECORDOID); + else + rte->coltypes = lappend_oid(rte->coltypes, RECORDARRAYOID); + rte->coltypmods = lappend_int(rte->coltypmods, -1); + rte->colcollations = lappend_oid(rte->colcollations, InvalidOid); + + n_dontexpand_columns += 1; + } + + if (cte->cycle_clause) + { + rte->eref->colnames = lappend(rte->eref->colnames, 
makeString(cte->cycle_clause->cycle_mark_column)); + rte->coltypes = lappend_oid(rte->coltypes, cte->cycle_clause->cycle_mark_type); + rte->coltypmods = lappend_int(rte->coltypmods, cte->cycle_clause->cycle_mark_typmod); + rte->colcollations = lappend_oid(rte->colcollations, cte->cycle_clause->cycle_mark_collation); + + rte->eref->colnames = lappend(rte->eref->colnames, makeString(cte->cycle_clause->cycle_path_column)); + rte->coltypes = lappend_oid(rte->coltypes, RECORDARRAYOID); + rte->coltypmods = lappend_int(rte->coltypmods, -1); + rte->colcollations = lappend_oid(rte->colcollations, InvalidOid); + + n_dontexpand_columns += 2; + } + /* * Set flags and access permissions. * @@ -2321,9 +2351,19 @@ addRangeTableEntryForCTE(ParseState *pstate, * Build a ParseNamespaceItem, but don't add it to the pstate's namespace * list --- caller must do that if appropriate. */ - return buildNSItemFromLists(rte, list_length(pstate->p_rtable), + psi = buildNSItemFromLists(rte, list_length(pstate->p_rtable), rte->coltypes, rte->coltypmods, rte->colcollations); + + /* + * The columns added by search and cycle clauses are not included in star + * expansion in queries contained in the CTE. 
+ */ + if (rte->ctelevelsup > 0) + for (int i = 0; i < n_dontexpand_columns; i++) + psi->p_nscolumns[list_length(psi->p_rte->eref->colnames) - 1 - i].p_dontexpand = true; + + return psi; } /* @@ -3008,7 +3048,11 @@ expandNSItemVars(ParseNamespaceItem *nsitem, const char *colname = strVal(colnameval); ParseNamespaceColumn *nscol = nsitem->p_nscolumns + colindex; - if (colname[0]) + if (nscol->p_dontexpand) + { + /* skip */ + } + else if (colname[0]) { Var *var; diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index 3dda8e2847db5..51ecc16c42efe 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -3,7 +3,7 @@ * parse_target.c * handle target lists * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -34,17 +34,6 @@ static void markTargetListOrigin(ParseState *pstate, TargetEntry *tle, Var *var, int levelsup); -static Node *transformAssignmentIndirection(ParseState *pstate, - Node *basenode, - const char *targetName, - bool targetIsSubscripting, - Oid targetTypeId, - int32 targetTypMod, - Oid targetCollation, - List *indirection, - ListCell *indirection_cell, - Node *rhs, - int location); static Node *transformAssignmentSubscripts(ParseState *pstate, Node *basenode, const char *targetName, @@ -56,6 +45,7 @@ static Node *transformAssignmentSubscripts(ParseState *pstate, List *indirection, ListCell *next_indirection, Node *rhs, + CoercionContext ccontext, int location); static List *ExpandColumnRefStar(ParseState *pstate, ColumnRef *cref, bool make_target_entry); @@ -409,8 +399,23 @@ markTargetListOrigin(ParseState *pstate, TargetEntry *tle, { CommonTableExpr *cte = GetCTEForRTE(pstate, rte, netlevelsup); TargetEntry *ste; + List *tl = GetCTETargetList(cte); + int extra_cols = 0; + + /* + * RTE for CTE will already have the 
search and cycle columns + * added, but the subquery won't, so skip looking those up. + */ + if (cte->search_clause) + extra_cols += 1; + if (cte->cycle_clause) + extra_cols += 2; + if (extra_cols && + attnum > list_length(tl) && + attnum <= list_length(tl) + extra_cols) + break; - ste = get_tle_by_resno(GetCTETargetList(cte), attnum); + ste = get_tle_by_resno(tl, attnum); if (ste == NULL || ste->resjunk) elog(ERROR, "CTE %s does not have attribute %d", rte->eref->aliasname, attnum); @@ -561,6 +566,7 @@ transformAssignedExpr(ParseState *pstate, indirection, list_head(indirection), (Node *) expr, + COERCION_ASSIGNMENT, location); } else @@ -642,15 +648,15 @@ updateTargetListEntry(ParseState *pstate, /* * Process indirection (field selection or subscripting) of the target - * column in INSERT/UPDATE. This routine recurses for multiple levels - * of indirection --- but note that several adjacent A_Indices nodes in - * the indirection list are treated as a single multidimensional subscript + * column in INSERT/UPDATE/assignment. This routine recurses for multiple + * levels of indirection --- but note that several adjacent A_Indices nodes + * in the indirection list are treated as a single multidimensional subscript * operation. * * In the initial call, basenode is a Var for the target column in UPDATE, - * or a null Const of the target's type in INSERT. In recursive calls, - * basenode is NULL, indicating that a substitute node should be consed up if - * needed. + * or a null Const of the target's type in INSERT, or a Param for the target + * variable in PL/pgSQL assignment. In recursive calls, basenode is NULL, + * indicating that a substitute node should be consed up if needed. * * targetName is the name of the field or subfield we're assigning to, and * targetIsSubscripting is true if we're subscripting it. 
These are just for @@ -667,12 +673,16 @@ updateTargetListEntry(ParseState *pstate, * rhs is the already-transformed value to be assigned; note it has not been * coerced to any particular type. * + * ccontext is the coercion level to use while coercing the rhs. For + * normal statements it'll be COERCION_ASSIGNMENT, but PL/pgSQL uses + * a special value. + * * location is the cursor error position for any errors. (Note: this points * to the head of the target clause, eg "foo" in "foo.bar[baz]". Later we * might want to decorate indirection cells with their own location info, * in which case the location argument could probably be dropped.) */ -static Node * +Node * transformAssignmentIndirection(ParseState *pstate, Node *basenode, const char *targetName, @@ -683,6 +693,7 @@ transformAssignmentIndirection(ParseState *pstate, List *indirection, ListCell *indirection_cell, Node *rhs, + CoercionContext ccontext, int location) { Node *result; @@ -757,6 +768,7 @@ transformAssignmentIndirection(ParseState *pstate, indirection, i, rhs, + ccontext, location); } @@ -807,6 +819,7 @@ transformAssignmentIndirection(ParseState *pstate, indirection, lnext(indirection, i), rhs, + ccontext, location); /* and build a FieldStore node */ @@ -845,6 +858,7 @@ transformAssignmentIndirection(ParseState *pstate, indirection, NULL, rhs, + ccontext, location); } @@ -853,7 +867,7 @@ transformAssignmentIndirection(ParseState *pstate, result = coerce_to_target_type(pstate, rhs, exprType(rhs), targetTypeId, targetTypMod, - COERCION_ASSIGNMENT, + ccontext, COERCE_IMPLICIT_CAST, -1); if (result == NULL) @@ -898,6 +912,7 @@ transformAssignmentSubscripts(ParseState *pstate, List *indirection, ListCell *next_indirection, Node *rhs, + CoercionContext ccontext, int location) { Node *result; @@ -949,6 +964,7 @@ transformAssignmentSubscripts(ParseState *pstate, indirection, next_indirection, rhs, + ccontext, location); /* @@ -969,7 +985,7 @@ transformAssignmentSubscripts(ParseState *pstate, result = 
coerce_to_target_type(pstate, result, resulttype, targetTypeId, targetTypMod, - COERCION_ASSIGNMENT, + ccontext, COERCE_IMPLICIT_CAST, -1); /* can fail if we had int2vector/oidvector, but not for true domains */ diff --git a/src/backend/parser/parse_type.c b/src/backend/parser/parse_type.c index 2709f6f9c7953..abe131ebebfc2 100644 --- a/src/backend/parser/parse_type.c +++ b/src/backend/parser/parse_type.c @@ -3,7 +3,7 @@ * parse_type.c * handle type operations for parser * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -719,13 +719,6 @@ pts_error_callback(void *arg) const char *str = (const char *) arg; errcontext("invalid type name \"%s\"", str); - - /* - * Currently we just suppress any syntax error position report, rather - * than transforming to an "internal query" error. It's unlikely that a - * type name is complex enough to need positioning. 
- */ - errposition(0); } /* @@ -737,11 +730,7 @@ pts_error_callback(void *arg) TypeName * typeStringToTypeName(const char *str) { - StringInfoData buf; List *raw_parsetree_list; - SelectStmt *stmt; - ResTarget *restarget; - TypeCast *typecast; TypeName *typeName; ErrorContextCallback ptserrcontext; @@ -749,9 +738,6 @@ typeStringToTypeName(const char *str) if (strspn(str, " \t\n\r\f") == strlen(str)) goto fail; - initStringInfo(&buf); - appendStringInfo(&buf, "SELECT NULL::%s", str); - /* * Setup error traceback support in case of ereport() during parse */ @@ -760,58 +746,18 @@ typeStringToTypeName(const char *str) ptserrcontext.previous = error_context_stack; error_context_stack = &ptserrcontext; - raw_parsetree_list = raw_parser(buf.data); + raw_parsetree_list = raw_parser(str, RAW_PARSE_TYPE_NAME); error_context_stack = ptserrcontext.previous; - /* - * Make sure we got back exactly what we expected and no more; paranoia is - * justified since the string might contain anything. - */ - if (list_length(raw_parsetree_list) != 1) - goto fail; - stmt = (SelectStmt *) linitial_node(RawStmt, raw_parsetree_list)->stmt; - if (stmt == NULL || - !IsA(stmt, SelectStmt) || - stmt->distinctClause != NIL || - stmt->intoClause != NULL || - stmt->fromClause != NIL || - stmt->whereClause != NULL || - stmt->groupClause != NIL || - stmt->havingClause != NULL || - stmt->windowClause != NIL || - stmt->valuesLists != NIL || - stmt->sortClause != NIL || - stmt->limitOffset != NULL || - stmt->limitCount != NULL || - stmt->lockingClause != NIL || - stmt->withClause != NULL || - stmt->op != SETOP_NONE) - goto fail; - if (list_length(stmt->targetList) != 1) - goto fail; - restarget = (ResTarget *) linitial(stmt->targetList); - if (restarget == NULL || - !IsA(restarget, ResTarget) || - restarget->name != NULL || - restarget->indirection != NIL) - goto fail; - typecast = (TypeCast *) restarget->val; - if (typecast == NULL || - !IsA(typecast, TypeCast) || - typecast->arg == NULL || - 
!IsA(typecast->arg, A_Const)) - goto fail; + /* We should get back exactly one TypeName node. */ + Assert(list_length(raw_parsetree_list) == 1); + typeName = linitial_node(TypeName, raw_parsetree_list); - typeName = typecast->typeName; - if (typeName == NULL || - !IsA(typeName, TypeName)) - goto fail; + /* The grammar allows SETOF in TypeName, but we don't want that here. */ if (typeName->setof) goto fail; - pfree(buf.data); - return typeName; fail: diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 89ee990599121..b31f3afa0391a 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -16,7 +16,7 @@ * a quick copyObject() call before manipulating the query tree. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/parser/parse_utilcmd.c diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c index be86eb37feff8..875de7ba28fe0 100644 --- a/src/backend/parser/parser.c +++ b/src/backend/parser/parser.c @@ -10,7 +10,7 @@ * analyze.c and related files. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -35,11 +35,11 @@ static char *str_udeescape(const char *str, char escape, * raw_parser * Given a query in string form, do lexical and grammatical analysis. * - * Returns a list of raw (un-analyzed) parse trees. The immediate elements - * of the list are always RawStmt nodes. + * Returns a list of raw (un-analyzed) parse trees. The contents of the + * list have the form required by the specified RawParseMode. 
*/ List * -raw_parser(const char *str) +raw_parser(const char *str, RawParseMode mode) { core_yyscan_t yyscanner; base_yy_extra_type yyextra; @@ -49,8 +49,26 @@ raw_parser(const char *str) yyscanner = scanner_init(str, &yyextra.core_yy_extra, &ScanKeywords, ScanKeywordTokens); - /* base_yylex() only needs this much initialization */ - yyextra.have_lookahead = false; + /* base_yylex() only needs us to initialize the lookahead token, if any */ + if (mode == RAW_PARSE_DEFAULT) + yyextra.have_lookahead = false; + else + { + /* this array is indexed by RawParseMode enum */ + static const int mode_token[] = { + 0, /* RAW_PARSE_DEFAULT */ + MODE_TYPE_NAME, /* RAW_PARSE_TYPE_NAME */ + MODE_PLPGSQL_EXPR, /* RAW_PARSE_PLPGSQL_EXPR */ + MODE_PLPGSQL_ASSIGN1, /* RAW_PARSE_PLPGSQL_ASSIGN1 */ + MODE_PLPGSQL_ASSIGN2, /* RAW_PARSE_PLPGSQL_ASSIGN2 */ + MODE_PLPGSQL_ASSIGN3 /* RAW_PARSE_PLPGSQL_ASSIGN3 */ + }; + + yyextra.have_lookahead = true; + yyextra.lookahead_token = mode_token[mode]; + yyextra.lookahead_yylloc = 0; + yyextra.lookahead_end = NULL; + } /* initialize the bison parser */ parser_init(&yyextra); @@ -104,7 +122,8 @@ base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner) cur_token = yyextra->lookahead_token; lvalp->core_yystype = yyextra->lookahead_yylval; *llocp = yyextra->lookahead_yylloc; - *(yyextra->lookahead_end) = yyextra->lookahead_hold_char; + if (yyextra->lookahead_end) + *(yyextra->lookahead_end) = yyextra->lookahead_hold_char; yyextra->have_lookahead = false; } else diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 4eab2980c9908..9f9d8a17061d1 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -22,7 +22,7 @@ * Postgres 9.2, this check is made automatically by the Makefile.) 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c index d07cbafcee757..f55caccddfda4 100644 --- a/src/backend/parser/scansup.c +++ b/src/backend/parser/scansup.c @@ -3,7 +3,7 @@ * scansup.c * scanner support routines used by the core lexer * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index 299f5deb3290a..0c3f212ff21e6 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -3,7 +3,7 @@ * partbounds.c * Support routines for manipulating partition bounds * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -224,7 +224,6 @@ static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, PartitionRangeBound *probe, int32 *cmpval); -static int get_partition_bound_num_indexes(PartitionBoundInfo b); static Expr *make_partition_op_expr(PartitionKey key, int keynum, uint16 strategy, Expr *arg1, Expr *arg2); static Oid get_partition_operator(PartitionKey key, int col, @@ -398,6 +397,7 @@ create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->ndatums = ndatums; boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); + boundinfo->nindexes = greatest_modulus; boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int)); for (i = 
0; i < greatest_modulus; i++) boundinfo->indexes[i] = -1; @@ -530,6 +530,7 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->ndatums = ndatums; boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); + boundinfo->nindexes = ndatums; boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); /* @@ -725,8 +726,9 @@ create_range_bounds(PartitionBoundSpec **boundspecs, int nparts, /* * For range partitioning, an additional value of -1 is stored as the last - * element. + * element of the indexes[] array. */ + boundinfo->nindexes = ndatums + 1; boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int)); for (i = 0; i < ndatums; i++) @@ -807,45 +809,41 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, if (b1->ndatums != b2->ndatums) return false; + if (b1->nindexes != b2->nindexes) + return false; + if (b1->null_index != b2->null_index) return false; if (b1->default_index != b2->default_index) return false; - if (b1->strategy == PARTITION_STRATEGY_HASH) + /* For all partition strategies, the indexes[] arrays have to match */ + for (i = 0; i < b1->nindexes; i++) { - int greatest_modulus = get_hash_partition_greatest_modulus(b1); - - /* - * If two hash partitioned tables have different greatest moduli, - * their partition schemes don't match. - */ - if (greatest_modulus != get_hash_partition_greatest_modulus(b2)) + if (b1->indexes[i] != b2->indexes[i]) return false; + } + /* Finally, compare the datums[] arrays */ + if (b1->strategy == PARTITION_STRATEGY_HASH) + { /* * We arrange the partitions in the ascending order of their moduli * and remainders. Also every modulus is factor of next larger * modulus. Therefore we can safely store index of a given partition * in indexes array at remainder of that partition. Also entries at * (remainder + N * modulus) positions in indexes array are all same - * for (modulus, remainder) specification for any partition. 
Thus - * datums array from both the given bounds are same, if and only if - * their indexes array will be same. So, it suffices to compare - * indexes array. - */ - for (i = 0; i < greatest_modulus; i++) - if (b1->indexes[i] != b2->indexes[i]) - return false; - -#ifdef USE_ASSERT_CHECKING - - /* - * Nonetheless make sure that the bounds are indeed same when the + * for (modulus, remainder) specification for any partition. Thus the + * datums arrays from the given bounds are the same, if and only if + * their indexes arrays are the same. So, it suffices to compare the + * indexes arrays. + * + * Nonetheless make sure that the bounds are indeed the same when the * indexes match. Hash partition bound stores modulus and remainder * at b1->datums[i][0] and b1->datums[i][1] position respectively. */ +#ifdef USE_ASSERT_CHECKING for (i = 0; i < b1->ndatums; i++) Assert((b1->datums[i][0] == b2->datums[i][0] && b1->datums[i][1] == b2->datums[i][1])); @@ -891,15 +889,7 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, parttypbyval[j], parttyplen[j])) return false; } - - if (b1->indexes[i] != b2->indexes[i]) - return false; } - - /* There are ndatums+1 indexes in case of range partitions */ - if (b1->strategy == PARTITION_STRATEGY_RANGE && - b1->indexes[i] != b2->indexes[i]) - return false; } return true; } @@ -920,8 +910,8 @@ partition_bounds_copy(PartitionBoundInfo src, PartitionBoundInfo dest; int i; int ndatums; + int nindexes; int partnatts; - int num_indexes; bool hash_part; int natts; @@ -929,10 +919,9 @@ partition_bounds_copy(PartitionBoundInfo src, dest->strategy = src->strategy; ndatums = dest->ndatums = src->ndatums; + nindexes = dest->nindexes = src->nindexes; partnatts = key->partnatts; - num_indexes = get_partition_bound_num_indexes(src); - /* List partitioned tables have only a single partition key. 
*/ Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1); @@ -990,8 +979,8 @@ partition_bounds_copy(PartitionBoundInfo src, } } - dest->indexes = (int *) palloc(sizeof(int) * num_indexes); - memcpy(dest->indexes, src->indexes, sizeof(int) * num_indexes); + dest->indexes = (int *) palloc(sizeof(int) * nindexes); + memcpy(dest->indexes, src->indexes, sizeof(int) * nindexes); dest->null_index = src->null_index; dest->default_index = src->default_index; @@ -2456,6 +2445,7 @@ build_merged_partition_bounds(char strategy, List *merged_datums, } Assert(list_length(merged_indexes) == ndatums); + merged_bounds->nindexes = ndatums; merged_bounds->indexes = (int *) palloc(sizeof(int) * ndatums); pos = 0; foreach(lc, merged_indexes) @@ -2889,7 +2879,7 @@ check_new_partition_bound(char *relname, Relation parent, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("every hash partition modulus must be a factor of the next larger modulus"))); - greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); + greatest_modulus = boundinfo->nindexes; remainder = spec->remainder; /* @@ -3282,18 +3272,15 @@ check_default_partition_contents(Relation parent, Relation default_rel, /* * get_hash_partition_greatest_modulus * - * Returns the greatest modulus of the hash partition bound. The greatest - * modulus will be at the end of the datums array because hash partitions are - * arranged in the ascending order of their moduli and remainders. + * Returns the greatest modulus of the hash partition bound. + * This is no longer used in the core code, but we keep it around + * in case external modules are using it. 
*/ int get_hash_partition_greatest_modulus(PartitionBoundInfo bound) { Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH); - Assert(bound->datums && bound->ndatums > 0); - Assert(DatumGetInt32(bound->datums[bound->ndatums - 1][0]) > 0); - - return DatumGetInt32(bound->datums[bound->ndatums - 1][0]); + return bound->nindexes; } /* @@ -3697,46 +3684,6 @@ qsort_partition_rbound_cmp(const void *a, const void *b, void *arg) b1, b2); } -/* - * get_partition_bound_num_indexes - * - * Returns the number of the entries in the partition bound indexes array. - */ -static int -get_partition_bound_num_indexes(PartitionBoundInfo bound) -{ - int num_indexes; - - Assert(bound); - - switch (bound->strategy) - { - case PARTITION_STRATEGY_HASH: - - /* - * The number of the entries in the indexes array is same as the - * greatest modulus. - */ - num_indexes = get_hash_partition_greatest_modulus(bound); - break; - - case PARTITION_STRATEGY_LIST: - num_indexes = bound->ndatums; - break; - - case PARTITION_STRATEGY_RANGE: - /* Range partitioned table has an extra index. 
*/ - num_indexes = bound->ndatums + 1; - break; - - default: - elog(ERROR, "unexpected partition strategy: %d", - (int) bound->strategy); - } - - return num_indexes; -} - /* * get_partition_operator * diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c index 5b0a15ac0b782..f852b6e99de86 100644 --- a/src/backend/partitioning/partdesc.c +++ b/src/backend/partitioning/partdesc.c @@ -3,7 +3,7 @@ * partdesc.c * Support routines for manipulating partition descriptors * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index e7c7a6deb6a54..d08739127b9cf 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -25,7 +25,7 @@ * * See gen_partprune_steps_internal() for more details on step generation. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -138,10 +138,12 @@ typedef struct PruneStepResult } PruneStepResult; +static List *add_part_relids(List *allpartrelids, Bitmapset *partrelids); static List *make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, + List *prunequal, + Bitmapset *partrelids, int *relid_subplan_map, - Relids partrelids, List *prunequal, Bitmapset **matchedsubplans); static void gen_partprune_steps(RelOptInfo *rel, List *clauses, PartClauseTarget target, @@ -213,67 +215,105 @@ static void partkey_datum_from_expr(PartitionPruneContext *context, * * 'parentrel' is the RelOptInfo for an appendrel, and 'subpaths' is the list * of scan paths for its child rels. 
- * - * 'partitioned_rels' is a List containing Lists of relids of partitioned - * tables (a/k/a non-leaf partitions) that are parents of some of the child - * rels. Here we attempt to populate the PartitionPruneInfo by adding a - * 'prune_infos' item for each sublist in the 'partitioned_rels' list. - * However, some of the sets of partitioned relations may not require any - * run-time pruning. In these cases we'll simply not include a 'prune_infos' - * item for that set and instead we'll add all the subplans which belong to - * that set into the PartitionPruneInfo's 'other_subplans' field. Callers - * will likely never want to prune subplans which are mentioned in this field. - * - * 'prunequal' is a list of potential pruning quals. + * 'prunequal' is a list of potential pruning quals (i.e., restriction + * clauses that are applicable to the appendrel). */ PartitionPruneInfo * make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, - List *subpaths, List *partitioned_rels, + List *subpaths, List *prunequal) { PartitionPruneInfo *pruneinfo; Bitmapset *allmatchedsubplans = NULL; + List *allpartrelids; + List *prunerelinfos; int *relid_subplan_map; ListCell *lc; - List *prunerelinfos; int i; /* - * Construct a temporary array to map from planner relids to subplan - * indexes. For convenience, we use 1-based indexes here, so that zero - * can represent an un-filled array entry. + * Scan the subpaths to see which ones are scans of partition child + * relations, and identify their parent partitioned rels. (Note: we must + * restrict the parent partitioned rels to be parentrel or children of + * parentrel, otherwise we couldn't translate prunequal to match.) + * + * Also construct a temporary array to map from partition-child-relation + * relid to the index in 'subpaths' of the scan plan for that partition. + * (Use of "subplan" rather than "subpath" is a bit of a misnomer, but + * we'll let it stand.) 
For convenience, we use 1-based indexes here, so + * that zero can represent an un-filled array entry. */ + allpartrelids = NIL; relid_subplan_map = palloc0(sizeof(int) * root->simple_rel_array_size); - /* - * relid_subplan_map maps relid of a leaf partition to the index in - * 'subpaths' of the scan plan for that partition. - */ i = 1; foreach(lc, subpaths) { Path *path = (Path *) lfirst(lc); RelOptInfo *pathrel = path->parent; - Assert(IS_SIMPLE_REL(pathrel)); - Assert(pathrel->relid < root->simple_rel_array_size); - /* No duplicates please */ - Assert(relid_subplan_map[pathrel->relid] == 0); + /* We don't consider partitioned joins here */ + if (pathrel->reloptkind == RELOPT_OTHER_MEMBER_REL) + { + RelOptInfo *prel = pathrel; + Bitmapset *partrelids = NULL; - relid_subplan_map[pathrel->relid] = i++; + /* + * Traverse up to the pathrel's topmost partitioned parent, + * collecting parent relids as we go; but stop if we reach + * parentrel. (Normally, a pathrel's topmost partitioned parent + * is either parentrel or a UNION ALL appendrel child of + * parentrel. But when handling partitionwise joins of + * multi-level partitioning trees, we can see an append path whose + * parentrel is an intermediate partitioned table.) + */ + do + { + AppendRelInfo *appinfo; + + Assert(prel->relid < root->simple_rel_array_size); + appinfo = root->append_rel_array[prel->relid]; + prel = find_base_rel(root, appinfo->parent_relid); + if (!IS_PARTITIONED_REL(prel)) + break; /* reached a non-partitioned parent */ + /* accept this level as an interesting parent */ + partrelids = bms_add_member(partrelids, prel->relid); + if (prel == parentrel) + break; /* don't traverse above parentrel */ + } while (prel->reloptkind == RELOPT_OTHER_MEMBER_REL); + + if (partrelids) + { + /* + * Found some relevant parent partitions, which may or may not + * overlap with partition trees we already found. Add new + * information to the allpartrelids list. 
+ */ + allpartrelids = add_part_relids(allpartrelids, partrelids); + /* Also record the subplan in relid_subplan_map[] */ + /* No duplicates please */ + Assert(relid_subplan_map[pathrel->relid] == 0); + relid_subplan_map[pathrel->relid] = i; + } + } + i++; } - /* We now build a PartitionedRelPruneInfo for each partitioned rel. */ + /* + * We now build a PartitionedRelPruneInfo for each topmost partitioned rel + * (omitting any that turn out not to have useful pruning quals). + */ prunerelinfos = NIL; - foreach(lc, partitioned_rels) + foreach(lc, allpartrelids) { - Relids partrelids = (Relids) lfirst(lc); + Bitmapset *partrelids = (Bitmapset *) lfirst(lc); List *pinfolist; Bitmapset *matchedsubplans = NULL; pinfolist = make_partitionedrel_pruneinfo(root, parentrel, + prunequal, + partrelids, relid_subplan_map, - partrelids, prunequal, &matchedsubplans); /* When pruning is possible, record the matched subplans */ @@ -299,7 +339,7 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, pruneinfo->prune_infos = prunerelinfos; /* - * Some subplans may not belong to any of the listed partitioned rels. + * Some subplans may not belong to any of the identified partitioned rels. * This can happen for UNION ALL queries which include a non-partitioned * table, or when some of the hierarchies aren't run-time prunable. Build * a bitmapset of the indexes of all such subplans, so that the executor @@ -321,28 +361,86 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, return pruneinfo; } +/* + * add_part_relids + * Add new info to a list of Bitmapsets of partitioned relids. + * + * Within 'allpartrelids', there is one Bitmapset for each topmost parent + * partitioned rel. Each Bitmapset contains the RT indexes of the topmost + * parent as well as its relevant non-leaf child partitions. 
Since (by + * construction of the rangetable list) parent partitions must have lower + * RT indexes than their children, we can distinguish the topmost parent + * as being the lowest set bit in the Bitmapset. + * + * 'partrelids' contains the RT indexes of a parent partitioned rel, and + * possibly some non-leaf children, that are newly identified as parents of + * some subpath rel passed to make_partition_pruneinfo(). These are added + * to an appropriate member of 'allpartrelids'. + * + * Note that the list contains only RT indexes of partitioned tables that + * are parents of some scan-level relation appearing in the 'subpaths' that + * make_partition_pruneinfo() is dealing with. Also, "topmost" parents are + * not allowed to be higher than the 'parentrel' associated with the append + * path. In this way, we avoid expending cycles on partitioned rels that + * can't contribute useful pruning information for the problem at hand. + * (It is possible for 'parentrel' to be a child partitioned table, and it + * is also possible for scan-level relations to be child partitioned tables + * rather than leaf partitions. Hence we must construct this relation set + * with reference to the particular append path we're dealing with, rather + * than looking at the full partitioning structure represented in the + * RelOptInfos.) 
+ */ +static List * +add_part_relids(List *allpartrelids, Bitmapset *partrelids) +{ + Index targetpart; + ListCell *lc; + + /* We can easily get the lowest set bit this way: */ + targetpart = bms_next_member(partrelids, -1); + Assert(targetpart > 0); + + /* Look for a matching topmost parent */ + foreach(lc, allpartrelids) + { + Bitmapset *currpartrelids = (Bitmapset *) lfirst(lc); + Index currtarget = bms_next_member(currpartrelids, -1); + + if (targetpart == currtarget) + { + /* Found a match, so add any new RT indexes to this hierarchy */ + currpartrelids = bms_add_members(currpartrelids, partrelids); + lfirst(lc) = currpartrelids; + return allpartrelids; + } + } + /* No match, so add the new partition hierarchy to the list */ + return lappend(allpartrelids, partrelids); +} + /* * make_partitionedrel_pruneinfo - * Build a List of PartitionedRelPruneInfos, one for each partitioned - * rel. These can be used in the executor to allow additional partition - * pruning to take place. - * - * Here we generate partition pruning steps for 'prunequal' and also build a - * data structure which allows mapping of partition indexes into 'subpaths' - * indexes. - * - * If no non-Const expressions are being compared to the partition key in any - * of the 'partitioned_rels', then we return NIL to indicate no run-time - * pruning should be performed. Run-time pruning would be useless since the - * pruning done during planning will have pruned everything that can be. - * - * On non-NIL return, 'matchedsubplans' is set to the subplan indexes which - * were matched to this partition hierarchy. + * Build a List of PartitionedRelPruneInfos, one for each interesting + * partitioned rel in a partitioning hierarchy. These can be used in the + * executor to allow additional partition pruning to take place. 
+ * + * parentrel: rel associated with the appendpath being considered + * prunequal: potential pruning quals, represented for parentrel + * partrelids: Set of RT indexes identifying relevant partitioned tables + * within a single partitioning hierarchy + * relid_subplan_map[]: maps child relation relids to subplan indexes + * matchedsubplans: on success, receives the set of subplan indexes which + * were matched to this partition hierarchy + * + * If we cannot find any useful run-time pruning steps, return NIL. + * However, on success, each rel identified in partrelids will have + * an element in the result list, even if some of them are useless. */ static List * make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, + List *prunequal, + Bitmapset *partrelids, int *relid_subplan_map, - Relids partrelids, List *prunequal, Bitmapset **matchedsubplans) { RelOptInfo *targetpart = NULL; @@ -781,7 +879,10 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps) scan_default = final_result->scan_default; while ((i = bms_next_member(final_result->bound_offsets, i)) >= 0) { - int partindex = context->boundinfo->indexes[i]; + int partindex; + + Assert(i < context->boundinfo->nindexes); + partindex = context->boundinfo->indexes[i]; if (partindex < 0) { @@ -2418,11 +2519,12 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context, */ Assert(list_length(step_exprs) == cur_keyno || !bms_is_empty(step_nullkeys)); + /* * Note also that for hash partitioning, each partition key should * have either equality clauses or an IS NULL clause, so if a - * partition key doesn't have an expression, it would be specified - * in step_nullkeys. + * partition key doesn't have an expression, it would be specified in + * step_nullkeys. 
*/ Assert(context->rel->part_scheme->strategy != PARTITION_STRATEGY_HASH || @@ -2514,20 +2616,19 @@ get_matching_hash_bounds(PartitionPruneContext *context, for (i = 0; i < partnatts; i++) isnull[i] = bms_is_member(i, nullkeys); - greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); rowHash = compute_partition_hash_value(partnatts, partsupfunc, partcollation, values, isnull); + greatest_modulus = boundinfo->nindexes; if (partindices[rowHash % greatest_modulus] >= 0) result->bound_offsets = bms_make_singleton(rowHash % greatest_modulus); } else { - /* Getting here means at least one hash partition exists. */ - Assert(boundinfo->ndatums > 0); + /* Report all valid offsets into the boundinfo->indexes array. */ result->bound_offsets = bms_add_range(NULL, 0, - boundinfo->ndatums - 1); + boundinfo->nindexes - 1); } /* @@ -3388,30 +3489,20 @@ perform_pruning_combine_step(PartitionPruneContext *context, PartitionPruneStepCombine *cstep, PruneStepResult **step_results) { - ListCell *lc1; - PruneStepResult *result = NULL; + PruneStepResult *result = (PruneStepResult *) palloc0(sizeof(PruneStepResult)); bool firststep; + ListCell *lc1; /* * A combine step without any source steps is an indication to not perform * any partition pruning. Return all datum indexes in that case. */ - result = (PruneStepResult *) palloc0(sizeof(PruneStepResult)); - if (list_length(cstep->source_stepids) == 0) + if (cstep->source_stepids == NIL) { PartitionBoundInfo boundinfo = context->boundinfo; - int rangemax; - - /* - * Add all valid offsets into the boundinfo->indexes array. For range - * partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1) - * valid entries; otherwise there are boundinfo->ndatums. - */ - rangemax = context->strategy == PARTITION_STRATEGY_RANGE ? 
- boundinfo->ndatums : boundinfo->ndatums - 1; result->bound_offsets = - bms_add_range(result->bound_offsets, 0, rangemax); + bms_add_range(NULL, 0, boundinfo->nindexes - 1); result->scan_default = partition_bound_has_default(boundinfo); result->scan_null = partition_bound_accepts_nulls(boundinfo); return result; diff --git a/src/backend/port/atomics.c b/src/backend/port/atomics.c index c4f83706b43bf..f9f8b098a52a0 100644 --- a/src/backend/port/atomics.c +++ b/src/backend/port/atomics.c @@ -3,7 +3,7 @@ * atomics.c * Non-Inline parts of the atomics implementation * - * Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/port/posix_sema.c b/src/backend/port/posix_sema.c index 277b82ca8037a..114da3b30cbfb 100644 --- a/src/backend/port/posix_sema.c +++ b/src/backend/port/posix_sema.c @@ -15,7 +15,7 @@ * forked backends, but they could not be accessed by exec'd backends. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/port/sysv_sema.c b/src/backend/port/sysv_sema.c index 88c2862d58b53..21c883ba9acd7 100644 --- a/src/backend/port/sysv_sema.c +++ b/src/backend/port/sysv_sema.c @@ -4,7 +4,7 @@ * Implement PGSemaphores using SysV semaphore facilities * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index 203555822d9a6..0cc83ffc16af5 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -9,7 +9,7 @@ * exist, though, because mmap'd shmem provides no way to find out how * many processes are attached, which we need for interlocking purposes. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/port/tas/sunstudio_sparc.s b/src/backend/port/tas/sunstudio_sparc.s index 4bebf079de3fe..b13ca7937cd04 100644 --- a/src/backend/port/tas/sunstudio_sparc.s +++ b/src/backend/port/tas/sunstudio_sparc.s @@ -3,7 +3,7 @@ ! sunstudio_sparc.s ! compare and swap for Sun Studio on Sparc ! -! Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +! Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group ! Portions Copyright (c) 1994, Regents of the University of California ! ! 
IDENTIFICATION diff --git a/src/backend/port/tas/sunstudio_x86.s b/src/backend/port/tas/sunstudio_x86.s index d95e17384965a..21d6c636412db 100644 --- a/src/backend/port/tas/sunstudio_x86.s +++ b/src/backend/port/tas/sunstudio_x86.s @@ -3,7 +3,7 @@ / sunstudio_x86.s / compare and swap for Sun Studio on x86 / -/ Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +/ Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group / Portions Copyright (c) 1994, Regents of the University of California / / IDENTIFICATION diff --git a/src/backend/port/win32/crashdump.c b/src/backend/port/win32/crashdump.c index 47114d916cc1d..45b6696ba17e6 100644 --- a/src/backend/port/win32/crashdump.c +++ b/src/backend/port/win32/crashdump.c @@ -28,7 +28,7 @@ * be added, though at the cost of a greater chance of the crash dump failing. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/port/win32/crashdump.c diff --git a/src/backend/port/win32/signal.c b/src/backend/port/win32/signal.c index 3218b38240c25..580a517f3f56f 100644 --- a/src/backend/port/win32/signal.c +++ b/src/backend/port/win32/signal.c @@ -3,7 +3,7 @@ * signal.c * Microsoft Windows Win32 Signal Emulation Functions * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/port/win32/signal.c diff --git a/src/backend/port/win32/socket.c b/src/backend/port/win32/socket.c index 7c7611a01e233..a8012c2798df9 100644 --- a/src/backend/port/win32/socket.c +++ b/src/backend/port/win32/socket.c @@ -3,7 +3,7 @@ * socket.c * Microsoft Windows Win32 Socket Functions * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * 
src/backend/port/win32/socket.c diff --git a/src/backend/port/win32/timer.c b/src/backend/port/win32/timer.c index bb98178fe1d05..53fdae9468b71 100644 --- a/src/backend/port/win32/timer.c +++ b/src/backend/port/win32/timer.c @@ -8,7 +8,7 @@ * - Does not support interval timer (value->it_interval) * - Only supports ITIMER_REAL * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/port/win32/timer.c diff --git a/src/backend/port/win32_sema.c b/src/backend/port/win32_sema.c index d15c4c1dc4256..858b88adae8b2 100644 --- a/src/backend/port/win32_sema.c +++ b/src/backend/port/win32_sema.c @@ -3,7 +3,7 @@ * win32_sema.c * Microsoft Windows Win32 Semaphores Emulation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/port/win32_sema.c diff --git a/src/backend/port/win32_shmem.c b/src/backend/port/win32_shmem.c index 30b07303ff7c0..177315f238d07 100644 --- a/src/backend/port/win32_shmem.c +++ b/src/backend/port/win32_shmem.c @@ -3,7 +3,7 @@ * win32_shmem.c * Implement shared memory using win32 facilities * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/port/win32_shmem.c diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index ed127a1032d61..47e60ca5613f8 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -50,7 +50,7 @@ * there is a window (caused by pgstat delay) on which a worker may choose a * table that was already vacuumed; this is a bug in the current design. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index d209b69ec05e9..dd3dad3de35fa 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -2,7 +2,7 @@ * bgworker.c * POSTGRES pluggable background workers implementation * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/postmaster/bgworker.c diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index a7afa758b618d..715d5195bb671 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -24,7 +24,7 @@ * should be killed by SIGQUIT and then a recovery cycle started. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index a62c6d4d0acc5..54a818bf611a6 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -26,7 +26,7 @@ * restart needs to be forced.) * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/postmaster/fork_process.c b/src/backend/postmaster/fork_process.c index 5247b9f23c9f5..62d068bc1e2e7 100644 --- a/src/backend/postmaster/fork_process.c +++ b/src/backend/postmaster/fork_process.c @@ -4,7 +4,7 @@ * EXEC_BACKEND case; it might be extended to do so, but it would be * considerably more complex. 
* - * Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/postmaster/fork_process.c diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c index ee7dbf924ae62..dd9136a942b6c 100644 --- a/src/backend/postmaster/interrupt.c +++ b/src/backend/postmaster/interrupt.c @@ -3,7 +3,7 @@ * interrupt.c * Interrupt handling routines. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index ed1b65358df85..edec311f12e56 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -14,7 +14,7 @@ * * Initial author: Simon Riggs simon@2ndquadrant.com * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 123369f4faff4..f75b52719dddd 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -11,7 +11,7 @@ * - Add a pgstat config column to pg_database, so this * entire thing can be enabled/disabled on a per db basis. 
* - * Copyright (c) 2001-2020, PostgreSQL Global Development Group + * Copyright (c) 2001-2021, PostgreSQL Global Development Group * * src/backend/postmaster/pgstat.c * ---------- @@ -258,6 +258,9 @@ static int pgStatXactCommit = 0; static int pgStatXactRollback = 0; PgStat_Counter pgStatBlockReadTime = 0; PgStat_Counter pgStatBlockWriteTime = 0; +static PgStat_Counter pgStatActiveTime = 0; +static PgStat_Counter pgStatTransactionIdleTime = 0; +SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL; /* Record that's written to 2PC state file when pgstat state is persisted */ typedef struct TwoPhasePgStatRecord @@ -343,6 +346,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg); static void pgstat_send_funcstats(void); static void pgstat_send_slru(void); static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid); +static void pgstat_send_connstats(bool disconnect, TimestampTz last_report); static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared); @@ -378,6 +382,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len); static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len); static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len); static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len); +static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len); static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len); static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len); @@ -855,10 +860,14 @@ allow_immediate_pgstat_restart(void) * per-table and function usage statistics to the collector. Note that this * is called only when not within a transaction, so it is fair to use * transaction stop time as an approximation of current time. + * + * "disconnect" is "true" only for the last call before the backend + * exits. This makes sure that no data is lost and that interrupted + * sessions are reported correctly. 
* ---------- */ void -pgstat_report_stat(bool force) +pgstat_report_stat(bool disconnect) { /* we assume this inits to all zeroes: */ static const PgStat_TableCounts all_zeroes; @@ -873,17 +882,22 @@ pgstat_report_stat(bool force) /* Don't expend a clock check if nothing to do */ if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) && pgStatXactCommit == 0 && pgStatXactRollback == 0 && - !have_function_stats) + !have_function_stats && !disconnect) return; /* * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL - * msec since we last sent one, or the caller wants to force stats out. + * msec since we last sent one, or the backend is about to exit. */ now = GetCurrentTransactionStopTimestamp(); - if (!force && + if (!disconnect && !TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL)) return; + + /* for backends, send connection statistics */ + if (MyBackendType == B_BACKEND) + pgstat_send_connstats(disconnect, last_report); + last_report = now; /* @@ -1351,6 +1365,48 @@ pgstat_drop_relation(Oid relid) #endif /* NOT_USED */ +/* ---------- + * pgstat_send_connstats() - + * + * Tell the collector about session statistics. + * The parameter "disconnect" will be true when the backend exits. + * "last_report" is the last time we were called (0 if never). + * ---------- + */ +static void +pgstat_send_connstats(bool disconnect, TimestampTz last_report) +{ + PgStat_MsgConn msg; + long secs; + int usecs; + + if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts) + return; + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION); + msg.m_databaseid = MyDatabaseId; + + /* session time since the last report */ + TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report), + GetCurrentTimestamp(), + &secs, &usecs); + msg.m_session_time = secs * 1000000 + usecs; + + msg.m_disconnect = disconnect ? 
pgStatSessionEndCause : DISCONNECT_NOT_YET; + + msg.m_active_time = pgStatActiveTime; + pgStatActiveTime = 0; + + msg.m_idle_in_xact_time = pgStatTransactionIdleTime; + pgStatTransactionIdleTime = 0; + + /* report a new session only the first time */ + msg.m_count = (last_report == 0) ? 1 : 0; + + pgstat_send(&msg, sizeof(PgStat_MsgConn)); +} + + /* ---------- * pgstat_reset_counters() - * @@ -3348,6 +3404,30 @@ pgstat_report_activity(BackendState state, const char *cmd_str) } current_timestamp = GetCurrentTimestamp(); + /* + * If the state has changed from "active" or "idle in transaction", + * calculate the duration. + */ + if ((beentry->st_state == STATE_RUNNING || + beentry->st_state == STATE_FASTPATH || + beentry->st_state == STATE_IDLEINTRANSACTION || + beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) && + state != beentry->st_state) + { + long secs; + int usecs; + + TimestampDifference(beentry->st_state_start_timestamp, + current_timestamp, + &secs, &usecs); + + if (beentry->st_state == STATE_RUNNING || + beentry->st_state == STATE_FASTPATH) + pgStatActiveTime += secs * 1000000 + usecs; + else + pgStatTransactionIdleTime += secs * 1000000 + usecs; + } + /* * Now update the status entry */ @@ -4919,6 +4999,10 @@ PgstatCollectorMain(int argc, char *argv[]) pgstat_recv_replslot(&msg.msg_replslot, len); break; + case PGSTAT_MTYPE_CONNECTION: + pgstat_recv_connstat(&msg.msg_conn, len); + break; + default: break; } @@ -4993,6 +5077,13 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry) dbentry->last_checksum_failure = 0; dbentry->n_block_read_time = 0; dbentry->n_block_write_time = 0; + dbentry->n_sessions = 0; + dbentry->total_session_time = 0; + dbentry->total_active_time = 0; + dbentry->total_idle_in_xact_time = 0; + dbentry->n_sessions_abandoned = 0; + dbentry->n_sessions_fatal = 0; + dbentry->n_sessions_killed = 0; dbentry->stat_reset_timestamp = GetCurrentTimestamp(); dbentry->stats_timestamp = 0; @@ -6944,6 +7035,41 @@ 
pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len) } } +/* ---------- + * pgstat_recv_connstat() - + * + * Process connection information. + * ---------- + */ +static void +pgstat_recv_connstat(PgStat_MsgConn *msg, int len) +{ + PgStat_StatDBEntry *dbentry; + + dbentry = pgstat_get_db_entry(msg->m_databaseid, true); + + dbentry->n_sessions += msg->m_count; + dbentry->total_session_time += msg->m_session_time; + dbentry->total_active_time += msg->m_active_time; + dbentry->total_idle_in_xact_time += msg->m_idle_in_xact_time; + switch (msg->m_disconnect) + { + case DISCONNECT_NOT_YET: + case DISCONNECT_NORMAL: + /* we don't collect these */ + break; + case DISCONNECT_CLIENT_EOF: + dbentry->n_sessions_abandoned++; + break; + case DISCONNECT_FATAL: + dbentry->n_sessions_fatal++; + break; + case DISCONNECT_KILLED: + dbentry->n_sessions_killed++; + break; + } +} + /* ---------- * pgstat_recv_tempfile() - * diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index b3ccd18cda6e5..7de27ee4e0171 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -32,7 +32,7 @@ * clients. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index 64af7b8707cc6..f781fdc6fcab5 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -9,7 +9,7 @@ * though.) 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/postmaster/syslogger.c b/src/backend/postmaster/syslogger.c index faa82ec48158d..d3bcc4b118e7e 100644 --- a/src/backend/postmaster/syslogger.c +++ b/src/backend/postmaster/syslogger.c @@ -13,7 +13,7 @@ * * Author: Andreas Pflug * - * Copyright (c) 2004-2020, PostgreSQL Global Development Group + * Copyright (c) 2004-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c index a52832fe900aa..4f1a8e356b96f 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -31,7 +31,7 @@ * should be killed by SIGQUIT and then a recovery cycle started. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c index 38617b79fd146..ca2bce48312aa 100644 --- a/src/backend/regex/regc_lex.c +++ b/src/backend/regex/regc_lex.c @@ -994,7 +994,7 @@ brenext(struct vars *v, case CHR('*'): if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) RETV(PLAIN, c); - RET('*'); + RETV('*', 1); break; case CHR('['): if (HAVE(6) && *(v->now + 0) == CHR('[') && diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 3cc2d4d362776..1fff3df1daedc 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -6,7 +6,7 @@ * * This file is #included by regcomp.c; it's not meant to compile standalone. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/regex/regexport.c b/src/backend/regex/regexport.c index a925a9f9a003a..d4f940b8c3493 100644 --- a/src/backend/regex/regexport.c +++ b/src/backend/regex/regexport.c @@ -15,7 +15,7 @@ * allows the caller to decide how big is too big to bother with. * * - * Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1998, 1999 Henry Spencer * * IDENTIFICATION diff --git a/src/backend/regex/regprefix.c b/src/backend/regex/regprefix.c index 991b8689bef48..1d4593ac945c9 100644 --- a/src/backend/regex/regprefix.c +++ b/src/backend/regex/regprefix.c @@ -4,7 +4,7 @@ * Extract a common prefix, if any, from a compiled regex. * * - * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2012-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1998, 1999 Henry Spencer * * IDENTIFICATION diff --git a/src/backend/replication/backup_manifest.c b/src/backend/replication/backup_manifest.c index c3f339c55631d..0cefd181b5a11 100644 --- a/src/backend/replication/backup_manifest.c +++ b/src/backend/replication/backup_manifest.c @@ -3,7 +3,7 @@ * backup_manifest.c * code for generating and sending a backup manifest * - * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/backup_manifest.c @@ -13,11 +13,11 @@ #include "postgres.h" #include "access/timeline.h" +#include "common/hex.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "mb/pg_wchar.h" #include "replication/backup_manifest.h" -#include "utils/builtins.h" 
#include "utils/json.h" static void AppendStringToManifest(backup_manifest_info *manifest, char *s); @@ -150,10 +150,12 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid, } else { + uint64 dstlen = pg_hex_enc_len(pathlen); + appendStringInfoString(&buf, "{ \"Encoded-Path\": \""); - enlargeStringInfo(&buf, 2 * pathlen); - buf.len += hex_encode(pathname, pathlen, - &buf.data[buf.len]); + enlargeStringInfo(&buf, dstlen); + buf.len += pg_hex_encode(pathname, pathlen, + &buf.data[buf.len], dstlen); appendStringInfoString(&buf, "\", "); } @@ -176,6 +178,7 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid, { uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH]; int checksumlen; + uint64 dstlen; checksumlen = pg_checksum_final(checksum_ctx, checksumbuf); if (checksumlen < 0) @@ -185,9 +188,10 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid, appendStringInfo(&buf, ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"", pg_checksum_type_name(checksum_ctx->type)); - enlargeStringInfo(&buf, 2 * checksumlen); - buf.len += hex_encode((char *) checksumbuf, checksumlen, - &buf.data[buf.len]); + dstlen = pg_hex_enc_len(checksumlen); + enlargeStringInfo(&buf, dstlen); + buf.len += pg_hex_encode((char *) checksumbuf, checksumlen, + &buf.data[buf.len], dstlen); appendStringInfoChar(&buf, '"'); } @@ -307,8 +311,9 @@ SendBackupManifest(backup_manifest_info *manifest) { StringInfoData protobuf; uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH]; - char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH]; + char *checksumstringbuf; size_t manifest_bytes_done = 0; + uint64 dstlen; if (!IsManifestEnabled(manifest)) return; @@ -328,8 +333,11 @@ SendBackupManifest(backup_manifest_info *manifest) if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf) < 0) elog(ERROR, "failed to finalize checksum of backup manifest"); AppendStringToManifest(manifest, "\"Manifest-Checksum\": \""); - hex_encode((char *) checksumbuf, sizeof 
checksumbuf, checksumstringbuf); - checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0'; + dstlen = pg_hex_enc_len(PG_SHA256_DIGEST_LENGTH); + checksumstringbuf = palloc0(dstlen + 1); /* includes \0 */ + pg_hex_encode((char *) checksumbuf, sizeof checksumbuf, + checksumstringbuf, dstlen); + checksumstringbuf[dstlen] = '\0'; AppendStringToManifest(manifest, checksumstringbuf); AppendStringToManifest(manifest, "\"}\n"); diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 1d8d1742a73a0..0f54635550b98 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -3,7 +3,7 @@ * basebackup.c * code for taking a base backup and streaming it to a standby * - * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/basebackup.c diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index 24f8b3e42ecee..e9582748617f0 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -6,7 +6,7 @@ * loaded as a dynamic module to avoid linking the main server binary with * libpq. * - * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 3f84ee99b8633..afa1df00d0eaf 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -16,7 +16,7 @@ * contents of records in here except turning them into a more usable * format. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -67,13 +67,24 @@ static void DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf static void DecodeSpecConfirm(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); static void DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, - xl_xact_parsed_commit *parsed, TransactionId xid); + xl_xact_parsed_commit *parsed, TransactionId xid, + bool two_phase); static void DecodeAbort(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, - xl_xact_parsed_abort *parsed, TransactionId xid); + xl_xact_parsed_abort *parsed, TransactionId xid, + bool two_phase); +static void DecodePrepare(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, + xl_xact_parsed_prepare *parsed); + /* common function to decode tuples */ static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tup); +/* helper functions for decoding transactions */ +static inline bool FilterPrepare(LogicalDecodingContext *ctx, const char *gid); +static bool DecodeTXNNeedSkip(LogicalDecodingContext *ctx, + XLogRecordBuffer *buf, Oid dbId, + RepOriginId origin_id); + /* * Take every XLogReadRecord()ed record and perform the actions required to * decode it using the output plugin already setup in the logical decoding @@ -244,6 +255,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) xl_xact_commit *xlrec; xl_xact_parsed_commit parsed; TransactionId xid; + bool two_phase = false; xlrec = (xl_xact_commit *) XLogRecGetData(r); ParseCommitRecord(XLogRecGetInfo(buf->record), xlrec, &parsed); @@ -253,7 +265,15 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) else xid = parsed.twophase_xid; - DecodeCommit(ctx, buf, &parsed, xid); + /* + * We would like to process the transaction in a two-phase + * manner iff 
output plugin supports two-phase commits and + * doesn't filter the transaction at prepare time. + */ + if (info == XLOG_XACT_COMMIT_PREPARED) + two_phase = !(FilterPrepare(ctx, parsed.twophase_gid)); + + DecodeCommit(ctx, buf, &parsed, xid, two_phase); break; } case XLOG_XACT_ABORT: @@ -262,6 +282,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) xl_xact_abort *xlrec; xl_xact_parsed_abort parsed; TransactionId xid; + bool two_phase = false; xlrec = (xl_xact_abort *) XLogRecGetData(r); ParseAbortRecord(XLogRecGetInfo(buf->record), xlrec, &parsed); @@ -271,7 +292,15 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) else xid = parsed.twophase_xid; - DecodeAbort(ctx, buf, &parsed, xid); + /* + * We would like to process the transaction in a two-phase + * manner iff output plugin supports two-phase commits and + * doesn't filter the transaction at prepare time. + */ + if (info == XLOG_XACT_ABORT_PREPARED) + two_phase = !(FilterPrepare(ctx, parsed.twophase_gid)); + + DecodeAbort(ctx, buf, &parsed, xid, two_phase); break; } case XLOG_XACT_ASSIGNMENT: @@ -312,17 +341,30 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) } break; case XLOG_XACT_PREPARE: + { + xl_xact_parsed_prepare parsed; + xl_xact_prepare *xlrec; - /* - * Currently decoding ignores PREPARE TRANSACTION and will just - * decode the transaction when the COMMIT PREPARED is sent or - * throw away the transaction's contents when a ROLLBACK PREPARED - * is received. In the future we could add code to expose prepared - * transactions in the changestream allowing for a kind of - * distributed 2PC. 
- */ - ReorderBufferProcessXid(reorder, XLogRecGetXid(r), buf->origptr); - break; + /* ok, parse it */ + xlrec = (xl_xact_prepare *) XLogRecGetData(r); + ParsePrepareRecord(XLogRecGetInfo(buf->record), + xlrec, &parsed); + + /* + * We would like to process the transaction in a two-phase + * manner iff output plugin supports two-phase commits and + * doesn't filter the transaction at prepare time. + */ + if (FilterPrepare(ctx, parsed.twophase_gid)) + { + ReorderBufferProcessXid(reorder, parsed.twophase_xid, + buf->origptr); + break; + } + + DecodePrepare(ctx, buf, &parsed); + break; + } default: elog(ERROR, "unexpected RM_XACT_ID record type: %u", info); } @@ -520,6 +562,32 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) } } +/* + * Ask output plugin whether we want to skip this PREPARE and send + * this transaction as a regular commit later. + */ +static inline bool +FilterPrepare(LogicalDecodingContext *ctx, const char *gid) +{ + /* + * Skip if decoding of two-phase transactions at PREPARE time is not + * enabled. In that case, all two-phase transactions are considered + * filtered out and will be applied as regular transactions at COMMIT + * PREPARED. + */ + if (!ctx->twophase) + return true; + + /* + * The filter_prepare callback is optional. When not supplied, all + * prepared transactions should go through. + */ + if (ctx->callbacks.filter_prepare_cb == NULL) + return false; + + return filter_prepare_cb_wrapper(ctx, gid); +} + static inline bool FilterByOrigin(LogicalDecodingContext *ctx, RepOriginId origin_id) { @@ -582,10 +650,15 @@ DecodeLogicalMsgOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) /* * Consolidated commit record handling between the different form of commit * records. + * + * 'two_phase' indicates that caller wants to process the transaction in two + * phases, first process prepare if not already done and then process + * commit_prepared. 
*/ static void DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, - xl_xact_parsed_commit *parsed, TransactionId xid) + xl_xact_parsed_commit *parsed, TransactionId xid, + bool two_phase) { XLogRecPtr origin_lsn = InvalidXLogRecPtr; TimestampTz commit_time = parsed->xact_time; @@ -606,30 +679,19 @@ DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, * the reorderbuffer to forget the content of the (sub-)transactions * if not. * - * There can be several reasons we might not be interested in this - * transaction: - * 1) We might not be interested in decoding transactions up to this - * LSN. This can happen because we previously decoded it and now just - * are restarting or if we haven't assembled a consistent snapshot yet. - * 2) The transaction happened in another database. - * 3) The output plugin is not interested in the origin. - * 4) We are doing fast-forwarding - * * We can't just use ReorderBufferAbort() here, because we need to execute * the transaction's invalidations. This currently won't be needed if * we're just skipping over the transaction because currently we only do * so during startup, to get to the first transaction the client needs. As * we have reset the catalog caches before starting to read WAL, and we * haven't yet touched any catalogs, there can't be anything to invalidate. - * But if we're "forgetting" this commit because it's it happened in - * another database, the invalidations might be important, because they - * could be for shared catalogs and we might have loaded data into the - * relevant syscaches. + * But if we're "forgetting" this commit because it happened in another + * database, the invalidations might be important, because they could be + * for shared catalogs and we might have loaded data into the relevant + * syscaches. 
* --- */ - if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) || - (parsed->dbId != InvalidOid && parsed->dbId != ctx->slot->data.database) || - ctx->fast_forward || FilterByOrigin(ctx, origin_id)) + if (DecodeTXNNeedSkip(ctx, buf, parsed->dbId, origin_id)) { for (i = 0; i < parsed->nsubxacts; i++) { @@ -647,34 +709,163 @@ DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, buf->origptr, buf->endptr); } + /* + * Send the final commit record if the transaction data is already + * decoded, otherwise, process the entire transaction. + */ + if (two_phase) + { + ReorderBufferFinishPrepared(ctx->reorder, xid, buf->origptr, buf->endptr, + commit_time, origin_id, origin_lsn, + parsed->twophase_gid, true); + } + else + { + ReorderBufferCommit(ctx->reorder, xid, buf->origptr, buf->endptr, + commit_time, origin_id, origin_lsn); + } + + /* + * Update the decoding stats at transaction prepare/commit/abort. It is + * not clear that sending more or less frequently than this would be + * better. + */ + UpdateDecodingStats(ctx); +} + +/* + * Decode PREPARE record. Similar logic as in DecodeCommit. + * + * Note that we don't skip prepare even if have detected concurrent abort + * because it is quite possible that we had already sent some changes before we + * detect abort in which case we need to abort those changes in the subscriber. + * To abort such changes, we do send the prepare and then the rollback prepared + * which is what happened on the publisher-side as well. Now, we can invent a + * new abort API wherein in such cases we send abort and skip sending prepared + * and rollback prepared but then it is not that straightforward because we + * might have streamed this transaction by that time in which case it is + * handled when the rollback is encountered. It is not impossible to optimize + * the concurrent abort case but it can introduce design complexity w.r.t + * handling different cases so leaving it for now as it doesn't seem worth it. 
+ */ +static void +DecodePrepare(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, + xl_xact_parsed_prepare *parsed) +{ + SnapBuild *builder = ctx->snapshot_builder; + XLogRecPtr origin_lsn = parsed->origin_lsn; + TimestampTz prepare_time = parsed->xact_time; + XLogRecPtr origin_id = XLogRecGetOrigin(buf->record); + int i; + TransactionId xid = parsed->twophase_xid; + + if (parsed->origin_timestamp != 0) + prepare_time = parsed->origin_timestamp; + + /* + * Remember the prepare info for a txn so that it can be used later in + * commit prepared if required. See ReorderBufferFinishPrepared. + */ + if (!ReorderBufferRememberPrepareInfo(ctx->reorder, xid, buf->origptr, + buf->endptr, prepare_time, origin_id, + origin_lsn)) + return; + + /* We can't start streaming unless a consistent state is reached. */ + if (SnapBuildCurrentState(builder) < SNAPBUILD_CONSISTENT) + { + ReorderBufferSkipPrepare(ctx->reorder, xid); + return; + } + + /* + * Check whether we need to process this transaction. See + * DecodeTXNNeedSkip for the reasons why we sometimes want to skip the + * transaction. + * + * We can't call ReorderBufferForget as we did in DecodeCommit as the txn + * hasn't yet been committed, removing this txn before a commit might + * result in the computation of an incorrect restart_lsn. See + * SnapBuildProcessRunningXacts. But we need to process cache + * invalidations if there are any for the reasons mentioned in + * DecodeCommit. + */ + if (DecodeTXNNeedSkip(ctx, buf, parsed->dbId, origin_id)) + { + ReorderBufferSkipPrepare(ctx->reorder, xid); + ReorderBufferInvalidate(ctx->reorder, xid, buf->origptr); + return; + } + + /* Tell the reorderbuffer about the surviving subtransactions. 
*/ + for (i = 0; i < parsed->nsubxacts; i++) + { + ReorderBufferCommitChild(ctx->reorder, xid, parsed->subxacts[i], + buf->origptr, buf->endptr); + } + /* replay actions of all transaction + subtransactions in order */ - ReorderBufferCommit(ctx->reorder, xid, buf->origptr, buf->endptr, - commit_time, origin_id, origin_lsn); + ReorderBufferPrepare(ctx->reorder, xid, parsed->twophase_gid); /* - * Update the decoding stats at transaction commit/abort. It is not clear - * that sending more or less frequently than this would be better. + * Update the decoding stats at transaction prepare/commit/abort. It is + * not clear that sending more or less frequently than this would be + * better. */ UpdateDecodingStats(ctx); } + /* * Get the data from the various forms of abort records and pass it on to - * snapbuild.c and reorderbuffer.c + * snapbuild.c and reorderbuffer.c. + * + * 'two_phase' indicates to finish prepared transaction. */ static void DecodeAbort(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, - xl_xact_parsed_abort *parsed, TransactionId xid) + xl_xact_parsed_abort *parsed, TransactionId xid, + bool two_phase) { int i; + XLogRecPtr origin_lsn = InvalidXLogRecPtr; + TimestampTz abort_time = parsed->xact_time; + XLogRecPtr origin_id = XLogRecGetOrigin(buf->record); + bool skip_xact; - for (i = 0; i < parsed->nsubxacts; i++) + if (parsed->xinfo & XACT_XINFO_HAS_ORIGIN) + { + origin_lsn = parsed->origin_lsn; + abort_time = parsed->origin_timestamp; + } + + /* + * Check whether we need to process this transaction. See + * DecodeTXNNeedSkip for the reasons why we sometimes want to skip the + * transaction. + */ + skip_xact = DecodeTXNNeedSkip(ctx, buf, parsed->dbId, origin_id); + + /* + * Send the final rollback record for a prepared transaction unless we + * need to skip it. For non-two-phase xacts, simply forget the xact. 
+ */ + if (two_phase && !skip_xact) { - ReorderBufferAbort(ctx->reorder, parsed->subxacts[i], - buf->record->EndRecPtr); + ReorderBufferFinishPrepared(ctx->reorder, xid, buf->origptr, buf->endptr, + abort_time, origin_id, origin_lsn, + parsed->twophase_gid, false); } + else + { + for (i = 0; i < parsed->nsubxacts; i++) + { + ReorderBufferAbort(ctx->reorder, parsed->subxacts[i], + buf->record->EndRecPtr); + } - ReorderBufferAbort(ctx->reorder, xid, buf->record->EndRecPtr); + ReorderBufferAbort(ctx->reorder, xid, buf->record->EndRecPtr); + } /* update the decoding stats */ UpdateDecodingStats(ctx); @@ -1080,3 +1271,24 @@ DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple) header->t_infomask2 = xlhdr.t_infomask2; header->t_hoff = xlhdr.t_hoff; } + +/* + * Check whether we are interested in this specific transaction. + * + * There can be several reasons we might not be interested in this + * transaction: + * 1) We might not be interested in decoding transactions up to this + * LSN. This can happen because we previously decoded it and now just + * are restarting or if we haven't assembled a consistent snapshot yet. + * 2) The transaction happened in another database. + * 3) The output plugin is not interested in the origin. 
+ * 4) We are doing fast-forwarding + */ +static bool +DecodeTXNNeedSkip(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, + Oid txn_dbid, RepOriginId origin_id) +{ + return (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) || + (txn_dbid != InvalidOid && txn_dbid != ctx->slot->data.database) || + ctx->fast_forward || FilterByOrigin(ctx, origin_id)); +} diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index bdaf0312d63dc..186514cd9ed43 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -2,7 +2,7 @@ * launcher.c * PostgreSQL logical replication worker launcher process * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/logical/launcher.c diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c index f1f4df7d70f28..0977aec711cff 100644 --- a/src/backend/replication/logical/logical.c +++ b/src/backend/replication/logical/logical.c @@ -2,7 +2,7 @@ * logical.c * PostgreSQL logical decoding coordination * - * Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/logical/logical.c @@ -59,6 +59,13 @@ static void shutdown_cb_wrapper(LogicalDecodingContext *ctx); static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn); static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, XLogRecPtr commit_lsn); +static void begin_prepare_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn); +static void prepare_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + XLogRecPtr prepare_lsn); +static void commit_prepared_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + XLogRecPtr commit_lsn); +static void 
rollback_prepared_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + XLogRecPtr prepare_end_lsn, TimestampTz prepare_time); static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change); static void truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, @@ -74,6 +81,8 @@ static void stream_stop_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, XLogRecPtr last_lsn); static void stream_abort_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, XLogRecPtr abort_lsn); +static void stream_prepare_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + XLogRecPtr prepare_lsn); static void stream_commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, XLogRecPtr commit_lsn); static void stream_change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, @@ -237,11 +246,37 @@ StartupDecodingContext(List *output_plugin_options, ctx->reorder->stream_start = stream_start_cb_wrapper; ctx->reorder->stream_stop = stream_stop_cb_wrapper; ctx->reorder->stream_abort = stream_abort_cb_wrapper; + ctx->reorder->stream_prepare = stream_prepare_cb_wrapper; ctx->reorder->stream_commit = stream_commit_cb_wrapper; ctx->reorder->stream_change = stream_change_cb_wrapper; ctx->reorder->stream_message = stream_message_cb_wrapper; ctx->reorder->stream_truncate = stream_truncate_cb_wrapper; + + /* + * To support two-phase logical decoding, we require + * begin_prepare/prepare/commit-prepare/abort-prepare callbacks. The + * filter_prepare callback is optional. We however enable two-phase + * logical decoding when at least one of the methods is enabled so that we + * can easily identify missing methods. + * + * We decide it here, but only check it later in the wrappers. 
+ */ + ctx->twophase = (ctx->callbacks.begin_prepare_cb != NULL) || + (ctx->callbacks.prepare_cb != NULL) || + (ctx->callbacks.commit_prepared_cb != NULL) || + (ctx->callbacks.rollback_prepared_cb != NULL) || + (ctx->callbacks.stream_prepare_cb != NULL) || + (ctx->callbacks.filter_prepare_cb != NULL); + + /* + * Callback to support decoding at prepare time. + */ + ctx->reorder->begin_prepare = begin_prepare_cb_wrapper; + ctx->reorder->prepare = prepare_cb_wrapper; + ctx->reorder->commit_prepared = commit_prepared_cb_wrapper; + ctx->reorder->rollback_prepared = rollback_prepared_cb_wrapper; + ctx->out = makeStringInfo(); ctx->prepare_write = prepare_write; ctx->write = do_write; @@ -782,6 +817,186 @@ commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, error_context_stack = errcallback.previous; } +/* + * The functionality of begin_prepare is quite similar to begin with the + * exception that this will have gid (global transaction id) information which + * can be used by plugin. Now, we thought about extending the existing begin + * but that would break the replication protocol and additionally this looks + * cleaner. 
+ */ +static void +begin_prepare_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn) +{ + LogicalDecodingContext *ctx = cache->private_data; + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + + Assert(!ctx->fast_forward); + + /* We're only supposed to call this when two-phase commits are supported */ + Assert(ctx->twophase); + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "begin_prepare"; + state.report_location = txn->first_lsn; + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = true; + ctx->write_xid = txn->xid; + ctx->write_location = txn->first_lsn; + + /* + * If the plugin supports two-phase commits then begin prepare callback is + * mandatory + */ + if (ctx->callbacks.begin_prepare_cb == NULL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical replication at prepare time requires begin_prepare_cb callback"))); + + /* do the actual work: call callback */ + ctx->callbacks.begin_prepare_cb(ctx, txn); + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; +} + +static void +prepare_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + XLogRecPtr prepare_lsn) +{ + LogicalDecodingContext *ctx = cache->private_data; + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + + Assert(!ctx->fast_forward); + + /* We're only supposed to call this when two-phase commits are supported */ + Assert(ctx->twophase); + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "prepare"; + state.report_location = txn->final_lsn; /* beginning of prepare record */ + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + 
error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = true; + ctx->write_xid = txn->xid; + ctx->write_location = txn->end_lsn; /* points to the end of the record */ + + /* + * If the plugin supports two-phase commits then prepare callback is + * mandatory + */ + if (ctx->callbacks.prepare_cb == NULL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical replication at prepare time requires prepare_cb callback"))); + + /* do the actual work: call callback */ + ctx->callbacks.prepare_cb(ctx, txn, prepare_lsn); + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; +} + +static void +commit_prepared_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + XLogRecPtr commit_lsn) +{ + LogicalDecodingContext *ctx = cache->private_data; + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + + Assert(!ctx->fast_forward); + + /* We're only supposed to call this when two-phase commits are supported */ + Assert(ctx->twophase); + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "commit_prepared"; + state.report_location = txn->final_lsn; /* beginning of commit record */ + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = true; + ctx->write_xid = txn->xid; + ctx->write_location = txn->end_lsn; /* points to the end of the record */ + + /* + * If the plugin supports two-phase commits then commit prepared callback + * is mandatory + */ + if (ctx->callbacks.commit_prepared_cb == NULL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical replication at prepare time requires commit_prepared_cb callback"))); + + /* do the actual work: call callback */ + ctx->callbacks.commit_prepared_cb(ctx, txn, commit_lsn); + + /* 
Pop the error context stack */ + error_context_stack = errcallback.previous; +} + +static void +rollback_prepared_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + XLogRecPtr prepare_end_lsn, + TimestampTz prepare_time) +{ + LogicalDecodingContext *ctx = cache->private_data; + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + + Assert(!ctx->fast_forward); + + /* We're only supposed to call this when two-phase commits are supported */ + Assert(ctx->twophase); + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "rollback_prepared"; + state.report_location = txn->final_lsn; /* beginning of commit record */ + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = true; + ctx->write_xid = txn->xid; + ctx->write_location = txn->end_lsn; /* points to the end of the record */ + + /* + * If the plugin supports two-phase commits then rollback prepared callback + * is mandatory + */ + if (ctx->callbacks.rollback_prepared_cb == NULL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical replication at prepare time requires rollback_prepared_cb callback"))); + + /* do the actual work: call callback */ + ctx->callbacks.rollback_prepared_cb(ctx, txn, prepare_end_lsn, + prepare_time); + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; +} + static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change) @@ -859,6 +1074,36 @@ truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, error_context_stack = errcallback.previous; } +bool +filter_prepare_cb_wrapper(LogicalDecodingContext *ctx, const char *gid) +{ + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + bool ret; + + 
Assert(!ctx->fast_forward); + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "filter_prepare"; + state.report_location = InvalidXLogRecPtr; + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = false; + + /* do the actual work: call callback */ + ret = ctx->callbacks.filter_prepare_cb(ctx, gid); + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + + return ret; +} + bool filter_by_origin_cb_wrapper(LogicalDecodingContext *ctx, RepOriginId origin_id) { @@ -1056,6 +1301,49 @@ stream_abort_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, error_context_stack = errcallback.previous; } +static void +stream_prepare_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + XLogRecPtr prepare_lsn) +{ + LogicalDecodingContext *ctx = cache->private_data; + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + + Assert(!ctx->fast_forward); + + /* + * We're only supposed to call this when streaming and two-phase commits + * are supported. 
+ */ + Assert(ctx->streaming); + Assert(ctx->twophase); + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "stream_prepare"; + state.report_location = txn->final_lsn; + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = true; + ctx->write_xid = txn->xid; + ctx->write_location = txn->end_lsn; + + /* in streaming mode with two-phase commits, stream_prepare_cb is required */ + if (ctx->callbacks.stream_prepare_cb == NULL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical streaming at prepare time requires a stream_prepare_cb callback"))); + + ctx->callbacks.stream_prepare_cb(ctx, txn, prepare_lsn); + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; +} + static void stream_commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, XLogRecPtr commit_lsn) @@ -1083,7 +1371,7 @@ stream_commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, ctx->write_xid = txn->xid; ctx->write_location = txn->end_lsn; - /* in streaming mode, stream_abort_cb is required */ + /* in streaming mode, stream_commit_cb is required */ if (ctx->callbacks.stream_commit_cb == NULL) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), diff --git a/src/backend/replication/logical/logicalfuncs.c b/src/backend/replication/logical/logicalfuncs.c index b99c94e84891a..f7e055874e305 100644 --- a/src/backend/replication/logical/logicalfuncs.c +++ b/src/backend/replication/logical/logicalfuncs.c @@ -6,7 +6,7 @@ * logical replication slots via SQL. 
* * - * Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/logicalfuncs.c diff --git a/src/backend/replication/logical/message.c b/src/backend/replication/logical/message.c index dfccccf270124..93bd372421a64 100644 --- a/src/backend/replication/logical/message.c +++ b/src/backend/replication/logical/message.c @@ -3,7 +3,7 @@ * message.c * Generic logical messages. * - * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2013-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/logical/message.c diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c index 15ab8e7204b6c..9bd761a426223 100644 --- a/src/backend/replication/logical/origin.c +++ b/src/backend/replication/logical/origin.c @@ -3,7 +3,7 @@ * origin.c * Logical replication progress tracking support. * - * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2013-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/logical/origin.c @@ -559,8 +559,8 @@ CheckPointReplicationOrigin(void) tmppath))); /* - * no other backend can perform this at the same time, we're protected by - * CheckpointLock. + * no other backend can perform this at the same time; only one + * checkpoint can happen at a time. */ tmpfd = OpenTransientFile(tmppath, O_CREAT | O_EXCL | O_WRONLY | PG_BINARY); @@ -843,7 +843,7 @@ replorigin_redo(XLogReaderState *record) * that originated at the LSN remote_commit on the remote node was replayed * successfully and that we don't need to do so again. 
In combination with * setting up replorigin_session_origin_lsn and replorigin_session_origin - * that ensures we won't loose knowledge about that after a crash if the + * that ensures we won't lose knowledge about that after a crash if the * transaction had a persistent effect (think of asynchronous commits). * * local_commit needs to be a local LSN of the commit so that we can make sure diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c index fdb31182d77f0..f2c85cabb5233 100644 --- a/src/backend/replication/logical/proto.c +++ b/src/backend/replication/logical/proto.c @@ -3,7 +3,7 @@ * proto.c * logical replication protocol functions * - * Copyright (c) 2015-2020, PostgreSQL Global Development Group + * Copyright (c) 2015-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/logical/proto.c @@ -493,7 +493,6 @@ logicalrep_write_tuple(StringInfo out, Relation rel, HeapTuple tuple, bool binar HeapTuple typtup; Form_pg_type typclass; Form_pg_attribute att = TupleDescAttr(desc, i); - char *outputstr; if (att->attisdropped || att->attgenerated) continue; @@ -537,6 +536,8 @@ logicalrep_write_tuple(StringInfo out, Relation rel, HeapTuple tuple, bool binar } else { + char *outputstr; + pq_sendbyte(out, LOGICALREP_COLUMN_TEXT); outputstr = OidOutputFunctionCall(typclass->typoutput, values[i]); pq_sendcountedtext(out, outputstr, strlen(outputstr), false); diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c index f4dbbbe2dde4f..e861c0ff8029d 100644 --- a/src/backend/replication/logical/relation.c +++ b/src/backend/replication/logical/relation.c @@ -2,7 +2,7 @@ * relation.c * PostgreSQL logical replication relation mapping cache * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/logical/relation.c diff --git 
a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 6b0a59efaf575..5a62ab8bbc1ac 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -4,7 +4,7 @@ * PostgreSQL logical replay/reorder buffer management * * - * Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Copyright (c) 2012-2021, PostgreSQL Global Development Group * * * IDENTIFICATION @@ -251,7 +251,8 @@ static Size ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn static void ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, char *change); static void ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn); -static void ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn); +static void ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, + bool txn_prepared); static void ReorderBufferCleanupSerializedTXNs(const char *slotname); static void ReorderBufferSerializedPath(char *path, ReplicationSlot *slot, TransactionId xid, XLogSegNo segno); @@ -422,6 +423,12 @@ ReorderBufferReturnTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) /* free data that's contained */ + if (txn->gid != NULL) + { + pfree(txn->gid); + txn->gid = NULL; + } + if (txn->tuplecid_hash != NULL) { hash_destroy(txn->tuplecid_hash); @@ -1516,12 +1523,18 @@ ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) } /* - * Discard changes from a transaction (and subtransactions), after streaming - * them. Keep the remaining info - transactions, tuplecids, invalidations and - * snapshots. + * Discard changes from a transaction (and subtransactions), either after + * streaming or decoding them at PREPARE. Keep the remaining info - + * transactions, tuplecids, invalidations and snapshots. 
+ * We additionally remove tuplecids after decoding the transaction at prepare + * time as we only need to perform invalidation at rollback or commit prepared. + * + * 'txn_prepared' indicates that we have decoded the transaction at prepare + * time. */ static void -ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) +ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, bool txn_prepared) { dlist_mutable_iter iter; @@ -1540,7 +1553,7 @@ ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) Assert(rbtxn_is_known_subxact(subtxn)); Assert(subtxn->nsubtxns == 0); - ReorderBufferTruncateTXN(rb, subtxn); + ReorderBufferTruncateTXN(rb, subtxn, txn_prepared); } /* cleanup changes in the txn */ @@ -1574,9 +1587,33 @@ ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) * about the toplevel xact (we send the XID in all messages), but we never * stream XIDs of empty subxacts. */ - if ((!txn->toptxn) || (txn->nentries_mem != 0)) + if ((!txn_prepared) && ((!txn->toptxn) || (txn->nentries_mem != 0))) txn->txn_flags |= RBTXN_IS_STREAMED; + if (txn_prepared) + { + /* + * If this is a prepared txn, cleanup the tuplecids we stored for + * decoding catalog snapshot access. They are always stored in the + * toplevel transaction. + */ + dlist_foreach_modify(iter, &txn->tuplecids) + { + ReorderBufferChange *change; + + change = dlist_container(ReorderBufferChange, node, iter.cur); + + /* Check we're not mixing changes from different transactions. */ + Assert(change->txn == txn); + Assert(change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID); + + /* Remove the change from its containing list. */ + dlist_delete(&change->node); + + ReorderBufferReturnChange(rb, change, true); + } + } + /* * Destroy the (relfilenode, ctid) hashtable, so that we don't leak any * memory. 
We could also keep the hash table and update it with new ctid @@ -1756,9 +1793,10 @@ ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap) } /* - * If the transaction was (partially) streamed, we need to commit it in a - * 'streamed' way. That is, we first stream the remaining part of the - * transaction, and then invoke stream_commit message. + * If the transaction was (partially) streamed, we need to prepare or commit + * it in a 'streamed' way. That is, we first stream the remaining part of the + * transaction, and then invoke stream_prepare or stream_commit message as per + * the case. */ static void ReorderBufferStreamCommit(ReorderBuffer *rb, ReorderBufferTXN *txn) @@ -1768,29 +1806,49 @@ ReorderBufferStreamCommit(ReorderBuffer *rb, ReorderBufferTXN *txn) ReorderBufferStreamTXN(rb, txn); - rb->stream_commit(rb, txn, txn->final_lsn); + if (rbtxn_prepared(txn)) + { + /* + * Note, we send stream prepare even if a concurrent abort is + * detected. See DecodePrepare for more information. + */ + rb->stream_prepare(rb, txn, txn->final_lsn); - ReorderBufferCleanupTXN(rb, txn); + /* + * This is a PREPARED transaction, part of a two-phase commit. The + * full cleanup will happen as part of the COMMIT PREPAREDs, so now + * just truncate txn by removing changes and tuple_cids. + */ + ReorderBufferTruncateTXN(rb, txn, true); + /* Reset the CheckXidAlive */ + CheckXidAlive = InvalidTransactionId; + } + else + { + rb->stream_commit(rb, txn, txn->final_lsn); + ReorderBufferCleanupTXN(rb, txn); + } } /* * Set xid to detect concurrent aborts. * - * While streaming an in-progress transaction there is a possibility that the - * (sub)transaction might get aborted concurrently. In such case if the - * (sub)transaction has catalog update then we might decode the tuple using - * wrong catalog version. For example, suppose there is one catalog tuple with - * (xmin: 500, xmax: 0). 
Now, the transaction 501 updates the catalog tuple - * and after that we will have two tuples (xmin: 500, xmax: 501) and - * (xmin: 501, xmax: 0). Now, if 501 is aborted and some other transaction - * say 502 updates the same catalog tuple then the first tuple will be changed - * to (xmin: 500, xmax: 502). So, the problem is that when we try to decode - * the tuple inserted/updated in 501 after the catalog update, we will see the - * catalog tuple with (xmin: 500, xmax: 502) as visible because it will - * consider that the tuple is deleted by xid 502 which is not visible to our - * snapshot. And when we will try to decode with that catalog tuple, it can - * lead to a wrong result or a crash. So, it is necessary to detect - * concurrent aborts to allow streaming of in-progress transactions. + * While streaming an in-progress transaction or decoding a prepared + * transaction there is a possibility that the (sub)transaction might get + * aborted concurrently. In such case if the (sub)transaction has catalog + * update then we might decode the tuple using wrong catalog version. For + * example, suppose there is one catalog tuple with (xmin: 500, xmax: 0). Now, + * the transaction 501 updates the catalog tuple and after that we will have + * two tuples (xmin: 500, xmax: 501) and (xmin: 501, xmax: 0). Now, if 501 is + * aborted and some other transaction say 502 updates the same catalog tuple + * then the first tuple will be changed to (xmin: 500, xmax: 502). So, the + * problem is that when we try to decode the tuple inserted/updated in 501 + * after the catalog update, we will see the catalog tuple with (xmin: 500, + * xmax: 502) as visible because it will consider that the tuple is deleted by + * xid 502 which is not visible to our snapshot. And when we will try to + * decode with that catalog tuple, it can lead to a wrong result or a crash. 
+ * So, it is necessary to detect concurrent aborts to allow streaming of + * in-progress transactions or decoding of prepared transactions. * * For detecting the concurrent abort we set CheckXidAlive to the current * (sub)transaction's xid for which this change belongs to. And, during @@ -1799,7 +1857,10 @@ ReorderBufferStreamCommit(ReorderBuffer *rb, ReorderBufferTXN *txn) * and discard the already streamed changes on such an error. We might have * already streamed some of the changes for the aborted (sub)transaction, but * that is fine because when we decode the abort we will stream abort message - * to truncate the changes in the subscriber. + * to truncate the changes in the subscriber. Similarly, for prepared + * transactions, we stop decoding if concurrent abort is detected and then + * rollback the changes when rollback prepared is encountered. See + * DecodePrepare. */ static inline void SetupCheckXidLive(TransactionId xid) @@ -1901,7 +1962,7 @@ ReorderBufferResetTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, ReorderBufferChange *specinsert) { /* Discard the changes that we just streamed */ - ReorderBufferTruncateTXN(rb, txn); + ReorderBufferTruncateTXN(rb, txn, rbtxn_prepared(txn)); /* Free all resources allocated for toast reconstruction */ ReorderBufferToastReset(rb, txn); @@ -1913,15 +1974,19 @@ ReorderBufferResetTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, specinsert = NULL; } - /* Stop the stream. */ - rb->stream_stop(rb, txn, last_lsn); - - /* Remember the command ID and snapshot for the streaming run */ - ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id); + /* + * For the streaming case, stop the stream and remember the command ID and + * snapshot for the streaming run. + */ + if (rbtxn_is_streamed(txn)) + { + rb->stream_stop(rb, txn, last_lsn); + ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id); + } } /* - * Helper function for ReorderBufferCommit and ReorderBufferStreamTXN. 
+ * Helper function for ReorderBufferReplay and ReorderBufferStreamTXN. * * Send data of a transaction (and its subtransactions) to the * output plugin. We iterate over the top and subtransactions (using a k-way @@ -1974,9 +2039,17 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, else StartTransactionCommand(); - /* We only need to send begin/commit for non-streamed transactions. */ + /* + * We only need to send begin/begin-prepare for non-streamed + * transactions. + */ if (!streaming) - rb->begin(rb, txn); + { + if (rbtxn_prepared(txn)) + rb->begin_prepare(rb, txn); + else + rb->begin(rb, txn); + } ReorderBufferIterTXNInit(rb, txn, &iterstate); while ((change = ReorderBufferIterTXNNext(rb, iterstate)) != NULL) @@ -2007,8 +2080,12 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, prev_lsn = change->lsn; - /* Set the current xid to detect concurrent aborts. */ - if (streaming) + /* + * Set the current xid to detect concurrent aborts. This is + * required for the cases when we decode the changes before the + * COMMIT record is processed. + */ + if (streaming || rbtxn_prepared(change->txn)) { curtxn = change->txn; SetupCheckXidLive(curtxn->xid); @@ -2042,13 +2119,13 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, * Mapped catalog tuple without data, emitted while * catalog table was in the process of being rewritten. We * can fail to look up the relfilenode, because the - * relmapper has no "historic" view, in contrast to normal - * the normal catalog during decoding. Thus repeated - * rewrites can cause a lookup failure. That's OK because - * we do not decode catalog changes anyway. Normally such - * tuples would be skipped over below, but we can't - * identify whether the table should be logically logged - * without mapping the relfilenode to the oid. + * relmapper has no "historic" view, in contrast to the + * normal catalog during decoding. Thus repeated rewrites + * can cause a lookup failure. 
That's OK because we do not + * decode catalog changes anyway. Normally such tuples + * would be skipped over below, but we can't identify + * whether the table should be logically logged without + * mapping the relfilenode to the oid. */ if (reloid == InvalidOid && change->data.tp.newtuple == NULL && @@ -2299,7 +2376,16 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, } } else - rb->commit(rb, txn, commit_lsn); + { + /* + * Call either PREPARE (for two-phase transactions) or COMMIT (for + * regular ones). + */ + if (rbtxn_prepared(txn)) + rb->prepare(rb, txn, commit_lsn); + else + rb->commit(rb, txn, commit_lsn); + } /* this is just a sanity check against bad output plugin behaviour */ if (GetCurrentTransactionIdIfAny() != InvalidTransactionId) @@ -2333,15 +2419,22 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, RollbackAndReleaseCurrentSubTransaction(); /* - * If we are streaming the in-progress transaction then discard the - * changes that we just streamed, and mark the transactions as - * streamed (if they contained changes). Otherwise, remove all the - * changes and deallocate the ReorderBufferTXN. + * We are here due to one of the four reasons: 1. Decoding an + * in-progress txn. 2. Decoding a prepared txn. 3. Decoding of a + * prepared txn that was (partially) streamed. 4. Decoding a committed + * txn. + * + * For 1, we allow truncation of txn data by removing the changes + * already streamed but still keeping other things like invalidations, + * snapshot, and tuplecids. For 2 and 3, we indicate + * ReorderBufferTruncateTXN to do more elaborate truncation of txn + * data as the entire transaction has been decoded except for commit. + * For 4, as the entire txn has been decoded, we can fully clean up + * the TXN reorder buffer. 
*/ - if (streaming) + if (streaming || rbtxn_prepared(txn)) { - ReorderBufferTruncateTXN(rb, txn); - + ReorderBufferTruncateTXN(rb, txn, rbtxn_prepared(txn)); /* Reset the CheckXidAlive */ CheckXidAlive = InvalidTransactionId; } @@ -2374,17 +2467,20 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, /* * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent - * abort of the (sub)transaction we are streaming. We need to do the - * cleanup and return gracefully on this error, see SetupCheckXidLive. + * abort of the (sub)transaction we are streaming or preparing. We + * need to do the cleanup and return gracefully on this error, see + * SetupCheckXidLive. */ if (errdata->sqlerrcode == ERRCODE_TRANSACTION_ROLLBACK) { /* - * This error can only occur when we are sending the data in - * streaming mode and the streaming is not finished yet. + * This error can occur either when we are sending the data in + * streaming mode and the streaming is not finished yet or when we + * are sending the data out on a PREPARE during a two-phase + * commit. */ - Assert(streaming); - Assert(stream_started); + Assert(streaming || rbtxn_prepared(txn)); + Assert(stream_started || rbtxn_prepared(txn)); /* Cleanup the temporary error state. */ FlushErrorState(); @@ -2414,26 +2510,19 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, * ReorderBufferCommitChild(), even if previously assigned to the toplevel * transaction with ReorderBufferAssignChild. * - * This interface is called once a toplevel commit is read for both streamed - * as well as non-streamed transactions. + * This interface is called once a prepare or toplevel commit is read for both + * streamed as well as non-streamed transactions. 
*/ -void -ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, +static void +ReorderBufferReplay(ReorderBufferTXN *txn, + ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, RepOriginId origin_id, XLogRecPtr origin_lsn) { - ReorderBufferTXN *txn; Snapshot snapshot_now; CommandId command_id = FirstCommandId; - txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, - false); - - /* unknown transaction, nothing to replay */ - if (txn == NULL) - return; - txn->final_lsn = commit_lsn; txn->end_lsn = end_lsn; txn->commit_time = commit_time; @@ -2463,7 +2552,13 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, if (txn->base_snapshot == NULL) { Assert(txn->ninvalidations == 0); - ReorderBufferCleanupTXN(rb, txn); + + /* + * Removing this txn before a commit might result in the computation + * of an incorrect restart_lsn. See SnapBuildProcessRunningXacts. + */ + if (!rbtxn_prepared(txn)) + ReorderBufferCleanupTXN(rb, txn); return; } @@ -2474,6 +2569,178 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, command_id, false); } +/* + * Commit a transaction. + * + * See comments for ReorderBufferReplay(). + */ +void +ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr commit_lsn, XLogRecPtr end_lsn, + TimestampTz commit_time, + RepOriginId origin_id, XLogRecPtr origin_lsn) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, + false); + + /* unknown transaction, nothing to replay */ + if (txn == NULL) + return; + + ReorderBufferReplay(txn, rb, xid, commit_lsn, end_lsn, commit_time, + origin_id, origin_lsn); +} + +/* + * Record the prepare information for a transaction. 
+ */ +bool +ReorderBufferRememberPrepareInfo(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr prepare_lsn, XLogRecPtr end_lsn, + TimestampTz prepare_time, + RepOriginId origin_id, XLogRecPtr origin_lsn) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false); + + /* unknown transaction, nothing to do */ + if (txn == NULL) + return false; + + /* + * Remember the prepare information to be later used by commit prepared in + * case we skip doing prepare. + */ + txn->final_lsn = prepare_lsn; + txn->end_lsn = end_lsn; + txn->commit_time = prepare_time; + txn->origin_id = origin_id; + txn->origin_lsn = origin_lsn; + + return true; +} + +/* Remember that we have skipped prepare */ +void +ReorderBufferSkipPrepare(ReorderBuffer *rb, TransactionId xid) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, false); + + /* unknown transaction, nothing to do */ + if (txn == NULL) + return; + + txn->txn_flags |= RBTXN_SKIPPED_PREPARE; +} + +/* + * Prepare a two-phase transaction. + * + * See comments for ReorderBufferReplay(). + */ +void +ReorderBufferPrepare(ReorderBuffer *rb, TransactionId xid, + char *gid) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, + false); + + /* unknown transaction, nothing to replay */ + if (txn == NULL) + return; + + txn->txn_flags |= RBTXN_PREPARE; + txn->gid = pstrdup(gid); + + /* The prepare info must have been updated in txn by now. */ + Assert(txn->final_lsn != InvalidXLogRecPtr); + + ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn, + txn->commit_time, txn->origin_id, txn->origin_lsn); +} + +/* + * This is used to handle COMMIT/ROLLBACK PREPARED. 
+ */ +void +ReorderBufferFinishPrepared(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr commit_lsn, XLogRecPtr end_lsn, + TimestampTz commit_time, RepOriginId origin_id, + XLogRecPtr origin_lsn, char *gid, bool is_commit) +{ + ReorderBufferTXN *txn; + XLogRecPtr prepare_end_lsn; + TimestampTz prepare_time; + + txn = ReorderBufferTXNByXid(rb, xid, true, NULL, commit_lsn, false); + + /* unknown transaction, nothing to do */ + if (txn == NULL) + return; + + /* + * By this time the txn has the prepare record information, remember it to + * be later used for rollback. + */ + prepare_end_lsn = txn->end_lsn; + prepare_time = txn->commit_time; + + /* add the gid in the txn */ + txn->gid = pstrdup(gid); + + /* + * It is possible that this transaction is not decoded at prepare time + * either because by that time we didn't have a consistent snapshot or it + * was decoded earlier but we have restarted. We can't distinguish between + * those two cases so we send the prepare in both the cases and let + * downstream decide whether to process or skip it. We don't need to + * decode the xact for aborts if it is not done already. + */ + if (!rbtxn_prepared(txn) && is_commit) + { + txn->txn_flags |= RBTXN_PREPARE; + + /* + * The prepare info must have been updated in txn even if we skip + * prepare. + */ + Assert(txn->final_lsn != InvalidXLogRecPtr); + + /* + * By this time the txn has the prepare record information and it is + * important to use that so that downstream gets the accurate + * information. If instead, we have passed commit information here + * then downstream can behave as it has already replayed commit + * prepared after the restart. 
+ */ + ReorderBufferReplay(txn, rb, xid, txn->final_lsn, txn->end_lsn, + txn->commit_time, txn->origin_id, txn->origin_lsn); + } + + txn->final_lsn = commit_lsn; + txn->end_lsn = end_lsn; + txn->commit_time = commit_time; + txn->origin_id = origin_id; + txn->origin_lsn = origin_lsn; + + if (is_commit) + rb->commit_prepared(rb, txn, commit_lsn); + else + rb->rollback_prepared(rb, txn, prepare_end_lsn, prepare_time); + + /* cleanup: make sure there's no cache pollution */ + ReorderBufferExecuteInvalidations(txn->ninvalidations, + txn->invalidations); + ReorderBufferCleanupTXN(rb, txn); +} + /* * Abort a transaction that possibly has previous changes. Needs to be first * called for subtransactions and then for the toplevel xid. @@ -2605,6 +2872,39 @@ ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn) ReorderBufferCleanupTXN(rb, txn); } +/* + * Invalidate cache for those transactions that need to be skipped just in case + * catalogs were manipulated as part of the transaction. + * + * Note that this is a special-purpose function for prepared transactions where + * we don't want to clean up the TXN even when we decide to skip it. See + * DecodePrepare. + */ +void +ReorderBufferInvalidate(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, + false); + + /* unknown, nothing to do */ + if (txn == NULL) + return; + + /* + * Process cache invalidation messages if there are any. Even if we're not + * interested in the transaction's contents, it could have manipulated the + * catalog and we need to update the caches according to that. + */ + if (txn->base_snapshot != NULL && txn->ninvalidations > 0) + ReorderBufferImmediateInvalidation(rb, txn->ninvalidations, + txn->invalidations); + else + Assert(txn->ninvalidations == 0); +} + + /* * Execute invalidations happening outside the context of a decoded * transaction. 
That currently happens either for xid-less commits diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 9d5d68f3fa785..e903e561afc42 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -107,7 +107,7 @@ * is a convenient point to initialize replication from, which is why we * export a snapshot at that point, which *can* be used to read normal data. * - * Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/snapbuild.c @@ -834,6 +834,13 @@ SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn) if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, txn->xid)) continue; + /* + * We don't need to add snapshot to prepared transactions as they + * should not see the new catalog contents. + */ + if (rbtxn_prepared(txn) || rbtxn_skip_prepared(txn)) + continue; + elog(DEBUG2, "adding a new snapshot to %u at %X/%X", txn->xid, (uint32) (lsn >> 32), (uint32) lsn); @@ -1378,7 +1385,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn * a) allow isolationtester to notice that we're currently waiting for * something. * b) log a new xl_running_xacts record where it'd be helpful, without having - * to write for bgwriter or checkpointer. + * to wait for bgwriter or checkpointer. * --- */ static void @@ -1407,7 +1414,7 @@ SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff) /* * All transactions we needed to finish finished - try to ensure there is * another xl_running_xacts record in a timely manner, without having to - * write for bgwriter or checkpointer to log one. During recovery we + * wait for bgwriter or checkpointer to log one. During recovery we * can't enforce that, so we'll have to wait. 
*/ if (!RecoveryInProgress()) @@ -1481,7 +1488,7 @@ static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn) { Size needed_length; - SnapBuildOnDisk *ondisk; + SnapBuildOnDisk *ondisk = NULL; char *ondisk_c; int fd; char tmppath[MAXPGPATH]; @@ -1680,6 +1687,9 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn) out: ReorderBufferSetRestartPoint(builder->reorder, builder->last_serialized_snapshot); + /* be tidy */ + if (ondisk) + pfree(ondisk); } /* diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index 6259606537329..a18f847ade052 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -2,7 +2,7 @@ * tablesync.c * PostgreSQL logical replication: initial table data synchronization * - * Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/logical/tablesync.c @@ -640,7 +640,7 @@ fetch_remote_table_info(char *nspname, char *relname, StringInfoData cmd; TupleTableSlot *slot; Oid tableRow[] = {OIDOID, CHAROID, CHAROID}; - Oid attrRow[] = {TEXTOID, OIDOID, INT4OID, BOOLOID}; + Oid attrRow[] = {TEXTOID, OIDOID, BOOLOID}; bool isnull; int natt; @@ -685,7 +685,6 @@ fetch_remote_table_info(char *nspname, char *relname, appendStringInfo(&cmd, "SELECT a.attname," " a.atttypid," - " a.atttypmod," " a.attnum = ANY(i.indkey)" " FROM pg_catalog.pg_attribute a" " LEFT JOIN pg_catalog.pg_index i" @@ -718,7 +717,7 @@ fetch_remote_table_info(char *nspname, char *relname, Assert(!isnull); lrel->atttyps[natt] = DatumGetObjectId(slot_getattr(slot, 2, &isnull)); Assert(!isnull); - if (DatumGetBool(slot_getattr(slot, 4, &isnull))) + if (DatumGetBool(slot_getattr(slot, 3, &isnull))) lrel->attkeys = bms_add_member(lrel->attkeys, natt); /* Should never happen. 
*/ diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 3874939380079..eb7db89cef7d5 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -2,7 +2,7 @@ * worker.c * PostgreSQL logical replication worker (apply) * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/logical/worker.c @@ -225,7 +225,7 @@ static void maybe_reread_subscription(void); static void apply_dispatch(StringInfo s); static void apply_handle_commit_internal(StringInfo s, - LogicalRepCommitData* commit_data); + LogicalRepCommitData *commit_data); static void apply_handle_insert_internal(ResultRelInfo *relinfo, EState *estate, TupleTableSlot *remoteslot); static void apply_handle_update_internal(ResultRelInfo *relinfo, @@ -752,10 +752,10 @@ apply_handle_stream_start(StringInfo s) /* * Start a transaction on stream start, this transaction will be committed - * on the stream stop unless it is a tablesync worker in which case it will - * be committed after processing all the messages. We need the transaction - * for handling the buffile, used for serializing the streaming data and - * subxact info. + * on the stream stop unless it is a tablesync worker in which case it + * will be committed after processing all the messages. We need the + * transaction for handling the buffile, used for serializing the + * streaming data and subxact info. */ ensure_transaction(); @@ -1060,7 +1060,7 @@ apply_handle_stream_commit(StringInfo s) * Helper function for apply_handle_commit and apply_handle_stream_commit. */ static void -apply_handle_commit_internal(StringInfo s, LogicalRepCommitData* commit_data) +apply_handle_commit_internal(StringInfo s, LogicalRepCommitData *commit_data) { /* The synchronization worker runs in single transaction. 
*/ if (IsTransactionState() && !am_tablesync_worker()) @@ -1309,7 +1309,8 @@ apply_handle_update(StringInfo s) InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0); /* - * Populate updatedCols so that per-column triggers can fire. This could + * Populate updatedCols so that per-column triggers can fire, and so + * executor can correctly pass down indexUnchanged hint. This could * include more columns than were actually changed on the publisher * because the logical replication protocol doesn't contain that * information. But it would for example exclude columns that only exist diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c index 49d25b02d744b..79765f96969df 100644 --- a/src/backend/replication/pgoutput/pgoutput.c +++ b/src/backend/replication/pgoutput/pgoutput.c @@ -3,7 +3,7 @@ * pgoutput.c * Logical Replication output plugin * - * Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/pgoutput/pgoutput.c @@ -502,6 +502,7 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, MemoryContext old; RelationSyncEntry *relentry; TransactionId xid = InvalidTransactionId; + Relation ancestor = NULL; if (!is_publishable_relation(relation)) return; @@ -552,7 +553,8 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, if (relentry->publish_as_relid != RelationGetRelid(relation)) { Assert(relation->rd_rel->relispartition); - relation = RelationIdGetRelation(relentry->publish_as_relid); + ancestor = RelationIdGetRelation(relentry->publish_as_relid); + relation = ancestor; /* Convert tuple if needed. 
*/ if (relentry->map) tuple = execute_attr_map_tuple(tuple, relentry->map); @@ -574,7 +576,8 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, if (relentry->publish_as_relid != RelationGetRelid(relation)) { Assert(relation->rd_rel->relispartition); - relation = RelationIdGetRelation(relentry->publish_as_relid); + ancestor = RelationIdGetRelation(relentry->publish_as_relid); + relation = ancestor; /* Convert tuples if needed. */ if (relentry->map) { @@ -598,7 +601,8 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, if (relentry->publish_as_relid != RelationGetRelid(relation)) { Assert(relation->rd_rel->relispartition); - relation = RelationIdGetRelation(relentry->publish_as_relid); + ancestor = RelationIdGetRelation(relentry->publish_as_relid); + relation = ancestor; /* Convert tuple if needed. */ if (relentry->map) oldtuple = execute_attr_map_tuple(oldtuple, relentry->map); @@ -616,6 +620,12 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, Assert(false); } + if (RelationIsValid(ancestor)) + { + RelationClose(ancestor); + ancestor = NULL; + } + /* Cleanup */ MemoryContextSwitchTo(old); MemoryContextReset(data->context); @@ -1169,5 +1179,16 @@ rel_sync_cache_publication_cb(Datum arg, int cacheid, uint32 hashvalue) */ hash_seq_init(&status, RelationSyncCache); while ((entry = (RelationSyncEntry *) hash_seq_search(&status)) != NULL) + { entry->replicate_valid = false; + + /* + * There might be some relations dropped from the publication so we + * don't need to publish the changes for them. 
+ */ + entry->pubactions.pubinsert = false; + entry->pubactions.pubupdate = false; + entry->pubactions.pubdelete = false; + entry->pubactions.pubtruncate = false; + } } diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index f93a0de2187fb..eb283a86327c8 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -3,7 +3,7 @@ * * repl_gram.y - Parser for the replication commands * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l index 452ad9fc278e8..dcc3c3fc515cb 100644 --- a/src/backend/replication/repl_scanner.l +++ b/src/backend/replication/repl_scanner.l @@ -4,7 +4,7 @@ * repl_scanner.l * a lexical scanner for the replication commands * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 9c7cf13d4d9c2..e00c7ffc013b0 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -4,7 +4,7 @@ * Replication slot management. 
* * - * Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Copyright (c) 2012-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index 1725ad0736fd5..057f41046dd04 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -3,7 +3,7 @@ * slotfuncs.c * Support functions for replication slots * - * Copyright (c) 2012-2020, PostgreSQL Global Development Group + * Copyright (c) 2012-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/slotfuncs.c diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c index 6e8c76537af0a..1c1bf1605285b 100644 --- a/src/backend/replication/syncrep.c +++ b/src/backend/replication/syncrep.c @@ -63,7 +63,7 @@ * the standbys which are considered as synchronous at that moment * will release waiters from the queue. * - * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/syncrep.c diff --git a/src/backend/replication/syncrep_gram.y b/src/backend/replication/syncrep_gram.y index 350195eff6451..88d95f2228625 100644 --- a/src/backend/replication/syncrep_gram.y +++ b/src/backend/replication/syncrep_gram.y @@ -3,7 +3,7 @@ * * syncrep_gram.y - Parser for synchronous_standby_names * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/replication/syncrep_scanner.l b/src/backend/replication/syncrep_scanner.l index 6883f60e18ce9..0491590d060b0 100644 --- a/src/backend/replication/syncrep_scanner.l +++ b/src/backend/replication/syncrep_scanner.l @@ -4,7 +4,7 @@ * syncrep_scanner.l * a lexical scanner for 
synchronous_standby_names * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 9621c8d0efe2f..723f513d8bcb2 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -39,7 +39,7 @@ * specific parts are in the libpqwalreceiver module. It's loaded * dynamically to avoid linking the server with libpq. * - * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c index c3e317df9ffc9..69b91a7dab8f5 100644 --- a/src/backend/replication/walreceiverfuncs.c +++ b/src/backend/replication/walreceiverfuncs.c @@ -6,7 +6,7 @@ * with the walreceiver process. Functions implementing walreceiver itself * are in walreceiver.c. * - * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index d5c9bc31d8e79..8545c6c423170 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -37,7 +37,7 @@ * record, wait for it to be replicated to the standby, and then exit. 
* * - * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/replication/walsender.c @@ -2491,7 +2491,7 @@ WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo, XLogSegNo endSegNo; XLByteToSeg(sendTimeLineValidUpto, endSegNo, state->segcxt.ws_segsize); - if (state->seg.ws_segno == endSegNo) + if (nextSegNo == endSegNo) *tli_p = sendTimeLineNextTLI; } diff --git a/src/backend/rewrite/Makefile b/src/backend/rewrite/Makefile index b435b3e985c00..4680752e6a7f8 100644 --- a/src/backend/rewrite/Makefile +++ b/src/backend/rewrite/Makefile @@ -17,6 +17,7 @@ OBJS = \ rewriteHandler.o \ rewriteManip.o \ rewriteRemove.o \ + rewriteSearchCycle.o \ rewriteSupport.o \ rowsecurity.o diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index e7855fa4512c5..ee4ccbbd3823c 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -3,7 +3,7 @@ * rewriteDefine.c * routines for defining a rewrite rule * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index c25012f325b4c..0672f497c6b35 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -3,7 +3,7 @@ * rewriteHandler.c * Primary module of query rewriter. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -38,6 +38,7 @@ #include "rewrite/rewriteDefine.h" #include "rewrite/rewriteHandler.h" #include "rewrite/rewriteManip.h" +#include "rewrite/rewriteSearchCycle.h" #include "rewrite/rowsecurity.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -2079,6 +2080,23 @@ fireRIRrules(Query *parsetree, List *activeRIRs) int rt_index; ListCell *lc; + /* + * Expand SEARCH and CYCLE clauses in CTEs. + * + * This is just a convenient place to do this, since we are already + * looking at each Query. + */ + foreach(lc, parsetree->cteList) + { + CommonTableExpr *cte = lfirst_node(CommonTableExpr, lc); + + if (cte->search_clause || cte->cycle_clause) + { + cte = rewriteSearchAndCycle(cte); + lfirst(lc) = cte; + } + } + /* * don't try to convert this into a foreach loop, because rtable list can * get changed each time through... 
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index a727f41bde34a..d4e0b8b4defe7 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -2,7 +2,7 @@ * * rewriteManip.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/rewrite/rewriteRemove.c b/src/backend/rewrite/rewriteRemove.c index a24303fd00c67..a48b15e249da7 100644 --- a/src/backend/rewrite/rewriteRemove.c +++ b/src/backend/rewrite/rewriteRemove.c @@ -3,7 +3,7 @@ * rewriteRemove.c * routines for removing rewrite rules * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/rewrite/rewriteSearchCycle.c b/src/backend/rewrite/rewriteSearchCycle.c new file mode 100644 index 0000000000000..1a7d66fa6f991 --- /dev/null +++ b/src/backend/rewrite/rewriteSearchCycle.c @@ -0,0 +1,668 @@ +/*------------------------------------------------------------------------- + * + * rewriteSearchCycle.c + * Support for rewriting SEARCH and CYCLE clauses. 
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/rewrite/rewriteSearchCycle.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_operator_d.h" +#include "catalog/pg_type_d.h" +#include "nodes/makefuncs.h" +#include "nodes/pg_list.h" +#include "nodes/parsenodes.h" +#include "nodes/primnodes.h" +#include "parser/analyze.h" +#include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" +#include "rewrite/rewriteSearchCycle.h" +#include "utils/fmgroids.h" + + +/*---------- + * Rewrite a CTE with SEARCH or CYCLE clause + * + * Consider a CTE like + * + * WITH RECURSIVE ctename (col1, col2, col3) AS ( + * query1 + * UNION [ALL] + * SELECT trosl FROM ctename + * ) + * + * With a search clause + * + * SEARCH BREADTH FIRST BY col1, col2 SET sqc + * + * the CTE is rewritten to + * + * WITH RECURSIVE ctename (col1, col2, col3, sqc) AS ( + * SELECT col1, col2, col3, -- original WITH column list + * ROW(0, col1, col2) -- initial row of search columns + * FROM (query1) "*TLOCRN*" (col1, col2, col3) + * UNION [ALL] + * SELECT col1, col2, col3, -- same as above + * ROW(sqc.depth + 1, col1, col2) -- count depth + * FROM (SELECT trosl, ctename.sqc FROM ctename) "*TROCRN*" (col1, col2, col3, sqc) + * ) + * + * (This isn't quite legal SQL: sqc.depth is meant to refer to the first + * column of sqc, which has a row type, but the field names are not defined + * here. Representing this properly in SQL would be more complicated (and the + * SQL standard actually does it in that more complicated way), but the + * internal representation allows us to construct it this way.)
+ * + * With a search clause + * + * SEARCH DEPTH FIRST BY col1, col2 SET sqc + * + * the CTE is rewritten to + * + * WITH RECURSIVE ctename (col1, col2, col3, sqc) AS ( + * SELECT col1, col2, col3, -- original WITH column list + * ARRAY[ROW(col1, col2)] -- initial row of search columns + * FROM (query1) "*TLOCRN*" (col1, col2, col3) + * UNION [ALL] + * SELECT col1, col2, col3, -- same as above + * sqc || ARRAY[ROW(col1, col2)] -- record rows seen + * FROM (SELECT trosl, ctename.sqc FROM ctename) "*TROCRN*" (col1, col2, col3, sqc) + * ) + * + * With a cycle clause + * + * CYCLE col1, col2 SET cmc TO 'Y' DEFAULT 'N' USING cpa + * + * (cmc = cycle mark column, cpa = cycle path) the CTE is rewritten to + * + * WITH RECURSIVE ctename (col1, col2, col3, cmc, cpa) AS ( + * SELECT col1, col2, col3, -- original WITH column list + * 'N', -- cycle mark default + * ARRAY[ROW(col1, col2)] -- initial row of cycle columns + * FROM (query1) "*TLOCRN*" (col1, col2, col3) + * UNION [ALL] + * SELECT col1, col2, col3, -- same as above + * CASE WHEN ROW(col1, col2) = ANY (ARRAY[cpa]) THEN 'Y' ELSE 'N' END, -- compute cycle mark column + * cpa || ARRAY[ROW(col1, col2)] -- record rows seen + * FROM (SELECT trosl, ctename.cmc, ctename.cpa FROM ctename) "*TROCRN*" (col1, col2, col3, cmc, cpa) + * WHERE cmc <> 'Y' + * ) + * + * The expression to compute the cycle mark column in the right-hand query is + * written as + * + * CASE WHEN ROW(col1, col2) IN (SELECT p.* FROM TABLE(cpa) p) THEN cmv ELSE cmd END + * + * in the SQL standard, but in PostgreSQL we can use the scalar-array operator + * expression shown above. + * + * Also, in some of the cases where operators are shown above we actually + * directly produce the underlying function call. + * + * If both a search clause and a cycle clause are specified, then the search + * clause column is added before the cycle clause columns. 
+ */ + +/* + * Make a RowExpr from the specified column names, which have to be among the + * output columns of the CTE. + */ +static RowExpr * +make_path_rowexpr(const CommonTableExpr *cte, const List *col_list) +{ + RowExpr *rowexpr; + ListCell *lc; + + rowexpr = makeNode(RowExpr); + rowexpr->row_typeid = RECORDOID; + rowexpr->row_format = COERCE_IMPLICIT_CAST; + rowexpr->location = -1; + + foreach(lc, col_list) + { + char *colname = strVal(lfirst(lc)); + + for (int i = 0; i < list_length(cte->ctecolnames); i++) + { + char *colname2 = strVal(list_nth(cte->ctecolnames, i)); + + if (strcmp(colname, colname2) == 0) + { + Var *var; + + var = makeVar(1, i + 1, + list_nth_oid(cte->ctecoltypes, i), + list_nth_int(cte->ctecoltypmods, i), + list_nth_oid(cte->ctecolcollations, i), + 0); + rowexpr->args = lappend(rowexpr->args, var); + rowexpr->colnames = lappend(rowexpr->colnames, makeString(colname)); + break; + } + } + } + + return rowexpr; +} + +/* + * Wrap a RowExpr in an ArrayExpr, for the initial search depth first or cycle + * row. + */ +static Expr * +make_path_initial_array(RowExpr *rowexpr) +{ + ArrayExpr *arr; + + arr = makeNode(ArrayExpr); + arr->array_typeid = RECORDARRAYOID; + arr->element_typeid = RECORDOID; + arr->location = -1; + arr->elements = list_make1(rowexpr); + + return (Expr *) arr; +} + +/* + * Make an array catenation expression like + * + * cpa || ARRAY[ROW(cols)] + * + * where the varattno of cpa is provided as path_varattno. 
+ */ +static Expr * +make_path_cat_expr(RowExpr *rowexpr, AttrNumber path_varattno) +{ + ArrayExpr *arr; + FuncExpr *fexpr; + + arr = makeNode(ArrayExpr); + arr->array_typeid = RECORDARRAYOID; + arr->element_typeid = RECORDOID; + arr->location = -1; + arr->elements = list_make1(rowexpr); + + fexpr = makeFuncExpr(F_ARRAY_CAT, RECORDARRAYOID, + list_make2(makeVar(1, path_varattno, RECORDARRAYOID, -1, 0, 0), + arr), + InvalidOid, InvalidOid, COERCE_EXPLICIT_CALL); + + return (Expr *) fexpr; +} + +/* + * The real work happens here. + */ +CommonTableExpr * +rewriteSearchAndCycle(CommonTableExpr *cte) +{ + Query *ctequery; + SetOperationStmt *sos; + int rti1, + rti2; + RangeTblEntry *rte1, + *rte2, + *newrte; + Query *newq1, + *newq2; + Query *newsubquery; + RangeTblRef *rtr; + Oid search_seq_type = InvalidOid; + AttrNumber sqc_attno = InvalidAttrNumber; + AttrNumber cmc_attno = InvalidAttrNumber; + AttrNumber cpa_attno = InvalidAttrNumber; + TargetEntry *tle; + RowExpr *cycle_col_rowexpr = NULL; + RowExpr *search_col_rowexpr = NULL; + List *ewcl; + int cte_rtindex = -1; + + Assert(cte->search_clause || cte->cycle_clause); + + cte = copyObject(cte); + + ctequery = castNode(Query, cte->ctequery); + + /* + * The top level of the CTE's query should be a UNION. Find the two + * subqueries. + */ + Assert(ctequery->setOperations); + sos = castNode(SetOperationStmt, ctequery->setOperations); + Assert(sos->op == SETOP_UNION); + + rti1 = castNode(RangeTblRef, sos->larg)->rtindex; + rti2 = castNode(RangeTblRef, sos->rarg)->rtindex; + + rte1 = rt_fetch(rti1, ctequery->rtable); + rte2 = rt_fetch(rti2, ctequery->rtable); + + Assert(rte1->rtekind == RTE_SUBQUERY); + Assert(rte2->rtekind == RTE_SUBQUERY); + + /* + * We'll need this a few times later. 
+ */ + if (cte->search_clause) + { + if (cte->search_clause->search_breadth_first) + search_seq_type = RECORDOID; + else + search_seq_type = RECORDARRAYOID; + } + + /* + * Attribute numbers of the added columns in the CTE's column list + */ + if (cte->search_clause) + sqc_attno = list_length(cte->ctecolnames) + 1; + if (cte->cycle_clause) + { + cmc_attno = list_length(cte->ctecolnames) + 1; + cpa_attno = list_length(cte->ctecolnames) + 2; + if (cte->search_clause) + { + cmc_attno++; + cpa_attno++; + } + } + + /* + * Make new left subquery + */ + newq1 = makeNode(Query); + newq1->commandType = CMD_SELECT; + newq1->canSetTag = true; + + newrte = makeNode(RangeTblEntry); + newrte->rtekind = RTE_SUBQUERY; + newrte->alias = makeAlias("*TLOCRN*", cte->ctecolnames); + newrte->eref = newrte->alias; + newsubquery = copyObject(rte1->subquery); + IncrementVarSublevelsUp((Node *) newsubquery, 1, 1); + newrte->subquery = newsubquery; + newrte->inFromCl = true; + newq1->rtable = list_make1(newrte); + + rtr = makeNode(RangeTblRef); + rtr->rtindex = 1; + newq1->jointree = makeFromExpr(list_make1(rtr), NULL); + + /* + * Make target list + */ + for (int i = 0; i < list_length(cte->ctecolnames); i++) + { + Var *var; + + var = makeVar(1, i + 1, + list_nth_oid(cte->ctecoltypes, i), + list_nth_int(cte->ctecoltypmods, i), + list_nth_oid(cte->ctecolcollations, i), + 0); + tle = makeTargetEntry((Expr *) var, i + 1, strVal(list_nth(cte->ctecolnames, i)), false); + tle->resorigtbl = castNode(TargetEntry, list_nth(rte1->subquery->targetList, i))->resorigtbl; + tle->resorigcol = castNode(TargetEntry, list_nth(rte1->subquery->targetList, i))->resorigcol; + newq1->targetList = lappend(newq1->targetList, tle); + } + + if (cte->search_clause) + { + Expr *texpr; + + search_col_rowexpr = make_path_rowexpr(cte, cte->search_clause->search_col_list); + if (cte->search_clause->search_breadth_first) + { + search_col_rowexpr->args = lcons(makeConst(INT8OID, -1, InvalidOid, sizeof(int64), + 
Int64GetDatum(0), false, FLOAT8PASSBYVAL), + search_col_rowexpr->args); + search_col_rowexpr->colnames = lcons(makeString("*DEPTH*"), search_col_rowexpr->colnames); + texpr = (Expr *) search_col_rowexpr; + } + else + texpr = make_path_initial_array(search_col_rowexpr); + tle = makeTargetEntry(texpr, + list_length(newq1->targetList) + 1, + cte->search_clause->search_seq_column, + false); + newq1->targetList = lappend(newq1->targetList, tle); + } + if (cte->cycle_clause) + { + tle = makeTargetEntry((Expr *) cte->cycle_clause->cycle_mark_default, + list_length(newq1->targetList) + 1, + cte->cycle_clause->cycle_mark_column, + false); + newq1->targetList = lappend(newq1->targetList, tle); + cycle_col_rowexpr = make_path_rowexpr(cte, cte->cycle_clause->cycle_col_list); + tle = makeTargetEntry(make_path_initial_array(cycle_col_rowexpr), + list_length(newq1->targetList) + 1, + cte->cycle_clause->cycle_path_column, + false); + newq1->targetList = lappend(newq1->targetList, tle); + } + + rte1->subquery = newq1; + + if (cte->search_clause) + { + rte1->eref->colnames = lappend(rte1->eref->colnames, makeString(cte->search_clause->search_seq_column)); + } + if (cte->cycle_clause) + { + rte1->eref->colnames = lappend(rte1->eref->colnames, makeString(cte->cycle_clause->cycle_mark_column)); + rte1->eref->colnames = lappend(rte1->eref->colnames, makeString(cte->cycle_clause->cycle_path_column)); + } + + /* + * Make new right subquery + */ + newq2 = makeNode(Query); + newq2->commandType = CMD_SELECT; + newq2->canSetTag = true; + + newrte = makeNode(RangeTblEntry); + newrte->rtekind = RTE_SUBQUERY; + ewcl = copyObject(cte->ctecolnames); + if (cte->search_clause) + { + ewcl = lappend(ewcl, makeString(cte->search_clause->search_seq_column)); + } + if (cte->cycle_clause) + { + ewcl = lappend(ewcl, makeString(cte->cycle_clause->cycle_mark_column)); + ewcl = lappend(ewcl, makeString(cte->cycle_clause->cycle_path_column)); + } + newrte->alias = makeAlias("*TROCRN*", ewcl); + newrte->eref = 
newrte->alias; + + /* + * Find the reference to our CTE in the range table + */ + for (int rti = 1; rti <= list_length(rte2->subquery->rtable); rti++) + { + RangeTblEntry *e = rt_fetch(rti, rte2->subquery->rtable); + + if (e->rtekind == RTE_CTE && strcmp(cte->ctename, e->ctename) == 0) + { + cte_rtindex = rti; + break; + } + } + Assert(cte_rtindex > 0); + + newsubquery = copyObject(rte2->subquery); + IncrementVarSublevelsUp((Node *) newsubquery, 1, 1); + + /* + * Add extra columns to target list of subquery of right subquery + */ + if (cte->search_clause) + { + Var *var; + + /* ctename.sqc */ + var = makeVar(cte_rtindex, sqc_attno, + search_seq_type, -1, InvalidOid, 0); + tle = makeTargetEntry((Expr *) var, + list_length(newsubquery->targetList) + 1, + cte->search_clause->search_seq_column, + false); + newsubquery->targetList = lappend(newsubquery->targetList, tle); + } + if (cte->cycle_clause) + { + Var *var; + + /* ctename.cmc */ + var = makeVar(cte_rtindex, cmc_attno, + cte->cycle_clause->cycle_mark_type, + cte->cycle_clause->cycle_mark_typmod, + cte->cycle_clause->cycle_mark_collation, 0); + tle = makeTargetEntry((Expr *) var, + list_length(newsubquery->targetList) + 1, + cte->cycle_clause->cycle_mark_column, + false); + newsubquery->targetList = lappend(newsubquery->targetList, tle); + + /* ctename.cpa */ + var = makeVar(cte_rtindex, cpa_attno, + RECORDARRAYOID, -1, InvalidOid, 0); + tle = makeTargetEntry((Expr *) var, + list_length(newsubquery->targetList) + 1, + cte->cycle_clause->cycle_path_column, + false); + newsubquery->targetList = lappend(newsubquery->targetList, tle); + } + + newrte->subquery = newsubquery; + newrte->inFromCl = true; + newq2->rtable = list_make1(newrte); + + rtr = makeNode(RangeTblRef); + rtr->rtindex = 1; + + if (cte->cycle_clause) + { + Expr *expr; + + /* + * Add cmc <> cmv condition + */ + expr = make_opclause(cte->cycle_clause->cycle_mark_neop, BOOLOID, false, + (Expr *) makeVar(1, cmc_attno, + cte->cycle_clause->cycle_mark_type, 
+ cte->cycle_clause->cycle_mark_typmod, + cte->cycle_clause->cycle_mark_collation, 0), + (Expr *) cte->cycle_clause->cycle_mark_value, + InvalidOid, + cte->cycle_clause->cycle_mark_collation); + + newq2->jointree = makeFromExpr(list_make1(rtr), (Node *) expr); + } + else + newq2->jointree = makeFromExpr(list_make1(rtr), NULL); + + /* + * Make target list + */ + for (int i = 0; i < list_length(cte->ctecolnames); i++) + { + Var *var; + + var = makeVar(1, i + 1, + list_nth_oid(cte->ctecoltypes, i), + list_nth_int(cte->ctecoltypmods, i), + list_nth_oid(cte->ctecolcollations, i), + 0); + tle = makeTargetEntry((Expr *) var, i + 1, strVal(list_nth(cte->ctecolnames, i)), false); + tle->resorigtbl = castNode(TargetEntry, list_nth(rte2->subquery->targetList, i))->resorigtbl; + tle->resorigcol = castNode(TargetEntry, list_nth(rte2->subquery->targetList, i))->resorigcol; + newq2->targetList = lappend(newq2->targetList, tle); + } + + if (cte->search_clause) + { + Expr *texpr; + + if (cte->search_clause->search_breadth_first) + { + FieldSelect *fs; + FuncExpr *fexpr; + + /* + * ROW(sqc.depth + 1, cols) + */ + + search_col_rowexpr = copyObject(search_col_rowexpr); + + fs = makeNode(FieldSelect); + fs->arg = (Expr *) makeVar(1, sqc_attno, RECORDOID, -1, 0, 0); + fs->fieldnum = 1; + fs->resulttype = INT8OID; + fs->resulttypmod = -1; + + fexpr = makeFuncExpr(F_INT8INC, INT8OID, list_make1(fs), InvalidOid, InvalidOid, COERCE_EXPLICIT_CALL); + + lfirst(list_head(search_col_rowexpr->args)) = fexpr; + + texpr = (Expr *) search_col_rowexpr; + } + else + { + /* + * sqc || ARRAY[ROW(cols)] + */ + texpr = make_path_cat_expr(search_col_rowexpr, sqc_attno); + } + tle = makeTargetEntry(texpr, + list_length(newq2->targetList) + 1, + cte->search_clause->search_seq_column, + false); + newq2->targetList = lappend(newq2->targetList, tle); + } + + if (cte->cycle_clause) + { + ScalarArrayOpExpr *saoe; + CaseExpr *caseexpr; + CaseWhen *casewhen; + + /* + * CASE WHEN ROW(cols) = ANY (ARRAY[cpa]) THEN 
cmv ELSE cmd END + */ + + saoe = makeNode(ScalarArrayOpExpr); + saoe->location = -1; + saoe->opno = RECORD_EQ_OP; + saoe->useOr = true; + saoe->args = list_make2(cycle_col_rowexpr, + makeVar(1, cpa_attno, RECORDARRAYOID, -1, 0, 0)); + + caseexpr = makeNode(CaseExpr); + caseexpr->location = -1; + caseexpr->casetype = cte->cycle_clause->cycle_mark_type; + caseexpr->casecollid = cte->cycle_clause->cycle_mark_collation; + casewhen = makeNode(CaseWhen); + casewhen->location = -1; + casewhen->expr = (Expr *) saoe; + casewhen->result = (Expr *) cte->cycle_clause->cycle_mark_value; + caseexpr->args = list_make1(casewhen); + caseexpr->defresult = (Expr *) cte->cycle_clause->cycle_mark_default; + + tle = makeTargetEntry((Expr *) caseexpr, + list_length(newq2->targetList) + 1, + cte->cycle_clause->cycle_mark_column, + false); + newq2->targetList = lappend(newq2->targetList, tle); + + /* + * cpa || ARRAY[ROW(cols)] + */ + tle = makeTargetEntry(make_path_cat_expr(cycle_col_rowexpr, cpa_attno), + list_length(newq2->targetList) + 1, + cte->cycle_clause->cycle_path_column, + false); + newq2->targetList = lappend(newq2->targetList, tle); + } + + rte2->subquery = newq2; + + if (cte->search_clause) + { + rte2->eref->colnames = lappend(rte2->eref->colnames, makeString(cte->search_clause->search_seq_column)); + } + if (cte->cycle_clause) + { + rte2->eref->colnames = lappend(rte2->eref->colnames, makeString(cte->cycle_clause->cycle_mark_column)); + rte2->eref->colnames = lappend(rte2->eref->colnames, makeString(cte->cycle_clause->cycle_path_column)); + } + + /* + * Add the additional columns to the SetOperationStmt + */ + if (cte->search_clause) + { + sos->colTypes = lappend_oid(sos->colTypes, search_seq_type); + sos->colTypmods = lappend_int(sos->colTypmods, -1); + sos->colCollations = lappend_oid(sos->colCollations, InvalidOid); + if (!sos->all) + sos->groupClauses = lappend(sos->groupClauses, + makeSortGroupClauseForSetOp(search_seq_type)); + } + if (cte->cycle_clause) + { + 
sos->colTypes = lappend_oid(sos->colTypes, cte->cycle_clause->cycle_mark_type); + sos->colTypmods = lappend_int(sos->colTypmods, cte->cycle_clause->cycle_mark_typmod); + sos->colCollations = lappend_oid(sos->colCollations, cte->cycle_clause->cycle_mark_collation); + if (!sos->all) + sos->groupClauses = lappend(sos->groupClauses, + makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type)); + + sos->colTypes = lappend_oid(sos->colTypes, RECORDARRAYOID); + sos->colTypmods = lappend_int(sos->colTypmods, -1); + sos->colCollations = lappend_oid(sos->colCollations, InvalidOid); + if (!sos->all) + sos->groupClauses = lappend(sos->groupClauses, + makeSortGroupClauseForSetOp(RECORDARRAYOID)); + } + + /* + * Add the additional columns to the CTE query's target list + */ + if (cte->search_clause) + { + ctequery->targetList = lappend(ctequery->targetList, + makeTargetEntry((Expr *) makeVar(1, sqc_attno, + search_seq_type, -1, InvalidOid, 0), + list_length(ctequery->targetList) + 1, + cte->search_clause->search_seq_column, + false)); + } + if (cte->cycle_clause) + { + ctequery->targetList = lappend(ctequery->targetList, + makeTargetEntry((Expr *) makeVar(1, cmc_attno, + cte->cycle_clause->cycle_mark_type, + cte->cycle_clause->cycle_mark_typmod, + cte->cycle_clause->cycle_mark_collation, 0), + list_length(ctequery->targetList) + 1, + cte->cycle_clause->cycle_mark_column, + false)); + ctequery->targetList = lappend(ctequery->targetList, + makeTargetEntry((Expr *) makeVar(1, cpa_attno, + RECORDARRAYOID, -1, InvalidOid, 0), + list_length(ctequery->targetList) + 1, + cte->cycle_clause->cycle_path_column, + false)); + } + + /* + * Add the additional columns to the CTE's output columns + */ + cte->ctecolnames = ewcl; + if (cte->search_clause) + { + cte->ctecoltypes = lappend_oid(cte->ctecoltypes, search_seq_type); + cte->ctecoltypmods = lappend_int(cte->ctecoltypmods, -1); + cte->ctecolcollations = lappend_oid(cte->ctecolcollations, InvalidOid); + } + if (cte->cycle_clause) + { 
+ cte->ctecoltypes = lappend_oid(cte->ctecoltypes, cte->cycle_clause->cycle_mark_type); + cte->ctecoltypmods = lappend_int(cte->ctecoltypmods, cte->cycle_clause->cycle_mark_typmod); + cte->ctecolcollations = lappend_oid(cte->ctecolcollations, cte->cycle_clause->cycle_mark_collation); + + cte->ctecoltypes = lappend_oid(cte->ctecoltypes, RECORDARRAYOID); + cte->ctecoltypmods = lappend_int(cte->ctecoltypmods, -1); + cte->ctecolcollations = lappend_oid(cte->ctecolcollations, InvalidOid); + } + + return cte; +} diff --git a/src/backend/rewrite/rewriteSupport.c b/src/backend/rewrite/rewriteSupport.c index fc9a3b1ebf665..85f1ac953ad1f 100644 --- a/src/backend/rewrite/rewriteSupport.c +++ b/src/backend/rewrite/rewriteSupport.c @@ -3,7 +3,7 @@ * rewriteSupport.c * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/rewrite/rowsecurity.c b/src/backend/rewrite/rowsecurity.c index 0fe2f9ca8388a..fc26cb23a21a0 100644 --- a/src/backend/rewrite/rowsecurity.c +++ b/src/backend/rewrite/rowsecurity.c @@ -29,7 +29,7 @@ * in the current environment, but that may change if the row_security GUC or * the current role changes. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California */ #include "postgres.h" diff --git a/src/backend/snowball/dict_snowball.c b/src/backend/snowball/dict_snowball.c index 4e1aceee02573..044e20cef85c3 100644 --- a/src/backend/snowball/dict_snowball.c +++ b/src/backend/snowball/dict_snowball.c @@ -3,7 +3,7 @@ * dict_snowball.c * Snowball dictionary * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/snowball/dict_snowball.c diff --git a/src/backend/snowball/snowball.sql.in b/src/backend/snowball/snowball.sql.in index 0d47facd0020b..3397fb1e02c39 100644 --- a/src/backend/snowball/snowball.sql.in +++ b/src/backend/snowball/snowball.sql.in @@ -1,7 +1,7 @@ /* * text search configuration for _LANGNAME_ language * - * Copyright (c) 2007-2020, PostgreSQL Global Development Group + * Copyright (c) 2007-2021, PostgreSQL Global Development Group * * src/backend/snowball/snowball.sql.in * diff --git a/src/backend/snowball/snowball_func.sql.in b/src/backend/snowball/snowball_func.sql.in index 8e2063b7330de..cb1eaca4fb5f5 100644 --- a/src/backend/snowball/snowball_func.sql.in +++ b/src/backend/snowball/snowball_func.sql.in @@ -1,7 +1,7 @@ /* * Create underlying C functions for Snowball stemmers * - * Copyright (c) 2007-2020, PostgreSQL Global Development Group + * Copyright (c) 2007-2021, PostgreSQL Global Development Group * * src/backend/snowball/snowball_func.sql.in * diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index b1abcde96872c..f6e399b192fbd 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -3,7 +3,7 @@ * dependencies.c * POSTGRES functional dependencies * - * Portions Copyright (c) 
1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 6d26de37f4ddf..a030ea3653af5 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -6,7 +6,7 @@ * Generic code supporting statistics objects created via CREATE STATISTICS. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index fae792a2ddf11..abbc1f1ba8b91 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -4,7 +4,7 @@ * POSTGRES multivariate MCV lists * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 4b86f0ab2d13f..9ef21debb63bb 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -13,7 +13,7 @@ * estimates are already available in pg_statistic. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index a8ce6603ed00b..e9e4f35bb5f2f 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -3,7 +3,7 @@ * buf_init.c * buffer manager initialization routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c index 4953ae9f82449..caa03ae123351 100644 --- a/src/backend/storage/buffer/buf_table.c +++ b/src/backend/storage/buffer/buf_table.c @@ -10,7 +10,7 @@ * before the lock is released (see notes in README). 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index c5e87071517e8..561c212092f76 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -3,7 +3,7 @@ * bufmgr.c * buffer manager interface routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -70,6 +70,14 @@ #define RELS_BSEARCH_THRESHOLD 20 +/* + * This is the size (in the number of blocks) above which we scan the + * entire buffer pool to remove the buffers for all the pages of relation + * being dropped. For the relations with size below this threshold, we find + * the buffers by doing lookups in BufMapping table. + */ +#define BUF_DROP_FULL_SCAN_THRESHOLD (uint32) (NBuffers / 32) + typedef struct PrivateRefCountEntry { Buffer buffer; @@ -473,6 +481,10 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, BufferAccessStrategy strategy, bool *foundPtr); static void FlushBuffer(BufferDesc *buf, SMgrRelation reln); +static void FindAndDropRelFileNodeBuffers(RelFileNode rnode, + ForkNumber forkNum, + BlockNumber nForkBlock, + BlockNumber firstDelBlock); static void AtProcExit_Buffers(int code, Datum arg); static void CheckForBufferLeaks(void); static int rnode_comparator(const void *p1, const void *p2); @@ -2965,19 +2977,19 @@ BufferGetLSNAtomic(Buffer buffer) * later. It is also the responsibility of higher-level code to ensure * that no other process could be trying to load more pages of the * relation into buffers. - * - * XXX currently it sequentially searches the buffer pool, should be - * changed to more clever ways of searching. 
However, this routine - * is used only in code paths that aren't very performance-critical, - * and we shouldn't slow down the hot paths to make it faster ... * -------------------------------------------------------------------- */ void -DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum, +DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock) { int i; int j; + RelFileNodeBackend rnode; + BlockNumber nForkBlock[MAX_FORKNUM]; + BlockNumber nBlocksToInvalidate = 0; + + rnode = smgr_reln->smgr_rnode; /* If it's a local relation, it's localbuf.c's problem. */ if (RelFileNodeBackendIsTemp(rnode)) @@ -2991,6 +3003,56 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum, return; } + /* + * To remove all the pages of the specified relation forks from the buffer + * pool, we need to scan the entire buffer pool but we can optimize it by + * finding the buffers from BufMapping table provided we know the exact + * size of each fork of the relation. The exact size is required to ensure + * that we don't leave any buffer for the relation being dropped as + * otherwise the background writer or checkpointer can lead to a PANIC + * error while flushing buffers corresponding to files that don't exist. + * + * To know the exact size, we rely on the size cached for each fork by us + * during recovery which limits the optimization to recovery and on + * standbys but we can easily extend it once we have shared cache for + * relation size. + * + * In recovery, we cache the value returned by the first lseek(SEEK_END) + * and the future writes keeps the cached value up-to-date. See + * smgrextend. It is possible that the value of the first lseek is smaller + * than the actual number of existing blocks in the file due to buggy + * Linux kernels that might not have accounted for the recent write. But + * that should be fine because there must not be any buffers after that + * file size. 
+ */ + for (i = 0; i < nforks; i++) + { + /* Get the number of blocks for a relation's fork */ + nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]); + + if (nForkBlock[i] == InvalidBlockNumber) + { + nBlocksToInvalidate = InvalidBlockNumber; + break; + } + + /* calculate the number of blocks to be invalidated */ + nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]); + } + + /* + * We apply the optimization iff the total number of blocks to invalidate + * is below the BUF_DROP_FULL_SCAN_THRESHOLD. + */ + if (BlockNumberIsValid(nBlocksToInvalidate) && + nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD) + { + for (j = 0; j < nforks; j++) + FindAndDropRelFileNodeBuffers(rnode.node, forkNum[j], + nForkBlock[j], firstDelBlock[j]); + return; + } + for (i = 0; i < NBuffers; i++) { BufferDesc *bufHdr = GetBufferDescriptor(i); @@ -3042,28 +3104,33 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum, * -------------------------------------------------------------------- */ void -DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes) +DropRelFileNodesAllBuffers(SMgrRelation *smgr_reln, int nnodes) { - int i, - n = 0; + int i; + int j; + int n = 0; + SMgrRelation *rels; + BlockNumber (*block)[MAX_FORKNUM + 1]; + BlockNumber nBlocksToInvalidate = 0; RelFileNode *nodes; + bool cached = true; bool use_bsearch; if (nnodes == 0) return; - nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */ + rels = palloc(sizeof(SMgrRelation) * nnodes); /* non-local relations */ /* If it's a local relation, it's localbuf.c's problem. 
*/ for (i = 0; i < nnodes; i++) { - if (RelFileNodeBackendIsTemp(rnodes[i])) + if (RelFileNodeBackendIsTemp(smgr_reln[i]->smgr_rnode)) { - if (rnodes[i].backend == MyBackendId) - DropRelFileNodeAllLocalBuffers(rnodes[i].node); + if (smgr_reln[i]->smgr_rnode.backend == MyBackendId) + DropRelFileNodeAllLocalBuffers(smgr_reln[i]->smgr_rnode.node); } else - nodes[n++] = rnodes[i].node; + rels[n++] = smgr_reln[i]; } /* @@ -3072,10 +3139,72 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes) */ if (n == 0) { - pfree(nodes); + pfree(rels); return; } + /* + * This is used to remember the number of blocks for all the relations + * forks. + */ + block = (BlockNumber (*)[MAX_FORKNUM + 1]) + palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1)); + + /* + * We can avoid scanning the entire buffer pool if we know the exact size + * of each of the given relation forks. See DropRelFileNodeBuffers. + */ + for (i = 0; i < n && cached; i++) + { + for (j = 0; j <= MAX_FORKNUM; j++) + { + /* Get the number of blocks for a relation's fork. */ + block[i][j] = smgrnblocks_cached(rels[i], j); + + /* We need to only consider the relation forks that exists. */ + if (block[i][j] == InvalidBlockNumber) + { + if (!smgrexists(rels[i], j)) + continue; + cached = false; + break; + } + + /* calculate the total number of blocks to be invalidated */ + nBlocksToInvalidate += block[i][j]; + } + } + + /* + * We apply the optimization iff the total number of blocks to invalidate + * is below the BUF_DROP_FULL_SCAN_THRESHOLD. 
+ */ + if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD) + { + for (i = 0; i < n; i++) + { + for (j = 0; j <= MAX_FORKNUM; j++) + { + /* ignore relation forks that doesn't exist */ + if (!BlockNumberIsValid(block[i][j])) + continue; + + /* drop all the buffers for a particular relation fork */ + FindAndDropRelFileNodeBuffers(rels[i]->smgr_rnode.node, + j, block[i][j], 0); + } + } + + pfree(block); + pfree(rels); + return; + } + + pfree(block); + nodes = palloc(sizeof(RelFileNode) * n); /* non-local relations */ + for (i = 0; i < n; i++) + nodes[i] = rels[i]->smgr_rnode.node; + /* * For low number of relations to drop just use a simple walk through, to * save the bsearch overhead. The threshold to use is rather a guess than @@ -3131,6 +3260,66 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes) } pfree(nodes); + pfree(rels); +} + +/* --------------------------------------------------------------------- + * FindAndDropRelFileNodeBuffers + * + * This function performs look up in BufMapping table and removes from the + * buffer pool all the pages of the specified relation fork that has block + * number >= firstDelBlock. (In particular, with firstDelBlock = 0, all + * pages are removed.) 
+ * -------------------------------------------------------------------- + */ +static void +FindAndDropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, + BlockNumber nForkBlock, + BlockNumber firstDelBlock) +{ + BlockNumber curBlock; + + for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++) + { + uint32 bufHash; /* hash value for tag */ + BufferTag bufTag; /* identity of requested block */ + LWLock *bufPartitionLock; /* buffer partition lock for it */ + int buf_id; + BufferDesc *bufHdr; + uint32 buf_state; + + /* create a tag so we can lookup the buffer */ + INIT_BUFFERTAG(bufTag, rnode, forkNum, curBlock); + + /* determine its hash code and partition lock ID */ + bufHash = BufTableHashCode(&bufTag); + bufPartitionLock = BufMappingPartitionLock(bufHash); + + /* Check that it is in the buffer pool. If not, do nothing. */ + LWLockAcquire(bufPartitionLock, LW_SHARED); + buf_id = BufTableLookup(&bufTag, bufHash); + LWLockRelease(bufPartitionLock); + + if (buf_id < 0) + continue; + + bufHdr = GetBufferDescriptor(buf_id); + + /* + * We need to lock the buffer header and recheck if the buffer is + * still associated with the same block because the buffer could be + * evicted by some other backend loading blocks for a different + * relation after we release lock on the BufMapping table. + */ + buf_state = LockBufHdr(bufHdr); + + if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) && + bufHdr->tag.forkNum == forkNum && + bufHdr->tag.blockNum >= firstDelBlock) + InvalidateBuffer(bufHdr); /* releases spinlock */ + else + UnlockBufHdr(bufHdr, buf_state); + } } /* --------------------------------------------------------------------- @@ -3245,8 +3434,7 @@ PrintPinnedBufs(void) * XXX currently it sequentially searches the buffer pool, should be * changed to more clever ways of searching. 
This routine is not * used in any performance-critical code paths, so it's not worth - * adding additional overhead to normal paths to make it go faster; - * but see also DropRelFileNodeBuffers. + * adding additional overhead to normal paths to make it go faster. * -------------------------------------------------------------------- */ void @@ -3809,6 +3997,8 @@ LockBufferForCleanup(Buffer buffer) { BufferDesc *bufHdr; char *new_status = NULL; + TimestampTz waitStart = 0; + bool logged_recovery_conflict = false; Assert(BufferIsPinned(buffer)); Assert(PinCountWaitBuf == NULL); @@ -3844,6 +4034,16 @@ LockBufferForCleanup(Buffer buffer) /* Successfully acquired exclusive lock with pincount 1 */ UnlockBufHdr(bufHdr, buf_state); + /* + * Emit the log message if recovery conflict on buffer pin was + * resolved but the startup process waited longer than + * deadlock_timeout for it. + */ + if (logged_recovery_conflict) + LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + waitStart, GetCurrentTimestamp(), + NULL, false); + /* Report change to non-waiting status */ if (new_status) { @@ -3882,6 +4082,34 @@ LockBufferForCleanup(Buffer buffer) new_status[len] = '\0'; /* truncate off " waiting" */ } + /* + * Emit the log message if the startup process is waiting longer + * than deadlock_timeout for recovery conflict on buffer pin. + * + * Skip this if first time through because the startup process has + * not started waiting yet in this case. So, the wait start + * timestamp is set after this logic. + */ + if (waitStart != 0 && !logged_recovery_conflict) + { + TimestampTz now = GetCurrentTimestamp(); + + if (TimestampDifferenceExceeds(waitStart, now, + DeadlockTimeout)) + { + LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + waitStart, now, NULL, true); + logged_recovery_conflict = true; + } + } + + /* + * Set the wait start timestamp if logging is enabled and first + * time through. 
+ */ + if (log_recovery_conflict_waits && waitStart == 0) + waitStart = GetCurrentTimestamp(); + /* Publish the bufid that Startup process waits on */ SetStartupBufferPinWaitBufId(buffer - 1); /* Set alarm and then wait to be signaled by UnpinBuffer() */ diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 942f8d4edd267..6be80476dbd8c 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -4,7 +4,7 @@ * routines for managing the buffer pool's replacement strategy. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index cd3475e9e1d3f..04b3558ea3350 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -4,7 +4,7 @@ * local buffer manager. Fast buffer manager for temporary tables, * which never need to be WAL-logged or checkpointed, etc. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994-5, Regents of the University of California * * diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index d581f96eda985..a4be5fe5135e4 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -3,7 +3,7 @@ * buffile.c * Management of large buffered temporary files. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c index 0cf598dd0c641..da8b7cbeca3e4 100644 --- a/src/backend/storage/file/copydir.c +++ b/src/backend/storage/file/copydir.c @@ -3,7 +3,7 @@ * copydir.c * copies a directory * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * While "xcopy /e /i /q" works fine for copying directories, on Windows XP diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index f07b5325aa5b8..b58502837aa59 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -3,7 +3,7 @@ * fd.c * Virtual file descriptor code. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -92,6 +92,7 @@ #include "common/file_utils.h" #include "miscadmin.h" #include "pgstat.h" +#include "port/pg_iovec.h" #include "portability/mem.h" #include "storage/fd.h" #include "storage/ipc.h" @@ -3635,3 +3636,67 @@ data_sync_elevel(int elevel) { return data_sync_retry ? elevel : PANIC; } + +/* + * A convenience wrapper for pg_pwritev() that retries on partial write. If an + * error is returned, it is unspecified how much has been written. + */ +ssize_t +pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset) +{ + struct iovec iov_copy[PG_IOV_MAX]; + ssize_t sum = 0; + ssize_t part; + + /* We'd better have space to make a copy, in case we need to retry. 
*/ + if (iovcnt > PG_IOV_MAX) + { + errno = EINVAL; + return -1; + } + + for (;;) + { + /* Write as much as we can. */ + part = pg_pwritev(fd, iov, iovcnt, offset); + if (part < 0) + return -1; + +#ifdef SIMULATE_SHORT_WRITE + part = Min(part, 4096); +#endif + + /* Count our progress. */ + sum += part; + offset += part; + + /* Step over iovecs that are done. */ + while (iovcnt > 0 && iov->iov_len <= part) + { + part -= iov->iov_len; + ++iov; + --iovcnt; + } + + /* Are they all done? */ + if (iovcnt == 0) + { + /* We don't expect the kernel to write more than requested. */ + Assert(part == 0); + break; + } + + /* + * Move whatever's left to the front of our mutable copy and adjust + * the leading iovec. + */ + Assert(iovcnt > 0); + memmove(iov_copy, iov, sizeof(*iov) * iovcnt); + Assert(iov->iov_len > part); + iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part; + iov_copy[0].iov_len -= part; + iov = iov_copy; + } + + return sum; +} diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index 8700f7f19a4ae..40c758d789ddb 100644 --- a/src/backend/storage/file/reinit.c +++ b/src/backend/storage/file/reinit.c @@ -3,7 +3,7 @@ * reinit.c * Reinitialization of unlogged relations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/storage/file/sharedfileset.c b/src/backend/storage/file/sharedfileset.c index 859c22e79b621..de422b1ebdf63 100644 --- a/src/backend/storage/file/sharedfileset.c +++ b/src/backend/storage/file/sharedfileset.c @@ -3,7 +3,7 @@ * sharedfileset.c * Shared temporary file management. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 6a96126b0c2ff..8c12dda2380d0 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -4,7 +4,7 @@ * POSTGRES free space map for quickly finding free space in relations * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/storage/freespace/fsmpage.c b/src/backend/storage/freespace/fsmpage.c index 50f0ada756d2d..88ae51e5265fb 100644 --- a/src/backend/storage/freespace/fsmpage.c +++ b/src/backend/storage/freespace/fsmpage.c @@ -4,7 +4,7 @@ * routines to search and manipulate one FSM page. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/storage/freespace/indexfsm.c b/src/backend/storage/freespace/indexfsm.c index d975c3364b486..d66e10b89d292 100644 --- a/src/backend/storage/freespace/indexfsm.c +++ b/src/backend/storage/freespace/indexfsm.c @@ -4,7 +4,7 @@ * POSTGRES free space map for quickly finding free pages in relations * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/storage/ipc/barrier.c b/src/backend/storage/ipc/barrier.c index 69afd58689885..5c05297a2aa7f 100644 --- a/src/backend/storage/ipc/barrier.c +++ b/src/backend/storage/ipc/barrier.c @@ -3,7 +3,7 @@ * barrier.c * Barriers for synchronizing cooperating processes. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * From Wikipedia[1]: "In parallel computing, a barrier is a type of diff --git a/src/backend/storage/ipc/dsm.c b/src/backend/storage/ipc/dsm.c index dffbd8e82a2a2..ae82b4bdc0e26 100644 --- a/src/backend/storage/ipc/dsm.c +++ b/src/backend/storage/ipc/dsm.c @@ -14,7 +14,7 @@ * hard postmaster crash, remaining segments will be removed, if they * still exist, at the next postmaster startup. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/ipc/dsm_impl.c b/src/backend/storage/ipc/dsm_impl.c index d4306418dcb24..f7e292981e4de 100644 --- a/src/backend/storage/ipc/dsm_impl.c +++ b/src/backend/storage/ipc/dsm_impl.c @@ -36,7 +36,7 @@ * * As ever, Windows requires its own implementation. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/ipc/ipc.c b/src/backend/storage/ipc/ipc.c index 36a067c9244c8..4045d7d68a0a3 100644 --- a/src/backend/storage/ipc/ipc.c +++ b/src/backend/storage/ipc/ipc.c @@ -8,7 +8,7 @@ * exit-time cleanup for either a postmaster or a backend. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 96c2aaabbd65c..f9bbe97b50750 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -3,7 +3,7 @@ * ipci.c * POSTGRES inter-process communication initialization code. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c index 24afc47d5134b..f2d005eea054a 100644 --- a/src/backend/storage/ipc/latch.c +++ b/src/backend/storage/ipc/latch.c @@ -22,7 +22,7 @@ * The Windows implementation uses Windows events that are inherited by all * postmaster child processes. There's no need for the self-pipe trick there. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/storage/ipc/pmsignal.c b/src/backend/storage/ipc/pmsignal.c index 8ef3f6da4a128..280c2395c9ed3 100644 --- a/src/backend/storage/ipc/pmsignal.c +++ b/src/backend/storage/ipc/pmsignal.c @@ -4,7 +4,7 @@ * routines for signaling between the postmaster and its child processes * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index ee912b9d5e40d..cf12eda504113 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -34,7 +34,7 @@ * happen, it would tie up KnownAssignedXids indefinitely, so we protect * ourselves by pruning the array when a valid list of running XIDs arrives. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -3324,6 +3324,13 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) */ pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) +{ + return SignalVirtualTransaction(vxid, sigmode, true); +} + +pid_t +SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, + bool conflictPending) { ProcArrayStruct *arrayP = procArray; int index; @@ -3342,7 +3349,7 @@ CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) if (procvxid.backendId == vxid.backendId && procvxid.localTransactionId == vxid.localTransactionId) { - proc->recoveryConflictPending = true; + proc->recoveryConflictPending = conflictPending; pid = proc->pid; if (pid != 0) { diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index ffe67acea1ce1..c43cdd685b4d9 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -4,7 +4,7 @@ * Routines for interprocess signaling * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -18,6 +18,7 @@ #include #include "access/parallel.h" +#include "port/pg_bitutils.h" #include "commands/async.h" #include "miscadmin.h" #include "pgstat.h" @@ -87,12 +88,17 @@ typedef struct #define BARRIER_SHOULD_CHECK(flags, type) \ (((flags) & (((uint32) 1) << (uint32) (type))) != 0) +/* Clear the relevant type bit from the flags. 
*/ +#define BARRIER_CLEAR_BIT(flags, type) \ + ((flags) &= ~(((uint32) 1) << (uint32) (type))) + static ProcSignalHeader *ProcSignal = NULL; static volatile ProcSignalSlot *MyProcSignalSlot = NULL; static bool CheckProcSignal(ProcSignalReason reason); static void CleanupProcSignalState(int status, Datum arg); -static void ProcessBarrierPlaceholder(void); +static void ResetProcSignalBarrierBits(uint32 flags); +static bool ProcessBarrierPlaceholder(void); /* * ProcSignalShmemSize @@ -394,6 +400,12 @@ WaitForProcSignalBarrier(uint64 generation) volatile ProcSignalSlot *slot = &ProcSignal->psh_slot[i]; uint64 oldval; + /* + * It's important that we check only pss_barrierGeneration here and + * not pss_barrierCheckMask. Bits in pss_barrierCheckMask get cleared + * before the barrier is actually absorbed, but pss_barrierGeneration + * is updated only afterward. + */ oldval = pg_atomic_read_u64(&slot->pss_barrierGeneration); while (oldval < generation) { @@ -453,7 +465,7 @@ ProcessProcSignalBarrier(void) { uint64 local_gen; uint64 shared_gen; - uint32 flags; + volatile uint32 flags; Assert(MyProcSignalSlot); @@ -482,21 +494,92 @@ ProcessProcSignalBarrier(void) * read of the barrier generation above happens before we atomically * extract the flags, and that any subsequent state changes happen * afterward. + * + * NB: In order to avoid race conditions, we must zero pss_barrierCheckMask + * first and only afterwards try to do barrier processing. If we did it + * in the other order, someone could send us another barrier of some + * type right after we called the barrier-processing function but before + * we cleared the bit. We would have no way of knowing that the bit needs + * to stay set in that case, so the need to call the barrier-processing + * function again would just get forgotten. So instead, we tentatively + * clear all the bits and then put back any for which we don't manage + * to successfully absorb the barrier. 
*/ flags = pg_atomic_exchange_u32(&MyProcSignalSlot->pss_barrierCheckMask, 0); /* - * Process each type of barrier. It's important that nothing we call from - * here throws an error, because pss_barrierCheckMask has already been - * cleared. If we jumped out of here before processing all barrier types, - * then we'd forget about the need to do so later. - * - * NB: It ought to be OK to call the barrier-processing functions - * unconditionally, but it's more efficient to call only the ones that - * might need us to do something based on the flags. + * If there are no flags set, then we can skip doing any real work. + * Otherwise, establish a PG_TRY block, so that we don't lose track of + * which types of barrier processing are needed if an ERROR occurs. */ - if (BARRIER_SHOULD_CHECK(flags, PROCSIGNAL_BARRIER_PLACEHOLDER)) - ProcessBarrierPlaceholder(); + if (flags != 0) + { + bool success = true; + + PG_TRY(); + { + /* + * Process each type of barrier. The barrier-processing functions + * should normally return true, but may return false if the barrier + * can't be absorbed at the current time. This should be rare, + * because it's pretty expensive. Every single + * CHECK_FOR_INTERRUPTS() will return here until we manage to + * absorb the barrier, and that cost will add up in a hurry. + * + * NB: It ought to be OK to call the barrier-processing functions + * unconditionally, but it's more efficient to call only the ones + * that might need us to do something based on the flags. + */ + while (flags != 0) + { + ProcSignalBarrierType type; + bool processed = true; + + type = (ProcSignalBarrierType) pg_rightmost_one_pos32(flags); + switch (type) + { + case PROCSIGNAL_BARRIER_PLACEHOLDER: + processed = ProcessBarrierPlaceholder(); + break; + } + + /* + * To avoid an infinite loop, we must always unset the bit + * in flags. 
+ */ + BARRIER_CLEAR_BIT(flags, type); + + /* + * If we failed to process the barrier, reset the shared bit + * so we try again later, and set a flag so that we don't bump + * our generation. + */ + if (!processed) + { + ResetProcSignalBarrierBits(((uint32) 1) << type); + success = false; + } + } + } + PG_CATCH(); + { + /* + * If an ERROR occurred, we'll need to try again later to handle + * that barrier type and any others that haven't been handled yet + * or weren't successfully absorbed. + */ + ResetProcSignalBarrierBits(flags); + PG_RE_THROW(); + } + PG_END_TRY(); + + /* + * If some barrier types were not successfully absorbed, we will have + * to try again later. + */ + if (!success) + return; + } /* * State changes related to all types of barriers that might have been @@ -508,7 +591,20 @@ ProcessProcSignalBarrier(void) pg_atomic_write_u64(&MyProcSignalSlot->pss_barrierGeneration, shared_gen); } +/* + * If it turns out that we couldn't absorb one or more barrier types, either + * because the barrier-processing functions returned false or due to an error, + * arrange for processing to be retried later. + */ static void +ResetProcSignalBarrierBits(uint32 flags) +{ + pg_atomic_fetch_or_u32(&MyProcSignalSlot->pss_barrierCheckMask, flags); + ProcSignalBarrierPending = true; + InterruptPending = true; +} + +static bool ProcessBarrierPlaceholder(void) { /* @@ -518,7 +614,12 @@ ProcessBarrierPlaceholder(void) * appropriately descriptive. Get rid of this function and instead have * ProcessBarrierSomethingElse. Most likely, that function should live in * the file pertaining to that subsystem, rather than here. + * + * The return value should be 'true' if the barrier was successfully + * absorbed and 'false' if not. Note that returning 'false' can lead to + * very frequent retries, so try hard to make that an uncommon case. 
*/ + return true; } /* diff --git a/src/backend/storage/ipc/shm_mq.c b/src/backend/storage/ipc/shm_mq.c index ac9d23a3403ad..8a46962f939a3 100644 --- a/src/backend/storage/ipc/shm_mq.c +++ b/src/backend/storage/ipc/shm_mq.c @@ -8,7 +8,7 @@ * and only the receiver may receive. This is intended to allow a user * backend to communicate with worker backends that it has registered. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/storage/ipc/shm_mq.c diff --git a/src/backend/storage/ipc/shm_toc.c b/src/backend/storage/ipc/shm_toc.c index bdd72c48fb87d..863b98bf0545d 100644 --- a/src/backend/storage/ipc/shm_toc.c +++ b/src/backend/storage/ipc/shm_toc.c @@ -3,7 +3,7 @@ * shm_toc.c * shared memory segment table of contents * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/storage/ipc/shm_toc.c @@ -265,8 +265,8 @@ shm_toc_estimate(shm_toc_estimator *e) Size sz; sz = offsetof(shm_toc, toc_entry); - sz += add_size(sz, mul_size(e->number_of_keys, sizeof(shm_toc_entry))); - sz += add_size(sz, e->space_for_chunks); + sz = add_size(sz, mul_size(e->number_of_keys, sizeof(shm_toc_entry))); + sz = add_size(sz, e->space_for_chunks); return BUFFERALIGN(sz); } diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index 4ef8b18656d0c..4425e99f1725b 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -3,7 +3,7 @@ * shmem.c * create shared memory and initialize shared memory data structures. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/ipc/shmqueue.c b/src/backend/storage/ipc/shmqueue.c index d52b28f0fa76f..dc3238cecfab6 100644 --- a/src/backend/storage/ipc/shmqueue.c +++ b/src/backend/storage/ipc/shmqueue.c @@ -3,7 +3,7 @@ * shmqueue.c * shared memory linked lists * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c index d822e82cb98d1..69fe23a2563eb 100644 --- a/src/backend/storage/ipc/signalfuncs.c +++ b/src/backend/storage/ipc/signalfuncs.c @@ -3,7 +3,7 @@ * signalfuncs.c * Functions for signaling backends * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index b5640e46be01e..f585d63e5cdbc 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -3,7 +3,7 @@ * sinval.c * POSTGRES shared cache invalidation communication code. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index a9477ccb4a304..946bd8e3cb5ca 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -3,7 +3,7 @@ * sinvaladt.c * POSTGRES shared cache invalidation data manager. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 92d9027776c65..39a30c00f7a36 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -7,7 +7,7 @@ * AccessExclusiveLocks and starting snapshots for Hot Standby mode. * Plus conflict recovery processing. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -39,9 +39,14 @@ int vacuum_defer_cleanup_age; int max_standby_archive_delay = 30 * 1000; int max_standby_streaming_delay = 30 * 1000; +bool log_recovery_conflict_waits = false; static HTAB *RecoveryLockLists; +/* Flags set by timeout handlers */ +static volatile sig_atomic_t got_standby_deadlock_timeout = false; +static volatile sig_atomic_t got_standby_lock_timeout = false; + static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, ProcSignalReason reason, uint32 wait_event_info, @@ -49,6 +54,7 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlis static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason); static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts); static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks); +static const char *get_recovery_conflict_desc(ProcSignalReason reason); /* * Keep track of all the locks owned by a given transaction. @@ -214,15 +220,101 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info) return false; } +/* + * Log the recovery conflict. + * + * wait_start is the timestamp when the caller started to wait. + * now is the timestamp when this function has been called. + * wait_list is the list of virtual transaction ids assigned to + * conflicting processes. still_waiting indicates whether + * the startup process is still waiting for the recovery conflict + * to be resolved or not. 
+ */ +void +LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, + TimestampTz now, VirtualTransactionId *wait_list, + bool still_waiting) +{ + long secs; + int usecs; + long msecs; + StringInfoData buf; + int nprocs = 0; + + /* + * There must be no conflicting processes when the recovery conflict has + * already been resolved. + */ + Assert(still_waiting || wait_list == NULL); + + TimestampDifference(wait_start, now, &secs, &usecs); + msecs = secs * 1000 + usecs / 1000; + usecs = usecs % 1000; + + if (wait_list) + { + VirtualTransactionId *vxids; + + /* Construct a string of list of the conflicting processes */ + vxids = wait_list; + while (VirtualTransactionIdIsValid(*vxids)) + { + PGPROC *proc = BackendIdGetProc(vxids->backendId); + + /* proc can be NULL if the target backend is not active */ + if (proc) + { + if (nprocs == 0) + { + initStringInfo(&buf); + appendStringInfo(&buf, "%d", proc->pid); + } + else + appendStringInfo(&buf, ", %d", proc->pid); + + nprocs++; + } + + vxids++; + } + } + + /* + * If wait_list is specified, report the list of PIDs of active + * conflicting backends in a detail message. Note that if all the backends + * in the list are not active, no detail message is logged. + */ + if (still_waiting) + { + ereport(LOG, + errmsg("recovery still waiting after %ld.%03d ms: %s", + msecs, usecs, _(get_recovery_conflict_desc(reason))), + nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.", + "Conflicting processes: %s.", + nprocs, buf.data) : 0); + } + else + { + ereport(LOG, + errmsg("recovery finished waiting after %ld.%03d ms: %s", + msecs, usecs, _(get_recovery_conflict_desc(reason)))); + } + + if (nprocs > 0) + pfree(buf.data); +} + /* * This is the main executioner for any query backend that conflicts with * recovery processing. Judgement has already been passed on it within * a specific rmgr. Here we just issue the orders to the procs. The procs * then throw the required error as instructed. 
* - * If report_waiting is true, "waiting" is reported in PS display if necessary. - * If the caller has already reported that, report_waiting should be false. - * Otherwise, "waiting" is reported twice unexpectedly. + * If report_waiting is true, "waiting" is reported in PS display and the + * wait for recovery conflict is reported in the log, if necessary. If + * the caller is responsible for reporting them, report_waiting should be + * false. Otherwise, both the caller and this function report the same + * thing unexpectedly. */ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, @@ -230,15 +322,16 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, bool report_waiting) { TimestampTz waitStart = 0; - char *new_status; + char *new_status = NULL; + bool logged_recovery_conflict = false; /* Fast exit, to avoid a kernel call if there's no work to be done. */ if (!VirtualTransactionIdIsValid(*waitlist)) return; - if (report_waiting) + /* Set the wait start timestamp for reporting */ + if (report_waiting && (log_recovery_conflict_waits || update_process_title)) waitStart = GetCurrentTimestamp(); - new_status = NULL; /* we haven't changed the ps display */ while (VirtualTransactionIdIsValid(*waitlist)) { @@ -248,25 +341,6 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, /* wait until the virtual xid is gone */ while (!VirtualXactLock(*waitlist, false)) { - /* - * Report via ps if we have been waiting for more than 500 msec - * (should that be configurable?) 
- */ - if (update_process_title && new_status == NULL && report_waiting && - TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(), - 500)) - { - const char *old_status; - int len; - - old_status = get_ps_display(&len); - new_status = (char *) palloc(len + 8 + 1); - memcpy(new_status, old_status, len); - strcpy(new_status + len, " waiting"); - set_ps_display(new_status); - new_status[len] = '\0'; /* truncate off " waiting" */ - } - /* Is it time to kill it? */ if (WaitExceedsMaxStandbyDelay(wait_event_info)) { @@ -285,12 +359,63 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, if (pid != 0) pg_usleep(5000L); } + + if (waitStart != 0 && (!logged_recovery_conflict || new_status == NULL)) + { + TimestampTz now = 0; + bool maybe_log_conflict; + bool maybe_update_title; + + maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict); + maybe_update_title = (update_process_title && new_status == NULL); + + /* Get the current timestamp if not report yet */ + if (maybe_log_conflict || maybe_update_title) + now = GetCurrentTimestamp(); + + /* + * Report via ps if we have been waiting for more than 500 + * msec (should that be configurable?) + */ + if (maybe_update_title && + TimestampDifferenceExceeds(waitStart, now, 500)) + { + const char *old_status; + int len; + + old_status = get_ps_display(&len); + new_status = (char *) palloc(len + 8 + 1); + memcpy(new_status, old_status, len); + strcpy(new_status + len, " waiting"); + set_ps_display(new_status); + new_status[len] = '\0'; /* truncate off " waiting" */ + } + + /* + * Emit the log message if the startup process is waiting + * longer than deadlock_timeout for recovery conflict. 
+ */ + if (maybe_log_conflict && + TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout)) + { + LogRecoveryConflict(reason, waitStart, now, waitlist, true); + logged_recovery_conflict = true; + } + } } /* The virtual transaction is gone now, wait for the next one */ waitlist++; } + /* + * Emit the log message if recovery conflict was resolved but the startup + * process waited longer than deadlock_timeout for it. + */ + if (logged_recovery_conflict) + LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(), + NULL, false); + /* Reset ps display if we changed it */ if (new_status) { @@ -305,13 +430,15 @@ ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode VirtualTransactionId *backends; /* - * If we get passed InvalidTransactionId then we are a little surprised, - * but it is theoretically possible in normal running. It also happens - * when replaying already applied WAL records after a standby crash or - * restart, or when replaying an XLOG_HEAP2_VISIBLE record that marks as - * frozen a page which was already all-visible. If latestRemovedXid is - * invalid then there is no conflict. That rule applies across all record - * types that suffer from this conflict. + * If we get passed InvalidTransactionId then we do nothing (no conflict). + * + * This can happen when replaying already-applied WAL records after a + * standby crash or restart, or when replaying an XLOG_HEAP2_VISIBLE + * record that marks as frozen a page which was already all-visible. It's + * also quite common with records generated during index deletion + * (original execution of the deletion can reason that a recovery conflict + * which is sufficient for the deletion operation must take place before + * replay of the deletion record itself). */ if (!TransactionIdIsValid(latestRemovedXid)) return; @@ -395,11 +522,22 @@ ResolveRecoveryConflictWithDatabase(Oid dbid) * lock. 
As we are already queued to be granted the lock, no new lock * requests conflicting with ours will be granted in the meantime. * - * Deadlocks involving the Startup process and an ordinary backend process - * will be detected by the deadlock detector within the ordinary backend. + * We also must check for deadlocks involving the Startup process and + * hot-standby backend processes. If deadlock_timeout is reached in + * this function, all the backends holding the conflicting locks are + * requested to check themselves for deadlocks. + * + * logging_conflict should be true if the recovery conflict has not been + * logged yet even though logging is enabled. After deadlock_timeout is + * reached and the request for deadlock check is sent, we wait again to + * be signaled by the release of the lock if logging_conflict is false. + * Otherwise we return without waiting again so that the caller can report + * the recovery conflict. In this case, then, this function is called again + * with logging_conflict=false (because the recovery conflict has already + * been logged) and we will wait again for the lock to be released. */ void -ResolveRecoveryConflictWithLock(LOCKTAG locktag) +ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict) { TimestampTz ltime; @@ -407,7 +545,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag) ltime = GetStandbyLimitTime(); - if (GetCurrentTimestamp() >= ltime) + if (GetCurrentTimestamp() >= ltime && ltime != 0) { /* * We're already behind, so clear a path as quickly as possible. 
@@ -429,19 +567,85 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag) else { /* - * Wait (or wait again) until ltime + * Wait (or wait again) until ltime, and check for deadlocks as well + * if we will be waiting longer than deadlock_timeout */ - EnableTimeoutParams timeouts[1]; + EnableTimeoutParams timeouts[2]; + int cnt = 0; + + if (ltime != 0) + { + got_standby_lock_timeout = false; + timeouts[cnt].id = STANDBY_LOCK_TIMEOUT; + timeouts[cnt].type = TMPARAM_AT; + timeouts[cnt].fin_time = ltime; + cnt++; + } + + got_standby_deadlock_timeout = false; + timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT; + timeouts[cnt].type = TMPARAM_AFTER; + timeouts[cnt].delay_ms = DeadlockTimeout; + cnt++; - timeouts[0].id = STANDBY_LOCK_TIMEOUT; - timeouts[0].type = TMPARAM_AT; - timeouts[0].fin_time = ltime; - enable_timeouts(timeouts, 1); + enable_timeouts(timeouts, cnt); } /* Wait to be signaled by the release of the Relation Lock */ ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type); + /* + * Exit if ltime is reached. Then all the backends holding conflicting + * locks will be canceled in the next ResolveRecoveryConflictWithLock() + * call. + */ + if (got_standby_lock_timeout) + goto cleanup; + + if (got_standby_deadlock_timeout) + { + VirtualTransactionId *backends; + + backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL); + + /* Quick exit if there's no work to be done */ + if (!VirtualTransactionIdIsValid(*backends)) + goto cleanup; + + /* + * Send signals to all the backends holding the conflicting locks, to + * ask them to check themselves for deadlocks. + */ + while (VirtualTransactionIdIsValid(*backends)) + { + SignalVirtualTransaction(*backends, + PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + false); + backends++; + } + + /* + * Exit if the recovery conflict has not been logged yet even though + * logging is enabled, so that the caller can log that. Then + * RecoveryConflictWithLock() is called again and we will wait again + * for the lock to be released. 
+ */ + if (logging_conflict) + goto cleanup; + + /* + * Wait again here to be signaled by the release of the Relation Lock, + * to prevent the subsequent RecoveryConflictWithLock() from causing + * deadlock_timeout and sending a request for deadlocks check again. + * Otherwise the request continues to be sent every deadlock_timeout + * until the relation locks are released or ltime is reached. + */ + got_standby_deadlock_timeout = false; + ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type); + } + +cleanup: + /* * Clear any timeout requests established above. We assume here that the * Startup process doesn't have any other outstanding timeouts than those @@ -449,6 +653,8 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag) * timeouts individually, but that'd be slower. */ disable_all_timeouts(false); + got_standby_lock_timeout = false; + got_standby_deadlock_timeout = false; } /* @@ -487,15 +693,7 @@ ResolveRecoveryConflictWithBufferPin(void) ltime = GetStandbyLimitTime(); - if (ltime == 0) - { - /* - * We're willing to wait forever for conflicts, so set timeout for - * deadlock check only - */ - enable_timeout_after(STANDBY_DEADLOCK_TIMEOUT, DeadlockTimeout); - } - else if (GetCurrentTimestamp() >= ltime) + if (GetCurrentTimestamp() >= ltime && ltime != 0) { /* * We're already behind, so clear a path as quickly as possible. 
@@ -509,14 +707,23 @@ ResolveRecoveryConflictWithBufferPin(void) * waiting longer than deadlock_timeout */ EnableTimeoutParams timeouts[2]; + int cnt = 0; + + if (ltime != 0) + { + timeouts[cnt].id = STANDBY_TIMEOUT; + timeouts[cnt].type = TMPARAM_AT; + timeouts[cnt].fin_time = ltime; + cnt++; + } + + got_standby_deadlock_timeout = false; + timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT; + timeouts[cnt].type = TMPARAM_AFTER; + timeouts[cnt].delay_ms = DeadlockTimeout; + cnt++; - timeouts[0].id = STANDBY_TIMEOUT; - timeouts[0].type = TMPARAM_AT; - timeouts[0].fin_time = ltime; - timeouts[1].id = STANDBY_DEADLOCK_TIMEOUT; - timeouts[1].type = TMPARAM_AFTER; - timeouts[1].delay_ms = DeadlockTimeout; - enable_timeouts(timeouts, 2); + enable_timeouts(timeouts, cnt); } /* @@ -529,6 +736,25 @@ ResolveRecoveryConflictWithBufferPin(void) */ ProcWaitForSignal(PG_WAIT_BUFFER_PIN); + if (got_standby_deadlock_timeout) + { + /* + * Send out a request for hot-standby backends to check themselves for + * deadlocks. + * + * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait + * to be signaled by UnpinBuffer() again and send a request for + * deadlocks check if deadlock_timeout happens. This causes the + * request to continue to be sent every deadlock_timeout until the + * buffer is unpinned or ltime is reached. This would increase the + * workload in the startup process and backends. In practice it may + * not be so harmful because the period that the buffer is kept pinned + * is basically no so long. But we should fix this? + */ + SendRecoveryConflictWithBufferPin( + PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); + } + /* * Clear any timeout requests established above. We assume here that the * Startup process doesn't have any other timeouts than what this function @@ -536,6 +762,7 @@ ResolveRecoveryConflictWithBufferPin(void) * individually, but that'd be slower. 
*/ disable_all_timeouts(false); + got_standby_deadlock_timeout = false; } static void @@ -595,13 +822,12 @@ CheckRecoveryConflictDeadlock(void) /* * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT - * occurs before STANDBY_TIMEOUT. Send out a request for hot-standby - * backends to check themselves for deadlocks. + * occurs before STANDBY_TIMEOUT. */ void StandbyDeadLockHandler(void) { - SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); + got_standby_deadlock_timeout = true; } /* @@ -620,11 +846,11 @@ StandbyTimeoutHandler(void) /* * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded. - * This doesn't need to do anything, simply waking up is enough. */ void StandbyLockTimeoutHandler(void) { + got_standby_lock_timeout = true; } /* @@ -1122,3 +1348,36 @@ LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs, nmsgs * sizeof(SharedInvalidationMessage)); XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS); } + +/* Return the description of recovery conflict */ +static const char * +get_recovery_conflict_desc(ProcSignalReason reason) +{ + const char *reasonDesc = gettext_noop("unknown reason"); + + switch (reason) + { + case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + reasonDesc = gettext_noop("recovery conflict on buffer pin"); + break; + case PROCSIG_RECOVERY_CONFLICT_LOCK: + reasonDesc = gettext_noop("recovery conflict on lock"); + break; + case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: + reasonDesc = gettext_noop("recovery conflict on tablespace"); + break; + case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + reasonDesc = gettext_noop("recovery conflict on snapshot"); + break; + case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + reasonDesc = gettext_noop("recovery conflict on buffer deadlock"); + break; + case PROCSIG_RECOVERY_CONFLICT_DATABASE: + reasonDesc = gettext_noop("recovery conflict on database"); + break; + default: + break; + } + + return reasonDesc; +} diff --git 
a/src/backend/storage/large_object/inv_api.c b/src/backend/storage/large_object/inv_api.c index 20130e47b76c1..bee234bffc968 100644 --- a/src/backend/storage/large_object/inv_api.c +++ b/src/backend/storage/large_object/inv_api.c @@ -19,7 +19,7 @@ * memory context given to inv_open (for LargeObjectDesc structs). * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/lmgr/condition_variable.c b/src/backend/storage/lmgr/condition_variable.c index 2ec00397b491b..0a61ff0031fef 100644 --- a/src/backend/storage/lmgr/condition_variable.c +++ b/src/backend/storage/lmgr/condition_variable.c @@ -8,7 +8,7 @@ * interrupted, unlike LWLock waits. Condition variables are safe * to use within dynamic shared memory segments. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/storage/lmgr/condition_variable.c diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c index f7ed6968ca938..67733c0d1a76c 100644 --- a/src/backend/storage/lmgr/deadlock.c +++ b/src/backend/storage/lmgr/deadlock.c @@ -7,7 +7,7 @@ * detection and resolution algorithms. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/lmgr/generate-lwlocknames.pl b/src/backend/storage/lmgr/generate-lwlocknames.pl index 39cb97f5c3d95..8a44946594d42 100644 --- a/src/backend/storage/lmgr/generate-lwlocknames.pl +++ b/src/backend/storage/lmgr/generate-lwlocknames.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl # # Generate lwlocknames.h and lwlocknames.c from lwlocknames.txt -# Copyright (c) 2000-2020, PostgreSQL Global Development Group +# Copyright (c) 2000-2021, PostgreSQL Global Development Group use strict; use warnings; diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 7409de9405925..5736d03083f1a 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -3,7 +3,7 @@ * lmgr.c * POSTGRES lock manager code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -904,8 +904,7 @@ WaitForLockersMultiple(List *locktags, LOCKMODE lockmode, bool progress) /* * Note: GetLockConflicts() never reports our own xid, hence we need not - * check for that. Also, prepared xacts are not reported, which is fine - * since they certainly aren't going to do anything anymore. + * check for that. Also, prepared xacts are reported and awaited. 
*/ /* Finally wait for each such transaction to complete */ diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 53472dd21ec8e..79c1cf9b8b4e3 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -3,7 +3,7 @@ * lock.c * POSTGRES primary lock mechanism * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -2903,9 +2903,7 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock) * so use of this function has to be thought about carefully. * * Note we never include the current xact's vxid in the result array, - * since an xact never blocks itself. Also, prepared transactions are - * ignored, which is a bit more debatable but is appropriate for current - * uses of the result. + * since an xact never blocks itself. */ VirtualTransactionId * GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp) @@ -2930,19 +2928,21 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp) /* * Allocate memory to store results, and fill with InvalidVXID. We only - * need enough space for MaxBackends + a terminator, since prepared xacts - * don't count. InHotStandby allocate once in TopMemoryContext. + * need enough space for MaxBackends + max_prepared_xacts + a terminator. + * InHotStandby allocate once in TopMemoryContext. */ if (InHotStandby) { if (vxids == NULL) vxids = (VirtualTransactionId *) MemoryContextAlloc(TopMemoryContext, - sizeof(VirtualTransactionId) * (MaxBackends + 1)); + sizeof(VirtualTransactionId) * + (MaxBackends + max_prepared_xacts + 1)); } else vxids = (VirtualTransactionId *) - palloc0(sizeof(VirtualTransactionId) * (MaxBackends + 1)); + palloc0(sizeof(VirtualTransactionId) * + (MaxBackends + max_prepared_xacts + 1)); /* Compute hash code and partition lock, and look up conflicting modes. 
*/ hashcode = LockTagHashCode(locktag); @@ -3017,13 +3017,9 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp) /* Conflict! */ GET_VXID_FROM_PGPROC(vxid, *proc); - /* - * If we see an invalid VXID, then either the xact has already - * committed (or aborted), or it's a prepared xact. In either - * case we may ignore it. - */ if (VirtualTransactionIdIsValid(vxid)) vxids[count++] = vxid; + /* else, xact already committed or aborted */ /* No need to examine remaining slots. */ break; @@ -3082,11 +3078,6 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp) GET_VXID_FROM_PGPROC(vxid, *proc); - /* - * If we see an invalid VXID, then either the xact has already - * committed (or aborted), or it's a prepared xact. In either - * case we may ignore it. - */ if (VirtualTransactionIdIsValid(vxid)) { int i; @@ -3098,6 +3089,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp) if (i >= fast_count) vxids[count++] = vxid; } + /* else, xact already committed or aborted */ } } @@ -3107,7 +3099,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp) LWLockRelease(partitionLock); - if (count > MaxBackends) /* should never happen */ + if (count > MaxBackends + max_prepared_xacts) /* should never happen */ elog(PANIC, "too many conflicting locks found"); vxids[count].backendId = InvalidBackendId; @@ -4464,6 +4456,21 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait) Assert(VirtualTransactionIdIsValid(vxid)); + if (VirtualTransactionIdIsPreparedXact(vxid)) + { + LockAcquireResult lar; + + /* + * Prepared transactions don't hold vxid locks. The + * LocalTransactionId is always a normal, locked XID. 
+ */ + SET_LOCKTAG_TRANSACTION(tag, vxid.localTransactionId); + lar = LockAcquire(&tag, ShareLock, false, !wait); + if (lar != LOCKACQUIRE_NOT_AVAIL) + LockRelease(&tag, ShareLock, false); + return lar != LOCKACQUIRE_NOT_AVAIL; + } + SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid); /* diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 26bcce97350e4..8cb6a6f042ad8 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -20,7 +20,7 @@ * appropriate value for a free lock. The meaning of the variable is up to * the caller, the lightweight lock code just assigns and compares it. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -1725,8 +1725,6 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval) /* Now loop back and check the status of the lock again. */ } - TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), LW_EXCLUSIVE); - /* * Fix the process wait semaphore's count for any absorbed wakeups. */ diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index 774292fd94277..6c7cf6c295661 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++ b/src/backend/storage/lmgr/lwlocknames.txt @@ -15,7 +15,7 @@ SInvalWriteLock 6 WALBufMappingLock 7 WALWriteLock 8 ControlFileLock 9 -CheckpointLock 10 +# 10 was CheckpointLock XactSLRULock 11 SubtransSLRULock 12 MultiXactGenLock 13 diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index e42e131543a68..074df5b38c59c 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -135,7 +135,7 @@ * - Protects both PredXact and SerializableXidHash. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -438,7 +438,7 @@ static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT static void ReleaseRWConflict(RWConflict conflict); static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact); -static bool SerialPagePrecedesLogically(int p, int q); +static bool SerialPagePrecedesLogically(int page1, int page2); static void SerialInit(void); static void SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo); static SerCommitSeqNo SerialGetMinConflictCommitSeqNo(TransactionId xid); @@ -784,28 +784,80 @@ FlagSxactUnsafe(SERIALIZABLEXACT *sxact) /*------------------------------------------------------------------------*/ /* - * We will work on the page range of 0..SERIAL_MAX_PAGE. - * Compares using wraparound logic, as is required by slru.c. + * Decide whether a Serial page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes(). */ static bool -SerialPagePrecedesLogically(int p, int q) +SerialPagePrecedesLogically(int page1, int page2) { - int diff; + TransactionId xid1; + TransactionId xid2; + + xid1 = ((TransactionId) page1) * SERIAL_ENTRIESPERPAGE; + xid1 += FirstNormalTransactionId + 1; + xid2 = ((TransactionId) page2) * SERIAL_ENTRIESPERPAGE; + xid2 += FirstNormalTransactionId + 1; + + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + SERIAL_ENTRIESPERPAGE - 1)); +} + +#ifdef USE_ASSERT_CHECKING +static void +SerialPagePrecedesLogicallyUnitTests(void) +{ + int per_page = SERIAL_ENTRIESPERPAGE, + offset = per_page / 2; + int newestPage, + oldestPage, + headPage, + targetPage; + TransactionId newestXact, + oldestXact; + + /* GetNewTransactionId() has assigned the last XID it can safely use. 
*/ + newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1; /* nothing special */ + newestXact = newestPage * per_page + offset; + Assert(newestXact / per_page == newestPage); + oldestXact = newestXact + 1; + oldestXact -= 1U << 31; + oldestPage = oldestXact / per_page; /* - * We have to compare modulo (SERIAL_MAX_PAGE+1)/2. Both inputs should be - * in the range 0..SERIAL_MAX_PAGE. + * In this scenario, the SLRU headPage pertains to the last ~1000 XIDs + * assigned. oldestXact finishes, ~2B XIDs having elapsed since it + * started. Further transactions cause us to summarize oldestXact to + * tailPage. Function must return false so SerialAdd() doesn't zero + * tailPage (which may contain entries for other old, recently-finished + * XIDs) and half the SLRU. Reaching this requires burning ~2B XIDs in + * single-user mode, a negligible possibility. */ - Assert(p >= 0 && p <= SERIAL_MAX_PAGE); - Assert(q >= 0 && q <= SERIAL_MAX_PAGE); - - diff = p - q; - if (diff >= ((SERIAL_MAX_PAGE + 1) / 2)) - diff -= SERIAL_MAX_PAGE + 1; - else if (diff < -((int) (SERIAL_MAX_PAGE + 1) / 2)) - diff += SERIAL_MAX_PAGE + 1; - return diff < 0; + headPage = newestPage; + targetPage = oldestPage; + Assert(!SerialPagePrecedesLogically(headPage, targetPage)); + + /* + * In this scenario, the SLRU headPage pertains to oldestXact. We're + * summarizing an XID near newestXact. (Assume few other XIDs used + * SERIALIZABLE, hence the minimal headPage advancement. Assume + * oldestXact was long-running and only recently reached the SLRU.) + * Function must return true to make SerialAdd() create targetPage. + * + * Today's implementation mishandles this case, but it doesn't matter + * enough to fix. Verify that the defect affects just one page by + * asserting correct treatment of its prior page. Reaching this case + * requires burning ~2B XIDs in single-user mode, a negligible + * possibility. 
Moreover, if it does happen, the consequence would be + * mild, namely a new transaction failing in SimpleLruReadPage(). + */ + headPage = oldestPage; + targetPage = newestPage; + Assert(SerialPagePrecedesLogically(headPage, targetPage - 1)); +#if 0 + Assert(SerialPagePrecedesLogically(headPage, targetPage)); +#endif } +#endif /* * Initialize for the tracking of old serializable committed xids. @@ -822,6 +874,10 @@ SerialInit(void) SimpleLruInit(SerialSlruCtl, "Serial", NUM_SERIAL_BUFFERS, 0, SerialSLRULock, "pg_serial", LWTRANCHE_SERIAL_BUFFER, SYNC_HANDLER_NONE); +#ifdef USE_ASSERT_CHECKING + SerialPagePrecedesLogicallyUnitTests(); +#endif + SlruPagePrecedesUnitTests(SerialSlruCtl, SERIAL_ENTRIESPERPAGE); /* * Create or attach to the SerialControl structure. @@ -1030,7 +1086,7 @@ CheckPointPredicate(void) } else { - /* + /*---------- * The SLRU is no longer needed. Truncate to head before we set head * invalid. * @@ -1039,6 +1095,25 @@ CheckPointPredicate(void) * that we leave behind will appear to be new again. In that case it * won't be removed until XID horizon advances enough to make it * current again. + * + * XXX: This should happen in vac_truncate_clog(), not in checkpoints. + * Consider this scenario, starting from a system with no in-progress + * transactions and VACUUM FREEZE having maximized oldestXact: + * - Start a SERIALIZABLE transaction. + * - Start, finish, and summarize a SERIALIZABLE transaction, creating + * one SLRU page. + * - Consume XIDs to reach xidStopLimit. + * - Finish all transactions. Due to the long-running SERIALIZABLE + * transaction, earlier checkpoints did not touch headPage. The + * next checkpoint will change it, but that checkpoint happens after + * the end of the scenario. + * - VACUUM to advance XID limits. + * - Consume ~2M XIDs, crossing the former xidWrapLimit. + * - Start, finish, and summarize a SERIALIZABLE transaction. 
+ * SerialAdd() declines to create the targetPage, because headPage + * is not regarded as in the past relative to that targetPage. The + * transaction instigating the summarize fails in + * SimpleLruReadPage(). */ tailPage = serialControl->headPage; serialControl->headPage = -1; diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 7dc3911590e6e..c87ffc654919a 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -3,7 +3,7 @@ * proc.c * routines to manage per-process shared memory data structure * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -61,6 +61,7 @@ int DeadlockTimeout = 1000; int StatementTimeout = 0; int LockTimeout = 0; int IdleInTransactionSessionTimeout = 0; +int IdleSessionTimeout = 0; bool log_lock_waits = false; /* Pointer to this process's PGPROC struct, if any */ @@ -1063,8 +1064,10 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) LWLock *partitionLock = LockHashPartitionLock(hashcode); PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; + TimestampTz standbyWaitStart = 0; bool early_deadlock = false; bool allow_autovacuum_cancel = true; + bool logged_recovery_conflict = false; ProcWaitStatus myWaitStatus; PGPROC *proc; PGPROC *leader = MyProc->lockGroupLeader; @@ -1260,6 +1263,14 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) else enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout); } + else if (log_recovery_conflict_waits) + { + /* + * Set the wait start timestamp if logging is enabled and in hot + * standby. 
+ */ + standbyWaitStart = GetCurrentTimestamp(); + } /* * If somebody wakes us between LWLockRelease and WaitLatch, the latch @@ -1279,8 +1290,43 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) { if (InHotStandby) { - /* Set a timer and wait for that or for the Lock to be granted */ - ResolveRecoveryConflictWithLock(locallock->tag.lock); + bool maybe_log_conflict = + (standbyWaitStart != 0 && !logged_recovery_conflict); + + /* Set a timer and wait for that or for the lock to be granted */ + ResolveRecoveryConflictWithLock(locallock->tag.lock, + maybe_log_conflict); + + /* + * Emit the log message if the startup process is waiting longer + * than deadlock_timeout for recovery conflict on lock. + */ + if (maybe_log_conflict) + { + TimestampTz now = GetCurrentTimestamp(); + + if (TimestampDifferenceExceeds(standbyWaitStart, now, + DeadlockTimeout)) + { + VirtualTransactionId *vxids; + int cnt; + + vxids = GetLockConflicts(&locallock->tag.lock, + AccessExclusiveLock, &cnt); + + /* + * Log the recovery conflict and the list of PIDs of + * backends holding the conflicting lock. Note that we do + * logging even if there are no such backends right now + * because the startup process here has already waited + * longer than deadlock_timeout. + */ + LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK, + standbyWaitStart, now, + cnt > 0 ? vxids : NULL, true); + logged_recovery_conflict = true; + } + } } else { @@ -1562,6 +1608,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) disable_timeout(DEADLOCK_TIMEOUT, false); } + /* + * Emit the log message if recovery conflict on lock was resolved but the + * startup process waited longer than deadlock_timeout for it. + */ + if (InHotStandby && logged_recovery_conflict) + LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK, + standbyWaitStart, GetCurrentTimestamp(), + NULL, false); + /* * Re-acquire the lock table's partition lock. 
We have to do this to hold * off cancel/die interrupts before we can mess with lockAwaited (else we @@ -1793,6 +1848,9 @@ CheckDeadLockAlert(void) * Have to set the latch again, even if handle_sig_alarm already did. Back * then got_deadlock_timeout wasn't yet set... It's unlikely that this * ever would be a problem, but setting a set latch again is cheap. + * + * Note that, when this function runs inside procsignal_sigusr1_handler(), + * the handler function sets the latch again after the latch is set here. */ SetLatch(MyLatch); errno = save_errno; diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c index 7fac0703419d0..2dc2d67151072 100644 --- a/src/backend/storage/lmgr/s_lock.c +++ b/src/backend/storage/lmgr/s_lock.c @@ -36,7 +36,7 @@ * the probability of unintended failure) than to fix the total time * spent. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c index 9f7eae9339223..6fe0c6532c621 100644 --- a/src/backend/storage/lmgr/spin.c +++ b/src/backend/storage/lmgr/spin.c @@ -11,7 +11,7 @@ * is too slow to be very useful :-( * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index ddf18079e2fbf..9ac556b4ae0a7 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -3,7 +3,7 @@ * bufpage.c * POSTGRES standard buffer page code. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/page/checksum.c b/src/backend/storage/page/checksum.c index e010691c9f2ae..6462ddd812619 100644 --- a/src/backend/storage/page/checksum.c +++ b/src/backend/storage/page/checksum.c @@ -3,7 +3,7 @@ * checksum.c * Checksum implementation for data pages. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/storage/page/itemptr.c b/src/backend/storage/page/itemptr.c index e7806cc60c978..55759c383b616 100644 --- a/src/backend/storage/page/itemptr.c +++ b/src/backend/storage/page/itemptr.c @@ -3,7 +3,7 @@ * itemptr.c * POSTGRES disk item pointer code. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 9889ad6ad882f..0643d714fb1c5 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -10,7 +10,7 @@ * It doesn't matter whether the bits are on spinning rust or some other * storage technology. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 072bdd118fd5d..4dc24649df982 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -6,7 +6,7 @@ * All file system operations in POSTGRES dispatch through these * routines. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -390,6 +390,12 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) if (nrels == 0) return; + /* + * Get rid of any remaining buffers for the relations. bufmgr will just + * drop them without bothering to write the contents. + */ + DropRelFileNodesAllBuffers(rels, nrels); + /* * create an array which contains all relations to be dropped, and close * each relation's forks at the smgr level while at it @@ -407,12 +413,6 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) smgrsw[which].smgr_close(rels[i], forknum); } - /* - * Get rid of any remaining buffers for the relations. bufmgr will just - * drop them without bothering to write the contents. - */ - DropRelFileNodesAllBuffers(rnodes, nrels); - /* * It'd be nice to tell the stats collector to forget them immediately, * too. But we can't because we don't know the OIDs. @@ -549,6 +549,28 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum) { BlockNumber result; + /* Check and return if we get the cached value for the number of blocks. 
*/ + result = smgrnblocks_cached(reln, forknum); + if (result != InvalidBlockNumber) + return result; + + result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); + + reln->smgr_cached_nblocks[forknum] = result; + + return result; +} + +/* + * smgrnblocks_cached() -- Get the cached number of blocks in the supplied + * relation. + * + * Returns an InvalidBlockNumber when not in recovery and when the relation + * fork size is not cached. + */ +BlockNumber +smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum) +{ /* * For now, we only use cached values in recovery due to lack of a shared * invalidation mechanism for changes in file size. @@ -556,11 +578,7 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum) if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber) return reln->smgr_cached_nblocks[forknum]; - result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); - - reln->smgr_cached_nblocks[forknum] = result; - - return result; + return InvalidBlockNumber; } /* @@ -582,7 +600,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will * just drop them without bothering to write the contents. */ - DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nforks, nblocks); + DropRelFileNodeBuffers(reln, forknum, nforks, nblocks); /* * Send a shared-inval message to force other backends to close any smgr diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c index a49588f6b9ed8..fe143151cc524 100644 --- a/src/backend/storage/sync/sync.c +++ b/src/backend/storage/sync/sync.c @@ -3,7 +3,7 @@ * sync.c * File synchronization management code. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/tcop/cmdtag.c b/src/backend/tcop/cmdtag.c index b9fbff612f2a2..e208c7dcfac6a 100644 --- a/src/backend/tcop/cmdtag.c +++ b/src/backend/tcop/cmdtag.c @@ -3,7 +3,7 @@ * cmdtag.c * Data and routines for commandtag names and enumeration. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/tcop/dest.c b/src/backend/tcop/dest.c index 96789f88ef938..4316137a9d3e4 100644 --- a/src/backend/tcop/dest.c +++ b/src/backend/tcop/dest.c @@ -4,7 +4,7 @@ * support for communication destinations * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/tcop/fastpath.c b/src/backend/tcop/fastpath.c index e793984a9f3e0..1b76653caa4c3 100644 --- a/src/backend/tcop/fastpath.c +++ b/src/backend/tcop/fastpath.c @@ -3,7 +3,7 @@ * fastpath.c * routines to handle function requests from the frontend * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index d35c5020ea634..cb5a96117f6cd 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -3,7 +3,7 @@ * postgres.c * POSTGRES C Backend Interface * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * 
Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -635,7 +635,7 @@ pg_parse_query(const char *query_string) if (log_parser_stats) ResetUsage(); - raw_parsetree_list = raw_parser(query_string); + raw_parsetree_list = raw_parser(query_string, RAW_PARSE_DEFAULT); if (log_parser_stats) ShowUsage("PARSER STATISTICS"); @@ -1963,7 +1963,7 @@ exec_bind_message(StringInfo input_message) * will be generated in MessageContext. The plan refcount will be * assigned to the Portal, so it will be released at portal destruction. */ - cplan = GetCachedPlan(psrc, params, false, NULL); + cplan = GetCachedPlan(psrc, params, NULL, NULL); /* * Now we can define the portal. @@ -2789,6 +2789,18 @@ quickdie(SIGNAL_ARGS) * wrong, so there's not much to lose. Assuming the postmaster is still * running, it will SIGKILL us soon if we get stuck for some reason. * + * One thing we can do to make this a tad safer is to clear the error + * context stack, so that context callbacks are not called. That's a lot + * less code that could be reached here, and the context info is unlikely + * to be very relevant to a SIGQUIT report anyway. + */ + error_context_stack = NULL; + + /* + * When responding to a postmaster-issued signal, we send the message only + * to the client; sending to the server log just creates log spam, plus + * it's more code that we need to hope will work in a signal handler. + * * Ideally these should be ereport(FATAL), but then we'd not get control * back to force the correct type of process exit. 
*/ @@ -2802,7 +2814,7 @@ quickdie(SIGNAL_ARGS) break; case PMQUIT_FOR_CRASH: /* A crash-and-restart cycle is in progress */ - ereport(WARNING, + ereport(WARNING_CLIENT_ONLY, (errcode(ERRCODE_CRASH_SHUTDOWN), errmsg("terminating connection because of crash of another server process"), errdetail("The postmaster has commanded this server process to roll back" @@ -2814,7 +2826,7 @@ quickdie(SIGNAL_ARGS) break; case PMQUIT_FOR_STOP: /* Immediate-mode stop */ - ereport(WARNING, + ereport(WARNING_CLIENT_ONLY, (errcode(ERRCODE_ADMIN_SHUTDOWN), errmsg("terminating connection due to immediate shutdown command"))); break; @@ -2853,6 +2865,9 @@ die(SIGNAL_ARGS) ProcDiePending = true; } + /* for the statistics collector */ + pgStatSessionEndCause = DISCONNECT_KILLED; + /* If we're still here, waken anything waiting on the process latch */ SetLatch(MyLatch); @@ -2938,11 +2953,23 @@ RecoveryConflictInterrupt(ProcSignalReason reason) case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: /* - * If we aren't blocking the Startup process there is nothing - * more to do. + * If PROCSIG_RECOVERY_CONFLICT_BUFFERPIN is requested but we + * aren't blocking the Startup process there is nothing more + * to do. + * + * When PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK is + * requested, if we're waiting for locks and the startup + * process is not waiting for buffer pin (i.e., also waiting + * for locks), we set the flag so that ProcSleep() will check + * for deadlocks. */ if (!HoldingBufferPinThatDelaysRecovery()) + { + if (reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK && + GetStartupBufferPinWaitBufId() < 0) + CheckDeadLockAlert(); return; + } MyProc->recoveryConflictPending = true; @@ -3218,14 +3245,28 @@ ProcessInterrupts(void) if (IdleInTransactionSessionTimeoutPending) { - /* Has the timeout setting changed since last we looked? */ + /* + * If the GUC has been reset to zero, ignore the signal. This is + * important because the GUC update itself won't disable any pending + * interrupt. 
+ */ if (IdleInTransactionSessionTimeout > 0) ereport(FATAL, (errcode(ERRCODE_IDLE_IN_TRANSACTION_SESSION_TIMEOUT), errmsg("terminating connection due to idle-in-transaction timeout"))); else IdleInTransactionSessionTimeoutPending = false; + } + if (IdleSessionTimeoutPending) + { + /* As above, ignore the signal if the GUC has been reset to zero. */ + if (IdleSessionTimeout > 0) + ereport(FATAL, + (errcode(ERRCODE_IDLE_SESSION_TIMEOUT), + errmsg("terminating connection due to idle-session timeout"))); + else + IdleSessionTimeoutPending = false; } if (ProcSignalBarrierPending) @@ -3802,7 +3843,8 @@ PostgresMain(int argc, char *argv[], StringInfoData input_message; sigjmp_buf local_sigjmp_buf; volatile bool send_ready_for_query = true; - bool disable_idle_in_transaction_timeout = false; + bool idle_in_transaction_timeout_enabled = false; + bool idle_session_timeout_enabled = false; /* Initialize startup process environment if necessary. */ if (!IsUnderPostmaster) @@ -4204,6 +4246,8 @@ PostgresMain(int argc, char *argv[], * processing of batched messages, and because we don't want to report * uncommitted updates (that confuses autovacuum). The notification * processor wants a call too, if we are not in a transaction block. + * + * Also, if an idle timeout is enabled, start the timer for that. 
*/ if (send_ready_for_query) { @@ -4215,7 +4259,7 @@ PostgresMain(int argc, char *argv[], /* Start the idle-in-transaction timer */ if (IdleInTransactionSessionTimeout > 0) { - disable_idle_in_transaction_timeout = true; + idle_in_transaction_timeout_enabled = true; enable_timeout_after(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IdleInTransactionSessionTimeout); } @@ -4228,7 +4272,7 @@ PostgresMain(int argc, char *argv[], /* Start the idle-in-transaction timer */ if (IdleInTransactionSessionTimeout > 0) { - disable_idle_in_transaction_timeout = true; + idle_in_transaction_timeout_enabled = true; enable_timeout_after(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IdleInTransactionSessionTimeout); } @@ -4251,6 +4295,14 @@ PostgresMain(int argc, char *argv[], set_ps_display("idle"); pgstat_report_activity(STATE_IDLE, NULL); + + /* Start the idle-session timer */ + if (IdleSessionTimeout > 0) + { + idle_session_timeout_enabled = true; + enable_timeout_after(IDLE_SESSION_TIMEOUT, + IdleSessionTimeout); + } } /* Report any recently-changed GUC options */ @@ -4274,7 +4326,26 @@ PostgresMain(int argc, char *argv[], firstchar = ReadCommand(&input_message); /* - * (4) disable async signal conditions again. + * (4) turn off the idle-in-transaction and idle-session timeouts, if + * active. We do this before step (5) so that any last-moment timeout + * is certain to be detected in step (5). + * + * At most one of these timeouts will be active, so there's no need to + * worry about combining the timeout.c calls into one. + */ + if (idle_in_transaction_timeout_enabled) + { + disable_timeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, false); + idle_in_transaction_timeout_enabled = false; + } + if (idle_session_timeout_enabled) + { + disable_timeout(IDLE_SESSION_TIMEOUT, false); + idle_session_timeout_enabled = false; + } + + /* + * (5) disable async signal conditions again. 
* * Query cancel is supposed to be a no-op when there is no query in * progress, so if a query cancel arrived while we were idle, just @@ -4285,15 +4356,6 @@ PostgresMain(int argc, char *argv[], CHECK_FOR_INTERRUPTS(); DoingCommandRead = false; - /* - * (5) turn off the idle-in-transaction timeout - */ - if (disable_idle_in_transaction_timeout) - { - disable_timeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, false); - disable_idle_in_transaction_timeout = false; - } - /* * (6) check for any other interesting events that happened while we * slept. @@ -4520,9 +4582,15 @@ PostgresMain(int argc, char *argv[], * means unexpected loss of frontend connection. Either way, * perform normal shutdown. */ - case 'X': case EOF: + /* for the statistics collector */ + pgStatSessionEndCause = DISCONNECT_CLIENT_EOF; + + /* FALLTHROUGH */ + + case 'X': + /* * Reset whereToSendOutput to prevent ereport from attempting * to send any more messages to client. diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 96ea74f118dff..579b37a9c6d0a 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -3,7 +3,7 @@ * pquery.c * POSTGRES process query command code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index a42ead7d698e7..1d81071c35724 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -5,7 +5,7 @@ * commands. At one time acted as an interface between the Lisp and C * systems. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -917,45 +917,7 @@ standard_ProcessUtility(PlannedStmt *pstmt, break; case T_ReindexStmt: - { - ReindexStmt *stmt = (ReindexStmt *) parsetree; - int options; - - options = ReindexParseOptions(pstate, stmt); - if ((options & REINDEXOPT_CONCURRENTLY) != 0) - PreventInTransactionBlock(isTopLevel, - "REINDEX CONCURRENTLY"); - - switch (stmt->kind) - { - case REINDEX_OBJECT_INDEX: - ReindexIndex(stmt->relation, options, isTopLevel); - break; - case REINDEX_OBJECT_TABLE: - ReindexTable(stmt->relation, options, isTopLevel); - break; - case REINDEX_OBJECT_SCHEMA: - case REINDEX_OBJECT_SYSTEM: - case REINDEX_OBJECT_DATABASE: - - /* - * This cannot run inside a user transaction block; if - * we were inside a transaction, then its commit- and - * start-transaction-command calls would not have the - * intended effect! - */ - PreventInTransactionBlock(isTopLevel, - (stmt->kind == REINDEX_OBJECT_SCHEMA) ? "REINDEX SCHEMA" : - (stmt->kind == REINDEX_OBJECT_SYSTEM) ? 
"REINDEX SYSTEM" : - "REINDEX DATABASE"); - ReindexMultipleTables(stmt->name, stmt->kind, options); - break; - default: - elog(ERROR, "unrecognized object type: %d", - (int) stmt->kind); - break; - } - } + ExecReindex(pstate, (ReindexStmt *) parsetree, isTopLevel); break; /* @@ -2313,6 +2275,10 @@ CreateCommandTag(Node *parsetree) tag = CMDTAG_SELECT; break; + case T_PLAssignStmt: + tag = CMDTAG_SELECT; + break; + /* utility statements --- same whether raw or cooked */ case T_TransactionStmt: { @@ -3181,6 +3147,10 @@ GetCommandLogLevel(Node *parsetree) lev = LOGSTMT_ALL; break; + case T_PLAssignStmt: + lev = LOGSTMT_ALL; + break; + /* utility statements --- same whether raw or cooked */ case T_TransactionStmt: lev = LOGSTMT_ALL; diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile index 7c669b1abc97f..cdb259eca5811 100644 --- a/src/backend/tsearch/Makefile +++ b/src/backend/tsearch/Makefile @@ -2,7 +2,7 @@ # # Makefile for backend/tsearch # -# Copyright (c) 2006-2020, PostgreSQL Global Development Group +# Copyright (c) 2006-2021, PostgreSQL Global Development Group # # src/backend/tsearch/Makefile # diff --git a/src/backend/tsearch/dict.c b/src/backend/tsearch/dict.c index 835b6721a9f89..1e1ccdac2908b 100644 --- a/src/backend/tsearch/dict.c +++ b/src/backend/tsearch/dict.c @@ -3,7 +3,7 @@ * dict.c * Standard interface to dictionary * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index ecb15dcffd871..d93f6018cec02 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -3,7 +3,7 @@ * dict_ispell.c * Ispell dictionary interface * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git 
a/src/backend/tsearch/dict_simple.c b/src/backend/tsearch/dict_simple.c index 5b74deb02c7d6..9cd4b6bae55e8 100644 --- a/src/backend/tsearch/dict_simple.c +++ b/src/backend/tsearch/dict_simple.c @@ -3,7 +3,7 @@ * dict_simple.c * Simple dictionary: just lowercase and check for stopword * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c index e732e66dace0e..ed885ca5551d5 100644 --- a/src/backend/tsearch/dict_synonym.c +++ b/src/backend/tsearch/dict_synonym.c @@ -3,7 +3,7 @@ * dict_synonym.c * Synonym dictionary: replace word by its synonym * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c index 64c979086d1eb..a95ed0891dd18 100644 --- a/src/backend/tsearch/dict_thesaurus.c +++ b/src/backend/tsearch/dict_thesaurus.c @@ -3,7 +3,7 @@ * dict_thesaurus.c * Thesaurus dictionary: phrase to phrase substitution * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/regis.c b/src/backend/tsearch/regis.c index 2edd4faa8ec06..80017177222d1 100644 --- a/src/backend/tsearch/regis.c +++ b/src/backend/tsearch/regis.c @@ -3,7 +3,7 @@ * regis.c * Fast regex subset * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index 05d08cfc0102d..9b9a9afaa89ac 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -3,7 +3,7 @@ * 
spell.c * Normalizing word with ISpell * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * Ispell dictionary * ----------------- diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c index e7cd6264db275..f4ddfc01059ec 100644 --- a/src/backend/tsearch/to_tsany.c +++ b/src/backend/tsearch/to_tsany.c @@ -3,7 +3,7 @@ * to_tsany.c * to_ts* function definitions * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION @@ -20,10 +20,20 @@ #include "utils/jsonfuncs.h" +/* + * Opaque data structure, which is passed by parse_tsquery() to pushval_morph(). + */ typedef struct MorphOpaque { Oid cfg_id; - int qoperator; /* query operator */ + + /* + * Single tsquery morph could be parsed into multiple words. When these + * words reside in adjacent positions, they are connected using this + * operator. Usually, that is OP_PHRASE, which requires word positions of + * a complex morph to exactly match the tsvector. + */ + int qoperator; } MorphOpaque; typedef struct TSVectorBuildState @@ -573,7 +583,14 @@ to_tsquery_byid(PG_FUNCTION_ARGS) MorphOpaque data; data.cfg_id = PG_GETARG_OID(0); - data.qoperator = OP_AND; + + /* + * Passing OP_PHRASE as a qoperator makes tsquery require matching of word + * positions of a complex morph exactly match the tsvector. Also, when + * the complex morphs are connected with OP_PHRASE operator, we connect + * all their words into the OP_PHRASE sequence. + */ + data.qoperator = OP_PHRASE; query = parse_tsquery(text_to_cstring(in), pushval_morph, @@ -603,6 +620,12 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS) MorphOpaque data; data.cfg_id = PG_GETARG_OID(0); + + /* + * parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a + * single morph. 
Passing OP_PHRASE as a qoperator makes tsquery require + * matching of all words independently on their positions. + */ data.qoperator = OP_AND; query = parse_tsquery(text_to_cstring(in), @@ -634,6 +657,12 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS) MorphOpaque data; data.cfg_id = PG_GETARG_OID(0); + + /* + * parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a + * single morph. Passing OP_PHRASE as a qoperator makes tsquery require + * matching of word positions. + */ data.qoperator = OP_PHRASE; query = parse_tsquery(text_to_cstring(in), @@ -665,7 +694,13 @@ websearch_to_tsquery_byid(PG_FUNCTION_ARGS) data.cfg_id = PG_GETARG_OID(0); - data.qoperator = OP_AND; + /* + * Passing OP_PHRASE as a qoperator makes tsquery require matching of word + * positions of a complex morph exactly match the tsvector. Also, when + * the complex morphs are given in quotes, we connect all their words into + * the OP_PHRASE sequence. + */ + data.qoperator = OP_PHRASE; query = parse_tsquery(text_to_cstring(in), pushval_morph, diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index d362e86d61a26..f918cc8908bb9 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -3,7 +3,7 @@ * ts_locale.c * locale compatibility layer for tsearch * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c index 1c0f94e79759d..92d95b4bd4971 100644 --- a/src/backend/tsearch/ts_parse.c +++ b/src/backend/tsearch/ts_parse.c @@ -3,7 +3,7 @@ * ts_parse.c * main parse functions for tsearch * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c index 
e74b85a6900b3..be2546a86ea4c 100644 --- a/src/backend/tsearch/ts_selfuncs.c +++ b/src/backend/tsearch/ts_selfuncs.c @@ -3,7 +3,7 @@ * ts_selfuncs.c * Selectivity estimation functions for text search operators. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c index 19e9611a3ae82..33b32ad030919 100644 --- a/src/backend/tsearch/ts_typanalyze.c +++ b/src/backend/tsearch/ts_typanalyze.c @@ -3,7 +3,7 @@ * ts_typanalyze.c * functions for gathering statistics from tsvector columns * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c index 3bc6b32095fce..ed16a2e25a2a8 100644 --- a/src/backend/tsearch/ts_utils.c +++ b/src/backend/tsearch/ts_utils.c @@ -3,7 +3,7 @@ * ts_utils.c * various support functions * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c index 9c1fc7b10142b..71882dced99a7 100644 --- a/src/backend/tsearch/wparser.c +++ b/src/backend/tsearch/wparser.c @@ -3,7 +3,7 @@ * wparser.c * Standard interface to word parser * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 7b29062a97eaa..559dff635588d 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -3,7 +3,7 @@ * wparser_def.c * Default text search parser * - * Portions Copyright (c) 
1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/Gen_dummy_probes.pl b/src/backend/utils/Gen_dummy_probes.pl index cb0ad5a75cf59..9f3cf6baf18c8 100644 --- a/src/backend/utils/Gen_dummy_probes.pl +++ b/src/backend/utils/Gen_dummy_probes.pl @@ -4,7 +4,7 @@ # Gen_dummy_probes.pl # Perl script that generates probes.h file when dtrace is not available # -# Portions Copyright (c) 2008-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 2008-2021, PostgreSQL Global Development Group # # # IDENTIFICATION diff --git a/src/backend/utils/Gen_dummy_probes.sed b/src/backend/utils/Gen_dummy_probes.sed index 3c9eac6e4f74c..aa3db59cce7c2 100644 --- a/src/backend/utils/Gen_dummy_probes.sed +++ b/src/backend/utils/Gen_dummy_probes.sed @@ -1,7 +1,7 @@ #------------------------------------------------------------------------- # sed script to create dummy probes.h file when dtrace is not available # -# Copyright (c) 2008-2020, PostgreSQL Global Development Group +# Copyright (c) 2008-2021, PostgreSQL Global Development Group # # src/backend/utils/Gen_dummy_probes.sed #------------------------------------------------------------------------- diff --git a/src/backend/utils/Gen_fmgrtab.pl b/src/backend/utils/Gen_fmgrtab.pl index ae8cf5bb64cc5..881568defd7e2 100644 --- a/src/backend/utils/Gen_fmgrtab.pl +++ b/src/backend/utils/Gen_fmgrtab.pl @@ -5,7 +5,7 @@ # Perl script that generates fmgroids.h, fmgrprotos.h, and fmgrtab.c # from pg_proc.dat # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # @@ -109,7 +109,7 @@ * These macros can be used to avoid a catalog lookup when a specific * fmgr-callable function needs to be referenced. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * NOTES @@ -140,7 +140,7 @@ * fmgrprotos.h * Prototypes for built-in functions. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * NOTES @@ -166,7 +166,7 @@ * fmgrtab.c * The function manager's table of internal functions. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * NOTES diff --git a/src/backend/utils/Makefile b/src/backend/utils/Makefile index b91028ddfd6fa..26e07100a5d13 100644 --- a/src/backend/utils/Makefile +++ b/src/backend/utils/Makefile @@ -2,7 +2,7 @@ # # Makefile for backend/utils # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/backend/utils/Makefile diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 82732146d3deb..279ff15ade925 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -50,6 +50,7 @@ OBJS = \ jsonb_op.o \ jsonb_util.o \ jsonfuncs.o \ + jsonbsubs.o \ jsonpath.o \ jsonpath_exec.o \ jsonpath_gram.o \ diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c index fe6c444738aa7..c7f029e2186a1 100644 --- a/src/backend/utils/adt/acl.c +++ b/src/backend/utils/adt/acl.c @@ -3,7 +3,7 @@ * acl.c * Basic access control list data structures manipulation routines. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/amutils.c b/src/backend/utils/adt/amutils.c index 220cd8fc52fe0..569412fcacf53 100644 --- a/src/backend/utils/adt/amutils.c +++ b/src/backend/utils/adt/amutils.c @@ -3,7 +3,7 @@ * amutils.c * SQL-level APIs related to index access methods. * - * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * Copyright (c) 2016-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/array_expanded.c b/src/backend/utils/adt/array_expanded.c index 18de2dd352f60..60511f639d360 100644 --- a/src/backend/utils/adt/array_expanded.c +++ b/src/backend/utils/adt/array_expanded.c @@ -3,7 +3,7 @@ * array_expanded.c * Basic functions for manipulating expanded arrays. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/array_selfuncs.c b/src/backend/utils/adt/array_selfuncs.c index d97e60a3ab5bd..23de5d9226449 100644 --- a/src/backend/utils/adt/array_selfuncs.c +++ b/src/backend/utils/adt/array_selfuncs.c @@ -3,7 +3,7 @@ * array_selfuncs.c * Functions for selectivity estimation of array operators * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c index cb2a834193d67..c5008a0c16915 100644 --- a/src/backend/utils/adt/array_typanalyze.c +++ b/src/backend/utils/adt/array_typanalyze.c @@ -3,7 +3,7 @@ * 
array_typanalyze.c * Functions for gathering statistics from array columns * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c index 9e18bc9cda9a1..a2793bfae32a9 100644 --- a/src/backend/utils/adt/array_userfuncs.c +++ b/src/backend/utils/adt/array_userfuncs.c @@ -3,7 +3,7 @@ * array_userfuncs.c * Misc user-visible array support functions * - * Copyright (c) 2003-2020, PostgreSQL Global Development Group + * Copyright (c) 2003-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/array_userfuncs.c diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 4c8a739bc4392..f7012cc5d9876 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3,7 +3,7 @@ * arrayfuncs.c * Support functions for arrays. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -2582,8 +2582,11 @@ array_set_element_expanded(Datum arraydatum, /* * Copy new element into array's context, if needed (we assume it's - * already detoasted, so no junk should be created). If we fail further - * down, this memory is leaked, but that's reasonably harmless. + * already detoasted, so no junk should be created). Doing this before + * we've made any significant changes ensures that our behavior is sane + * even when the source is a reference to some element of this same array. + * If we fail further down, this memory is leaked, but that's reasonably + * harmless. 
*/ if (!eah->typbyval && !isNull) { diff --git a/src/backend/utils/adt/arraysubs.c b/src/backend/utils/adt/arraysubs.c index a081288f42db2..1d910d14dbc95 100644 --- a/src/backend/utils/adt/arraysubs.c +++ b/src/backend/utils/adt/arraysubs.c @@ -3,7 +3,7 @@ * arraysubs.c * Subscripting support functions for arrays. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c index bc4360aaec0c7..2a6a05718f8e4 100644 --- a/src/backend/utils/adt/arrayutils.c +++ b/src/backend/utils/adt/arrayutils.c @@ -3,7 +3,7 @@ * arrayutils.c * This file contains some support routines required for array functions. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/ascii.c b/src/backend/utils/adt/ascii.c index 3aa8a5e7d21b8..9dfff9dbef403 100644 --- a/src/backend/utils/adt/ascii.c +++ b/src/backend/utils/adt/ascii.c @@ -2,7 +2,7 @@ * ascii.c * The PostgreSQL routine for string to ascii conversion. * - * Portions Copyright (c) 1999-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1999-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/ascii.c diff --git a/src/backend/utils/adt/bool.c b/src/backend/utils/adt/bool.c index 340607f93645b..fe11d1ae94639 100644 --- a/src/backend/utils/adt/bool.c +++ b/src/backend/utils/adt/bool.c @@ -3,7 +3,7 @@ * bool.c * Functions for the built-in type "bool". 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/char.c b/src/backend/utils/adt/char.c index 20ea1366d053b..e620d47eb520a 100644 --- a/src/backend/utils/adt/char.c +++ b/src/backend/utils/adt/char.c @@ -4,7 +4,7 @@ * Functions for the built-in type "char" (not to be confused with * bpchar, which is the SQL CHAR(n) type). * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/cryptohashfuncs.c b/src/backend/utils/adt/cryptohashfuncs.c index 47bc0b34828e3..152adcbfb4ad0 100644 --- a/src/backend/utils/adt/cryptohashfuncs.c +++ b/src/backend/utils/adt/cryptohashfuncs.c @@ -3,7 +3,7 @@ * cryptohashfuncs.c * Cryptographic hash functions * - * Portions Copyright (c) 2018-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2018-2021, PostgreSQL Global Development Group * * * IDENTIFICATION @@ -68,65 +68,77 @@ md5_bytea(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(cstring_to_text(hexsum)); } - /* - * SHA-2 variants + * Internal routine to compute a cryptohash with the given bytea input. 
*/ - -Datum -sha224_bytea(PG_FUNCTION_ARGS) +static inline bytea * +cryptohash_internal(pg_cryptohash_type type, bytea *input) { - bytea *in = PG_GETARG_BYTEA_PP(0); const uint8 *data; + const char *typestr = NULL; + int digest_len = 0; size_t len; pg_cryptohash_ctx *ctx; - unsigned char buf[PG_SHA224_DIGEST_LENGTH]; bytea *result; - len = VARSIZE_ANY_EXHDR(in); - data = (unsigned char *) VARDATA_ANY(in); - - ctx = pg_cryptohash_create(PG_SHA224); + switch (type) + { + case PG_SHA224: + typestr = "SHA224"; + digest_len = PG_SHA224_DIGEST_LENGTH; + break; + case PG_SHA256: + typestr = "SHA256"; + digest_len = PG_SHA256_DIGEST_LENGTH; + break; + case PG_SHA384: + typestr = "SHA384"; + digest_len = PG_SHA384_DIGEST_LENGTH; + break; + case PG_SHA512: + typestr = "SHA512"; + digest_len = PG_SHA512_DIGEST_LENGTH; + break; + case PG_MD5: + case PG_SHA1: + elog(ERROR, "unsupported cryptohash type %d", type); + break; + } + + result = palloc0(digest_len + VARHDRSZ); + len = VARSIZE_ANY_EXHDR(input); + data = (unsigned char *) VARDATA_ANY(input); + + ctx = pg_cryptohash_create(type); if (pg_cryptohash_init(ctx) < 0) - elog(ERROR, "could not initialize %s context", "SHA224"); + elog(ERROR, "could not initialize %s context", typestr); if (pg_cryptohash_update(ctx, data, len) < 0) - elog(ERROR, "could not update %s context", "SHA224"); - if (pg_cryptohash_final(ctx, buf) < 0) - elog(ERROR, "could not finalize %s context", "SHA224"); + elog(ERROR, "could not update %s context", typestr); + if (pg_cryptohash_final(ctx, (unsigned char *) VARDATA(result)) < 0) + elog(ERROR, "could not finalize %s context", typestr); pg_cryptohash_free(ctx); - result = palloc(sizeof(buf) + VARHDRSZ); - SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); - memcpy(VARDATA(result), buf, sizeof(buf)); + SET_VARSIZE(result, digest_len + VARHDRSZ); - PG_RETURN_BYTEA_P(result); + return result; } +/* + * SHA-2 variants + */ + Datum -sha256_bytea(PG_FUNCTION_ARGS) +sha224_bytea(PG_FUNCTION_ARGS) { - bytea *in = 
PG_GETARG_BYTEA_PP(0); - const uint8 *data; - size_t len; - pg_cryptohash_ctx *ctx; - unsigned char buf[PG_SHA256_DIGEST_LENGTH]; - bytea *result; + bytea *result = cryptohash_internal(PG_SHA224, PG_GETARG_BYTEA_PP(0)); - len = VARSIZE_ANY_EXHDR(in); - data = (unsigned char *) VARDATA_ANY(in); - - ctx = pg_cryptohash_create(PG_SHA256); - if (pg_cryptohash_init(ctx) < 0) - elog(ERROR, "could not initialize %s context", "SHA256"); - if (pg_cryptohash_update(ctx, data, len) < 0) - elog(ERROR, "could not update %s context", "SHA256"); - if (pg_cryptohash_final(ctx, buf) < 0) - elog(ERROR, "could not finalize %s context", "SHA256"); - pg_cryptohash_free(ctx); + PG_RETURN_BYTEA_P(result); +} - result = palloc(sizeof(buf) + VARHDRSZ); - SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); - memcpy(VARDATA(result), buf, sizeof(buf)); +Datum +sha256_bytea(PG_FUNCTION_ARGS) +{ + bytea *result = cryptohash_internal(PG_SHA256, PG_GETARG_BYTEA_PP(0)); PG_RETURN_BYTEA_P(result); } @@ -134,28 +146,7 @@ sha256_bytea(PG_FUNCTION_ARGS) Datum sha384_bytea(PG_FUNCTION_ARGS) { - bytea *in = PG_GETARG_BYTEA_PP(0); - const uint8 *data; - size_t len; - pg_cryptohash_ctx *ctx; - unsigned char buf[PG_SHA384_DIGEST_LENGTH]; - bytea *result; - - len = VARSIZE_ANY_EXHDR(in); - data = (unsigned char *) VARDATA_ANY(in); - - ctx = pg_cryptohash_create(PG_SHA384); - if (pg_cryptohash_init(ctx) < 0) - elog(ERROR, "could not initialize %s context", "SHA384"); - if (pg_cryptohash_update(ctx, data, len) < 0) - elog(ERROR, "could not update %s context", "SHA384"); - if (pg_cryptohash_final(ctx, buf) < 0) - elog(ERROR, "could not finalize %s context", "SHA384"); - pg_cryptohash_free(ctx); - - result = palloc(sizeof(buf) + VARHDRSZ); - SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); - memcpy(VARDATA(result), buf, sizeof(buf)); + bytea *result = cryptohash_internal(PG_SHA384, PG_GETARG_BYTEA_PP(0)); PG_RETURN_BYTEA_P(result); } @@ -163,28 +154,7 @@ sha384_bytea(PG_FUNCTION_ARGS) Datum sha512_bytea(PG_FUNCTION_ARGS) 
{ - bytea *in = PG_GETARG_BYTEA_PP(0); - const uint8 *data; - size_t len; - pg_cryptohash_ctx *ctx; - unsigned char buf[PG_SHA512_DIGEST_LENGTH]; - bytea *result; - - len = VARSIZE_ANY_EXHDR(in); - data = (unsigned char *) VARDATA_ANY(in); - - ctx = pg_cryptohash_create(PG_SHA512); - if (pg_cryptohash_init(ctx) < 0) - elog(ERROR, "could not initialize %s context", "SHA512"); - if (pg_cryptohash_update(ctx, data, len) < 0) - elog(ERROR, "could not update %s context", "SHA512"); - if (pg_cryptohash_final(ctx, buf) < 0) - elog(ERROR, "could not finalize %s context", "SHA512"); - pg_cryptohash_free(ctx); - - result = palloc(sizeof(buf) + VARHDRSZ); - SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); - memcpy(VARDATA(result), buf, sizeof(buf)); + bytea *result = cryptohash_internal(PG_SHA512, PG_GETARG_BYTEA_PP(0)); PG_RETURN_BYTEA_P(result); } diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index a470cf890a205..68d99a5099206 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -3,7 +3,7 @@ * date.c * implements DATE and TIME data types specified in SQL standard * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994-5, Regents of the University of California * * diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index 91fab8cc9cb39..350b0c55eac52 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -3,7 +3,7 @@ * datetime.c * Support functions for date/time types. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c index 34cdde1bb91bb..6a317fc0a6d0c 100644 --- a/src/backend/utils/adt/datum.c +++ b/src/backend/utils/adt/datum.c @@ -3,7 +3,7 @@ * datum.c * POSTGRES Datum (abstract data type) manipulation routines. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 3319e9761e4c0..64cdaa4134b79 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -2,7 +2,7 @@ * dbsize.c * Database object size functions, and related inquiries * - * Copyright (c) 2002-2020, PostgreSQL Global Development Group + * Copyright (c) 2002-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/dbsize.c diff --git a/src/backend/utils/adt/domains.c b/src/backend/utils/adt/domains.c index 41e1a1b610b30..0a36772fc031d 100644 --- a/src/backend/utils/adt/domains.c +++ b/src/backend/utils/adt/domains.c @@ -19,7 +19,7 @@ * to evaluate them in. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index a6c65b16578ae..8449aaac56ace 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -3,7 +3,7 @@ * encode.c * Various data encoding/decoding things. 
* - * Copyright (c) 2001-2020, PostgreSQL Global Development Group + * Copyright (c) 2001-2021, PostgreSQL Global Development Group * * * IDENTIFICATION @@ -15,7 +15,7 @@ #include -#include "common/hex_decode.h" +#include "common/hex.h" #include "mb/pg_wchar.h" #include "utils/builtins.h" #include "utils/memutils.h" @@ -32,10 +32,12 @@ */ struct pg_encoding { - uint64 (*encode_len) (const char *data, size_t dlen); - uint64 (*decode_len) (const char *data, size_t dlen); - uint64 (*encode) (const char *data, size_t dlen, char *res); - uint64 (*decode) (const char *data, size_t dlen, char *res); + uint64 (*encode_len) (const char *src, size_t srclen); + uint64 (*decode_len) (const char *src, size_t srclen); + uint64 (*encode) (const char *src, size_t srclen, + char *dst, size_t dstlen); + uint64 (*decode) (const char *src, size_t srclen, + char *dst, size_t dstlen); }; static const struct pg_encoding *pg_find_encoding(const char *name); @@ -81,11 +83,7 @@ binary_encode(PG_FUNCTION_ARGS) result = palloc(VARHDRSZ + resultlen); - res = enc->encode(dataptr, datalen, VARDATA(result)); - - /* Make this FATAL 'cause we've trodden on memory ... */ - if (res > resultlen) - elog(FATAL, "overflow - encode estimate too small"); + res = enc->encode(dataptr, datalen, VARDATA(result), resultlen); SET_VARSIZE(result, VARHDRSZ + res); @@ -129,11 +127,7 @@ binary_decode(PG_FUNCTION_ARGS) result = palloc(VARHDRSZ + resultlen); - res = enc->decode(dataptr, datalen, VARDATA(result)); - - /* Make this FATAL 'cause we've trodden on memory ... 
*/ - if (res > resultlen) - elog(FATAL, "overflow - decode estimate too small"); + res = enc->decode(dataptr, datalen, VARDATA(result), resultlen); SET_VARSIZE(result, VARHDRSZ + res); @@ -145,32 +139,20 @@ binary_decode(PG_FUNCTION_ARGS) * HEX */ -static const char hextbl[] = "0123456789abcdef"; - -uint64 -hex_encode(const char *src, size_t len, char *dst) -{ - const char *end = src + len; - - while (src < end) - { - *dst++ = hextbl[(*src >> 4) & 0xF]; - *dst++ = hextbl[*src & 0xF]; - src++; - } - return (uint64) len * 2; -} - +/* + * Those two wrappers are still needed to match with the layer of + * src/common/. + */ static uint64 hex_enc_len(const char *src, size_t srclen) { - return (uint64) srclen << 1; + return pg_hex_enc_len(srclen); } static uint64 hex_dec_len(const char *src, size_t srclen) { - return (uint64) srclen >> 1; + return pg_hex_dec_len(srclen); } /* @@ -192,12 +174,12 @@ static const int8 b64lookup[128] = { }; static uint64 -pg_base64_encode(const char *src, size_t len, char *dst) +pg_base64_encode(const char *src, size_t srclen, char *dst, size_t dstlen) { char *p, *lend = dst + 76; const char *s, - *end = src + len; + *end = src + srclen; int pos = 2; uint32 buf = 0; @@ -213,6 +195,8 @@ pg_base64_encode(const char *src, size_t len, char *dst) /* write it out */ if (pos < 0) { + if ((p - dst + 4) > dstlen) + elog(ERROR, "overflow of destination buffer in base64 encoding"); *p++ = _base64[(buf >> 18) & 0x3f]; *p++ = _base64[(buf >> 12) & 0x3f]; *p++ = _base64[(buf >> 6) & 0x3f]; @@ -223,25 +207,30 @@ pg_base64_encode(const char *src, size_t len, char *dst) } if (p >= lend) { + if ((p - dst + 1) > dstlen) + elog(ERROR, "overflow of destination buffer in base64 encoding"); *p++ = '\n'; lend = p + 76; } } if (pos != 2) { + if ((p - dst + 4) > dstlen) + elog(ERROR, "overflow of destination buffer in base64 encoding"); *p++ = _base64[(buf >> 18) & 0x3f]; *p++ = _base64[(buf >> 12) & 0x3f]; *p++ = (pos == 0) ? 
_base64[(buf >> 6) & 0x3f] : '='; *p++ = '='; } + Assert((p - dst) <= dstlen); return p - dst; } static uint64 -pg_base64_decode(const char *src, size_t len, char *dst) +pg_base64_decode(const char *src, size_t srclen, char *dst, size_t dstlen) { - const char *srcend = src + len, + const char *srcend = src + srclen, *s = src; char *p = dst; char c; @@ -289,11 +278,21 @@ pg_base64_decode(const char *src, size_t len, char *dst) pos++; if (pos == 4) { + if ((p - dst + 1) > dstlen) + elog(ERROR, "overflow of destination buffer in base64 decoding"); *p++ = (buf >> 16) & 255; if (end == 0 || end > 1) + { + if ((p - dst + 1) > dstlen) + elog(ERROR, "overflow of destination buffer in base64 decoding"); *p++ = (buf >> 8) & 255; + } if (end == 0 || end > 2) + { + if ((p - dst + 1) > dstlen) + elog(ERROR, "overflow of destination buffer in base64 decoding"); *p++ = buf & 255; + } buf = 0; pos = 0; } @@ -305,6 +304,7 @@ pg_base64_decode(const char *src, size_t len, char *dst) errmsg("invalid base64 end sequence"), errhint("Input data is missing padding, is truncated, or is otherwise corrupted."))); + Assert((p - dst) <= dstlen); return p - dst; } @@ -340,7 +340,7 @@ pg_base64_dec_len(const char *src, size_t srclen) #define DIG(VAL) ((VAL) + '0') static uint64 -esc_encode(const char *src, size_t srclen, char *dst) +esc_encode(const char *src, size_t srclen, char *dst, size_t dstlen) { const char *end = src + srclen; char *rp = dst; @@ -352,6 +352,8 @@ esc_encode(const char *src, size_t srclen, char *dst) if (c == '\0' || IS_HIGHBIT_SET(c)) { + if ((rp - dst + 4) > dstlen) + elog(ERROR, "overflow of destination buffer in escape encoding"); rp[0] = '\\'; rp[1] = DIG(c >> 6); rp[2] = DIG((c >> 3) & 7); @@ -361,6 +363,8 @@ esc_encode(const char *src, size_t srclen, char *dst) } else if (c == '\\') { + if ((rp - dst + 2) > dstlen) + elog(ERROR, "overflow of destination buffer in escape encoding"); rp[0] = '\\'; rp[1] = '\\'; rp += 2; @@ -368,6 +372,8 @@ esc_encode(const char *src, 
size_t srclen, char *dst) } else { + if ((rp - dst + 1) > dstlen) + elog(ERROR, "overflow of destination buffer in escape encoding"); *rp++ = c; len++; } @@ -375,11 +381,12 @@ esc_encode(const char *src, size_t srclen, char *dst) src++; } + Assert((rp - dst) <= dstlen); return len; } static uint64 -esc_decode(const char *src, size_t srclen, char *dst) +esc_decode(const char *src, size_t srclen, char *dst, size_t dstlen) { const char *end = src + srclen; char *rp = dst; @@ -388,7 +395,11 @@ esc_decode(const char *src, size_t srclen, char *dst) while (src < end) { if (src[0] != '\\') + { + if ((rp - dst + 1) > dstlen) + elog(ERROR, "overflow of destination buffer in escape decoding"); *rp++ = *src++; + } else if (src + 3 < end && (src[1] >= '0' && src[1] <= '3') && (src[2] >= '0' && src[2] <= '7') && @@ -400,12 +411,16 @@ esc_decode(const char *src, size_t srclen, char *dst) val <<= 3; val += VAL(src[2]); val <<= 3; + if ((rp - dst + 1) > dstlen) + elog(ERROR, "overflow of destination buffer in escape decoding"); *rp++ = val + VAL(src[3]); src += 4; } else if (src + 1 < end && (src[1] == '\\')) { + if ((rp - dst + 1) > dstlen) + elog(ERROR, "overflow of destination buffer in escape decoding"); *rp++ = '\\'; src += 2; } @@ -423,6 +438,7 @@ esc_decode(const char *src, size_t srclen, char *dst) len++; } + Assert((rp - dst) <= dstlen); return len; } @@ -504,7 +520,7 @@ static const struct { "hex", { - hex_enc_len, hex_dec_len, hex_encode, hex_decode + hex_enc_len, hex_dec_len, pg_hex_encode, pg_hex_decode } }, { diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c index 69faf41df964e..0d892132a841d 100644 --- a/src/backend/utils/adt/enum.c +++ b/src/backend/utils/adt/enum.c @@ -3,7 +3,7 @@ * enum.c * I/O functions, operators, aggregates etc for enum types * - * Copyright (c) 2006-2020, PostgreSQL Global Development Group + * Copyright (c) 2006-2021, PostgreSQL Global Development Group * * * IDENTIFICATION @@ -82,12 +82,12 @@ 
check_safe_enum_use(HeapTuple enumval_tup) return; /* - * Check if the enum value is blacklisted. If not, it's safe, because it + * Check if the enum value is uncommitted. If not, it's safe, because it * was made during CREATE TYPE AS ENUM and can't be shorter-lived than its * owning type. (This'd also be false for values made by other * transactions; but the previous tests should have handled all of those.) */ - if (!EnumBlacklisted(en->oid)) + if (!EnumUncommitted(en->oid)) return; /* diff --git a/src/backend/utils/adt/expandeddatum.c b/src/backend/utils/adt/expandeddatum.c index 3c35523556002..cb0adfaa21527 100644 --- a/src/backend/utils/adt/expandeddatum.c +++ b/src/backend/utils/adt/expandeddatum.c @@ -3,7 +3,7 @@ * expandeddatum.c * Support functions for "expanded" value representations. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/expandedrecord.c b/src/backend/utils/adt/expandedrecord.c index ec12ec54fc823..e19491ecf7442 100644 --- a/src/backend/utils/adt/expandedrecord.c +++ b/src/backend/utils/adt/expandedrecord.c @@ -7,7 +7,7 @@ * store values of named composite types, domains over named composite types, * and record types (registered or anonymous). * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index 429c9280c0cf7..098bbb372bfc5 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -3,7 +3,7 @@ * float.c * Functions for the built-in floating-point types. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/format_type.c b/src/backend/utils/adt/format_type.c index 013409aee7d84..0e8e065457585 100644 --- a/src/backend/utils/adt/format_type.c +++ b/src/backend/utils/adt/format_type.c @@ -4,7 +4,7 @@ * Display type names "nicely". * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 3bb01cdb65ab1..783c7b5e7acf9 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -4,7 +4,7 @@ * src/backend/utils/adt/formatting.c * * - * Portions Copyright (c) 1999-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1999-2021, PostgreSQL Global Development Group * * * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER(); diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c index d34182a7b04d2..169ddf8d76813 100644 --- a/src/backend/utils/adt/genfile.c +++ b/src/backend/utils/adt/genfile.c @@ -4,7 +4,7 @@ * Functions for direct access to files * * - * Copyright (c) 2004-2020, PostgreSQL Global Development Group + * Copyright (c) 2004-2021, PostgreSQL Global Development Group * * Author: Andreas Pflug * diff --git a/src/backend/utils/adt/geo_ops.c b/src/backend/utils/adt/geo_ops.c index c1dc511a1a8c3..9484dbc22737a 100644 --- a/src/backend/utils/adt/geo_ops.c +++ b/src/backend/utils/adt/geo_ops.c @@ -13,7 +13,7 @@ * - circle * - polygon * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group 
* Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/geo_selfuncs.c b/src/backend/utils/adt/geo_selfuncs.c index 89cf8d32e7916..db941244efae3 100644 --- a/src/backend/utils/adt/geo_selfuncs.c +++ b/src/backend/utils/adt/geo_selfuncs.c @@ -4,7 +4,7 @@ * Selectivity routines registered in the operator catalog in the * "oprrest" and "oprjoin" attributes. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/geo_spgist.c b/src/backend/utils/adt/geo_spgist.c index de7e6fa404254..d0ff5522cefb6 100644 --- a/src/backend/utils/adt/geo_spgist.c +++ b/src/backend/utils/adt/geo_spgist.c @@ -62,7 +62,7 @@ * except the root. For the root node, we are setting the boundaries * that we don't yet have as infinity. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index 418c13e1b4cdc..e9f108425c5a8 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -3,7 +3,7 @@ * int.c * Functions for the built-in integer types (except int8). 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index 005f68d85391f..2168080dcce9a 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -3,7 +3,7 @@ * int8.c * Internal 64-bit integer operations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 420d3cdcbb9d7..30ca2cf6c81b8 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -3,7 +3,7 @@ * json.c * JSON data type support. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c index 1e9ca046c6992..8d1e7fbf9108c 100644 --- a/src/backend/utils/adt/jsonb.c +++ b/src/backend/utils/adt/jsonb.c @@ -3,7 +3,7 @@ * jsonb.c * I/O routines for jsonb type * - * Copyright (c) 2014-2020, PostgreSQL Global Development Group + * Copyright (c) 2014-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/jsonb.c diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c index aee3d9d6733ee..37499bc562266 100644 --- a/src/backend/utils/adt/jsonb_gin.c +++ b/src/backend/utils/adt/jsonb_gin.c @@ -3,7 +3,7 @@ * jsonb_gin.c * GIN support functions for jsonb * - * Copyright (c) 2014-2020, PostgreSQL Global Development Group + * Copyright (c) 2014-2021, PostgreSQL Global 
Development Group * * We provide two opclasses for jsonb indexing: jsonb_ops and jsonb_path_ops. * For their description see json.sgml and comments in jsonb.h. diff --git a/src/backend/utils/adt/jsonb_op.c b/src/backend/utils/adt/jsonb_op.c index dc17e17f9b46b..6e85e5c36b396 100644 --- a/src/backend/utils/adt/jsonb_op.c +++ b/src/backend/utils/adt/jsonb_op.c @@ -3,7 +3,7 @@ * jsonb_op.c * Special operators for jsonb only, used by various index access methods * - * Copyright (c) 2014-2020, PostgreSQL Global Development Group + * Copyright (c) 2014-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c index 4eeffa1424346..571118779590d 100644 --- a/src/backend/utils/adt/jsonb_util.c +++ b/src/backend/utils/adt/jsonb_util.c @@ -3,7 +3,7 @@ * jsonb_util.c * converting between Jsonb and JsonbValues, and iterating. * - * Copyright (c) 2014-2020, PostgreSQL Global Development Group + * Copyright (c) 2014-2021, PostgreSQL Global Development Group * * * IDENTIFICATION @@ -68,18 +68,25 @@ static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq, JsonbValue *scalarVal); +void +JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val) +{ + val->type = jbvBinary; + val->val.binary.data = &jsonb->root; + val->val.binary.len = VARSIZE(jsonb) - VARHDRSZ; +} + /* * Turn an in-memory JsonbValue into a Jsonb for on-disk storage. * - * There isn't a JsonbToJsonbValue(), because generally we find it more - * convenient to directly iterate through the Jsonb representation and only - * really convert nested scalar values. JsonbIteratorNext() does this, so that - * clients of the iteration code don't have to directly deal with the binary - * representation (JsonbDeepContains() is a notable exception, although all - * exceptions are internal to this module). 
In general, functions that accept - * a JsonbValue argument are concerned with the manipulation of scalar values, - * or simple containers of scalar values, where it would be inconvenient to - * deal with a great amount of other state. + * Generally we find it more convenient to directly iterate through the Jsonb + * representation and only really convert nested scalar values. + * JsonbIteratorNext() does this, so that clients of the iteration code don't + * have to directly deal with the binary representation (JsonbDeepContains() is + * a notable exception, although all exceptions are internal to this module). + * In general, functions that accept a JsonbValue argument are concerned with + * the manipulation of scalar values, or simple containers of scalar values, + * where it would be inconvenient to deal with a great amount of other state. */ Jsonb * JsonbValueToJsonb(JsonbValue *val) @@ -563,6 +570,30 @@ pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq, JsonbValue *res = NULL; JsonbValue v; JsonbIteratorToken tok; + int i; + + if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvObject) + { + pushJsonbValue(pstate, WJB_BEGIN_OBJECT, NULL); + for (i = 0; i < jbval->val.object.nPairs; i++) + { + pushJsonbValue(pstate, WJB_KEY, &jbval->val.object.pairs[i].key); + pushJsonbValue(pstate, WJB_VALUE, &jbval->val.object.pairs[i].value); + } + + return pushJsonbValue(pstate, WJB_END_OBJECT, NULL); + } + + if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvArray) + { + pushJsonbValue(pstate, WJB_BEGIN_ARRAY, NULL); + for (i = 0; i < jbval->val.array.nElems; i++) + { + pushJsonbValue(pstate, WJB_ELEM, &jbval->val.array.elems[i]); + } + + return pushJsonbValue(pstate, WJB_END_ARRAY, NULL); + } if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) || jbval->type != jbvBinary) @@ -573,9 +604,30 @@ pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq, /* unpack the binary and add each piece to the pstate */ it = 
JsonbIteratorInit(jbval->val.binary.data); + + if ((jbval->val.binary.data->header & JB_FSCALAR) && *pstate) + { + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_BEGIN_ARRAY); + Assert(v.type == jbvArray && v.val.array.rawScalar); + + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_ELEM); + + res = pushJsonbValueScalar(pstate, seq, &v); + + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_END_ARRAY); + Assert(it == NULL); + + return res; + } + while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) res = pushJsonbValueScalar(pstate, tok, - tok < WJB_BEGIN_ARRAY ? &v : NULL); + tok < WJB_BEGIN_ARRAY || + (tok == WJB_BEGIN_ARRAY && + v.val.array.rawScalar) ? &v : NULL); return res; } diff --git a/src/backend/utils/adt/jsonbsubs.c b/src/backend/utils/adt/jsonbsubs.c new file mode 100644 index 0000000000000..5868aad0578f0 --- /dev/null +++ b/src/backend/utils/adt/jsonbsubs.c @@ -0,0 +1,417 @@ +/*------------------------------------------------------------------------- + * + * jsonbsubs.c + * Subscripting support functions for jsonb. 
+ * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/jsonbsubs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "executor/execExpr.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/subscripting.h" +#include "parser/parse_coerce.h" +#include "parser/parse_expr.h" +#include "utils/jsonb.h" +#include "utils/jsonfuncs.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + + +/* SubscriptingRefState.workspace for jsonb subscripting execution */ +typedef struct JsonbSubWorkspace +{ + bool expectArray; /* jsonb root is expected to be an array */ + Oid *indexOid; /* OID of coerced subscript expression, could + * be only integer or text */ + Datum *index; /* Subscript values in Datum format */ +} JsonbSubWorkspace; + + +/* + * Finish parse analysis of a SubscriptingRef expression for a jsonb. + * + * Transform the subscript expressions, coerce them to text, + * and determine the result type of the SubscriptingRef node. + */ +static void +jsonb_subscript_transform(SubscriptingRef *sbsref, + List *indirection, + ParseState *pstate, + bool isSlice, + bool isAssignment) +{ + List *upperIndexpr = NIL; + ListCell *idx; + + /* + * Transform and convert the subscript expressions. Jsonb subscripting + * does not support slices, look only and the upper index. + */ + foreach(idx, indirection) + { + A_Indices *ai = lfirst_node(A_Indices, idx); + Node *subExpr; + + if (isSlice) + { + Node *expr = ai->uidx ? 
ai->uidx : ai->lidx; + + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("jsonb subscript does not support slices"), + parser_errposition(pstate, exprLocation(expr)))); + } + + if (ai->uidx) + { + Oid subExprType = InvalidOid, + targetType = UNKNOWNOID; + + subExpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind); + subExprType = exprType(subExpr); + + if (subExprType != UNKNOWNOID) + { + Oid targets[2] = {INT4OID, TEXTOID}; + + /* + * Jsonb can handle multiple subscript types, but cases when a + * subscript could be coerced to multiple target types must be + * avoided, similar to overloaded functions. It could be + * possibly extend with jsonpath in the future. + */ + for (int i = 0; i < 2; i++) + { + if (can_coerce_type(1, &subExprType, &targets[i], COERCION_IMPLICIT)) + { + /* + * One type has already succeeded, it means there are + * two coercion targets possible, failure. + */ + if (targetType != UNKNOWNOID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("subscript type is not supported"), + errhint("Jsonb subscript must be coerced " + "only to one type, integer or text."), + parser_errposition(pstate, exprLocation(subExpr)))); + + targetType = targets[i]; + } + } + + /* + * No suitable types were found, failure. + */ + if (targetType == UNKNOWNOID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("subscript type is not supported"), + errhint("Jsonb subscript must be coerced to either integer or text"), + parser_errposition(pstate, exprLocation(subExpr)))); + } + else + targetType = TEXTOID; + + /* + * We known from can_coerce_type that coercion will succeed, so + * coerce_type could be used. Note the implicit coercion context, + * which is required to handle subscripts of different types, + * similar to overloaded functions. 
+ */ + subExpr = coerce_type(pstate, + subExpr, subExprType, + targetType, -1, + COERCION_IMPLICIT, + COERCE_IMPLICIT_CAST, + -1); + if (subExpr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("jsonb subscript must have text type"), + parser_errposition(pstate, exprLocation(subExpr)))); + } + else + { + /* + * Slice with omitted upper bound. Should not happen as we already + * errored out on slice earlier, but handle this just in case. + */ + Assert(isSlice && ai->is_slice); + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("jsonb subscript does not support slices"), + parser_errposition(pstate, exprLocation(ai->uidx)))); + } + + upperIndexpr = lappend(upperIndexpr, subExpr); + } + + /* store the transformed lists into the SubscriptRef node */ + sbsref->refupperindexpr = upperIndexpr; + sbsref->reflowerindexpr = NIL; + + /* Determine the result type of the subscripting operation; always jsonb */ + sbsref->refrestype = JSONBOID; + sbsref->reftypmod = -1; +} + +/* + * During execution, process the subscripts in a SubscriptingRef expression. + * + * The subscript expressions are already evaluated in Datum form in the + * SubscriptingRefState's arrays. Check and convert them as necessary. + * + * If any subscript is NULL, we throw error in assignment cases, or in fetch + * cases set result to NULL and return false (instructing caller to skip the + * rest of the SubscriptingRef sequence). + */ +static bool +jsonb_subscript_check_subscripts(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state; + JsonbSubWorkspace *workspace = (JsonbSubWorkspace *) sbsrefstate->workspace; + + /* + * In case if the first subscript is an integer, the source jsonb is + * expected to be an array. This information is not used directly, all + * such cases are handled within corresponding jsonb assign functions. 
But + * if the source jsonb is NULL the expected type will be used to construct + * an empty source. + */ + if (sbsrefstate->numupper > 0 && sbsrefstate->upperprovided[0] && + !sbsrefstate->upperindexnull[0] && workspace->indexOid[0] == INT4OID) + workspace->expectArray = true; + + /* Process upper subscripts */ + for (int i = 0; i < sbsrefstate->numupper; i++) + { + if (sbsrefstate->upperprovided[i]) + { + /* If any index expr yields NULL, result is NULL or error */ + if (sbsrefstate->upperindexnull[i]) + { + if (sbsrefstate->isassignment) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("jsonb subscript in assignment must not be null"))); + *op->resnull = true; + return false; + } + + /* + * For jsonb fetch and assign functions we need to provide path in + * text format. Convert if it's not already text. + */ + if (workspace->indexOid[i] == INT4OID) + { + Datum datum = sbsrefstate->upperindex[i]; + char *cs = DatumGetCString(DirectFunctionCall1(int4out, datum)); + + workspace->index[i] = CStringGetTextDatum(cs); + } + else + workspace->index[i] = sbsrefstate->upperindex[i]; + } + } + + return true; +} + +/* + * Evaluate SubscriptingRef fetch for a jsonb element. + * + * Source container is in step's result variable (it's known not NULL, since + * we set fetch_strict to true). + */ +static void +jsonb_subscript_fetch(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + JsonbSubWorkspace *workspace = (JsonbSubWorkspace *) sbsrefstate->workspace; + Jsonb *jsonbSource; + + /* Should not get here if source jsonb (or any subscript) is null */ + Assert(!(*op->resnull)); + + jsonbSource = DatumGetJsonbP(*op->resvalue); + *op->resvalue = jsonb_get_element(jsonbSource, + workspace->index, + sbsrefstate->numupper, + op->resnull, + false); +} + +/* + * Evaluate SubscriptingRef assignment for a jsonb element assignment. 
+ * + * Input container (possibly null) is in result area, replacement value is in + * SubscriptingRefState's replacevalue/replacenull. + */ +static void +jsonb_subscript_assign(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + JsonbSubWorkspace *workspace = (JsonbSubWorkspace *) sbsrefstate->workspace; + Jsonb *jsonbSource; + JsonbValue replacevalue; + + if (sbsrefstate->replacenull) + replacevalue.type = jbvNull; + else + JsonbToJsonbValue(DatumGetJsonbP(sbsrefstate->replacevalue), + &replacevalue); + + /* + * In case if the input container is null, set up an empty jsonb and + * proceed with the assignment. + */ + if (*op->resnull) + { + JsonbValue newSource; + + /* + * To avoid any surprising results, set up an empty jsonb array in + * case of an array is expected (i.e. the first subscript is integer), + * otherwise jsonb object. + */ + if (workspace->expectArray) + { + newSource.type = jbvArray; + newSource.val.array.nElems = 0; + newSource.val.array.rawScalar = false; + } + else + { + newSource.type = jbvObject; + newSource.val.object.nPairs = 0; + } + + jsonbSource = JsonbValueToJsonb(&newSource); + *op->resnull = false; + } + else + jsonbSource = DatumGetJsonbP(*op->resvalue); + + *op->resvalue = jsonb_set_element(jsonbSource, + workspace->index, + sbsrefstate->numupper, + &replacevalue); + /* The result is never NULL, so no need to change *op->resnull */ +} + +/* + * Compute old jsonb element value for a SubscriptingRef assignment + * expression. Will only be called if the new-value subexpression + * contains SubscriptingRef or FieldStore. This is the same as the + * regular fetch case, except that we have to handle a null jsonb, + * and the value should be stored into the SubscriptingRefState's + * prevvalue/prevnull fields. 
+ */ +static void +jsonb_subscript_fetch_old(ExprState *state, + ExprEvalStep *op, + ExprContext *econtext) +{ + SubscriptingRefState *sbsrefstate = op->d.sbsref.state; + + if (*op->resnull) + { + /* whole jsonb is null, so any element is too */ + sbsrefstate->prevvalue = (Datum) 0; + sbsrefstate->prevnull = true; + } + else + { + Jsonb *jsonbSource = DatumGetJsonbP(*op->resvalue); + + sbsrefstate->prevvalue = jsonb_get_element(jsonbSource, + sbsrefstate->upperindex, + sbsrefstate->numupper, + &sbsrefstate->prevnull, + false); + } +} + +/* + * Set up execution state for a jsonb subscript operation. Opposite to the + * arrays subscription, there is no limit for number of subscripts as jsonb + * type itself doesn't have nesting limits. + */ +static void +jsonb_exec_setup(const SubscriptingRef *sbsref, + SubscriptingRefState *sbsrefstate, + SubscriptExecSteps *methods) +{ + JsonbSubWorkspace *workspace; + ListCell *lc; + int nupper = sbsref->refupperindexpr->length; + char *ptr; + + /* Allocate type-specific workspace with space for per-subscript data */ + workspace = palloc0(MAXALIGN(sizeof(JsonbSubWorkspace)) + + nupper * (sizeof(Datum) + sizeof(Oid))); + workspace->expectArray = false; + ptr = ((char *) workspace) + MAXALIGN(sizeof(JsonbSubWorkspace)); + + /* + * This coding assumes sizeof(Datum) >= sizeof(Oid), else we might + * misalign the indexOid pointer + */ + workspace->index = (Datum *) ptr; + ptr += nupper * sizeof(Datum); + workspace->indexOid = (Oid *) ptr; + + sbsrefstate->workspace = workspace; + + /* Collect subscript data types necessary at execution time */ + foreach(lc, sbsref->refupperindexpr) + { + Node *expr = lfirst(lc); + int i = foreach_current_index(lc); + + workspace->indexOid[i] = exprType(expr); + } + + /* + * Pass back pointers to appropriate step execution functions. 
+ */ + methods->sbs_check_subscripts = jsonb_subscript_check_subscripts; + methods->sbs_fetch = jsonb_subscript_fetch; + methods->sbs_assign = jsonb_subscript_assign; + methods->sbs_fetch_old = jsonb_subscript_fetch_old; +} + +/* + * jsonb_subscript_handler + * Subscripting handler for jsonb. + * + */ +Datum +jsonb_subscript_handler(PG_FUNCTION_ARGS) +{ + static const SubscriptRoutines sbsroutines = { + .transform = jsonb_subscript_transform, + .exec_setup = jsonb_exec_setup, + .fetch_strict = true, /* fetch returns NULL for NULL inputs */ + .fetch_leakproof = true, /* fetch returns NULL for bad subscript */ + .store_leakproof = false /* ... but assignment throws error */ + }; + + PG_RETURN_POINTER(&sbsroutines); +} diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 69100feab7c1c..215a10f16ef6e 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -3,7 +3,7 @@ * jsonfuncs.c * Functions to process JSON data types. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -44,6 +44,8 @@ #define JB_PATH_INSERT_AFTER 0x0010 #define JB_PATH_CREATE_OR_INSERT \ (JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE) +#define JB_PATH_FILL_GAPS 0x0020 +#define JB_PATH_CONSISTENT_POSITION 0x0040 /* state for json_object_keys */ typedef struct OkeysState @@ -463,16 +465,16 @@ static JsonbValue *IteratorConcat(JsonbIterator **it1, JsonbIterator **it2, JsonbParseState **state); static JsonbValue *setPath(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, - JsonbParseState **st, int level, Jsonb *newval, + JsonbParseState **st, int level, JsonbValue *newval, int op_type); static void setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, JsonbParseState **st, int 
level, - Jsonb *newval, uint32 npairs, int op_type); + JsonbValue *newval, uint32 npairs, int op_type); static void setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, JsonbParseState **st, - int level, Jsonb *newval, uint32 nelems, int op_type); -static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb); + int level, + JsonbValue *newval, uint32 nelems, int op_type); /* function supporting iterate_json_values */ static void iterate_values_scalar(void *state, char *token, JsonTokenType tokentype); @@ -1448,13 +1450,9 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); Datum *pathtext; bool *pathnulls; + bool isnull; int npath; - int i; - bool have_object = false, - have_array = false; - JsonbValue *jbvp = NULL; - JsonbValue jbvbuf; - JsonbContainer *container; + Datum res; /* * If the array contains any null elements, return NULL, on the grounds @@ -1469,9 +1467,26 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) deconstruct_array(path, TEXTOID, -1, false, TYPALIGN_INT, &pathtext, &pathnulls, &npath); - /* Identify whether we have object, array, or scalar at top-level */ - container = &jb->root; + res = jsonb_get_element(jb, pathtext, npath, &isnull, as_text); + + if (isnull) + PG_RETURN_NULL(); + else + PG_RETURN_DATUM(res); +} + +Datum +jsonb_get_element(Jsonb *jb, Datum *path, int npath, bool *isnull, bool as_text) +{ + JsonbContainer *container = &jb->root; + JsonbValue *jbvp = NULL; + int i; + bool have_object = false, + have_array = false; + *isnull = false; + + /* Identify whether we have object, array, or scalar at top-level */ if (JB_ROOT_IS_OBJECT(jb)) have_object = true; else if (JB_ROOT_IS_ARRAY(jb) && !JB_ROOT_IS_SCALAR(jb)) @@ -1496,9 +1511,9 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) { if (as_text) { - PG_RETURN_TEXT_P(cstring_to_text(JsonbToCString(NULL, - container, - VARSIZE(jb)))); + return 
PointerGetDatum(cstring_to_text(JsonbToCString(NULL, + container, + VARSIZE(jb)))); } else { @@ -1512,22 +1527,25 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) if (have_object) { jbvp = getKeyJsonValueFromContainer(container, - VARDATA(pathtext[i]), - VARSIZE(pathtext[i]) - VARHDRSZ, - &jbvbuf); + VARDATA(path[i]), + VARSIZE(path[i]) - VARHDRSZ, + NULL); } else if (have_array) { long lindex; uint32 index; - char *indextext = TextDatumGetCString(pathtext[i]); + char *indextext = TextDatumGetCString(path[i]); char *endptr; errno = 0; lindex = strtol(indextext, &endptr, 10); if (endptr == indextext || *endptr != '\0' || errno != 0 || lindex > INT_MAX || lindex < INT_MIN) - PG_RETURN_NULL(); + { + *isnull = true; + return PointerGetDatum(NULL); + } if (lindex >= 0) { @@ -1545,7 +1563,10 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) nelements = JsonContainerSize(container); if (-lindex > nelements) - PG_RETURN_NULL(); + { + *isnull = true; + return PointerGetDatum(NULL); + } else index = nelements + lindex; } @@ -1555,11 +1576,15 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) else { /* scalar, extraction yields a null */ - PG_RETURN_NULL(); + *isnull = true; + return PointerGetDatum(NULL); } if (jbvp == NULL) - PG_RETURN_NULL(); + { + *isnull = true; + return PointerGetDatum(NULL); + } else if (i == npath - 1) break; @@ -1581,9 +1606,12 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) if (as_text) { if (jbvp->type == jbvNull) - PG_RETURN_NULL(); + { + *isnull = true; + return PointerGetDatum(NULL); + } - PG_RETURN_TEXT_P(JsonbValueAsText(jbvp)); + return PointerGetDatum(JsonbValueAsText(jbvp)); } else { @@ -1594,6 +1622,131 @@ get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text) } } +Datum +jsonb_set_element(Jsonb *jb, Datum *path, int path_len, + JsonbValue *newval) +{ + JsonbValue *res; + JsonbParseState *state = NULL; + JsonbIterator *it; + bool *path_nulls = palloc0(path_len * sizeof(bool)); + + if 
(newval->type == jbvArray && newval->val.array.rawScalar) + *newval = newval->val.array.elems[0]; + + it = JsonbIteratorInit(&jb->root); + + res = setPath(&it, path, path_nulls, path_len, &state, 0, newval, + JB_PATH_CREATE | JB_PATH_FILL_GAPS | + JB_PATH_CONSISTENT_POSITION); + + pfree(path_nulls); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(res)); +} + +static void +push_null_elements(JsonbParseState **ps, int num) +{ + JsonbValue null; + + null.type = jbvNull; + + while (num-- > 0) + pushJsonbValue(ps, WJB_ELEM, &null); +} + +/* + * Prepare a new structure containing nested empty objects and arrays + * corresponding to the specified path, and assign a new value at the end of + * this path. E.g. the path [a][0][b] with the new value 1 will produce the + * structure {a: [{b: 1}]}. + * + * Called is responsible to make sure such path does not exist yet. + */ +static void +push_path(JsonbParseState **st, int level, Datum *path_elems, + bool *path_nulls, int path_len, JsonbValue *newval) +{ + /* + * tpath contains expected type of an empty jsonb created at each level + * higher or equal than the current one, either jbvObject or jbvArray. + * Since it contains only information about path slice from level to the + * end, the access index must be normalized by level. + */ + enum jbvType *tpath = palloc0((path_len - level) * sizeof(enum jbvType)); + long lindex; + JsonbValue newkey; + + /* + * Create first part of the chain with beginning tokens. For the current + * level WJB_BEGIN_OBJECT/WJB_BEGIN_ARRAY was already created, so start + * with the next one. + */ + for (int i = level + 1; i < path_len; i++) + { + char *c, + *badp; + + if (path_nulls[i]) + break; + + /* + * Try to convert to an integer to find out the expected type, object + * or array. 
+ */ + c = TextDatumGetCString(path_elems[i]); + errno = 0; + lindex = strtol(c, &badp, 10); + if (errno != 0 || badp == c || *badp != '\0' || lindex > INT_MAX || + lindex < INT_MIN) + { + /* text, an object is expected */ + newkey.type = jbvString; + newkey.val.string.len = VARSIZE_ANY_EXHDR(path_elems[i]); + newkey.val.string.val = VARDATA_ANY(path_elems[i]); + + (void) pushJsonbValue(st, WJB_BEGIN_OBJECT, NULL); + (void) pushJsonbValue(st, WJB_KEY, &newkey); + + tpath[i - level] = jbvObject; + } + else + { + /* integer, an array is expected */ + (void) pushJsonbValue(st, WJB_BEGIN_ARRAY, NULL); + + push_null_elements(st, lindex); + + tpath[i - level] = jbvArray; + } + + } + + /* Insert an actual value for either an object or array */ + if (tpath[(path_len - level) - 1] == jbvArray) + { + (void) pushJsonbValue(st, WJB_ELEM, newval); + } + else + (void) pushJsonbValue(st, WJB_VALUE, newval); + + /* + * Close everything up to the last but one level. The last one will be + * closed outside of this function. + */ + for (int i = path_len - 1; i > level; i--) + { + if (path_nulls[i]) + break; + + if (tpath[i - level] == jbvObject) + (void) pushJsonbValue(st, WJB_END_OBJECT, NULL); + else + (void) pushJsonbValue(st, WJB_END_ARRAY, NULL); + } +} + /* * Return the text representation of the given JsonbValue. */ @@ -4151,58 +4304,6 @@ jsonb_strip_nulls(PG_FUNCTION_ARGS) PG_RETURN_POINTER(JsonbValueToJsonb(res)); } -/* - * Add values from the jsonb to the parse state. - * - * If the parse state container is an object, the jsonb is pushed as - * a value, not a key. - * - * This needs to be done using an iterator because pushJsonbValue doesn't - * like getting jbvBinary values, so we can't just push jb as a whole. 
- */ -static void -addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb) -{ - JsonbIterator *it; - JsonbValue *o = &(*jbps)->contVal; - JsonbValue v; - JsonbIteratorToken type; - - it = JsonbIteratorInit(&jb->root); - - Assert(o->type == jbvArray || o->type == jbvObject); - - if (JB_ROOT_IS_SCALAR(jb)) - { - (void) JsonbIteratorNext(&it, &v, false); /* skip array header */ - Assert(v.type == jbvArray); - (void) JsonbIteratorNext(&it, &v, false); /* fetch scalar value */ - - switch (o->type) - { - case jbvArray: - (void) pushJsonbValue(jbps, WJB_ELEM, &v); - break; - case jbvObject: - (void) pushJsonbValue(jbps, WJB_VALUE, &v); - break; - default: - elog(ERROR, "unexpected parent of nested structure"); - } - } - else - { - while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) - { - if (type == WJB_KEY || type == WJB_VALUE || type == WJB_ELEM) - (void) pushJsonbValue(jbps, type, &v); - else - (void) pushJsonbValue(jbps, type, NULL); - } - } - -} - /* * SQL function jsonb_pretty (jsonb) * @@ -4474,7 +4575,8 @@ jsonb_set(PG_FUNCTION_ARGS) { Jsonb *in = PG_GETARG_JSONB_P(0); ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); - Jsonb *newval = PG_GETARG_JSONB_P(2); + Jsonb *newjsonb = PG_GETARG_JSONB_P(2); + JsonbValue newval; bool create = PG_GETARG_BOOL(3); JsonbValue *res = NULL; Datum *path_elems; @@ -4483,6 +4585,8 @@ jsonb_set(PG_FUNCTION_ARGS) JsonbIterator *it; JsonbParseState *st = NULL; + JsonbToJsonbValue(newjsonb, &newval); + if (ARR_NDIM(path) > 1) ereport(ERROR, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), @@ -4505,7 +4609,7 @@ jsonb_set(PG_FUNCTION_ARGS) it = JsonbIteratorInit(&in->root); res = setPath(&it, path_elems, path_nulls, path_len, &st, - 0, newval, create ? JB_PATH_CREATE : JB_PATH_REPLACE); + 0, &newval, create ? 
JB_PATH_CREATE : JB_PATH_REPLACE); Assert(res != NULL); @@ -4632,7 +4736,8 @@ jsonb_insert(PG_FUNCTION_ARGS) { Jsonb *in = PG_GETARG_JSONB_P(0); ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); - Jsonb *newval = PG_GETARG_JSONB_P(2); + Jsonb *newjsonb = PG_GETARG_JSONB_P(2); + JsonbValue newval; bool after = PG_GETARG_BOOL(3); JsonbValue *res = NULL; Datum *path_elems; @@ -4641,6 +4746,8 @@ jsonb_insert(PG_FUNCTION_ARGS) JsonbIterator *it; JsonbParseState *st = NULL; + JsonbToJsonbValue(newjsonb, &newval); + if (ARR_NDIM(path) > 1) ereport(ERROR, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), @@ -4659,7 +4766,7 @@ jsonb_insert(PG_FUNCTION_ARGS) it = JsonbIteratorInit(&in->root); - res = setPath(&it, path_elems, path_nulls, path_len, &st, 0, newval, + res = setPath(&it, path_elems, path_nulls, path_len, &st, 0, &newval, after ? JB_PATH_INSERT_AFTER : JB_PATH_INSERT_BEFORE); Assert(res != NULL); @@ -4784,13 +4891,28 @@ IteratorConcat(JsonbIterator **it1, JsonbIterator **it2, * Bits JB_PATH_INSERT_BEFORE and JB_PATH_INSERT_AFTER in op_type * behave as JB_PATH_CREATE if new value is inserted in JsonbObject. * + * If JB_PATH_FILL_GAPS bit is set, this will change an assignment logic in + * case if target is an array. The assignment index will not be restricted by + * number of elements in the array, and if there are any empty slots between + * last element of the array and a new one they will be filled with nulls. If + * the index is negative, it still will be considered an an index from the end + * of the array. Of a part of the path is not present and this part is more + * than just one last element, this flag will instruct to create the whole + * chain of corresponding objects and insert the value. + * + * JB_PATH_CONSISTENT_POSITION for an array indicates that the called wants to + * keep values with fixed indices. 
Indices for existing elements could be + * changed (shifted forward) in case if the array is prepended with a new value + * and a negative index out of the range, so this behavior will be prevented + * and return an error. + * * All path elements before the last must already exist * whatever bits in op_type are set, or nothing is done. */ static JsonbValue * setPath(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, - JsonbParseState **st, int level, Jsonb *newval, int op_type) + JsonbParseState **st, int level, JsonbValue *newval, int op_type) { JsonbValue v; JsonbIteratorToken r; @@ -4809,6 +4931,21 @@ setPath(JsonbIterator **it, Datum *path_elems, switch (r) { case WJB_BEGIN_ARRAY: + + /* + * If instructed complain about attempts to replace whithin a raw + * scalar value. This happens even when current level is equal to + * path_len, because the last path key should also correspond to + * an object or an array, not raw scalar. + */ + if ((op_type & JB_PATH_FILL_GAPS) && (level <= path_len - 1) && + v.val.array.rawScalar) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot replace existing key"), + errdetail("The path assumes key is a composite object, " + "but it is a scalar value."))); + (void) pushJsonbValue(st, r, NULL); setPathArray(it, path_elems, path_nulls, path_len, st, level, newval, v.val.array.nElems, op_type); @@ -4826,6 +4963,20 @@ setPath(JsonbIterator **it, Datum *path_elems, break; case WJB_ELEM: case WJB_VALUE: + + /* + * If instructed complain about attempts to replace whithin a + * scalar value. This happens even when current level is equal to + * path_len, because the last path key should also correspond to + * an object or an array, not an element or value. 
+ */ + if ((op_type & JB_PATH_FILL_GAPS) && (level <= path_len - 1)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot replace existing key"), + errdetail("The path assumes key is a composite object, " + "but it is a scalar value."))); + res = pushJsonbValue(st, r, &v); break; default: @@ -4843,11 +4994,11 @@ setPath(JsonbIterator **it, Datum *path_elems, static void setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, JsonbParseState **st, int level, - Jsonb *newval, uint32 npairs, int op_type) + JsonbValue *newval, uint32 npairs, int op_type) { - JsonbValue v; int i; - JsonbValue k; + JsonbValue k, + v; bool done = false; if (level >= path_len || path_nulls[level]) @@ -4864,7 +5015,7 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, newkey.val.string.val = VARDATA_ANY(path_elems[level]); (void) pushJsonbValue(st, WJB_KEY, &newkey); - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_VALUE, newval); } for (i = 0; i < npairs; i++) @@ -4878,6 +5029,8 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, memcmp(k.val.string.val, VARDATA_ANY(path_elems[level]), k.val.string.len) == 0) { + done = true; + if (level == path_len - 1) { /* @@ -4895,9 +5048,8 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, if (!(op_type & JB_PATH_DELETE)) { (void) pushJsonbValue(st, WJB_KEY, &k); - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_VALUE, newval); } - done = true; } else { @@ -4918,7 +5070,7 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, newkey.val.string.val = VARDATA_ANY(path_elems[level]); (void) pushJsonbValue(st, WJB_KEY, &newkey); - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_VALUE, newval); } (void) pushJsonbValue(st, r, &k); @@ -4942,6 +5094,31 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, } } } + + /*-- + * If we got here there 
are only few possibilities: + * - no target path was found, and an open object with some keys/values was + * pushed into the state + * - an object is empty, only WJB_BEGIN_OBJECT is pushed + * + * In both cases if instructed to create the path when not present, + * generate the whole chain of empty objects and insert the new value + * there. + */ + if (!done && (op_type & JB_PATH_FILL_GAPS) && (level < path_len - 1)) + { + JsonbValue newkey; + + newkey.type = jbvString; + newkey.val.string.len = VARSIZE_ANY_EXHDR(path_elems[level]); + newkey.val.string.val = VARDATA_ANY(path_elems[level]); + + (void) pushJsonbValue(st, WJB_KEY, &newkey); + (void) push_path(st, level, path_elems, path_nulls, + path_len, newval); + + /* Result is closed with WJB_END_OBJECT outside of this function */ + } } /* @@ -4950,7 +5127,7 @@ setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls, static void setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, int path_len, JsonbParseState **st, int level, - Jsonb *newval, uint32 nelems, int op_type) + JsonbValue *newval, uint32 nelems, int op_type) { JsonbValue v; int idx, @@ -4980,25 +5157,48 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, if (idx < 0) { if (-idx > nelems) - idx = INT_MIN; + { + /* + * If asked to keep elements position consistent, it's not allowed + * to prepend the array. + */ + if (op_type & JB_PATH_CONSISTENT_POSITION) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("path element at position %d is out of range: %d", + level + 1, idx))); + else + idx = INT_MIN; + } else idx = nelems + idx; } - if (idx > 0 && idx > nelems) - idx = nelems; + /* + * Filling the gaps means there are no limits on the positive index are + * imposed, we can set any element. Otherwise limit the index by nelems. 
+ */ + if (!(op_type & JB_PATH_FILL_GAPS)) + { + if (idx > 0 && idx > nelems) + idx = nelems; + } /* * if we're creating, and idx == INT_MIN, we prepend the new value to the * array also if the array is empty - in which case we don't really care * what the idx value is */ - if ((idx == INT_MIN || nelems == 0) && (level == path_len - 1) && (op_type & JB_PATH_CREATE_OR_INSERT)) { Assert(newval != NULL); - addJsonbToParseState(st, newval); + + if (op_type & JB_PATH_FILL_GAPS && nelems == 0 && idx > 0) + push_null_elements(st, idx); + + (void) pushJsonbValue(st, WJB_ELEM, newval); + done = true; } @@ -5009,12 +5209,14 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, if (i == idx && level < path_len) { + done = true; + if (level == path_len - 1) { r = JsonbIteratorNext(it, &v, true); /* skip */ if (op_type & (JB_PATH_INSERT_BEFORE | JB_PATH_CREATE)) - addJsonbToParseState(st, newval); + (void) pushJsonbValue(st, WJB_ELEM, newval); /* * We should keep current value only in case of @@ -5025,9 +5227,7 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, (void) pushJsonbValue(st, r, &v); if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_REPLACE)) - addJsonbToParseState(st, newval); - - done = true; + (void) pushJsonbValue(st, WJB_ELEM, newval); } else (void) setPath(it, path_elems, path_nulls, path_len, @@ -5055,14 +5255,42 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, (void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL); } } - - if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done && - level == path_len - 1 && i == nelems - 1) - { - addJsonbToParseState(st, newval); - } } } + + if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done && level == path_len - 1) + { + /* + * If asked to fill the gaps, idx could be bigger than nelems, so + * prepend the new element with nulls if that's the case. 
+ */ + if (op_type & JB_PATH_FILL_GAPS && idx > nelems) + push_null_elements(st, idx - nelems); + + (void) pushJsonbValue(st, WJB_ELEM, newval); + done = true; + } + + /*-- + * If we got here there are only few possibilities: + * - no target path was found, and an open array with some keys/values was + * pushed into the state + * - an array is empty, only WJB_BEGIN_ARRAY is pushed + * + * In both cases if instructed to create the path when not present, + * generate the whole chain of empty objects and insert the new value + * there. + */ + if (!done && (op_type & JB_PATH_FILL_GAPS) && (level < path_len - 1)) + { + if (idx > 0) + push_null_elements(st, idx - nelems); + + (void) push_path(st, level, path_elems, path_nulls, + path_len, newval); + + /* Result is closed with WJB_END_OBJECT outside of this function */ + } } /* diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c index 31d9d92d14ed5..fa22546f22d58 100644 --- a/src/backend/utils/adt/jsonpath.c +++ b/src/backend/utils/adt/jsonpath.c @@ -53,7 +53,7 @@ * | |__| |__||________________________||___________________| | * |_______________________________________________________________________| * - * Copyright (c) 2019-2020, PostgreSQL Global Development Group + * Copyright (c) 2019-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/jsonpath.c diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c index 1059f34130aee..4d185c27b47f9 100644 --- a/src/backend/utils/adt/jsonpath_exec.c +++ b/src/backend/utils/adt/jsonpath_exec.c @@ -49,7 +49,7 @@ * we calculate operands first. Then we check that results are numeric * singleton lists, calculate the result and pass it to the next path item. 
* - * Copyright (c) 2019-2020, PostgreSQL Global Development Group + * Copyright (c) 2019-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/jsonpath_exec.c diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y index 01b8d451a709b..de3d97931ef45 100644 --- a/src/backend/utils/adt/jsonpath_gram.y +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -6,7 +6,7 @@ * * Transforms tokenized jsonpath into tree of JsonPathParseItem structs. * - * Copyright (c) 2019-2020, PostgreSQL Global Development Group + * Copyright (c) 2019-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/jsonpath_gram.y diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index f723462a1f75a..72d4c5e946a8c 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -7,7 +7,7 @@ * Splits jsonpath string into tokens represented as JsonPathString structs. * Decodes unicode and hex escaped strings. * - * Copyright (c) 2019-2020, PostgreSQL Global Development Group + * Copyright (c) 2019-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/jsonpath_scan.l diff --git a/src/backend/utils/adt/levenshtein.c b/src/backend/utils/adt/levenshtein.c index d11278c505be0..f8979776d0d5c 100644 --- a/src/backend/utils/adt/levenshtein.c +++ b/src/backend/utils/adt/levenshtein.c @@ -16,7 +16,7 @@ * PHP 4.0.6 distribution for inspiration. Configurable penalty costs * extension is introduced by Volkan YAZICI (7/95). 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c index ee30170fbb44a..2f32cdaf020a5 100644 --- a/src/backend/utils/adt/like_match.c +++ b/src/backend/utils/adt/like_match.c @@ -16,7 +16,7 @@ * do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar * MATCH_LOWER - define for case (4) to specify case folding for 1-byte chars * - * Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/like_match.c diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index bcfbaa1c3d184..c746592cbcaa2 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -23,7 +23,7 @@ * from LIKE to indexscan limits rather harder than one might think ... * but that's the basic idea.) * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index f592292d067b8..b1cf5b79a75fe 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -3,7 +3,7 @@ * lockfuncs.c * Functions for SQL access to various lock-manager capabilities. 
* - * Copyright (c) 2002-2020, PostgreSQL Global Development Group + * Copyright (c) 2002-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/lockfuncs.c @@ -629,7 +629,7 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) * Check if any of these are in the list of interesting PIDs, that being * the sessions that the isolation tester is running. We don't use * "arrayoverlaps" here, because it would lead to cache lookups and one of - * our goals is to run quickly under CLOBBER_CACHE_ALWAYS. We expect + * our goals is to run quickly with debug_invalidate_system_caches_always > 0. We expect * blocking_pids to be usually empty and otherwise a very small number in * isolation tester cases, so make that the outer loop of a naive search * for a match. @@ -644,7 +644,7 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) /* * Check if blocked_pid is waiting for a safe snapshot. We could in * theory check the resulting array of blocker PIDs against the - * interesting PIDs whitelist, but since there is no danger of autovacuum + * interesting PIDs list, but since there is no danger of autovacuum * blocking GetSafeSnapshot there seems to be no point in expending cycles * on allocating a buffer and searching for overlap; so it's presently * sufficient for the isolation tester's purposes to use a single element diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c index 8aeddc686326c..844d8814e67fc 100644 --- a/src/backend/utils/adt/mac.c +++ b/src/backend/utils/adt/mac.c @@ -3,7 +3,7 @@ * mac.c * PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses. 
* - * Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1998-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/mac.c diff --git a/src/backend/utils/adt/mac8.c b/src/backend/utils/adt/mac8.c index b7b2968b926c9..41753fac6fd80 100644 --- a/src/backend/utils/adt/mac8.c +++ b/src/backend/utils/adt/mac8.c @@ -11,7 +11,7 @@ * The following code is written with the assumption that the OUI field * size is 24 bits. * - * Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1998-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/mac8.c diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c index 50e1b07ff02c6..c02fa47550a6d 100644 --- a/src/backend/utils/adt/mcxtfuncs.c +++ b/src/backend/utils/adt/mcxtfuncs.c @@ -3,7 +3,7 @@ * mcxtfuncs.c * Functions to show backend memory context. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index b2bf9fa8cbcfd..634f574d7eb89 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -3,7 +3,7 @@ * misc.c * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -25,6 +25,7 @@ #include "catalog/catalog.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" +#include "catalog/system_fk_info.h" #include "commands/dbcommands.h" #include "commands/tablespace.h" #include "common/keywords.h" @@ -37,6 +38,7 @@ #include "storage/fd.h" #include "tcop/tcopprot.h" #include "utils/builtins.h" +#include 
"utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/ruleutils.h" #include "utils/timestamp.h" @@ -489,6 +491,84 @@ pg_get_keywords(PG_FUNCTION_ARGS) } +/* Function to return the list of catalog foreign key relationships */ +Datum +pg_get_catalog_foreign_keys(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + FmgrInfo *arrayinp; + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + TupleDesc tupdesc; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + tupdesc = CreateTemplateTupleDesc(6); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "fktable", + REGCLASSOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "fkcols", + TEXTARRAYOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "pktable", + REGCLASSOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "pkcols", + TEXTARRAYOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "is_array", + BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "is_opt", + BOOLOID, -1, 0); + + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + /* + * We use array_in to convert the C strings in sys_fk_relationships[] + * to text arrays. But we cannot use DirectFunctionCallN to call + * array_in, and it wouldn't be very efficient if we could. Fill an + * FmgrInfo to use for the call. 
+ */ + arrayinp = (FmgrInfo *) palloc(sizeof(FmgrInfo)); + fmgr_info(F_ARRAY_IN, arrayinp); + funcctx->user_fctx = arrayinp; + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + arrayinp = (FmgrInfo *) funcctx->user_fctx; + + if (funcctx->call_cntr < lengthof(sys_fk_relationships)) + { + const SysFKRelationship *fkrel = &sys_fk_relationships[funcctx->call_cntr]; + Datum values[6]; + bool nulls[6]; + HeapTuple tuple; + + memset(nulls, false, sizeof(nulls)); + + values[0] = ObjectIdGetDatum(fkrel->fk_table); + values[1] = FunctionCall3(arrayinp, + CStringGetDatum(fkrel->fk_columns), + ObjectIdGetDatum(TEXTOID), + Int32GetDatum(-1)); + values[2] = ObjectIdGetDatum(fkrel->pk_table); + values[3] = FunctionCall3(arrayinp, + CStringGetDatum(fkrel->pk_columns), + ObjectIdGetDatum(TEXTOID), + Int32GetDatum(-1)); + values[4] = BoolGetDatum(fkrel->is_array); + values[5] = BoolGetDatum(fkrel->is_opt); + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + + SRF_RETURN_DONE(funcctx); +} + + /* * Return the type of the argument. */ diff --git a/src/backend/utils/adt/multirangetypes.c b/src/backend/utils/adt/multirangetypes.c index 06316ba6b6570..b3964ea27fdd5 100644 --- a/src/backend/utils/adt/multirangetypes.c +++ b/src/backend/utils/adt/multirangetypes.c @@ -21,7 +21,7 @@ * for a particular range index. Offsets are counted starting from the end of * flags aligned to the bound type. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -128,9 +128,9 @@ multirange_in(PG_FUNCTION_ARGS) MultirangeType *ret; MultirangeParseState parse_state; const char *ptr = input_str; - const char *range_str = NULL; + const char *range_str_begin = NULL; int32 range_str_len; - char *range_str_copy; + char *range_str; cache = get_multirange_io_data(fcinfo, mltrngtypoid, IOFunc_input); rangetyp = cache->typcache->rngtype; @@ -170,7 +170,7 @@ multirange_in(PG_FUNCTION_ARGS) case MULTIRANGE_BEFORE_RANGE: if (ch == '[' || ch == '(') { - range_str = ptr; + range_str_begin = ptr; parse_state = MULTIRANGE_IN_RANGE; } else if (ch == '}' && ranges_seen == 0) @@ -191,14 +191,10 @@ multirange_in(PG_FUNCTION_ARGS) errdetail("Expected range start."))); break; case MULTIRANGE_IN_RANGE: - if (ch == '"') - parse_state = MULTIRANGE_IN_RANGE_QUOTED; - else if (ch == '\\') - parse_state = MULTIRANGE_IN_RANGE_ESCAPED; - else if (ch == ']' || ch == ')') + if (ch == ']' || ch == ')') { - range_str_len = ptr - range_str + 1; - range_str_copy = pnstrdup(range_str, range_str_len); + range_str_len = ptr - range_str_begin + 1; + range_str = pnstrdup(range_str_begin, range_str_len); if (range_capacity == range_count) { range_capacity *= 2; @@ -207,7 +203,7 @@ multirange_in(PG_FUNCTION_ARGS) } ranges_seen++; range = DatumGetRangeTypeP(InputFunctionCall(&cache->typioproc, - range_str_copy, + range_str, cache->typioparam, typmod)); if (!RangeIsEmpty(range)) @@ -215,10 +211,22 @@ multirange_in(PG_FUNCTION_ARGS) parse_state = MULTIRANGE_AFTER_RANGE; } else - /* include it in range_str */ ; + { + if (ch == '"') + parse_state = MULTIRANGE_IN_RANGE_QUOTED; + else if (ch == '\\') + parse_state = MULTIRANGE_IN_RANGE_ESCAPED; + /* + * We will include this character into range_str once we + * find the end of the range value. 
+ */ + } break; case MULTIRANGE_IN_RANGE_ESCAPED: - /* include it in range_str */ + /* + * We will include this character into range_str once we find + * the end of the range value. + */ parse_state = MULTIRANGE_IN_RANGE; break; case MULTIRANGE_IN_RANGE_QUOTED: @@ -232,8 +240,11 @@ multirange_in(PG_FUNCTION_ARGS) parse_state = MULTIRANGE_IN_RANGE; else if (ch == '\\') parse_state = MULTIRANGE_IN_RANGE_QUOTED_ESCAPED; - else - /* include it in range_str */ ; + + /* + * We will include this character into range_str once we + * find the end of the range value. + */ break; case MULTIRANGE_AFTER_RANGE: if (ch == ',') @@ -248,7 +259,10 @@ multirange_in(PG_FUNCTION_ARGS) errdetail("Expected comma or end of multirange."))); break; case MULTIRANGE_IN_RANGE_QUOTED_ESCAPED: - /* include it in range_str */ + /* + * We will include this character into range_str once we find + * the end of the range value. + */ parse_state = MULTIRANGE_IN_RANGE_QUOTED; break; default: @@ -768,6 +782,27 @@ multirange_get_bounds(TypeCacheEntry *rangetyp, upper->lower = false; } +/* + * Construct union range from the multirange. + */ +RangeType * +multirange_get_union_range(TypeCacheEntry *rangetyp, + const MultirangeType *mr) +{ + RangeBound lower, + upper, + tmp; + + if (MultirangeIsEmpty(mr)) + return make_empty_range(rangetyp); + + multirange_get_bounds(rangetyp, mr, 0, &lower, &tmp); + multirange_get_bounds(rangetyp, mr, mr->rangeCount - 1, &tmp, &upper); + + return make_range(rangetyp, &lower, &upper, false); +} + + /* * multirange_deserialize: deconstruct a multirange value * @@ -847,7 +882,7 @@ range_bounds_contains(TypeCacheEntry *typcache, * that would count as a mismatch. 
*/ static bool -multirange_bsearch_match(TypeCacheEntry *typcache, MultirangeType *mr, +multirange_bsearch_match(TypeCacheEntry *typcache, const MultirangeType *mr, void *key, multirange_bsearch_comparison cmp_func) { uint32 l, @@ -1552,7 +1587,7 @@ multirange_contains_elem(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(multirange_contains_elem_internal(typcache, mr, val)); + PG_RETURN_BOOL(multirange_contains_elem_internal(typcache->rngtype, mr, val)); } /* contained by? */ @@ -1565,7 +1600,7 @@ elem_contained_by_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(multirange_contains_elem_internal(typcache, mr, val)); + PG_RETURN_BOOL(multirange_contains_elem_internal(typcache->rngtype, mr, val)); } /* @@ -1606,13 +1641,13 @@ multirange_elem_bsearch_comparison(TypeCacheEntry *typcache, * Test whether multirange mr contains a specific element value. */ bool -multirange_contains_elem_internal(TypeCacheEntry *typcache, - MultirangeType *mr, Datum val) +multirange_contains_elem_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr, Datum val) { if (MultirangeIsEmpty(mr)) return false; - return multirange_bsearch_match(typcache->rngtype, mr, &val, + return multirange_bsearch_match(rangetyp, mr, &val, multirange_elem_bsearch_comparison); } @@ -1628,7 +1663,19 @@ multirange_contains_range(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(multirange_contains_range_internal(typcache, mr, r)); + PG_RETURN_BOOL(multirange_contains_range_internal(typcache->rngtype, mr, r)); +} + +Datum +range_contains_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_contains_multirange_internal(typcache->rngtype, r, 
mr)); } /* contained by? */ @@ -1641,7 +1688,19 @@ range_contained_by_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(multirange_contains_range_internal(typcache, mr, r)); + PG_RETURN_BOOL(multirange_contains_range_internal(typcache->rngtype, mr, r)); +} + +Datum +multirange_contained_by_range(PG_FUNCTION_ARGS) +{ + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0); + RangeType *r = PG_GETARG_RANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_contains_multirange_internal(typcache->rngtype, r, mr)); } /* @@ -1660,7 +1719,7 @@ multirange_range_contains_bsearch_comparison(TypeCacheEntry *typcache, if (range_cmp_bounds(typcache, keyUpper, lower) < 0) return -1; if (range_cmp_bounds(typcache, keyLower, upper) > 0) - return -1; + return 1; /* * At this point we found overlapping range. But we have to check if it @@ -1676,14 +1735,13 @@ multirange_range_contains_bsearch_comparison(TypeCacheEntry *typcache, * Test whether multirange mr contains a specific range r. */ bool -multirange_contains_range_internal(TypeCacheEntry *typcache, MultirangeType *mr, RangeType *r) +multirange_contains_range_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr, + const RangeType *r) { - TypeCacheEntry *rangetyp; RangeBound bounds[2]; bool empty; - rangetyp = typcache->rngtype; - /* * Every multirange contains an infinite number of empty ranges, even an * empty one. @@ -1701,14 +1759,49 @@ multirange_contains_range_internal(TypeCacheEntry *typcache, MultirangeType *mr, multirange_range_contains_bsearch_comparison); } +/* + * Test whether range r contains a multirange mr. 
+ */ +bool +range_contains_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) +{ + RangeBound lower1, + upper1, + lower2, + upper2, + tmp; + bool empty; + + /* + * Every range contains an infinite number of empty multiranges, even an + * empty one. + */ + if (MultirangeIsEmpty(mr)) + return true; + + if (RangeIsEmpty(r)) + return false; + + /* Range contains multirange iff it contains its union range. */ + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); + Assert(!empty); + multirange_get_bounds(rangetyp, mr, 0, &lower2, &tmp); + multirange_get_bounds(rangetyp, mr, mr->rangeCount - 1, &tmp, &upper2); + + return range_bounds_contains(rangetyp, &lower1, &upper1, &lower2, &upper2); +} + /* multirange, multirange -> bool functions */ /* equality (internal version) */ bool -multirange_eq_internal(TypeCacheEntry *typcache, MultirangeType *mr1, MultirangeType *mr2) +multirange_eq_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr1, + const MultirangeType *mr2) { - TypeCacheEntry *rangetyp = typcache->rngtype; int32 range_count_1; int32 range_count_2; int32 i; @@ -1750,14 +1843,16 @@ multirange_eq(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); - PG_RETURN_BOOL(multirange_eq_internal(typcache, mr1, mr2)); + PG_RETURN_BOOL(multirange_eq_internal(typcache->rngtype, mr1, mr2)); } /* inequality (internal version) */ bool -multirange_ne_internal(TypeCacheEntry *typcache, MultirangeType *mr1, MultirangeType *mr2) +multirange_ne_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr1, + const MultirangeType *mr2) { - return (!multirange_eq_internal(typcache, mr1, mr2)); + return (!multirange_eq_internal(rangetyp, mr1, mr2)); } /* inequality */ @@ -1770,7 +1865,7 @@ multirange_ne(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); - PG_RETURN_BOOL(multirange_ne_internal(typcache, mr1, mr2)); + 
PG_RETURN_BOOL(multirange_ne_internal(typcache->rngtype, mr1, mr2)); } /* overlaps? */ @@ -1783,7 +1878,7 @@ range_overlaps_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(range_overlaps_multirange_internal(typcache, r, mr)); + PG_RETURN_BOOL(range_overlaps_multirange_internal(typcache->rngtype, r, mr)); } Datum @@ -1795,7 +1890,7 @@ multirange_overlaps_range(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(range_overlaps_multirange_internal(typcache, r, mr)); + PG_RETURN_BOOL(range_overlaps_multirange_internal(typcache->rngtype, r, mr)); } Datum @@ -1807,7 +1902,7 @@ multirange_overlaps_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); - PG_RETURN_BOOL(multirange_overlaps_multirange_internal(typcache, mr1, mr2)); + PG_RETURN_BOOL(multirange_overlaps_multirange_internal(typcache->rngtype, mr1, mr2)); } /* @@ -1825,21 +1920,20 @@ multirange_range_overlaps_bsearch_comparison(TypeCacheEntry *typcache, if (range_cmp_bounds(typcache, keyUpper, lower) < 0) return -1; if (range_cmp_bounds(typcache, keyLower, upper) > 0) - return -1; + return 1; *match = true; return 0; } bool -range_overlaps_multirange_internal(TypeCacheEntry *typcache, RangeType *r, MultirangeType *mr) +range_overlaps_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) { - TypeCacheEntry *rangetyp; RangeBound bounds[2]; bool empty; - rangetyp = typcache->rngtype; - /* * Empties never overlap, even with empties. (This seems strange since * they *do* contain each other, but we want to follow how ranges work.) 
@@ -1855,10 +1949,10 @@ range_overlaps_multirange_internal(TypeCacheEntry *typcache, RangeType *r, Multi } bool -multirange_overlaps_multirange_internal(TypeCacheEntry *typcache, MultirangeType *mr1, - MultirangeType *mr2) +multirange_overlaps_multirange_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr1, + const MultirangeType *mr2) { - TypeCacheEntry *rangetyp; int32 range_count1; int32 range_count2; int32 i1; @@ -1875,8 +1969,6 @@ multirange_overlaps_multirange_internal(TypeCacheEntry *typcache, MultirangeType if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2)) return false; - rangetyp = typcache->rngtype; - range_count1 = mr1->rangeCount; range_count2 = mr2->rangeCount; @@ -1914,12 +2006,11 @@ multirange_overlaps_multirange_internal(TypeCacheEntry *typcache, MultirangeType } /* does not extend to right of? */ -Datum -range_overleft_multirange(PG_FUNCTION_ARGS) +bool +range_overleft_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) { - RangeType *r = PG_GETARG_RANGE_P(0); - MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); - TypeCacheEntry *typcache; RangeBound lower1, upper1, lower2, @@ -1929,14 +2020,25 @@ range_overleft_multirange(PG_FUNCTION_ARGS) if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) PG_RETURN_BOOL(false); - typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - range_deserialize(typcache->rngtype, r, &lower1, &upper1, &empty); + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); Assert(!empty); - multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1, + multirange_get_bounds(rangetyp, mr, mr->rangeCount - 1, &lower2, &upper2); - PG_RETURN_BOOL(range_cmp_bounds(typcache->rngtype, &upper1, &upper2) <= 0); + PG_RETURN_BOOL(range_cmp_bounds(rangetyp, &upper1, &upper2) <= 0); +} + +Datum +range_overleft_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = 
multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_overleft_multirange_internal(typcache->rngtype, r, mr)); } Datum @@ -1989,12 +2091,11 @@ multirange_overleft_multirange(PG_FUNCTION_ARGS) } /* does not extend to left of? */ -Datum -range_overright_multirange(PG_FUNCTION_ARGS) +bool +range_overright_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) { - RangeType *r = PG_GETARG_RANGE_P(0); - MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); - TypeCacheEntry *typcache; RangeBound lower1, upper1, lower2, @@ -2004,13 +2105,23 @@ range_overright_multirange(PG_FUNCTION_ARGS) if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) PG_RETURN_BOOL(false); - typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - - range_deserialize(typcache->rngtype, r, &lower1, &upper1, &empty); + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); Assert(!empty); - multirange_get_bounds(typcache->rngtype, mr, 0, &lower2, &upper2); + multirange_get_bounds(rangetyp, mr, 0, &lower2, &upper2); - PG_RETURN_BOOL(range_cmp_bounds(typcache->rngtype, &lower1, &lower2) >= 0); + return (range_cmp_bounds(rangetyp, &lower1, &lower2) >= 0); +} + +Datum +range_overright_multirange(PG_FUNCTION_ARGS) +{ + RangeType *r = PG_GETARG_RANGE_P(0); + MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1); + TypeCacheEntry *typcache; + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + + PG_RETURN_BOOL(range_overright_multirange_internal(typcache->rngtype, r, mr)); } Datum @@ -2069,7 +2180,7 @@ multirange_contains_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); - PG_RETURN_BOOL(multirange_contains_multirange_internal(typcache, mr1, mr2)); + PG_RETURN_BOOL(multirange_contains_multirange_internal(typcache->rngtype, mr1, mr2)); } /* contained by? 
*/ @@ -2082,17 +2193,17 @@ multirange_contained_by_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); - PG_RETURN_BOOL(multirange_contains_multirange_internal(typcache, mr2, mr1)); + PG_RETURN_BOOL(multirange_contains_multirange_internal(typcache->rngtype, mr2, mr1)); } /* * Test whether multirange mr1 contains every range from another multirange mr2. */ bool -multirange_contains_multirange_internal(TypeCacheEntry *typcache, - MultirangeType *mr1, MultirangeType *mr2) +multirange_contains_multirange_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr1, + const MultirangeType *mr2) { - TypeCacheEntry *rangetyp; int32 range_count1 = mr1->rangeCount; int32 range_count2 = mr2->rangeCount; int i1, @@ -2102,8 +2213,6 @@ multirange_contains_multirange_internal(TypeCacheEntry *typcache, lower2, upper2; - rangetyp = typcache->rngtype; - /* * We follow the same logic for empties as ranges: - an empty multirange * contains an empty range/multirange. - an empty multirange can't contain @@ -2161,7 +2270,7 @@ range_before_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(range_before_multirange_internal(typcache, r, mr)); + PG_RETURN_BOOL(range_before_multirange_internal(typcache->rngtype, r, mr)); } Datum @@ -2173,7 +2282,7 @@ multirange_before_range(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(range_after_multirange_internal(typcache, r, mr)); + PG_RETURN_BOOL(range_after_multirange_internal(typcache->rngtype, r, mr)); } Datum @@ -2185,7 +2294,7 @@ multirange_before_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); - PG_RETURN_BOOL(multirange_before_multirange_internal(typcache, mr1, mr2)); + PG_RETURN_BOOL(multirange_before_multirange_internal(typcache->rngtype, mr1, mr2)); } /* strictly right of? 
*/ @@ -2198,7 +2307,7 @@ range_after_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(range_after_multirange_internal(typcache, r, mr)); + PG_RETURN_BOOL(range_after_multirange_internal(typcache->rngtype, r, mr)); } Datum @@ -2210,7 +2319,7 @@ multirange_after_range(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(range_before_multirange_internal(typcache, r, mr)); + PG_RETURN_BOOL(range_before_multirange_internal(typcache->rngtype, r, mr)); } Datum @@ -2222,13 +2331,14 @@ multirange_after_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); - PG_RETURN_BOOL(multirange_before_multirange_internal(typcache, mr2, mr1)); + PG_RETURN_BOOL(multirange_before_multirange_internal(typcache->rngtype, mr2, mr1)); } /* strictly left of? (internal version) */ bool -range_before_multirange_internal(TypeCacheEntry *typcache, RangeType *r, - MultirangeType *mr) +range_before_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) { RangeBound lower1, upper1, @@ -2239,19 +2349,18 @@ range_before_multirange_internal(TypeCacheEntry *typcache, RangeType *r, if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) return false; - range_deserialize(typcache->rngtype, r, &lower1, &upper1, &empty); + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); Assert(!empty); - multirange_get_bounds(typcache->rngtype, mr, 0, - &lower2, &upper2); + multirange_get_bounds(rangetyp, mr, 0, &lower2, &upper2); - return (range_cmp_bounds(typcache->rngtype, &upper1, &lower2) < 0); + return (range_cmp_bounds(rangetyp, &upper1, &lower2) < 0); } bool -multirange_before_multirange_internal(TypeCacheEntry *typcache, - MultirangeType *mr1, - MultirangeType *mr2) +multirange_before_multirange_internal(TypeCacheEntry *rangetyp, + const MultirangeType *mr1, + const MultirangeType *mr2) { RangeBound lower1, upper1, 
@@ -2261,18 +2370,19 @@ multirange_before_multirange_internal(TypeCacheEntry *typcache, if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2)) return false; - multirange_get_bounds(typcache->rngtype, mr1, mr1->rangeCount - 1, + multirange_get_bounds(rangetyp, mr1, mr1->rangeCount - 1, &lower1, &upper1); - multirange_get_bounds(typcache->rngtype, mr2, 0, + multirange_get_bounds(rangetyp, mr2, 0, &lower2, &upper2); - return (range_cmp_bounds(typcache->rngtype, &upper1, &lower2) < 0); + return (range_cmp_bounds(rangetyp, &upper1, &lower2) < 0); } /* strictly right of? (internal version) */ bool -range_after_multirange_internal(TypeCacheEntry *typcache, RangeType *r, - MultirangeType *mr) +range_after_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) { RangeBound lower1, upper1, @@ -2284,19 +2394,20 @@ range_after_multirange_internal(TypeCacheEntry *typcache, RangeType *r, if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) return false; - range_deserialize(typcache->rngtype, r, &lower1, &upper1, &empty); + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); Assert(!empty); range_count = mr->rangeCount; - multirange_get_bounds(typcache->rngtype, mr, range_count - 1, + multirange_get_bounds(rangetyp, mr, range_count - 1, &lower2, &upper2); - return (range_cmp_bounds(typcache->rngtype, &lower1, &upper2) > 0); + return (range_cmp_bounds(rangetyp, &lower1, &upper2) > 0); } bool -range_adjacent_multirange_internal(TypeCacheEntry *typcache, RangeType *r, - MultirangeType *mr) +range_adjacent_multirange_internal(TypeCacheEntry *rangetyp, + const RangeType *r, + const MultirangeType *mr) { RangeBound lower1, upper1, @@ -2308,21 +2419,21 @@ range_adjacent_multirange_internal(TypeCacheEntry *typcache, RangeType *r, if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) return false; - range_deserialize(typcache->rngtype, r, &lower1, &upper1, &empty); + range_deserialize(rangetyp, r, &lower1, &upper1, &empty); Assert(!empty); range_count = 
mr->rangeCount; - multirange_get_bounds(typcache->rngtype, mr, 0, + multirange_get_bounds(rangetyp, mr, 0, &lower2, &upper2); - if (bounds_adjacent(typcache->rngtype, upper1, lower2)) + if (bounds_adjacent(rangetyp, upper1, lower2)) return true; if (range_count > 1) - multirange_get_bounds(typcache->rngtype, mr, range_count - 1, + multirange_get_bounds(rangetyp, mr, range_count - 1, &lower2, &upper2); - if (bounds_adjacent(typcache->rngtype, upper2, lower1)) + if (bounds_adjacent(rangetyp, upper2, lower1)) return true; return false; @@ -2338,7 +2449,7 @@ range_adjacent_multirange(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(range_adjacent_multirange_internal(typcache, r, mr)); + PG_RETURN_BOOL(range_adjacent_multirange_internal(typcache->rngtype, r, mr)); } Datum @@ -2353,7 +2464,7 @@ multirange_adjacent_range(PG_FUNCTION_ARGS) typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); - PG_RETURN_BOOL(range_adjacent_multirange_internal(typcache, r, mr)); + PG_RETURN_BOOL(range_adjacent_multirange_internal(typcache->rngtype, r, mr)); } Datum diff --git a/src/backend/utils/adt/multirangetypes_selfuncs.c b/src/backend/utils/adt/multirangetypes_selfuncs.c index 7259af0b85305..551176bc21377 100644 --- a/src/backend/utils/adt/multirangetypes_selfuncs.c +++ b/src/backend/utils/adt/multirangetypes_selfuncs.c @@ -6,7 +6,7 @@ * Estimates are based on histograms of lower and upper bounds, and the * fraction of empty multiranges. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -86,6 +86,8 @@ default_multirange_selectivity(Oid operator) case OID_RANGE_OVERLAPS_MULTIRANGE_OP: return 0.01; + case OID_RANGE_CONTAINS_MULTIRANGE_OP: + case OID_RANGE_MULTIRANGE_CONTAINED_OP: case OID_MULTIRANGE_CONTAINS_RANGE_OP: case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP: case OID_MULTIRANGE_RANGE_CONTAINED_OP: @@ -224,7 +226,8 @@ multirangesel(PG_FUNCTION_ARGS) 1, &constrange); } } - else if (operator == OID_MULTIRANGE_CONTAINS_RANGE_OP || + else if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP || + operator == OID_MULTIRANGE_CONTAINS_RANGE_OP || operator == OID_MULTIRANGE_OVERLAPS_RANGE_OP || operator == OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP || operator == OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP || @@ -248,6 +251,7 @@ multirangesel(PG_FUNCTION_ARGS) operator == OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP || operator == OID_RANGE_LEFT_MULTIRANGE_OP || operator == OID_RANGE_RIGHT_MULTIRANGE_OP || + operator == OID_RANGE_CONTAINS_MULTIRANGE_OP || operator == OID_MULTIRANGE_ELEM_CONTAINED_OP || operator == OID_MULTIRANGE_RANGE_CONTAINED_OP) { @@ -645,6 +649,7 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata, case OID_MULTIRANGE_RANGE_CONTAINED_OP: case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP: + case OID_RANGE_MULTIRANGE_CONTAINED_OP: if (const_lower.infinite) { /* diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c index a3ce3f3d1e182..c93be3350ea72 100644 --- a/src/backend/utils/adt/name.c +++ b/src/backend/utils/adt/name.c @@ -9,7 +9,7 @@ * always use NAMEDATALEN as the symbolic constant! 
- jolly 8/21/95 * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/network_gist.c b/src/backend/utils/adt/network_gist.c index 9813a1d2b8cae..54e8edcdbd072 100644 --- a/src/backend/utils/adt/network_gist.c +++ b/src/backend/utils/adt/network_gist.c @@ -34,7 +34,7 @@ * twice as fast as for a simpler design in which a single field doubles as * the common prefix length and the minimum ip_bits value. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c index 955e0ee87f803..dca2c6321236c 100644 --- a/src/backend/utils/adt/network_selfuncs.c +++ b/src/backend/utils/adt/network_selfuncs.c @@ -7,7 +7,7 @@ * operators. Estimates are based on null fraction, most common values, * and histogram of inet/cidr columns. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/network_spgist.c b/src/backend/utils/adt/network_spgist.c index 4a0b0073c7382..e496a470d0ab3 100644 --- a/src/backend/utils/adt/network_spgist.c +++ b/src/backend/utils/adt/network_spgist.c @@ -21,7 +21,7 @@ * the address family, everything goes into node 0 (which will probably * lead to creating an allTheSame tuple). 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 20c9cac2fa2e0..682200f636b9b 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -11,7 +11,7 @@ * Transactions on Mathematical Software, Vol. 24, No. 4, December 1998, * pages 359-367. * - * Copyright (c) 1998-2020, PostgreSQL Global Development Group + * Copyright (c) 1998-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/numeric.c @@ -10290,7 +10290,7 @@ power_var_int(const NumericVar *base, int exp, NumericVar *result, int rscale) * to around log10(abs(exp)) digits, so work with this many extra digits * of precision (plus a few more for good measure). */ - sig_digits += (int) log(Abs(exp)) + 8; + sig_digits += (int) log(fabs((double) exp)) + 8; /* * Now we can proceed with the multiplications. diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 412ae361d2c0f..b93096f288f38 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -3,7 +3,7 @@ * numutils.c * utility functions for I/O of built-in numeric types. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/oid.c b/src/backend/utils/adt/oid.c index 4ac691966247f..fd94e0c881829 100644 --- a/src/backend/utils/adt/oid.c +++ b/src/backend/utils/adt/oid.c @@ -3,7 +3,7 @@ * oid.c * Functions for the built-in type Oid ... also oidvector. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c index 76e666474e847..f737aa6fbde73 100644 --- a/src/backend/utils/adt/oracle_compat.c +++ b/src/backend/utils/adt/oracle_compat.c @@ -2,7 +2,7 @@ * oracle_compat.c * Oracle compatible functions. * - * Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Copyright (c) 1996-2021, PostgreSQL Global Development Group * * Author: Edmund Mergl * Multibyte enhancement: Tatsuo Ishii @@ -24,6 +24,8 @@ static text *dotrim(const char *string, int stringlen, const char *set, int setlen, bool doltrim, bool dortrim); +static bytea *dobyteatrim(bytea *string, bytea *set, + bool doltrim, bool dortrim); /******************************************************************** @@ -521,27 +523,12 @@ dotrim(const char *string, int stringlen, return cstring_to_text_with_len(string, stringlen); } -/******************************************************************** - * - * byteatrim - * - * Syntax: - * - * bytea byteatrim(bytea string, bytea set) - * - * Purpose: - * - * Returns string with characters removed from the front and back - * up to the first character not in set. - * - * Cloned from btrim and modified as required. 
- ********************************************************************/ - -Datum -byteatrim(PG_FUNCTION_ARGS) +/* + * Common implementation for bytea versions of btrim, ltrim, rtrim + */ +bytea * +dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim) { - bytea *string = PG_GETARG_BYTEA_PP(0); - bytea *set = PG_GETARG_BYTEA_PP(1); bytea *ret; char *ptr, *end, @@ -556,7 +543,7 @@ byteatrim(PG_FUNCTION_ARGS) setlen = VARSIZE_ANY_EXHDR(set); if (stringlen <= 0 || setlen <= 0) - PG_RETURN_BYTEA_P(string); + return string; m = stringlen; ptr = VARDATA_ANY(string); @@ -564,39 +551,126 @@ byteatrim(PG_FUNCTION_ARGS) ptr2start = VARDATA_ANY(set); end2 = ptr2start + setlen - 1; - while (m > 0) + if (doltrim) { - ptr2 = ptr2start; - while (ptr2 <= end2) + while (m > 0) { - if (*ptr == *ptr2) + ptr2 = ptr2start; + while (ptr2 <= end2) + { + if (*ptr == *ptr2) + break; + ++ptr2; + } + if (ptr2 > end2) break; - ++ptr2; + ptr++; + m--; } - if (ptr2 > end2) - break; - ptr++; - m--; } - while (m > 0) + if (dortrim) { - ptr2 = ptr2start; - while (ptr2 <= end2) + while (m > 0) { - if (*end == *ptr2) + ptr2 = ptr2start; + while (ptr2 <= end2) + { + if (*end == *ptr2) + break; + ++ptr2; + } + if (ptr2 > end2) break; - ++ptr2; + end--; + m--; } - if (ptr2 > end2) - break; - end--; - m--; } ret = (bytea *) palloc(VARHDRSZ + m); SET_VARSIZE(ret, VARHDRSZ + m); memcpy(VARDATA(ret), ptr, m); + return ret; +} + +/******************************************************************** + * + * byteatrim + * + * Syntax: + * + * bytea byteatrim(bytea string, bytea set) + * + * Purpose: + * + * Returns string with characters removed from the front and back + * up to the first character not in set. + * + * Cloned from btrim and modified as required. 
+ ********************************************************************/ + +Datum +byteatrim(PG_FUNCTION_ARGS) +{ + bytea *string = PG_GETARG_BYTEA_PP(0); + bytea *set = PG_GETARG_BYTEA_PP(1); + bytea *ret; + + ret = dobyteatrim(string, set, true, true); + + PG_RETURN_BYTEA_P(ret); +} + +/******************************************************************** + * + * bytealtrim + * + * Syntax: + * + * bytea bytealtrim(bytea string, bytea set) + * + * Purpose: + * + * Returns string with initial characters removed up to the first + * character not in set. + * + ********************************************************************/ + +Datum +bytealtrim(PG_FUNCTION_ARGS) +{ + bytea *string = PG_GETARG_BYTEA_PP(0); + bytea *set = PG_GETARG_BYTEA_PP(1); + bytea *ret; + + ret = dobyteatrim(string, set, true, false); + + PG_RETURN_BYTEA_P(ret); +} + +/******************************************************************** + * + * byteartrim + * + * Syntax: + * + * bytea byteartrim(bytea string, bytea set) + * + * Purpose: + * + * Returns string with final characters removed after the last + * character not in set. + * + ********************************************************************/ + +Datum +byteartrim(PG_FUNCTION_ARGS) +{ + bytea *string = PG_GETARG_BYTEA_PP(0); + bytea *set = PG_GETARG_BYTEA_PP(1); + bytea *ret; + + ret = dobyteatrim(string, set, false, true); PG_RETURN_BYTEA_P(ret); } diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c index f9b5d7024b5be..89f1c3b3a0d36 100644 --- a/src/backend/utils/adt/orderedsetaggs.c +++ b/src/backend/utils/adt/orderedsetaggs.c @@ -3,7 +3,7 @@ * orderedsetaggs.c * Ordered-set aggregate functions. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/partitionfuncs.c b/src/backend/utils/adt/partitionfuncs.c index c1120403fd989..03660d5db6c76 100644 --- a/src/backend/utils/adt/partitionfuncs.c +++ b/src/backend/utils/adt/partitionfuncs.c @@ -3,7 +3,7 @@ * partitionfuncs.c * Functions for accessing partition-related metadata * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index c39d67645c693..e9c1231f9baea 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -2,7 +2,7 @@ * * PostgreSQL locale utilities * - * Portions Copyright (c) 2002-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2002-2021, PostgreSQL Global Development Group * * src/backend/utils/adt/pg_locale.c * @@ -105,20 +105,6 @@ char *localized_full_months[12 + 1]; static bool CurrentLocaleConvValid = false; static bool CurrentLCTimeValid = false; -/* Environment variable storage area */ - -#define LC_ENV_BUFSIZE (NAMEDATALEN + 20) - -static char lc_collate_envbuf[LC_ENV_BUFSIZE]; -static char lc_ctype_envbuf[LC_ENV_BUFSIZE]; - -#ifdef LC_MESSAGES -static char lc_messages_envbuf[LC_ENV_BUFSIZE]; -#endif -static char lc_monetary_envbuf[LC_ENV_BUFSIZE]; -static char lc_numeric_envbuf[LC_ENV_BUFSIZE]; -static char lc_time_envbuf[LC_ENV_BUFSIZE]; - /* Cache for collation-related knowledge */ typedef struct @@ -163,7 +149,6 @@ pg_perm_setlocale(int category, const char *locale) { char *result; const char *envvar; - char *envbuf; #ifndef WIN32 result = setlocale(category, locale); @@ -199,7 
+184,7 @@ pg_perm_setlocale(int category, const char *locale) */ if (category == LC_CTYPE) { - static char save_lc_ctype[LC_ENV_BUFSIZE]; + static char save_lc_ctype[NAMEDATALEN + 20]; /* copy setlocale() return value before callee invokes it again */ strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype)); @@ -216,16 +201,13 @@ pg_perm_setlocale(int category, const char *locale) { case LC_COLLATE: envvar = "LC_COLLATE"; - envbuf = lc_collate_envbuf; break; case LC_CTYPE: envvar = "LC_CTYPE"; - envbuf = lc_ctype_envbuf; break; #ifdef LC_MESSAGES case LC_MESSAGES: envvar = "LC_MESSAGES"; - envbuf = lc_messages_envbuf; #ifdef WIN32 result = IsoLocaleName(locale); if (result == NULL) @@ -236,26 +218,19 @@ pg_perm_setlocale(int category, const char *locale) #endif /* LC_MESSAGES */ case LC_MONETARY: envvar = "LC_MONETARY"; - envbuf = lc_monetary_envbuf; break; case LC_NUMERIC: envvar = "LC_NUMERIC"; - envbuf = lc_numeric_envbuf; break; case LC_TIME: envvar = "LC_TIME"; - envbuf = lc_time_envbuf; break; default: elog(FATAL, "unrecognized LC category: %d", category); - envvar = NULL; /* keep compiler quiet */ - envbuf = NULL; - return NULL; + return NULL; /* keep compiler quiet */ } - snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result); - - if (putenv(envbuf)) + if (setenv(envvar, result, 1) != 0) return NULL; return result; diff --git a/src/backend/utils/adt/pg_lsn.c b/src/backend/utils/adt/pg_lsn.c index ad0a7bd869d1e..12ad0c4c31e82 100644 --- a/src/backend/utils/adt/pg_lsn.c +++ b/src/backend/utils/adt/pg_lsn.c @@ -3,7 +3,7 @@ * pg_lsn.c * Operations for the pg_lsn datatype. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/adt/pg_upgrade_support.c b/src/backend/utils/adt/pg_upgrade_support.c index 521ebaaab6d4e..a575c950790ad 100644 --- a/src/backend/utils/adt/pg_upgrade_support.c +++ b/src/backend/utils/adt/pg_upgrade_support.c @@ -5,7 +5,7 @@ * to control oid and relfilenode assignment, and do other special * hacks needed for pg_upgrade. * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/backend/utils/adt/pg_upgrade_support.c */ diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 6afe1b6f56ebc..62bff52638d11 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -3,7 +3,7 @@ * pgstatfuncs.c * Functions for accessing the statistics collector data * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -494,6 +494,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS) cmdtype = PROGRESS_COMMAND_CREATE_INDEX; else if (pg_strcasecmp(cmd, "BASEBACKUP") == 0) cmdtype = PROGRESS_COMMAND_BASEBACKUP; + else if (pg_strcasecmp(cmd, "COPY") == 0) + cmdtype = PROGRESS_COMMAND_COPY; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -1629,6 +1631,100 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(result); } +Datum +pg_stat_get_db_session_time(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + double result = 0.0; + PgStat_StatDBEntry *dbentry; + + /* convert counter from microsec to millisec for display */ + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) 
+ result = ((double) dbentry->total_session_time) / 1000.0; + + PG_RETURN_FLOAT8(result); +} + +Datum +pg_stat_get_db_active_time(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + double result = 0.0; + PgStat_StatDBEntry *dbentry; + + /* convert counter from microsec to millisec for display */ + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = ((double) dbentry->total_active_time) / 1000.0; + + PG_RETURN_FLOAT8(result); +} + +Datum +pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + double result = 0.0; + PgStat_StatDBEntry *dbentry; + + /* convert counter from microsec to millisec for display */ + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = ((double) dbentry->total_idle_in_xact_time) / 1000.0; + + PG_RETURN_FLOAT8(result); +} + +Datum +pg_stat_get_db_sessions(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result = 0; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = (int64) (dbentry->n_sessions); + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_db_sessions_abandoned(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result = 0; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = (int64) (dbentry->n_sessions_abandoned); + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_db_sessions_fatal(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result = 0; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = (int64) (dbentry->n_sessions_fatal); + + PG_RETURN_INT64(result); +} + +Datum +pg_stat_get_db_sessions_killed(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result = 0; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL) + result = (int64) (dbentry->n_sessions_killed); + + PG_RETURN_INT64(result); +} + Datum 
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS) { diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c index 99a93271fe1c0..c2f910d606770 100644 --- a/src/backend/utils/adt/pseudotypes.c +++ b/src/backend/utils/adt/pseudotypes.c @@ -11,7 +11,7 @@ * we do better?) * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/quote.c b/src/backend/utils/adt/quote.c index 906bf329b8def..8de4eace9eda2 100644 --- a/src/backend/utils/adt/quote.c +++ b/src/backend/utils/adt/quote.c @@ -3,7 +3,7 @@ * quote.c * Functions for quoting identifiers and literals * - * Portions Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2000-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c index 8957cc1984227..815175a654e31 100644 --- a/src/backend/utils/adt/rangetypes.c +++ b/src/backend/utils/adt/rangetypes.c @@ -19,7 +19,7 @@ * value; we must detoast it first. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/rangetypes_gist.c b/src/backend/utils/adt/rangetypes_gist.c index 75069c3ac2c8a..69515b06782e2 100644 --- a/src/backend/utils/adt/rangetypes_gist.c +++ b/src/backend/utils/adt/rangetypes_gist.c @@ -3,7 +3,7 @@ * rangetypes_gist.c * GiST support for range types. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -19,6 +19,7 @@ #include "utils/datum.h" #include "utils/float.h" #include "utils/fmgrprotos.h" +#include "utils/multirangetypes.h" #include "utils/rangetypes.h" /* @@ -135,12 +136,30 @@ typedef struct static RangeType *range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2); -static bool range_gist_consistent_int(TypeCacheEntry *typcache, - StrategyNumber strategy, const RangeType *key, - Datum query); -static bool range_gist_consistent_leaf(TypeCacheEntry *typcache, - StrategyNumber strategy, const RangeType *key, - Datum query); +static bool range_gist_consistent_int_range(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const RangeType *query); +static bool range_gist_consistent_int_multirange(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const MultirangeType *query); +static bool range_gist_consistent_int_element(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + Datum query); +static bool range_gist_consistent_leaf_range(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const RangeType *query); +static bool range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const MultirangeType *query); +static bool range_gist_consistent_leaf_element(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + Datum query); static void range_gist_fallback_split(TypeCacheEntry *typcache, GistEntryVector *entryvec, GIST_SPLITVEC *v); @@ -174,8 +193,8 @@ range_gist_consistent(PG_FUNCTION_ARGS) GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); Datum query = PG_GETARG_DATUM(1); StrategyNumber strategy = (StrategyNumber) 
PG_GETARG_UINT16(2); - - /* Oid subtype = PG_GETARG_OID(3); */ + bool result; + Oid subtype = PG_GETARG_OID(3); bool *recheck = (bool *) PG_GETARG_POINTER(4); RangeType *key = DatumGetRangeTypeP(entry->key); TypeCacheEntry *typcache; @@ -185,12 +204,119 @@ range_gist_consistent(PG_FUNCTION_ARGS) typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key)); + /* + * Perform consistent checking using function corresponding to key type + * (leaf or internal) and query subtype (range, multirange, or element). + * Note that invalid subtype means that query type matches key type + * (range). + */ if (GIST_LEAF(entry)) - PG_RETURN_BOOL(range_gist_consistent_leaf(typcache, strategy, - key, query)); + { + if (!OidIsValid(subtype) || subtype == ANYRANGEOID) + result = range_gist_consistent_leaf_range(typcache, strategy, key, + DatumGetRangeTypeP(query)); + else if (subtype == ANYMULTIRANGEOID) + result = range_gist_consistent_leaf_multirange(typcache, strategy, key, + DatumGetMultirangeTypeP(query)); + else + result = range_gist_consistent_leaf_element(typcache, strategy, + key, query); + } else - PG_RETURN_BOOL(range_gist_consistent_int(typcache, strategy, - key, query)); + { + if (!OidIsValid(subtype) || subtype == ANYRANGEOID) + result = range_gist_consistent_int_range(typcache, strategy, key, + DatumGetRangeTypeP(query)); + else if (subtype == ANYMULTIRANGEOID) + result = range_gist_consistent_int_multirange(typcache, strategy, key, + DatumGetMultirangeTypeP(query)); + else + result = range_gist_consistent_int_element(typcache, strategy, + key, query); + } + PG_RETURN_BOOL(result); +} + +/* + * GiST compress method for multiranges: multirange is approximated as union + * range with no gaps. 
+ */ +Datum +multirange_gist_compress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + + if (entry->leafkey) + { + MultirangeType *mr = DatumGetMultirangeTypeP(entry->key); + RangeType *r; + TypeCacheEntry *typcache; + GISTENTRY *retval = palloc(sizeof(GISTENTRY)); + + typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); + r = multirange_get_union_range(typcache->rngtype, mr); + + gistentryinit(*retval, RangeTypePGetDatum(r), + entry->rel, entry->page, entry->offset, false); + + PG_RETURN_POINTER(retval); + } + + PG_RETURN_POINTER(entry); +} + +/* GiST query consistency check for multiranges */ +Datum +multirange_gist_consistent(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + Datum query = PG_GETARG_DATUM(1); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + bool result; + Oid subtype = PG_GETARG_OID(3); + bool *recheck = (bool *) PG_GETARG_POINTER(4); + RangeType *key = DatumGetRangeTypeP(entry->key); + TypeCacheEntry *typcache; + + /* + * All operators served by this function are inexact because multirange is + * approximated by union range with no gaps. + */ + *recheck = true; + + typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key)); + + /* + * Perform consistent checking using function corresponding to key type + * (leaf or internal) and query subtype (range, multirange, or element). + * Note that invalid subtype means that query type matches key type + * (multirange). 
+ */ + if (GIST_LEAF(entry)) + { + if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID) + result = range_gist_consistent_leaf_multirange(typcache, strategy, key, + DatumGetMultirangeTypeP(query)); + else if (subtype == ANYRANGEOID) + result = range_gist_consistent_leaf_range(typcache, strategy, key, + DatumGetRangeTypeP(query)); + else + result = range_gist_consistent_leaf_element(typcache, strategy, + key, query); + } + else + { + if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID) + result = range_gist_consistent_int_multirange(typcache, strategy, key, + DatumGetMultirangeTypeP(query)); + else if (subtype == ANYRANGEOID) + result = range_gist_consistent_int_range(typcache, strategy, key, + DatumGetRangeTypeP(query)); + else + result = range_gist_consistent_int_element(typcache, strategy, + key, query); + } + PG_RETURN_BOOL(result); } /* form union range */ @@ -758,49 +884,67 @@ range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2) return result; } +static bool +multirange_union_range_equal(TypeCacheEntry *typcache, + const RangeType *r, + const MultirangeType *mr) +{ + RangeBound lower1, + upper1, + lower2, + upper2, + tmp; + bool empty; + + if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) + return (RangeIsEmpty(r) && MultirangeIsEmpty(mr)); + + range_deserialize(typcache, r, &lower1, &upper1, &empty); + Assert(!empty); + multirange_get_bounds(typcache, mr, 0, &lower2, &tmp); + multirange_get_bounds(typcache, mr, mr->rangeCount - 1, &tmp, &upper2); + + return (range_cmp_bounds(typcache, &lower1, &lower2) == 0 && + range_cmp_bounds(typcache, &upper1, &upper2) == 0); +} + /* - * GiST consistent test on an index internal page + * GiST consistent test on an index internal page with range query */ static bool -range_gist_consistent_int(TypeCacheEntry *typcache, StrategyNumber strategy, - const RangeType *key, Datum query) +range_gist_consistent_int_range(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const 
RangeType *query) { switch (strategy) { case RANGESTRAT_BEFORE: - if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeTypeP(query))) + if (RangeIsEmpty(key) || RangeIsEmpty(query)) return false; - return (!range_overright_internal(typcache, key, - DatumGetRangeTypeP(query))); + return (!range_overright_internal(typcache, key, query)); case RANGESTRAT_OVERLEFT: - if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeTypeP(query))) + if (RangeIsEmpty(key) || RangeIsEmpty(query)) return false; - return (!range_after_internal(typcache, key, - DatumGetRangeTypeP(query))); + return (!range_after_internal(typcache, key, query)); case RANGESTRAT_OVERLAPS: - return range_overlaps_internal(typcache, key, - DatumGetRangeTypeP(query)); + return range_overlaps_internal(typcache, key, query); case RANGESTRAT_OVERRIGHT: - if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeTypeP(query))) + if (RangeIsEmpty(key) || RangeIsEmpty(query)) return false; - return (!range_before_internal(typcache, key, - DatumGetRangeTypeP(query))); + return (!range_before_internal(typcache, key, query)); case RANGESTRAT_AFTER: - if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeTypeP(query))) + if (RangeIsEmpty(key) || RangeIsEmpty(query)) return false; - return (!range_overleft_internal(typcache, key, - DatumGetRangeTypeP(query))); + return (!range_overleft_internal(typcache, key, query)); case RANGESTRAT_ADJACENT: - if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeTypeP(query))) + if (RangeIsEmpty(key) || RangeIsEmpty(query)) return false; - if (range_adjacent_internal(typcache, key, - DatumGetRangeTypeP(query))) + if (range_adjacent_internal(typcache, key, query)) return true; - return range_overlaps_internal(typcache, key, - DatumGetRangeTypeP(query)); + return range_overlaps_internal(typcache, key, query); case RANGESTRAT_CONTAINS: - return range_contains_internal(typcache, key, - DatumGetRangeTypeP(query)); + return range_contains_internal(typcache, key, query); case RANGESTRAT_CONTAINED_BY: /* @@ 
-810,20 +954,16 @@ range_gist_consistent_int(TypeCacheEntry *typcache, StrategyNumber strategy, */ if (RangeIsOrContainsEmpty(key)) return true; - return range_overlaps_internal(typcache, key, - DatumGetRangeTypeP(query)); - case RANGESTRAT_CONTAINS_ELEM: - return range_contains_elem_internal(typcache, key, query); + return range_overlaps_internal(typcache, key, query); case RANGESTRAT_EQ: /* * If query is empty, descend only if the key is or contains any * empty ranges. Otherwise, descend if key contains query. */ - if (RangeIsEmpty(DatumGetRangeTypeP(query))) + if (RangeIsEmpty(query)) return RangeIsOrContainsEmpty(key); - return range_contains_internal(typcache, key, - DatumGetRangeTypeP(query)); + return range_contains_internal(typcache, key, query); default: elog(ERROR, "unrecognized range strategy: %d", strategy); return false; /* keep compiler quiet */ @@ -831,42 +971,169 @@ range_gist_consistent_int(TypeCacheEntry *typcache, StrategyNumber strategy, } /* - * GiST consistent test on an index leaf page + * GiST consistent test on an index internal page with multirange query */ static bool -range_gist_consistent_leaf(TypeCacheEntry *typcache, StrategyNumber strategy, - const RangeType *key, Datum query) +range_gist_consistent_int_multirange(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const MultirangeType *query) { switch (strategy) { case RANGESTRAT_BEFORE: - return range_before_internal(typcache, key, - DatumGetRangeTypeP(query)); + if (RangeIsEmpty(key) || MultirangeIsEmpty(query)) + return false; + return (!range_overright_multirange_internal(typcache, key, query)); case RANGESTRAT_OVERLEFT: - return range_overleft_internal(typcache, key, - DatumGetRangeTypeP(query)); + if (RangeIsEmpty(key) || MultirangeIsEmpty(query)) + return false; + return (!range_after_multirange_internal(typcache, key, query)); case RANGESTRAT_OVERLAPS: - return range_overlaps_internal(typcache, key, - DatumGetRangeTypeP(query)); + return 
range_overlaps_multirange_internal(typcache, key, query); case RANGESTRAT_OVERRIGHT: - return range_overright_internal(typcache, key, - DatumGetRangeTypeP(query)); + if (RangeIsEmpty(key) || MultirangeIsEmpty(query)) + return false; + return (!range_before_multirange_internal(typcache, key, query)); case RANGESTRAT_AFTER: - return range_after_internal(typcache, key, - DatumGetRangeTypeP(query)); + if (RangeIsEmpty(key) || MultirangeIsEmpty(query)) + return false; + return (!range_overleft_multirange_internal(typcache, key, query)); case RANGESTRAT_ADJACENT: - return range_adjacent_internal(typcache, key, - DatumGetRangeTypeP(query)); + if (RangeIsEmpty(key) || MultirangeIsEmpty(query)) + return false; + if (range_adjacent_multirange_internal(typcache, key, query)) + return true; + return range_overlaps_multirange_internal(typcache, key, query); case RANGESTRAT_CONTAINS: - return range_contains_internal(typcache, key, - DatumGetRangeTypeP(query)); + return range_contains_multirange_internal(typcache, key, query); case RANGESTRAT_CONTAINED_BY: - return range_contained_by_internal(typcache, key, - DatumGetRangeTypeP(query)); + + /* + * Empty ranges are contained by anything, so if key is or + * contains any empty ranges, we must descend into it. Otherwise, + * descend only if key overlaps the query. + */ + if (RangeIsOrContainsEmpty(key)) + return true; + return range_overlaps_multirange_internal(typcache, key, query); + case RANGESTRAT_EQ: + + /* + * If query is empty, descend only if the key is or contains any + * empty ranges. Otherwise, descend if key contains query. 
+ */ + if (MultirangeIsEmpty(query)) + return RangeIsOrContainsEmpty(key); + return range_contains_multirange_internal(typcache, key, query); + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + return false; /* keep compiler quiet */ + } +} + +/* + * GiST consistent test on an index internal page with element query + */ +static bool +range_gist_consistent_int_element(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + Datum query) +{ + switch (strategy) + { case RANGESTRAT_CONTAINS_ELEM: return range_contains_elem_internal(typcache, key, query); + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + return false; /* keep compiler quiet */ + } +} + +/* + * GiST consistent test on an index leaf page with range query + */ +static bool +range_gist_consistent_leaf_range(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + const RangeType *query) +{ + switch (strategy) + { + case RANGESTRAT_BEFORE: + return range_before_internal(typcache, key, query); + case RANGESTRAT_OVERLEFT: + return range_overleft_internal(typcache, key, query); + case RANGESTRAT_OVERLAPS: + return range_overlaps_internal(typcache, key, query); + case RANGESTRAT_OVERRIGHT: + return range_overright_internal(typcache, key, query); + case RANGESTRAT_AFTER: + return range_after_internal(typcache, key, query); + case RANGESTRAT_ADJACENT: + return range_adjacent_internal(typcache, key, query); + case RANGESTRAT_CONTAINS: + return range_contains_internal(typcache, key, query); + case RANGESTRAT_CONTAINED_BY: + return range_contained_by_internal(typcache, key, query); + case RANGESTRAT_EQ: + return range_eq_internal(typcache, key, query); + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + return false; /* keep compiler quiet */ + } +} + +/* + * GiST consistent test on an index leaf page with multirange query + */ +static bool +range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache, + 
StrategyNumber strategy, + const RangeType *key, + const MultirangeType *query) +{ + switch (strategy) + { + case RANGESTRAT_BEFORE: + return range_before_multirange_internal(typcache, key, query); + case RANGESTRAT_OVERLEFT: + return range_overleft_multirange_internal(typcache, key, query); + case RANGESTRAT_OVERLAPS: + return range_overlaps_multirange_internal(typcache, key, query); + case RANGESTRAT_OVERRIGHT: + return range_overright_multirange_internal(typcache, key, query); + case RANGESTRAT_AFTER: + return range_after_multirange_internal(typcache, key, query); + case RANGESTRAT_ADJACENT: + return range_adjacent_multirange_internal(typcache, key, query); + case RANGESTRAT_CONTAINS: + return range_contains_multirange_internal(typcache, key, query); + case RANGESTRAT_CONTAINED_BY: + return multirange_contains_range_internal(typcache, query, key); case RANGESTRAT_EQ: - return range_eq_internal(typcache, key, DatumGetRangeTypeP(query)); + return multirange_union_range_equal(typcache, key, query); + default: + elog(ERROR, "unrecognized range strategy: %d", strategy); + return false; /* keep compiler quiet */ + } +} + +/* + * GiST consistent test on an index leaf page with element query + */ +static bool +range_gist_consistent_leaf_element(TypeCacheEntry *typcache, + StrategyNumber strategy, + const RangeType *key, + Datum query) +{ + switch (strategy) + { + case RANGESTRAT_CONTAINS_ELEM: + return range_contains_elem_internal(typcache, key, query); default: elog(ERROR, "unrecognized range strategy: %d", strategy); return false; /* keep compiler quiet */ diff --git a/src/backend/utils/adt/rangetypes_selfuncs.c b/src/backend/utils/adt/rangetypes_selfuncs.c index 25dd84f4df626..a6c3c450ac9ed 100644 --- a/src/backend/utils/adt/rangetypes_selfuncs.c +++ b/src/backend/utils/adt/rangetypes_selfuncs.c @@ -6,7 +6,7 @@ * Estimates are based on histograms of lower and upper bounds, and the * fraction of empty ranges. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/rangetypes_spgist.c b/src/backend/utils/adt/rangetypes_spgist.c index 9bbef531495c2..f29de6aab4884 100644 --- a/src/backend/utils/adt/rangetypes_spgist.c +++ b/src/backend/utils/adt/rangetypes_spgist.c @@ -25,7 +25,7 @@ * This implementation only uses the comparison function of the range element * datatype, therefore it works for any range type. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c index d5fa36b6ff0d9..2c10f2c867c64 100644 --- a/src/backend/utils/adt/rangetypes_typanalyze.c +++ b/src/backend/utils/adt/rangetypes_typanalyze.c @@ -13,7 +13,7 @@ * come from different tuples. In theory, the standard scalar selectivity * functions could be used with the combined histogram. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index c70c5eeeb37f9..a32c5c82ab437 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -3,7 +3,7 @@ * regexp.c * Postgres' interface to the regular expression package. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c index 6c1ee9c92df3e..f998fe20763da 100644 --- a/src/backend/utils/adt/regproc.c +++ b/src/backend/utils/adt/regproc.c @@ -8,7 +8,7 @@ * special I/O conversion routines. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 5ab134a85370d..6e3a41062fcc4 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -14,7 +14,7 @@ * plan --- consider improving this someday. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * src/backend/utils/adt/ri_triggers.c * diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index 5c4648bccff72..23787a6ae7d71 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -3,7 +3,7 @@ * rowtypes.c * I/O and comparison functions for generic composite types. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 7d4443e807d80..4a9244f4f665a 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -4,7 +4,7 @@ * Functions to convert stored expressions/querytrees back to * source text * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -3126,13 +3126,14 @@ print_function_trftypes(StringInfo buf, HeapTuple proctup) { int i; - appendStringInfoString(buf, "\n TRANSFORM "); + appendStringInfoString(buf, " TRANSFORM "); for (i = 0; i < ntypes; i++) { if (i != 0) appendStringInfoString(buf, ", "); appendStringInfo(buf, "FOR TYPE %s", format_type_be(trftypes[i])); } + appendStringInfoChar(buf, '\n'); } } @@ -5167,6 +5168,53 @@ get_with_clause(Query *query, deparse_context *context) if (PRETTY_INDENT(context)) appendContextKeyword(context, "", 0, 0, 0); appendStringInfoChar(buf, ')'); + + if (cte->search_clause) + { + bool first = true; + ListCell *lc; + + appendStringInfo(buf, " SEARCH %s FIRST BY ", + cte->search_clause->search_breadth_first ? 
"BREADTH" : "DEPTH"); + + foreach(lc, cte->search_clause->search_col_list) + { + if (first) + first = false; + else + appendStringInfoString(buf, ", "); + appendStringInfoString(buf, + quote_identifier(strVal(lfirst(lc)))); + } + + appendStringInfo(buf, " SET %s", quote_identifier(cte->search_clause->search_seq_column)); + } + + if (cte->cycle_clause) + { + bool first = true; + ListCell *lc; + + appendStringInfoString(buf, " CYCLE "); + + foreach(lc, cte->cycle_clause->cycle_col_list) + { + if (first) + first = false; + else + appendStringInfoString(buf, ", "); + appendStringInfoString(buf, + quote_identifier(strVal(lfirst(lc)))); + } + + appendStringInfo(buf, " SET %s", quote_identifier(cte->cycle_clause->cycle_mark_column)); + appendStringInfoString(buf, " TO "); + get_rule_expr(cte->cycle_clause->cycle_mark_value, context, false); + appendStringInfoString(buf, " DEFAULT "); + get_rule_expr(cte->cycle_clause->cycle_mark_default, context, false); + appendStringInfo(buf, " USING %s", quote_identifier(cte->cycle_clause->cycle_path_column)); + } + sep = ", "; } @@ -9680,6 +9728,7 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context) appendStringInfoChar(buf, ')'); return true; + case F_LTRIM_BYTEA_BYTEA: case F_LTRIM_TEXT: case F_LTRIM_TEXT_TEXT: /* TRIM() */ @@ -9694,6 +9743,7 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context) appendStringInfoChar(buf, ')'); return true; + case F_RTRIM_BYTEA_BYTEA: case F_RTRIM_TEXT: case F_RTRIM_TEXT_TEXT: /* TRIM() */ diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 80bd60f8767b7..47ca4ddbb5250 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -10,7 +10,7 @@ * Index cost functions are located via the index AM's API struct, * which is obtained from the handler function registered in pg_am. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -2206,7 +2206,7 @@ rowcomparesel(PlannerInfo *root, /* * Otherwise, it's a join if there's more than one relation used. */ - is_join_clause = (NumRelids((Node *) opargs) > 1); + is_join_clause = (NumRelids(root, (Node *) opargs) > 1); } if (is_join_clause) @@ -4771,7 +4771,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, * membership. Note that when varRelid isn't zero, only vars of that * relation are considered "real" vars. */ - varnos = pull_varnos(basenode); + varnos = pull_varnos(root, basenode); onerel = NULL; diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c index 52028603d239d..e9f1fc706f834 100644 --- a/src/backend/utils/adt/tid.c +++ b/src/backend/utils/adt/tid.c @@ -3,7 +3,7 @@ * tid.c * Functions for the built-in type tuple id * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 2dbd309122236..0b1f95a5b4ecd 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -3,7 +3,7 @@ * timestamp.c * Functions for the built-in SQL types "timestamp" and "interval". 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/trigfuncs.c b/src/backend/utils/adt/trigfuncs.c index 41377270ed27e..cc28cfd186b11 100644 --- a/src/backend/utils/adt/trigfuncs.c +++ b/src/backend/utils/adt/trigfuncs.c @@ -4,7 +4,7 @@ * Builtin functions for useful trigger support. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/utils/adt/trigfuncs.c diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index b3e3ffc577635..6c913baabacd9 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -3,7 +3,7 @@ * tsginidx.c * GIN support functions for tsvector_ops * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index a601965bd83e6..c09eefdda2313 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -3,7 +3,7 @@ * tsgistidx.c * GiST support functions for tsvector_ops * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 092e8a130bfc2..fe4470174f5d1 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -3,7 +3,7 @@ * tsquery.c * I/O functions for tsquery * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright 
(c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c index 2481cf8c7bf56..82ae284403a4f 100644 --- a/src/backend/utils/adt/tsquery_cleanup.c +++ b/src/backend/utils/adt/tsquery_cleanup.c @@ -4,7 +4,7 @@ * Cleanup query from NOT values and/or stopword * Utility functions to correct work. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsquery_gist.c b/src/backend/utils/adt/tsquery_gist.c index ea18a350188a0..14d7343afa7c6 100644 --- a/src/backend/utils/adt/tsquery_gist.c +++ b/src/backend/utils/adt/tsquery_gist.c @@ -3,7 +3,7 @@ * tsquery_gist.c * GiST index support for tsquery * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c index ea40804110c76..0575b55272b3f 100644 --- a/src/backend/utils/adt/tsquery_op.c +++ b/src/backend/utils/adt/tsquery_op.c @@ -3,7 +3,7 @@ * tsquery_op.c * Various operations with tsquery * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsquery_rewrite.c b/src/backend/utils/adt/tsquery_rewrite.c index 1be89e833c85b..cf0cc974ae5c2 100644 --- a/src/backend/utils/adt/tsquery_rewrite.c +++ b/src/backend/utils/adt/tsquery_rewrite.c @@ -3,7 +3,7 @@ * tsquery_rewrite.c * Utilities for reconstructing tsquery * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsquery_util.c 
b/src/backend/utils/adt/tsquery_util.c index e5c684e289ea1..7f936427b5fd6 100644 --- a/src/backend/utils/adt/tsquery_util.c +++ b/src/backend/utils/adt/tsquery_util.c @@ -3,7 +3,7 @@ * tsquery_util.c * Utilities for tsquery datatype * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c index 38b413f6ffff8..977f70047932b 100644 --- a/src/backend/utils/adt/tsrank.c +++ b/src/backend/utils/adt/tsrank.c @@ -3,7 +3,7 @@ * tsrank.c * rank tsvector by tsquery * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c index cd3bb9b63e313..b02fecc0811ca 100644 --- a/src/backend/utils/adt/tsvector.c +++ b/src/backend/utils/adt/tsvector.c @@ -3,7 +3,7 @@ * tsvector.c * I/O functions for tsvector * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 756a48a167ad3..2939fb5c21037 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -3,7 +3,7 @@ * tsvector_op.c * operations over tsvector * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c index cfc181c20dfc8..c2df4093e6be3 100644 --- a/src/backend/utils/adt/tsvector_parser.c +++ b/src/backend/utils/adt/tsvector_parser.c @@ -3,7 +3,7 @@ * tsvector_parser.c * Parser for tsvector * - * Portions 
Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c index c906ee789d92f..b02c9fcf984ad 100644 --- a/src/backend/utils/adt/uuid.c +++ b/src/backend/utils/adt/uuid.c @@ -3,7 +3,7 @@ * uuid.c * Functions for the built-in type "uuid". * - * Copyright (c) 2007-2020, PostgreSQL Global Development Group + * Copyright (c) 2007-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/uuid.c diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c index 3c03459f5192d..2235866244da3 100644 --- a/src/backend/utils/adt/varbit.c +++ b/src/backend/utils/adt/varbit.c @@ -20,7 +20,7 @@ * * Code originally contributed by Adriaan Joubert. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -1059,7 +1059,7 @@ bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified) len, ishift, i; - int e, + int32 e, s1, e1; bits8 *r, @@ -1072,18 +1072,24 @@ bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified) { e1 = bitlen + 1; } - else + else if (l < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + e1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(s, l, &e)) { - e = s + l; - /* - * A negative value for L is the only way for the end position to be - * before the start. SQL99 says to throw an error. + * L could be large enough for S + L to overflow, in which case the + * substring must run to end of string. 
*/ - if (e < s) - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); + e1 = bitlen + 1; + } + else + { e1 = Min(e, bitlen + 1); } if (s1 > bitlen || e1 <= s1) diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index b595ab9569cfd..8fc84649f1954 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -3,7 +3,7 @@ * varchar.c * Functions for the built-in types char(n) and varchar(n). * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 9300d19e0c013..479ed9ae54cd9 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -3,7 +3,7 @@ * varlena.c * Functions for the variable-length built-in types. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -21,8 +21,8 @@ #include "catalog/pg_collation.h" #include "catalog/pg_type.h" #include "common/hashfn.h" +#include "common/hex.h" #include "common/int.h" -#include "common/hex_decode.h" #include "common/unicode_norm.h" #include "lib/hyperloglog.h" #include "libpq/pqformat.h" @@ -304,10 +304,12 @@ byteain(PG_FUNCTION_ARGS) if (inputText[0] == '\\' && inputText[1] == 'x') { size_t len = strlen(inputText); + uint64 dstlen = pg_hex_dec_len(len - 2); - bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ + bc = dstlen + VARHDRSZ; /* maximum possible length */ result = palloc(bc); - bc = hex_decode(inputText + 2, len - 2, VARDATA(result)); + + bc = pg_hex_decode(inputText + 2, len - 2, VARDATA(result), dstlen); SET_VARSIZE(result, bc + VARHDRSZ); /* actual 
length */ PG_RETURN_BYTEA_P(result); @@ -396,11 +398,15 @@ byteaout(PG_FUNCTION_ARGS) if (bytea_output == BYTEA_OUTPUT_HEX) { + uint64 dstlen = pg_hex_enc_len(VARSIZE_ANY_EXHDR(vlena)); + /* Print hex format */ - rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + rp = result = palloc(dstlen + 2 + 1); *rp++ = '\\'; *rp++ = 'x'; - rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + + rp += pg_hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp, + dstlen); } else if (bytea_output == BYTEA_OUTPUT_ESCAPE) { @@ -868,29 +874,38 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) int32 S = start; /* start position */ int32 S1; /* adjusted start position */ int32 L1; /* adjusted substring length */ + int32 E; /* end position */ + + /* + * SQL99 says S can be zero or negative, but we still must fetch from the + * start of the string. + */ + S1 = Max(S, 1); /* life is easy if the encoding max length is 1 */ if (eml == 1) { - S1 = Max(S, 1); - if (length_not_specified) /* special case - get length to end of * string */ L1 = -1; - else + else if (length < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + L1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(S, length, &E)) { - /* end position */ - int E = S + length; - /* - * A negative value for L is the only way for the end position to - * be before the start. SQL99 says to throw an error. + * L could be large enough for S + L to overflow, in which case + * the substring must run to end of string. */ - if (E < S) - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - + L1 = -1; + } + else + { /* * A zero or negative value for the end position can happen if the * start was negative or one. 
SQL99 says to return a zero-length @@ -904,8 +919,8 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) /* * If the start position is past the end of the string, SQL99 says to - * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do - * that for us. Convert to zero-based starting position + * return a zero-length string -- DatumGetTextPSlice() will do that + * for us. We need only convert S1 to zero-based starting position. */ return DatumGetTextPSlice(str, S1 - 1, L1); } @@ -926,12 +941,6 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) char *s; text *ret; - /* - * if S is past the end of the string, the tuple toaster will return a - * zero-length string to us - */ - S1 = Max(S, 1); - /* * We need to start at position zero because there is no way to know * in advance which byte offset corresponds to the supplied start @@ -942,19 +951,24 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) if (length_not_specified) /* special case - get length to end of * string */ slice_size = L1 = -1; - else + else if (length < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + slice_size = L1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(S, length, &E)) { - int E = S + length; - /* - * A negative value for L is the only way for the end position to - * be before the start. SQL99 says to throw an error. + * L could be large enough for S + L to overflow, in which case + * the substring must run to end of string. */ - if (E < S) - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - + slice_size = L1 = -1; + } + else + { /* * A zero or negative value for the end position can happen if the * start was negative or one. 
SQL99 says to return a zero-length @@ -972,8 +986,10 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) /* * Total slice size in bytes can't be any longer than the start * position plus substring length times the encoding max length. + * If that overflows, we can just use -1. */ - slice_size = (S1 + L1) * eml; + if (pg_mul_s32_overflow(E, eml, &slice_size)) + slice_size = -1; } /* @@ -3309,9 +3325,13 @@ bytea_substring(Datum str, int L, bool length_not_specified) { - int S1; /* adjusted start position */ - int L1; /* adjusted substring length */ + int32 S1; /* adjusted start position */ + int32 L1; /* adjusted substring length */ + int32 E; /* end position */ + /* + * The logic here should generally match text_substring(). + */ S1 = Max(S, 1); if (length_not_specified) @@ -3322,20 +3342,24 @@ bytea_substring(Datum str, */ L1 = -1; } - else + else if (L < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + L1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(S, L, &E)) { - /* end position */ - int E = S + L; - /* - * A negative value for L is the only way for the end position to be - * before the start. SQL99 says to throw an error. + * L could be large enough for S + L to overflow, in which case the + * substring must run to end of string. */ - if (E < S) - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - + L1 = -1; + } + else + { /* * A zero or negative value for the end position can happen if the * start was negative or one. SQL99 says to return a zero-length @@ -3350,7 +3374,7 @@ bytea_substring(Datum str, /* * If the start position is past the end of the string, SQL99 says to * return a zero-length string -- DatumGetByteaPSlice() will do that for - * us. Convert to zero-based starting position + * us. 
We need only convert S1 to zero-based starting position. */ return DatumGetByteaPSlice(str, S1 - 1, L1); } diff --git a/src/backend/utils/adt/version.c b/src/backend/utils/adt/version.c index 37915271c7f2d..7e704987da0b1 100644 --- a/src/backend/utils/adt/version.c +++ b/src/backend/utils/adt/version.c @@ -3,7 +3,7 @@ * version.c * Returns the PostgreSQL version string * - * Copyright (c) 1998-2020, PostgreSQL Global Development Group + * Copyright (c) 1998-2021, PostgreSQL Global Development Group * * IDENTIFICATION * diff --git a/src/backend/utils/adt/windowfuncs.c b/src/backend/utils/adt/windowfuncs.c index f0c8ae686dd45..9c127617d1e1e 100644 --- a/src/backend/utils/adt/windowfuncs.c +++ b/src/backend/utils/adt/windowfuncs.c @@ -3,7 +3,7 @@ * windowfuncs.c * Standard window functions defined in SQL spec. * - * Portions Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2000-2021, PostgreSQL Global Development Group * * * IDENTIFICATION diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c index a4762014ba1fa..24c1c9373265d 100644 --- a/src/backend/utils/adt/xid.c +++ b/src/backend/utils/adt/xid.c @@ -3,7 +3,7 @@ * xid.c * POSTGRES transaction identifier and command identifier datatypes. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c index c4401f4adf720..cc2b4ac7979a8 100644 --- a/src/backend/utils/adt/xid8funcs.c +++ b/src/backend/utils/adt/xid8funcs.c @@ -15,7 +15,7 @@ * users. The txid_XXX variants should eventually be dropped. * * - * Copyright (c) 2003-2020, PostgreSQL Global Development Group + * Copyright (c) 2003-2021, PostgreSQL Global Development Group * Author: Jan Wieck, Afilias USA INC. 
* 64-bit txids: Marko Kreen, Skype Technologies * diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 4c299057a6fb2..7350940b66dcd 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -4,7 +4,7 @@ * XML data type support. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/utils/adt/xml.c @@ -4534,13 +4534,7 @@ XmlTableFetchRow(TableFuncScanState *state) xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow"); - /* - * XmlTable returns table - set of composite values. The error context, is - * used for producement more values, between two calls, there can be - * created and used another libxml2 error context. It is libxml2 global - * value, so it should be refreshed any time before any libxml2 usage, - * that is finished by returning some value. 
- */ + /* Propagate our own error context to libxml2 */ xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); if (xtCxt->xpathobj == NULL) @@ -4594,7 +4588,7 @@ XmlTableGetValue(TableFuncScanState *state, int colnum, xtCxt->xpathobj->type == XPATH_NODESET && xtCxt->xpathobj->nodesetval != NULL); - /* Propagate context related error context to libxml2 */ + /* Propagate our own error context to libxml2 */ xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); *isnull = false; @@ -4737,7 +4731,7 @@ XmlTableDestroyOpaque(TableFuncScanState *state) xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque"); - /* Propagate context related error context to libxml2 */ + /* Propagate our own error context to libxml2 */ xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); if (xtCxt->xpathscomp != NULL) diff --git a/src/backend/utils/cache/attoptcache.c b/src/backend/utils/cache/attoptcache.c index 934a84e03f1d0..72d89cb64164a 100644 --- a/src/backend/utils/cache/attoptcache.c +++ b/src/backend/utils/cache/attoptcache.c @@ -6,7 +6,7 @@ * Attribute options are cached separately from the fixed-size portion of * pg_attribute entries, which are handled by the relcache. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 3613ae5f44d3d..fa2b49c676e6b 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -3,7 +3,7 @@ * catcache.c * System catalog cache for tuples matching a key. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1128,7 +1128,7 @@ IndexScanOK(CatCache *cache, ScanKey cur_skey) } /* - * SearchCatCacheInternal + * SearchCatCache * * This call searches a system cache for a tuple, opening the relation * if necessary (on the first access to a particular cache). diff --git a/src/backend/utils/cache/evtcache.c b/src/backend/utils/cache/evtcache.c index 0877bc7e0e016..460b720a65121 100644 --- a/src/backend/utils/cache/evtcache.c +++ b/src/backend/utils/cache/evtcache.c @@ -3,7 +3,7 @@ * evtcache.c * Special-purpose cache for event trigger data. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index e7279d06a3ddd..f54dc12b718a2 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -89,7 +89,7 @@ * support the decoding of the in-progress transactions. See * CommandEndInvalidationMessages. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -109,6 +109,7 @@ #include "storage/sinval.h" #include "storage/smgr.h" #include "utils/catcache.h" +#include "utils/guc.h" #include "utils/inval.h" #include "utils/memdebug.h" #include "utils/memutils.h" @@ -179,6 +180,8 @@ static SharedInvalidationMessage *SharedInvalidMessagesArray; static int numSharedInvalidMessagesArray; static int maxSharedInvalidMessagesArray; +/* GUC storage */ +int debug_invalidate_system_caches_always = 0; /* * Dynamically-registered callback functions. Current implementation @@ -689,35 +692,32 @@ AcceptInvalidationMessages(void) /* * Test code to force cache flushes anytime a flush could happen. * - * If used with CLOBBER_FREED_MEMORY, CLOBBER_CACHE_ALWAYS provides a - * fairly thorough test that the system contains no cache-flush hazards. - * However, it also makes the system unbelievably slow --- the regression - * tests take about 100 times longer than normal. + * This helps detect intermittent faults caused by code that reads a + * cache entry and then performs an action that could invalidate the entry, + * but rarely actually does so. This can spot issues that would otherwise + * only arise with badly timed concurrent DDL, for example. * - * If you're a glutton for punishment, try CLOBBER_CACHE_RECURSIVELY. This - * slows things by at least a factor of 10000, so I wouldn't suggest - * trying to run the entire regression tests that way. It's useful to try - * a few simple tests, to make sure that cache reload isn't subject to - * internal cache-flush hazards, but after you've done a few thousand - * recursive reloads it's unlikely you'll learn more. + * The default debug_invalidate_system_caches_always = 0 does no forced cache flushes. 
+ * + * If used with CLOBBER_FREED_MEMORY, debug_invalidate_system_caches_always = 1 + * (CLOBBER_CACHE_ALWAYS) provides a fairly thorough test that the system + * contains no cache-flush hazards. However, it also makes the system + * unbelievably slow --- the regression tests take about 100 times longer + * than normal. + * + * If you're a glutton for punishment, try debug_invalidate_system_caches_always = 3 + * (CLOBBER_CACHE_RECURSIVELY). This slows things by at least a factor + * of 10000, so I wouldn't suggest trying to run the entire regression + * tests that way. It's useful to try a few simple tests, to make sure + * that cache reload isn't subject to internal cache-flush hazards, but + * after you've done a few thousand recursive reloads it's unlikely + * you'll learn more. */ -#if defined(CLOBBER_CACHE_ALWAYS) - { - static bool in_recursion = false; - - if (!in_recursion) - { - in_recursion = true; - InvalidateSystemCaches(); - in_recursion = false; - } - } -#elif defined(CLOBBER_CACHE_RECURSIVELY) +#ifdef CLOBBER_CACHE_ENABLED { static int recursion_depth = 0; - /* Maximum depth is arbitrary depending on your threshold of pain */ - if (recursion_depth < 3) + if (recursion_depth < debug_invalidate_system_caches_always) { recursion_depth++; InvalidateSystemCaches(); diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index ad92636f7f19d..85c458bc46ee4 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -3,7 +3,7 @@ * lsyscache.c * Convenience routines for common queries in the system catalog cache. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c index acf8a44f30fc6..a6388d980ed06 100644 --- a/src/backend/utils/cache/partcache.c +++ b/src/backend/utils/cache/partcache.c @@ -4,7 +4,7 @@ * Support routines for manipulating partition information cached in * relcache * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 50d6ad28b4cdb..1a0950489d741 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -44,7 +44,7 @@ * if the old one gets invalidated. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -533,7 +533,7 @@ ReleaseGenericPlan(CachedPlanSource *plansource) Assert(plan->magic == CACHEDPLAN_MAGIC); plansource->gplan = NULL; - ReleaseCachedPlan(plan, false); + ReleaseCachedPlan(plan, NULL); } } @@ -897,7 +897,7 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, * rejected a generic plan, it's possible to reach here with is_valid * false due to an invalidation while making the generic plan. In theory * the invalidation must be a false positive, perhaps a consequence of an - * sinval reset event or the CLOBBER_CACHE_ALWAYS debug code. But for + * sinval reset event or the debug_invalidate_system_caches_always code. 
But for * safety, let's treat it as real and redo the RevalidateCachedQuery call. */ if (!plansource->is_valid) @@ -1130,16 +1130,16 @@ cached_plan_cost(CachedPlan *plan, bool include_planner) * execution. * * On return, the refcount of the plan has been incremented; a later - * ReleaseCachedPlan() call is expected. The refcount has been reported - * to the CurrentResourceOwner if useResOwner is true (note that that must - * only be true if it's a "saved" CachedPlanSource). + * ReleaseCachedPlan() call is expected. If "owner" is not NULL then + * the refcount has been reported to that ResourceOwner (note that this + * is only supported for "saved" CachedPlanSources). * * Note: if any replanning activity is required, the caller's memory context * is used for that work. */ CachedPlan * GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, - bool useResOwner, QueryEnvironment *queryEnv) + ResourceOwner owner, QueryEnvironment *queryEnv) { CachedPlan *plan = NULL; List *qlist; @@ -1149,7 +1149,7 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); Assert(plansource->is_complete); /* This seems worth a real test, though */ - if (useResOwner && !plansource->is_saved) + if (owner && !plansource->is_saved) elog(ERROR, "cannot apply ResourceOwner to non-saved cached plan"); /* Make sure the querytree list is valid and we have parse-time locks */ @@ -1228,11 +1228,11 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, Assert(plan != NULL); /* Flag the plan as in use by caller */ - if (useResOwner) - ResourceOwnerEnlargePlanCacheRefs(CurrentResourceOwner); + if (owner) + ResourceOwnerEnlargePlanCacheRefs(owner); plan->refcount++; - if (useResOwner) - ResourceOwnerRememberPlanCacheRef(CurrentResourceOwner, plan); + if (owner) + ResourceOwnerRememberPlanCacheRef(owner, plan); /* * Saved plans should be under CacheMemoryContext so they will not go away @@ -1253,21 
+1253,21 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, * ReleaseCachedPlan: release active use of a cached plan. * * This decrements the reference count, and frees the plan if the count - * has thereby gone to zero. If useResOwner is true, it is assumed that - * the reference count is managed by the CurrentResourceOwner. + * has thereby gone to zero. If "owner" is not NULL, it is assumed that + * the reference count is managed by that ResourceOwner. * - * Note: useResOwner = false is used for releasing references that are in + * Note: owner == NULL is used for releasing references that are in * persistent data structures, such as the parent CachedPlanSource or a * Portal. Transient references should be protected by a resource owner. */ void -ReleaseCachedPlan(CachedPlan *plan, bool useResOwner) +ReleaseCachedPlan(CachedPlan *plan, ResourceOwner owner) { Assert(plan->magic == CACHEDPLAN_MAGIC); - if (useResOwner) + if (owner) { Assert(plan->is_saved); - ResourceOwnerForgetPlanCacheRef(CurrentResourceOwner, plan); + ResourceOwnerForgetPlanCacheRef(owner, plan); } Assert(plan->refcount > 0); plan->refcount--; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 3bd5e18042522..7ef510cd01b70 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3,7 +3,7 @@ * relcache.c * POSTGRES relation descriptor cache code * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -91,15 +91,15 @@ #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */ /* - * Default policy for whether to apply RECOVER_RELATION_BUILD_MEMORY: - * do so in clobber-cache builds but not otherwise. This choice can be - * overridden at compile time with -DRECOVER_RELATION_BUILD_MEMORY=1 or =0. 
+ * Whether to bother checking if relation cache memory needs to be freed + * eagerly. See also RelationBuildDesc() and pg_config_manual.h. */ -#ifndef RECOVER_RELATION_BUILD_MEMORY -#if defined(CLOBBER_CACHE_ALWAYS) || defined(CLOBBER_CACHE_RECURSIVELY) -#define RECOVER_RELATION_BUILD_MEMORY 1 +#if defined(RECOVER_RELATION_BUILD_MEMORY) && (RECOVER_RELATION_BUILD_MEMORY != 0) +#define MAYBE_RECOVER_RELATION_BUILD_MEMORY 1 #else #define RECOVER_RELATION_BUILD_MEMORY 0 +#ifdef CLOBBER_CACHE_ENABLED +#define MAYBE_RECOVER_RELATION_BUILD_MEMORY 1 #endif #endif @@ -1040,19 +1040,25 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) * scope, and relcache loads shouldn't happen so often that it's essential * to recover transient data before end of statement/transaction. However * that's definitely not true in clobber-cache test builds, and perhaps - * it's not true in other cases. If RECOVER_RELATION_BUILD_MEMORY is not - * zero, arrange to allocate the junk in a temporary context that we'll - * free before returning. Make it a child of caller's context so that it - * will get cleaned up appropriately if we error out partway through. + * it's not true in other cases. + * + * When cache clobbering is enabled or when forced to by + * RECOVER_RELATION_BUILD_MEMORY=1, arrange to allocate the junk in a + * temporary context that we'll free before returning. Make it a child + * of caller's context so that it will get cleaned up appropriately if + * we error out partway through. 
*/ -#if RECOVER_RELATION_BUILD_MEMORY - MemoryContext tmpcxt; - MemoryContext oldcxt; +#ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY + MemoryContext tmpcxt = NULL; + MemoryContext oldcxt = NULL; - tmpcxt = AllocSetContextCreate(CurrentMemoryContext, - "RelationBuildDesc workspace", - ALLOCSET_DEFAULT_SIZES); - oldcxt = MemoryContextSwitchTo(tmpcxt); + if (RECOVER_RELATION_BUILD_MEMORY || debug_invalidate_system_caches_always > 0) + { + tmpcxt = AllocSetContextCreate(CurrentMemoryContext, + "RelationBuildDesc workspace", + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(tmpcxt); + } #endif /* @@ -1065,10 +1071,13 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) */ if (!HeapTupleIsValid(pg_class_tuple)) { -#if RECOVER_RELATION_BUILD_MEMORY - /* Return to caller's context, and blow away the temporary context */ - MemoryContextSwitchTo(oldcxt); - MemoryContextDelete(tmpcxt); +#ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY + if (tmpcxt) + { + /* Return to caller's context, and blow away the temporary context */ + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(tmpcxt); + } #endif return NULL; } @@ -1247,10 +1256,13 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) /* It's fully valid */ relation->rd_isvalid = true; -#if RECOVER_RELATION_BUILD_MEMORY - /* Return to caller's context, and blow away the temporary context */ - MemoryContextSwitchTo(oldcxt); - MemoryContextDelete(tmpcxt); +#ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY + if (tmpcxt) + { + /* Return to caller's context, and blow away the temporary context */ + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(tmpcxt); + } #endif return relation; @@ -1646,8 +1658,9 @@ LookupOpclassInfo(Oid operatorClassOid, * while we are loading the info, and it's very hard to provoke that if * this happens only once per opclass per backend. 
*/ -#if defined(CLOBBER_CACHE_ALWAYS) - opcentry->valid = false; +#ifdef CLOBBER_CACHE_ENABLED + if (debug_invalidate_system_caches_always > 0) + opcentry->valid = false; #endif if (opcentry->valid) diff --git a/src/backend/utils/cache/relfilenodemap.c b/src/backend/utils/cache/relfilenodemap.c index 38e6379974c05..56d7c73d3398f 100644 --- a/src/backend/utils/cache/relfilenodemap.c +++ b/src/backend/utils/cache/relfilenodemap.c @@ -3,7 +3,7 @@ * relfilenodemap.c * relfilenode to oid mapping cache. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 73e45eb4846ce..424624cf0dad2 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -28,7 +28,7 @@ * all these files commit in a single map file update rather than being tied * to transaction commit. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/cache/spccache.c b/src/backend/utils/cache/spccache.c index c8387e2541359..5870f436df828 100644 --- a/src/backend/utils/cache/spccache.c +++ b/src/backend/utils/cache/spccache.c @@ -8,7 +8,7 @@ * be a measurable performance gain from doing this, but that might change * in the future as we add more options. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 89f08a4f43ca4..e4dc4ee34eebf 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -3,7 +3,7 @@ * syscache.c * System cache management routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index a2867fac7de0d..384107b6bac39 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -17,7 +17,7 @@ * any database access. * * - * Copyright (c) 2006-2020, PostgreSQL Global Development Group + * Copyright (c) 2006-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/cache/ts_cache.c diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index 8c97ef3955772..4915ef59349be 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -31,7 +31,7 @@ * constraint changes are also tracked properly. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt index c79312ed03974..9874a77805934 100644 --- a/src/backend/utils/errcodes.txt +++ b/src/backend/utils/errcodes.txt @@ -2,7 +2,7 @@ # errcodes.txt # PostgreSQL error codes # -# Copyright (c) 2003-2020, PostgreSQL Global Development Group +# Copyright (c) 2003-2021, PostgreSQL Global Development Group # # This list serves as the basis for generating source files containing error # codes. It is kept in a common format to make sure all these source files have @@ -428,6 +428,7 @@ Section: Class 57 - Operator Intervention 57P02 E ERRCODE_CRASH_SHUTDOWN crash_shutdown 57P03 E ERRCODE_CANNOT_CONNECT_NOW cannot_connect_now 57P04 E ERRCODE_DATABASE_DROPPED database_dropped +57P05 E ERRCODE_IDLE_SESSION_TIMEOUT idle_session_timeout Section: Class 58 - System Error (errors external to PostgreSQL itself) diff --git a/src/backend/utils/error/assert.c b/src/backend/utils/error/assert.c index a8c0a8ec487c9..70a410a1910f9 100644 --- a/src/backend/utils/error/assert.c +++ b/src/backend/utils/error/assert.c @@ -3,7 +3,7 @@ * assert.c * Assert support code. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 3558e660c73b2..80c26724612cb 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -43,7 +43,7 @@ * overflow.) 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -72,6 +72,7 @@ #include "libpq/pqformat.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "pgstat.h" #include "postmaster/bgworker.h" #include "postmaster/postmaster.h" #include "postmaster/syslogger.h" @@ -202,6 +203,11 @@ is_log_level_output(int elevel, int log_min_level) if (log_min_level == LOG || log_min_level <= ERROR) return true; } + else if (elevel == WARNING_CLIENT_ONLY) + { + /* never sent to log, regardless of log_min_level */ + return false; + } else if (log_min_level == LOG) { /* elevel != LOG */ @@ -453,7 +459,7 @@ errstart(int elevel, const char *domain) /* Select default errcode based on elevel */ if (elevel >= ERROR) edata->sqlerrcode = ERRCODE_INTERNAL_ERROR; - else if (elevel == WARNING) + else if (elevel >= WARNING) edata->sqlerrcode = ERRCODE_WARNING; else edata->sqlerrcode = ERRCODE_SUCCESSFUL_COMPLETION; @@ -651,6 +657,13 @@ errfinish(const char *filename, int lineno, const char *funcname) fflush(stdout); fflush(stderr); + /* + * Let the statistics collector know. Only mark the session as + * terminated by fatal error if there is no other known cause. + */ + if (pgStatSessionEndCause == DISCONNECT_NORMAL) + pgStatSessionEndCause = DISCONNECT_FATAL; + /* * Do normal process-exit cleanup, then return exit code 1 to indicate * FATAL termination. 
The postmaster may or may not consider this @@ -2152,6 +2165,7 @@ write_eventlog(int level, const char *line, int len) eventlevel = EVENTLOG_INFORMATION_TYPE; break; case WARNING: + case WARNING_CLIENT_ONLY: eventlevel = EVENTLOG_WARNING_TYPE; break; case ERROR: @@ -3109,6 +3123,7 @@ send_message_to_server_log(ErrorData *edata) break; case NOTICE: case WARNING: + case WARNING_CLIENT_ONLY: syslog_level = LOG_NOTICE; break; case ERROR: @@ -3484,6 +3499,7 @@ error_severity(int elevel) prefix = gettext_noop("NOTICE"); break; case WARNING: + case WARNING_CLIENT_ONLY: prefix = gettext_noop("WARNING"); break; case ERROR: diff --git a/src/backend/utils/fmgr/dfmgr.c b/src/backend/utils/fmgr/dfmgr.c index adb31e109f721..e8c6cdde9728b 100644 --- a/src/backend/utils/fmgr/dfmgr.c +++ b/src/backend/utils/fmgr/dfmgr.c @@ -3,7 +3,7 @@ * dfmgr.c * Dynamic function manager code. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index fa5f7ac615847..b6835c2c4c1ef 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -3,7 +3,7 @@ * fmgr.c * The Postgres function manager. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/fmgr/funcapi.c b/src/backend/utils/fmgr/funcapi.c index f6fa4ab2fb26e..717b62907c78e 100644 --- a/src/backend/utils/fmgr/funcapi.c +++ b/src/backend/utils/fmgr/funcapi.c @@ -4,7 +4,7 @@ * Utility and convenience functions for fmgr functions that return * sets and/or composite types, or deal with VARIADIC inputs. 
* - * Copyright (c) 2002-2020, PostgreSQL Global Development Group + * Copyright (c) 2002-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/fmgr/funcapi.c @@ -1357,7 +1357,9 @@ get_func_arg_info(HeapTuple procTup, /* * get_func_trftypes * - * Returns the number of transformed types used by function. + * Returns the number of transformed types used by the function. + * If there are any, a palloc'd array of the type OIDs is returned + * into *p_trftypes. */ int get_func_trftypes(HeapTuple procTup, @@ -1386,7 +1388,6 @@ get_func_trftypes(HeapTuple procTup, ARR_HASNULL(arr) || ARR_ELEMTYPE(arr) != OIDOID) elog(ERROR, "protrftypes is not a 1-D Oid array or it contains nulls"); - Assert(nelems >= ((Form_pg_proc) GETSTRUCT(procTup))->pronargs); *p_trftypes = (Oid *) palloc(nelems * sizeof(Oid)); memcpy(*p_trftypes, ARR_DATA_PTR(arr), nelems * sizeof(Oid)); diff --git a/src/backend/utils/generate-errcodes.pl b/src/backend/utils/generate-errcodes.pl index 1a071fbb1f43c..c5cdd388138da 100644 --- a/src/backend/utils/generate-errcodes.pl +++ b/src/backend/utils/generate-errcodes.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl # # Generate the errcodes.h header from errcodes.txt -# Copyright (c) 2000-2020, PostgreSQL Global Development Group +# Copyright (c) 2000-2021, PostgreSQL Global Development Group use strict; use warnings; diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index ce93ca642fad9..6546e3c7c79fe 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -52,7 +52,7 @@ * dynahash has better performance for large entries. * - Guarantees stable pointers to entries. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/hash/pg_crc.c b/src/backend/utils/hash/pg_crc.c index 41e9597fb0091..77e3f6e655316 100644 --- a/src/backend/utils/hash/pg_crc.c +++ b/src/backend/utils/hash/pg_crc.c @@ -7,7 +7,7 @@ * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from * http://ross.net/crc/download/crc_v3.txt or several other net sites. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 6ab8216839891..a5976ad5b11a8 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -3,7 +3,7 @@ * globals.c * global variable declarations * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -32,6 +32,7 @@ volatile sig_atomic_t QueryCancelPending = false; volatile sig_atomic_t ProcDiePending = false; volatile sig_atomic_t ClientConnectionLost = false; volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false; +volatile sig_atomic_t IdleSessionTimeoutPending = false; volatile sig_atomic_t ProcSignalBarrierPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0; @@ -136,7 +137,7 @@ int max_parallel_workers = 8; int MaxBackends = 0; int VacuumCostPageHit = 1; /* GUC parameters for vacuum */ -int VacuumCostPageMiss = 10; +int VacuumCostPageMiss = 2; int VacuumCostPageDirty = 20; int VacuumCostLimit = 200; double VacuumCostDelay = 0; 
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index ed2ab4b5b29a8..0f67b99cc5534 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -3,7 +3,7 @@ * miscinit.c * miscellaneous initialization support stuff * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 9723e457ce920..e5965bc517dd9 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -3,7 +3,7 @@ * postinit.c * postgres initialization utilities * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -72,6 +72,7 @@ static void ShutdownPostgres(int code, Datum arg); static void StatementTimeoutHandler(void); static void LockTimeoutHandler(void); static void IdleInTransactionSessionTimeoutHandler(void); +static void IdleSessionTimeoutHandler(void); static bool ThereIsAtLeastOneRole(void); static void process_startup_options(Port *port, bool am_superuser); static void process_settings(Oid databaseid, Oid roleid); @@ -619,6 +620,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, RegisterTimeout(LOCK_TIMEOUT, LockTimeoutHandler); RegisterTimeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IdleInTransactionSessionTimeoutHandler); + RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler); } /* @@ -1233,6 +1235,14 @@ IdleInTransactionSessionTimeoutHandler(void) SetLatch(MyLatch); } +static void +IdleSessionTimeoutHandler(void) +{ + IdleSessionTimeoutPending = true; + InterruptPending = true; + SetLatch(MyLatch); +} + /* * Returns true if at least one role is 
defined in this database cluster. */ diff --git a/src/backend/utils/mb/Unicode/Makefile b/src/backend/utils/mb/Unicode/Makefile index da307d8eb95c9..ed6fc07e08802 100644 --- a/src/backend/utils/mb/Unicode/Makefile +++ b/src/backend/utils/mb/Unicode/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/backend/utils/mb/Unicode # -# Copyright (c) 2001-2020, PostgreSQL Global Development Group +# Copyright (c) 2001-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/Makefile # diff --git a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl index 84c9c5354130a..67b6b432113f4 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl @@ -1,6 +1,6 @@ #! /usr/bin/perl # -# Copyright (c) 2001-2020, PostgreSQL Global Development Group +# Copyright (c) 2001-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_BIG5.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl index 1596b64238f12..88c561b32d0e1 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl @@ -1,6 +1,6 @@ #! /usr/bin/perl # -# Copyright (c) 2007-2020, PostgreSQL Global Development Group +# Copyright (c) 2007-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_GB18030.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl index 6d1681a18a356..ea558dba68b1b 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl @@ -1,6 +1,6 @@ #! 
/usr/bin/perl # -# Copyright (c) 2007-2020, PostgreSQL Global Development Group +# Copyright (c) 2007-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl index 3414796ffd356..bd50f63dbaf08 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl @@ -1,6 +1,6 @@ #! /usr/bin/perl # -# Copyright (c) 2001-2020, PostgreSQL Global Development Group +# Copyright (c) 2001-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl index b560f9f37eaf3..a037493fd16e5 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl @@ -1,6 +1,6 @@ #! /usr/bin/perl # -# Copyright (c) 2001-2020, PostgreSQL Global Development Group +# Copyright (c) 2001-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl index 0f52183ff5fa5..7f49be8ad1d2e 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl @@ -1,6 +1,6 @@ #! /usr/bin/perl # -# Copyright (c) 2001-2020, PostgreSQL Global Development Group +# Copyright (c) 2001-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl index 57e63b4004a2c..61c47970fc688 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl @@ -1,6 +1,6 @@ #! 
/usr/bin/perl # -# Copyright (c) 2007-2020, PostgreSQL Global Development Group +# Copyright (c) 2007-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_GB18030.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl index 0bcea9e0d4f30..0f4bfe8af8993 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl @@ -1,6 +1,6 @@ #! /usr/bin/perl # -# Copyright (c) 2001-2020, PostgreSQL Global Development Group +# Copyright (c) 2001-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl index b86714dd46dff..710d5ce3c880f 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl @@ -1,6 +1,6 @@ #! /usr/bin/perl # -# Copyright (c) 2007-2020, PostgreSQL Global Development Group +# Copyright (c) 2007-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl index 5f4512ec87ed5..bb1f51c044867 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl @@ -1,6 +1,6 @@ #! /usr/bin/perl # -# Copyright (c) 2001-2020, PostgreSQL Global Development Group +# Copyright (c) 2001-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_SJIS.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl index 3282106d7f07b..cc416bd4bfbec 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl @@ -1,6 +1,6 @@ #! 
/usr/bin/perl # -# Copyright (c) 2007-2020, PostgreSQL Global Development Group +# Copyright (c) 2007-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_GB18030.pl # diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl index 8a7b26a5c5f3c..4f974388d75fb 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_most.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl @@ -1,6 +1,6 @@ #! /usr/bin/perl # -# Copyright (c) 2001-2020, PostgreSQL Global Development Group +# Copyright (c) 2001-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/UCS_to_most.pl # diff --git a/src/backend/utils/mb/Unicode/convutils.pm b/src/backend/utils/mb/Unicode/convutils.pm index 9d97061c6fe60..adfe12b2c298e 100644 --- a/src/backend/utils/mb/Unicode/convutils.pm +++ b/src/backend/utils/mb/Unicode/convutils.pm @@ -1,5 +1,5 @@ # -# Copyright (c) 2001-2020, PostgreSQL Global Development Group +# Copyright (c) 2001-2021, PostgreSQL Global Development Group # # src/backend/utils/mb/Unicode/convutils.pm diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c index 54dcf71fb7562..a07b54bd3b85c 100644 --- a/src/backend/utils/mb/conv.c +++ b/src/backend/utils/mb/conv.c @@ -2,7 +2,7 @@ * * Utility functions for conversion procs. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -653,7 +653,7 @@ LocalToUtf(const unsigned char *iso, int len, continue; } - l = pg_encoding_verifymb(encoding, (const char *) iso, len); + l = pg_encoding_verifymbchar(encoding, (const char *) iso, len); if (l < 0) break; diff --git a/src/backend/utils/mb/conversion_procs/Makefile b/src/backend/utils/mb/conversion_procs/Makefile index e6e844af783b9..a2e935e84c451 100644 --- a/src/backend/utils/mb/conversion_procs/Makefile +++ b/src/backend/utils/mb/conversion_procs/Makefile @@ -2,7 +2,7 @@ # # Makefile for backend/utils/mb/conversion_procs # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/backend/utils/mb/conversion_procs/Makefile diff --git a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c index 376b48ca611cc..4c5b02654de39 100644 --- a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c +++ b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c @@ -2,7 +2,7 @@ * * Cyrillic and MULE_INTERNAL * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c index 9ba6bd3040523..4d7fb116cfdbf 100644 --- a/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c 
+++ b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c @@ -2,7 +2,7 @@ * * EUC_JIS_2004, SHIFT_JIS_2004 * - * Copyright (c) 2007-2020, PostgreSQL Global Development Group + * Copyright (c) 2007-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c @@ -87,7 +87,7 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) continue; } - l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len); + l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len); if (l < 0) report_invalid_encoding(PG_EUC_JIS_2004, @@ -238,7 +238,7 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len continue; } - l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len); + l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len); if (l < 0 || l > len) report_invalid_encoding(PG_SHIFT_JIS_2004, diff --git a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c index 59c6c3bb12963..e9bb896935f37 100644 --- a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c +++ b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c @@ -2,7 +2,7 @@ * * EUC_CN and MULE_INTERNAL * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c index 4ca8e2126e4a5..5059f917a9828 100644 --- a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c +++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c @@ -2,7 +2,7 @@ * 
* EUC_JP, SJIS and MULE_INTERNAL * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -291,7 +291,7 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len) len--; continue; } - l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len); + l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); if (l < 0) report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); @@ -381,7 +381,7 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len) len--; continue; } - l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len); + l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len); if (l < 0) report_invalid_encoding(PG_EUC_JP, (const char *) euc, len); @@ -431,7 +431,7 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len) len--; continue; } - l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len); + l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); if (l < 0) report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); @@ -485,7 +485,7 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len) len--; continue; } - l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len); + l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len); if (l < 0) report_invalid_encoding(PG_EUC_JP, (const char *) euc, len); @@ -580,7 +580,7 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len) len--; continue; } - l = pg_encoding_verifymb(PG_SJIS, (const char *) sjis, len); + l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len); if (l < 0) report_invalid_encoding(PG_SJIS, (const char *) sjis, len); diff --git a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c 
b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c index 4d7876a666eeb..ac823d6c27018 100644 --- a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c +++ b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c @@ -2,7 +2,7 @@ * * EUC_KR and MULE_INTERNAL * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -76,7 +76,7 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len) c1 = *euc; if (IS_HIGHBIT_SET(c1)) { - l = pg_encoding_verifymb(PG_EUC_KR, (const char *) euc, len); + l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len); if (l != 2) report_invalid_encoding(PG_EUC_KR, (const char *) euc, len); @@ -122,7 +122,7 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len) len--; continue; } - l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len); + l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); if (l < 0) report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c index 82a22b9bebf80..66c242d7f3688 100644 --- a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c +++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c @@ -2,7 +2,7 @@ * * EUC_TW, BIG5 and MULE_INTERNAL * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -37,6 +37,8 @@ PG_FUNCTION_INFO_V1(mic_to_big5); * ---------- */ +static void euc_tw2big5(const unsigned char *euc, unsigned char *p, int len); 
+static void big52euc_tw(const unsigned char *euc, unsigned char *p, int len); static void big52mic(const unsigned char *big5, unsigned char *p, int len); static void mic2big5(const unsigned char *mic, unsigned char *p, int len); static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len); @@ -48,14 +50,10 @@ euc_tw_to_big5(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - euc_tw2mic(src, buf, len); - mic2big5(buf, dest, strlen((char *) buf)); - pfree(buf); + euc_tw2big5(src, dest, len); PG_RETURN_VOID(); } @@ -66,14 +64,10 @@ big5_to_euc_tw(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - big52mic(src, buf, len); - mic2euc_tw(buf, dest, strlen((char *) buf)); - pfree(buf); + big52euc_tw(src, dest, len); PG_RETURN_VOID(); } @@ -134,6 +128,136 @@ mic_to_big5(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +/* + * EUC_TW ---> Big5 + */ +static void +euc_tw2big5(const unsigned char *euc, unsigned char *p, int len) +{ + unsigned char c1; + unsigned short big5buf, + cnsBuf; + unsigned char lc; + int l; + + while (len > 0) + { + c1 = *euc; + if (IS_HIGHBIT_SET(c1)) + { + /* Verify and decode the next EUC_TW input character */ + l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len); + if (l < 0) + report_invalid_encoding(PG_EUC_TW, + (const char *) euc, len); + if (c1 == SS2) + { + c1 = euc[1]; /* plane No. 
*/ + if (c1 == 0xa1) + lc = LC_CNS11643_1; + else if (c1 == 0xa2) + lc = LC_CNS11643_2; + else + lc = c1 - 0xa3 + LC_CNS11643_3; + cnsBuf = (euc[2] << 8) | euc[3]; + } + else + { /* CNS11643-1 */ + lc = LC_CNS11643_1; + cnsBuf = (c1 << 8) | euc[1]; + } + + /* Write it out in Big5 */ + big5buf = CNStoBIG5(cnsBuf, lc); + if (big5buf == 0) + report_untranslatable_char(PG_EUC_TW, PG_BIG5, + (const char *) euc, len); + *p++ = (big5buf >> 8) & 0x00ff; + *p++ = big5buf & 0x00ff; + + euc += l; + len -= l; + } + else + { /* should be ASCII */ + if (c1 == 0) + report_invalid_encoding(PG_EUC_TW, + (const char *) euc, len); + *p++ = c1; + euc++; + len--; + } + } + *p = '\0'; +} + +/* + * Big5 ---> EUC_TW + */ +static void +big52euc_tw(const unsigned char *big5, unsigned char *p, int len) +{ + unsigned short c1; + unsigned short big5buf, + cnsBuf; + unsigned char lc; + int l; + + while (len > 0) + { + /* Verify and decode the next Big5 input character */ + c1 = *big5; + if (IS_HIGHBIT_SET(c1)) + { + l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len); + if (l < 0) + report_invalid_encoding(PG_BIG5, + (const char *) big5, len); + big5buf = (c1 << 8) | big5[1]; + cnsBuf = BIG5toCNS(big5buf, &lc); + + if (lc == LC_CNS11643_1) + { + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } + else if (lc == LC_CNS11643_2) + { + *p++ = SS2; + *p++ = 0xa2; + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } + else if (lc >= LC_CNS11643_3 && lc <= LC_CNS11643_7) + { + *p++ = SS2; + *p++ = lc - LC_CNS11643_3 + 0xa3; + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } + else + report_untranslatable_char(PG_BIG5, PG_EUC_TW, + (const char *) big5, len); + + big5 += l; + len -= l; + } + else + { + /* ASCII */ + if (c1 == 0) + report_invalid_encoding(PG_BIG5, + (const char *) big5, len); + *p++ = c1; + big5++; + len--; + continue; + } + } + *p = '\0'; +} + /* * EUC_TW ---> MIC */ @@ -148,7 +272,7 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, 
int len) c1 = *euc; if (IS_HIGHBIT_SET(c1)) { - l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len); + l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len); if (l < 0) report_invalid_encoding(PG_EUC_TW, (const char *) euc, len); @@ -213,7 +337,7 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len) len--; continue; } - l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len); + l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); if (l < 0) report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); @@ -272,7 +396,7 @@ big52mic(const unsigned char *big5, unsigned char *p, int len) len--; continue; } - l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len); + l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len); if (l < 0) report_invalid_encoding(PG_BIG5, (const char *) big5, len); @@ -321,7 +445,7 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len) len--; continue; } - l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len); + l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); if (l < 0) report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); diff --git a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c index f424f88145989..2e28e6780a58c 100644 --- a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c +++ b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c @@ -2,7 +2,7 @@ * * LATIN2 and WIN1250 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c 
b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c index a358a707c1131..bc651410f21dd 100644 --- a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c +++ b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c @@ -2,7 +2,7 @@ * * LATINn and MULE_INTERNAL * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c index 75ed49ac54e52..d6067cdc24e96 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c @@ -2,7 +2,7 @@ * * BIG5 <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c index 90ad316111a5b..ed90e8e682e5d 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c @@ -2,7 +2,7 @@ * * UTF8 and Cyrillic * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c index 
018312489cbc7..d699affce47f3 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c @@ -2,7 +2,7 @@ * * EUC_JIS_2004 <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c index 62182a9ba8b5e..d7c0ba6a58b4d 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c @@ -2,7 +2,7 @@ * * EUC_CN <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c index dc5abb5dfd468..13a3a23e77b8d 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c @@ -2,7 +2,7 @@ * * EUC_JP <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c index 088a38d839079..1bbb8aaef7b8d 100644 --- 
a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c @@ -2,7 +2,7 @@ * * EUC_KR <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c index a9fe94f88b88f..9830045dccd6a 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c @@ -2,7 +2,7 @@ * * EUC_TW <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c index 96909b588592d..f86ecf274241d 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c @@ -2,7 +2,7 @@ * * GB18030 <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c index 78bbcd3ce7dd5..2ab8b16c8a819 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c +++ 
b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c @@ -2,7 +2,7 @@ * * GBK <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c index 348524f4a2c9c..3e49f67ea2f29 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c @@ -2,7 +2,7 @@ * * ISO 8859 2-16 <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c index 2cdca9f780d86..67e713cca11c3 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c @@ -2,7 +2,7 @@ * * ISO8859_1 <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c index e09a7c8e41eb1..578f5df4e7f72 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c +++ 
b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c @@ -2,7 +2,7 @@ * * JOHAB <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c index c56fa80a4bba6..dd9fc2975ad23 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c @@ -2,7 +2,7 @@ * * SJIS <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c index 458500998d495..4bcc886d674e3 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c @@ -2,7 +2,7 @@ * * SHIFT_JIS_2004 <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c index 3226ed032583a..c8e512994a103 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c @@ -2,7 +2,7 @@ * * UHC <--> UTF8 * - * 
Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c index 1a0074d063cc0..0c9493dee564e 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c @@ -2,7 +2,7 @@ * * WIN <--> UTF8 * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index a8e13cacfde29..2578573b0ab13 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -23,7 +23,7 @@ * the result is validly encoded according to the destination encoding. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -519,7 +519,7 @@ pg_convert(PG_FUNCTION_ARGS) /* make sure that source string is valid */ len = VARSIZE_ANY_EXHDR(string); src_str = VARDATA_ANY(string); - pg_verify_mbstr_len(src_encoding, src_str, len, false); + (void) pg_verify_mbstr(src_encoding, src_str, len, false); /* perform conversion */ dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str), @@ -1215,10 +1215,10 @@ static bool pg_generic_charinc(unsigned char *charptr, int len) { unsigned char *lastbyte = charptr + len - 1; - mbverifier mbverify; + mbchar_verifier mbverify; /* We can just invoke the character verifier directly. 
*/ - mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify; + mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverifychar; while (*lastbyte < (unsigned char) 255) { @@ -1445,8 +1445,7 @@ pg_database_encoding_max_length(void) bool pg_verifymbstr(const char *mbstr, int len, bool noError) { - return - pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0; + return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError); } /* @@ -1456,7 +1455,18 @@ pg_verifymbstr(const char *mbstr, int len, bool noError) bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError) { - return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0; + int oklen; + + Assert(PG_VALID_ENCODING(encoding)); + + oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len); + if (oklen != len) + { + if (noError) + return false; + report_invalid_encoding(encoding, mbstr + oklen, len - oklen); + } + return true; } /* @@ -1469,11 +1479,14 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError) * If OK, return length of string in the encoding. * If a problem is found, return -1 when noError is * true; when noError is false, ereport() a descriptive message. + * + * Note: We cannot use the faster encoding-specific mbverifystr() function + * here, because we need to count the number of characters in the string. 
*/ int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError) { - mbverifier mbverify; + mbchar_verifier mbverifychar; int mb_len; Assert(PG_VALID_ENCODING(encoding)); @@ -1493,7 +1506,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError) } /* fetch function pointer just once */ - mbverify = pg_wchar_table[encoding].mbverify; + mbverifychar = pg_wchar_table[encoding].mbverifychar; mb_len = 0; @@ -1516,7 +1529,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError) report_invalid_encoding(encoding, mbstr, len); } - l = (*mbverify) ((const unsigned char *) mbstr, len); + l = (*mbverifychar) ((const unsigned char *) mbstr, len); if (l < 0) { diff --git a/src/backend/utils/mb/stringinfo_mb.c b/src/backend/utils/mb/stringinfo_mb.c index 5f51f538c1803..1fd6c63d3d702 100644 --- a/src/backend/utils/mb/stringinfo_mb.c +++ b/src/backend/utils/mb/stringinfo_mb.c @@ -8,7 +8,7 @@ * code. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/misc/check_guc b/src/backend/utils/misc/check_guc index 416a0875b6c93..b171ef0e4ff3f 100755 --- a/src/backend/utils/misc/check_guc +++ b/src/backend/utils/misc/check_guc @@ -16,7 +16,7 @@ ## if an option is valid but shows up in only one file (guc.c but not ## postgresql.conf.sample), it should be listed here so that it ## can be ignored -INTENTIONALLY_NOT_INCLUDED="debug_deadlocks \ +INTENTIONALLY_NOT_INCLUDED="debug_deadlocks in_hot_standby \ is_superuser lc_collate lc_ctype lc_messages lc_monetary lc_numeric lc_time \ pre_auth_delay role seed server_encoding server_version server_version_num \ session_authorization trace_lock_oidmin trace_lock_table trace_locks trace_lwlocks \ diff --git a/src/backend/utils/misc/guc-file.l b/src/backend/utils/misc/guc-file.l index 
c98e2202951ce..7885a169bb97f 100644 --- a/src/backend/utils/misc/guc-file.l +++ b/src/backend/utils/misc/guc-file.l @@ -2,7 +2,7 @@ /* * Scanner for the configuration file * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/backend/utils/misc/guc-file.l */ diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 878fcc2236585..eafdb1118ed57 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -6,7 +6,7 @@ * See src/backend/utils/misc/README for more information. * * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * Written by Peter Eisentraut . * * IDENTIFICATION @@ -99,6 +99,7 @@ #include "utils/rls.h" #include "utils/snapmgr.h" #include "utils/tzparser.h" +#include "utils/inval.h" #include "utils/varlena.h" #include "utils/xml.h" @@ -209,6 +210,7 @@ static bool check_cluster_name(char **newval, void **extra, GucSource source); static const char *show_unix_socket_permissions(void); static const char *show_log_file_mode(void); static const char *show_data_directory_mode(void); +static const char *show_in_hot_standby(void); static bool check_backtrace_functions(char **newval, void **extra, GucSource source); static void assign_backtrace_functions(const char *newval, void *extra); static bool check_recovery_target_timeline(char **newval, void **extra, GucSource source); @@ -610,6 +612,7 @@ static int wal_block_size; static bool data_checksums; static bool integer_datetimes; static bool assert_enabled; +static bool in_hot_standby; static char *recovery_target_timeline_string; static char *recovery_target_string; static char *recovery_target_xid_string; @@ -1570,7 +1573,15 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, - + { + {"log_recovery_conflict_waits", PGC_SIGHUP, LOGGING_WHAT, + gettext_noop("Logs standby recovery 
conflict waits."), + NULL + }, + &log_recovery_conflict_waits, + false, + NULL, NULL, NULL + }, { {"log_hostname", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs the host name in the connection logs."), @@ -1844,6 +1855,17 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"in_hot_standby", PGC_INTERNAL, PRESET_OPTIONS, + gettext_noop("Shows whether hot standby is currently active."), + NULL, + GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE + }, + &in_hot_standby, + false, + NULL, NULL, show_in_hot_standby + }, + { {"allow_system_table_mods", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Allows modifications of the structure of system tables."), @@ -2391,7 +2413,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &VacuumCostPageMiss, - 10, 0, 10000, + 2, 0, 10000, NULL, NULL, NULL }, @@ -2495,7 +2517,7 @@ static struct config_int ConfigureNamesInt[] = { {"idle_in_transaction_session_timeout", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Sets the maximum allowed duration of any idling transaction."), + gettext_noop("Sets the maximum allowed idle time between queries, when in a transaction."), gettext_noop("A value of 0 turns off the timeout."), GUC_UNIT_MS }, @@ -2504,6 +2526,17 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"idle_session_timeout", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Sets the maximum allowed idle time between queries, when not in a transaction."), + gettext_noop("A value of 0 turns off the timeout."), + GUC_UNIT_MS + }, + &IdleSessionTimeout, + 0, 0, INT_MAX, + NULL, NULL, NULL + }, + { {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Minimum age at which VACUUM should freeze a table row."), @@ -3389,6 +3422,29 @@ static struct config_int ConfigureNamesInt[] = check_huge_page_size, NULL, NULL }, + { + {"debug_invalidate_system_caches_always", PGC_SUSET, DEVELOPER_OPTIONS, + gettext_noop("Aggressively invalidate system caches for 
debugging purposes."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &debug_invalidate_system_caches_always, +#ifdef CLOBBER_CACHE_ENABLED + /* Set default based on older compile-time-only cache clobber macros */ +#if defined(CLOBBER_CACHE_RECURSIVELY) + 3, +#elif defined(CLOBBER_CACHE_ALWAYS) + 1, +#else + 0, +#endif + 0, 5, +#else /* not CLOBBER_CACHE_ENABLED */ + 0, 0, 0, +#endif /* not CLOBBER_CACHE_ENABLED */ + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL @@ -6248,6 +6304,14 @@ BeginReportingGUCOptions(void) reporting_enabled = true; + /* + * Hack for in_hot_standby: initialize with the value we're about to send. + * (This could be out of date by the time we actually send it, in which + * case the next ReportChangedGUCOptions call will send a duplicate + * report.) + */ + in_hot_standby = RecoveryInProgress(); + /* Transmit initial values of interesting variables */ for (i = 0; i < num_guc_variables; i++) { @@ -6280,6 +6344,23 @@ ReportChangedGUCOptions(void) if (!reporting_enabled) return; + /* + * Since in_hot_standby isn't actually changed by normal GUC actions, we + * need a hack to check whether a new value needs to be reported to the + * client. For speed, we rely on the assumption that it can never + * transition from false to true. + */ + if (in_hot_standby && !RecoveryInProgress()) + { + struct config_generic *record; + + record = find_option("in_hot_standby", false, ERROR); + Assert(record != NULL); + record->status |= GUC_NEEDS_REPORT; + report_needed = true; + in_hot_standby = false; + } + /* Quick exit if no values have been changed */ if (!report_needed) return; @@ -11773,6 +11854,18 @@ show_data_directory_mode(void) return buf; } +static const char * +show_in_hot_standby(void) +{ + /* + * We display the actual state based on shared memory, so that this GUC + * reports up-to-date state if examined intra-query. 
The underlying + * variable in_hot_standby changes only when we transmit a new value to + * the client. + */ + return RecoveryInProgress() ? "on" : "off"; +} + /* * We split the input string, where commas separate function names * and certain whitespace chars are ignored, into a \0-separated (and diff --git a/src/backend/utils/misc/help_config.c b/src/backend/utils/misc/help_config.c index c0120e1090813..d97243ddc8b15 100644 --- a/src/backend/utils/misc/help_config.c +++ b/src/backend/utils/misc/help_config.c @@ -7,7 +7,7 @@ * or GUC_DISALLOW_IN_FILE are not displayed, unless the user specifically * requests that variable by name * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/misc/help_config.c diff --git a/src/backend/utils/misc/pg_config.c b/src/backend/utils/misc/pg_config.c index 7a79cbff92c9a..34d77db75a145 100644 --- a/src/backend/utils/misc/pg_config.c +++ b/src/backend/utils/misc/pg_config.c @@ -3,7 +3,7 @@ * pg_config.c * Expose same output as pg_config except as an SRF * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index d50d87a6021c6..209a20a8827d3 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -5,7 +5,7 @@ * Routines to expose the contents of the control data file via * a set of SQL functions. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/misc/pg_rusage.c b/src/backend/utils/misc/pg_rusage.c index 64a6af3152b04..bb5d9e7c85019 100644 --- a/src/backend/utils/misc/pg_rusage.c +++ b/src/backend/utils/misc/pg_rusage.c @@ -4,7 +4,7 @@ * Resource usage measurement support routines. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index b7fb2ec1feb6b..bd57e917e16ed 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -92,7 +92,7 @@ #db_user_namespace = off # GSSAPI using Kerberos -#krb_server_keyfile = '' +#krb_server_keyfile = 'FILE:${sysconfdir}/krb5.keytab' #krb_caseins_users = off # - SSL - @@ -164,7 +164,7 @@ #vacuum_cost_delay = 0 # 0-100 milliseconds (0 disables) #vacuum_cost_page_hit = 1 # 0-10000 credits -#vacuum_cost_page_miss = 10 # 0-10000 credits +#vacuum_cost_page_miss = 2 # 0-10000 credits #vacuum_cost_page_dirty = 20 # 0-10000 credits #vacuum_cost_limit = 200 # 1-10000 credits @@ -552,6 +552,8 @@ # %% = '%' # e.g. 
'<%u%%%d> ' #log_lock_waits = off # log lock waits >= deadlock_timeout +#log_recovery_conflict_waits = off # log standby recovery conflict waits + # >= deadlock_timeout #log_parameter_max_length = -1 # when logging statements, limit logged # bind-parameter values to N bytes; # -1 means print in full, 0 disables @@ -663,6 +665,7 @@ #statement_timeout = 0 # in milliseconds, 0 is disabled #lock_timeout = 0 # in milliseconds, 0 is disabled #idle_in_transaction_session_timeout = 0 # in milliseconds, 0 is disabled +#idle_session_timeout = 0 # in milliseconds, 0 is disabled #vacuum_freeze_min_age = 50000000 #vacuum_freeze_table_age = 150000000 #vacuum_multixact_freeze_min_age = 5000000 diff --git a/src/backend/utils/misc/ps_status.c b/src/backend/utils/misc/ps_status.c index 1e8596e664569..5819faaf2d2f8 100644 --- a/src/backend/utils/misc/ps_status.c +++ b/src/backend/utils/misc/ps_status.c @@ -7,7 +7,7 @@ * * src/backend/utils/misc/ps_status.c * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * various details abducted from various places *-------------------------------------------------------------------- */ diff --git a/src/backend/utils/misc/queryenvironment.c b/src/backend/utils/misc/queryenvironment.c index 31de81f353eba..86d61d083bfd4 100644 --- a/src/backend/utils/misc/queryenvironment.c +++ b/src/backend/utils/misc/queryenvironment.c @@ -11,7 +11,7 @@ * on callers, since this is an opaque structure. This is the reason to * require a create function. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/misc/rls.c b/src/backend/utils/misc/rls.c index 016fe511eb7d2..13d25154dbe93 100644 --- a/src/backend/utils/misc/rls.c +++ b/src/backend/utils/misc/rls.c @@ -3,7 +3,7 @@ * rls.c * RLS-related utility functions. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c index 361c15614e7cd..0c327e823f715 100644 --- a/src/backend/utils/misc/sampling.c +++ b/src/backend/utils/misc/sampling.c @@ -3,7 +3,7 @@ * sampling.c * Relation block sampling routines. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/misc/superuser.c b/src/backend/utils/misc/superuser.c index 203a146390f60..9ec5a40f7ec27 100644 --- a/src/backend/utils/misc/superuser.c +++ b/src/backend/utils/misc/superuser.c @@ -9,7 +9,7 @@ * the single-user case works. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/misc/timeout.c b/src/backend/utils/misc/timeout.c index f1c9518b0c403..95a273d9cfbdb 100644 --- a/src/backend/utils/misc/timeout.c +++ b/src/backend/utils/misc/timeout.c @@ -3,7 +3,7 @@ * timeout.c * Routines to multiplex SIGALRM interrupts for multiple timeout reasons. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -53,18 +53,29 @@ static timeout_params *volatile active_timeouts[MAX_TIMEOUTS]; /* * Flag controlling whether the signal handler is allowed to do anything. - * We leave this "false" when we're not expecting interrupts, just in case. + * This is useful to avoid race conditions with the handler. Note in + * particular that this lets us make changes in the data structures without + * tediously disabling and re-enabling the timer signal. Most of the time, + * no interrupt would happen anyway during such critical sections, but if + * one does, this rule ensures it's safe. Leaving the signal enabled across + * multiple operations can greatly reduce the number of kernel calls we make, + * too. See comments in schedule_alarm() about that. * - * Note that we don't bother to reset any pending timer interrupt when we - * disable the signal handler; it's not really worth the cycles to do so, - * since the probability of the interrupt actually occurring while we have - * it disabled is low. See comments in schedule_alarm() about that. + * We leave this "false" when we're not expecting interrupts, just in case. */ static volatile sig_atomic_t alarm_enabled = false; #define disable_alarm() (alarm_enabled = false) #define enable_alarm() (alarm_enabled = true) +/* + * State recording if and when we next expect the interrupt to fire. + * Note that the signal handler will unconditionally reset signal_pending to + * false, so that can change asynchronously even when alarm_enabled is false. 
+ */ +static volatile sig_atomic_t signal_pending = false; +static TimestampTz signal_due_at = 0; /* valid only when signal_pending */ + /***************************************************************************** * Internal helper functions @@ -185,7 +196,11 @@ enable_timeout(TimeoutId id, TimestampTz now, TimestampTz fin_time) * Schedule alarm for the next active timeout, if any * * We assume the caller has obtained the current time, or a close-enough - * approximation. + * approximation. (It's okay if a tick or two has passed since "now", or + * if a little more time elapses before we reach the kernel call; that will + * cause us to ask for an interrupt a tick or two later than the nearest + * timeout, which is no big deal. Passing a "now" value that's in the future + * would be bad though.) */ static void schedule_alarm(TimestampTz now) @@ -193,21 +208,38 @@ schedule_alarm(TimestampTz now) if (num_active_timeouts > 0) { struct itimerval timeval; + TimestampTz nearest_timeout; long secs; int usecs; MemSet(&timeval, 0, sizeof(struct itimerval)); - /* Get the time remaining till the nearest pending timeout */ - TimestampDifference(now, active_timeouts[0]->fin_time, - &secs, &usecs); - /* - * It's possible that the difference is less than a microsecond; - * ensure we don't cancel, rather than set, the interrupt. + * Get the time remaining till the nearest pending timeout. If it is + * negative, assume that we somehow missed an interrupt, and force + * signal_pending off. This gives us a chance to recover if the + * kernel drops a timeout request for some reason. 
*/ - if (secs == 0 && usecs == 0) + nearest_timeout = active_timeouts[0]->fin_time; + if (now > nearest_timeout) + { + signal_pending = false; + /* force an interrupt as soon as possible */ + secs = 0; usecs = 1; + } + else + { + TimestampDifference(now, nearest_timeout, + &secs, &usecs); + + /* + * It's possible that the difference is less than a microsecond; + * ensure we don't cancel, rather than set, the interrupt. + */ + if (secs == 0 && usecs == 0) + usecs = 1; + } timeval.it_value.tv_sec = secs; timeval.it_value.tv_usec = usecs; @@ -218,7 +250,7 @@ schedule_alarm(TimestampTz now) * interrupt could occur before we can set alarm_enabled, so that the * signal handler would fail to do anything. * - * Because we didn't bother to reset the timer in disable_alarm(), + * Because we didn't bother to disable the timer in disable_alarm(), * it's possible that a previously-set interrupt will fire between * enable_alarm() and setitimer(). This is safe, however. There are * two possible outcomes: @@ -244,9 +276,60 @@ schedule_alarm(TimestampTz now) */ enable_alarm(); + /* + * If there is already an interrupt pending that's at or before the + * needed time, we need not do anything more. The signal handler will + * do the right thing in the first case, and re-schedule the interrupt + * for later in the second case. It might seem that the extra + * interrupt is wasted work, but it's not terribly much work, and this + * method has very significant advantages in the common use-case where + * we repeatedly set a timeout that we don't expect to reach and then + * cancel it. Instead of invoking setitimer() every time the timeout + * is set or canceled, we perform one interrupt and a re-scheduling + * setitimer() call at intervals roughly equal to the timeout delay. 
+ * For example, with statement_timeout = 1s and a throughput of + * thousands of queries per second, this method requires an interrupt + * and setitimer() call roughly once a second, rather than thousands + * of setitimer() calls per second. + * + * Because of the possible passage of time between when we obtained + * "now" and when we reach setitimer(), the kernel's opinion of when + * to trigger the interrupt is likely to be a bit later than + * signal_due_at. That's fine, for the same reasons described above. + */ + if (signal_pending && nearest_timeout >= signal_due_at) + return; + + /* + * As with calling enable_alarm(), we must set signal_pending *before* + * calling setitimer(); if we did it after, the signal handler could + * trigger before we set it, leaving us with a false opinion that a + * signal is still coming. + * + * Other race conditions involved with setting/checking signal_pending + * are okay, for the reasons described above. One additional point is + * that the signal handler could fire after we set signal_due_at, but + * still before the setitimer() call. Then the handler could + * overwrite signal_due_at with a value it computes, which will be the + * same as or perhaps later than what we just computed. After we + * perform setitimer(), the net effect would be that signal_due_at + * gives a time later than when the interrupt will really happen; + * which is a safe situation. + */ + signal_due_at = nearest_timeout; + signal_pending = true; + /* Set the alarm timer */ if (setitimer(ITIMER_REAL, &timeval, NULL) != 0) + { + /* + * Clearing signal_pending here is a bit pro forma, but not + * entirely so, since something in the FATAL exit path could try + * to use timeout facilities. + */ + signal_pending = false; elog(FATAL, "could not enable SIGALRM timer: %m"); + } } } @@ -279,6 +362,12 @@ handle_sig_alarm(SIGNAL_ARGS) */ SetLatch(MyLatch); + /* + * Always reset signal_pending, even if !alarm_enabled, since indeed no + * signal is now pending. 
+ */ + signal_pending = false; + /* * Fire any pending timeouts, but only if we're enabled to do so. */ @@ -591,7 +680,7 @@ disable_timeouts(const DisableTimeoutParams *timeouts, int count) } /* - * Disable SIGALRM and remove all timeouts from the active list, + * Disable the signal handler, remove all timeouts from the active list, * and optionally reset their timeout indicators. */ void @@ -602,18 +691,10 @@ disable_all_timeouts(bool keep_indicators) disable_alarm(); /* - * Only bother to reset the timer if we think it's active. We could just - * let the interrupt happen anyway, but it's probably a bit cheaper to do - * setitimer() than to let the useless interrupt happen. + * We used to disable the timer interrupt here, but in common usage + * patterns it's cheaper to leave it enabled; that may save us from having + * to enable it again shortly. See comments in schedule_alarm(). */ - if (num_active_timeouts > 0) - { - struct itimerval timeval; - - MemSet(&timeval, 0, sizeof(struct itimerval)); - if (setitimer(ITIMER_REAL, &timeval, NULL) != 0) - elog(FATAL, "could not disable SIGALRM timer: %m"); - } num_active_timeouts = 0; diff --git a/src/backend/utils/misc/tzparser.c b/src/backend/utils/misc/tzparser.c index 46b2b0ca98dd2..d2aa5ee87d5da 100644 --- a/src/backend/utils/misc/tzparser.c +++ b/src/backend/utils/misc/tzparser.c @@ -11,7 +11,7 @@ * PG_TRY if necessary. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index 60a761caba474..ec6c130d0fb2c 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -7,7 +7,7 @@ * type. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c index 6e5e412429789..7e2a20b9417ca 100644 --- a/src/backend/utils/mmgr/dsa.c +++ b/src/backend/utils/mmgr/dsa.c @@ -39,7 +39,7 @@ * empty and be returned to the free page manager, and whole segments can * become empty and be returned to the operating system. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mmgr/freepage.c b/src/backend/utils/mmgr/freepage.c index 77f16f9b21bfe..e4ee1aab979e2 100644 --- a/src/backend/utils/mmgr/freepage.c +++ b/src/backend/utils/mmgr/freepage.c @@ -42,7 +42,7 @@ * where memory fragmentation is very severe, only a tiny fraction of * the pages under management are consumed by this btree. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c index af52616e575df..2b90034764580 100644 --- a/src/backend/utils/mmgr/generation.c +++ b/src/backend/utils/mmgr/generation.c @@ -6,7 +6,7 @@ * Generation is a custom MemoryContext implementation designed for cases of * chunks with similar lifespan. 
* - * Portions Copyright (c) 2017-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2017-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/mmgr/generation.c diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index dda70ef9f3344..84472b9158e7b 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -9,7 +9,7 @@ * context's MemoryContextMethods struct. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/utils/mmgr/memdebug.c b/src/backend/utils/mmgr/memdebug.c index 812025b76e228..3644c7f6067a1 100644 --- a/src/backend/utils/mmgr/memdebug.c +++ b/src/backend/utils/mmgr/memdebug.c @@ -5,7 +5,7 @@ * public API of the memory management subsystem. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/utils/mmgr/memdebug.c diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index 283dfe2d9e492..66e3181815687 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -8,7 +8,7 @@ * doesn't actually run the executor for them. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -310,7 +310,7 @@ PortalReleaseCachedPlan(Portal portal) { if (portal->cplan) { - ReleaseCachedPlan(portal->cplan, false); + ReleaseCachedPlan(portal->cplan, NULL); portal->cplan = NULL; /* diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c index f8d801c4196d6..9213be7c9569f 100644 --- a/src/backend/utils/mmgr/slab.c +++ b/src/backend/utils/mmgr/slab.c @@ -7,7 +7,7 @@ * numbers of equally-sized objects are allocated (and freed). * * - * Portions Copyright (c) 2017-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2017-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/mmgr/slab.c diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d index a0b0458108936..b0c50a3c7f9f1 100644 --- a/src/backend/utils/probes.d +++ b/src/backend/utils/probes.d @@ -1,7 +1,7 @@ /* ---------- * DTrace probes for PostgreSQL backend * - * Copyright (c) 2006-2020, PostgreSQL Global Development Group + * Copyright (c) 2006-2021, PostgreSQL Global Development Group * * src/backend/utils/probes.d * ---------- diff --git a/src/backend/utils/resowner/resowner.c b/src/backend/utils/resowner/resowner.c index 546ad8d1c5fad..a171df573ceeb 100644 --- a/src/backend/utils/resowner/resowner.c +++ b/src/backend/utils/resowner/resowner.c @@ -9,7 +9,7 @@ * See utils/resowner/README for more info. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -652,7 +652,7 @@ ResourceOwnerReleaseInternal(ResourceOwner owner, if (isCommit) PrintPlanCacheLeakWarning(res); - ReleaseCachedPlan(res, true); + ReleaseCachedPlan(res, owner); } /* Ditto for tupdesc references */ @@ -703,18 +703,14 @@ ResourceOwnerReleaseInternal(ResourceOwner owner, void ResourceOwnerReleaseAllPlanCacheRefs(ResourceOwner owner) { - ResourceOwner save; Datum foundres; - save = CurrentResourceOwner; - CurrentResourceOwner = owner; while (ResourceArrayGetAny(&(owner->planrefarr), &foundres)) { CachedPlan *res = (CachedPlan *) DatumGetPointer(foundres); - ReleaseCachedPlan(res, true); + ReleaseCachedPlan(res, owner); } - CurrentResourceOwner = save; } /* diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c index 28905124f965a..089ba2e106885 100644 --- a/src/backend/utils/sort/logtape.c +++ b/src/backend/utils/sort/logtape.c @@ -67,7 +67,7 @@ * There will always be the same number of runs as input tapes, and the same * number of input tapes as participants (worker Tuplesortstates). * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/sort/sharedtuplestore.c b/src/backend/utils/sort/sharedtuplestore.c index fe298ce92ed5c..57e35db4f8d0a 100644 --- a/src/backend/utils/sort/sharedtuplestore.c +++ b/src/backend/utils/sort/sharedtuplestore.c @@ -10,7 +10,7 @@ * scan where each backend reads an arbitrary subset of the tuples that were * written. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/sort/sortsupport.c b/src/backend/utils/sort/sortsupport.c index c436fbb4ce1e8..6a889ec189fd6 100644 --- a/src/backend/utils/sort/sortsupport.c +++ b/src/backend/utils/sort/sortsupport.c @@ -4,7 +4,7 @@ * Support routines for accelerated sorting. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index d0cc04a878a14..7d0f96afb7882 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -83,7 +83,7 @@ * produce exactly one output run from their partial input. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c index 452a85a423bda..509a91b503e1a 100644 --- a/src/backend/utils/sort/tuplestore.c +++ b/src/backend/utils/sort/tuplestore.c @@ -43,7 +43,7 @@ * before switching to the other state or activating a different read pointer. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/time/combocid.c b/src/backend/utils/time/combocid.c index 9626f9810075b..6d67d38f252fb 100644 --- a/src/backend/utils/time/combocid.c +++ b/src/backend/utils/time/combocid.c @@ -30,7 +30,7 @@ * destroyed at the end of each transaction. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 8c41483e87c52..95704265b6785 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -35,7 +35,7 @@ * stack is empty. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -1764,7 +1764,11 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Assert(OldSnapshotThresholdActive()); Assert(limit_ts != NULL && limit_xid != NULL); - if (!RelationAllowsEarlyPruning(relation)) + /* + * TestForOldSnapshot() assumes early pruning advances the page LSN, so we + * can't prune early when skipping WAL. 
+ */ + if (!RelationAllowsEarlyPruning(relation) || !RelationNeedsWAL(relation)) return false; ts = GetSnapshotCurrentTimestamp(); diff --git a/src/bin/Makefile b/src/bin/Makefile index 8b870357a14fe..f7573efcd30f2 100644 --- a/src/bin/Makefile +++ b/src/bin/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin (client programs) # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/bin/Makefile diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile index 7e2375478081c..a620a5bea061b 100644 --- a/src/bin/initdb/Makefile +++ b/src/bin/initdb/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/initdb # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/bin/initdb/Makefile diff --git a/src/bin/initdb/findtimezone.c b/src/bin/initdb/findtimezone.c index 764ead97d34ed..8fe1e910f9688 100644 --- a/src/bin/initdb/findtimezone.c +++ b/src/bin/initdb/findtimezone.c @@ -3,7 +3,7 @@ * findtimezone.c * Functions for determining the default timezone to use. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/initdb/findtimezone.c diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index f994c4216bcdc..62540a1b37d5e 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -38,7 +38,7 @@ * * This code is released under the terms of the PostgreSQL License. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/initdb/initdb.c @@ -139,6 +139,7 @@ static const char *authmethodhost = NULL; static const char *authmethodlocal = NULL; static bool debug = false; static bool noclean = false; +static bool noinstructions = false; static bool do_sync = true; static bool sync_only = false; static bool show_setting = false; @@ -158,6 +159,7 @@ static char *conf_file; static char *dictionary_file; static char *info_schema_file; static char *features_file; +static char *system_constraints_file; static char *system_views_file; static bool success = false; static bool made_new_pgdata = false; @@ -250,10 +252,9 @@ static void bootstrap_template1(void); static void setup_auth(FILE *cmdfd); static void get_su_pwd(void); static void setup_depend(FILE *cmdfd); -static void setup_sysviews(FILE *cmdfd); +static void setup_run_file(FILE *cmdfd, const char *filename); static void setup_description(FILE *cmdfd); static void setup_collation(FILE *cmdfd); -static void setup_dictionary(FILE *cmdfd); static void setup_privileges(FILE *cmdfd); static void set_info_version(void); static void setup_schema(FILE *cmdfd); @@ -1599,17 +1600,16 @@ setup_depend(FILE *cmdfd) } /* - * set up system views + * Run external file */ static void -setup_sysviews(FILE *cmdfd) +setup_run_file(FILE *cmdfd, const char *filename) { - char **line; - char **sysviews_setup; + char **lines; - sysviews_setup = readfile(system_views_file); + lines = readfile(filename); - for (line = sysviews_setup; *line != NULL; line++) + for (char **line = lines; *line != NULL; line++) { PG_CMD_PUTS(*line); free(*line); @@ -1617,7 +1617,7 @@ setup_sysviews(FILE *cmdfd) PG_CMD_PUTS("\n\n"); - free(sysviews_setup); + free(lines); } /* @@ -1660,27 +1660,6 @@ setup_collation(FILE *cmdfd) PG_CMD_PUTS("SELECT 
pg_import_system_collations('pg_catalog');\n\n"); } -/* - * load extra dictionaries (Snowball stemmers) - */ -static void -setup_dictionary(FILE *cmdfd) -{ - char **line; - char **conv_lines; - - conv_lines = readfile(dictionary_file); - for (line = conv_lines; *line != NULL; line++) - { - PG_CMD_PUTS(*line); - free(*line); - } - - PG_CMD_PUTS("\n\n"); - - free(conv_lines); -} - /* * Set up privileges * @@ -1881,20 +1860,7 @@ set_info_version(void) static void setup_schema(FILE *cmdfd) { - char **line; - char **lines; - - lines = readfile(info_schema_file); - - for (line = lines; *line != NULL; line++) - { - PG_CMD_PUTS(*line); - free(*line); - } - - PG_CMD_PUTS("\n\n"); - - free(lines); + setup_run_file(cmdfd, info_schema_file); PG_CMD_PRINTF("UPDATE information_schema.sql_implementation_info " " SET character_value = '%s' " @@ -2275,6 +2241,7 @@ usage(const char *progname) printf(_(" [-D, --pgdata=]DATADIR location for this database cluster\n")); printf(_(" -E, --encoding=ENCODING set default encoding for new databases\n")); printf(_(" -g, --allow-group-access allow group read/execute on data directory\n")); + printf(_(" -k, --data-checksums use data page checksums\n")); printf(_(" --locale=LOCALE set default locale for new databases\n")); printf(_(" --lc-collate=, --lc-ctype=, --lc-messages=LOCALE\n" " --lc-monetary=, --lc-numeric=, --lc-time=LOCALE\n" @@ -2290,10 +2257,10 @@ usage(const char *progname) printf(_(" --wal-segsize=SIZE size of WAL segments, in megabytes\n")); printf(_("\nLess commonly used options:\n")); printf(_(" -d, --debug generate lots of debugging output\n")); - printf(_(" -k, --data-checksums use data page checksums\n")); printf(_(" -L DIRECTORY where to find the input files\n")); printf(_(" -n, --no-clean do not clean up after errors\n")); printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); + printf(_(" --no-instructions do not print instructions for next steps\n")); printf(_(" -s, --show show internal 
settings\n")); printf(_(" -S, --sync-only only sync data directory\n")); printf(_("\nOther options:\n")); @@ -2355,8 +2322,7 @@ check_need_password(const char *authmethodlocal, const char *authmethodhost) void setup_pgdata(void) { - char *pgdata_get_env, - *pgdata_set_env; + char *pgdata_get_env; if (!pg_data) { @@ -2386,8 +2352,11 @@ setup_pgdata(void) * need quotes otherwise on Windows because paths there are most likely to * have embedded spaces. */ - pgdata_set_env = psprintf("PGDATA=%s", pg_data); - putenv(pgdata_set_env); + if (setenv("PGDATA", pg_data, 1) != 0) + { + pg_log_error("could not set environment"); + exit(1); + } } @@ -2530,6 +2499,7 @@ setup_data_file_paths(void) set_input(&dictionary_file, "snowball_create.sql"); set_input(&info_schema_file, "information_schema.sql"); set_input(&features_file, "sql_features.txt"); + set_input(&system_constraints_file, "system_constraints.sql"); set_input(&system_views_file, "system_views.sql"); if (show_setting || debug) @@ -2891,6 +2861,8 @@ initialize_data_directory(void) setup_auth(cmdfd); + setup_run_file(cmdfd, system_constraints_file); + setup_depend(cmdfd); /* @@ -2898,13 +2870,13 @@ initialize_data_directory(void) * They are all droppable at the whim of the DBA. 
*/ - setup_sysviews(cmdfd); + setup_run_file(cmdfd, system_views_file); setup_description(cmdfd); setup_collation(cmdfd); - setup_dictionary(cmdfd); + setup_run_file(cmdfd, dictionary_file); setup_privileges(cmdfd); @@ -2953,6 +2925,7 @@ main(int argc, char *argv[]) {"no-clean", no_argument, NULL, 'n'}, {"nosync", no_argument, NULL, 'N'}, /* for backwards compatibility */ {"no-sync", no_argument, NULL, 'N'}, + {"no-instructions", no_argument, NULL, 13}, {"sync-only", no_argument, NULL, 'S'}, {"waldir", required_argument, NULL, 'X'}, {"wal-segsize", required_argument, NULL, 12}, @@ -3093,6 +3066,9 @@ main(int argc, char *argv[]) case 12: str_wal_segment_size_mb = pg_strdup(optarg); break; + case 13: + noinstructions = true; + break; case 'g': SetDataDirectoryCreatePerm(PG_DIR_MODE_GROUP); break; @@ -3243,34 +3219,40 @@ main(int argc, char *argv[]) "--auth-local and --auth-host, the next time you run initdb.\n")); } - /* - * Build up a shell command to tell the user how to start the server - */ - start_db_cmd = createPQExpBuffer(); + if (!noinstructions) + { + /* + * Build up a shell command to tell the user how to start the server + */ + start_db_cmd = createPQExpBuffer(); + + /* Get directory specification used to start initdb ... */ + strlcpy(pg_ctl_path, argv[0], sizeof(pg_ctl_path)); + canonicalize_path(pg_ctl_path); + get_parent_directory(pg_ctl_path); + /* ... and tag on pg_ctl instead */ + join_path_components(pg_ctl_path, pg_ctl_path, "pg_ctl"); - /* Get directory specification used to start initdb ... */ - strlcpy(pg_ctl_path, argv[0], sizeof(pg_ctl_path)); - canonicalize_path(pg_ctl_path); - get_parent_directory(pg_ctl_path); - /* ... 
and tag on pg_ctl instead */ - join_path_components(pg_ctl_path, pg_ctl_path, "pg_ctl"); + /* path to pg_ctl, properly quoted */ + appendShellString(start_db_cmd, pg_ctl_path); - /* path to pg_ctl, properly quoted */ - appendShellString(start_db_cmd, pg_ctl_path); + /* add -D switch, with properly quoted data directory */ + appendPQExpBufferStr(start_db_cmd, " -D "); + appendShellString(start_db_cmd, pgdata_native); - /* add -D switch, with properly quoted data directory */ - appendPQExpBufferStr(start_db_cmd, " -D "); - appendShellString(start_db_cmd, pgdata_native); + /* add suggested -l switch and "start" command */ + /* translator: This is a placeholder in a shell command. */ + appendPQExpBuffer(start_db_cmd, " -l %s start", _("logfile")); - /* add suggested -l switch and "start" command */ - /* translator: This is a placeholder in a shell command. */ - appendPQExpBuffer(start_db_cmd, " -l %s start", _("logfile")); + printf(_("\nSuccess. You can now start the database server using:\n\n" + " %s\n\n"), + start_db_cmd->data); - printf(_("\nSuccess. 
You can now start the database server using:\n\n" - " %s\n\n"), - start_db_cmd->data); + destroyPQExpBuffer(start_db_cmd); + + printf(_("\nSuccess.\n")); + } - destroyPQExpBuffer(start_db_cmd); success = true; return 0; diff --git a/src/bin/pg_basebackup/Makefile b/src/bin/pg_basebackup/Makefile index 988007c6fdd68..1d861087ad1e7 100644 --- a/src/bin/pg_basebackup/Makefile +++ b/src/bin/pg_basebackup/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/pg_basebackup # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/bin/pg_basebackup/Makefile diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 7a5d4562f9461..16d8929b238fd 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -4,7 +4,7 @@ * * Author: Magnus Hagander * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_basebackup/pg_basebackup.c diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c index cddc896390da9..4122d840941b1 100644 --- a/src/bin/pg_basebackup/pg_receivewal.c +++ b/src/bin/pg_basebackup/pg_receivewal.c @@ -5,7 +5,7 @@ * * Author: Magnus Hagander * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_basebackup/pg_receivewal.c diff --git a/src/bin/pg_basebackup/pg_recvlogical.c b/src/bin/pg_basebackup/pg_recvlogical.c index a4e0d6aeb29c9..553ba7b8f4d02 100644 --- a/src/bin/pg_basebackup/pg_recvlogical.c +++ b/src/bin/pg_basebackup/pg_recvlogical.c @@ -3,7 +3,7 @@ * pg_recvlogical.c - receive data from a logical decoding slot in a streaming * fashion 
and write it to a local file. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_basebackup/pg_recvlogical.c diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index dc97c7e89c4d6..4fc050f3a1c81 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -5,7 +5,7 @@ * * Author: Magnus Hagander * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_basebackup/receivelog.c diff --git a/src/bin/pg_basebackup/receivelog.h b/src/bin/pg_basebackup/receivelog.h index efe7620401a3e..e04333bf81d73 100644 --- a/src/bin/pg_basebackup/receivelog.h +++ b/src/bin/pg_basebackup/receivelog.h @@ -2,7 +2,7 @@ * * receivelog.h * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_basebackup/receivelog.h diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c index da577a7f8f485..99daf0e97278d 100644 --- a/src/bin/pg_basebackup/streamutil.c +++ b/src/bin/pg_basebackup/streamutil.c @@ -5,7 +5,7 @@ * * Author: Magnus Hagander * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_basebackup/streamutil.c diff --git a/src/bin/pg_basebackup/streamutil.h b/src/bin/pg_basebackup/streamutil.h index 57448656e3dfa..10f87ad0c14b3 100644 --- a/src/bin/pg_basebackup/streamutil.h +++ b/src/bin/pg_basebackup/streamutil.h @@ -2,7 +2,7 @@ * * streamutil.h * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * 
* IDENTIFICATION * src/bin/pg_basebackup/streamutil.h diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index f674a7c94e70e..9eba7d8d7d260 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -502,10 +502,10 @@ # create tables to corrupt and get their relfilenodes my $file_corrupt1 = $node->safe_psql('postgres', - q{SELECT a INTO corrupt1 FROM generate_series(1,10000) AS a; ALTER TABLE corrupt1 SET (autovacuum_enabled=false); SELECT pg_relation_filepath('corrupt1')} + q{CREATE TABLE corrupt1 AS SELECT a FROM generate_series(1,10000) AS a; ALTER TABLE corrupt1 SET (autovacuum_enabled=false); SELECT pg_relation_filepath('corrupt1')} ); my $file_corrupt2 = $node->safe_psql('postgres', - q{SELECT b INTO corrupt2 FROM generate_series(1,2) AS b; ALTER TABLE corrupt2 SET (autovacuum_enabled=false); SELECT pg_relation_filepath('corrupt2')} + q{CREATE TABLE corrupt2 AS SELECT b FROM generate_series(1,2) AS b; ALTER TABLE corrupt2 SET (autovacuum_enabled=false); SELECT pg_relation_filepath('corrupt2')} ); # set page header and block sizes diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c index bd1947d623fef..a15bbb20e7373 100644 --- a/src/bin/pg_basebackup/walmethods.c +++ b/src/bin/pg_basebackup/walmethods.c @@ -5,7 +5,7 @@ * NOTE! The caller must ensure that only one method is instantiated in * any given program, and that it's only instantiated once! 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_basebackup/walmethods.c diff --git a/src/bin/pg_basebackup/walmethods.h b/src/bin/pg_basebackup/walmethods.h index 9a661c673ccdf..fc4bb52cb742c 100644 --- a/src/bin/pg_basebackup/walmethods.h +++ b/src/bin/pg_basebackup/walmethods.h @@ -2,7 +2,7 @@ * * walmethods.h * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_basebackup/walmethods.h diff --git a/src/bin/pg_checksums/Makefile b/src/bin/pg_checksums/Makefile index b1cfa5733d61b..ba62406105d1a 100644 --- a/src/bin/pg_checksums/Makefile +++ b/src/bin/pg_checksums/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/pg_checksums # -# Copyright (c) 1998-2020, PostgreSQL Global Development Group +# Copyright (c) 1998-2021, PostgreSQL Global Development Group # # src/bin/pg_checksums/Makefile # diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 28aba92a4c3c5..0223ee440829d 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -4,7 +4,7 @@ * Checks, enables or disables page level checksums for an offline * cluster * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_checksums/pg_checksums.c diff --git a/src/bin/pg_checksums/t/002_actions.pl b/src/bin/pg_checksums/t/002_actions.pl index 4e4934532a30d..8a81f36a067fd 100644 --- a/src/bin/pg_checksums/t/002_actions.pl +++ b/src/bin/pg_checksums/t/002_actions.pl @@ -21,7 +21,7 @@ sub check_relation_corruption $node->safe_psql( 'postgres', - "SELECT a INTO $table FROM generate_series(1,10000) AS a; + "CREATE TABLE $table AS SELECT a FROM generate_series(1,10000) AS a; ALTER TABLE 
$table SET (autovacuum_enabled=false);"); $node->safe_psql('postgres', diff --git a/src/bin/pg_config/Makefile b/src/bin/pg_config/Makefile index d3b5f1fa7591d..fa60d602460bf 100644 --- a/src/bin/pg_config/Makefile +++ b/src/bin/pg_config/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/pg_config # -# Copyright (c) 1998-2020, PostgreSQL Global Development Group +# Copyright (c) 1998-2021, PostgreSQL Global Development Group # # src/bin/pg_config/Makefile # diff --git a/src/bin/pg_config/pg_config.c b/src/bin/pg_config/pg_config.c index f5410f64187a6..c40bb3282e949 100644 --- a/src/bin/pg_config/pg_config.c +++ b/src/bin/pg_config/pg_config.c @@ -15,7 +15,7 @@ * * This code is released under the terms of the PostgreSQL License. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * src/bin/pg_config/pg_config.c * diff --git a/src/bin/pg_controldata/Makefile b/src/bin/pg_controldata/Makefile index 76b330dc1f9ea..c5405b8a080d5 100644 --- a/src/bin/pg_controldata/Makefile +++ b/src/bin/pg_controldata/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/pg_controldata # -# Copyright (c) 1998-2020, PostgreSQL Global Development Group +# Copyright (c) 1998-2021, PostgreSQL Global Development Group # # src/bin/pg_controldata/Makefile # diff --git a/src/bin/pg_ctl/Makefile b/src/bin/pg_ctl/Makefile index 14602c1185129..5d5f5372a3f06 100644 --- a/src/bin/pg_ctl/Makefile +++ b/src/bin/pg_ctl/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/pg_ctl # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/bin/pg_ctl/Makefile diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index fc07f1aba6e77..7985da0a94302 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -2,7 +2,7 @@ * * pg_ctl 
--- start/stops/restarts the PostgreSQL server * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * * src/bin/pg_ctl/pg_ctl.c * @@ -889,11 +889,10 @@ do_start(void) */ #ifndef WIN32 { - static char env_var[32]; + char env_var[32]; - snprintf(env_var, sizeof(env_var), "PG_GRANDPARENT_PID=%d", - (int) getppid()); - putenv(env_var); + snprintf(env_var, sizeof(env_var), "%d", (int) getppid()); + setenv("PG_GRANDPARENT_PID", env_var, 1); } #endif @@ -2340,12 +2339,10 @@ main(int argc, char **argv) case 'D': { char *pgdata_D; - char *env_var; pgdata_D = pg_strdup(optarg); canonicalize_path(pgdata_D); - env_var = psprintf("PGDATA=%s", pgdata_D); - putenv(env_var); + setenv("PGDATA", pgdata_D, 1); /* * We could pass PGDATA just in an environment @@ -2353,6 +2350,7 @@ main(int argc, char **argv) * 'ps' display */ pgdata_opt = psprintf("-D \"%s\" ", pgdata_D); + free(pgdata_D); break; } case 'e': diff --git a/src/bin/pg_dump/Makefile b/src/bin/pg_dump/Makefile index 2532d9183a665..38988eccc0da1 100644 --- a/src/bin/pg_dump/Makefile +++ b/src/bin/pg_dump/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/pg_dump # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/bin/pg_dump/Makefile diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index 634ca86cfb788..1a261a5545668 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -4,7 +4,7 @@ * Catalog routines used by pg_dump; long ago these were shared * by another dump tool, but not anymore. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -52,6 +52,7 @@ static DumpableObject **oprinfoindex; static DumpableObject **collinfoindex; static DumpableObject **nspinfoindex; static DumpableObject **extinfoindex; +static DumpableObject **pubinfoindex; static int numTables; static int numTypes; static int numFuncs; @@ -59,6 +60,7 @@ static int numOperators; static int numCollations; static int numNamespaces; static int numExtensions; +static int numPublications; /* This is an array of object identities, not actual DumpableObjects */ static ExtensionMemberId *extmembers; @@ -93,6 +95,7 @@ getSchemaData(Archive *fout, int *numTablesPtr) CollInfo *collinfo; NamespaceInfo *nspinfo; ExtensionInfo *extinfo; + PublicationInfo *pubinfo; InhInfo *inhinfo; int numAggregates; int numInherits; @@ -247,7 +250,9 @@ getSchemaData(Archive *fout, int *numTablesPtr) getPolicies(fout, tblinfo, numTables); pg_log_info("reading publications"); - getPublications(fout); + pubinfo = getPublications(fout, &numPublications); + pubinfoindex = buildIndexArray(pubinfo, numPublications, + sizeof(PublicationInfo)); pg_log_info("reading publication membership"); getPublicationTables(fout, tblinfo, numTables); @@ -261,7 +266,9 @@ getSchemaData(Archive *fout, int *numTablesPtr) /* flagInhTables - * Fill in parent link fields of tables for which we need that information, - * and mark parents of target tables as interesting + * mark parents of target tables as interesting, and create + * TableAttachInfo objects for partitioned tables with appropriate + * dependency links. * * Note that only direct ancestors of targets are marked interesting. 
* This is sufficient; we don't much care whether they inherited their @@ -320,6 +327,40 @@ flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables, for (j = 0; j < numParents; j++) parents[j]->interesting = true; } + + /* Create TableAttachInfo object if needed */ + if (tblinfo[i].dobj.dump && tblinfo[i].ispartition) + { + TableAttachInfo *attachinfo; + + /* With partitions there can only be one parent */ + if (tblinfo[i].numParents != 1) + fatal("invalid number of parents %d for table \"%s\"", + tblinfo[i].numParents, + tblinfo[i].dobj.name); + + attachinfo = (TableAttachInfo *) palloc(sizeof(TableAttachInfo)); + attachinfo->dobj.objType = DO_TABLE_ATTACH; + attachinfo->dobj.catId.tableoid = 0; + attachinfo->dobj.catId.oid = 0; + AssignDumpId(&attachinfo->dobj); + attachinfo->dobj.name = pg_strdup(tblinfo[i].dobj.name); + attachinfo->dobj.namespace = tblinfo[i].dobj.namespace; + attachinfo->parentTbl = tblinfo[i].parents[0]; + attachinfo->partitionTbl = &tblinfo[i]; + + /* + * We must state the DO_TABLE_ATTACH object's dependencies + * explicitly, since it will not match anything in pg_depend. + * + * Give it dependencies on both the partition table and the parent + * table, so that it will not be executed till both of those + * exist. (There's no need to care what order those are created + * in.) + */ + addObjectDependency(&attachinfo->dobj, tblinfo[i].dobj.dumpId); + addObjectDependency(&attachinfo->dobj, tblinfo[i].parents[0]->dobj.dumpId); + } } } @@ -426,12 +467,22 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables) /* flagInhAttrs - * for each dumpable table in tblinfo, flag its inherited attributes * - * What we need to do here is detect child columns that inherit NOT NULL - * bits from their parents (so that we needn't specify that again for the - * child) and child columns that have DEFAULT NULL when their parents had - * some non-null default. 
In the latter case, we make up a dummy AttrDefInfo - * object so that we'll correctly emit the necessary DEFAULT NULL clause; - * otherwise the backend will apply an inherited default to the column. + * What we need to do here is: + * + * - Detect child columns that inherit NOT NULL bits from their parents, so + * that we needn't specify that again for the child. + * + * - Detect child columns that have DEFAULT NULL when their parents had some + * non-null default. In this case, we make up a dummy AttrDefInfo object so + * that we'll correctly emit the necessary DEFAULT NULL clause; otherwise + * the backend will apply an inherited default to the column. + * + * - Detect child columns that have a generation expression when their parents + * also have one. Generation expressions are always inherited, so there is + * no need to set them again in child tables, and there is no syntax for it + * either. (Exception: In binary upgrade mode we dump them because + * inherited tables are recreated standalone first and then reattached to + * the parent.) 
* * modifies tblinfo */ @@ -469,6 +520,7 @@ flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables) { bool foundNotNull; /* Attr was NOT NULL in a parent */ bool foundDefault; /* Found a default in a parent */ + bool foundGenerated; /* Found a generated in a parent */ /* no point in examining dropped columns */ if (tbinfo->attisdropped[j]) @@ -476,6 +528,7 @@ flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables) foundNotNull = false; foundDefault = false; + foundGenerated = false; for (k = 0; k < numParents; k++) { TableInfo *parent = parents[k]; @@ -487,7 +540,8 @@ flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables) if (inhAttrInd >= 0) { foundNotNull |= parent->notnull[inhAttrInd]; - foundDefault |= (parent->attrdefs[inhAttrInd] != NULL); + foundDefault |= (parent->attrdefs[inhAttrInd] != NULL && !parent->attgenerated[inhAttrInd]); + foundGenerated |= parent->attgenerated[inhAttrInd]; } } @@ -529,6 +583,10 @@ flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables) tbinfo->attrdefs[j] = attrDef; } + + /* Remove generation expression from child */ + if (foundGenerated && !dopt->binary_upgrade) + tbinfo->attrdefs[j] = NULL; } } } @@ -548,6 +606,7 @@ AssignDumpId(DumpableObject *dobj) dobj->name = NULL; /* must be set later */ dobj->namespace = NULL; /* may be set later */ dobj->dump = DUMP_COMPONENT_ALL; /* default assumption */ + dobj->dump_contains = DUMP_COMPONENT_ALL; /* default assumption */ dobj->ext_member = false; /* default assumption */ dobj->depends_on_ext = false; /* default assumption */ dobj->dependencies = NULL; @@ -900,6 +959,17 @@ findExtensionByOid(Oid oid) return (ExtensionInfo *) findObjectByOid(oid, extinfoindex, numExtensions); } +/* + * findPublicationByOid + * finds the entry (in pubinfo) of the publication with the given oid + * returns NULL if not found + */ +PublicationInfo * +findPublicationByOid(Oid oid) +{ + return (PublicationInfo *) findObjectByOid(oid, pubinfoindex, numPublications); 
+} + /* * findIndexByOid * find the entry of the index with the given oid diff --git a/src/bin/pg_dump/compress_io.c b/src/bin/pg_dump/compress_io.c index 1417401086a67..808df19495543 100644 --- a/src/bin/pg_dump/compress_io.c +++ b/src/bin/pg_dump/compress_io.c @@ -4,7 +4,7 @@ * Routines for archivers to write an uncompressed or compressed data * stream. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * This file includes two APIs for dealing with compressed data. The first diff --git a/src/bin/pg_dump/compress_io.h b/src/bin/pg_dump/compress_io.h index d2e6e1b85480e..1eafbd8456686 100644 --- a/src/bin/pg_dump/compress_io.h +++ b/src/bin/pg_dump/compress_io.h @@ -3,7 +3,7 @@ * compress_io.h * Interface to compress_io.c routines * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c index 536c9ffec8cde..60d306e7c3a32 100644 --- a/src/bin/pg_dump/dumputils.c +++ b/src/bin/pg_dump/dumputils.c @@ -5,7 +5,7 @@ * Basically this is stuff that is useful in both pg_dump and pg_dumpall. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pg_dump/dumputils.c @@ -168,48 +168,28 @@ buildACLCommands(const char *name, const char *subname, const char *nspname, for (i = 0; i < nraclitems; i++) { if (!parseAclItem(raclitems[i], type, name, subname, remoteVersion, - grantee, grantor, privs, privswgo)) + grantee, grantor, privs, NULL)) { ok = false; break; } - if (privs->len > 0 || privswgo->len > 0) + if (privs->len > 0) { - if (privs->len > 0) - { - appendPQExpBuffer(firstsql, "%sREVOKE %s ON %s ", - prefix, privs->data, type); - if (nspname && *nspname) - appendPQExpBuffer(firstsql, "%s.", fmtId(nspname)); - appendPQExpBuffer(firstsql, "%s FROM ", name); - if (grantee->len == 0) - appendPQExpBufferStr(firstsql, "PUBLIC;\n"); - else if (strncmp(grantee->data, "group ", - strlen("group ")) == 0) - appendPQExpBuffer(firstsql, "GROUP %s;\n", - fmtId(grantee->data + strlen("group "))); - else - appendPQExpBuffer(firstsql, "%s;\n", - fmtId(grantee->data)); - } - if (privswgo->len > 0) - { - appendPQExpBuffer(firstsql, - "%sREVOKE GRANT OPTION FOR %s ON %s ", - prefix, privswgo->data, type); - if (nspname && *nspname) - appendPQExpBuffer(firstsql, "%s.", fmtId(nspname)); - appendPQExpBuffer(firstsql, "%s FROM ", name); - if (grantee->len == 0) - appendPQExpBufferStr(firstsql, "PUBLIC"); - else if (strncmp(grantee->data, "group ", - strlen("group ")) == 0) - appendPQExpBuffer(firstsql, "GROUP %s", - fmtId(grantee->data + strlen("group "))); - else - appendPQExpBufferStr(firstsql, fmtId(grantee->data)); - } + appendPQExpBuffer(firstsql, "%sREVOKE %s ON %s ", + prefix, privs->data, type); + if (nspname && *nspname) + appendPQExpBuffer(firstsql, "%s.", fmtId(nspname)); + appendPQExpBuffer(firstsql, "%s FROM ", name); + if (grantee->len == 0) + appendPQExpBufferStr(firstsql, 
"PUBLIC;\n"); + else if (strncmp(grantee->data, "group ", + strlen("group ")) == 0) + appendPQExpBuffer(firstsql, "GROUP %s;\n", + fmtId(grantee->data + strlen("group "))); + else + appendPQExpBuffer(firstsql, "%s;\n", + fmtId(grantee->data)); } } } @@ -462,8 +442,11 @@ buildDefaultACLCommands(const char *type, const char *nspname, * The returned grantee string will be the dequoted username or groupname * (preceded with "group " in the latter case). Note that a grant to PUBLIC * is represented by an empty grantee string. The returned grantor is the - * dequoted grantor name. Privilege characters are decoded and split between - * privileges with grant option (privswgo) and without (privs). + * dequoted grantor name. Privilege characters are translated to GRANT/REVOKE + * comma-separated privileges lists. If "privswgo" is non-NULL, the result is + * separate lists for privileges with grant option ("privswgo") and without + * ("privs"). Otherwise, "privs" bears every relevant privilege, ignoring the + * grant option distinction. * * Note: for cross-version compatibility, it's important to use ALL to * represent the privilege sets whenever appropriate. @@ -514,7 +497,7 @@ parseAclItem(const char *item, const char *type, do { \ if ((pos = strchr(eqpos + 1, code))) \ { \ - if (*(pos + 1) == '*') \ + if (*(pos + 1) == '*' && privswgo != NULL) \ { \ AddAcl(privswgo, keywd, subname); \ all_without_go = false; \ diff --git a/src/bin/pg_dump/dumputils.h b/src/bin/pg_dump/dumputils.h index d35d9d34d28c1..6e97da7487ef1 100644 --- a/src/bin/pg_dump/dumputils.h +++ b/src/bin/pg_dump/dumputils.h @@ -5,7 +5,7 @@ * Basically this is stuff that is useful in both pg_dump and pg_dumpall. 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pg_dump/dumputils.h diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c index b51cc76c7dc21..c7351a43fde2d 100644 --- a/src/bin/pg_dump/parallel.c +++ b/src/bin/pg_dump/parallel.c @@ -4,7 +4,7 @@ * * Parallel support for pg_dump and pg_restore * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/bin/pg_dump/parallel.h b/src/bin/pg_dump/parallel.h index a2e98cb87bf01..0fbf736c811cc 100644 --- a/src/bin/pg_dump/parallel.h +++ b/src/bin/pg_dump/parallel.h @@ -4,7 +4,7 @@ * * Parallel support for pg_dump and pg_restore * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/bin/pg_dump/pg_backup_db.c b/src/bin/pg_dump/pg_backup_db.c index 5ba43441f50aa..2856c16e853db 100644 --- a/src/bin/pg_dump/pg_backup_db.c +++ b/src/bin/pg_dump/pg_backup_db.c @@ -188,12 +188,10 @@ ConnectDatabase(Archive *AHX, if (PQstatus(AH->connection) == CONNECTION_BAD) { if (isReconnect) - fatal("reconnection to database \"%s\" failed: %s", - PQdb(AH->connection) ? PQdb(AH->connection) : "", + fatal("reconnection failed: %s", PQerrorMessage(AH->connection)); else - fatal("connection to database \"%s\" failed: %s", - PQdb(AH->connection) ? 
PQdb(AH->connection) : "", + fatal("%s", PQerrorMessage(AH->connection)); } diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c index 650b542fce15d..fb8c7713a508a 100644 --- a/src/bin/pg_dump/pg_backup_directory.c +++ b/src/bin/pg_dump/pg_backup_directory.c @@ -17,7 +17,7 @@ * sync. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 2000, Philip Warner * diff --git a/src/bin/pg_dump/pg_backup_utils.c b/src/bin/pg_dump/pg_backup_utils.c index 5729a20a84af3..c709a40e06d5c 100644 --- a/src/bin/pg_dump/pg_backup_utils.c +++ b/src/bin/pg_dump/pg_backup_utils.c @@ -4,7 +4,7 @@ * Utility routines shared by pg_dump and pg_restore * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pg_dump/pg_backup_utils.c diff --git a/src/bin/pg_dump/pg_backup_utils.h b/src/bin/pg_dump/pg_backup_utils.h index 2bea167a69bc5..306798f9ac979 100644 --- a/src/bin/pg_dump/pg_backup_utils.h +++ b/src/bin/pg_dump/pg_backup_utils.h @@ -4,7 +4,7 @@ * Utility routines shared by pg_dump and pg_restore. * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pg_dump/pg_backup_utils.h diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 1ab98a2286e2a..d99b61e6215f1 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -4,7 +4,7 @@ * pg_dump is a utility for dumping out a postgres database * into a script file. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * pg_dump will read the system catalogs in a database and dump out a @@ -202,6 +202,7 @@ static void dumpTrigger(Archive *fout, TriggerInfo *tginfo); static void dumpEventTrigger(Archive *fout, EventTriggerInfo *evtinfo); static void dumpTable(Archive *fout, TableInfo *tbinfo); static void dumpTableSchema(Archive *fout, TableInfo *tbinfo); +static void dumpTableAttach(Archive *fout, TableAttachInfo *tbinfo); static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo); static void dumpSequence(Archive *fout, TableInfo *tbinfo); static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo); @@ -289,7 +290,7 @@ static const char *fmtCopyColumnList(const TableInfo *ti, PQExpBuffer buffer); static bool nonemptyReloptions(const char *reloptions); static void appendIndexCollationVersion(PQExpBuffer buffer, IndxInfo *indxinfo, int enc, bool coll_unknown, - Archive *fount); + Archive *fout); static void appendReloptionsArrayAH(PQExpBuffer buffer, const char *reloptions, const char *prefix, Archive *fout); static char *get_synchronized_snapshot(Archive *fout); @@ -3864,8 +3865,8 @@ dumpPolicy(Archive *fout, PolicyInfo *polinfo) * getPublications * get information about publications */ -void -getPublications(Archive *fout) +PublicationInfo * +getPublications(Archive *fout, int *numPublications) { DumpOptions *dopt = fout->dopt; PQExpBuffer query; @@ -3885,7 +3886,10 @@ getPublications(Archive *fout) ntups; if (dopt->no_publications || fout->remoteVersion < 100000) - return; + { + *numPublications = 0; + return NULL; + } query = createPQExpBuffer(); @@ -3963,6 +3967,9 @@ getPublications(Archive *fout) PQclear(res); destroyPQExpBuffer(query); + + *numPublications = ntups; + return pubinfo; } /* @@ -4071,7 +4078,8 @@ getPublicationTables(Archive *fout, 
TableInfo tblinfo[], int numTables) DumpOptions *dopt = fout->dopt; int i_tableoid; int i_oid; - int i_pubname; + int i_prpubid; + int i_prrelid; int i, j, ntups; @@ -4081,15 +4089,39 @@ getPublicationTables(Archive *fout, TableInfo tblinfo[], int numTables) query = createPQExpBuffer(); - for (i = 0; i < numTables; i++) + /* Collect all publication membership info. */ + appendPQExpBufferStr(query, + "SELECT tableoid, oid, prpubid, prrelid " + "FROM pg_catalog.pg_publication_rel"); + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + ntups = PQntuples(res); + + i_tableoid = PQfnumber(res, "tableoid"); + i_oid = PQfnumber(res, "oid"); + i_prpubid = PQfnumber(res, "prpubid"); + i_prrelid = PQfnumber(res, "prrelid"); + + /* this allocation may be more than we need */ + pubrinfo = pg_malloc(ntups * sizeof(PublicationRelInfo)); + j = 0; + + for (i = 0; i < ntups; i++) { - TableInfo *tbinfo = &tblinfo[i]; + Oid prpubid = atooid(PQgetvalue(res, i, i_prpubid)); + Oid prrelid = atooid(PQgetvalue(res, i, i_prrelid)); + PublicationInfo *pubinfo; + TableInfo *tbinfo; /* - * Only regular and partitioned tables can be added to publications. + * Ignore any entries for which we aren't interested in either the + * publication or the rel. */ - if (tbinfo->relkind != RELKIND_RELATION && - tbinfo->relkind != RELKIND_PARTITIONED_TABLE) + pubinfo = findPublicationByOid(prpubid); + if (pubinfo == NULL) + continue; + tbinfo = findTableByOid(prrelid); + if (tbinfo == NULL) continue; /* @@ -4099,55 +4131,24 @@ getPublicationTables(Archive *fout, TableInfo tblinfo[], int numTables) if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)) continue; - pg_log_info("reading publication membership for table \"%s.%s\"", - tbinfo->dobj.namespace->dobj.name, - tbinfo->dobj.name); - - resetPQExpBuffer(query); - - /* Get the publication membership for the table. 
*/ - appendPQExpBuffer(query, - "SELECT pr.tableoid, pr.oid, p.pubname " - "FROM pg_publication_rel pr, pg_publication p " - "WHERE pr.prrelid = '%u'" - " AND p.oid = pr.prpubid", - tbinfo->dobj.catId.oid); - res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); - - ntups = PQntuples(res); - - if (ntups == 0) - { - /* - * Table is not member of any publications. Clean up and return. - */ - PQclear(res); - continue; - } - - i_tableoid = PQfnumber(res, "tableoid"); - i_oid = PQfnumber(res, "oid"); - i_pubname = PQfnumber(res, "pubname"); + /* OK, make a DumpableObject for this relationship */ + pubrinfo[j].dobj.objType = DO_PUBLICATION_REL; + pubrinfo[j].dobj.catId.tableoid = + atooid(PQgetvalue(res, i, i_tableoid)); + pubrinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid)); + AssignDumpId(&pubrinfo[j].dobj); + pubrinfo[j].dobj.namespace = tbinfo->dobj.namespace; + pubrinfo[j].dobj.name = tbinfo->dobj.name; + pubrinfo[j].publication = pubinfo; + pubrinfo[j].pubtable = tbinfo; - pubrinfo = pg_malloc(ntups * sizeof(PublicationRelInfo)); + /* Decide whether we want to dump it */ + selectDumpablePublicationTable(&(pubrinfo[j].dobj), fout); - for (j = 0; j < ntups; j++) - { - pubrinfo[j].dobj.objType = DO_PUBLICATION_REL; - pubrinfo[j].dobj.catId.tableoid = - atooid(PQgetvalue(res, j, i_tableoid)); - pubrinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid)); - AssignDumpId(&pubrinfo[j].dobj); - pubrinfo[j].dobj.namespace = tbinfo->dobj.namespace; - pubrinfo[j].dobj.name = tbinfo->dobj.name; - pubrinfo[j].pubname = pg_strdup(PQgetvalue(res, j, i_pubname)); - pubrinfo[j].pubtable = tbinfo; - - /* Decide whether we want to dump it */ - selectDumpablePublicationTable(&(pubrinfo[j].dobj), fout); - } - PQclear(res); + j++; } + + PQclear(res); destroyPQExpBuffer(query); } @@ -4158,6 +4159,7 @@ getPublicationTables(Archive *fout, TableInfo tblinfo[], int numTables) static void dumpPublicationTable(Archive *fout, PublicationRelInfo *pubrinfo) { + PublicationInfo 
*pubinfo = pubrinfo->publication; TableInfo *tbinfo = pubrinfo->pubtable; PQExpBuffer query; char *tag; @@ -4165,22 +4167,26 @@ dumpPublicationTable(Archive *fout, PublicationRelInfo *pubrinfo) if (!(pubrinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)) return; - tag = psprintf("%s %s", pubrinfo->pubname, tbinfo->dobj.name); + tag = psprintf("%s %s", pubinfo->dobj.name, tbinfo->dobj.name); query = createPQExpBuffer(); appendPQExpBuffer(query, "ALTER PUBLICATION %s ADD TABLE ONLY", - fmtId(pubrinfo->pubname)); + fmtId(pubinfo->dobj.name)); appendPQExpBuffer(query, " %s;\n", fmtQualifiedDumpable(tbinfo)); /* - * There is no point in creating drop query as the drop is done by table - * drop. + * There is no point in creating a drop query as the drop is done by table + * drop. (If you think to change this, see also _printTocEntry().) + * Although this object doesn't really have ownership as such, set the + * owner field anyway to ensure that the command is run by the correct + * role at restore time. */ ArchiveEntry(fout, pubrinfo->dobj.catId, pubrinfo->dobj.dumpId, ARCHIVE_OPTS(.tag = tag, .namespace = tbinfo->dobj.namespace->dobj.name, + .owner = pubinfo->rolname, .description = "PUBLICATION TABLE", .section = SECTION_POST_DATA, .createStmt = query->data)); @@ -8833,13 +8839,37 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) attrdefs[j].dobj.dump = tbinfo->dobj.dump; /* - * Defaults on a VIEW must always be dumped as separate ALTER - * TABLE commands. Defaults on regular tables are dumped as - * part of the CREATE TABLE if possible, which it won't be if - * the column is not going to be emitted explicitly. + * Figure out whether the default/generation expression should + * be dumped as part of the main CREATE TABLE (or similar) + * command or as a separate ALTER TABLE (or similar) command. + * The preference is to put it into the CREATE command, but in + * some cases that's not possible. 
*/ - if (tbinfo->relkind == RELKIND_VIEW) + if (tbinfo->attgenerated[adnum - 1]) + { + /* + * Column generation expressions cannot be dumped + * separately, because there is no syntax for it. The + * !shouldPrintColumn case below will be tempted to set + * them to separate if they are attached to an inherited + * column without a local definition, but that would be + * wrong and unnecessary, because generation expressions + * are always inherited, so there is no need to set them + * again in child tables, and there is no syntax for it + * either. By setting separate to false here we prevent + * the "default" from being processed as its own dumpable + * object, and flagInhAttrs() will remove it from the + * table when it detects that it belongs to an inherited + * column. + */ + attrdefs[j].separate = false; + } + else if (tbinfo->relkind == RELKIND_VIEW) { + /* + * Defaults on a VIEW must always be dumped as separate + * ALTER TABLE commands. + */ attrdefs[j].separate = true; } else if (!shouldPrintColumn(dopt, tbinfo, adnum - 1)) @@ -8850,7 +8880,10 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) else { attrdefs[j].separate = false; + } + if (!attrdefs[j].separate) + { /* * Mark the default as needing to appear before the table, * so that any dependencies it has must be emitted before @@ -10176,6 +10209,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) case DO_TABLE: dumpTable(fout, (TableInfo *) dobj); break; + case DO_TABLE_ATTACH: + dumpTableAttach(fout, (TableAttachInfo *) dobj); + break; case DO_ATTRDEF: dumpAttrDef(fout, (AttrDefInfo *) dobj); break; @@ -11183,7 +11219,7 @@ dumpDomain(Archive *fout, TypeInfo *tyinfo) if (dopt->binary_upgrade) binary_upgrade_set_type_oids_by_type_oid(fout, q, tyinfo->dobj.catId.oid, - true, /* force array type */ + true, /* force array type */ false); /* force multirange type */ qtypname = pg_strdup(fmtId(tyinfo->dobj.name)); @@ -16133,27 +16169,6 @@ dumpTableSchema(Archive *fout, TableInfo 
*tbinfo) } } - /* - * For partitioned tables, emit the ATTACH PARTITION clause. Note - * that we always want to create partitions this way instead of using - * CREATE TABLE .. PARTITION OF, mainly to preserve a possible column - * layout discrepancy with the parent, but also to ensure it gets the - * correct tablespace setting if it differs from the parent's. - */ - if (tbinfo->ispartition) - { - /* With partitions there can only be one parent */ - if (tbinfo->numParents != 1) - fatal("invalid number of parents %d for table \"%s\"", - tbinfo->numParents, tbinfo->dobj.name); - - /* Perform ALTER TABLE on the parent */ - appendPQExpBuffer(q, - "ALTER TABLE ONLY %s ATTACH PARTITION %s %s;\n", - fmtQualifiedDumpable(parents[0]), - qualrelname, tbinfo->partbound); - } - /* * In binary_upgrade mode, arrange to restore the old relfrozenxid and * relminmxid of all vacuumable relations. (While vacuum.c processes @@ -16383,6 +16398,62 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) free(qualrelname); } +/* + * dumpTableAttach + * write to fout the commands to attach a child partition + * + * Child partitions are always made by creating them separately + * and then using ATTACH PARTITION, rather than using + * CREATE TABLE ... PARTITION OF. This is important for preserving + * any possible discrepancy in column layout, to allow assigning the + * correct tablespace if different, and so that it's possible to restore + * a partition without restoring its parent. (You'll get an error from + * the ATTACH PARTITION command, but that can be ignored, or skipped + * using "pg_restore -L" if you prefer.) The last point motivates + * treating ATTACH PARTITION as a completely separate ArchiveEntry + * rather than emitting it within the child partition's ArchiveEntry. 
+ */ +static void +dumpTableAttach(Archive *fout, TableAttachInfo *attachinfo) +{ + DumpOptions *dopt = fout->dopt; + PQExpBuffer q; + + if (dopt->dataOnly) + return; + + if (!(attachinfo->partitionTbl->dobj.dump & DUMP_COMPONENT_DEFINITION)) + return; + + q = createPQExpBuffer(); + + /* Perform ALTER TABLE on the parent */ + appendPQExpBuffer(q, + "ALTER TABLE ONLY %s ", + fmtQualifiedDumpable(attachinfo->parentTbl)); + appendPQExpBuffer(q, + "ATTACH PARTITION %s %s;\n", + fmtQualifiedDumpable(attachinfo->partitionTbl), + attachinfo->partitionTbl->partbound); + + /* + * There is no point in creating a drop query as the drop is done by table + * drop. (If you think to change this, see also _printTocEntry().) + * Although this object doesn't really have ownership as such, set the + * owner field anyway to ensure that the command is run by the correct + * role at restore time. + */ + ArchiveEntry(fout, attachinfo->dobj.catId, attachinfo->dobj.dumpId, + ARCHIVE_OPTS(.tag = attachinfo->dobj.name, + .namespace = attachinfo->dobj.namespace->dobj.name, + .owner = attachinfo->partitionTbl->rolname, + .description = "TABLE ATTACH", + .section = SECTION_PRE_DATA, + .createStmt = q->data)); + + destroyPQExpBuffer(q); +} + /* * dumpAttrDef --- dump an attribute's default-value declaration */ @@ -16653,9 +16724,17 @@ dumpIndexAttach(Archive *fout, IndexAttachInfo *attachinfo) appendPQExpBuffer(q, "ATTACH PARTITION %s;\n", fmtQualifiedDumpable(attachinfo->partitionIdx)); + /* + * There is no point in creating a drop query as the drop is done by + * index drop. (If you think to change this, see also + * _printTocEntry().) Although this object doesn't really have + * ownership as such, set the owner field anyway to ensure that the + * command is run by the correct role at restore time. 
+ */ ArchiveEntry(fout, attachinfo->dobj.catId, attachinfo->dobj.dumpId, ARCHIVE_OPTS(.tag = attachinfo->dobj.name, .namespace = attachinfo->dobj.namespace->dobj.name, + .owner = attachinfo->parentIdx->indextable->rolname, .description = "INDEX ATTACH", .section = SECTION_POST_DATA, .createStmt = q->data)); @@ -18344,6 +18423,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, case DO_COLLATION: case DO_CONVERSION: case DO_TABLE: + case DO_TABLE_ATTACH: case DO_ATTRDEF: case DO_PROCLANG: case DO_CAST: diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index d7f77f1d3e005..1290f9659b853 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -3,7 +3,7 @@ * pg_dump.h * Common header file for the pg_dump utility * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pg_dump/pg_dump.h @@ -50,6 +50,7 @@ typedef enum DO_COLLATION, DO_CONVERSION, DO_TABLE, + DO_TABLE_ATTACH, DO_ATTRDEF, DO_INDEX, DO_INDEX_ATTACH, @@ -338,6 +339,13 @@ typedef struct _tableInfo struct _triggerInfo *triggers; /* array of TriggerInfo structs */ } TableInfo; +typedef struct _tableAttachInfo +{ + DumpableObject dobj; + TableInfo *parentTbl; /* link to partitioned table */ + TableInfo *partitionTbl; /* link to partition */ +} TableAttachInfo; + typedef struct _attrDefInfo { DumpableObject dobj; /* note: dobj.name is name of table */ @@ -367,7 +375,7 @@ typedef struct _indxInfo int indnattrs; /* total number of index attributes */ Oid *indkeys; /* In spite of the name 'indkeys' this field * contains both key and nonkey attributes */ - char *inddependcollnames; /* FQ names of depended-on collations */ + char *inddependcollnames; /* FQ names of depended-on collations */ char *inddependcollversions; /* versions of the above */ bool indisclustered; bool indisreplident; @@ -615,8 
+623,8 @@ typedef struct _PublicationInfo typedef struct _PublicationRelInfo { DumpableObject dobj; + PublicationInfo *publication; TableInfo *pubtable; - char *pubname; } PublicationRelInfo; /* @@ -667,6 +675,7 @@ extern OprInfo *findOprByOid(Oid oid); extern CollInfo *findCollationByOid(Oid oid); extern NamespaceInfo *findNamespaceByOid(Oid oid); extern ExtensionInfo *findExtensionByOid(Oid oid); +extern PublicationInfo *findPublicationByOid(Oid oid); extern void setExtensionMembership(ExtensionMemberId *extmems, int nextmems); extern ExtensionInfo *findOwningExtension(CatalogId catalogId); @@ -719,7 +728,8 @@ extern void processExtensionTables(Archive *fout, ExtensionInfo extinfo[], int numExtensions); extern EventTriggerInfo *getEventTriggers(Archive *fout, int *numEventTriggers); extern void getPolicies(Archive *fout, TableInfo tblinfo[], int numTables); -extern void getPublications(Archive *fout); +extern PublicationInfo *getPublications(Archive *fout, + int *numPublications); extern void getPublicationTables(Archive *fout, TableInfo tblinfo[], int numTables); extern void getSubscriptions(Archive *fout); diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index 654e2ec51417e..46461fb6a1868 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -4,7 +4,7 @@ * Sort the items of a dump into a safe order for dumping * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -39,52 +39,103 @@ * POST_DATA objects must sort after DO_POST_DATA_BOUNDARY, and DATA objects * must sort between them. 
*/ + +/* This enum lists the priority levels in order */ +enum dbObjectTypePriorities +{ + PRIO_NAMESPACE = 1, + PRIO_PROCLANG, + PRIO_COLLATION, + PRIO_TRANSFORM, + PRIO_EXTENSION, + PRIO_TYPE, /* used for DO_TYPE and DO_SHELL_TYPE */ + PRIO_FUNC, + PRIO_AGG, + PRIO_ACCESS_METHOD, + PRIO_OPERATOR, + PRIO_OPFAMILY, /* used for DO_OPFAMILY and DO_OPCLASS */ + PRIO_CAST, + PRIO_CONVERSION, + PRIO_TSPARSER, + PRIO_TSTEMPLATE, + PRIO_TSDICT, + PRIO_TSCONFIG, + PRIO_FDW, + PRIO_FOREIGN_SERVER, + PRIO_TABLE, + PRIO_TABLE_ATTACH, + PRIO_DUMMY_TYPE, + PRIO_ATTRDEF, + PRIO_BLOB, + PRIO_PRE_DATA_BOUNDARY, /* boundary! */ + PRIO_TABLE_DATA, + PRIO_SEQUENCE_SET, + PRIO_BLOB_DATA, + PRIO_POST_DATA_BOUNDARY, /* boundary! */ + PRIO_CONSTRAINT, + PRIO_INDEX, + PRIO_INDEX_ATTACH, + PRIO_STATSEXT, + PRIO_RULE, + PRIO_TRIGGER, + PRIO_FK_CONSTRAINT, + PRIO_POLICY, + PRIO_PUBLICATION, + PRIO_PUBLICATION_REL, + PRIO_SUBSCRIPTION, + PRIO_DEFAULT_ACL, /* done in ACL pass */ + PRIO_EVENT_TRIGGER, /* must be next to last! */ + PRIO_REFRESH_MATVIEW /* must be last! 
*/ +}; + +/* This table is indexed by enum DumpableObjectType */ static const int dbObjectTypePriority[] = { - 1, /* DO_NAMESPACE */ - 4, /* DO_EXTENSION */ - 5, /* DO_TYPE */ - 5, /* DO_SHELL_TYPE */ - 6, /* DO_FUNC */ - 7, /* DO_AGG */ - 8, /* DO_OPERATOR */ - 8, /* DO_ACCESS_METHOD */ - 9, /* DO_OPCLASS */ - 9, /* DO_OPFAMILY */ - 3, /* DO_COLLATION */ - 11, /* DO_CONVERSION */ - 18, /* DO_TABLE */ - 20, /* DO_ATTRDEF */ - 28, /* DO_INDEX */ - 29, /* DO_INDEX_ATTACH */ - 30, /* DO_STATSEXT */ - 31, /* DO_RULE */ - 32, /* DO_TRIGGER */ - 27, /* DO_CONSTRAINT */ - 33, /* DO_FK_CONSTRAINT */ - 2, /* DO_PROCLANG */ - 10, /* DO_CAST */ - 23, /* DO_TABLE_DATA */ - 24, /* DO_SEQUENCE_SET */ - 19, /* DO_DUMMY_TYPE */ - 12, /* DO_TSPARSER */ - 14, /* DO_TSDICT */ - 13, /* DO_TSTEMPLATE */ - 15, /* DO_TSCONFIG */ - 16, /* DO_FDW */ - 17, /* DO_FOREIGN_SERVER */ - 38, /* DO_DEFAULT_ACL --- done in ACL pass */ - 3, /* DO_TRANSFORM */ - 21, /* DO_BLOB */ - 25, /* DO_BLOB_DATA */ - 22, /* DO_PRE_DATA_BOUNDARY */ - 26, /* DO_POST_DATA_BOUNDARY */ - 39, /* DO_EVENT_TRIGGER --- next to last! */ - 40, /* DO_REFRESH_MATVIEW --- last! 
*/ - 34, /* DO_POLICY */ - 35, /* DO_PUBLICATION */ - 36, /* DO_PUBLICATION_REL */ - 37 /* DO_SUBSCRIPTION */ + PRIO_NAMESPACE, /* DO_NAMESPACE */ + PRIO_EXTENSION, /* DO_EXTENSION */ + PRIO_TYPE, /* DO_TYPE */ + PRIO_TYPE, /* DO_SHELL_TYPE */ + PRIO_FUNC, /* DO_FUNC */ + PRIO_AGG, /* DO_AGG */ + PRIO_OPERATOR, /* DO_OPERATOR */ + PRIO_ACCESS_METHOD, /* DO_ACCESS_METHOD */ + PRIO_OPFAMILY, /* DO_OPCLASS */ + PRIO_OPFAMILY, /* DO_OPFAMILY */ + PRIO_COLLATION, /* DO_COLLATION */ + PRIO_CONVERSION, /* DO_CONVERSION */ + PRIO_TABLE, /* DO_TABLE */ + PRIO_TABLE_ATTACH, /* DO_TABLE_ATTACH */ + PRIO_ATTRDEF, /* DO_ATTRDEF */ + PRIO_INDEX, /* DO_INDEX */ + PRIO_INDEX_ATTACH, /* DO_INDEX_ATTACH */ + PRIO_STATSEXT, /* DO_STATSEXT */ + PRIO_RULE, /* DO_RULE */ + PRIO_TRIGGER, /* DO_TRIGGER */ + PRIO_CONSTRAINT, /* DO_CONSTRAINT */ + PRIO_FK_CONSTRAINT, /* DO_FK_CONSTRAINT */ + PRIO_PROCLANG, /* DO_PROCLANG */ + PRIO_CAST, /* DO_CAST */ + PRIO_TABLE_DATA, /* DO_TABLE_DATA */ + PRIO_SEQUENCE_SET, /* DO_SEQUENCE_SET */ + PRIO_DUMMY_TYPE, /* DO_DUMMY_TYPE */ + PRIO_TSPARSER, /* DO_TSPARSER */ + PRIO_TSDICT, /* DO_TSDICT */ + PRIO_TSTEMPLATE, /* DO_TSTEMPLATE */ + PRIO_TSCONFIG, /* DO_TSCONFIG */ + PRIO_FDW, /* DO_FDW */ + PRIO_FOREIGN_SERVER, /* DO_FOREIGN_SERVER */ + PRIO_DEFAULT_ACL, /* DO_DEFAULT_ACL */ + PRIO_TRANSFORM, /* DO_TRANSFORM */ + PRIO_BLOB, /* DO_BLOB */ + PRIO_BLOB_DATA, /* DO_BLOB_DATA */ + PRIO_PRE_DATA_BOUNDARY, /* DO_PRE_DATA_BOUNDARY */ + PRIO_POST_DATA_BOUNDARY, /* DO_POST_DATA_BOUNDARY */ + PRIO_EVENT_TRIGGER, /* DO_EVENT_TRIGGER */ + PRIO_REFRESH_MATVIEW, /* DO_REFRESH_MATVIEW */ + PRIO_POLICY, /* DO_POLICY */ + PRIO_PUBLICATION, /* DO_PUBLICATION */ + PRIO_PUBLICATION_REL, /* DO_PUBLICATION_REL */ + PRIO_SUBSCRIPTION /* DO_SUBSCRIPTION */ }; StaticAssertDecl(lengthof(dbObjectTypePriority) == (DO_SUBSCRIPTION + 1), @@ -1275,6 +1326,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize) "TABLE %s (ID %d OID %u)", obj->name, obj->dumpId, 
obj->catId.oid); return; + case DO_TABLE_ATTACH: + snprintf(buf, bufsize, + "TABLE ATTACH %s (ID %d)", + obj->name, obj->dumpId); + return; case DO_ATTRDEF: snprintf(buf, bufsize, "ATTRDEF %s.%s (ID %d OID %u)", diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 2fa11745cc330..007a3d0f9a372 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -2,7 +2,7 @@ * * pg_dumpall.c * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * pg_dumpall forces all pg_dump output to be text, since it also outputs @@ -1768,8 +1768,7 @@ connectDatabase(const char *dbname, const char *connection_string, { if (fail_on_error) { - pg_log_error("could not connect to database \"%s\": %s", - dbname, PQerrorMessage(conn)); + pg_log_error("%s", PQerrorMessage(conn)); exit_nicely(1); } else diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 11dc98ee0a5ee..737e46464ab7e 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -2493,6 +2493,52 @@ unlike => { exclude_dump_test_schema => 1, }, }, + 'CREATE TABLE test_table_generated_child1 (without local columns)' => { + create_order => 4, + create_sql => 'CREATE TABLE dump_test.test_table_generated_child1 () + INHERITS (dump_test.test_table_generated);', + regexp => qr/^ + \QCREATE TABLE dump_test.test_table_generated_child1 (\E\n + \)\n + \QINHERITS (dump_test.test_table_generated);\E\n + /xms, + like => + { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, + unlike => { + binary_upgrade => 1, + exclude_dump_test_schema => 1, + }, + }, + + 'ALTER TABLE test_table_generated_child1' => { + regexp => + qr/^\QALTER TABLE ONLY dump_test.test_table_generated_child1 ALTER COLUMN col2 \E/m, + + # should not get emitted + like => {}, + }, + + 'CREATE TABLE 
test_table_generated_child2 (with local columns)' => { + create_order => 4, + create_sql => 'CREATE TABLE dump_test.test_table_generated_child2 ( + col1 int, + col2 int + ) INHERITS (dump_test.test_table_generated);', + regexp => qr/^ + \QCREATE TABLE dump_test.test_table_generated_child2 (\E\n + \s+\Qcol1 integer,\E\n + \s+\Qcol2 integer\E\n + \)\n + \QINHERITS (dump_test.test_table_generated);\E\n + /xms, + like => + { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, + unlike => { + binary_upgrade => 1, + exclude_dump_test_schema => 1, + }, + }, + 'CREATE TABLE table_with_stats' => { create_order => 98, create_sql => 'CREATE TABLE dump_test.table_index_stats ( @@ -3460,7 +3506,7 @@ command_fails_like( [ 'pg_dump', '-p', "$port", 'qqq' ], - qr/\Qpg_dump: error: connection to database "qqq" failed: FATAL: database "qqq" does not exist\E/, + qr/pg_dump: error: connection to server .* failed: FATAL: database "qqq" does not exist/, 'connecting to a non-existent database'); ######################################### diff --git a/src/bin/pg_resetwal/Makefile b/src/bin/pg_resetwal/Makefile index 464268e9788a8..7dfa80c5e51ff 100644 --- a/src/bin/pg_resetwal/Makefile +++ b/src/bin/pg_resetwal/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/pg_resetwal # -# Copyright (c) 1998-2020, PostgreSQL Global Development Group +# Copyright (c) 1998-2021, PostgreSQL Global Development Group # # src/bin/pg_resetwal/Makefile # diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index cb6ef1918206a..805dafef072b1 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -20,7 +20,7 @@ * step 2 ... 
* * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pg_resetwal/pg_resetwal.c diff --git a/src/bin/pg_rewind/Makefile b/src/bin/pg_rewind/Makefile index 9bfde5c087b26..5514b95e6c1e1 100644 --- a/src/bin/pg_rewind/Makefile +++ b/src/bin/pg_rewind/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/pg_rewind # -# Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group # # src/bin/pg_rewind/Makefile # diff --git a/src/bin/pg_rewind/datapagemap.c b/src/bin/pg_rewind/datapagemap.c index 16fa89da943ec..3f8952b8f3be0 100644 --- a/src/bin/pg_rewind/datapagemap.c +++ b/src/bin/pg_rewind/datapagemap.c @@ -5,7 +5,7 @@ * * This is a fairly simple bitmap. * - * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2013-2021, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/bin/pg_rewind/datapagemap.h b/src/bin/pg_rewind/datapagemap.h index b5fac09ea6b27..76e9f20c94124 100644 --- a/src/bin/pg_rewind/datapagemap.h +++ b/src/bin/pg_rewind/datapagemap.h @@ -2,7 +2,7 @@ * * datapagemap.h * - * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2013-2021, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c index 065368a2208ee..c50f283ede41b 100644 --- a/src/bin/pg_rewind/file_ops.c +++ b/src/bin/pg_rewind/file_ops.c @@ -8,7 +8,7 @@ * do nothing if it's enabled. You should avoid accessing the target files * directly but if you do, make sure you honor the --dry-run mode! 
* - * Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/bin/pg_rewind/file_ops.h b/src/bin/pg_rewind/file_ops.h index c763085976830..611981f293a12 100644 --- a/src/bin/pg_rewind/file_ops.h +++ b/src/bin/pg_rewind/file_ops.h @@ -3,7 +3,7 @@ * file_ops.h * Helper functions for operating on files * - * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2013-2021, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index ba34dbac1468f..2618b4c957b2f 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -16,7 +16,7 @@ * for each file. Finally, it sorts the array to the final order that the * actions should be executed in. * - * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2013-2021, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h index 6f03447d7ebf5..926463efadc74 100644 --- a/src/bin/pg_rewind/filemap.h +++ b/src/bin/pg_rewind/filemap.h @@ -2,7 +2,7 @@ * * filemap.h * - * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2013-2021, PostgreSQL Global Development Group *------------------------------------------------------------------------- */ #ifndef FILEMAP_H diff --git a/src/bin/pg_rewind/libpq_source.c b/src/bin/pg_rewind/libpq_source.c index 47beba277a4ca..86d2adcaee99f 100644 --- a/src/bin/pg_rewind/libpq_source.c +++ b/src/bin/pg_rewind/libpq_source.c @@ -3,7 +3,7 @@ * libpq_source.c * Functions for fetching files from a remote server via libpq. 
* - * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2013-2021, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/bin/pg_rewind/local_source.c b/src/bin/pg_rewind/local_source.c index fa1b6e80ec328..9c3491c3fba1b 100644 --- a/src/bin/pg_rewind/local_source.c +++ b/src/bin/pg_rewind/local_source.c @@ -3,7 +3,7 @@ * local_source.c * Functions for using a local data directory as the source. * - * Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c index 9275cba51bd65..7117ae522972d 100644 --- a/src/bin/pg_rewind/parsexlog.c +++ b/src/bin/pg_rewind/parsexlog.c @@ -3,7 +3,7 @@ * parsexlog.c * Functions for reading Write-Ahead-Log * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * *------------------------------------------------------------------------- diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index d89c08f81da08..359a6a587cb7f 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -3,7 +3,7 @@ * pg_rewind.c * Synchronizes a PostgreSQL data directory to a new timeline * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h index 0dc3dbd52551c..d38635a73dc8a 100644 --- a/src/bin/pg_rewind/pg_rewind.h +++ b/src/bin/pg_rewind/pg_rewind.h @@ -3,7 
+3,7 @@ * pg_rewind.h * * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * *------------------------------------------------------------------------- diff --git a/src/bin/pg_rewind/rewind_source.h b/src/bin/pg_rewind/rewind_source.h index e87f239a47a09..2da92dbff9487 100644 --- a/src/bin/pg_rewind/rewind_source.h +++ b/src/bin/pg_rewind/rewind_source.h @@ -8,7 +8,7 @@ * operations to fetch data from the source system, so that the rest of * the code doesn't need to care what kind of a source its dealing with. * - * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2013-2021, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/bin/pg_rewind/timeline.c b/src/bin/pg_rewind/timeline.c index 1ea6607189380..6756c5ddbf79a 100644 --- a/src/bin/pg_rewind/timeline.c +++ b/src/bin/pg_rewind/timeline.c @@ -3,7 +3,7 @@ * timeline.c * timeline-related functions. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index f3afea9d56117..43fc297eb69d5 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -3,7 +3,7 @@ * * server checks and output routines * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/check.c */ diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c index 39bcaa8fe1a2b..4f647cdf33474 100644 --- a/src/bin/pg_upgrade/controldata.c +++ b/src/bin/pg_upgrade/controldata.c @@ -3,7 +3,7 @@ * * controldata functions * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/controldata.c */ @@ -97,20 +97,20 @@ get_control_data(ClusterInfo *cluster, bool live_check) if (getenv("LC_MESSAGES")) lc_messages = pg_strdup(getenv("LC_MESSAGES")); - pg_putenv("LC_COLLATE", NULL); - pg_putenv("LC_CTYPE", NULL); - pg_putenv("LC_MONETARY", NULL); - pg_putenv("LC_NUMERIC", NULL); - pg_putenv("LC_TIME", NULL); + unsetenv("LC_COLLATE"); + unsetenv("LC_CTYPE"); + unsetenv("LC_MONETARY"); + unsetenv("LC_NUMERIC"); + unsetenv("LC_TIME"); #ifndef WIN32 - pg_putenv("LANG", NULL); + unsetenv("LANG"); #else /* On Windows the default locale may not be English, so force it */ - pg_putenv("LANG", "en"); + setenv("LANG", "en", 1); #endif - pg_putenv("LANGUAGE", NULL); - pg_putenv("LC_ALL", NULL); - pg_putenv("LC_MESSAGES", "C"); + unsetenv("LANGUAGE"); + unsetenv("LC_ALL"); + setenv("LC_MESSAGES", "C", 1); /* * Check for clean shutdown @@ -490,17 +490,31 @@ get_control_data(ClusterInfo *cluster, bool live_check) pclose(output); /* - * Restore environment 
variables + * Restore environment variables. Note all but LANG and LC_MESSAGES were + * unset above. */ - pg_putenv("LC_COLLATE", lc_collate); - pg_putenv("LC_CTYPE", lc_ctype); - pg_putenv("LC_MONETARY", lc_monetary); - pg_putenv("LC_NUMERIC", lc_numeric); - pg_putenv("LC_TIME", lc_time); - pg_putenv("LANG", lang); - pg_putenv("LANGUAGE", language); - pg_putenv("LC_ALL", lc_all); - pg_putenv("LC_MESSAGES", lc_messages); + if (lc_collate) + setenv("LC_COLLATE", lc_collate, 1); + if (lc_ctype) + setenv("LC_CTYPE", lc_ctype, 1); + if (lc_monetary) + setenv("LC_MONETARY", lc_monetary, 1); + if (lc_numeric) + setenv("LC_NUMERIC", lc_numeric, 1); + if (lc_time) + setenv("LC_TIME", lc_time, 1); + if (lang) + setenv("LANG", lang, 1); + else + unsetenv("LANG"); + if (language) + setenv("LANGUAGE", language, 1); + if (lc_all) + setenv("LC_ALL", lc_all, 1); + if (lc_messages) + setenv("LC_MESSAGES", lc_messages, 1); + else + unsetenv("LC_MESSAGES"); pg_free(lc_collate); pg_free(lc_ctype); diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c index 20e73be361538..33d9591f3743f 100644 --- a/src/bin/pg_upgrade/dump.c +++ b/src/bin/pg_upgrade/dump.c @@ -3,7 +3,7 @@ * * dump functions * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/dump.c */ diff --git a/src/bin/pg_upgrade/exec.c b/src/bin/pg_upgrade/exec.c index bdff13bb688fe..43a4565c2eff0 100644 --- a/src/bin/pg_upgrade/exec.c +++ b/src/bin/pg_upgrade/exec.c @@ -3,7 +3,7 @@ * * execution functions * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/exec.c */ diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c index cc8a675d0095c..9b0cc16e452fc 100644 --- a/src/bin/pg_upgrade/file.c +++ b/src/bin/pg_upgrade/file.c @@ -3,7 +3,7 @@ * * file system operations * - * Copyright (c) 2010-2020, PostgreSQL 
Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/file.c */ diff --git a/src/bin/pg_upgrade/function.c b/src/bin/pg_upgrade/function.c index e0bc368e1e168..4952de1de5a9d 100644 --- a/src/bin/pg_upgrade/function.c +++ b/src/bin/pg_upgrade/function.c @@ -3,7 +3,7 @@ * * server-side function support * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/function.c */ diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index 7e524ea19206d..5d9a26cf82290 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -3,7 +3,7 @@ * * information support functions * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/info.c */ diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c index 548d648e8c4e6..9c9b313e0cf9e 100644 --- a/src/bin/pg_upgrade/option.c +++ b/src/bin/pg_upgrade/option.c @@ -3,7 +3,7 @@ * * options functions * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/option.c */ @@ -193,7 +193,7 @@ parseCommandLine(int argc, char *argv[]) * Push the user name into the environment so pre-9.1 * pg_ctl/libpq uses it. 
*/ - pg_putenv("PGUSER", os_info.user); + setenv("PGUSER", os_info.user, 1); break; case 'v': @@ -245,11 +245,11 @@ parseCommandLine(int argc, char *argv[]) char *pgoptions = psprintf("%s %s", FIX_DEFAULT_READ_ONLY, getenv("PGOPTIONS")); - pg_putenv("PGOPTIONS", pgoptions); + setenv("PGOPTIONS", pgoptions, 1); pfree(pgoptions); } else - pg_putenv("PGOPTIONS", FIX_DEFAULT_READ_ONLY); + setenv("PGOPTIONS", FIX_DEFAULT_READ_ONLY, 1); /* Get values from env if not already set */ check_required_directory(&old_cluster.bindir, "PGBINOLD", false, diff --git a/src/bin/pg_upgrade/parallel.c b/src/bin/pg_upgrade/parallel.c index 5e8cfbbec909c..d5883e2eba46b 100644 --- a/src/bin/pg_upgrade/parallel.c +++ b/src/bin/pg_upgrade/parallel.c @@ -3,7 +3,7 @@ * * multi-process support * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/parallel.c */ diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index e2253ecd5d068..e23b8ca88d919 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -3,7 +3,7 @@ * * main source file * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/pg_upgrade.c */ diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index ee70243c2e946..919a7849fd042 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -1,7 +1,7 @@ /* * pg_upgrade.h * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/pg_upgrade.h */ @@ -436,7 +436,6 @@ void end_progress_output(void); void prep_status(const char *fmt,...) 
pg_attribute_printf(1, 2); void check_ok(void); unsigned int str2uint(const char *str); -void pg_putenv(const char *var, const char *val); /* version.c */ diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c index f76ddaaf3a161..4deae7d9858ba 100644 --- a/src/bin/pg_upgrade/relfilenode.c +++ b/src/bin/pg_upgrade/relfilenode.c @@ -3,7 +3,7 @@ * * relfilenode functions * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/relfilenode.c */ diff --git a/src/bin/pg_upgrade/server.c b/src/bin/pg_upgrade/server.c index 713509f54062a..7fed0ae1086dc 100644 --- a/src/bin/pg_upgrade/server.c +++ b/src/bin/pg_upgrade/server.c @@ -3,7 +3,7 @@ * * database server functions * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/server.c */ @@ -30,8 +30,7 @@ connectToServer(ClusterInfo *cluster, const char *db_name) if (conn == NULL || PQstatus(conn) != CONNECTION_OK) { - pg_log(PG_REPORT, "connection to database failed: %s", - PQerrorMessage(conn)); + pg_log(PG_REPORT, "%s", PQerrorMessage(conn)); if (conn) PQfinish(conn); @@ -50,6 +49,8 @@ connectToServer(ClusterInfo *cluster, const char *db_name) * get_db_conn() * * get database connection, using named database + standard params for cluster + * + * Caller must check for connection failure! 
*/ static PGconn * get_db_conn(ClusterInfo *cluster, const char *db_name) @@ -294,8 +295,7 @@ start_postmaster(ClusterInfo *cluster, bool report_and_exit_on_error) if ((conn = get_db_conn(cluster, "template1")) == NULL || PQstatus(conn) != CONNECTION_OK) { - pg_log(PG_REPORT, "\nconnection to database failed: %s", - PQerrorMessage(conn)); + pg_log(PG_REPORT, "\n%s", PQerrorMessage(conn)); if (conn) PQfinish(conn); if (cluster == &old_cluster) diff --git a/src/bin/pg_upgrade/tablespace.c b/src/bin/pg_upgrade/tablespace.c index 11a242973818c..bd49d300db111 100644 --- a/src/bin/pg_upgrade/tablespace.c +++ b/src/bin/pg_upgrade/tablespace.c @@ -3,7 +3,7 @@ * * tablespace functions * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/tablespace.c */ diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh index 04aa7fd9f513c..ca923ba01bc4d 100644 --- a/src/bin/pg_upgrade/test.sh +++ b/src/bin/pg_upgrade/test.sh @@ -6,7 +6,7 @@ # runs the regression tests (to put in some data), runs pg_dumpall, # runs pg_upgrade, runs pg_dumpall again, compares the dumps. # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California set -e @@ -167,17 +167,24 @@ createdb "regression$dbname3" || createdb_status=$? if "$MAKE" -C "$oldsrc" installcheck-parallel; then oldpgversion=`psql -X -A -t -d regression -c "SHOW server_version_num"` - # before dumping, get rid of objects not existing in later versions + # before dumping, get rid of objects not feasible in later versions if [ "$newsrc" != "$oldsrc" ]; then fix_sql="" case $oldpgversion in 804??) 
- fix_sql="DROP FUNCTION public.myfunc(integer); DROP FUNCTION public.oldstyle_length(integer, text);" - ;; - *) - fix_sql="DROP FUNCTION public.oldstyle_length(integer, text);" + fix_sql="DROP FUNCTION public.myfunc(integer);" ;; esac + fix_sql="$fix_sql + DROP FUNCTION IF EXISTS + public.oldstyle_length(integer, text); -- last in 9.6 + DROP FUNCTION IF EXISTS + public.putenv(text); -- last in v13 + DROP OPERATOR IF EXISTS -- last in v13 + public.#@# (pg_catalog.int8, NONE), + public.#%# (pg_catalog.int8, NONE), + public.!=- (pg_catalog.int8, NONE), + public.#@%# (pg_catalog.int8, NONE);" psql -X -d regression -c "$fix_sql;" || psql_fix_sql_status=$? fi diff --git a/src/bin/pg_upgrade/util.c b/src/bin/pg_upgrade/util.c index a16c794261b59..fc20472fe7b8c 100644 --- a/src/bin/pg_upgrade/util.c +++ b/src/bin/pg_upgrade/util.c @@ -3,7 +3,7 @@ * * utility functions * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/util.c */ @@ -241,39 +241,3 @@ str2uint(const char *str) { return strtoul(str, NULL, 10); } - - -/* - * pg_putenv() - * - * This is like putenv(), but takes two arguments. - * It also does unsetenv() if val is NULL. - */ -void -pg_putenv(const char *var, const char *val) -{ - if (val) - { -#ifndef WIN32 - char *envstr; - - envstr = psprintf("%s=%s", var, val); - putenv(envstr); - - /* - * Do not free envstr because it becomes part of the environment on - * some operating systems. See port/unsetenv.c::unsetenv. 
- */ -#else - SetEnvironmentVariableA(var, val); -#endif - } - else - { -#ifndef WIN32 - unsetenv(var); -#else - SetEnvironmentVariableA(var, ""); -#endif - } -} diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c index db1934124ee35..a41247b33d2f6 100644 --- a/src/bin/pg_upgrade/version.c +++ b/src/bin/pg_upgrade/version.c @@ -3,7 +3,7 @@ * * Postgres-version-specific routines * - * Copyright (c) 2010-2020, PostgreSQL Global Development Group + * Copyright (c) 2010-2021, PostgreSQL Global Development Group * src/bin/pg_upgrade/version.c */ diff --git a/src/bin/pg_verifybackup/parse_manifest.c b/src/bin/pg_verifybackup/parse_manifest.c index 5b4ce28837149..db2fa90cfec1f 100644 --- a/src/bin/pg_verifybackup/parse_manifest.c +++ b/src/bin/pg_verifybackup/parse_manifest.c @@ -3,7 +3,7 @@ * parse_manifest.c * Parse a backup manifest in JSON format. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pg_verifybackup/parse_manifest.c diff --git a/src/bin/pg_verifybackup/parse_manifest.h b/src/bin/pg_verifybackup/parse_manifest.h index cbb7ca1397e6d..b0745a0a5928a 100644 --- a/src/bin/pg_verifybackup/parse_manifest.h +++ b/src/bin/pg_verifybackup/parse_manifest.h @@ -3,7 +3,7 @@ * parse_manifest.h * Parse a backup manifest in JSON format. 
* - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pg_verifybackup/parse_manifest.h diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index bf388de79fa5d..bb3f2783d0c2e 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -3,7 +3,7 @@ * pg_verifybackup.c * Verify a backup against a backup manifest. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pg_verifybackup/pg_verifybackup.c diff --git a/src/bin/pg_waldump/compat.c b/src/bin/pg_waldump/compat.c index 7cb0539e749c0..08bd62743dd27 100644 --- a/src/bin/pg_waldump/compat.c +++ b/src/bin/pg_waldump/compat.c @@ -3,7 +3,7 @@ * compat.c * Reimplementations of various backend functions. 
* - * Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_waldump/compat.c diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 31e99c2a6da5d..164868d16efc1 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -2,7 +2,7 @@ * * pg_waldump.c - decode and display WAL * - * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2013-2021, PostgreSQL Global Development Group * * IDENTIFICATION * src/bin/pg_waldump/pg_waldump.c diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index 85d61caa9f10d..4d529ea550010 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -4,7 +4,7 @@ * exprparse.y * bison grammar for a simple expression syntax * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pgbench/exprparse.y diff --git a/src/bin/pgbench/exprscan.l b/src/bin/pgbench/exprscan.l index 430bff38a617d..75432cedc6530 100644 --- a/src/bin/pgbench/exprscan.l +++ b/src/bin/pgbench/exprscan.l @@ -15,7 +15,7 @@ * * Note that this lexer operates within the framework created by psqlscan.l, * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/bin/pgbench/exprscan.l diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 3057665bbec56..a4a3f40048e82 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -5,7 +5,7 @@ * Originally written by Tatsuo Ishii and enhanced by many contributors. 
* * src/bin/pgbench/pgbench.c - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * ALL RIGHTS RESERVED; * * Permission to use, copy, modify, and distribute this software and its @@ -1225,8 +1225,7 @@ doConnect(void) /* check to see that the backend connection was successfully made */ if (PQstatus(conn) == CONNECTION_BAD) { - pg_log_error("connection to database \"%s\" failed: %s", - dbName, PQerrorMessage(conn)); + pg_log_error("%s", PQerrorMessage(conn)); PQfinish(conn); return NULL; } @@ -1375,6 +1374,7 @@ makeVariableValue(Variable *var) * "src/bin/pgbench/exprscan.l". Also see parseVariable(), below. * * Note: this static function is copied from "src/bin/psql/variables.c" + * but changed to disallow variable names starting with a digit. */ static bool valid_variable_name(const char *name) @@ -1385,6 +1385,15 @@ valid_variable_name(const char *name) if (*ptr == '\0') return false; + /* must not start with [0-9] */ + if (IS_HIGHBIT_SET(*ptr) || + strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" + "_", *ptr) != NULL) + ptr++; + else + return false; + + /* remaining characters can include [0-9] */ while (*ptr) { if (IS_HIGHBIT_SET(*ptr) || @@ -1505,23 +1514,27 @@ putVariableInt(CState *st, const char *context, char *name, int64 value) * * "sql" points at a colon. If what follows it looks like a valid * variable name, return a malloc'd string containing the variable name, - * and set *eaten to the number of characters consumed. + * and set *eaten to the number of characters consumed (including the colon). * Otherwise, return NULL. 
*/ static char * parseVariable(const char *sql, int *eaten) { - int i = 0; + int i = 1; /* starting at 1 skips the colon */ char *name; - do - { + /* keep this logic in sync with valid_variable_name() */ + if (IS_HIGHBIT_SET(sql[i]) || + strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" + "_", sql[i]) != NULL) + i++; + else + return NULL; + + while (IS_HIGHBIT_SET(sql[i]) || + strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" + "_0123456789", sql[i]) != NULL) i++; - } while (IS_HIGHBIT_SET(sql[i]) || - strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" - "_0123456789", sql[i]) != NULL); - if (i == 1) - return NULL; /* no valid variable name chars */ name = pg_malloc(i); memcpy(name, &sql[1], i - 1); @@ -6027,13 +6040,6 @@ main(int argc, char **argv) if (con == NULL) exit(1); - if (PQstatus(con) == CONNECTION_BAD) - { - pg_log_fatal("connection to database \"%s\" failed: %s", - dbName, PQerrorMessage(con)); - exit(1); - } - if (internal_script_used) GetTableInfo(con, scale_given); diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index fb2c34f512fb2..3a9d89e6f1509 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -2,7 +2,7 @@ * * pgbench.h * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * *------------------------------------------------------------------------- diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 61b671d54fd66..daffc18e52194 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -90,7 +90,7 @@ sub pgbench 1, [qr{^$}], [ - qr{connection to database "no-such-database" failed}, + qr{connection to server .* failed}, qr{FATAL: database "no-such-database" does not exist} ], 'no such 
database'); diff --git a/src/bin/pgevent/Makefile b/src/bin/pgevent/Makefile index 28c3078b01c0a..da69e91839d5a 100644 --- a/src/bin/pgevent/Makefile +++ b/src/bin/pgevent/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/pgevent # -# Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Copyright (c) 1996-2021, PostgreSQL Global Development Group # #------------------------------------------------------------------------- diff --git a/src/bin/psql/Makefile b/src/bin/psql/Makefile index 2305d93e39cf7..d00881163c023 100644 --- a/src/bin/psql/Makefile +++ b/src/bin/psql/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/bin/psql # -# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/bin/psql/Makefile diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index 38b588882d154..c98e3d31d0c71 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/command.c */ @@ -928,6 +928,9 @@ exec_command_d(PsqlScanState scan_state, bool active_branch, const char *cmd) else success = listExtensions(pattern); break; + case 'X': /* Extended Statistics */ + success = listExtendedStats(pattern); + break; case 'y': /* Event Triggers */ success = listEventTriggers(pattern, show_verbose); break; @@ -2296,17 +2299,8 @@ exec_command_setenv(PsqlScanState scan_state, bool active_branch, else { /* Set variable to the value of the next argument */ - char *newval; - - newval = psprintf("%s=%s", envvar, envval); - putenv(newval); + setenv(envvar, envval, 1); success = true; - - /* - * Do not free newval here, it will screw up the environment if - * you do. See putenv man page for details. 
That means we leak a - * bit of memory here, but not enough to worry about. - */ } free(envvar); free(envval); diff --git a/src/bin/psql/command.h b/src/bin/psql/command.h index 006832f1022a9..0ff5b768db1a0 100644 --- a/src/bin/psql/command.h +++ b/src/bin/psql/command.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/command.h */ diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index dfbc22970f831..6f507104f464f 100644 --- a/src/bin/psql/common.c +++ b/src/bin/psql/common.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/common.c */ diff --git a/src/bin/psql/common.h b/src/bin/psql/common.h index ec4e83c9fdf8a..041b2ac068a74 100644 --- a/src/bin/psql/common.h +++ b/src/bin/psql/common.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/common.h */ diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c index f59db8d7bd080..78f0dc5a507af 100644 --- a/src/bin/psql/copy.c +++ b/src/bin/psql/copy.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/copy.c */ diff --git a/src/bin/psql/copy.h b/src/bin/psql/copy.h index b2daf91851282..5923da8698433 100644 --- a/src/bin/psql/copy.h +++ b/src/bin/psql/copy.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * 
src/bin/psql/copy.h */ diff --git a/src/bin/psql/create_help.pl b/src/bin/psql/create_help.pl index 60e093bad4902..83324239740b0 100644 --- a/src/bin/psql/create_help.pl +++ b/src/bin/psql/create_help.pl @@ -3,7 +3,7 @@ ################################################################# # create_help.pl -- converts SGML docs to internal psql help # -# Copyright (c) 2000-2020, PostgreSQL Global Development Group +# Copyright (c) 2000-2021, PostgreSQL Global Development Group # # src/bin/psql/create_help.pl ################################################################# diff --git a/src/bin/psql/crosstabview.c b/src/bin/psql/crosstabview.c index f06cb068c90b5..97515f0d4a0bf 100644 --- a/src/bin/psql/crosstabview.c +++ b/src/bin/psql/crosstabview.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/crosstabview.c */ diff --git a/src/bin/psql/crosstabview.h b/src/bin/psql/crosstabview.h index 096e76b622491..53d0e4182ba95 100644 --- a/src/bin/psql/crosstabview.h +++ b/src/bin/psql/crosstabview.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/crosstabview.h */ diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 14150d05a988d..20af5a92b4f41 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -6,7 +6,7 @@ * with servers of versions 7.4 and up. It's okay to omit irrelevant * information for an old server, but not to fail outright. 
* - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/describe.c */ @@ -3718,6 +3718,7 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys " WHEN " CppAsString2(RELKIND_INDEX) " THEN '%s'" " WHEN " CppAsString2(RELKIND_SEQUENCE) " THEN '%s'" " WHEN 's' THEN '%s'" + " WHEN " CppAsString2(RELKIND_TOASTVALUE) " THEN '%s'" " WHEN " CppAsString2(RELKIND_FOREIGN_TABLE) " THEN '%s'" " WHEN " CppAsString2(RELKIND_PARTITIONED_TABLE) " THEN '%s'" " WHEN " CppAsString2(RELKIND_PARTITIONED_INDEX) " THEN '%s'" @@ -3731,6 +3732,7 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys gettext_noop("index"), gettext_noop("sequence"), gettext_noop("special"), + gettext_noop("TOAST table"), gettext_noop("foreign table"), gettext_noop("partitioned table"), gettext_noop("partitioned index"), @@ -3813,8 +3815,13 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys appendPQExpBufferStr(&buf, "\nWHERE c.relkind IN ("); if (showTables) + { appendPQExpBufferStr(&buf, CppAsString2(RELKIND_RELATION) "," CppAsString2(RELKIND_PARTITIONED_TABLE) ","); + /* with 'S' or a pattern, allow 't' to match TOAST tables too */ + if (showSystem || pattern) + appendPQExpBufferStr(&buf, CppAsString2(RELKIND_TOASTVALUE) ","); + } if (showViews) appendPQExpBufferStr(&buf, CppAsString2(RELKIND_VIEW) ","); if (showMatViews) @@ -3834,17 +3841,9 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys if (!showSystem && !pattern) appendPQExpBufferStr(&buf, " AND n.nspname <> 'pg_catalog'\n" + " AND n.nspname !~ '^pg_toast'\n" " AND n.nspname <> 'information_schema'\n"); - /* - * TOAST objects are suppressed unconditionally. 
Since we don't provide - * any way to select RELKIND_TOASTVALUE above, we would never show toast - * tables in any case; it seems a bit confusing to allow their indexes to - * be shown. Use plain \d if you really need to look at a TOAST - * table/index. - */ - appendPQExpBufferStr(&buf, " AND n.nspname !~ '^pg_toast'\n"); - processSQLNamePattern(pset.db, &buf, pattern, true, false, "n.nspname", "c.relname", NULL, "pg_catalog.pg_table_is_visible(c.oid)"); @@ -4057,17 +4056,9 @@ listPartitionedTables(const char *reltypes, const char *pattern, bool verbose) if (!pattern) appendPQExpBufferStr(&buf, " AND n.nspname <> 'pg_catalog'\n" + " AND n.nspname !~ '^pg_toast'\n" " AND n.nspname <> 'information_schema'\n"); - /* - * TOAST objects are suppressed unconditionally. Since we don't provide - * any way to select RELKIND_TOASTVALUE above, we would never show toast - * tables in any case; it seems a bit confusing to allow their indexes to - * be shown. Use plain \d if you really need to look at a TOAST - * table/index. - */ - appendPQExpBufferStr(&buf, " AND n.nspname !~ '^pg_toast'\n"); - processSQLNamePattern(pset.db, &buf, pattern, true, false, "n.nspname", "c.relname", NULL, "pg_catalog.pg_table_is_visible(c.oid)"); @@ -4401,6 +4392,89 @@ listEventTriggers(const char *pattern, bool verbose) return true; } +/* + * \dX + * + * Describes extended statistics. 
+ */ +bool +listExtendedStats(const char *pattern) +{ + PQExpBufferData buf; + PGresult *res; + printQueryOpt myopt = pset.popt; + + if (pset.sversion < 100000) + { + char sverbuf[32]; + + pg_log_error("The server (version %s) does not support extended statistics.", + formatPGVersionNumber(pset.sversion, false, + sverbuf, sizeof(sverbuf))); + return true; + } + + initPQExpBuffer(&buf); + printfPQExpBuffer(&buf, + "SELECT \n" + "es.stxnamespace::pg_catalog.regnamespace::text AS \"%s\", \n" + "es.stxname AS \"%s\", \n" + "pg_catalog.format('%%s FROM %%s', \n" + " (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(a.attname),', ') \n" + " FROM pg_catalog.unnest(es.stxkeys) s(attnum) \n" + " JOIN pg_catalog.pg_attribute a \n" + " ON (es.stxrelid = a.attrelid \n" + " AND a.attnum = s.attnum \n" + " AND NOT a.attisdropped)), \n" + "es.stxrelid::regclass) AS \"%s\"", + gettext_noop("Schema"), + gettext_noop("Name"), + gettext_noop("Definition")); + + appendPQExpBuffer(&buf, + ",\nCASE WHEN 'd' = any(es.stxkind) THEN 'defined' \n" + "END AS \"%s\", \n" + "CASE WHEN 'f' = any(es.stxkind) THEN 'defined' \n" + "END AS \"%s\"", + gettext_noop("Ndistinct"), + gettext_noop("Dependencies")); + + /* + * Include the MCV statistics kind. 
+ */ + if (pset.sversion >= 120000) + { + appendPQExpBuffer(&buf, + ",\nCASE WHEN 'm' = any(es.stxkind) THEN 'defined' \n" + "END AS \"%s\" ", + gettext_noop("MCV")); + } + + appendPQExpBufferStr(&buf, + " \nFROM pg_catalog.pg_statistic_ext es \n"); + + processSQLNamePattern(pset.db, &buf, pattern, + false, false, + "es.stxnamespace::pg_catalog.regnamespace::text", "es.stxname", + NULL, NULL); + + appendPQExpBufferStr(&buf, "ORDER BY 1, 2;"); + + res = PSQLexec(buf.data); + termPQExpBuffer(&buf); + if (!res) + return false; + + myopt.nullPrint = NULL; + myopt.title = _("List of extended statistics"); + myopt.translate_header = true; + + printQuery(res, &myopt, pset.queryFout, false, pset.logfile); + + PQclear(res); + return true; +} + /* * \dC * diff --git a/src/bin/psql/describe.h b/src/bin/psql/describe.h index f0e3ec957c055..39856a0c7e8f0 100644 --- a/src/bin/psql/describe.h +++ b/src/bin/psql/describe.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/describe.h */ @@ -102,6 +102,9 @@ extern bool listExtensions(const char *pattern); /* \dx+ */ extern bool listExtensionContents(const char *pattern); +/* \dX */ +extern bool listExtendedStats(const char *pattern); + /* \dy */ extern bool listEventTriggers(const char *pattern, bool verbose); diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c index af829282e6031..e44120bf76913 100644 --- a/src/bin/psql/help.c +++ b/src/bin/psql/help.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/help.c */ @@ -267,6 +267,7 @@ slashUsage(unsigned short int pager) fprintf(output, _(" \\du[S+] [PATTERN] list roles\n")); fprintf(output, _(" \\dv[S+] [PATTERN] list views\n")); fprintf(output, _(" \\dx[+] 
[PATTERN] list extensions\n")); + fprintf(output, _(" \\dX [PATTERN] list extended statistics\n")); fprintf(output, _(" \\dy [PATTERN] list event triggers\n")); fprintf(output, _(" \\l[+] [PATTERN] list databases\n")); fprintf(output, _(" \\sf[+] FUNCNAME show a function's definition\n")); @@ -534,6 +535,7 @@ helpSQL(const char *topic, unsigned short int pager) int i; int j; + /* Find screen width to determine how many columns will fit */ #ifdef TIOCGWINSZ struct winsize screen_size; @@ -571,56 +573,63 @@ helpSQL(const char *topic, unsigned short int pager) else { int i, - j, - x = 0; - bool help_found = false; + pass; FILE *output = NULL; size_t len, - wordlen; - int nl_count = 0; + wordlen, + j; + int nl_count; /* + * len is the amount of the input to compare to the help topic names. * We first try exact match, then first + second words, then first * word only. */ len = strlen(topic); - for (x = 1; x <= 3; x++) + for (pass = 1; pass <= 3; pass++) { - if (x > 1) /* Nothing on first pass - try the opening + if (pass > 1) /* Nothing on first pass - try the opening * word(s) */ { wordlen = j = 1; - while (topic[j] != ' ' && j++ < len) + while (j < len && topic[j++] != ' ') wordlen++; - if (x == 2) + if (pass == 2 && j < len) { - j++; - while (topic[j] != ' ' && j++ <= len) + wordlen++; + while (j < len && topic[j++] != ' ') wordlen++; } - if (wordlen >= len) /* Don't try again if the same word */ + if (wordlen >= len) { - if (!output) - output = PageOutput(nl_count, pager ? &(pset.popt.topt) : NULL); - break; + /* Failed to shorten input, so try next pass if any */ + continue; } len = wordlen; } - /* Count newlines for pager */ + /* + * Count newlines for pager. This logic must agree with what the + * following loop will do! + */ + nl_count = 0; for (i = 0; QL_HELP[i].cmd; i++) { if (pg_strncasecmp(topic, QL_HELP[i].cmd, len) == 0 || strcmp(topic, "*") == 0) { - nl_count += 5 + QL_HELP[i].nl_count; + /* magic constant here must match format below! 
*/ + nl_count += 7 + QL_HELP[i].nl_count; /* If we have an exact match, exit. Fixes \h SELECT */ if (pg_strcasecmp(topic, QL_HELP[i].cmd) == 0) break; } } + /* If no matches, don't open the output yet */ + if (nl_count == 0) + continue; if (!output) output = PageOutput(nl_count, pager ? &(pset.popt.topt) : NULL); @@ -635,10 +644,10 @@ helpSQL(const char *topic, unsigned short int pager) initPQExpBuffer(&buffer); QL_HELP[i].syntaxfunc(&buffer); - help_found = true; url = psprintf("https://www.postgresql.org/docs/%s/%s.html", strstr(PG_VERSION, "devel") ? "devel" : PG_MAJORVERSION, QL_HELP[i].docbook_id); + /* # of newlines in format must match constant above! */ fprintf(output, _("Command: %s\n" "Description: %s\n" "Syntax:\n%s\n\n" @@ -648,17 +657,24 @@ helpSQL(const char *topic, unsigned short int pager) buffer.data, url); free(url); + termPQExpBuffer(&buffer); + /* If we have an exact match, exit. Fixes \h SELECT */ if (pg_strcasecmp(topic, QL_HELP[i].cmd) == 0) break; } } - if (help_found) /* Don't keep trying if we got a match */ - break; + break; } - if (!help_found) - fprintf(output, _("No help available for \"%s\".\nTry \\h with no arguments to see available help.\n"), topic); + /* If we never found anything, report that */ + if (!output) + { + output = PageOutput(2, pager ? 
&(pset.popt.topt) : NULL); + fprintf(output, _("No help available for \"%s\".\n" + "Try \\h with no arguments to see available help.\n"), + topic); + } ClosePager(output); } @@ -671,7 +687,7 @@ print_copyright(void) { puts("PostgreSQL Database Management System\n" "(formerly known as Postgres, then as Postgres95)\n\n" - "Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group\n\n" + "Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n\n" "Portions Copyright (c) 1994, The Regents of the University of California\n\n" "Permission to use, copy, modify, and distribute this software and its\n" "documentation for any purpose, without fee, and without a written agreement\n" diff --git a/src/bin/psql/help.h b/src/bin/psql/help.h index 2e2666d3d0e8d..d4f91e0be2bf6 100644 --- a/src/bin/psql/help.h +++ b/src/bin/psql/help.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/help.h */ diff --git a/src/bin/psql/input.c b/src/bin/psql/input.c index 788ff1f9b7b07..f926bc98dc208 100644 --- a/src/bin/psql/input.c +++ b/src/bin/psql/input.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/input.c */ diff --git a/src/bin/psql/input.h b/src/bin/psql/input.h index cfa03f59ea731..1a5a1be999e19 100644 --- a/src/bin/psql/input.h +++ b/src/bin/psql/input.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/input.h */ diff --git a/src/bin/psql/large_obj.c b/src/bin/psql/large_obj.c index cae81c0f15222..c15fcc08851d5 100644 --- a/src/bin/psql/large_obj.c +++ 
b/src/bin/psql/large_obj.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/large_obj.c */ diff --git a/src/bin/psql/large_obj.h b/src/bin/psql/large_obj.h index 755b9e70f0dff..003acbf52c9d8 100644 --- a/src/bin/psql/large_obj.h +++ b/src/bin/psql/large_obj.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/large_obj.h */ diff --git a/src/bin/psql/mainloop.c b/src/bin/psql/mainloop.c index 7abe016e40382..e49ed022938b1 100644 --- a/src/bin/psql/mainloop.c +++ b/src/bin/psql/mainloop.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/mainloop.c */ diff --git a/src/bin/psql/mainloop.h b/src/bin/psql/mainloop.h index d9680d45b18df..dd7a1889dee46 100644 --- a/src/bin/psql/mainloop.h +++ b/src/bin/psql/mainloop.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/mainloop.h */ diff --git a/src/bin/psql/prompt.c b/src/bin/psql/prompt.c index f42c3dfc7488d..9f236049f000a 100644 --- a/src/bin/psql/prompt.c +++ b/src/bin/psql/prompt.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/prompt.c */ diff --git a/src/bin/psql/prompt.h b/src/bin/psql/prompt.h index 3c8666918cdc7..ad6646d99b68c 100644 --- a/src/bin/psql/prompt.h +++ b/src/bin/psql/prompt.h @@ -1,7 +1,7 @@ /* * 
psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/prompt.h */ diff --git a/src/bin/psql/psqlscanslash.h b/src/bin/psql/psqlscanslash.h index 7210e49240795..074e961e18c41 100644 --- a/src/bin/psql/psqlscanslash.h +++ b/src/bin/psql/psqlscanslash.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/psqlscanslash.h */ diff --git a/src/bin/psql/psqlscanslash.l b/src/bin/psql/psqlscanslash.l index 4dff84d6271a8..4bb18f132f4f4 100644 --- a/src/bin/psql/psqlscanslash.l +++ b/src/bin/psql/psqlscanslash.l @@ -8,7 +8,7 @@ * * See fe_utils/psqlscan_int.h for additional commentary. * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/bin/psql/settings.h b/src/bin/psql/settings.h index 9601f6e90ce89..d65990059d978 100644 --- a/src/bin/psql/settings.h +++ b/src/bin/psql/settings.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/settings.h */ diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c index 586fcb33661cd..780479c8d7698 100644 --- a/src/bin/psql/startup.c +++ b/src/bin/psql/startup.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/startup.c */ diff --git a/src/bin/psql/stringutils.c b/src/bin/psql/stringutils.c index 
c521749661c30..0acc53801cb4a 100644 --- a/src/bin/psql/stringutils.c +++ b/src/bin/psql/stringutils.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/stringutils.c */ diff --git a/src/bin/psql/stringutils.h b/src/bin/psql/stringutils.h index 4be172e031f9c..b47425e8644c3 100644 --- a/src/bin/psql/stringutils.h +++ b/src/bin/psql/stringutils.h @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/stringutils.h */ diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 3a43c09bf6807..17f726503888d 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -1,7 +1,7 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * Copyright (c) 2000-2021, PostgreSQL Global Development Group * * src/bin/psql/tab-complete.c */ @@ -976,6 +976,11 @@ static const SchemaQuery Query_for_list_of_statistics = { " and pg_catalog.pg_table_is_visible(c2.oid)"\ " and c2.relispartition = 'true'" +#define Query_for_list_of_cursors \ +" SELECT pg_catalog.quote_ident(name) "\ +" FROM pg_catalog.pg_cursors "\ +" WHERE substring(pg_catalog.quote_ident(name),1,%d)='%s'" + /* * These object types were introduced later than our support cutoff of * server version 7.4. 
We use the VersionedQuery infrastructure so that @@ -1500,7 +1505,7 @@ psql_completion(const char *text, int start, int end) "\\dF", "\\dFd", "\\dFp", "\\dFt", "\\dg", "\\di", "\\dl", "\\dL", "\\dm", "\\dn", "\\do", "\\dO", "\\dp", "\\dP", "\\dPi", "\\dPt", "\\drds", "\\dRs", "\\dRp", "\\ds", "\\dS", - "\\dt", "\\dT", "\\dv", "\\du", "\\dx", "\\dy", + "\\dt", "\\dT", "\\dv", "\\du", "\\dx", "\\dX", "\\dy", "\\e", "\\echo", "\\ef", "\\elif", "\\else", "\\encoding", "\\endif", "\\errverbose", "\\ev", "\\f", @@ -2284,6 +2289,10 @@ psql_completion(const char *text, int start, int end) COMPLETE_WITH_VERSIONED_SCHEMA_QUERY(Query_for_list_of_procedures, NULL); else if (Matches("CALL", MatchAny)) COMPLETE_WITH("("); +/* CLOSE */ + else if (Matches("CLOSE")) + COMPLETE_WITH_QUERY(Query_for_list_of_cursors + " UNION SELECT 'ALL'"); /* CLUSTER */ else if (Matches("CLUSTER")) COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_clusterables, "UNION SELECT 'VERBOSE'"); @@ -3002,11 +3011,44 @@ psql_completion(const char *text, int start, int end) " UNION SELECT 'ALL'"); /* DECLARE */ + + /* + * Complete DECLARE with one of BINARY, INSENSITIVE, SCROLL, NO + * SCROLL, and CURSOR. + */ else if (Matches("DECLARE", MatchAny)) COMPLETE_WITH("BINARY", "INSENSITIVE", "SCROLL", "NO SCROLL", "CURSOR"); + + /* + * Complete DECLARE ...