diff -r --exclude=.git --exclude=src/test --exclude=README postgres_head/doc/src/sgml/config.sgml postgres_new/doc/src/sgml/config.sgml 6708a6709,6773 > > TOAST > > > > > target_tuple_size (integer) > > target_tuple_size configuration parameter > > > > > The maximum size (in bytes) that a tuple may be without incurring the TOAST > code. Tuples smaller than this size will always be stored as-is, without > compression or external storage. > > > > Lowering this value will cause smaller tuples to be considered for compression > and external storage, possibly improving on-disk storage efficiency, but at > the cost of additional CPU load during data modification. > > > > Setting this value too high may cause inefficient on-disk storage by causing > large tuples to fill most of a page without leaving enough space for > additional tuples on the same page (thus resulting in significant wasted > space on each database page). This value must be smaller than the system > wide page size (normally 8k). > > > > Columns will be compressed and/or moved external to the table until the > size is less than target_tuple_size or no additional gains can be made. > > > > > > > target_compression_savings (integer) > > target_compression_savings configuration parameter > > > > > The minimum amount of space that must be saved in order for compression to > be used on an eligible column, as a percent of the original size. > > > > Setting this value properly avoids the CPU overhead of compressing columns > where very little space is saved. Valid values are between 1 and 99. Setting > this to a high value (such as 90) will only use compression if it results in > a large amount of space savings, whereas a low value will result in compression > being used frequently. 
> > > > > > > diff -r --exclude=.git --exclude=src/test --exclude=README postgres_head/src/backend/access/heap/heapam.c postgres_new/src/backend/access/heap/heapam.c 68a69 > #include "utils/guc.h" 2616c2617 < else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD) --- > else if (HeapTupleHasExternal(tup) || tup->t_len > target_tuple_size) 3918c3919 < newtup->t_len > TOAST_TUPLE_THRESHOLD); --- > newtup->t_len > target_tuple_size); diff -r --exclude=.git --exclude=src/test --exclude=README postgres_head/src/backend/access/heap/rewriteheap.c postgres_new/src/backend/access/heap/rewriteheap.c 128a129 > #include "utils/guc.h" 650c651 < else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD) --- > else if (HeapTupleHasExternal(tup) || tup->t_len > target_tuple_size) diff -r --exclude=.git --exclude=src/test --exclude=README postgres_head/src/backend/access/heap/tuptoaster.c postgres_new/src/backend/access/heap/tuptoaster.c 41a42 > #include "utils/guc.h" 731c732,736 < maxDataLen = TOAST_TUPLE_TARGET - hoff; --- > maxDataLen = target_tuple_size; > if (hoff >= maxDataLen) > maxDataLen = 1; > else > maxDataLen = maxDataLen - hoff; 1324,1332c1329,1348 < * We recheck the actual size even if pglz_compress() reports success, < * because it might be satisfied with having saved as little as one byte < * in the compressed data --- which could turn into a net loss once you < * consider header and alignment padding. Worst case, the compressed < * format might require three padding bytes (plus header, which is < * included in VARSIZE(tmp)), whereas the uncompressed format would take < * only one header byte and no padding if the value is short enough. So < * we insist on a savings of more than 2 bytes to ensure we have a gain. < */ --- > * If the value is larger than 1k, test compress the first 1k > * to see if it's worthwhile to compress it all. 
On very large > * datums that aren't compressible, this will save the work > * of processing all the data only to discard the result. On > * compressible data, the extra work of doing the first 1k > * twice is negligible. > */ > if (valsize > COMPRESSION_TEST_SIZE) > { > len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)), > 1024, > TOAST_COMPRESS_RAWDATA(tmp), > PGLZ_strategy_default); > if (len < 0 || len > COMPRESSION_TEST_SIZE * target_compression_savings / 100) > { > /* Sample data did not compress enough to be worthwhile */ > pfree(tmp); > return PointerGetDatum(NULL); > } > } 1337,1338c1353,1354 < if (len >= 0 && < len + TOAST_COMPRESS_HDRSZ < valsize - 2) --- > /* Check to ensure compression saved enough space */ > if ((len + TOAST_COMPRESS_HDRSZ) < valsize * target_compression_savings / 100) diff -r --exclude=.git --exclude=src/test --exclude=README postgres_head/src/backend/catalog/toasting.c postgres_new/src/backend/catalog/toasting.c 405,408c405,410 < * Check to see whether the table needs a TOAST table. It does only if < * (1) there are any toastable attributes, and (2) the maximum length < * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to < * create a toast table for something like "f1 varchar(20)".) --- > * Check to see whether the table needs a TOAST table. It does if > * there are any toastable attributes. Since the target_tuple_size > * GUC could change at any time, we don't attempt to determine whether > * a single tuple could exceed that value, since the answer might change > * at some future point and it's impractical to re-examine every single > * table any time (for example) a SIGHUP occurs. 
413,415d414 < int32 data_length = 0; < bool maxlength_unknown = false; < bool has_toastable_attrs = false; 418d416 < int32 tuple_length; 428,429c426 < data_length = att_align_nominal(data_length, att[i]->attalign); < if (att[i]->attlen > 0) --- > if (att[i]->attlen <= 0) 432,442d428 < data_length += att[i]->attlen; < } < else < { < int32 maxlen = type_maximum_size(att[i]->atttypid, < att[i]->atttypmod); < < if (maxlen < 0) < maxlength_unknown = true; < else < data_length += maxlen; 444c430 < has_toastable_attrs = true; --- > return true; 447,454c433 < if (!has_toastable_attrs) < return false; /* nothing to toast? */ < if (maxlength_unknown) < return true; /* any unlimited-length attrs? */ < tuple_length = MAXALIGN(SizeofHeapTupleHeader + < BITMAPLEN(tupdesc->natts)) + < MAXALIGN(data_length); < return (tuple_length > TOAST_TUPLE_THRESHOLD); --- > return false; diff -r --exclude=.git --exclude=src/test --exclude=README postgres_head/src/backend/utils/misc/guc.c postgres_new/src/backend/utils/misc/guc.c 31a32 > #include "access/tuptoaster.h" 418a420,421 > int target_tuple_size = 4; > int target_compression_savings = 20; 2660a2664,2685 > {"target_tuple_size", PGC_SIGHUP, AUTOVACUUM, > gettext_noop("TOAST tuples larger than this to attempt to keep them below this size."), > gettext_noop("Smaller values may result in more efficient storage at the expense of higher CPU usage"), > NULL > }, > &target_tuple_size, > MaximumBytesPerTuple(4), 1, BLCKSZ - MAXALIGN(SizeOfPageHeaderData), > NULL, NULL, NULL > }, > > { > {"target_compression_savings", PGC_SIGHUP, AUTOVACUUM, > gettext_noop("Only compress data if the result saves at least this percent of space."), > NULL, > NULL > }, > &target_compression_savings, > 80, 1, 99, > NULL, NULL, NULL > }, > > { diff -r --exclude=.git --exclude=src/test --exclude=README postgres_head/src/include/access/tuptoaster.h postgres_new/src/include/access/tuptoaster.h 20a21,27 > * For values larger than this value, initially compress only these 
> * many bytes and only compress the entire value if the initial > * test compression shows sufficient savings > */ > #define COMPRESSION_TEST_SIZE 1024 > > /* 36,59d42 < * These symbols control toaster activation. If a tuple is larger than < * TOAST_TUPLE_THRESHOLD, we will try to toast it down to no more than < * TOAST_TUPLE_TARGET bytes through compressing compressible fields and < * moving EXTENDED and EXTERNAL data out-of-line. < * < * The numbers need not be the same, though they currently are. It doesn't < * make sense for TARGET to exceed THRESHOLD, but it could be useful to make < * it be smaller. < * < * Currently we choose both values to match the largest tuple size for which < * TOAST_TUPLES_PER_PAGE tuples can fit on a heap page. < * < * XXX while these can be modified without initdb, some thought needs to be < * given to needs_toast_table() in toasting.c before unleashing random < * changes. Also see LOBLKSIZE in large_object.h, which can *not* be < * changed without initdb. < */ < #define TOAST_TUPLES_PER_PAGE 4 < < #define TOAST_TUPLE_THRESHOLD MaximumBytesPerTuple(TOAST_TUPLES_PER_PAGE) < < #define TOAST_TUPLE_TARGET TOAST_TUPLE_THRESHOLD < < /* diff -r --exclude=.git --exclude=src/test --exclude=README postgres_head/src/include/utils/guc.h postgres_new/src/include/utils/guc.h 253a254,255 > extern int target_tuple_size; > extern int target_compression_savings; Only in postgres_new/src/test: toast