And this time the patch is attached.
Andrew Dunstan wrote:
>
>
> Tom Lane wrote:
>> What I think we'd need to have a complete solution is
>>
>> convert(text, name) returns bytea
>> -- convert from DB encoding to arbitrary encoding
>>
>> convert(bytea, name, name) returns bytea
>> -- convert between any two encodings
>>
>> convert(bytea, name) returns text
>> -- convert from arbitrary encoding to DB encoding
>>
>> The second and third would need to do a verify step before
>> converting, of course.
>>
>>
>>
>
> Here's a patch that implements the above. It actually does the verify
> step for all three cases - if that bothers people I can remove it at
> the cost of a little code complexity.
>
> It also fixes the "convert ... using ..." case in a similar way (makes
> it return a bytea).
>
> On reflection I think we also need to provide length(bytea, name) as
> has been suggested, so we can check the length in the foreign encoding
> of a bytea we have converted this way. That shouldn't be too difficult
> to add.
>
> cheers
>
> andrew
>
Index: src/backend/catalog/pg_conversion.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/catalog/pg_conversion.c,v
retrieving revision 1.36
diff -c -r1.36 pg_conversion.c
*** src/backend/catalog/pg_conversion.c 27 Feb 2007 23:48:07 -0000 1.36
--- src/backend/catalog/pg_conversion.c 16 Sep 2007 01:43:24 -0000
***************
*** 282,288 ****
* CONVERT <left paren> <character value expression>
* USING <form-of-use conversion name> <right paren>
*
! * TEXT convert_using(TEXT string, TEXT conversion_name)
*/
Datum
pg_convert_using(PG_FUNCTION_ARGS)
--- 282,291 ----
* CONVERT <left paren> <character value expression>
* USING <form-of-use conversion name> <right paren>
*
! * BYTEA convert_using(TEXT string, TEXT conversion_name)
! *
! * bytea is returned so we don't give a value that is
! * not valid in the database encoding.
*/
Datum
pg_convert_using(PG_FUNCTION_ARGS)
***************
*** 344,348 ****
pfree(result);
pfree(str);
! PG_RETURN_TEXT_P(retval);
}
--- 347,351 ----
pfree(result);
pfree(str);
! PG_RETURN_BYTEA_P(retval);
}
Index: src/backend/utils/mb/mbutils.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/mb/mbutils.c,v
retrieving revision 1.63
diff -c -r1.63 mbutils.c
*** src/backend/utils/mb/mbutils.c 28 May 2007 16:43:24 -0000 1.63
--- src/backend/utils/mb/mbutils.c 16 Sep 2007 01:43:25 -0000
***************
*** 292,303 ****
}
/*
! * Convert string using encoding_nanme. We assume that string's
! * encoding is same as DB encoding.
*
! * TEXT convert(TEXT string, NAME encoding_name) */
Datum
! pg_convert(PG_FUNCTION_ARGS)
{
Datum string = PG_GETARG_DATUM(0);
Datum dest_encoding_name = PG_GETARG_DATUM(1);
--- 292,303 ----
}
/*
! * Convert string using encoding_name. The source
! * encoding is the DB encoding.
*
! * BYTEA convert(TEXT string, NAME encoding_name) */
Datum
! pg_convert_from_db(PG_FUNCTION_ARGS)
{
Datum string = PG_GETARG_DATUM(0);
Datum dest_encoding_name = PG_GETARG_DATUM(1);
***************
*** 306,312 ****
Datum result;
result = DirectFunctionCall3(
! pg_convert2, string, src_encoding_name, dest_encoding_name);
/* free memory allocated by namein */
pfree((void *) src_encoding_name);
--- 306,335 ----
Datum result;
result = DirectFunctionCall3(
! pg_convert, string, src_encoding_name, dest_encoding_name);
!
! /* free memory allocated by namein */
! pfree((void *) src_encoding_name);
!
! PG_RETURN_BYTEA_P(result);
! }
!
! /*
! * Convert string using encoding_name. The destination
! * encoding is the DB encoding.
! *
! * TEXT convert(BYTEA string, NAME encoding_name) */
! Datum
! pg_convert_to_db(PG_FUNCTION_ARGS)
! {
! Datum string = PG_GETARG_DATUM(0);
! Datum src_encoding_name = PG_GETARG_DATUM(1);
! Datum dest_encoding_name = DirectFunctionCall1(
! namein, CStringGetDatum(DatabaseEncoding->name));
! Datum result;
!
! result = DirectFunctionCall3(
! pg_convert, string, src_encoding_name, dest_encoding_name);
/* free memory allocated by namein */
pfree((void *) src_encoding_name);
***************
*** 315,334 ****
}
/*
! * Convert string using encoding_name.
*
! * TEXT convert2(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
*/
Datum
! pg_convert2(PG_FUNCTION_ARGS)
{
! text *string = PG_GETARG_TEXT_P(0);
char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
int src_encoding = pg_char_to_encoding(src_encoding_name);
char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
int dest_encoding = pg_char_to_encoding(dest_encoding_name);
unsigned char *result;
! text *retval;
unsigned char *str;
int len;
--- 338,357 ----
}
/*
! * Convert string from one named encoding to another.
*
! * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
*/
Datum
! pg_convert(PG_FUNCTION_ARGS)
{
! bytea *string = PG_GETARG_TEXT_P(0);
char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
int src_encoding = pg_char_to_encoding(src_encoding_name);
char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
int dest_encoding = pg_char_to_encoding(dest_encoding_name);
unsigned char *result;
! bytea *retval;
unsigned char *str;
int len;
***************
*** 343,350 ****
errmsg("invalid destination encoding name \"%s\"",
dest_encoding_name)));
! /* make sure that source string is null terminated */
len = VARSIZE(string) - VARHDRSZ;
str = palloc(len + 1);
memcpy(str, VARDATA(string), len);
*(str + len) = '\0';
--- 366,374 ----
errmsg("invalid destination encoding name \"%s\"",
dest_encoding_name)));
! /* make sure that source string is valid and null terminated */
len = VARSIZE(string) - VARHDRSZ;
+ pg_verify_mbstr(src_encoding,VARDATA(string),len,false);
str = palloc(len + 1);
memcpy(str, VARDATA(string), len);
*(str + len) = '\0';
***************
*** 354,361 ****
elog(ERROR, "encoding conversion failed");
/*
! * build text data type structure. we cannot use textin() here, since
! * textin assumes that input string encoding is same as database encoding.
*/
len = strlen((char *) result) + VARHDRSZ;
retval = palloc(len);
--- 378,384 ----
elog(ERROR, "encoding conversion failed");
/*
! * build bytea data type structure.
*/
len = strlen((char *) result) + VARHDRSZ;
retval = palloc(len);
***************
*** 369,375 ****
/* free memory if allocated by the toaster */
PG_FREE_IF_COPY(string, 0);
! PG_RETURN_TEXT_P(retval);
}
/*
--- 392,398 ----
/* free memory if allocated by the toaster */
PG_FREE_IF_COPY(string, 0);
! PG_RETURN_BYTEA_P(retval);
}
/*
Index: src/include/catalog/catversion.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/catversion.h,v
retrieving revision 1.423
diff -c -r1.423 catversion.h
*** src/include/catalog/catversion.h 5 Sep 2007 18:10:48 -0000 1.423
--- src/include/catalog/catversion.h 16 Sep 2007 01:43:25 -0000
***************
*** 53,58 ****
*/
/* yyyymmddN */
! #define CATALOG_VERSION_NO 200709042
#endif
--- 53,58 ----
*/
/* yyyymmddN */
! #define CATALOG_VERSION_NO 200709151
#endif
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.468
diff -c -r1.468 pg_proc.h
*** src/include/catalog/pg_proc.h 4 Sep 2007 16:41:42 -0000 1.468
--- src/include/catalog/pg_proc.h 16 Sep 2007 01:43:25 -0000
***************
*** 2232,2244 ****
DATA(insert OID = 810 ( pg_client_encoding PGNSP PGUID 12 1 0 f f t f s 0 19 "" _null_ _null_ _null_
pg_client_encoding- _null_ _null_ ));
DESCR("encoding name of current database");
! DATA(insert OID = 1717 ( convert PGNSP PGUID 12 1 0 f f t f s 2 25 "25 19" _null_ _null_ _null_ pg_convert
-_null_ _null_ ));
DESCR("convert string with specified destination encoding name");
! DATA(insert OID = 1813 ( convert PGNSP PGUID 12 1 0 f f t f s 3 25 "25 19 19" _null_ _null_ _null_
pg_convert2- _null_ _null_ ));
DESCR("convert string with specified encoding names");
! DATA(insert OID = 1619 ( convert_using PGNSP PGUID 12 1 0 f f t f s 2 25 "25 25" _null_ _null_ _null_
pg_convert_using- _null_ _null_ ));
DESCR("convert string with specified conversion name");
DATA(insert OID = 1264 ( pg_char_to_encoding PGNSP PGUID 12 1 0 f f t f s 1 23 "19" _null_ _null_ _null_
PG_char_to_encoding- _null_ _null_ ));
--- 2232,2247 ----
DATA(insert OID = 810 ( pg_client_encoding PGNSP PGUID 12 1 0 f f t f s 0 19 "" _null_ _null_ _null_
pg_client_encoding- _null_ _null_ ));
DESCR("encoding name of current database");
! DATA(insert OID = 1717 ( convert PGNSP PGUID 12 1 0 f f t f s 2 17 "25 19" _null_ _null_ _null_
pg_convert_from_db- _null_ _null_ ));
DESCR("convert string with specified destination encoding name");
! DATA(insert OID = 1713 ( convert PGNSP PGUID 12 1 0 f f t f s 2 25 "17 19" _null_ _null_ _null_
pg_convert_to_db- _null_ _null_ ));
! DESCR("convert string with specified source encoding name");
!
! DATA(insert OID = 1813 ( convert PGNSP PGUID 12 1 0 f f t f s 3 17 "17 19 19" _null_ _null_ _null_
pg_convert- _null_ _null_ ));
DESCR("convert string with specified encoding names");
! DATA(insert OID = 1619 ( convert_using PGNSP PGUID 12 1 0 f f t f s 2 17 "25 25" _null_ _null_ _null_
pg_convert_using- _null_ _null_ ));
DESCR("convert string with specified conversion name");
DATA(insert OID = 1264 ( pg_char_to_encoding PGNSP PGUID 12 1 0 f f t f s 1 23 "19" _null_ _null_ _null_
PG_char_to_encoding- _null_ _null_ ));
Index: src/include/utils/builtins.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.302
diff -c -r1.302 builtins.h
*** src/include/utils/builtins.h 4 Sep 2007 16:41:43 -0000 1.302
--- src/include/utils/builtins.h 16 Sep 2007 01:43:26 -0000
***************
*** 902,908 ****
extern Datum PG_character_set_name(PG_FUNCTION_ARGS);
extern Datum PG_character_set_id(PG_FUNCTION_ARGS);
extern Datum pg_convert(PG_FUNCTION_ARGS);
! extern Datum pg_convert2(PG_FUNCTION_ARGS);
/* format_type.c */
extern Datum format_type(PG_FUNCTION_ARGS);
--- 902,909 ----
extern Datum PG_character_set_name(PG_FUNCTION_ARGS);
extern Datum PG_character_set_id(PG_FUNCTION_ARGS);
extern Datum pg_convert(PG_FUNCTION_ARGS);
! extern Datum pg_convert_to_db(PG_FUNCTION_ARGS);
! extern Datum pg_convert_from_db(PG_FUNCTION_ARGS);
/* format_type.c */
extern Datum format_type(PG_FUNCTION_ARGS);