Обсуждение: patch for character set encoding detection for JDBC
In looking at the 7.1beta1 code for JDBC, I noticed that support was added for character set encodings. However, I noticed that the encoding that is used isn't obtained from the DB. Since Java uses Unicode (UCS-2) internally, the character set encoding is used to translate strings from/to the DB encoding. So it seems logical that the code would get the encoding from the DB instead of the current method of requiring the user to pass it as a parameter. Attached is a patch that gets the DB encoding from the DB in the same manner as is done in libpq/fe-connect.c. The patch is created off of the latest CVS sources (Connection.java version 1.10). thanks, --Barry *** ./Connection.java.orig Thu Dec 21 18:20:36 2000 --- ./Connection.java Thu Dec 21 18:21:49 2000 *************** *** 125,132 **** PG_HOST = host; PG_STATUS = CONNECTION_BAD; - encoding = info.getProperty("charSet"); // could be null - // Now make the initial connection try { --- 125,130 ---- *************** *** 265,274 **** // This may cause some clients to break when they assume anything other than ISO, // but then - they should be using the proper methods ;-) // // firstWarning = null; ! ExecSQL("set datestyle to 'ISO'"); // Initialise object handling initObjectTypes(); --- 263,346 ---- // This may cause some clients to break when they assume anything other than ISO, // but then - they should be using the proper methods ;-) // + // We also ask the DB for certain properties (i.e. DatabaseEncoding at this time) // firstWarning = null; ! java.sql.ResultSet initrset = ExecSQL("set datestyle to 'ISO'; select getdatabaseencoding()"); ! ! String dbEncoding = null; ! //retrieve DB properties ! if(initrset.next()) { ! ! //handle DatabaseEncoding ! dbEncoding = initrset.getString(1); ! //convert from the PostgreSQL name to the Java name ! if (dbEncoding.equals("SQL_ASCII")) { ! dbEncoding = "ASCII"; ! } else if (dbEncoding.equals("UNICODE")) { ! dbEncoding = "UTF8"; ! } else if (dbEncoding.equals("LATIN1")) { ! 
dbEncoding = "ISO8859_1"; ! } else if (dbEncoding.equals("LATIN2")) { ! dbEncoding = "ISO8859_2"; ! } else if (dbEncoding.equals("LATIN3")) { ! dbEncoding = "ISO8859_3"; ! } else if (dbEncoding.equals("LATIN4")) { ! dbEncoding = "ISO8859_4"; ! } else if (dbEncoding.equals("LATIN5")) { ! dbEncoding = "ISO8859_5"; ! } else if (dbEncoding.equals("LATIN6")) { ! dbEncoding = "ISO8859_6"; ! } else if (dbEncoding.equals("LATIN7")) { ! dbEncoding = "ISO8859_7"; ! } else if (dbEncoding.equals("LATIN8")) { ! dbEncoding = "ISO8859_8"; ! } else if (dbEncoding.equals("LATIN9")) { ! dbEncoding = "ISO8859_9"; ! } else if (dbEncoding.equals("EUC_JP")) { ! dbEncoding = "EUC_JP"; ! } else if (dbEncoding.equals("EUC_CN")) { ! dbEncoding = "EUC_CN"; ! } else if (dbEncoding.equals("EUC_KR")) { ! dbEncoding = "EUC_KR"; ! } else if (dbEncoding.equals("EUC_TW")) { ! dbEncoding = "EUC_TW"; ! } else if (dbEncoding.equals("KOI8")) { ! dbEncoding = "KOI8_R"; ! } else if (dbEncoding.equals("WIN")) { ! dbEncoding = "Cp1252"; ! } else { ! dbEncoding = null; ! } ! } ! ! ! //Set the encoding for this connection ! //Since the encoding could be specified or obtained from the DB we use the ! //following order: ! // 1. passed as a property ! // 2. value from DB if supported by current JVM ! // 3. default for JVM (leave encoding null) ! String passedEncoding = info.getProperty("charSet"); // could be null ! ! if (passedEncoding != null) { ! encoding = passedEncoding; ! } else { ! if (dbEncoding != null) { ! //test DB encoding ! try { ! "TEST".getBytes(dbEncoding); ! //no error the encoding is supported by the current JVM ! encoding = dbEncoding; ! } catch (UnsupportedEncodingException uee) { ! //dbEncoding is not supported by the current JVM ! encoding = null; ! } ! } else { ! encoding = null; ! } ! } // Initialise object handling initObjectTypes();
Thanks. Applied. *** ./Connection.java.orig Thu Dec 21 18:20:36 2000 --- ./Connection.java Thu Dec 21 18:21:49 2000 *************** *** 125,132 **** PG_HOST = host; PG_STATUS = CONNECTION_BAD; - encoding = info.getProperty("charSet"); // could be null - // Now make the initial connection try { --- 125,130 ---- *************** *** 265,274 **** // This may cause some clients to break when they assume anything other than ISO, // but then - they should be using the proper methods ;-) // // firstWarning = null; ! ExecSQL("set datestyle to 'ISO'"); // Initialise object handling initObjectTypes(); --- 263,346 ---- // This may cause some clients to break when they assume anything other than ISO, // but then - they should be using the proper methods ;-) // + // We also ask the DB for certain properties (i.e. DatabaseEncoding at this time) // firstWarning = null; ! java.sql.ResultSet initrset = ExecSQL("set datestyle to 'ISO'; select getdatabaseencoding()"); ! ! String dbEncoding = null; ! //retrieve DB properties ! if(initrset.next()) { ! ! //handle DatabaseEncoding ! dbEncoding = initrset.getString(1); ! //convert from the PostgreSQL name to the Java name ! if (dbEncoding.equals("SQL_ASCII")) { ! dbEncoding = "ASCII"; ! } else if (dbEncoding.equals("UNICODE")) { ! dbEncoding = "UTF8"; ! } else if (dbEncoding.equals("LATIN1")) { ! dbEncoding = "ISO8859_1"; ! } else if (dbEncoding.equals("LATIN2")) { ! dbEncoding = "ISO8859_2"; ! } else if (dbEncoding.equals("LATIN3")) { ! dbEncoding = "ISO8859_3"; ! } else if (dbEncoding.equals("LATIN4")) { ! dbEncoding = "ISO8859_4"; ! } else if (dbEncoding.equals("LATIN5")) { ! dbEncoding = "ISO8859_5"; ! } else if (dbEncoding.equals("LATIN6")) { ! dbEncoding = "ISO8859_6"; ! } else if (dbEncoding.equals("LATIN7")) { ! dbEncoding = "ISO8859_7"; ! } else if (dbEncoding.equals("LATIN8")) { ! dbEncoding = "ISO8859_8"; ! } else if (dbEncoding.equals("LATIN9")) { ! dbEncoding = "ISO8859_9"; ! } else if (dbEncoding.equals("EUC_JP")) { ! 
dbEncoding = "EUC_JP"; ! } else if (dbEncoding.equals("EUC_CN")) { ! dbEncoding = "EUC_CN"; ! } else if (dbEncoding.equals("EUC_KR")) { ! dbEncoding = "EUC_KR"; ! } else if (dbEncoding.equals("EUC_TW")) { ! dbEncoding = "EUC_TW"; ! } else if (dbEncoding.equals("KOI8")) { ! dbEncoding = "KOI8_R"; ! } else if (dbEncoding.equals("WIN")) { ! dbEncoding = "Cp1252"; ! } else { ! dbEncoding = null; ! } ! } ! ! ! //Set the encoding for this connection ! //Since the encoding could be specified or obtained from the DB we use the ! //following order: ! // 1. passed as a property ! // 2. value from DB if supported by current JVM ! // 3. default for JVM (leave encoding null) ! String passedEncoding = info.getProperty("charSet"); // could be null ! ! if (passedEncoding != null) { ! encoding = passedEncoding; ! } else { ! if (dbEncoding != null) { ! //test DB encoding ! try { ! "TEST".getBytes(dbEncoding); ! //no error the encoding is supported by the current JVM ! encoding = dbEncoding; ! } catch (UnsupportedEncodingException uee) { ! //dbEncoding is not supported by the current JVM ! encoding = null; ! } ! } else { ! encoding = null; ! } ! } // Initialise object handling initObjectTypes(); [ Charset UTF-8 unsupported, skipping... ] [ Charset UTF-8 unsupported, skipping... ] -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026