Skip to content

Commit

Permalink
Fix decoding of Cursor.description column names.
Browse files Browse the repository at this point in the history
Fixes #190 - The issue was a column alias containing an "í" (i with acute accent), which caused
an error. pyodbc now (1) uses the Unicode version of SQLDescribeCol (SQLDescribeColW) and (2) uses
the configured SQL_C_WCHAR encoding. I've added a SQL Server test, decode_meta, for this.

As part of this, I've moved much of the Unicode handling to textenc.h and textenc.cpp.  I fully
expect to replace SQLWChar in the future and move all handling to that file.
  • Loading branch information
mkleehammer committed Feb 11, 2017
1 parent 23db3d0 commit 5728869
Show file tree
Hide file tree
Showing 12 changed files with 328 additions and 225 deletions.
1 change: 1 addition & 0 deletions src/cnxninfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "pyodbc.h"
#include "wrapper.h"
#include "textenc.h"
#include "cnxninfo.h"
#include "connection.h"

Expand Down
21 changes: 17 additions & 4 deletions src/connection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "pyodbc.h"
#include "wrapper.h"
#include "textenc.h"
#include "connection.h"
#include "cursor.h"
#include "pyodbcmodule.h"
Expand Down Expand Up @@ -211,6 +212,10 @@ PyObject* Connection_New(PyObject* pConnectString, bool fAutoCommit, bool fAnsi,
cnxn->conv_types = 0;
cnxn->conv_funcs = 0;

// This is an inefficient default, but should work all the time. When we are offered
// single-byte text we don't actually know what the encoding is. For example, with SQL
// Server the encoding is based on the database's collation. We ask the driver / DB to
// convert to SQL_C_WCHAR and use the ODBC default of UTF-16LE.
cnxn->sqlchar_enc.optenc = OPTENC_UTF16LE;
cnxn->sqlchar_enc.name = _strdup("utf-16le");
cnxn->sqlchar_enc.ctype = SQL_C_WCHAR;
Expand All @@ -219,6 +224,10 @@ PyObject* Connection_New(PyObject* pConnectString, bool fAutoCommit, bool fAnsi,
cnxn->sqlwchar_enc.name = _strdup("utf-16le");
cnxn->sqlwchar_enc.ctype = SQL_C_WCHAR;

cnxn->metadata_enc.optenc = OPTENC_UTF16LE;
cnxn->metadata_enc.name = _strdup("utf-16le");
cnxn->metadata_enc.ctype = SQL_C_WCHAR;

// Note: I attempted to use UTF-8 here too since it can hold any type, but SQL Server fails
// with a data truncation error if we send something encoded in 2 bytes to a column with 1
// character. I don't know if this is a bug in SQL Server's driver or if I'm missing
Expand All @@ -234,9 +243,10 @@ PyObject* Connection_New(PyObject* pConnectString, bool fAutoCommit, bool fAnsi,

cnxn->sqlchar_enc.to = TO_UNICODE;
cnxn->sqlwchar_enc.to = TO_UNICODE;
cnxn->metadata_enc.to = TO_UNICODE;
#endif

if (!cnxn->sqlchar_enc.name || !cnxn->sqlwchar_enc.name || !cnxn->unicode_enc.name
if (!cnxn->sqlchar_enc.name || !cnxn->sqlwchar_enc.name || !cnxn->metadata_enc.name || !cnxn->unicode_enc.name
#if PY_MAJOR_VERSION < 3
|| !cnxn->str_enc.name
#endif
Expand Down Expand Up @@ -396,6 +406,8 @@ static int Connection_clear(PyObject* self)
cnxn->sqlchar_enc.name = 0;
free((void*)cnxn->sqlwchar_enc.name);
cnxn->sqlwchar_enc.name = 0;
free((void*)cnxn->metadata_enc.name);
cnxn->metadata_enc.name = 0;
free((void*)cnxn->unicode_enc.name);
cnxn->unicode_enc.name = 0;
#if PY_MAJOR_VERSION < 3
Expand Down Expand Up @@ -1245,10 +1257,11 @@ static PyObject* Connection_setdecoding(PyObject* self, PyObject* args, PyObject
allow_raw = (sqltype == SQL_CHAR && to != TO_UNICODE);
#endif

if (sqltype != SQL_WCHAR && sqltype != SQL_CHAR)
return PyErr_Format(PyExc_ValueError, "Invalid sqltype %d. Must be SQL_CHAR or SQL_WCHAR", sqltype);
if (sqltype != SQL_WCHAR && sqltype != SQL_CHAR && sqltype != SQL_WMETADATA)
return PyErr_Format(PyExc_ValueError, "Invalid sqltype %d. Must be SQL_CHAR or SQL_WCHAR or SQL_WMETADATA", sqltype);

TextEnc& enc = (sqltype == SQL_CHAR) ? cnxn->sqlchar_enc : cnxn->sqlwchar_enc;
TextEnc& enc = (sqltype == SQL_CHAR) ? cnxn->sqlchar_enc :
((sqltype == SQL_WMETADATA) ? cnxn->metadata_enc : cnxn->sqlwchar_enc);

if (!SetTextEncCommon(enc, encoding, ctype, allow_raw))
return 0;
Expand Down
57 changes: 8 additions & 49 deletions src/connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,55 +16,7 @@ struct Cursor;

extern PyTypeObject ConnectionType;

// Integer constants used when configuring text encodings: byte-order selectors, the OPTENC_*
// codes stored in TextEnc::optenc, and (Python 2 builds only) the TO_* codes stored in
// TextEnc::to.
enum {
// Byte-order selectors.
BYTEORDER_LE = -1,
BYTEORDER_NATIVE = 0,
BYTEORDER_BE = 1,

// Optimized-encoding codes. OPTENC_NONE means no optimized path is selected and the encoding
// must be looked up by name.
OPTENC_NONE = 0, // No optimized encoding - use the named encoding
OPTENC_RAW = 1, // In Python 2, pass bytes directly to string - no decoder
OPTENC_UTF8 = 2,
OPTENC_UTF16 = 3, // "Native", so check for BOM and default to BE
OPTENC_UTF16BE = 4,
OPTENC_UTF16LE = 5,
OPTENC_LATIN1 = 6,

// Only defined when building for Python 2: the kind of object to produce when reading text
// from the database (unicode or str).
#if PY_MAJOR_VERSION < 3
TO_UNICODE = 1,
TO_STR = 2
#endif
};


// Describes how one category of text is encoded or decoded when exchanged with the driver:
// an optional optimized-encoding code, the encoding's name, and the ODBC C type to request.
struct TextEnc
{
// Holds encoding information for reading or writing text. Since some drivers / databases
// are not easy to configure efficiently, a separate instance of this structure is
// configured for:
//
// * reading SQL_CHAR
// * reading SQL_WCHAR
// * writing unicode strings
// * writing non-unicode strings (Python 2.7 only)

#if PY_MAJOR_VERSION < 3
int to;
// The type of object to return if reading from the database: str or unicode.
// Holds one of the TO_* constants (TO_UNICODE or TO_STR).
#endif

int optenc;
// Set to one of the OPTENC constants to indicate whether an optimized encoding is to be
// used or a custom one. If OPTENC_NONE, no optimized encoding is set and `name` should be
// used.

const char* name;
// The name of the encoding. This must be freed using `free`.

SQLSMALLINT ctype;
// The C type to use, SQL_C_CHAR or SQL_C_WCHAR. Normally this matches the SQL type of the
// column (SQL_C_CHAR is used for SQL_CHAR, etc.). At least one database reports it has
// SQL_WCHAR data even when configured for UTF-8 which is better suited for SQL_C_CHAR.
};
struct TextEnc;

struct Connection
{
Expand Down Expand Up @@ -100,6 +52,13 @@ struct Connection
TextEnc str_enc; // encoding used when writing non-unicode strings
#endif

TextEnc metadata_enc;
// Used when reading column names for Cursor.description. I originally thought I could use
// the TextEncs above based on whether I called SQLDescribeCol vs SQLDescribeColW.
// Unfortunately it looks like PostgreSQL and MySQL (and probably others) ignore the ODBC
// specification regarding encoding everywhere *except* in these functions - with them,
// SQLDescribeCol seems to always return UTF-16LE regardless of the connection settings.

long maxwrite;
// Used to override varchar_maxlength, etc. Those are initialized from
// SQLGetTypeInfo but some drivers (e.g. psqlodbc) return almost arbitrary
Expand Down
67 changes: 39 additions & 28 deletions src/cursor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "pyodbc.h"
#include "wrapper.h"
#include "textenc.h"
#include "cursor.h"
#include "pyodbcmodule.h"
#include "connection.h"
Expand Down Expand Up @@ -153,14 +154,15 @@ static bool create_name_map(Cursor* cur, SQLSMALLINT field_count, bool lower)

for (int i = 0; i < field_count; i++)
{
SQLCHAR name[300];
ODBCCHAR szName[300];
SQLSMALLINT cchName;
SQLSMALLINT nDataType;
SQLULEN nColSize; // precision
SQLSMALLINT cDecimalDigits; // scale
SQLSMALLINT nullable;

Py_BEGIN_ALLOW_THREADS
ret = SQLDescribeCol(cur->hstmt, (SQLUSMALLINT)(i + 1), name, _countof(name), 0, &nDataType, &nColSize, &cDecimalDigits, &nullable);
ret = SQLDescribeColW(cur->hstmt, (SQLUSMALLINT)(i + 1), (SQLWCHAR*)szName, _countof(szName), &cchName, &nDataType, &nColSize, &cDecimalDigits, &nullable);
Py_END_ALLOW_THREADS

if (cur->cnxn->hdbc == SQL_NULL_HANDLE)
Expand All @@ -178,10 +180,21 @@ static bool create_name_map(Cursor* cur, SQLSMALLINT field_count, bool lower)

TRACE("Col %d: type=%s (%d) colsize=%d\n", (i+1), SqlTypeName(nDataType), (int)nDataType, (int)nColSize);

const TextEnc& enc = cur->cnxn->metadata_enc;
Object name(TextBufferToObject(enc, szName, (Py_ssize_t)(cchName * sizeof(ODBCCHAR))));

if (!name)
goto done;

if (lower)
_strlwr((char*)name);
{
PyObject* l = PyObject_CallMethod(name, "lower", 0);
if (!l)
goto done;
name.Attach(l);
}

type = PythonTypeFromSqlType(cur, name, nDataType);
type = PythonTypeFromSqlType(cur, nDataType);
if (!type)
goto done;

Expand Down Expand Up @@ -220,8 +233,8 @@ static bool create_name_map(Cursor* cur, SQLSMALLINT field_count, bool lower)
}
}

colinfo = Py_BuildValue("(sOOiiiO)",
(char*)name,
colinfo = Py_BuildValue("(OOOiiiO)",
name.Get(),
type, // type_code
Py_None, // display size
(int)nColSize, // internal_size
Expand All @@ -231,14 +244,13 @@ static bool create_name_map(Cursor* cur, SQLSMALLINT field_count, bool lower)
if (!colinfo)
goto done;


nullable_obj = 0;

index = PyInt_FromLong(i);
if (!index)
goto done;

PyDict_SetItemString(colmap, (const char*)name, index);
PyDict_SetItem(colmap, name.Get(), index);
Py_DECREF(index); // PyDict_SetItem increments
index = 0;

Expand Down Expand Up @@ -583,34 +595,33 @@ static PyObject* execute(Cursor* cur, PyObject* pSql, PyObject* params, bool ski
cur->pPreparedSQL = 0;

szLastFunction = "SQLExecDirect";

const TextEnc* penc = 0;

#if PY_MAJOR_VERSION < 3
if (PyString_Check(pSql))
{
const TextEnc& enc = cur->cnxn->str_enc;
SQLWChar query(pSql, enc.ctype, enc.name);
if (!query)
return 0;
Py_BEGIN_ALLOW_THREADS
if (enc.ctype == SQL_C_WCHAR)
ret = SQLExecDirectW(cur->hstmt, (SQLWCHAR*)query.value(), (SQLINTEGER)query.charlen());
else
ret = SQLExecDirect(cur->hstmt, (SQLCHAR*)query.value(), (SQLINTEGER)query.charlen());
Py_END_ALLOW_THREADS
penc = &cur->cnxn->str_enc;
}
else
#endif
{
const TextEnc& enc = cur->cnxn->unicode_enc;
SQLWChar query(pSql, enc.ctype, enc.name);
if (!query)
return 0;
Py_BEGIN_ALLOW_THREADS
if (enc.ctype == SQL_C_WCHAR)
ret = SQLExecDirectW(cur->hstmt, (SQLWCHAR*)query.value(), (SQLINTEGER)query.charlen());
else
ret = SQLExecDirect(cur->hstmt, (SQLCHAR*)query.value(), (SQLINTEGER)query.charlen());
Py_END_ALLOW_THREADS
penc = &cur->cnxn->unicode_enc;
}

Object query(penc->Encode(pSql));
if (!query)
return 0;

const char* pch = PyBytes_AS_STRING(query.Get());
SQLINTEGER cch = (SQLINTEGER)PyBytes_GET_SIZE(query.Get());

Py_BEGIN_ALLOW_THREADS
if (penc->ctype == SQL_C_WCHAR)
ret = SQLExecDirectW(cur->hstmt, (SQLWCHAR*)pch, cch);
else
ret = SQLExecDirect(cur->hstmt, (SQLCHAR*)pch, cch);
Py_END_ALLOW_THREADS
}

if (cur->cnxn->hdbc == SQL_NULL_HANDLE)
Expand Down
Loading

0 comments on commit 5728869

Please sign in to comment.