Influxdb-python: ошибка вставки данных utf8

Я хочу перенести некоторые данные из MySQL в InfluxDB. Данные в MySQL хранятся в кодировке UTF-8, и я использую Python 2.6.6 на Vagrant.

$] python
Python 2.6.6 (r266:84292, Jul 23 2015, 15:22:56)
[GCC 4.4.7 20120313 (Red Hat 4.4.7-11)] on linux2

Вот структура таблицы и пример данных MySQL.

mysql> show create table countries;
+-----------+----------------------+
| Table     | Create Table         |
+-----------+----------------------+
| countries | CREATE TABLE `countries` (
  `id` smallint(5) unsigned DEFAULT NULL,
  `name` varchar(100) DEFAULT NULL,
  KEY `name` (`name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 |
+----------------------------------+

mysql> select id,name from countries;
+------+----------------------------------+
| id   | name                             |
+------+----------------------------------+
|   11 | Afghanistan                      |
|   12 | Åland Islands                    |
|   13 | Albania                          |
|   14 | Côte d’Ivoire                    |
+------+----------------------------------+

Названия некоторых стран содержат специальные символы. Для извлечения данных из MySQL и вставки в InfluxDB я использую приведённый ниже код на Python.

#!/usr/bin/python

import MySQLdb
import json
from influxdb import InfluxDBClient

# Copy rows of the MySQL `countries` table into InfluxDB; start by opening the database connection
db = MySQLdb.connect("localhost","root","","platform" )

# Create a cursor object for running queries
cursor = db.cursor()

# Run the SQL query that selects each country's id and name
cursor.execute("SELECT id,name from countries")

# Fetch all result rows at once using fetchall()
data = cursor.fetchall()

json_body = []  # collects one point dict per row; passed to write_points() below

for id,name in data:
 print id,name
 json_1 = {
    "measurement": "cpu_load_short",
    "tags": {
        "host": "server01",
        "region": "us-west"
    },
    "time": id,  # the row id is used as the point's time value
    "fields": {
        "value": name.decode('utf8') // ERROR  # author's marker ('//' is not a Python comment): this decode raises the UnicodeDecodeError
    }
 }
 #json_1 = json.dumps(json_1).encode('utf8')
 json_body.append(json_1)

#json_body = json.dumps(json_body, ensure_ascii=False).encode('utf8')

client = InfluxDBClient('localhost', 8086, 'root', 'root', 'example')  # presumably host, port, user, password, database -- confirm against influxdb-python docs

client.create_database('example')

client.write_points(json_body)  # send all collected points in one call

result = client.query('select * from cpu_load_short;')  # query all points of the cpu_load_short measurement

print("Result: {0}".format(result))

# Close the MySQL connection
db.close()  

Я получаю ошибку при декодировании данных:

$] python test.py
11 Afghanistan
12 Åland Islands
Traceback (most recent call last):
  File "test.py", line 31, in <module>
    "value": name.decode('utf8')
  File "/usr/lib64/python2.6/encodings/utf_8.py", line 16, in decode
    return codecs.utf_8_decode(input, errors, True)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xc5 in position 0: invalid continuation byte

Если убрать вызов decode для name:

"fields": {
        "value": name
    }

я всё равно получаю ошибку:

Traceback (most recent call last):
  File "test.py", line 43, in <module>
    client.write_points(json_body)
  File "/usr/lib/python2.6/site-packages/influxdb/client.py", line 391, in write_points
    tags=tags)
  File "/usr/lib/python2.6/site-packages/influxdb/client.py", line 436, in _write_points
    expected_response_code=204
  File "/usr/lib/python2.6/site-packages/influxdb/client.py", line 276, in write
    data=make_lines(data, precision).encode('utf-8'),
  File "/usr/lib/python2.6/site-packages/influxdb/line_protocol.py", line 119, in make_lines
    value = _escape_value(point['fields'][field_key])
  File "/usr/lib/python2.6/site-packages/influxdb/line_protocol.py", line 53, in _escape_value
    value = _get_unicode(value)
  File "/usr/lib/python2.6/site-packages/influxdb/line_protocol.py", line 73, in _get_unicode
    return data.decode('utf-8')
  File "/usr/lib64/python2.6/encodings/utf_8.py", line 16, in decode
    return codecs.utf_8_decode(input, errors, True)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xc5 in position 0: invalid continuation byte

Как решить эту проблему?

2 ответа

  1. Используйте unidecode для преобразования данных Unicode в текст ASCII.

    # Convert `name` from Unicode data to plain ASCII text before building the point
    import unidecode
    name = unidecode.unidecode_expect_nonascii(name)