You can use the execute many method in queries that insert data at once, such as VALUES.
conn = pymysql.connect(
mysql_endpoint,
user=username,
passwd=password,
db=dbname
)
def bulk_insert_and_update_users():
prepared_statement = [
[
1, # id
'Qiita Taro', # name
20 # age
],
[
2,
'Qiita Hanako',
18
]
]
with conn.cursor() as cursor:
sql = (
'INSERT INTO users '
'(id, name, age) '
'VALUES (%s, %s, %s) '
'ON DUPLICATE KEY UPDATE '
'name = VALUES(name), '
'age = VALUES(age)'
)
cursor.executemany(sql, prepared_statement)
conn.commit()
Doing this will generate a query like this:
INSERT INTO users (id, name, age)
VALUES
(`1`, `Qiita Taro`, `20`),
(`2`, `Qiita Hanako`, `18`)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
age = VALUES(age);
It's simple and easy to understand.
ELT & FIELD
If you don't do INSERT or if you are worried about the problem of auto_increment due to INSERT ... ON DUPLICATE KEY UPDATE, you should use this. (Reference)
In that case, there are some points to be aware of, so let's take a look.
conn = pymysql.connect(
mysql_endpoint,
user=username,
passwd=password,
db=dbname
)
def bulk_update_users():
records = [
{
'user_id': 1,
'user_name': 'Qiita Taro',
'user_age': 20
},
{
'user_id': 2,
'user_name': 'Qiita Hanako',
'user_age': 18
}
]
id_list = []
name_list = []
age_list = []
for record in records:
id_list.append(str(record['user_id']))
name_list.append(record['user_name'])
age_list.append(record['user_age'])
id_strings = ','.join(['%s'] * len(id_list))
name_strings = ','.join(['%s'] * len(name_list))
age_strings = ','.join(['%s'] * len(age_list))
sql = (
'UPDATE users SET '
'name = '
'ELT(FIELD(id, %(user_ids)s), %(user_names)s), '
'age = '
'ELT(FIELD(id, %(user_ids)s), %(user_ages)s) '
'WHERE id IN (%(user_ids)s);'
'' % dict(user_ids=id_strings,
user_names=name_strings,
user_ages=age_strings)
)
prepared_statement = tuple(id_list) \
+ tuple(name_list) \
+ tuple(id_list) \
+ tuple(age_list) \
+ tuple(id_list)
with conn.cursor() as cursor:
cursor.execute(sql, prepared_statement)
conn.commit()
This is the characteristic part.
id_list = []
name_list = []
age_list = []
for record in records:
id_list.append(str(record['user_id']))
name_list.append(record['user_name'])
age_list.append(record['user_age'])
id_strings = ','.join(['%s'] * len(id_list))
name_strings = ','.join(['%s'] * len(name_list))
age_strings = ','.join(['%s'] * len(age_list))
sql = (
'UPDATE users SET '
'name = '
'ELT(FIELD(id, %(user_ids)s), %(user_names)s), '
'age = '
'ELT(FIELD(id, %(user_ids)s), %(user_ages)s) '
'WHERE id IN (%(user_ids)s);'
'' % dict(user_ids=id_strings,
user_names=name_strings,
user_ages=age_strings)
)
When you do this, the variable sql will contain a string like this:
UPDATE users SET
name = ELT(FIELD(id, %s, %s), %s, %s),
age = ELT(FIELD(id, %s, %s), %s, %s)
WHERE id IN (%s, %s);
This time, I wrote it for the sake of clarity, but in reality, the numbers of % s are all the same, so there is no problem even if you generate a character string from the number of elements of records and reuse it.
And all the parameters are combined in the order of insertion with the% operator.
prepared_statement = tuple(id_list) \
+ tuple(name_list) \
+ tuple(id_list) \
+ tuple(age_list) \
+ tuple(id_list)
# (1, 2, `Qiita Taro`, `Qiita Hanako`, 1, 2, 20, 18, 1, 2)
By passing this to the argument of conn.execute, the following query will be executed.
UPDATE users SET
name = ELT(FIELD(id, `1`, `2`), `Qiita Taro`, `Qiita Hanako`),
age = ELT(FIELD(id, `1`, `2`), `20`, `18`)
WHERE id IN (`1`, `2`);
If you just want to make a query, it's easy to concatenate strings. However, this is vulnerable to SQL injection.
For example, if the user's name is set to "'; DROP TABLE users;'" , there is no protection.
Bad example
id_list = []
name_list = []
age_list = []
for record in records:
id_list += [f"`{str(record['user_id'])}`"]
name_list += [f"`{record['user_name']}`"]
age_list += [f"`{record['user_age']}`"]
id_list_s = ",".join(id_list)
name_list_s = ",".join(name_list)
age_list_s = ",".join(age_list)
sql = (
'UPDATE users SET '
'name = '
f'ELT(FIELD(id, {id_list_s}), {name_list_s}), '
'age = '
f'ELT(FIELD(id,{id_list_s}), {age_list_s}) '
f'WHERE id IN ({id_list_s});'
)
It is possible to replace ' with \' by replace etc., but since it escapes on the pymysql side when execute is executed, it is better to insert the parameter obediently with a prepared statement. It will be good. (Reference)
To check the SQL statement actually executed by pymysql, it is saved in self.executed after execute, so you can check it by accessing this property.
with conn.cursor() as cursor:
sql = "SELECT * FROM users WHERE id = %s"
cursor.execute(sql, (1))
print(cursor._executed)
# SELECT * FROM users WHERE id = 1
PyMySQL 10.5.5 MySQLCursor.executemany() Method The story of a counter stop when using ON DUPLICATE KEY UPDATE in MySQL imploding a list for use in a python MySQLDB IN clause Python prepared statement. SELECT IN problem
Recommended Posts