Support utf8mb4 if the database does. Fixes #535

What MySQL calls "UTF8" only covers a subset of Unicode: it stores at most
three bytes per character, so it doesn't accept 4-byte UTF-8 characters.
Proper support is called "utf8mb4" and is, these days, fairly common (Ubuntu
10.04LTS's bundled MySQL does not support it, but later ones do).

Detection of utf8mb4: the database is treated as supporting it if
"SHOW CHARACTER SET WHERE charset='utf8mb4';" returns at least one row.

Conversion of pre-existing DBs: this was easy, we already had code in the
updater for this for when we started enforcing utf8. It was version-bumped and
set to update to either utf8mb4 or utf8, depending on whether utf8mb4 was
detected.

Making it use utf8mb4 in the database connection: Actually hard.  Connector/J
5.1.13 or newer should autodetect this, but in my testing it didn't
(http://dev.mysql.com/doc/relnotes/connector-j/en/news-5-1-13.html)

As such, if utf8mb4 has been detected, I've added code that should manually
execute "SET NAMES utf8mb4;" on all new connections.

If a database does not support utf8mb4 (rare these days) I've added code to
strip these characters before DB insertion; they won't be recorded correctly,
but it'll avoid the exception from issue #535
This commit is contained in:
Philip Cass
2015-02-20 22:48:40 +00:00
parent 3711aa3890
commit 82b4ffc2a2
8 changed files with 62 additions and 40 deletions

View File

@ -8,6 +8,7 @@ import de.diddiz.util.BukkitUtils;
import org.bukkit.Location;
import de.diddiz.LogBlock.config.Config;
import static de.diddiz.util.LoggingUtil.checkText;
import org.bukkit.Material;
public class BlockChange implements LookupCacheElement
@ -29,7 +30,7 @@ public class BlockChange implements LookupCacheElement
this.replaced = replaced;
this.type = type;
this.data = data;
this.signtext = signtext;
this.signtext = checkText(signtext);
this.ca = ca;
this.playerName = actor == null ? null : actor.getName();
}

View File

@ -1,5 +1,6 @@
package de.diddiz.LogBlock;
import static de.diddiz.util.LoggingUtil.checkText;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.bukkit.Location;
@ -14,7 +15,7 @@ public class ChatMessage implements LookupCacheElement
id = 0;
date = System.currentTimeMillis() / 1000;
this.player = player;
this.message = message;
this.message = checkText(message);
this.playerName = player == null ? null : player.getName();
}

View File

@ -86,6 +86,13 @@ public class LogBlock extends JavaPlugin
noDb = true;
return;
}
final Statement st = conn.createStatement();
final ResultSet rs = st.executeQuery("SHOW CHARACTER SET where charset='utf8mb4';");
if (rs.next()) {
Config.mb4=true;
// Allegedly JDBC driver since 2010 hasn't needed this. I did.
st.executeQuery("SET NAMES utf8mb4;");
}
conn.close();
if (updater.update())
load(this);

View File

@ -1,5 +1,6 @@
package de.diddiz.LogBlock;
import de.diddiz.LogBlock.config.Config;
import de.diddiz.LogBlock.config.WorldConfig;
import org.bukkit.Bukkit;
import org.bukkit.configuration.ConfigurationSection;
@ -208,42 +209,6 @@ class Updater
}
config.set("version", "1.52");
}
// Ensure charset for free-text fields is UTF-8
// As this may be an expensive operation and the database default may already be UTF-8, check on a table-by-table basis before converting
if (config.getString("version").compareTo("1.71") < 0) {
getLogger().info("Updating tables to 1.71 ...");
final Connection conn = logblock.getConnection();
try {
conn.setAutoCommit(true);
final Statement st = conn.createStatement();
if (isLogging(Logging.CHAT)) {
final ResultSet rs = st.executeQuery("SHOW FULL COLUMNS FROM `lb-chat` WHERE field = 'message'");
if (rs.next() && !rs.getString("Collation").substring(0,4).equalsIgnoreCase("utf8")) {
st.execute("ALTER TABLE `lb-chat` CONVERT TO CHARSET utf8");
getLogger().info("Table lb-chat modified");
} else {
getLogger().info("Table lb-chat already fine, skipping it");
}
}
for (final WorldConfig wcfg : getLoggedWorlds()) {
if (wcfg.isLogging(Logging.SIGNTEXT)) {
final ResultSet rs = st.executeQuery("SHOW FULL COLUMNS FROM `"+wcfg.table+"-sign` WHERE field = 'signtext'");
if (rs.next() && !rs.getString("Collation").substring(0,4).equalsIgnoreCase("utf8")) {
st.execute("ALTER TABLE `"+wcfg.table+"-sign` CONVERT TO CHARSET utf8");
getLogger().info("Table "+wcfg.table+"-sign modified");
} else {
getLogger().info("Table "+wcfg.table+"-sign already fine, skipping it");
}
}
}
st.close();
conn.close();
} catch (final SQLException ex) {
Bukkit.getLogger().log(Level.SEVERE, "[Updater] Error: ", ex);
return false;
}
config.set("version", "1.71");
}
if (config.getString("version").compareTo("1.81") < 0) {
getLogger().info("Updating tables to 1.81 ...");
final Connection conn = logblock.getConnection();
@ -342,7 +307,6 @@ class Updater
}
config.set("version", "1.90");
}
if (config.getString("version").compareTo("1.91") < 0) {
getLogger().info("Updating tables to 1.91 ...");
final Connection conn = logblock.getConnection();
@ -375,6 +339,44 @@ class Updater
}
config.set("version", "1.91");
}
// Ensure charset for free-text fields is UTF-8, or UTF8-mb4 if possible
// As this may be an expensive operation and the database default may already be this, check on a table-by-table basis before converting
if (config.getString("version").compareTo("1.92") < 0) {
getLogger().info("Updating tables to 1.92 ...");
String charset = "utf8";
if ( Config.mb4) charset="utf8mb4";
final Connection conn = logblock.getConnection();
try {
conn.setAutoCommit(true);
final Statement st = conn.createStatement();
if (isLogging(Logging.CHAT)) {
final ResultSet rs = st.executeQuery("SHOW FULL COLUMNS FROM `lb-chat` WHERE field = 'message'");
if (rs.next() && !rs.getString("Collation").substring(0,4).equalsIgnoreCase(charset)) {
st.execute("ALTER TABLE `lb-chat` CONVERT TO CHARSET " + charset);
getLogger().info("Table lb-chat modified");
} else {
getLogger().info("Table lb-chat already fine, skipping it");
}
}
for (final WorldConfig wcfg : getLoggedWorlds()) {
if (wcfg.isLogging(Logging.SIGNTEXT)) {
final ResultSet rs = st.executeQuery("SHOW FULL COLUMNS FROM `"+wcfg.table+"-sign` WHERE field = 'signtext'");
if (rs.next() && !rs.getString("Collation").substring(0,4).equalsIgnoreCase(charset)) {
st.execute("ALTER TABLE `"+wcfg.table+"-sign` CONVERT TO CHARSET " + charset);
getLogger().info("Table "+wcfg.table+"-sign modified");
} else {
getLogger().info("Table "+wcfg.table+"-sign already fine, skipping it");
}
}
}
st.close();
conn.close();
} catch (final SQLException ex) {
Bukkit.getLogger().log(Level.SEVERE, "[Updater] Error: ", ex);
return false;
}
config.set("version", "1.92");
}
logblock.saveConfig();
return true;
}

View File

@ -47,6 +47,8 @@ public class Config
public static SimpleDateFormat formatter;
public static boolean safetyIdCheck;
public static boolean logEnvironmentalKills;
// Not loaded from config - checked at runtime
public static boolean mb4 = false;
public static enum LogKillsLevel
{

View File

@ -4,6 +4,7 @@ import de.diddiz.LogBlock.Actor;
import de.diddiz.LogBlock.Consumer;
import de.diddiz.LogBlock.Logging;
import static de.diddiz.LogBlock.config.Config.getWorldConfig;
import static de.diddiz.LogBlock.config.Config.mb4;
import de.diddiz.LogBlock.config.WorldConfig;
import java.util.List;
import org.bukkit.Location;
@ -200,4 +201,10 @@ public class LoggingUtil {
// Do this down here so that the block is added after blocks sitting on it
consumer.queueBlockBreak(actor, origin.getState());
}
/**
 * Prepares free text (e.g. chat messages or sign text) before it is stored.
 *
 * @param text the text to check; may be null
 * @return {@code text} itself when it is null or when the {@code mb4} flag is set;
 *         otherwise a copy in which everything outside the range U+0000 to U+FFFF
 *         has been replaced with "?"
 */
public static String checkText(String text) {
    // Nothing to strip when there is no text or when mb4 is set.
    final boolean passThrough = text == null || mb4;
    return passThrough ? text : text.replaceAll("[^\\u0000-\\uFFFF]", "?");
}
}

View File

@ -1,5 +1,6 @@
package de.diddiz.util;
import static de.diddiz.LogBlock.config.Config.mb4;
import java.io.Closeable;
import java.sql.Array;
import java.sql.Blob;
@ -76,6 +77,7 @@ public class MySQLConnectionPool implements Closeable
throw new SQLException("Failed to validate a brand new connection");
}
connections.add(conn);
if (mb4) conn.createStatement().executeQuery("SET NAMES utf8mb4");
return conn;
} finally {
lock.unlock();

View File

@ -1,5 +1,5 @@
name: ${project.name}
version: '1.91'
version: '1.92'
author: DiddiZ
authors: [md_5, ammar2, frymaster]
website: http://dev.bukkit.org/server-mods/logblock/