001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.data.validation.tests; 003 004import static org.openstreetmap.josm.tools.I18n.marktr; 005import static org.openstreetmap.josm.tools.I18n.tr; 006 007import java.awt.GridBagConstraints; 008import java.awt.event.ActionListener; 009import java.io.BufferedReader; 010import java.io.IOException; 011import java.lang.Character.UnicodeBlock; 012import java.util.ArrayList; 013import java.util.Arrays; 014import java.util.Collection; 015import java.util.Collections; 016import java.util.HashMap; 017import java.util.HashSet; 018import java.util.List; 019import java.util.Locale; 020import java.util.Map; 021import java.util.Map.Entry; 022import java.util.Set; 023import java.util.regex.Pattern; 024 025import javax.swing.JCheckBox; 026import javax.swing.JLabel; 027import javax.swing.JPanel; 028 029import org.openstreetmap.josm.command.ChangePropertyCommand; 030import org.openstreetmap.josm.command.ChangePropertyKeyCommand; 031import org.openstreetmap.josm.command.Command; 032import org.openstreetmap.josm.command.SequenceCommand; 033import org.openstreetmap.josm.data.osm.AbstractPrimitive; 034import org.openstreetmap.josm.data.osm.OsmPrimitive; 035import org.openstreetmap.josm.data.osm.Tag; 036import org.openstreetmap.josm.data.osm.Tagged; 037import org.openstreetmap.josm.data.preferences.sources.ValidatorPrefHelper; 038import org.openstreetmap.josm.data.validation.Severity; 039import org.openstreetmap.josm.data.validation.Test.TagTest; 040import org.openstreetmap.josm.data.validation.TestError; 041import org.openstreetmap.josm.data.validation.util.Entities; 042import org.openstreetmap.josm.gui.progress.ProgressMonitor; 043import org.openstreetmap.josm.gui.tagging.presets.TaggingPreset; 044import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetItem; 045import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetListener; 046import org.openstreetmap.josm.gui.tagging.presets.TaggingPresets; 047import org.openstreetmap.josm.gui.tagging.presets.items.Check; 048import org.openstreetmap.josm.gui.tagging.presets.items.CheckGroup; 049import org.openstreetmap.josm.gui.tagging.presets.items.KeyedItem; 050import org.openstreetmap.josm.gui.widgets.EditableList; 051import org.openstreetmap.josm.io.CachedFile; 052import org.openstreetmap.josm.spi.preferences.Config; 053import org.openstreetmap.josm.tools.GBC; 054import org.openstreetmap.josm.tools.Logging; 055import org.openstreetmap.josm.tools.MultiMap; 056import org.openstreetmap.josm.tools.Utils; 057 058/** 059 * Check for misspelled or wrong tags 060 * 061 * @author frsantos 062 * @since 3669 063 */ 064public class TagChecker extends TagTest implements TaggingPresetListener { 065 066 /** The config file of ignored tags */ 067 public static final String IGNORE_FILE = "resource://data/validator/ignoretags.cfg"; 068 /** The config file of dictionary words */ 069 public static final String SPELL_FILE = "resource://data/validator/words.cfg"; 070 071 /** Normalized keys: the key should be substituted by the value if the key was not found in presets */ 072 private static final Map<String, String> harmonizedKeys = new HashMap<>(); 073 /** The spell check preset values which are not stored in TaggingPresets */ 074 private static volatile HashSet<String> additionalPresetsValueData; 075 /** often used tags which are not in presets */ 076 private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>(); 077 078 private static final Pattern UNWANTED_NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile( 079 "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200e-\\u200f\\u202a-\\u202e]"); 080 081 /** The TagChecker data */ 082 private static final List<String> ignoreDataStartsWith = new ArrayList<>(); 083 private static final Set<String> ignoreDataEquals = new HashSet<>(); 084 private static final List<String> ignoreDataEndsWith = new ArrayList<>(); 085 private static final List<Tag> ignoreDataTag = new ArrayList<>(); 086 /** tag keys that have only numerical values in the presets */ 087 private static final Set<String> ignoreForLevenshtein = new HashSet<>(); 088 089 /** The preferences prefix */ 090 protected static final String PREFIX = ValidatorPrefHelper.PREFIX + "." + TagChecker.class.getSimpleName(); 091 092 /** 093 * The preference key to check values 094 */ 095 public static final String PREF_CHECK_VALUES = PREFIX + ".checkValues"; 096 /** 097 * The preference key to check keys 098 */ 099 public static final String PREF_CHECK_KEYS = PREFIX + ".checkKeys"; 100 /** 101 * The preference key to enable complex checks 102 */ 103 public static final String PREF_CHECK_COMPLEX = PREFIX + ".checkComplex"; 104 /** 105 * The preference key to search for fixme tags 106 */ 107 public static final String PREF_CHECK_FIXMES = PREFIX + ".checkFixmes"; 108 109 /** 110 * The preference key for source files 111 * @see #DEFAULT_SOURCES 112 */ 113 public static final String PREF_SOURCES = PREFIX + ".source"; 114 115 private static final String BEFORE_UPLOAD = "BeforeUpload"; 116 /** 117 * The preference key to check keys - used before upload 118 */ 119 public static final String PREF_CHECK_KEYS_BEFORE_UPLOAD = PREF_CHECK_KEYS + BEFORE_UPLOAD; 120 /** 121 * The preference key to check values - used before upload 122 */ 123 public static final String PREF_CHECK_VALUES_BEFORE_UPLOAD = PREF_CHECK_VALUES + BEFORE_UPLOAD; 124 /** 125 * The preference key to run complex tests - used before upload 126 */ 127 public static final String PREF_CHECK_COMPLEX_BEFORE_UPLOAD = PREF_CHECK_COMPLEX + BEFORE_UPLOAD; 128 /** 129 * The preference key to search for fixmes - used before upload 130 */ 131 public static final String PREF_CHECK_FIXMES_BEFORE_UPLOAD = PREF_CHECK_FIXMES + BEFORE_UPLOAD; 132 133 private static final int MAX_LEVENSHTEIN_DISTANCE = 2; 134 135 protected boolean checkKeys; 136 protected boolean checkValues; 137 /** Was used for special configuration file, might be used to disable value spell checker. */ 138 protected boolean checkComplex; 139 protected boolean checkFixmes; 140 141 protected JCheckBox prefCheckKeys; 142 protected JCheckBox prefCheckValues; 143 protected JCheckBox prefCheckComplex; 144 protected JCheckBox prefCheckFixmes; 145 protected JCheckBox prefCheckPaint; 146 147 protected JCheckBox prefCheckKeysBeforeUpload; 148 protected JCheckBox prefCheckValuesBeforeUpload; 149 protected JCheckBox prefCheckComplexBeforeUpload; 150 protected JCheckBox prefCheckFixmesBeforeUpload; 151 protected JCheckBox prefCheckPaintBeforeUpload; 152 153 // CHECKSTYLE.OFF: SingleSpaceSeparator 154 protected static final int EMPTY_VALUES = 1200; 155 protected static final int INVALID_KEY = 1201; 156 protected static final int INVALID_VALUE = 1202; 157 protected static final int FIXME = 1203; 158 protected static final int INVALID_SPACE = 1204; 159 protected static final int INVALID_KEY_SPACE = 1205; 160 protected static final int INVALID_HTML = 1206; /* 1207 was PAINT */ 161 protected static final int LONG_VALUE = 1208; 162 protected static final int LONG_KEY = 1209; 163 protected static final int LOW_CHAR_VALUE = 1210; 164 protected static final int LOW_CHAR_KEY = 1211; 165 protected static final int MISSPELLED_VALUE = 1212; 166 protected static final int MISSPELLED_KEY = 1213; 167 protected static final int MULTIPLE_SPACES = 1214; 168 protected static final int MISSPELLED_VALUE_NO_FIX = 1215; 169 protected static final int UNUSUAL_UNICODE_CHAR_VALUE = 1216; 170 // CHECKSTYLE.ON: SingleSpaceSeparator 171 172 protected EditableList sourcesList; 173 174 private static final List<String> DEFAULT_SOURCES = Arrays.asList(IGNORE_FILE, SPELL_FILE); 175 176 /** 177 * Constructor 178 */ 179 public TagChecker() { 180 super(tr("Tag checker"), tr("This test checks for errors in tag keys and values.")); 181 } 182 183 @Override 184 public void initialize() throws IOException { 185 TaggingPresets.addListener(this); 186 initializeData(); 187 initializePresets(); 188 analysePresets(); 189 } 190 191 /** 192 * Add presets that contain only numerical values to the ignore list 193 */ 194 private static void analysePresets() { 195 for (String key : TaggingPresets.getPresetKeys()) { 196 if (isKeyIgnored(key)) 197 continue; 198 boolean allNumerical = true; 199 Set<String> values = TaggingPresets.getPresetValues(key); 200 if (values.isEmpty()) 201 allNumerical = false; 202 for (String val : values) { 203 if (!isNum(val)) { 204 allNumerical = false; 205 break; 206 } 207 } 208 if (allNumerical) { 209 ignoreForLevenshtein.add(key); 210 } 211 } 212 } 213 214 /** 215 * Reads the spell-check file into a HashMap. 216 * The data file is a list of words, beginning with +/-. If it starts with +, 217 * the word is valid, but if it starts with -, the word should be replaced 218 * by the nearest + word before this. 219 * 220 * @throws IOException if any I/O error occurs 221 */ 222 private static void initializeData() throws IOException { 223 ignoreDataStartsWith.clear(); 224 ignoreDataEquals.clear(); 225 ignoreDataEndsWith.clear(); 226 ignoreDataTag.clear(); 227 harmonizedKeys.clear(); 228 ignoreForLevenshtein.clear(); 229 oftenUsedTags.clear(); 230 231 StringBuilder errorSources = new StringBuilder(); 232 for (String source : Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES)) { 233 try ( 234 CachedFile cf = new CachedFile(source); 235 BufferedReader reader = cf.getContentReader() 236 ) { 237 String okValue = null; 238 boolean tagcheckerfile = false; 239 boolean ignorefile = false; 240 boolean isFirstLine = true; 241 String line; 242 while ((line = reader.readLine()) != null) { 243 if (line.isEmpty()) { 244 // ignore 245 } else if (line.startsWith("#")) { 246 if (line.startsWith("# JOSM TagChecker")) { 247 tagcheckerfile = true; 248 Logging.error(tr("Ignoring {0}. Support was dropped", source)); 249 } else 250 if (line.startsWith("# JOSM IgnoreTags")) { 251 ignorefile = true; 252 if (!DEFAULT_SOURCES.contains(source)) { 253 Logging.info(tr("Adding {0} to ignore tags", source)); 254 } 255 } 256 } else if (ignorefile) { 257 parseIgnoreFileLine(source, line); 258 } else if (tagcheckerfile) { 259 // ignore 260 } else if (line.charAt(0) == '+') { 261 okValue = line.substring(1); 262 } else if (line.charAt(0) == '-' && okValue != null) { 263 String hk = harmonizeKey(line.substring(1)); 264 if (!okValue.equals(hk) && harmonizedKeys.put(hk, okValue) != null) { 265 Logging.debug(tr("Line was ignored: {0}", line)); 266 } 267 } else { 268 Logging.error(tr("Invalid spellcheck line: {0}", line)); 269 } 270 if (isFirstLine) { 271 isFirstLine = false; 272 if (!(tagcheckerfile || ignorefile) && !DEFAULT_SOURCES.contains(source)) { 273 Logging.info(tr("Adding {0} to spellchecker", source)); 274 } 275 } 276 } 277 } catch (IOException e) { 278 Logging.error(e); 279 errorSources.append(source).append('\n'); 280 } 281 } 282 283 if (errorSources.length() > 0) 284 throw new IOException(tr("Could not access data file(s):\n{0}", errorSources)); 285 } 286 287 /** 288 * Parse a line found in a configuration file 289 * @param source name of configuration file 290 * @param line the line to parse 291 */ 292 private static void parseIgnoreFileLine(String source, String line) { 293 line = line.trim(); 294 if (line.length() < 4) { 295 return; 296 } 297 try { 298 String key = line.substring(0, 2); 299 line = line.substring(2); 300 301 switch (key) { 302 case "S:": 303 ignoreDataStartsWith.add(line); 304 break; 305 case "E:": 306 ignoreDataEquals.add(line); 307 addToKeyDictionary(line); 308 break; 309 case "F:": 310 ignoreDataEndsWith.add(line); 311 break; 312 case "K:": 313 Tag tag = Tag.ofString(line); 314 ignoreDataTag.add(tag); 315 oftenUsedTags.put(tag.getKey(), tag.getValue()); 316 addToKeyDictionary(tag.getKey()); 317 break; 318 default: 319 if (!key.startsWith(";")) { 320 Logging.warn("Unsupported TagChecker key: " + key); 321 } 322 } 323 } catch (IllegalArgumentException e) { 324 Logging.error("Invalid line in {0} : {1}", source, e.getMessage()); 325 Logging.trace(e); 326 } 327 } 328 329 private static void addToKeyDictionary(String key) { 330 if (key != null) { 331 String hk = harmonizeKey(key); 332 if (!key.equals(hk)) { 333 harmonizedKeys.put(hk, key); 334 } 335 } 336 } 337 338 /** 339 * Reads the presets data. 340 * 341 */ 342 public static void initializePresets() { 343 344 if (!Config.getPref().getBoolean(PREF_CHECK_VALUES, true)) 345 return; 346 347 Collection<TaggingPreset> presets = TaggingPresets.getTaggingPresets(); 348 if (!presets.isEmpty()) { 349 initAdditionalPresetsValueData(); 350 for (TaggingPreset p : presets) { 351 for (TaggingPresetItem i : p.data) { 352 if (i instanceof KeyedItem) { 353 addPresetValue((KeyedItem) i); 354 } else if (i instanceof CheckGroup) { 355 for (Check c : ((CheckGroup) i).checks) { 356 addPresetValue(c); 357 } 358 } 359 } 360 } 361 } 362 } 363 364 private static void initAdditionalPresetsValueData() { 365 additionalPresetsValueData = new HashSet<>(); 366 for (String a : AbstractPrimitive.getUninterestingKeys()) { 367 additionalPresetsValueData.add(a); 368 } 369 for (String a : Config.getPref().getList(ValidatorPrefHelper.PREFIX + ".knownkeys", 370 Arrays.asList("is_in", "int_ref", "fixme", "population"))) { 371 additionalPresetsValueData.add(a); 372 } 373 } 374 375 private static void addPresetValue(KeyedItem ky) { 376 if (ky.key != null && ky.getValues() != null) { 377 addToKeyDictionary(ky.key); 378 } 379 } 380 381 /** 382 * Checks given string (key or value) if it contains unwanted non-printing control characters (either ASCII or Unicode bidi characters) 383 * @param s string to check 384 * @return {@code true} if {@code s} contains non-printing control characters 385 */ 386 static boolean containsUnwantedNonPrintingControlCharacter(String s) { 387 return s != null && !s.isEmpty() && ( 388 isJoiningChar(s.charAt(0)) || 389 isJoiningChar(s.charAt(s.length() - 1)) || 390 s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c)) 391 ); 392 } 393 394 private static boolean isAsciiControlChar(int c) { 395 return c < 0x20 || c == 0x7F; 396 } 397 398 private static boolean isNewLineChar(int c) { 399 return c == 0x0a || c == 0x0d; 400 } 401 402 private static boolean isJoiningChar(int c) { 403 return c == 0x200c || c == 0x200d; // ZWNJ, ZWJ 404 } 405 406 private static boolean isBidiControlChar(int c) { 407 /* check for range 0x200e to 0x200f (LRM, RLM) or 408 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */ 409 return (c >= 0x200e && c <= 0x200f) || (c >= 0x202a && c <= 0x202e); 410 } 411 412 static String removeUnwantedNonPrintingControlCharacters(String s) { 413 // Remove all unwanted characters 414 String result = UNWANTED_NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll(""); 415 // Remove joining characters located at the beginning of the string 416 while (!result.isEmpty() && isJoiningChar(result.charAt(0))) { 417 result = result.substring(1); 418 } 419 // Remove joining characters located at the end of the string 420 while (!result.isEmpty() && isJoiningChar(result.charAt(result.length() - 1))) { 421 result = result.substring(0, result.length() - 1); 422 } 423 return result; 424 } 425 426 static boolean containsUnusualUnicodeCharacter(String key, String value) { 427 return value != null && value.chars().anyMatch(c -> isUnusualUnicodeBlock(key, c)); 428 } 429 430 /** 431 * Detects highly suspicious Unicode characters that have been seen in OSM database. 432 * @param key tag key 433 * @param c current character code point 434 * @return {@code true} if the current unicode block is very unusual for the given key 435 */ 436 private static boolean isUnusualUnicodeBlock(String key, int c) { 437 UnicodeBlock b = UnicodeBlock.of(c); 438 return isUnusualPhoneticUse(key, b, c) || isUnusualBmpUse(b) || isUnusualSmpUse(b); 439 } 440 441 private static boolean isAllowedPhoneticCharacter(String key, int c) { 442 return c == 0x0259 // U+0259 is used as a standard character in azerbaidjani 443 || (key.endsWith("ref") && 0x1D2C <= c && c <= 0x1D42); // allow uppercase superscript latin characters in *ref tags 444 } 445 446 private static boolean isUnusualPhoneticUse(String key, UnicodeBlock b, int c) { 447 return !isAllowedPhoneticCharacter(key, c) 448 && (b == UnicodeBlock.IPA_EXTENSIONS // U+0250..U+02AF 449 || b == UnicodeBlock.PHONETIC_EXTENSIONS // U+1D00..U+1D7F 450 || b == UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT) // U+1D80..U+1DBF 451 && !key.endsWith(":pronunciation"); 452 } 453 454 private static boolean isUnusualBmpUse(UnicodeBlock b) { 455 // CHECKSTYLE.OFF: BooleanExpressionComplexity 456 return b == UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS // U+20D0..U+20FF 457 || b == UnicodeBlock.MATHEMATICAL_OPERATORS // U+2200..U+22FF 458 || b == UnicodeBlock.ENCLOSED_ALPHANUMERICS // U+2460..U+24FF 459 || b == UnicodeBlock.BOX_DRAWING // U+2500..U+257F 460 || b == UnicodeBlock.GEOMETRIC_SHAPES // U+25A0..U+25FF 461 || b == UnicodeBlock.DINGBATS // U+2700..U+27BF 462 || b == UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS // U+2B00..U+2BFF 463 || b == UnicodeBlock.GLAGOLITIC // U+2C00..U+2C5F 464 || b == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO // U+3130..U+318F 465 || b == UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS // U+3200..U+32FF 466 || b == UnicodeBlock.LATIN_EXTENDED_D // U+A720..U+A7FF 467 || b == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS // U+F900..U+FAFF 468 || b == UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS // U+FB00..U+FB4F 469 || b == UnicodeBlock.VARIATION_SELECTORS // U+FE00..U+FE0F 470 || b == UnicodeBlock.SPECIALS; // U+FFF0..U+FFFF 471 // CHECKSTYLE.ON: BooleanExpressionComplexity 472 } 473 474 private static boolean isUnusualSmpUse(UnicodeBlock b) { 475 // UnicodeBlock.SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS is only defined in Java 9+ 476 return b == UnicodeBlock.MUSICAL_SYMBOLS // U+1D100..U+1D1FF 477 || b == UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT // U+1F100..U+1F1FF 478 || b == UnicodeBlock.EMOTICONS // U+1F600..U+1F64F 479 || b == UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS; // U+1F680..U+1F6FF 480 } 481 482 /** 483 * Get set of preset values for the given key. 484 * @param key the key 485 * @return null if key is not in presets or in additionalPresetsValueData, 486 * else a set which might be empty. 487 */ 488 private static Set<String> getPresetValues(String key) { 489 Set<String> res = TaggingPresets.getPresetValues(key); 490 if (res != null) 491 return res; 492 if (additionalPresetsValueData.contains(key)) 493 return Collections.emptySet(); 494 // null means key is not known 495 return null; 496 } 497 498 /** 499 * Determines if the given key is in internal presets. 500 * @param key key 501 * @return {@code true} if the given key is in internal presets 502 * @since 9023 503 */ 504 public static boolean isKeyInPresets(String key) { 505 return TaggingPresets.getPresetValues(key) != null; 506 } 507 508 /** 509 * Determines if the given tag is in internal presets. 510 * @param key key 511 * @param value value 512 * @return {@code true} if the given tag is in internal presets 513 * @since 9023 514 */ 515 public static boolean isTagInPresets(String key, String value) { 516 final Set<String> values = getPresetValues(key); 517 return values != null && values.contains(value); 518 } 519 520 /** 521 * Returns the list of ignored tags. 522 * @return the list of ignored tags 523 * @since 9023 524 */ 525 public static List<Tag> getIgnoredTags() { 526 return new ArrayList<>(ignoreDataTag); 527 } 528 529 /** 530 * Determines if the given tag key is ignored for checks "key/tag not in presets". 531 * @param key key 532 * @return true if the given key is ignored 533 */ 534 private static boolean isKeyIgnored(String key) { 535 if (ignoreDataEquals.contains(key)) { 536 return true; 537 } 538 for (String a : ignoreDataStartsWith) { 539 if (key.startsWith(a)) { 540 return true; 541 } 542 } 543 for (String a : ignoreDataEndsWith) { 544 if (key.endsWith(a)) { 545 return true; 546 } 547 } 548 return false; 549 } 550 551 /** 552 * Determines if the given tag is ignored for checks "key/tag not in presets". 553 * @param key key 554 * @param value value 555 * @return {@code true} if the given tag is ignored 556 * @since 9023 557 */ 558 public static boolean isTagIgnored(String key, String value) { 559 if (isKeyIgnored(key)) 560 return true; 561 final Set<String> values = getPresetValues(key); 562 if (values != null && values.isEmpty()) 563 return true; 564 if (!isTagInPresets(key, value)) { 565 for (Tag a : ignoreDataTag) { 566 if (key.equals(a.getKey()) && value.equals(a.getValue())) { 567 return true; 568 } 569 } 570 } 571 return false; 572 } 573 574 /** 575 * Checks the primitive tags 576 * @param p The primitive to check 577 */ 578 @Override 579 public void check(OsmPrimitive p) { 580 if (!p.isTagged()) 581 return; 582 583 // Just a collection to know if a primitive has been already marked with error 584 MultiMap<OsmPrimitive, String> withErrors = new MultiMap<>(); 585 586 for (Entry<String, String> prop : p.getKeys().entrySet()) { 587 String s = marktr("Tag ''{0}'' invalid."); 588 String key = prop.getKey(); 589 String value = prop.getValue(); 590 591 if (checkKeys) { 592 checkSingleTagKeySimple(withErrors, p, s, key); 593 } 594 if (checkValues) { 595 checkSingleTagValueSimple(withErrors, p, s, key, value); 596 checkSingleTagComplex(withErrors, p, key, value); 597 } 598 if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) { 599 errors.add(TestError.builder(this, Severity.OTHER, FIXME) 600 .message(tr("FIXMES")) 601 .primitives(p) 602 .build()); 603 withErrors.put(p, "FIXME"); 604 } 605 } 606 } 607 608 private void checkSingleTagValueSimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key, String value) { 609 if (!checkValues || value == null) 610 return; 611 if ((containsUnwantedNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) { 612 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE) 613 .message(tr("Tag value contains non-printing (usually invisible) character"), s, key) 614 .primitives(p) 615 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(value))) 616 .build()); 617 withErrors.put(p, "ICV"); 618 } 619 if ((containsUnusualUnicodeCharacter(key, value)) && !withErrors.contains(p, "UUCV")) { 620 errors.add(TestError.builder(this, Severity.WARNING, UNUSUAL_UNICODE_CHAR_VALUE) 621 .message(tr("Tag value contains unusual Unicode character"), s, key) 622 .primitives(p) 623 .build()); 624 withErrors.put(p, "UUCV"); 625 } 626 if ((value.length() > Tagged.MAX_TAG_LENGTH) && !withErrors.contains(p, "LV")) { 627 errors.add(TestError.builder(this, Severity.ERROR, LONG_VALUE) 628 .message(tr("Tag value longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, value.length()), s, key) 629 .primitives(p) 630 .build()); 631 withErrors.put(p, "LV"); 632 } 633 if ((value.trim().isEmpty()) && !withErrors.contains(p, "EV")) { 634 errors.add(TestError.builder(this, Severity.WARNING, EMPTY_VALUES) 635 .message(tr("Tags with empty values"), s, key) 636 .primitives(p) 637 .build()); 638 withErrors.put(p, "EV"); 639 } 640 final String errTypeSpace = "SPACE"; 641 if ((value.startsWith(" ") || value.endsWith(" ")) && !withErrors.contains(p, errTypeSpace)) { 642 errors.add(TestError.builder(this, Severity.WARNING, INVALID_SPACE) 643 .message(tr("Property values start or end with white space"), s, key) 644 .primitives(p) 645 .build()); 646 withErrors.put(p, errTypeSpace); 647 } 648 if (value.contains(" ") && !withErrors.contains(p, errTypeSpace)) { 649 errors.add(TestError.builder(this, Severity.WARNING, MULTIPLE_SPACES) 650 .message(tr("Property values contain multiple white spaces"), s, key) 651 .primitives(p) 652 .build()); 653 withErrors.put(p, errTypeSpace); 654 } 655 if (!value.equals(Entities.unescape(value)) && !withErrors.contains(p, "HTML")) { 656 errors.add(TestError.builder(this, Severity.OTHER, INVALID_HTML) 657 .message(tr("Property values contain HTML entity"), s, key) 658 .primitives(p) 659 .build()); 660 withErrors.put(p, "HTML"); 661 } 662 } 663 664 private void checkSingleTagKeySimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key) { 665 if (!checkKeys || key == null) 666 return; 667 if ((containsUnwantedNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) { 668 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY) 669 .message(tr("Tag key contains non-printing character"), s, key) 670 .primitives(p) 671 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(key))) 672 .build()); 673 withErrors.put(p, "ICK"); 674 } 675 if (key.length() > Tagged.MAX_TAG_LENGTH && !withErrors.contains(p, "LK")) { 676 errors.add(TestError.builder(this, Severity.ERROR, LONG_KEY) 677 .message(tr("Tag key longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, key.length()), s, key) 678 .primitives(p) 679 .build()); 680 withErrors.put(p, "LK"); 681 } 682 if (key.indexOf(' ') >= 0 && !withErrors.contains(p, "IPK")) { 683 errors.add(TestError.builder(this, Severity.WARNING, INVALID_KEY_SPACE) 684 .message(tr("Invalid white space in property key"), s, key) 685 .primitives(p) 686 .build()); 687 withErrors.put(p, "IPK"); 688 } 689 } 690 691 private void checkSingleTagComplex(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key, String value) { 692 if (!checkValues || key == null || value == null || value.isEmpty()) 693 return; 694 if (additionalPresetsValueData != null && !isTagIgnored(key, value)) { 695 if (!isKeyInPresets(key)) { 696 spellCheckKey(withErrors, p, key); 697 } else if (!isTagInPresets(key, value)) { 698 if (oftenUsedTags.contains(key, value)) { 699 // tag is quite often used but not in presets 700 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE) 701 .message(tr("Presets do not contain property value"), 702 marktr("Value ''{0}'' for key ''{1}'' not in presets, but is known."), value, key) 703 .primitives(p) 704 .build()); 705 withErrors.put(p, "UPV"); 706 } else { 707 tryGuess(p, key, value, withErrors); 708 } 709 } 710 } 711 } 712 713 private void spellCheckKey(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key) { 714 String prettifiedKey = harmonizeKey(key); 715 String fixedKey; 716 if (ignoreDataEquals.contains(prettifiedKey)) { 717 fixedKey = prettifiedKey; 718 } else { 719 fixedKey = isKeyInPresets(prettifiedKey) ? prettifiedKey : harmonizedKeys.get(prettifiedKey); 720 } 721 if (fixedKey == null) { 722 for (Tag a : ignoreDataTag) { 723 if (a.getKey().equals(prettifiedKey)) { 724 fixedKey = prettifiedKey; 725 break; 726 } 727 } 728 } 729 730 if (fixedKey != null && !"".equals(fixedKey) && !fixedKey.equals(key)) { 731 final String proposedKey = fixedKey; 732 // misspelled preset key 733 final TestError.Builder error = TestError.builder(this, Severity.WARNING, MISSPELLED_KEY) 734 .message(tr("Misspelled property key"), marktr("Key ''{0}'' looks like ''{1}''."), key, proposedKey) 735 .primitives(p); 736 if (p.hasKey(fixedKey)) { 737 errors.add(error.build()); 738 } else { 739 errors.add(error.fix(() -> new ChangePropertyKeyCommand(p, key, proposedKey)).build()); 740 } 741 withErrors.put(p, "WPK"); 742 } else { 743 errors.add(TestError.builder(this, Severity.OTHER, INVALID_KEY) 744 .message(tr("Presets do not contain property key"), marktr("Key ''{0}'' not in presets."), key) 745 .primitives(p) 746 .build()); 747 withErrors.put(p, "UPK"); 748 } 749 } 750 751 private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) { 752 // try to fix common typos and check again if value is still unknown 753 final String harmonizedValue = harmonizeValue(value); 754 if (harmonizedValue == null || harmonizedValue.isEmpty()) 755 return; 756 String fixedValue = null; 757 List<Set<String>> sets = new ArrayList<>(); 758 Set<String> presetValues = getPresetValues(key); 759 if (presetValues != null) 760 sets.add(presetValues); 761 Set<String> usedValues = oftenUsedTags.get(key); 762 if (usedValues != null) 763 sets.add(usedValues); 764 for (Set<String> possibleValues: sets) { 765 if (possibleValues.contains(harmonizedValue)) { 766 fixedValue = harmonizedValue; 767 break; 768 } 769 } 770 if (fixedValue == null && !ignoreForLevenshtein.contains(key)) { 771 int maxPresetValueLen = 0; 772 List<String> fixVals = new ArrayList<>(); 773 // use Levenshtein distance to find typical typos 774 int minDist = MAX_LEVENSHTEIN_DISTANCE + 1; 775 String closest = null; 776 for (Set<String> possibleValues: sets) { 777 for (String possibleVal : possibleValues) { 778 if (possibleVal.isEmpty()) 779 continue; 780 maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length()); 781 if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) { 782 // don't suggest fix value when given value is short and lengths are too different 783 // for example surface=u would result in surface=mud 784 continue; 785 } 786 int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue); 787 if (dist >= harmonizedValue.length()) { 788 // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'. 789 continue; 790 } 791 if (dist < minDist) { 792 closest = possibleVal; 793 minDist = dist; 794 fixVals.clear(); 795 fixVals.add(possibleVal); 796 } else if (dist == minDist) { 797 fixVals.add(possibleVal); 798 } 799 } 800 } 801 802 if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE 803 && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) { 804 if (fixVals.size() < 2) { 805 fixedValue = closest; 806 } else { 807 Collections.sort(fixVals); 808 // misspelled preset value with multiple good alternatives 809 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX) 810 .message(tr("Unknown property value"), 811 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe one of {2} is meant?"), 812 value, key, fixVals) 813 .primitives(p).build()); 814 withErrors.put(p, "WPV"); 815 return; 816 } 817 } 818 } 819 if (fixedValue != null && !fixedValue.equals(value)) { 820 final String newValue = fixedValue; 821 // misspelled preset value 822 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE) 823 .message(tr("Unknown property value"), 824 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe ''{2}'' is meant?"), value, key, newValue) 825 .primitives(p) 826 .build()); 827 withErrors.put(p, "WPV"); 828 } else { 829 // unknown preset value 830 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE) 831 .message(tr("Presets do not contain property value"), 832 marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key) 833 .primitives(p) 834 .build()); 835 withErrors.put(p, "UPV"); 836 } 837 } 838 839 private static boolean isNum(String harmonizedValue) { 840 try { 841 Double.parseDouble(harmonizedValue); 842 return true; 843 } catch (NumberFormatException e) { 844 return false; 845 } 846 } 847 848 private static boolean isFixme(String key, String value) { 849 return key.toLowerCase(Locale.ENGLISH).contains("fixme") || key.contains("todo") 850 || value.toLowerCase(Locale.ENGLISH).contains("fixme") || value.contains("check and delete"); 851 } 852 853 private static String harmonizeKey(String key) { 854 return Utils.strip(key.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(':', '_').replace(' ', '_'), "-_;:,"); 855 } 856 857 private static String harmonizeValue(String value) { 858 return Utils.strip(value.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(' ', '_'), "-_;:,"); 859 } 860 861 @Override 862 public void startTest(ProgressMonitor monitor) { 863 super.startTest(monitor); 864 checkKeys = Config.getPref().getBoolean(PREF_CHECK_KEYS, true); 865 if (isBeforeUpload) { 866 checkKeys = checkKeys && Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true); 867 } 868 869 checkValues = Config.getPref().getBoolean(PREF_CHECK_VALUES, true); 870 if (isBeforeUpload) { 871 checkValues = checkValues && Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true); 872 } 873 874 checkComplex = Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true); 875 if (isBeforeUpload) { 876 checkComplex = checkComplex && Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true); 877 } 878 879 checkFixmes = Config.getPref().getBoolean(PREF_CHECK_FIXMES, true); 880 if (isBeforeUpload) { 881 checkFixmes = checkFixmes && Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true); 882 } 883 } 884 885 @Override 886 public void visit(Collection<OsmPrimitive> selection) { 887 if (checkKeys || checkValues || checkComplex || checkFixmes) { 888 super.visit(selection); 889 } 890 } 891 892 @Override 893 public void addGui(JPanel testPanel) { 894 GBC a = GBC.eol(); 895 a.anchor = GridBagConstraints.EAST; 896 897 testPanel.add(new JLabel(name+" :"), GBC.eol().insets(3, 0, 0, 0)); 898 899 prefCheckKeys = new JCheckBox(tr("Check property keys."), Config.getPref().getBoolean(PREF_CHECK_KEYS, true)); 900 prefCheckKeys.setToolTipText(tr("Validate that property keys are valid checking against list of words.")); 901 testPanel.add(prefCheckKeys, GBC.std().insets(20, 0, 0, 0)); 902 903 prefCheckKeysBeforeUpload = new JCheckBox(); 904 prefCheckKeysBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true)); 905 testPanel.add(prefCheckKeysBeforeUpload, a); 906 907 prefCheckComplex = new JCheckBox(tr("Use complex property checker."), Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true)); 908 prefCheckComplex.setToolTipText(tr("Validate property values and tags using complex rules.")); 909 testPanel.add(prefCheckComplex, GBC.std().insets(20, 0, 0, 0)); 910 911 prefCheckComplexBeforeUpload = new JCheckBox(); 912 prefCheckComplexBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true)); 913 testPanel.add(prefCheckComplexBeforeUpload, a); 914 915 final Collection<String> sources = Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES); 916 sourcesList = new EditableList(tr("TagChecker source")); 917 sourcesList.setItems(sources); 918 testPanel.add(new JLabel(tr("Data sources ({0})", "*.cfg")), GBC.eol().insets(23, 0, 0, 0)); 919 testPanel.add(sourcesList, GBC.eol().fill(GridBagConstraints.HORIZONTAL).insets(23, 0, 0, 0)); 920 921 ActionListener disableCheckActionListener = e -> handlePrefEnable(); 922 prefCheckKeys.addActionListener(disableCheckActionListener); 923 prefCheckKeysBeforeUpload.addActionListener(disableCheckActionListener); 924 prefCheckComplex.addActionListener(disableCheckActionListener); 925 prefCheckComplexBeforeUpload.addActionListener(disableCheckActionListener); 926 927 handlePrefEnable(); 928 929 prefCheckValues = new JCheckBox(tr("Check property values."), Config.getPref().getBoolean(PREF_CHECK_VALUES, true)); 930 prefCheckValues.setToolTipText(tr("Validate that property values are valid checking against presets.")); 931 testPanel.add(prefCheckValues, GBC.std().insets(20, 0, 0, 0)); 932 933 prefCheckValuesBeforeUpload = new JCheckBox(); 934 prefCheckValuesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true)); 935 testPanel.add(prefCheckValuesBeforeUpload, a); 936 937 prefCheckFixmes = new JCheckBox(tr("Check for FIXMES."), Config.getPref().getBoolean(PREF_CHECK_FIXMES, true)); 938 prefCheckFixmes.setToolTipText(tr("Looks for nodes or ways with FIXME in any property value.")); 939 testPanel.add(prefCheckFixmes, GBC.std().insets(20, 0, 0, 0)); 940 941 prefCheckFixmesBeforeUpload = new JCheckBox(); 942 prefCheckFixmesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true)); 943 testPanel.add(prefCheckFixmesBeforeUpload, a); 944 } 945 946 /** 947 * Enables/disables the source list field 948 */ 949 public void handlePrefEnable() { 950 boolean selected = prefCheckKeys.isSelected() || prefCheckKeysBeforeUpload.isSelected() 951 || prefCheckComplex.isSelected() || prefCheckComplexBeforeUpload.isSelected(); 952 sourcesList.setEnabled(selected); 953 } 954 955 @Override 956 public boolean ok() { 957 enabled = prefCheckKeys.isSelected() || prefCheckValues.isSelected() || prefCheckComplex.isSelected() || prefCheckFixmes.isSelected(); 958 testBeforeUpload = prefCheckKeysBeforeUpload.isSelected() || prefCheckValuesBeforeUpload.isSelected() 959 || prefCheckFixmesBeforeUpload.isSelected() || prefCheckComplexBeforeUpload.isSelected(); 960 961 Config.getPref().putBoolean(PREF_CHECK_VALUES, prefCheckValues.isSelected()); 962 Config.getPref().putBoolean(PREF_CHECK_COMPLEX, prefCheckComplex.isSelected()); 963 Config.getPref().putBoolean(PREF_CHECK_KEYS, prefCheckKeys.isSelected()); 964 Config.getPref().putBoolean(PREF_CHECK_FIXMES, prefCheckFixmes.isSelected()); 965 Config.getPref().putBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, prefCheckValuesBeforeUpload.isSelected()); 966 Config.getPref().putBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, prefCheckComplexBeforeUpload.isSelected()); 967 Config.getPref().putBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, prefCheckKeysBeforeUpload.isSelected()); 968 Config.getPref().putBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, prefCheckFixmesBeforeUpload.isSelected()); 969 return Config.getPref().putList(PREF_SOURCES, sourcesList.getItems()); 970 } 971 972 @Override 973 public Command fixError(TestError testError) { 974 List<Command> commands = new ArrayList<>(50); 975 976 Collection<? extends OsmPrimitive> primitives = testError.getPrimitives(); 977 for (OsmPrimitive p : primitives) { 978 Map<String, String> tags = p.getKeys(); 979 if (tags.isEmpty()) { 980 continue; 981 } 982 983 for (Entry<String, String> prop: tags.entrySet()) { 984 String key = prop.getKey(); 985 String value = prop.getValue(); 986 if (value == null || value.trim().isEmpty()) { 987 commands.add(new ChangePropertyCommand(p, key, null)); 988 } else if (value.startsWith(" ") || value.endsWith(" ") || value.contains(" ")) { 989 commands.add(new ChangePropertyCommand(p, key, Utils.removeWhiteSpaces(value))); 990 } else if (key.startsWith(" ") || key.endsWith(" ") || key.contains(" ")) { 991 commands.add(new ChangePropertyKeyCommand(p, key, Utils.removeWhiteSpaces(key))); 992 } else { 993 String evalue = Entities.unescape(value); 994 if (!evalue.equals(value)) { 995 commands.add(new ChangePropertyCommand(p, key, evalue)); 996 } 997 } 998 } 999 } 1000 1001 if (commands.isEmpty()) 1002 return null; 1003 if (commands.size() == 1) 1004 return commands.get(0); 1005 1006 return new SequenceCommand(tr("Fix tags"), commands); 1007 } 1008 1009 @Override 1010 public boolean isFixable(TestError testError) { 1011 if (testError.getTester() instanceof TagChecker) { 1012 int code = testError.getCode(); 1013 return code == EMPTY_VALUES || code == INVALID_SPACE || 1014 code == INVALID_KEY_SPACE || code == INVALID_HTML || 1015 code == MULTIPLE_SPACES; 1016 } 1017 1018 return false; 1019 } 1020 1021 @Override 1022 public void taggingPresetsModified() { 1023 try { 1024 initializeData(); 1025 initializePresets(); 1026 analysePresets(); 1027 } catch (IOException e) { 1028 Logging.error(e); 1029 } 1030 } 1031}