////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code for adherence to a set of rules.
// Copyright (C) 2001-2019 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.checks.regexp;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.FileContents;
import com.puppycrawl.tools.checkstyle.api.FileText;
import com.puppycrawl.tools.checkstyle.api.LineColumn;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;

/**
 * <p>
 * A check that makes sure that a specified pattern exists, exists less than
 * a set number of times, or does not exist in the file.
 * </p>
 * <p>
 * This check combines all the functionality provided by
 * <a href="https://checkstyle.org/config_header.html#RegexpHeader">RegexpHeader</a>
 * except supplying the regular expression from a file.
 * </p>
 * <p>
 * It differs from them in that it works in multiline mode. Its regular expression
 * can span multiple lines and it checks this against the whole file at once.
 * The others work in singleline mode. Their single or multiple regular expressions
 * can only span one line. They check each of these against each line in the file in turn.
 * </p>
 * <p>
 * <b>Note:</b> Because of the different mode of operation there may be some
 * changes in the regular expressions used to achieve a particular end.
 * </p>
 * <p>
 * In multiline mode...
 * </p>
 * <ul>
 * <li>
 * {@code ^} means the beginning of a line, as opposed to beginning of the input.
 * </li>
 * <li>
 * For beginning of the input use {@code \A}.
 * </li>
 * <li>
 * {@code $} means the end of a line, as opposed to the end of the input.
 * </li>
 * <li>
 * For end of input use {@code \Z}.
 * </li>
 * <li>
 * Each line in the file is terminated with a line feed character.
 * </li>
 * </ul>
 * <p>
 * <b>Note:</b> Not all regular expression engines are created equal.
 * Some provide extra functions that others do not and some elements
 * of the syntax may vary. This check makes use of the
 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
 * java.util.regex package</a>; please check its documentation for details
 * of how to construct a regular expression to achieve a particular goal.
 * </p>
 * <p>
 * <b>Note:</b> When entering a regular expression as a parameter in
 * the XML config file you must also take into account the XML rules. e.g.
 * if you want to match a &lt; symbol you need to enter &amp;lt;.
 * The regular expression should be entered on one line.
 * </p>
 * <ul>
 * <li>
 * Property {@code format} - Specify the pattern to match against.
 * Default value is {@code "^$"}(empty).
 * </li>
 * <li>
 * Property {@code message} - Specify message which is used to notify about
 * violations, if empty then the default (hard-coded) message is used.
 * Default value is {@code null}.
 * </li>
 * <li>
 * Property {@code illegalPattern} - Control whether the pattern is required or illegal.
 * Default value is {@code false}.
 * </li>
 * <li>
 * Property {@code duplicateLimit} - Control whether to check for duplicates
 * of a required pattern, any negative value means no checking for duplicates,
 * any positive value is used as the maximum number of allowed duplicates,
 * if the limit is exceeded violations will be logged.
 * Default value is {@code 0}.
 * </li>
 * <li>
 * Property {@code errorLimit} - Specify the maximum number of violations before
 * the check will abort.
 * Default value is {@code 100}.
 * </li>
 * <li>
 * Property {@code ignoreComments} - Control whether to ignore matches found within comments.
 * Default value is {@code false}.
 * </li>
 * </ul>
 * <p>
 * The following examples are mainly copied from the other 3 checks mentioned above,
 * to show how the same results can be achieved using this check in place of them.
 * </p>
 * <p>
 * <b>To use like Required Regexp check:</b>
 * </p>
 * <p>
 * An example of how to configure the check to make sure a copyright statement
 * is included in the file:
 * </p>
 * <p>
 * The statement.
 * </p>
 * <pre>
 * // This code is copyrighted
 * </pre>
 * <p>
 * The check.
 * </p>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property name="format" value="// This code is copyrighted"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * Your statement may be multiline.
 * </p>
 * <pre>
 * // This code is copyrighted
 * // (c) MyCompany
 * </pre>
 * <p>
 * Then the check would be.
 * </p>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property name="format" value="// This code is copyrighted\n// \(c\) MyCompany"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * <b>Note:</b> To search for parentheses () in a regular expression you must
 * escape them like \(\). This is required by the regexp engine, otherwise it will
 * think they are special instruction characters.
 * </p>
 * <p>
 * And to make sure it appears only once:
 * </p>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property name="format" value="// This code is copyrighted\n// \(c\) MyCompany"/&gt;
 *   &lt;property name="duplicateLimit" value="0"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * It can also be useful to attach a meaningful message to the check:
 * </p>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property name="format" value="// This code is copyrighted\n// \(c\) MyCompany"/&gt;
 *   &lt;property name="message" value="Copyright"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * <b>To use like illegal regexp check:</b>
 * </p>
 * <p>
 * An example of how to configure the check to make sure there are no calls to
 * {@code System.out.println}:
 * </p>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;!-- . matches any character, so we need to escape it and use \. to match dots. --&gt;
 *   &lt;property name="format" value="System\.out\.println"/&gt;
 *   &lt;property name="illegalPattern" value="true"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * You may want to make the above check ignore comments, like this:
 * </p>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property name="format" value="System\.out\.println"/&gt;
 *   &lt;property name="illegalPattern" value="true"/&gt;
 *   &lt;property name="ignoreComments" value="true"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * An example of how to configure the check to find trailing whitespace at the end of a line:
 * </p>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property name="format" value="[ \t]+$"/&gt;
 *   &lt;property name="illegalPattern" value="true"/&gt;
 *   &lt;property name="message" value="Trailing whitespace"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * An example of how to configure the check to find case-insensitive occurrences of "debug":
 * </p>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property name="format" value="(?i)debug"/&gt;
 *   &lt;property name="illegalPattern" value="true"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * <b>Note:</b> The (?i) at the beginning of the regular expression tells the
 * regexp engine to ignore the case.
 * </p>
 * <p>
 * There is also a feature to limit the number of violations reported.
 * When the limit is reached the check aborts with a message reporting that
 * the limit has been reached. The default limit setting is 100,
 * but this can be changed as shown in the following example.
 * </p>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property name="format" value="(?i)debug"/&gt;
 *   &lt;property name="illegalPattern" value="true"/&gt;
 *   &lt;property name="errorLimit" value="1000"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * <b>To use like <a href="https://checkstyle.org/config_header.html#RegexpHeader">
 * RegexpHeader</a>:</b>
 * </p>
 * <p>
 * To configure the check to verify that each file starts with the following multiline header.
 * </p>
 * <p>
 * Note the following:
 * </p>
 * <ul>
 * <li>
 * \A means the start of the file.
 * </li>
 * <li>
 * The date can be any 4 digit number.
 * </li>
 * </ul>
 * <pre>
 * // Copyright (C) 2004 MyCompany
 * // All rights reserved
 * </pre>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property
 *     name="format"
 *     value="\A// Copyright \(C\) \d\d\d\d MyCompany\n// All rights reserved"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * A more complex example. Note how the import and javadoc multilines are handled,
 * there can be any number of them.
 * </p>
 * <pre>
 * ///////////////////////////////////////////////////////////////////////
 * // checkstyle:
 * // Checks Java source code for adherence to a set of rules.
 * // Copyright (C) 2004 Oliver Burn
 * // Last modification by $Author A.N.Other$
 * ///////////////////////////////////////////////////////////////////////
 *
 * package com.puppycrawl.checkstyle;
 *
 * import java.util.thing1;
 * import java.util.thing2;
 * import java.util.thing3;
 *
 * &#47;**
 *  * javadoc line 1
 *  * javadoc line 2
 *  * javadoc line 3
 *  *&#47;
 * </pre>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property
 *     name="format"
 *     value="\A/{71}\n// checkstyle:\n// Checks Java source code for
 *     adherence to a set of rules\.\n// Copyright \(C\) \d\d\d\d Oliver Burn\n
 *     // Last modification by \$Author.*\$\n/{71}\n\npackage [\w\.]*;\n\n
 *     (import [\w\.]*;\n)*\n/\*\*\n( \*[^/]*\n)* \*&#47;"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * <b>More examples:</b>
 * </p>
 * <p>
 * The next 2 examples deal with the following example Java source file:
 * </p>
 * <pre>
 * &#47;*
 *  * PID.java
 *  *
 *  * Copyright (c) 2001 ACME
 *  * 123 Some St.
 *  * Somewhere.
 *  *
 *  * This software is the confidential and proprietary information of ACME.
 *  * ("Confidential Information"). You shall not disclose such
 *  * Confidential Information and shall use it only in accordance with
 *  * the terms of the license agreement you entered into with ACME.
 *  *
 *  * $Log: config_misc.xml,v $
 *  * Revision 1.7 2007/01/16 12:16:35 oburn
 *  * Removing all reference to mailing lists
 *  *
 *  * Revision 1.6 2005/12/25 16:13:10 o_sukhodolsky
 *  * Fix for rfe 1248106 (TYPECAST is now accepted by NoWhitespaceAfter)
 *  *
 *  * Fix for rfe 953266 (thanks to Paul Guyot (pguyot) for submitting patch)
 *  * IllegalType can be configured to accept some abstract classes which
 *  * matches to regexp of illegal type names (property legalAbstractClassNames)
 *  *
 *  * TrailingComment now can be configured to accept some trailing comments
 *  * (such as NOI18N) (property legalComment, rfe 1385344).
 *  *
 *  * Revision 1.5 2005/11/06 11:54:12 oburn
 *  * Incorporate excellent patch [ 1344344 ] Consolidation of regexp checks.
 *  *
 *  * Revision 1.3.8.1 2005/10/11 14:26:32 someone
 *  * Fix for bug 251. The broken bit is fixed
 *  *&#47;
 *
 * package com.acme.tools;
 *
 * import com.acme.thing1;
 * import com.acme.thing2;
 * import com.acme.thing3;
 *
 * &#47;**
 *  *
 *  * &lt;P&gt;
 *  * &lt;I&gt;This software is the confidential and proprietary information of
 *  * ACME (&lt;B&gt;"Confidential Information"&lt;/B&gt;). You shall not
 *  * disclose such Confidential Information and shall use it only in
 *  * accordance with the terms of the license agreement you entered into
 *  * with ACME.&lt;/I&gt;
 *  * &lt;/P&gt;
 *  *
 *  * &amp;#169; copyright 2002 ACME
 *  *
 *  * @author Some Body
 *  *&#47;
 * public class PID extends StateMachine implements WebObject.Constants {
 *
 *   &#47;** javadoc. *&#47;
 *   public static final int A_SETPOINT = 1;
 *   .
 *   .
 *   .
 * } // class PID
 * </pre>
 * <p>
 * This checks for the presence of the header, the first 16 lines.
 * </p>
 * <p>
 * Note the following:
 * </p>
 * <ul>
 * <li>
 * Line 2 and 13 contain the file name. These are checked to make sure they
 * are the same, and that they match the class name.
 * </li>
 * <li>
 * The date can be any 4 digit number.
 * </li>
 * </ul>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property
 *     name="format"
 *     value="\A/\*\n \* (\w*)\.java\n \*\n \* Copyright \(c\)
 *     \d\d\d\d ACME\n \* 123 Some St\.\n \* Somewhere\.\n \*\n
 *     \* This software is the confidential and proprietary information
 *     of ACME\.\n \* \(&amp;quot;Confidential Information&amp;quot;\)\. You
 *     shall not disclose such\n \* Confidential Information and shall
 *     use it only in accordance with\n \* the terms of the license
 *     agreement you entered into with ACME\.\n \*\n
 *     \* \$Log: config_misc\.xml,v $
 *     \* Revision 1\.7 2007/01/16 12:16:35 oburn
 *     \* Removing all reference to mailing lists
 *     \* \
 *     \* Revision 1.6 2005/12/25 16:13:10 o_sukhodolsky
 *     \* Fix for rfe 1248106 \(TYPECAST is now accepted by NoWhitespaceAfter\)
 *     \* \
 *     \* Fix for rfe 953266 \(thanks to Paul Guyot \(pguyot\) for submitting patch\)
 *     \* IllegalType can be configured to accept some abstract classes which
 *     \* matches to regexp of illegal type names \(property legalAbstractClassNames\)
 *     \*
 *     \* TrailingComment now can be configured to accept some trailing comments
 *     \* \(such as NOI18N\) \(property legalComment, rfe 1385344\).
 *     \*
 *     \* Revision 1.5 2005/11/06 11:54:12 oburn
 *     \* Incorporate excellent patch \[ 1344344 \] Consolidation of regexp checks.
 *     \* \\n(.*\n)*([\w|\s]*( class | interface )\1)"/&gt;
 *   &lt;property name="message" value="Correct header not found"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * This checks for the presence of a copyright notice within the class javadoc, lines 24 to 37.
 * </p>
 * <pre>
 * &lt;module name="Regexp"&gt;
 *   &lt;property
 *     name="format"
 *     value="(/\*\*\n)( \*.*\n)*( \* &lt;P&gt;\n \* &lt;I&gt;
 *     This software is the confidential and proprietary information of\n
 *     \* ACME \(&lt;B&gt;&amp;quot;Confidential Information&amp;quot;&lt;/B&gt;
 *     \)\. You shall not\n \* disclose such Confidential Information
 *     and shall use it only in\n \* accordance with the terms of the
 *     license agreement you entered into\n \* with ACME\.&lt;/I&gt;\n
 *     \* &lt;/P&gt;\n \*\n \* &amp;#169; copyright \d\d\d\d ACME\n
 *     \*\n \* @author .*)(\n\s\*.*)*&#47;\n[\w|\s]*( class | interface )"/&gt;
 *   &lt;property name="message"
 *     value="Copyright in class/interface Javadoc"/&gt;
 *   &lt;property name="duplicateLimit" value="0"/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * <b>Note:</b> To search for things that mean something in XML, like &lt;
 * you need to escape them like &amp;lt;. This is required so the XML parser
 * does not act on them, but instead passes the correct character to the regexp engine.
 * </p>
 *
 * @since 4.0
 */
@FileStatefulCheck
public class RegexpCheck extends AbstractCheck {

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_REQUIRED_REGEXP = "required.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";

    /**
     * Threshold for duplicate checking: a configured duplicate limit must be
     * strictly greater than this value for duplicate checking to be enabled.
     */
    private static final int DEFAULT_DUPLICATE_LIMIT = -1;

    /** Default error report limit. */
    private static final int DEFAULT_ERROR_LIMIT = 100;

    /**
     * Error count exceeded message. It is prepended, without any separator,
     * to the regular message once the error limit has been reached.
     */
    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
        "The error limit has been exceeded, "
        + "the check is aborting, there may be more unreported errors.";

    /**
     * Specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     */
    private String message;

    /** Control whether to ignore matches found within comments. */
    private boolean ignoreComments;

    /** Control whether the pattern is required or illegal. */
    private boolean illegalPattern;

    /** Specify the maximum number of violations before the check will abort. */
    private int errorLimit = DEFAULT_ERROR_LIMIT;

    /**
     * Control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any positive value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     * This value is only consulted when {@link #checkForDuplicates} is set,
     * which happens in {@link #setDuplicateLimit(int)}.
     */
    private int duplicateLimit;

    /**
     * Boolean to say if we should check for duplicates. Stays {@code false}
     * until {@link #setDuplicateLimit(int)} is called with a non-negative value.
     */
    private boolean checkForDuplicates;

    /** Tracks number of matches made. */
    private int matchCount;

    /** Tracks number of errors. */
    private int errorCount;

    /** Specify the pattern to match against. */
    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);

    /** The matcher. */
    private Matcher matcher;

    /**
     * Setter to specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     *
     * @param message custom message which should be used in report;
     *     {@code null} is stored as an empty string.
     */
    public void setMessage(String message) {
        if (message == null) {
            this.message = "";
        }
        else {
            this.message = message;
        }
    }

    /**
     * Setter to control whether to ignore matches found within comments.
     *
     * @param ignoreComments True if comments should be ignored.
     */
    public void setIgnoreComments(boolean ignoreComments) {
        this.ignoreComments = ignoreComments;
    }

    /**
     * Setter to control whether the pattern is required or illegal.
     *
     * @param illegalPattern True if pattern is not allowed.
     */
    public void setIllegalPattern(boolean illegalPattern) {
        this.illegalPattern = illegalPattern;
    }

    /**
     * Setter to specify the maximum number of violations before the check will abort.
     *
     * @param errorLimit the number of errors to report.
     */
    public void setErrorLimit(int errorLimit) {
        this.errorLimit = errorLimit;
    }

    /**
     * Setter to control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any positive value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     *
     * @param duplicateLimit negative values mean no duplicate checking,
     *     any positive value is used as the limit.
     */
    public void setDuplicateLimit(int duplicateLimit) {
        this.duplicateLimit = duplicateLimit;
        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
    }

    /**
     * Setter to specify the pattern to match against.
     * The pattern text is recompiled with the {@link Pattern#MULTILINE} flag.
     *
     * @param pattern the new pattern
     */
    public final void setFormat(Pattern pattern) {
        format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
    }

    @Override
    public int[] getDefaultTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getAcceptableTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getRequiredTokens() {
        return CommonUtil.EMPTY_INT_ARRAY;
    }

    /**
     * Resets the per-file counters, creates a matcher over the full text of
     * the current file and runs the search.
     *
     * @param rootAST the root of the tree; not used by this check.
     */
    @Override
    public void beginTree(DetailAST rootAST) {
        matcher = format.matcher(getFileContents().getText().getFullText());
        matchCount = 0;
        errorCount = 0;
        findMatch();
    }

    /**
     * Finds the matches of the pattern and logs violations for them.
     * Implemented as a loop rather than by self-recursion so that the call
     * stack depth does not grow with the number of matches in the file.
     */
    private void findMatch() {
        boolean keepSearching = true;
        while (keepSearching) {
            if (matcher.find()) {
                final FileText text = getFileContents().getText();
                final LineColumn start = text.lineColumn(matcher.start());
                final int startLine = start.getLine();

                final boolean ignore = isIgnore(startLine, text, start);

                if (!ignore) {
                    matchCount++;
                    if (illegalPattern || checkForDuplicates
                            && matchCount - 1 > duplicateLimit) {
                        errorCount++;
                        logMessage(startLine);
                    }
                }
                keepSearching = canContinueValidation(ignore);
            }
            else {
                // No further match: a required pattern that never matched
                // is reported against line 0.
                if (!illegalPattern && matchCount == 0) {
                    logMessage(0);
                }
                keepSearching = false;
            }
        }
    }

    /**
     * Check if we can continue validation.
     *
     * @param ignore whether the last match was ignored
     * @return true if the error limit is not yet reached and there is a reason
     *     to look at further matches
     */
    private boolean canContinueValidation(boolean ignore) {
        // Written as '<' instead of '<= errorLimit - 1' so that the comparison
        // cannot underflow when errorLimit is Integer.MIN_VALUE.
        return errorCount < errorLimit
                && (ignore || illegalPattern || checkForDuplicates);
    }

    /**
     * Detect ignore situation.
     *
     * @param startLine position of line
     * @param text file text
     * @param start line column
     * @return true if the current match needs to be ignored, which is only
     *     possible when {@code ignoreComments} is enabled
     */
    private boolean isIgnore(int startLine, FileText text, LineColumn start) {
        boolean ignore = false;
        if (ignoreComments) {
            // The end position is only needed for the comment intersection test.
            final LineColumn end;
            if (matcher.end() == 0) {
                end = text.lineColumn(0);
            }
            else {
                end = text.lineColumn(matcher.end() - 1);
            }
            final FileContents theFileContents = getFileContents();
            final int startColumn = start.getColumn();
            final int endLine = end.getLine();
            final int endColumn = end.getColumn();
            ignore = theFileContents.hasIntersectionWithComment(startLine,
                startColumn, endLine, endColumn);
        }
        return ignore;
    }

    /**
     * Displays the right message. The text is the custom message if one is
     * set, otherwise the pattern itself; once the error limit is reached the
     * limit-exceeded notice is prepended.
     *
     * @param lineNumber the line number the message relates to; for a required
     *     pattern, a value greater than zero reports a duplicate and any other
     *     value reports a missing pattern.
     */
    private void logMessage(int lineNumber) {
        String msg;

        if (message == null || message.isEmpty()) {
            msg = format.pattern();
        }
        else {
            msg = message;
        }

        if (errorCount >= errorLimit) {
            msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
        }

        if (illegalPattern) {
            log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
        }
        else {
            if (lineNumber > 0) {
                log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
            }
            else {
                log(lineNumber, MSG_REQUIRED_REGEXP, msg);
            }
        }
    }

}