Regular Expression Java Class
This Java class can be used to work with regular expressions. First, you create the object:
String strExpression = "^4\\d{3}([\\ \\-]?)\\d{4}\\1\\d{4}\\1\\d{4}$";
boolean caseSensitive = false;
RegularExpression expression = new RegularExpression(strExpression, caseSensitive);
Then, to actually use the expression, you call the search method and other methods to get the results:
String potentialCC = "4387-7522-2222-2227";
expression.search(potentialCC);
int numMatches = expression.numMatches();
for (int j=0; j<numMatches; j++) {
String indMatch = expression.getMatch(j);
System.out.println(indMatch);
}
When you're all done, you can free up the memory held by the object, since regular expressions can be memory hogs:
expression.eraseMatches();
and
expression.cleanup();
There are also methods to help with logging, for example if you want to print out the expression string:
System.out.println(expression.getExpression());
and a method to help if you're running out of Java heap space:
System.out.println(expression.getHeapSpace());
Here's the code:
package com.breakingpar.regexp;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Thin wrapper around {@link Pattern} that compiles an expression once, runs it
 * against input strings, and stores the matched substrings of the most recent
 * search for retrieval by index.
 *
 * Typical life cycle: construct, call {@link #search(String)}, read results with
 * {@link #numMatches()} and {@link #getMatch(int)}, then optionally call
 * {@link #eraseMatches()} or {@link #cleanup()} to release memory.
 */
public class RegularExpression {

    /* Miscellaneous string constants */
    private static final String EMPTY_STRING = "";

    /* Message of the IOException thrown when deserialization is attempted */
    private static final String CANNOT_DESERIALIZE = "Class cannot be deserialized";

    /* Value of "count" while no search result is available */
    private static final int NOT_SEARCHED = -1;

    /* Instance state */
    private Vector<String> matches = new Vector<String>(); // matches of the most recent search
    private final String strExpression;                    // the pattern text as supplied by the caller
    private Pattern regExpPattern;                         // compiled pattern; null after cleanup()
    private int count = NOT_SEARCHED;                      // number of matches, or NOT_SEARCHED
    private long heapSpace;                                // Runtime.totalMemory() sampled at the start of the last search

    /**
     * Compiles the pattern once so that repeated searches do not pay the
     * compilation cost again.
     *
     * @param expression    String that is the regular expression pattern
     *                      like \d{3}\-\d{2}\-\d{4} to find a social
     *                      security number.
     * @param caseSensitive boolean true if the expression string is case
     *                      sensitive ([A] matches "A" but not "a") and
     *                      false if it is not case sensitive.
     * @throws NullPointerException if expression is null
     * @throws java.util.regex.PatternSyntaxException if the expression is not a valid pattern
     */
    public RegularExpression(String expression, boolean caseSensitive) {
        if (expression == null) {
            throw new NullPointerException("expression must not be null");
        }
        int flags = caseSensitive ? 0 : Pattern.CASE_INSENSITIVE;
        regExpPattern = Pattern.compile(expression, flags);
        strExpression = expression;
    }

    /**
     * Cloning allows an attacker to instantiate a class without running any
     * of the class constructors. Prevent that ability.
     *
     * @throws java.lang.CloneNotSupportedException always
     */
    @Override
    public final Object clone() throws java.lang.CloneNotSupportedException {
        throw new java.lang.CloneNotSupportedException();
    }

    /**
     * Defensive guard against creating an instance from a serialized byte
     * stream: any attempt to read one fails.
     *
     * @param in the stream the object would have been read from (unused)
     * @throws java.io.IOException always
     */
    private final void readObject(java.io.ObjectInputStream in) throws java.io.IOException {
        throw new java.io.IOException(CANNOT_DESERIALIZE);
    }

    /**
     * Returns the pattern text that was used to do the search.
     *
     * @return String the pattern, or an empty string while no search has been
     *         performed (including after {@link #cleanup()})
     */
    public String getExpression() {
        if (count == NOT_SEARCHED) { // the search has never been performed
            return EMPTY_STRING;
        }
        return strExpression;
    }

    /**
     * Runs the compiled pattern against an input string and stores every
     * matched substring, replacing the results of any earlier search. There
     * is no return value - use {@link #numMatches()} and
     * {@link #getMatch(int)} to see the results.
     *
     * @param source The string to be searched
     * @throws NullPointerException  if source is null
     * @throws IllegalStateException if {@link #cleanup()} has already been called
     */
    public void search(String source) {
        if (source == null) {
            throw new NullPointerException("source must not be null");
        }
        if (regExpPattern == null) {
            throw new IllegalStateException(
                    "cleanup() has been called; this RegularExpression can no longer be searched");
        }

        // Sample the JVM's total memory before matching so that, if an
        // OutOfMemoryError is triggered, the caller can report the figure.
        heapSpace = Runtime.getRuntime().totalMemory();

        // Results of a previous search must not leak into this one.
        matches.removeAllElements();
        count = 0;

        // The matcher is kept local so it does not pin the source string
        // in memory after the search completes.
        Matcher matcher = regExpPattern.matcher(source);
        while (matcher.find()) {
            matches.addElement(matcher.group());
            count++;
        }
    }

    /**
     * Reports how many matches are currently stored.
     *
     * @return int number of matches (0 if none, or if they were erased with
     *         {@link #eraseMatches()}) or -1 if never searched
     */
    public int numMatches() {
        return count;
    }

    /**
     * Returns the match at the given index position. The index position is
     * 0-based (the first element is 0). If you ask for an index position
     * that is out of bounds, then an empty string is returned.
     *
     * @param pos integer position of the match number to be returned
     * @return String string from the original source that matched the
     *         expression, or an empty string when pos is out of bounds or no
     *         matches are stored
     */
    public String getMatch(int pos) {
        // Bound against the stored matches themselves so the check stays
        // correct whether or not a search has run or matches were erased.
        if (pos < 0 || pos >= matches.size()) {
            return EMPTY_STRING;
        }
        return matches.elementAt(pos);
    }

    /**
     * Discards the matches that had previously been found. Afterwards
     * {@link #numMatches()} reports 0 if a search had been run, and still -1
     * if none had.
     */
    public void eraseMatches() {
        matches.removeAllElements();
        if (count > 0) {
            count = 0;
        }
    }

    /**
     * Clean up method to (hopefully) free up memory. Use when you are done
     * with the regular expression: the compiled pattern is released, so
     * {@link #search(String)} can no longer be called on this object.
     */
    public void cleanup() {
        // Swap in a fresh vector instead of null so the old backing array is
        // released while the other methods remain safe to call.
        matches = new Vector<String>();
        regExpPattern = null;
        count = NOT_SEARCHED;
        heapSpace = 0;
        // Only a hint to the JVM. System.runFinalization() is intentionally not
        // called: finalization is deprecated for removal in current JDKs.
        System.gc();
    }

    /**
     * Getter function for returning the recorded heap figure.
     *
     * @return long the value of Runtime.totalMemory() sampled right at the
     *         start of the most recent call to the "search" method, or 0 if
     *         no search has run since construction or {@link #cleanup()}
     */
    public long getHeapSpace() {
        return heapSpace;
    }
}