Regular Expressions, the Faster, Simpler Alternative

Some time back I came across the following snippet:

/**
 * Long-hand check for disallowed characters.
 *
 * Returns true when `str` contains at least one of:
 *   # & = \ ? : ; ' " [ ] { }
 *
 * One indexOf scan is made per character, stopping at the first hit.
 *
 * @param {string} str - text to inspect (any value exposing indexOf works)
 * @returns {boolean} true if a disallowed character is present, else false
 */
function containsBadChars(str){
      // The comparison chain is already a boolean, so it is returned directly.
      // Only the backslash and the double quote need escaping in these literals.
      return (
          str.indexOf("#") !== -1 ||
          str.indexOf("&") !== -1 ||
          str.indexOf("=") !== -1 ||
          str.indexOf("\\") !== -1 ||
          str.indexOf("?") !== -1 ||
          str.indexOf(":") !== -1 ||
          str.indexOf(";") !== -1 ||
          str.indexOf("'") !== -1 ||
          str.indexOf("\"") !== -1 ||
          str.indexOf("[") !== -1 ||
          str.indexOf("]") !== -1 ||
          str.indexOf("{") !== -1 ||
          str.indexOf("}") !== -1
      );
}

This function checks whether the string contains any of the following characters: # & = \ ? : ; ' " [ ] { } and returns true if at least one of them is found (false otherwise).

Using regular expressions, we can rewrite this function much more elegantly:

/**
 * Regex-based check for disallowed characters.
 *
 * Returns true when `str` contains at least one of:
 *   # & = \ ? : ; ' " [ ] { }
 *
 * @param {string} str - text to inspect
 * @returns {boolean} true if a disallowed character is present, else false
 */
function containsBadCharsRegex(str){
      // A single character class listing every disallowed character.
      var badCharPattern = /[\#\&\=\\\?\:\;\'\"\[\]\{\}]/;
      return badCharPattern.test(str);
}

Every time I talk about elegance with programmers they point out the performance gain/loss issue. A valid concern. So I did a performance test. Here is the code I used:

<html>
<head>
<title>Regex Demo</title>
   
<script>
    //in-elegant but functionally operational function
    /**
     * Long-hand check for disallowed characters.
     *
     * Returns true when `str` contains at least one of:
     *   # & = \ ? : ; ' " [ ] { }
     *
     * One indexOf scan is made per character, stopping at the first hit.
     *
     * @param {string} str - text to inspect (any value exposing indexOf works)
     * @returns {boolean} true if a disallowed character is present, else false
     */
    function containsBadChars(str){
      // The comparison chain is already a boolean, so it is returned directly.
      // Only the backslash and the double quote need escaping in these literals.
      return (
          str.indexOf("#") !== -1 ||
          str.indexOf("&") !== -1 ||
          str.indexOf("=") !== -1 ||
          str.indexOf("\\") !== -1 ||
          str.indexOf("?") !== -1 ||
          str.indexOf(":") !== -1 ||
          str.indexOf(";") !== -1 ||
          str.indexOf("'") !== -1 ||
          str.indexOf("\"") !== -1 ||
          str.indexOf("[") !== -1 ||
          str.indexOf("]") !== -1 ||
          str.indexOf("{") !== -1 ||
          str.indexOf("}") !== -1
      );
    }
       
    //elegant and functionally operational function   
    /**
     * Regex-based check for disallowed characters.
     *
     * Returns true when `str` contains at least one of:
     *   # & = \ ? : ; ' " [ ] { }
     *
     * @param {string} str - text to inspect
     * @returns {boolean} true if a disallowed character is present, else false
     */
    function containsBadCharsRegex(str){
      // A single character class listing every disallowed character.
      var badCharPattern = /[\#\&\=\\\?\:\;\'\"\[\]\{\}]/;
      return badCharPattern.test(str);
    }
    
    // Characters embedded one at a time into the test sentence:
    // the first 13 are the disallowed ("bad") characters,
    // the last four are acceptable characters.
    var testCharacters = ['#', '&', '=', '\\', '?', ':', ';', '\'', '"', '[', ']', '{', '}', '$', 'H', '1', '*'];
    var i, str, hasBadChars;

    console.log('Testing the long way:');
    console.profile();
    // Index-based loop: for...in on an array walks enumerable property keys
    // (as strings, including inherited ones) rather than just the elements.
    for (i = 0; i < testCharacters.length; i++) {
      // create the test string
      str = "The quick brown fox quickly jumped over the lazy dog. This is the " + testCharacters[i] + " for testing";
      hasBadChars = containsBadChars(str);
      // Comment out the next line when profiling.
      // The printed label says "isValid", but the value is true when a bad character WAS found.
      console.log('Long Way. Character: ' + testCharacters[i] + ' isValid: ' + hasBadChars);
    }
    console.profileEnd();

    console.log('Testing the short(regex) way:');
    console.profile();
    for (i = 0; i < testCharacters.length; i++) {
      // same test string as above, checked with the regex implementation
      str = "The quick brown fox quickly jumped over the lazy dog. This is the " + testCharacters[i] + " for testing";
      hasBadChars = containsBadCharsRegex(str);
      // Comment out the next line when profiling.
      console.log('Short(Regex) Way. Character: ' + testCharacters[i] + ' isValid: ' + hasBadChars);
    }
    console.profileEnd();
    
</script>
</head>
<body>
</body>
</html>

First we need to make sure that they are functionally identical.

Running the program gives the following console log output:

Testing the long way:
Long Way. Character: # isValid: true
Long Way. Character: & isValid: true
Long Way. Character: = isValid: true
Long Way. Character: \ isValid: true
Long Way. Character: ? isValid: true
Long Way. Character: : isValid: true
Long Way. Character: ; isValid: true
Long Way. Character: ' isValid: true
Long Way. Character: " isValid: true
Long Way. Character: [ isValid: true
Long Way. Character: ] isValid: true
Long Way. Character: { isValid: true
Long Way. Character: } isValid: true
Long Way. Character: $ isValid: false
Long Way. Character: H isValid: false
Long Way. Character: 1 isValid: false
Long Way. Character: * isValid: false

Testing the short(regex) way:
Short(Regex) Way. Character: # isValid: true
Short(Regex) Way. Character: & isValid: true
Short(Regex) Way. Character: = isValid: true
Short(Regex) Way. Character: \ isValid: true
Short(Regex) Way. Character: ? isValid: true
Short(Regex) Way. Character: : isValid: true
Short(Regex) Way. Character: ; isValid: true
Short(Regex) Way. Character: ' isValid: true
Short(Regex) Way. Character: " isValid: true
Short(Regex) Way. Character: [ isValid: true
Short(Regex) Way. Character: ] isValid: true
Short(Regex) Way. Character: { isValid: true
Short(Regex) Way. Character: } isValid: true
Short(Regex) Way. Character: $ isValid: false
Short(Regex) Way. Character: H isValid: false
Short(Regex) Way. Character: 1 isValid: false
Short(Regex) Way. Character: * isValid: false

They are functionally equivalent. Now on to performance.

After commenting out the console.log statements the profiler gives the following output:

Testing the long way:
Profile (0.143ms, 17 calls)
containsBadChars 17 100% 0.143ms 0.143ms 0.008ms 0.002ms 0.067ms

Testing the short(regex) way:
Profile (0.047ms, 17 calls)
containsBadCharsRegex 17 100% 0.047ms 0.047ms 0.003ms 0.002ms 0.006ms

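As you can see, the regex way is about 3 times faster in this run (0.047ms versus 0.143ms for 17 calls). Keep in mind that this is a very small sample, so treat the ratio as indicative rather than exact.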

Happy programming!

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s