Let's write a perl module GC.pm that contains a subroutine to calculate the GC content of a DNA sequence, and test it.
package GC;
use strict;
use warnings;
use Math::Round; # has the nearest() function
use Carp::Assert; # has the assert() function
use Scalar::Util qw(looks_like_number);
use base 'Exporter';
our @EXPORT_OK = qw( gc_content_one_seq );
sub gc_content_one_seq
{
my $seq = $_[0];
my $g_or_c = 0;
my $gc;
# throw an exception if the sequence is uninitialised (undefined), or an empty string:
throw Error::Simple("sequence not defined") if (!(defined($seq)));
throw Error::Simple("sequence does not exist") if ($seq eq '');
# calculate the GC content:
$seq =~ tr/[a-z]/[A-Z]/; # convert the sequence to uppercase
$g_or_c = ($seq =~ tr/G|C//); # counts number of Gs or Cs in the sequence
$gc = $g_or_c*100/length($seq);
$gc = nearest(0.01, $gc); # round to 0.01 precision
# die if the GC content is not between 0 and 100:
assert($gc >= 0.00 && $gc <= 100.00); # this should never happen, program will die if it does
# die if the GC content is not numeric:
assert(looks_like_number($gc)); # this should never happen, program will die if it does
return $gc;
}
1;
Testing the perl module using ok(), use_ok(), can_ok(), and throws_ok():
Then you can use the testing script GC.t to test the subroutines in module GC.pm:
#!perl
use strict;
use warnings;
use Test::More tests => 7;
use Error; # has Error::Simple
use Test::Exception; # has throws_ok()
# Specify the subroutines to import:
my @subs = qw ( gc_content_one_seq );
# Check we can import the subroutines:
use_ok( 'GC', @subs);
can_ok( __PACKAGE__, 'gc_content_one_seq');
# Test the gc_content_one_seq() subroutine:
my $seq = "AAAAAAAAAAGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAGGGGGGGGGGAAAAAAAAAA";
my $gc_seq = GC::gc_content_one_seq($seq);
ok ($gc_seq == 33.33, "Test 3: check gc_content_one_seq() correctly gives GC=33.33");
$seq = "AAAAAAAAAAGGGGGGGGGGAAAAAAAAAAAA";
$gc_seq = GC::gc_content_one_seq($seq);
ok ($gc_seq == 31.25, "Test 4: check gc_content_one_seq() correctly gives GC=31.25'");
$seq = "AAAAAAAAA";
$gc_seq = GC::gc_content_one_seq($seq);
ok ($gc_seq == 0.00, "Test 5: check gc_content_one_seq() correctly gives GC=0.00");
$seq = ""; # Check an error is thrown if the sequence is an empty string:
throws_ok { $gc_seq = GC::gc_content_one_seq($seq) } 'Error::Simple','Test 6: sequence not defined';
my $seq2; # Check an error is thrown if the sequence is undefined:
throws_ok { $gc_seq = GC::gc_content_one_seq($seq2) } 'Error::Simple','Test 7: sequence does not exist';
When you run the tests you see:
% prove GC.t
GC.t .. ok
All tests successful.
Files=1, Tests=7, 0 wallclock secs ( 0.04 usr 0.01 sys + 0.04 cusr 0.02 csys = 0.11 CPU)
Result: PASS
% prove -v GC.t
GC.t ..
1..7
ok 1 - use GC;
ok 2 - main->can('gc_content_one_seq')
ok 3 - Test 3: check gc_content_one_seq() correctly gives GC=33.33
ok 4 - Test 4: check gc_content_one_seq() correctly gives GC=31.25'
ok 5 - Test 5: check gc_content_one_seq() correctly gives GC=0.00
ok 6 - Test 6: sequence not defined
ok 7 - Test 7: sequence does not exist
ok
All tests successful.
Files=1, Tests=7, 0 wallclock secs ( 0.04 usr 0.01 sys + 0.02 cusr 0.02 csys = 0.09 CPU)
Result: PASS
Using the perl module in a perl script:
We can use the subroutine gc_content_one_seq() in a perl script gc.pl like this:#!/usr/bin/perl
use strict;
use warnings;
use GC;
my $gc = GC::gc_content_one_seq("ACGT");
print "GC = $gc\n";
% perl -w gc.pl
GC = 50
Thanks to my colleagues Daria Gordon and Bhavana Harsha for lots of helpful discussion about this.
1 comment:
Thanks for sharing this.
Post a Comment