# page id seeds, until I can figure out how to auto-increment w/in the import
# script
-my $SOURCE_TITLE_SEED = 1;
-my $ANNOT_TITLE_SEED = 1;
-my $PUB_TITLE_SEED = 1;
+my $SOURCE_TITLE_SEED = 2;
+my $ANNOT_TITLE_SEED = 21;
+my $PUB_TITLE_SEED = 7;
$Data::Dumper::Pad = "... ";
# set species-specific values
switch ($annotation_universals{"Species ID"}) {
case "NCBI:3702" {
- $locus_finder_expr = "/[Aa][Tt].[Gg]/";
+ $locus_finder_expr = "^([Aa][Tt].[Gg])";
$annotation_universals{"Species Name"} = "Arabidopsis thaliana"; # temp; need an NCBI lookup
}
+ case "NCBI:4530" {
+ $locus_finder_expr = "^(LOC_|[Oo][Ss]|osa-)";
+ $annotation_universals{"Species Name"} = "Oryza sativa"; # temp; need an NCBI lookup
+ }
else {
die($annotation_universals{"Species ID"}
. " is not a valid NCBI taxon ID.\n");
# print "\n*** NEW SYMBOL: $curr_line_hash{'db_object_symbol'} ***\n"; # TEST
# prepare Gene Locus, Source Accession ID, Chromosome, Gene Synonyms
- my @synonyms = split('\|',$curr_line_hash{"db_object_synonym"});
-
- if ($debug) { print "...<DEBUG: \@synonyms>\n"
- . Dumper(\@synonyms) . "\n\n"; }
-
- # find the gene locus, if it is listed (first "AT.G")
- my @loci = grep $locus_finder_expr, @synonyms;
-
- if ($debug) { print "...<DEBUG: \@loci>\n"
- . Dumper(\@loci) . "\n\n"; }
-
+ my @loci;
my $locus = "";
+
+ if (exists $curr_line_hash{"db_object_synonym"})
+ {
+ my @synonyms = split('\|',$curr_line_hash{"db_object_synonym"});
+
+ if ($debug) { print "...<DEBUG: \@synonyms>\n"
+ . Dumper(\@synonyms) . "\n\n"; }
+
+ # find the gene locus, if it is listed
+ @loci = grep /$locus_finder_expr/, @synonyms;
+
+ if ($debug) { print "...<DEBUG: \@loci>\n"
+ . Dumper(\@loci) . "\n\n"; }
+ }
if (scalar(@loci) > 0) # we have at least one match; use the first one
{
if ($curr_line_hash{"db_object_symbol"} =~ $locus_finder_expr)
{
# the split drops the variant/allele signifier, if present
- $locus = (split('.',$curr_line_hash{"db_object_symbol"}))[0];
+ # split() always treats its first argument as a pattern, so a
+ # quoted '.' matches every character and returns no fields;
+ # split on a literal dot instead
+ my ($symbol_base) = split(/\./,$curr_line_hash{"db_object_symbol"});
+
+ # fall back to the full symbol if nothing precedes the first dot
+ $locus = (defined $symbol_base && $symbol_base ne "")
+ ? $symbol_base
+ : $curr_line_hash{"db_object_symbol"};
}
else # no match; attempt to use the Gene Name instead
{
}
}
- # chromosome = third char in locus, if it exists
- my $chromosome = ($locus ne "" ? (split('',$locus))[2] : "");
-
+ # chromosome is only derived for Arabidopsis (NCBI:3702); for every
+ # other species, or when no locus was found, it stays empty
+ my $chromosome = "";
+ if ($locus && $annotation_universals{"Species ID"} eq "NCBI:3702") {
+ # for Ath, third char in locus, if it exists
+ $chromosome = substr($locus, 2, 1) if length($locus) > 2;
+ }
+
# set some sort of pseudo-unique value as the accession id,
# in order of succession: locus, then symbol
# (NOTE: this is dangerous; a stable identifier is preferred)