2006-07-11 UniProt on ActiveRecord
Modeling UniProt Knowledgebase entries with ActiveRecord
- UniProt
- UniProt Knowledgebase User Manual
- ExPASy: SIB Bioinformatics Resource Portal - Home
- Download
- Error
- ATPase family AAA domain-containing protein 1 - Homo sapiens (Human)
- BioRuby
- ViewVC Exception
Examples
Loading the environment and finding an Entry by name (entry_id).
$ script/console Loading development environment. >> Entry.find_by_name("ATAD1_HUMAN") => #<Entry:0x241cf5c @attributes={"name"=>"ATAD1_HUMAN", "entry_type"=>nil, "dt_create"=>"13-SEP-2005, integrated into UniProtKB/Swiss-Prot.", "dt_annotation"=>"18-APR-2006, entry version 26.", "sequence"=>"MVHAEAFSRPLSRNEVVGLIFRLTIFGAVTYFTIKWMVDAIDPTRKQKVEAQKQAEKLMKQIGVKNVKLSEYEMSIAAHLVDPLNMHVTWSDIAGLDDVITDLKDTVILPIKKKHLFENSRLLQPPKGVLLYGPPGCGKTLIAKATAKEAGCRFINLQPSTLTDKWYGESQKLAAAVFSLAIKLQPSIIFIDEIDSFLRNRSSSDHEATAMMKAQFMSLWDGLDTDHSCQVIVMGATNRPQDLDSAIMRRMPTRFHINQPALKQREAILKLILKNENVDRHVDLLEVAQETDGFSGSDLKEMCRDAALLCVREYVNSTSEESHDEDEIRPVQQQDLHRAIEKMKKSKDAAFQNVLTHVCLD", "molecular_type"=>"PRT", "sequence_length"=>"361", "id"=>"11877", "data_class"=>"STANDARD", "crc64"=>"2FAE88BA7E7140BC", "definition"=>"ATPase family AAA domain-containing protein 1.", "dt_sequence"=>"01-OCT-2002, sequence version 1.", "mw"=>"40744"}>
Accessions (AC line)
>> Entry.find_by_name("ATAD1_HUMAN").acs.map {|ac| ac.name } => ["Q8NBU5", "Q6P4B9", "Q8N3G1", "Q8WYR9", "Q969Y3"]
Keywords (KW line)
>> Entry.find_by_name("ATAD1_HUMAN").kws.map {|keyword| keyword.name } => ["ATP-binding", "Nucleotide-binding"]
Database cross-references (DR line)
>> Entry.find_by_name("ATAD1_HUMAN").drs.map {|x| x.db_name } => ["EMBL", "EMBL", "EMBL", "EMBL", "EMBL", "EMBL", "EMBL", "EMBL", "LinkHub", "Pfam", "InterPro", "InterPro", "InterPro", "Ensembl", "HSSP", "PROSITE", "SMART", "HGNC", "UniGene"]
Sequence
>> Entry.find_by_name("ATAD1_HUMAN").sequence => "MVHAEAFSRPLSRNEVVGLIFRLTIFGAVTYFTIKWMVDAIDPTRKQKVEAQKQAEKLMKQIGVKNVKLSEYEMSIAAHLVDPLNMHVTWSDIAGLDDVITDLKDTVILPIKKKHLFENSRLLQPPKGVLLYGPPGCGKTLIAKATAKEAGCRFINLQPSTLTDKWYGESQKLAAAVFSLAIKLQPSIIFIDEIDSFLRNRSSSDHEATAMMKAQFMSLWDGLDTDHSCQVIVMGATNRPQDLDSAIMRRMPTRFHINQPALKQREAILKLILKNENVDRHVDLLEVAQETDGFSGSDLKEMCRDAALLCVREYVNSTSEESHDEDEIRPVQQQDLHRAIEKMKKSKDAAFQNVLTHVCLD"
References
>> Entry.find_by_name("ATAD1_HUMAN").refs_count => 5 >> Entry.find_by_name("ATAD1_HUMAN").refs[0] => #<Ref:0x27944a0 @rcs=[#<Rc:0x27932a8 @attributes={"text"=>"Pituitary", "token"=>"TISSUE", "id"=>"16335", "ref_id"=>"26323"}>], @attributes={"entry_id"=>"11877", "title"=>"A novel gene expressed in fetal normal pituitary.", "auther"=>"Liu F., Xu X.R., Qian B.Z., Xiao H., Chen Z., Han Z.", "id"=>"26323", "location"=>"Submitted (MAR-2001) to the EMBL/GenBank/DDBJ databases."}, @rps=[#<Rp:0x2793578 @attributes={"id"=>"31223", "ref_id"=>"26323", "comment"=>"NUCLEOTIDE SEQUENCE [MRNA]"}>], @rgs=[], @rxs=[]>
Comments (CC line)
>> Entry.find_by_name("ATAD1_HUMAN").ccs => [#<Cc:0x26cd594 @attributes={"entry_id"=>"11877", "topic"=>"SIMILARITY", "id"=>"69666", "contents"=>"Belongs to the AAA ATPase family."}>]
Count Homo sapiens entries
>> Os.find_by_name("Homo sapiens") => #<Os:0x24940fc @attributes={"name"=>"Homo sapiens", "common_name"=>"(Human)", "id"=>"31"}> >> Os.find_by_name("Homo sapiens").entries_count => 1701
uniprot_sprot.dat.gz and Rails
$ curl -O ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.dat.gz $ rails uniprot -d mysql $ cd uniprot
Then save uniprot/Rakefile, uniprot/config/database.yml, uniprot/db/migrate/001_create_entries.rb and uniprot/app/models/entry.rb.
Importing UniProt data into database
$ rake create $ rake db:migrate $ rake import
After 20 hours,
$ script/console
Have fun!
uniprot/Rakefile
# Add your own tasks in files placed in lib/tasks ending in .rake,
# for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.

require(File.join(File.dirname(__FILE__), 'config', 'boot'))

require 'rake'
require 'rake/testtask'
require 'rake/rdoctask'

require 'tasks/rails'

# Keys of a raw "Key=value; Key=value;" gene-name string, mapped to the
# symbols the import task reads. 'Synonyms' is included so that synonyms are
# not silently filed under a nil key.
GN_KEYS = {
  'Name'              => :name,
  'Synonyms'          => :synonyms,
  'OrderedLocusNames' => :loci,
  'ORFNames'          => :orfs
}.freeze

# Normalizes one element of +entry.gn+.
#
# gene:: either a Hash keyed by :name / :synonyms / :loci / :orfs, or any
#        other object whose +to_s+ looks like "Name=X; Synonyms=A, B;".
#
# Returns a new Hash with :name (String, '' when absent) and :synonyms,
# :loci, :orfs (Arrays, empty when absent). The argument is not modified.
def normalize_gene_name(gene)
  unless gene.is_a?(Hash)
    parsed = {}
    gene.to_s.split(';').each do |field|
      key, value = field.strip.split('=', 2)
      attribute = GN_KEYS[key]
      next if attribute.nil? || value.nil?
      # Everything except the name is a comma-separated list.
      parsed[attribute] =
        attribute == :name ? value : value.split(',').map { |item| item.strip }
    end
    gene = parsed
  end

  {
    :name     => gene[:name] || '',
    :synonyms => gene[:synonyms] || [],
    :loci     => gene[:loci] || [],
    :orfs     => gene[:orfs] || []
  }
end

desc "import data"
task :import do
  # Loaded here rather than at the top so that other rake tasks do not pay
  # for booting the Rails environment and BioRuby.
  require "#{RAILS_ROOT}/config/environment"
  require 'bio'
  require 'zlib'

  # Progress dots should show up immediately; set once instead of per entry.
  $stdout.sync = true

  # Block form closes the gzip stream even if the import aborts midway.
  Zlib::GzipReader.open("../uniprot_sprot.dat.gz") do |io|
    Bio::FlatFile.open(io).each do |entry|
      print entry.entry_id
      # Entries already in the database are skipped, so an interrupted
      # import can simply be restarted.
      if Entry.find_by_name(entry.entry_id)
        puts "\tskipped"
        next
      end
      print "\t."

      e = Entry.new(:name            => entry.entry_id,
                    :data_class      => entry.id_line['DATA_CLASS'].to_s,
                    :molecular_type  => entry.id_line['MOLECULE_TYPE'].to_s,
                    :sequence_length => entry.id_line['SEQUENCE_LENGTH'].to_i,
                    :dt_create       => entry.dt['created'].to_s,
                    :dt_sequence     => entry.dt['sequence'].to_s,
                    :dt_annotation   => entry.dt['annotation'].to_s,
                    :definition      => entry.de)
      e.sequence = entry.seq.to_s
      e.crc64    = entry.sq['CRC64'].to_s
      e.mw       = entry.sq['MW'].to_i

      # AC: accession numbers
      print "."
      entry.accessions.each do |ac|
        e.acs << Ac.new(:name => ac)
      end

      # OS: organism species (shared rows, reused when the name already exists)
      print "."
      entry.os.each do |os|
        e.oss << (Os.find_by_name(os['os']) ||
                  Os.new(:name => os['os'], :common_name => os['name'].to_s))
      end

      # OC: organism classification, stored with its position in the lineage
      print "."
      entry.oc.each_with_index do |key, level|
        e.ocs << (Oc.find_by_name(key) || Oc.new(:name => key, :level => level))
      end

      # OX: taxonomy cross-references
      print "."
      entry.ox.each do |db_name, accs|
        accs.each do |acc|
          existing = Ox.find(:first,
                             :conditions => ["db_name = ? AND accession = ?", db_name, acc])
          e.oxs << (existing || Ox.new(:db_name => db_name, :accession => acc))
        end
      end

      # GN: gene names.
      # NOTE(review): a single Gn row is created per entry, so when entry.gn
      # has several elements the name of the last one wins and all synonyms,
      # loci and ORF names are pooled on that row.
      e.gn = Gn.new
      entry.gn.each do |raw_gene|
        gene = normalize_gene_name(raw_gene)
        e.gn.name = gene[:name]
        gene[:synonyms].each do |synonym|
          e.gn.synonyms << GnSynonym.new(:synonym => synonym)
        end
        gene[:loci].each do |locus|
          e.gn.loci << GnLocus.new(:locus => locus)
        end
        gene[:orfs].each do |orf|
          e.gn.orf_names << GnOrfName.new(:name => orf)
        end
      end

      # References and their RG / RX / RP / RC sub-lines
      print "."
      entry.ref.each do |ref|
        r = Ref.new(:title    => ref['RT'],
                    :auther   => ref['RA'],
                    :location => ref['RL'])
        print "."
        ref['RG'].each do |rg|
          r.rgs << Rg.new(:name => rg)
        end
        print "."
        ref['RX'].each do |key, value|
          next if value.nil?
          r.rxs << Rx.new(:name => key, :identifier => value)
        end
        print "."
        ref['RP'].each do |rp|
          r.rps << Rp.new(:comment => rp)
        end
        print "."
        ref['RC'].each do |rc|
          r.rcs << Rc.new(:token => rc['Token'], :text => rc['Text'])
        end
        e.refs << r
      end

      # CC: comments; entry.cc(topic) may return one value or a list
      print "."
      entry.cc.each do |topic, _|
        [entry.cc(topic)].flatten.each do |value|
          e.ccs << Cc.new(:topic => topic, :contents => value)
        end
      end

      # DR: database cross-references
      print "."
      entry.dr.each do |db_name, vs|
        vs.each do |v|
          e.drs << Dr.new(:db_name    => db_name,
                          :entry_name => v[0],
                          :content1   => v[1].to_s,
                          :content2   => v[2].to_s,
                          :content3   => v[3].to_s)
        end
      end

      # KW: keywords (shared rows, reused when the name already exists)
      print "."
      entry.kw.each do |key|
        e.kws << (Kw.find_by_name(key) || Kw.new(:name => key))
      end

      # FT: feature table
      entry.ft.each do |name, fts|
        fts.each do |ft|
          e.fts << Ft.new(:name        => name,
                          :from        => ft['From'],
                          :to          => ft['To'],
                          :description => ft['Description'],
                          :ftid        => ft['FTId'])
        end
      end

      print "."
      # Report a failed save instead of claiming success unconditionally.
      puts(e.save ? 'done' : 'failed')
    end
  end
end

desc "mysql create database"
task :create do
  # NOTE(review): the drop runs first, so this presumably fails on a machine
  # where uniprot_development does not exist yet - confirm and create the
  # database by hand in that case.
  sh "mysqladmin5 -uroot drop uniprot_development"
  sh "mysqladmin5 -uroot create uniprot_development"
end
uniprot/config/database.yml
# Database connection for the Rails development environment.
development:
  adapter: mysql
  database: uniprot_development
  username: root
  # Empty password: the local MySQL root account is used without one.
  password:
  host: localhost
uniprot/db/migrate/001_create_entries.rb
# Creates the schema that holds imported UniProt Knowledgebase entries:
# the +entries+ table itself, one child table per line type (AC, GN, CC, DR,
# FT, references and their RX/RG/RP/RC sub-lines) and join tables for the
# values shared between entries (OS, OC, OX, KW).
class CreateEntries < ActiveRecord::Migration
  # Every table created by +up+, in creation order. +down+ walks this list
  # backwards so that nothing is left behind after a rollback.
  TABLES = [
    :entries, :acs,
    :gns, :gn_synonyms, :gn_loci, :gn_orf_names,
    :entries_oss, :oss,
    :entries_ocs, :ocs,
    :entries_oxs, :oxs,
    :refs, :rxs, :rgs, :rps, :rcs,
    :ccs, :drs,
    :entries_kws, :kws,
    :fts
  ].freeze

  def self.up
    create_table(:entries) do |t|
      t.column(:name, :string)
      t.column(:data_class, :string)
      t.column(:molecular_type, :string)
      t.column(:sequence_length, :integer)
      t.column(:entry_type, :string)
      t.column(:dt_create, :string)
      t.column(:dt_sequence, :string)
      t.column(:dt_annotation, :string)
      t.column(:definition, :string)
      t.column(:sequence, :text)
      t.column(:mw, :integer)
      t.column(:crc64, :string)
    end
    add_index(:entries, :name)

    # accession numbers
    # entry_id is an integer here and in the other child tables below,
    # matching entries.id and the entry_id columns of refs / join tables.
    create_table(:acs) do |t|
      t.column(:entry_id, :integer)
      t.column(:name, :string)
    end
    add_index(:acs, :name)
    add_index(:acs, :entry_id)

    # gene names
    create_table(:gns) do |t|
      t.column(:name, :string)
      t.column(:entry_id, :integer)
    end
    add_index(:gns, :name)
    add_index(:gns, :entry_id)

    # The import task stores gene synonyms, loci and ORF names as text, so
    # these value columns are strings rather than integers.
    create_table(:gn_synonyms) do |t|
      t.column(:gn_id, :integer)
      t.column(:synonym, :string)
    end
    add_index(:gn_synonyms, :synonym)
    add_index(:gn_synonyms, :gn_id)

    create_table(:gn_loci) do |t|
      t.column(:gn_id, :integer)
      t.column(:locus, :string)
    end
    add_index(:gn_loci, :locus)
    add_index(:gn_loci, :gn_id)

    create_table(:gn_orf_names) do |t|
      t.column(:gn_id, :integer)
      t.column(:name, :string)
    end
    add_index(:gn_orf_names, :name)
    add_index(:gn_orf_names, :gn_id)

    # organism species
    create_table(:entries_oss, :id => false) do |t|
      t.column(:entry_id, :integer)
      t.column(:os_id, :integer)
    end
    add_index(:entries_oss, :entry_id)
    add_index(:entries_oss, :os_id)

    create_table(:oss) do |t|
      t.column(:name, :string)
      t.column(:common_name, :string)
    end
    add_index(:oss, :name)

    # organism classification
    create_table(:entries_ocs, :id => false) do |t|
      t.column(:entry_id, :integer)
      t.column(:oc_id, :integer)
    end
    add_index(:entries_ocs, :entry_id)
    add_index(:entries_ocs, :oc_id)

    create_table(:ocs) do |t|
      t.column(:level, :integer)
      t.column(:name, :string)
    end
    add_index(:ocs, :name)

    # taxonomy cross-references
    create_table(:entries_oxs, :id => false) do |t|
      t.column(:entry_id, :integer)
      t.column(:ox_id, :integer)
    end
    add_index(:entries_oxs, :entry_id)
    add_index(:entries_oxs, :ox_id)

    create_table(:oxs) do |t|
      t.column(:db_name, :string)
      t.column(:accession, :string)
    end
    add_index(:oxs, :db_name)
    add_index(:oxs, :accession)

    # references
    create_table(:refs) do |t|
      t.column(:entry_id, :integer)
      t.column(:title, :string)
      t.column(:auther, :string)
      t.column(:location, :string)
    end
    add_index(:refs, :location)
    add_index(:refs, :entry_id)

    # Reference sub-lines; ref_id is indexed like the other foreign keys so
    # that loading a reference's children does not scan the whole table.
    create_table(:rxs) do |t|
      t.column(:name, :string)
      t.column(:identifier, :string)
      t.column(:ref_id, :integer)
    end
    add_index(:rxs, :name)
    add_index(:rxs, :ref_id)

    create_table(:rgs) do |t|
      t.column(:name, :string)
      t.column(:ref_id, :integer)
    end
    add_index(:rgs, :name)
    add_index(:rgs, :ref_id)

    create_table(:rps) do |t|
      t.column(:comment, :string)
      t.column(:ref_id, :integer)
    end
    add_index(:rps, :comment)
    add_index(:rps, :ref_id)

    create_table(:rcs) do |t|
      t.column(:token, :string)
      t.column(:text, :string)
      t.column(:ref_id, :integer)
    end
    add_index(:rcs, :token)
    add_index(:rcs, :ref_id)

    # comments
    create_table(:ccs) do |t|
      t.column(:topic, :string)
      t.column(:contents, :text)
      t.column(:entry_id, :integer)
    end
    add_index(:ccs, :entry_id)
    add_index(:ccs, :topic)

    # database cross-references
    create_table(:drs) do |t|
      t.column(:entry_id, :integer)
      t.column(:db_name, :string)
      t.column(:entry_name, :string)
      t.column(:content1, :string)
      t.column(:content2, :string)
      t.column(:content3, :string)
    end
    add_index(:drs, :entry_id)
    add_index(:drs, :db_name)
    add_index(:drs, :entry_name)

    # keywords
    create_table(:entries_kws, :id => false) do |t|
      t.column(:entry_id, :integer)
      t.column(:kw_id, :integer)
    end
    add_index(:entries_kws, :entry_id)
    add_index(:entries_kws, :kw_id)

    create_table(:kws) do |t|
      t.column(:name, :string)
    end
    add_index(:kws, :name)

    # feature table
    create_table(:fts) do |t|
      t.column(:entry_id, :integer)
      t.column(:name, :string)
      t.column(:from, :string)
      t.column(:to, :string)
      t.column(:description, :string)
      t.column(:ftid, :string)
    end
    add_index(:fts, :entry_id)
    add_index(:fts, :name)
    add_index(:fts, :ftid)
  end

  # Drops every table created by +up+ (indexes go away with their tables).
  def self.down
    TABLES.reverse.each do |table|
      drop_table(table)
    end
  end
end
コメントを書く
トラックバック - http://bioruby.g.hatena.ne.jp/nakao_mitsuteru/20060711







