## Bioinformatics for Animal and Plant Sciences
## VL-12 Databases
## Answers to the exercise
##
## Part 1 records the answers found interactively on the UniProtKB website.
## Part 2 is runnable R code that queries Ensembl through biomaRt; it needs
## network access, and the counts/coordinates quoted in the comments are the
## values observed when the answers were written (they may differ between
## Ensembl releases).

# 1) UniprotKB ----

# a)
# https://www.uniprot.org/help/query-fields

# b)
# (organism_id:9031) AND (reviewed:true)
# 2308

# c)
# (organism_id:9031) AND (reviewed:true) AND (date_created:[2011-09-01 TO *])
# 98

# d)
# O18836 GDF8_BOVIN

# e)
# Name: MSTN
# Synonyms: GDF8, MH

# f)
# ENSBTAT00000015674, ENSBTAP00000015674, ENSBTAG00000011808

# g)
# https://rest.uniprot.org/uniprotkb/O18836.txt
# DT: date, OS: organism species, DR: database cross-reference, FT: feature

# h)
# Bos gaurus, Bos indicus, Bubalus bubalis (Domestic water buffalo),
# Taurotragus derbianus (Giant eland) (Derby eland),
# Pig, Pronghorn

# 2) biomaRt ----

# a)
# Install only what is missing, so re-running the script does not trigger a
# fresh Bioconductor installation every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("biomaRt", quietly = TRUE)) {
  BiocManager::install("biomaRt")
}
library(biomaRt)

# b)
mart <- useMart("ensembl")

# c)
# Fetch the dataset table once and reuse it for printing and counting.
datasets <- listDatasets(mart)
datasets
dim(datasets)
# 215 datasets

# d)
mart <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")

# e)
# Fetch the attribute table once and reuse it for printing and counting.
mart_attributes <- listAttributes(mart)
mart_attributes
dim(mart_attributes)
# 2972

# f)
# Just look at getBM() (see ?getBM).

# g)
getBM(
  attributes = c("chromosome_name"),
  filters = "mgi_symbol",
  values = "Mstn",
  mart = mart
)
# NOTE(review): the next call passes `values` but no `filters`; presumably it
# is meant to show what happens when the filter is omitted -- confirm intent.
getBM(
  attributes = c("chromosome_name"),
  values = "Mstn",
  mart = mart
)

# h)
# "mgi_symbol" is requested as an attribute so that the result contains the
# symbol column shown in the recorded output below.
getBM(
  attributes = c(
    "mgi_symbol", "chromosome_name", "start_position", "end_position"
  ),
  filters = "mgi_symbol",
  values = "Mstn",
  mart = mart
)
#   mgi_symbol chromosome_name start_position end_position
# 1       Mstn               1       53100799     53107238

# i)
# Extend the Mstn locus by 100 kb on each side:
# 53100799 - 100000 = 53000799
# 53107238 + 100000 = 53207238
getBM(
  attributes = c(
    "mgi_symbol", "chromosome_name", "start_position", "end_position"
  ),
  filters = c("chromosomal_region"),
  values = "1:53000799:53207238",
  mart = mart
)
# 3 genes besides Mstn
#      mgi_symbol chromosome_name start_position end_position
# 1 1700019D03Rik               1       52961483     53059338
# 2          Mstn               1       53100799     53107238
# 3       Gm24349               1       53139209     53139309
# 4 1700019A02Rik               1       53197736     53226795