Skip to contents

To select specific transposable elements by gene, family, or class for downstream analysis, this function helps you quickly list all the available options. First run the function with show set to TRUE to see every possibility. Then provide a character vector containing the names of the families/classes you want to select. The function will return the matching subset of the rmsk dataframe.

Usage

rmskFilter(rmskInfo, gene = NULL, family = NULL, class = NULL, show = FALSE)

Arguments

rmskInfo

rmsk dataframe, with three columns, gene_id, family_id, and class_id

gene

a vector including the gene names you want

family

a vector including the family names you want

class

a vector including the class names you want

show

set this to TRUE to first list all possible families and classes

Value

a subset of the rmsk dataframe with three columns: gene_id, family_id, and class_id

Examples

data(hg19rmsk_info)
df <- hg19rmsk_info

# list all possibilities
rmskFilter(df, show=TRUE)
#> [1] "Family ID"
#> [1] "========================="
#>  [1] "Alu"           "hAT-Charlie"   "ERV1"          "MIR"          
#>  [5] "L2"            "ERVL"          "TcMar-Tigger"  "ERVL-MaLR"    
#>  [9] "L1"            "Gypsy"         "CR1"           "TcMar-Mariner"
#> [13] "RNA"           "hAT-Tip100"    "DNA"           "Deu"          
#> [17] "ERVK"          "Helitron"      "TcMar-Tc2"     "hAT-Blackjack"
#> [21] "TcMar"         "SVA_D"         "SVA_C"         "MamRep605"    
#> [25] "hAT"           "RTE-BovB"      "ERV"           "RTE"          
#> [29] "LTR"           "PiggyBac"      "MuDR"          "UCON9"        
#> [33] "Dong-R4"       "telo"          "SINE"          "centr"        
#> [37] "UCON28a"       "UCON2"         "UCON22"        "UCON5"        
#> [41] "MamRep564"     "UCON31"        "SVA_F"         "SVA_B"        
#> [45] "SVA_E"         "SVA_A"         "UCON26"        "Satellite"    
#> [49] "acro"          "UCON18"        "UCON27"        "Penelope"     
#> [53] "UCON4"         "UCON24"        "UCON16"        "UCON11"       
#> [57] "Merlin"        "UCON10"        "MER130"        "Eulor4"       
#> [61] "UCON17"        "UCON6"         "UCON20"        "UCON28c"      
#> [65] "UCON8"         "UCON1"         "UCON19"        "UCON28b"      
#> [69] "UCON12A"       "UCON25"        "UCON15"        "UCON12"       
#> [1] "-------------------------"
#> [1] "Class ID"
#> [1] "========================="
#>  [1] "SINE"      "DNA"       "LTR"       "LINE"      "RNA"       "DNA?"     
#>  [7] "RC"        "SVA"       "Unknown"   "Satellite" "SINE?"     "LTR?"     
#> [13] "LINE?"     "Unknown?" 
#>             gene_id     family_id class_id
#> 1            AluSx3           Alu     SINE
#> 2             MER5A   hAT-Charlie      DNA
#> 3            MER41B          ERV1      LTR
#> 4               MIR           MIR     SINE
#> 5              AluY           Alu     SINE
#> 6              MIRb           MIR     SINE
#> 7               L2a            L2     LINE
#> 8            MER77B          ERVL      LTR
#> 9             LTR44          ERV1      LTR
#> 10            AluJo           Alu     SINE
#> 11         Tigger2a  TcMar-Tigger      DNA
#> 12            MLT1C     ERVL-MaLR      LTR
#> 13            MER1A   hAT-Charlie      DNA
#> 14            L1MEg            L1     LINE
#> 15            AluJr           Alu     SINE
#> 16           AluSc5           Alu     SINE
#> 17            MER5B   hAT-Charlie      DNA
#> 18              L2c            L2     LINE
#> 19            AluSz           Alu     SINE
#> 20           AluSx1           Alu     SINE
#> 21             MIRc           MIR     SINE
#> 22             L1M5            L1     LINE
#> 23            AluSc           Alu     SINE
#> 24             MIR3           MIR     SINE
#> 25          MER106B   hAT-Charlie      DNA
#> 26            L1PB3            L1     LINE
#> 27            AluSx           Alu     SINE
#> 28            L1MA7            L1     LINE
#> 29              L2b            L2     LINE
#> 30            LTR78          ERV1      LTR
#> 31            AluJb           Alu     SINE
#> 32            L1PA5            L1     LINE
#> 33           L1PA11            L1     LINE
#> 34            L1PA4            L1     LINE
#> 35            L1MDa            L1     LINE
#> 36            L1MA4            L1     LINE
#> 37           LTR88b         Gypsy      LTR
#> 38            L1PA8            L1     LINE
#> 39               L3           CR1     LINE
#> 40           L1ME2z            L1     LINE
#> 41             FRAM           Alu     SINE
#> 42           L1PA14            L1     LINE
#> 43            AluYc           Alu     SINE
#> 44           MLT1G3     ERVL-MaLR      LTR
#> 45            MLT2E          ERVL      LTR
#> 46            AluSg           Alu     SINE
#> 47        MER41-int          ERV1      LTR
#> 48            MER20   hAT-Charlie      DNA
#> 49            L1MEe            L1     LINE
#> 50            MLT2F          ERVL      LTR
#> 51           LTR67B          ERVL      LTR
#> 52            THE1B     ERVL-MaLR      LTR
#> 53      MamGypLTR2b         Gypsy      LTR
#> 54            L1PA3            L1     LINE
#> 55            MADE1 TcMar-Mariner      DNA
#> 56       Charlie26a   hAT-Charlie      DNA
#> 57           L1ME4a            L1     LINE
#> 58            MLT1B     ERVL-MaLR      LTR
#> 59               L2            L2     LINE
#> 60            L1MD3            L1     LINE
#> 61           L1PA16            L1     LINE
#> 62              7SK           RNA      RNA
#> 63           MER45B    hAT-Tip100      DNA
#> 64             L1MC            L1     LINE
#> 65           AluSg7           Alu     SINE
#> 66        MLT1F-int     ERVL-MaLR      LTR
#> 67            L1MC1            L1     LINE
#> 68            MLT1F     ERVL-MaLR      LTR
#> 69       MamGypLTR3         Gypsy      LTR
#> 70          Eulor5A           DNA     DNA?
#> 71           AluSq2           Alu     SINE
#> 72         AmnSINE2           Deu     SINE
#> 73           AluSz6           Alu     SINE
#> 74           LTR13A          ERVK      LTR
#> 75            L1MB8            L1     LINE
#> 76            MLT1J     ERVL-MaLR      LTR
#> 77          LTR16B1          ERVL      LTR
#> 78          LTR16B2          ERVL      LTR
#> 79           LTR16B          ERVL      LTR
#> 80             HAL1            L1     LINE
#> 81           MER45A    hAT-Tip100      DNA
#> 82             L1M1            L1     LINE
#> 83  Helitron2Na_Mam      Helitron       RC
#> 84            MARNA TcMar-Mariner      DNA
#> 85           MER91A    hAT-Tip100      DNA
#> 86           AluSc8           Alu     SINE
#> 87          Kanga1d     TcMar-Tc2      DNA
#> 88            L1PB4            L1     LINE
#> 89           FLAM_C           Alu     SINE
#> 90           MER58C   hAT-Charlie      DNA
#> 91            MER30   hAT-Charlie      DNA
#> 92  ERV3-16A3_I-int          ERVL      LTR
#> 93            L1MC3            L1     LINE
#> 94          LTR16E1          ERVL      LTR
#> 95            L1MEa            L1     LINE
#> 96            THE1D     ERVL-MaLR      LTR
#> 97            L1MB2            L1     LINE
#> 98             L1M4            L1     LINE
#> 99            L1MB5            L1     LINE
#> 100           L1MB3            L1     LINE
#> 101           L1MB4            L1     LINE
#> 102            LTR1          ERV1      LTR
#> 103           AluSp           Alu     SINE
#> 104          MER63B hAT-Blackjack      DNA
#> 105           L1MA8            L1     LINE
#> 106          L1PA17            L1     LINE
#> 107            L1M6            L1     LINE
#> 108          AluSg4           Alu     SINE
#> 109       Charlie7a   hAT-Charlie      DNA
#> 110           MER1B   hAT-Charlie      DNA
#> 111            MSTA     ERVL-MaLR      LTR
#> 112           LTR87          ERVL      LTR
#> 113          MER135           DNA      DNA
#> 114          MER58B   hAT-Charlie      DNA
#> 115           L1MC4            L1     LINE
#> 116           MER6B  TcMar-Tigger      DNA
#> 117         Tigger2  TcMar-Tigger      DNA
#> 118          L1PA15            L1     LINE
#> 119         MER103C   hAT-Charlie      DNA
#> 120           L1MCa            L1     LINE
#> 121          MLT2B3          ERVL      LTR
#> 122           LTR39          ERV1      LTR
#> 123         Tigger1  TcMar-Tigger      DNA
#> 124          MER112   hAT-Charlie      DNA
#> 125      MamRep1161         TcMar      DNA
#> 126           MER4C          ERV1      LTR
#> 127          MER11A          ERVK      LTR
#> 128            LTR8          ERV1      LTR
#> 129       Charlie1b   hAT-Charlie      DNA
#> 130           LTR32          ERVL      LTR
#> 131          MER65D          ERV1      LTR
#> 132           MLT1D     ERVL-MaLR      LTR
#> 133          MER117   hAT-Charlie      DNA
#> 134           MLT1A     ERVL-MaLR      LTR
#> 135          MER51E          ERV1      LTR
#> 136          AluJr4           Alu     SINE
#> 137          MLT2C1          ERVL      LTR
#> 138           L1ME1            L1     LINE
#> 139            L1M7            L1     LINE
#> 140       MLT1A-int     ERVL-MaLR      LTR
#> 141           LTR5B          ERVK      LTR
#> 142           L1PA7            L1     LINE
#> 143          L1PA13            L1     LINE
#> 144          MLT2B4          ERVL      LTR
#> 145          MLT1F2     ERVL-MaLR      LTR
#> 146          MER47B  TcMar-Tigger      DNA
#> 147           L1MA1            L1     LINE
#> 148           MER84          ERV1      LTR
#> 149          MLT1A0     ERVL-MaLR      LTR
#> 150           LTR33          ERVL      LTR
#> 151          LTR78B          ERV1      LTR
#> 152       Charlie2b   hAT-Charlie      DNA
#> 153         Arthur1    hAT-Tip100      DNA
#> 154       Charlie2a   hAT-Charlie      DNA
#> 155           SVA_D         SVA_D      SVA
#> 156          MER5A1   hAT-Charlie      DNA
#> 157           L1PB1            L1     LINE
#> 158         AluYk11           Alu     SINE
#> 159         LTR5_Hs          ERVK      LTR
#> 160          LTR81B         Gypsy      LTR
#> 161           LTR79          ERVL      LTR
#> 162          MER9a1          ERVK      LTR
#> 163           MLT1H     ERVL-MaLR      LTR
#> 164           MER33   hAT-Charlie      DNA
#> 165        Charlie7   hAT-Charlie      DNA
#> 166           SVA_C         SVA_C      SVA
#> 167          MER58A   hAT-Charlie      DNA
#> 168          MER63A hAT-Blackjack      DNA
#> 169           MADE2 TcMar-Mariner      DNA
#> 170          MER44B  TcMar-Tigger      DNA
#> 171         Tigger7  TcMar-Tigger      DNA
#> 172      Charlie17a   hAT-Charlie      DNA
#> 173           L1M4b            L1     LINE
#> 174          AluSx4           Alu     SINE
#> 175      Charlie21a   hAT-Charlie      DNA
#> 176      Charlie15a   hAT-Charlie      DNA
#> 177       MamRep605     MamRep605  Unknown
#> 178          L1ME3C            L1     LINE
#> 179           LTR83          ERVL      LTR
#> 180       Charlie4z   hAT-Charlie      DNA
#> 181          MLT1J2     ERVL-MaLR      LTR
#> 182          FLAM_A           Alu     SINE
#> 183       Tigger12A  TcMar-Tigger      DNA
#> 184           L1ME5            L1     LINE
#> 185           L1MD2            L1     LINE
#> 186           L1MB7            L1     LINE
#> 187           L1MEc            L1     LINE
#> 188          L1ME3F            L1     LINE
#> 189       Tigger15a  TcMar-Tigger      DNA
#> 190       MamRep137         TcMar      DNA
#> 191          MER110          ERV1      LTR
#> 192          L1ME3D            L1     LINE
#> 193           LTR27          ERV1      LTR
#> 194          LTR48B          ERV1      LTR
#> 195         MER34A1          ERV1      LTR
#> 196       MER31-int          ERV1      LTR
#> 197      MER34B-int          ERV1      LTR
#> 198           LTR45          ERV1      LTR
#> 199          LTR14B          ERVK      LTR
#> 200         MLT1E1A     ERVL-MaLR      LTR
#> 201          MER92C          ERV1      LTR
#> 202          LTR37A          ERV1      LTR
#> 203           THE1C     ERVL-MaLR      LTR
#> 204           AluSq           Alu     SINE
#> 205         AluSq10           Alu     SINE
#> 206          MER34A          ERV1      LTR
#> 207           LTR6B          ERV1      LTR
#> 208          AluYa5           Alu     SINE
#> 209       MLT1I-int     ERVL-MaLR      LTR
#> 210           LTR23          ERV1      LTR
#> 211           MLT1I     ERVL-MaLR      LTR
#> 212     ERVL-B4-int          ERVL      LTR
#> 213           MER53           hAT      DNA
#> 214           LTR49          ERV1      LTR
#> 215           THE1A     ERVL-MaLR      LTR
#> 216       MamRep434  TcMar-Tigger      DNA
#> 217          L1MC4a            L1     LINE
#> 218           L1MC5            L1     LINE
#> 219           MLT1L     ERVL-MaLR      LTR
#> 220          L1MA4A            L1     LINE
#> 221         MER113A   hAT-Charlie      DNA
#> 222            MER3   hAT-Charlie      DNA
#> 223       Tigger12c  TcMar-Tigger      DNA
#> 224           LTR1D          ERV1      LTR
#> 225            MER8  TcMar-Tigger      DNA
#> 226          MLT1N2     ERVL-MaLR      LTR
#> 227            MER2  TcMar-Tigger      DNA
#> 228          MLT1F1     ERVL-MaLR      LTR
#> 229           L1PB2            L1     LINE
#> 230           L1MA3            L1     LINE
#> 231            L1M2            L1     LINE
#> 232           MER81 hAT-Blackjack      DNA
#> 233      Charlie18a   hAT-Charlie      DNA
#> 234          AluYf4           Alu     SINE
#> 235        Charlie8   hAT-Charlie      DNA
#> 236      Charlie22a   hAT-Charlie      DNA
#> 237           L1MEf            L1     LINE
#> 238          MLT1J1     ERVL-MaLR      LTR
#> 239          MER90a          ERV1      LTR
#> 240          L1PA10            L1     LINE
#> 241           L1ME3            L1     LINE
#> 242           LTR51          ERV1      LTR
#> 243            MSTB     ERVL-MaLR      LTR
#> 244           MER39          ERV1      LTR
#> 245        Tigger3a  TcMar-Tigger      DNA
#> 246          MER104     TcMar-Tc2      DNA
#> 247          LTR40a          ERVL      LTR
#> 248       Tigger16b  TcMar-Tigger      DNA
#> 249             FAM           Alu     SINE
#> 250            L1P4            L1     LINE
#> 251          AluSq4           Alu     SINE
#> 252          MER66B          ERV1      LTR
#> 253          MER31B          ERV1      LTR
#> 254       THE1B-int     ERVL-MaLR      LTR
#> 255            L1MD            L1     LINE
#> 256            L1P5            L1     LINE
#> 257        Tigger3d  TcMar-Tigger      DNA
#> 258          AluYd8           Alu     SINE
#> 259            L1M3            L1     LINE
#> 260          AluYg6           Alu     SINE
#> 261             L3b           CR1     LINE
#> 262           MLT1K     ERVL-MaLR      LTR
#> 263         X3_LINE      RTE-BovB     LINE
#> 264           MER5C   hAT-Charlie      DNA
#> 265        Tigger4b  TcMar-Tigger      DNA
#> 266         LTR16A1          ERVL      LTR
#> 267          LTR24C          ERV1      LTR
#> 268        X7C_LINE           CR1     LINE
#> 269         Tigger5  TcMar-Tigger      DNA
#> 270          LTR10C          ERV1      LTR
#> 271      HAL1-3A_ME            L1     LINE
#> 272          MER44A  TcMar-Tigger      DNA
#> 273           MER95           ERV      LTR
#> 274          MER34C          ERV1      LTR
#> 275          MER21B          ERVL      LTR
#> 276         MER102c   hAT-Charlie      DNA
#> 277         L1PREC2            L1     LINE
#> 278          LTR33A          ERVL      LTR
#> 279    HUERS-P1-int          ERV1      LTR
#> 280      ERVL-E-int          ERVL      LTR
#> 281          MLT2A2          ERVL      LTR
#> 282          MER47A  TcMar-Tigger      DNA
#> 283          MLT2B2          ERVL      LTR
#> 284          MLT2B1          ERVL      LTR
#> 285          MER83B          ERV1      LTR
#> 286          MER52A          ERV1      LTR
#> 287          L1ME3E            L1     LINE
#> 288          MER21C          ERVL      LTR
#> 289          MER51A          ERV1      LTR
#> 290        MSTB-int     ERVL-MaLR      LTR
#> 291           MLT1M     ERVL-MaLR      LTR
#> 292              L4           RTE     LINE
#> 293          LTR16C          ERVL      LTR
#> 294           MER50          ERV1      LTR
#> 295       MER50-int          ERV1      LTR
#> 296          MER4D1          ERV1      LTR
#> 297          LTR88c         Gypsy      LTR
#> 298            MSTD     ERVL-MaLR      LTR
#> 299    Tigger2b_Pri  TcMar-Tigger      DNA
#> 300         Plat_L3           CR1     LINE
#> 301            LTR9          ERV1      LTR
#> 302           MSTB1     ERVL-MaLR      LTR
#> 303       Charlie24   hAT-Charlie      DNA
#> 304        MamRep38           hAT      DNA
#> 305          L1MA5A            L1     LINE
#> 306        Arthur1A    hAT-Tip100      DNA
#> 307          MER124           DNA     DNA?
#> 308          MER113   hAT-Charlie      DNA
#> 309          MER63C hAT-Blackjack      DNA
#> 310       Charlie4a   hAT-Charlie      DNA
#> 311        X6B_LINE           CR1     LINE
#> 312             L1M            L1     LINE
#> 313           L1ME2            L1     LINE
#> 314           L1PA6            L1     LINE
#> 315         LTR86B2          ERVL      LTR
#> 316        Tigger3b  TcMar-Tigger      DNA
#> 317           L1MEd            L1     LINE
#> 318           ALINE           RTE     LINE
#> 319      MamRep1527           LTR      LTR
#> 320           MER68          ERVL      LTR
#> 321          L1MA10            L1     LINE
#> 322            MSTC     ERVL-MaLR      LTR
#> 323         ORSL-2a    hAT-Tip100      DNA
#> 324          MER115    hAT-Tip100      DNA
#> 325           MER75      PiggyBac      DNA
#> 326      PRIMA4-int          ERV1      LTR
#> 327      PRIMAX-int          ERV1      LTR
#> 328       THE1D-int     ERVL-MaLR      LTR
#> 329          MLT2A1          ERVL      LTR
#> 330       HERVL-int          ERVL      LTR
#> 331         Kanga1c     TcMar-Tc2      DNA
#> 332           L1MC2            L1     LINE
#> 333          LTR85b         Gypsy      LTR
#>  [ reached 'max' / getOption("max.print") -- omitted 659 rows ]

# select family
rmskFilter(df, family="hAT-Charlie")
#>           gene_id   family_id class_id
#> 1           MER5A hAT-Charlie      DNA
#> 2           MER1A hAT-Charlie      DNA
#> 3           MER5B hAT-Charlie      DNA
#> 4         MER106B hAT-Charlie      DNA
#> 5           MER20 hAT-Charlie      DNA
#> 6      Charlie26a hAT-Charlie      DNA
#> 7          MER58C hAT-Charlie      DNA
#> 8           MER30 hAT-Charlie      DNA
#> 9       Charlie7a hAT-Charlie      DNA
#> 10          MER1B hAT-Charlie      DNA
#> 11         MER58B hAT-Charlie      DNA
#> 12        MER103C hAT-Charlie      DNA
#> 13         MER112 hAT-Charlie      DNA
#> 14      Charlie1b hAT-Charlie      DNA
#> 15         MER117 hAT-Charlie      DNA
#> 16      Charlie2b hAT-Charlie      DNA
#> 17      Charlie2a hAT-Charlie      DNA
#> 18         MER5A1 hAT-Charlie      DNA
#> 19          MER33 hAT-Charlie      DNA
#> 20       Charlie7 hAT-Charlie      DNA
#> 21         MER58A hAT-Charlie      DNA
#> 22     Charlie17a hAT-Charlie      DNA
#> 23     Charlie21a hAT-Charlie      DNA
#> 24     Charlie15a hAT-Charlie      DNA
#> 25      Charlie4z hAT-Charlie      DNA
#> 26        MER113A hAT-Charlie      DNA
#> 27           MER3 hAT-Charlie      DNA
#> 28     Charlie18a hAT-Charlie      DNA
#> 29       Charlie8 hAT-Charlie      DNA
#> 30     Charlie22a hAT-Charlie      DNA
#> 31          MER5C hAT-Charlie      DNA
#> 32        MER102c hAT-Charlie      DNA
#> 33      Charlie24 hAT-Charlie      DNA
#> 34         MER113 hAT-Charlie      DNA
#> 35      Charlie4a hAT-Charlie      DNA
#> 36     Charlie13b hAT-Charlie      DNA
#> 37        MER102b hAT-Charlie      DNA
#> 38       Charlie5 hAT-Charlie      DNA
#> 39      Charlie10 hAT-Charlie      DNA
#> 40     Charlie16a hAT-Charlie      DNA
#> 41         MER119 hAT-Charlie      DNA
#> 42     Charlie10a hAT-Charlie      DNA
#> 43     Charlie23a hAT-Charlie      DNA
#> 44     Charlie19a hAT-Charlie      DNA
#> 45       Charlie9 hAT-Charlie      DNA
#> 46     Charlie20a hAT-Charlie      DNA
#> 47         MER20B hAT-Charlie      DNA
#> 48       Charlie3 hAT-Charlie      DNA
#> 49      Charlie1a hAT-Charlie      DNA
#> 50        MER106A hAT-Charlie      DNA
#> 51      Charlie25 hAT-Charlie      DNA
#> 52         MER5C1 hAT-Charlie      DNA
#> 53     Charlie13a hAT-Charlie      DNA
#> 54        MER102a hAT-Charlie      DNA
#> 55       Charlie1 hAT-Charlie      DNA
#> 56         MER58D hAT-Charlie      DNA
#> 57       Cheshire hAT-Charlie      DNA
#> 58       Charlie4 hAT-Charlie      DNA
#> 59        MER113B hAT-Charlie      DNA
#> 60     Charlie14a hAT-Charlie      DNA
#> 61         MER30B hAT-Charlie      DNA
#> 62       Charlie6 hAT-Charlie      DNA
#> 63     Charlie10b hAT-Charlie      DNA
#> 64      Charlie11 hAT-Charlie      DNA
#> 65 Charlie1b_Mars hAT-Charlie      DNA
#> 66      CheshMITE hAT-Charlie      DNA
#> 67      Charlie12 hAT-Charlie      DNA
#> 68 Cheshire_Mars_ hAT-Charlie      DNA

# select class
df_subset <- rmskFilter(df, class= c("LINE", "SINE"))