#!/usr/bin/env RScript #' 1. This script will generate the FAM for fastFAM using provided pedigree information #' (like the ukb_inferredRelationship.txt by UKB cohort). #' 2. Two input files are needed: #' a. A .fam file from the PLINK format genotype. It contains the individuals' FIDs and IIDs and some other information. #' see (https://www.cog-genomics.org/plink/1.9/formats#fam) for details. #' b. A inferredRelationship file. We will use the UKB provided one. #' It should contains: "FID", "IID", and a "relationship" columns. The "relationship" column should contain #' the following elements (these are used by UK Biobank to indicate the inferred relationship between #' two individuals, see https://www.biorxiv.org/content/early/2017/07/20/166298) : #' "MZtwin", #' "parentOffspring", #' "fullSib", #' "second", #' "third" #' #' Author: Longda Jiang, longda.jiang@uq.edu.au #' Date: June 28, 2018 #' Version: 1.0.0 args <- commandArgs(trailingOnly = TRUE) if(length(args) != 3){ message("Input: path of fam file") message("Input: path of pedigree information") message("Output: path of output without extension") stop("The FAM from pedigree needs 3 parameters") } path_to_fam <- as.character(args[1]) path_to_rel <- as.character(args[2]) output_file <- as.character(args[3]) message("Transform the pedigree information...") message("From: ", path_to_rel) message("To: ", output_file) message("Align order: ", path_to_fam) if(!file.exists(path_to_fam)){ stop("FAM file didn't exist") } if(!file.exists(path_to_rel)){ stop("Relationship file didn't exist") } ############### # Load the input data ############### # the .fam file fam_file <- read.table(file=path_to_fam, stringsAsFactors = F, head = F ) fam_file <- fam_file[, 1:2] names(fam_file) <- c("FID", "IID") fam_file$order <- 1:nrow(fam_file) rel <- read.table(file=path_to_rel, head=F, stringsAsFactors = F) # the rel file we only need the 1, 2, and last columns: rel <- rel[, c(1, 2, ncol(rel))] names(rel) <- c("IID_1", "IID_2", "relationship") # check relation labels relation_labels = unique(rel$relationship) setting_labels = c("MZtwin", "parentOffspring", "fullSib", "second", "third") if(length(setdiff(relation_labels, setting_labels)) != 0){ stop("FAM fro pedigree can only handle 5 relationship labels: MZtwin, parentOffspring, fullSib, second, third") } rel2 <- rel[,c(2,1,3)] names(rel2) <- c("IID_1", "IID_2", "relationship") rel <- rbind(rel, rel2) ############### # format the data ############### rel <- merge(rel, fam_file[,c("IID", "order")], by.x="IID_1", by.y="IID") rel <- merge(rel, fam_file[,c("IID", "order")], by.x="IID_2", by.y="IID") rel <- rel[, c("IID_1", "IID_2", "relationship", "order.x", "order.y")] # The order of IID_1 is important: # The order of IID_1 should always be larger than the order of IID_2. # (as the sparse FAM is saved as a lower triangular matrix) rel <- rel[order(rel$order.x, rel$order.y), ] sum(rel$order.x >= rel$order.y) rel <- rel[rel$order.x > rel$order.y, ] ############### # generate the FAM ############### rel$coef <- 0 rel[rel$relationship == "MZtwin", "coef"] <- 1 rel[rel$relationship == "parentOffspring", "coef"] <- 0.5 rel[rel$relationship == "fullSib", "coef"] <- 0.5 rel[rel$relationship == "second", "coef"] <- 0.25 rel[rel$relationship == "third", "coef"] <- 0.125 # the diagonal elements of FAM is simply 1 rel_part2 <- fam_file[,c("IID", "IID")] rel_part2$order.x <- 1:nrow(rel_part2) rel_part2$order.y <- 1:nrow(rel_part2) names(rel_part2) <- c("IID_1", "IID_2", "order.x", "order.y") rel_part2$coef <- 1 #### FAM_sp <- rbind(rel[, c("IID_1", "IID_2", "order.x", "order.y", "coef")], rel_part2) FAM_sp <- FAM_sp[order(FAM_sp$order.x, FAM_sp$order.y), ] ### the actual order starts from 0. need to minus 1. FAM_sp$order.x <- FAM_sp$order.x - 1 FAM_sp$order.y <- FAM_sp$order.y - 1 write.table(fam_file, file=paste0(output_file, ".grm.id"), quote=F, row.names = F, col.names = F) write.table(FAM_sp[,c("order.x", "order.y", "coef")], file=paste0(output_file, ".grm.sp"), quote=F, row.names = F, col.names = F) message("Sparse FAM generated: ", output_file, ".") #### Done