3D Genome三維基因組學NGS 軟體開發

【3D Genome】 應用電腦視覺於TAD拓撲結構域之預測創意發想

由於Hi-C 視覺化之後,可視為一個二維矩陣,類似於灰階影像。而拓撲結構域Topologically associating domains (TAD),被定義為相較其外部,它的內部有高度交互作用的一個方形區域。因此可以利用先將Hi-C矩陣轉換到影像空間之後,再利用常見的電腦視覺的技巧把TAD框出來,類似傳統找ROI的處理技巧來做到這件事。

方法設計

下圖是設計的流程,大致上按照傳統電腦視覺的邏輯去搭建

實驗討論

實作的效果如下圖

,最終call出 的TAD由綠色框線所標示

經過一些簡單的實驗之後知道,把HiC矩陣轉換成視覺圖的參數需要自動化,並且引入一個類似感知機的方程式,類似下圖

實作程式碼

HiCCVTAD.java


package com.whuang022.bio;

import org.bytedeco.javacpp.opencv_core.Mat;
import org.bytedeco.javacpp.opencv_core.MatVector;

/**
 * HiC TAD DTO Class
 * @author whuang022
 */
public class HiCCVTAD{
    public Mat TADImage;
    public MatVector  TADRects;
}

HiCCV.java


package com.whuang022.bio;

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.opencv_core;
import static org.bytedeco.javacpp.opencv_core.IPL_DEPTH_8U;
import org.bytedeco.javacpp.opencv_core.IplImage;
import org.bytedeco.javacpp.opencv_core.Mat;
import org.bytedeco.javacpp.opencv_core.MatVector;
import org.bytedeco.javacpp.opencv_core.Scalar;
import static org.bytedeco.javacpp.opencv_core.cvCreateImage;
import static org.bytedeco.javacpp.opencv_core.cvGetSize;
import static org.bytedeco.javacpp.opencv_imgproc.COLOR_GRAY2BGR;
import static org.bytedeco.javacpp.opencv_imgproc.CV_CHAIN_APPROX_NONE;
import static org.bytedeco.javacpp.opencv_imgproc.CV_RETR_EXTERNAL;
import static org.bytedeco.javacpp.opencv_imgproc.CV_THRESH_BINARY;
import static org.bytedeco.javacpp.opencv_imgproc.CV_THRESH_OTSU;
import static org.bytedeco.javacpp.opencv_imgproc.boundingRect;
import static org.bytedeco.javacpp.opencv_imgproc.cvDilate;
import static org.bytedeco.javacpp.opencv_imgproc.cvErode;
import static org.bytedeco.javacpp.opencv_imgproc.cvtColor;
import static org.bytedeco.javacpp.opencv_imgproc.findContours;
import static org.bytedeco.javacpp.opencv_imgproc.rectangle;
import static org.bytedeco.javacpp.opencv_imgproc.threshold;

/**
 * HiC JavaCV Process Class
 * @author whuang022
 */



public class HiCCV {
    
    public static Mat HiCtoMat (HiC hic,int shiftVal){
        IplImage image = IplImage.create(hic.getRowSize(),hic.getColSize(), IPL_DEPTH_8U, 1);
        Mat mImage=new Mat(image);
        for(int i=1;i<hic.getRowSize();i++){
            for(int j=1;j<hic.getColSize();j++){
                BytePointer bytePointer = mImage.ptr(i,j,0);
                double val=hic.get(i,j);
                double color= ((val - hic.getMin()) * (1/( hic.getMax() -  hic.getMin()) * 255)); //linear mapping HiC to  Gay Scale Image (0-255)
                bytePointer.putInt((int)color+shiftVal);
            }
        }
        return mImage;
    }
    
    public static HiCCVTAD HiCTADCallingMorphology (HiC hic,int shiftVal,int min){
        
    Mat HiCMat;
    Mat otsu = new Mat(); 
    IplImage otsuIp ;
    IplImage colseIp ;  
    Mat HicMatOut = new Mat(); 
    HiCMat=HiCCV.HiCtoMat(hic,shiftVal);
    MatVector contours = new MatVector();
    MatVector outcontours = new MatVector();
    threshold(HiCMat, otsu, 10, 255, CV_THRESH_OTSU + CV_THRESH_BINARY); // OTSU to Mark Acctivate Zone
    otsuIp = new IplImage(otsu);
    colseIp= cvCreateImage(cvGetSize(otsuIp), IPL_DEPTH_8U, 1); 
    //Opening Operation
    cvErode(otsuIp, colseIp, null, 1);
    cvDilate(colseIp, colseIp, null, 3);    
    cvDilate(colseIp, colseIp, null, 1);
    Mat colse=new Mat(colseIp);
    findContours(colse, contours,CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);
    cvtColor(HiCMat,HicMatOut, COLOR_GRAY2BGR);
    for(int i=0;i<contours.size();i++){
        opencv_core.Rect mr = boundingRect(contours.get(i));
        if(mr.area()>min){
            rectangle(HicMatOut, mr, new Scalar(0, 255, 0, 1));
            outcontours.put(mr);
        }
    }
    HiCCVTAD out =new HiCCVTAD();
    out.TADImage=HicMatOut;
    out.TADRects=outcontours;
    return out;
    }
}

HiC.java


package com.whuang022.bio;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;


/**
 * HiC Data Reader Class
 * @author whuang022
 */
public class HiC {
    private ArrayList<ArrayList<Double>> data = new ArrayList<ArrayList<Double>>();
    private double maxVal;
    private double minVal;
    public int getRowSize(){
        return data.size();
    }
    public int getColSize(){
        return data.get(0).size();
    }
    public double get(int row,int col){
        return data.get(row).get(col);
    }
    public double getMax(){
        return maxVal;
    }
    public double getMin(){
        return minVal;
    }
    public HiC(String filePath) throws FileNotFoundException, IOException{
        BufferedReader reader = new BufferedReader(new FileReader(filePath));
        maxVal=Double.MIN_VALUE;
        minVal=Double.MAX_VALUE;
        String line ;
        while((line=reader.readLine())!=null){
            String[] rowData = line.split("	");
            ArrayList<Double> row=new ArrayList<>();
            for (String rowData1 : rowData) {
                double val=Double.parseDouble(rowData1);
                row.add(val);
                if(val>maxVal){
                    maxVal=val;
                }
                if(val<minVal){
                    minVal=val;
                }
            }
            data.add(row);
       }
    }
    
}

實驗結果

最終的結果如下,已經可以抓出大概的TAD區域

以上就是一個TAD calling的演算法發想,未來可以再加強它的效果,並與主流的TAD calling做比較

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *

zh_TWChinese