Skip to content
Snippets Groups Projects

handling multiple compartments in side compound scan

Merged Clement Frainay requested to merge feature/mergedSideCompounds into develop
@@ -4,19 +4,22 @@ import fr.inrae.toulouse.metexplore.met4j_chemUtils.FormulaParser;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork;
import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.DefaultWeightPolicy;
import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.VertexContraction;
import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy;
import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph;
import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException;
import fr.inrae.toulouse.metexplore.met4j_core.utils.StringUtils;
import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.kohsuke.args4j.Option;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Pattern;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
*
@@ -50,6 +53,11 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
// Threshold on the number of redundant (parallel) incident edges above which a compound is flagged.
// NOTE(review): the NaN default presumably means this criterion is disabled — confirm against the scan code.
@Option(name = "-er", aliases = {"--edgeRedundancy"}, usage = "flag as side compound any compound with a number of redundancy in incident edges (parallel edges connecting to the same neighbor) above the given threshold")
public double parallelEdge = Double.NaN;
/**
 * Ways of recognising the same compound across compartments when degrees are merged.
 * {@code by_name} groups compounds on their name; {@code by_id} groups them on their
 * identifier, relying on a compartment suffix (ids of the form "xxx_y").
 */
enum strategy {by_name,by_id}
// Selected compartment-merging strategy. Left at null when the option is not given,
// in which case no merging is done and each compound is scored on its own degree.
@Option(name = "-m", aliases = {"--merge"}, usage = "Degree is shared between compounds in different compartments. " +
"Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form \"xxx_y\" with xxx as base identifier and y as compartment label).")
public strategy mergingStrat = null;
public static void main(String[] args) throws IOException, Met4jSbmlReaderException {
@@ -86,12 +94,32 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
//perform scan
//------------
System.err.println("Scaning...");
//if merging compartment
Map<String, Integer> mergedDegree = new HashMap<>();
Boolean merge = (mergingStrat!=null);
Function<BioMetabolite,String> getSharedId = BioMetabolite::getName;
if(merge){
if(mergingStrat.equals(strategy.by_id)) getSharedId = (new VertexContraction.MapByIdSubString("^(\\w+)_\\w$"))::commonField;
mergedDegree = graph.vertexSet().stream().collect(
Collectors.groupingBy(
getSharedId,
Collectors.summingInt(v -> graph.degreeOf(v))
)
);
}
//degree statistics
DescriptiveStatistics degreeStats = new DescriptiveStatistics();
double dt = degree;
if (!Double.isNaN(degreePrecentile)) {
for (BioMetabolite v : graph.vertexSet()) {
degreeStats.addValue(graph.degreeOf(v));
if (merge){
degreeStats.addValue(mergedDegree.get(getSharedId.apply(v)));
}else{
degreeStats.addValue(graph.degreeOf(v));
}
}
dt = degreeStats.getPercentile(degreePrecentile);
}
@@ -120,7 +148,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
StringBuffer l = new StringBuffer(v.getId());
if (reportValue) l.append("\t" + v.getName());
int d = graph.degreeOf(v);
int d = merge ? mergedDegree.get(getSharedId.apply(v)) : graph.degreeOf(v);
boolean sideFromDegree = (d >= degree);
if (sideFromDegree) side = true;
if (reportValue) l.append("\t" + d);
@@ -137,7 +165,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
if (flagInorganic || flagNoFormula) {
String formula = v.getChemicalFormula();
String inorganic = "?";
String validForumla = "true";
String validFormula = "true";
try{
FormulaParser fp = new FormulaParser(formula);
if(flagInorganic){
@@ -150,14 +178,14 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
}
}catch(IllegalArgumentException e){
if(flagNoFormula){
validForumla = "false";
validFormula = "false";
side = true;
}
}
if (reportValue){
if(flagInorganic) l.append("\t" + inorganic);
if(flagNoFormula) l.append("\t" + validForumla);
if(flagNoFormula) l.append("\t" + validFormula);
}
}
@@ -185,7 +213,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
public String getLongDescription() {
return this.getShortDescription() + "\n" +
"Side compounds are metabolites of small relevance for topological analysis. Their definition can be quite subjective and varies between sources.\n" +
"Side compounds tends to be ubiquitous and not specific to a particular biochemical or physiological process.\n" +
"Side compounds tend to be ubiquitous and not specific to a particular biochemical or physiological process." +
"Compounds usually considered as side compounds include water, atp or carbon dioxide. By being involved in many reactions and thus connected to many compounds, " +
"they tend to significantly lower the average shortest path distances beyond expected metabolic relatedness.\n" +
"This tool attempts to propose a list of side compounds according to specific criteria: \n" +
Loading