Issue
I have a script that I run in IPython, and basically takes an input .csv file of gene_names
and pushes them into this for loop, where
with open('C:\Users\Work\Desktop\Book1.csv', 'rU') as f:
reader = csv.reader(f)
with PdfPages('poopyheadjoe04.pdf') as pdf:
for row in reader:
gene_name = row
probe_exclusion_keyword = []
print gene_name
The gene_name values from this list(in the .csv file) is then fed into a line, if inference_method == "approximate_random"
: (in Scripts.py)
with open('C:\Users\Work\Desktop\Book1.csv', 'rU') as f:
reader = csv.reader(f)
with PdfPages('poopyheadjoe04.pdf') as pdf:
for row in reader:
gene_name = row
probe_exclusion_keyword = []
print gene_name
print "Fetching probe ids for gene %s" % gene_name
probes_dict = get_probes_from_genes(gene_name)
print "Found %s probes: %s" % (len(probes_dict), ", ".join(probes_dict.values()))
if probe_exclusion_keyword:
probes_dict = {probe_id: probe_name for (probe_id, probe_name) in probes_dict.iteritems() if not args.probe_exclusion_keyword in probe_name}
print "Probes after applying exclusion cryterion: %s" % (", ".join(probes_dict.values()))
print "Fetching expression values for probes %s" % (", ".join(probes_dict.values()))
expression_values, well_ids, donor_names = get_expression_values_from_probe_ids(
probes_dict.keys())
print "Found data from %s wells sampled across %s donors" % (len(well_ids), len(set(donor_names)))
print "Combining information from selected probes"
combined_expression_values = combine_expression_values(
expression_values, method=probes_reduction_method)
print "Translating locations of the wells to MNI space"
mni_coordinates = get_mni_coordinates_from_wells(well_ids)
print "Checking values of the provided NIFTI file at well locations"
nifti_values = get_values_at_locations(
stat_map, mni_coordinates, mask_file=mask, radius=radius, verbose=True)
# preparing the data frame
names = ["NIFTI values", "%s expression" % gene_name, "donor ID"]
data = pd.DataFrame(np.array(
[nifti_values, combined_expression_values, donor_names]).T, columns=names)
data = data.convert_objects(convert_numeric=True)
len_before = len(data)
data.dropna(axis=0, inplace=True)
nans = len_before - len(data)
if nans > 0:
print "%s wells fall outside of the mask" % nans
if inference_method == "fixed":
print "Performing fixed effect analysis"
fixed_effects(data, ["NIFTI values", "%s expression" % gene_name])
**if inference_method == "approximate_random":**
print "Performing approximate random effect analysis"
approximate_random_effects(
data, ["NIFTI values", "%s expression" % gene_name], "donor ID")
print "poopy"
pdf.savefig()
plt.ion() #should i add ion() here?
if inference_method == "bayesian_random":
print "Fitting Bayesian hierarchical model"
bayesian_random_effects(
data, ["NIFTI values", "%s expression" % gene_name], "donor ID", n_samples, n_burnin)
# if __name__ == '__main__': #What exactly does this do? Start trigger for the script to run?
# main()
that triggers approximate_random_effects
(in Analysis.py) to plot two graphs, the violinplot and the lmplot:
def approximate_random_effects(data, labels, group):
correlation_per_donor = {}
for donor_id in set(data[group]):
correlation_per_donor[donor_id], _, _, _, _ = linregress(list(data[labels[0]][data[group] == donor_id]),
list(data[labels[1]][data[group] == donor_id]))
average_slope = np.array(correlation_per_donor.values()).mean()
t, p_val = ttest_1samp(correlation_per_donor.values(), 0)
print "Averaged slope across donors = %g (t=%g, p=%g)"%(average_slope, t, p_val)
sns.violinplot([correlation_per_donor.values()], inner="points", names=["donors"])
plt.ylabel("Linear regression slopes between %s and %s"%(labels[0],labels[1]))
plt.axhline(0, color="red")
sns.lmplot(labels[0], labels[1], data, hue=group, col=group, col_wrap=3)
plt.ion()
return average_slope, t, p_val
I'm trying to save both graphs for all the gene_names into a pdf file, by roughly following "Saving multiple figures to one pdf file in matplotlib" and the matplotlib.PdfPages
approach.
However, in the pdf file, I am only getting the lmplot for all my gene_names and NOT the violin plot. What do I do to fix this?
Thanks! Help will be much appreciated!
Solution
It looks like your code is creating two figures, one for each plots, but you only call pdf.savefig()
once after the second figure is created, therefore only saving the second figure.
If you want one figure per page in your pdf, you need to call pdf.savefig()
twice: once after creating each plot.
I would recommend that you change the structure of your program a bit, so you can save the pdf after each plot:
def approximate_random_effects(data, labels, group):
correlation_per_donor = {}
for donor_id in set(data[group]):
correlation_per_donor[donor_id], _, _, _, _ = linregress(list(data[labels[0]][data[group] == donor_id]),
list(data[labels[1]][data[group] == donor_id]))
average_slope = np.array(correlation_per_donor.values()).mean()
t, p_val = ttest_1samp(correlation_per_donor.values(), 0)
print "Averaged slope across donors = %g (t=%g, p=%g)"%(average_slope, t, p_val)
with PdfPages('poopyheadjoe04.pdf') as pdf:
fig = plt.figure()
sns.violinplot([correlation_per_donor.values()], inner="points", names=["donors"])
plt.ylabel("Linear regression slopes between %s and %s"%(labels[0],labels[1]))
plt.axhline(0, color="red")
pdf.savefig(fig) ## Saving first figure
fig = plt.figure()
sns.lmplot(labels[0], labels[1], data, hue=group, col=group, col_wrap=3)
pdf.savefig(fig) ## Saving second figure
return average_slope, t, p_val
You then need to delete in your main program the lines with PdfPages('poopyheadjoe04.pdf') as pdf:
, pdf.savefig()
and plt.ion()
.
If you need the two plots on the same pdf page, you need change the violinplot
and lmplot
in such a way that they use different axes on the same figure.
Answered By - Julien Spronck
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.