4  R and Python

R and Python can interact together via the reticulate package.

The documentation for the reticulate package can be found here: https://rstudio.github.io/reticulate/

reticulate

Installation and configuration instructions are in the reticulate documentation.

Alternatively, we can set up our environment by following the webinar "R and Python – a happy union with reticulate".

# Install the reticulate package.
install.packages("reticulate")

Note that the reticulate package requires Python >= 3.8 and NumPy >= 1.16.

4.1 Using Python with RMarkdown and RStudio

The built-in R dataset `mtcars` is loaded here and will be used later.

# Attach reticulate and load the built-in mtcars data set, which the Python
# chunks further down read as `r.mtcars`.
library(reticulate)
# use_virtualenv("myenv")
data(mtcars)

# Store in the `dase_python_ready` option whether every Python module used by
# the demo chunks can be found.
options(
  dase_python_ready = local({
    ready <- TRUE
    # Stop probing at the first missing module, like a chained `&&`.
    for (module in c("pandas", "seaborn", "matplotlib")) {
      if (!reticulate::py_module_available(module)) {
        ready <- FALSE
        break
      }
    }
    ready
  })
)

# Tell the reader when the Python demos cannot run in this session.
if (!isTRUE(getOption("dase_python_ready"))) {
  message("Python demo chunks requiring pandas/seaborn/matplotlib are skipped in this environment.")
}
# Smoke test: confirm that a Python chunk executes.
print("Hello Python!")
# Country records keyed by a three-letter country code. Every record has
# COUNTRY, POP, AREA and GDP; CONT and IND_DAY are present only for some
# entries (e.g. 'RUS' has no CONT, 'CHN' has no IND_DAY).
datactrs = {
    'CHN': {'COUNTRY': 'China', 'POP': 1_398.72, 'AREA': 9_596.96,
            'GDP': 12_234.78, 'CONT': 'Asia'},
    'IND': {'COUNTRY': 'India', 'POP': 1_351.16, 'AREA': 3_287.26,
            'GDP': 2_575.67, 'CONT': 'Asia', 'IND_DAY': '1947-08-15'},
    'USA': {'COUNTRY': 'US', 'POP': 329.74, 'AREA': 9_833.52,
            'GDP': 19_485.39, 'CONT': 'N.America',
            'IND_DAY': '1776-07-04'},
    'IDN': {'COUNTRY': 'Indonesia', 'POP': 268.07, 'AREA': 1_910.93,
            'GDP': 1_015.54, 'CONT': 'Asia', 'IND_DAY': '1945-08-17'},
    'BRA': {'COUNTRY': 'Brazil', 'POP': 210.32, 'AREA': 8_515.77,
            'GDP': 2_055.51, 'CONT': 'S.America', 'IND_DAY': '1822-09-07'},
    'PAK': {'COUNTRY': 'Pakistan', 'POP': 205.71, 'AREA': 881.91,
            'GDP': 302.14, 'CONT': 'Asia', 'IND_DAY': '1947-08-14'},
    'NGA': {'COUNTRY': 'Nigeria', 'POP': 200.96, 'AREA': 923.77,
            'GDP': 375.77, 'CONT': 'Africa', 'IND_DAY': '1960-10-01'},
    'BGD': {'COUNTRY': 'Bangladesh', 'POP': 167.09, 'AREA': 147.57,
            'GDP': 245.63, 'CONT': 'Asia', 'IND_DAY': '1971-03-26'},
    'RUS': {'COUNTRY': 'Russia', 'POP': 146.79, 'AREA': 17_098.25,
            'GDP': 1_530.75, 'IND_DAY': '1992-06-12'},
    'MEX': {'COUNTRY': 'Mexico', 'POP': 126.58, 'AREA': 1_964.38,
            'GDP': 1_158.23, 'CONT': 'N.America', 'IND_DAY': '1810-09-16'},
    'JPN': {'COUNTRY': 'Japan', 'POP': 126.22, 'AREA': 377.97,
            'GDP': 4_872.42, 'CONT': 'Asia'},
    'DEU': {'COUNTRY': 'Germany', 'POP': 83.02, 'AREA': 357.11,
            'GDP': 3_693.20, 'CONT': 'Europe'},
    'FRA': {'COUNTRY': 'France', 'POP': 67.02, 'AREA': 640.68,
            'GDP': 2_582.49, 'CONT': 'Europe', 'IND_DAY': '1789-07-14'},
    'GBR': {'COUNTRY': 'UK', 'POP': 66.44, 'AREA': 242.50,
            'GDP': 2_631.23, 'CONT': 'Europe'},
    'ITA': {'COUNTRY': 'Italy', 'POP': 60.36, 'AREA': 301.34,
            'GDP': 1_943.84, 'CONT': 'Europe'},
    'ARG': {'COUNTRY': 'Argentina', 'POP': 44.94, 'AREA': 2_780.40,
            'GDP': 637.49, 'CONT': 'S.America', 'IND_DAY': '1816-07-09'},
    'DZA': {'COUNTRY': 'Algeria', 'POP': 43.38, 'AREA': 2_381.74,
            'GDP': 167.56, 'CONT': 'Africa', 'IND_DAY': '1962-07-05'},
    'CAN': {'COUNTRY': 'Canada', 'POP': 37.59, 'AREA': 9_984.67,
            'GDP': 1_647.12, 'CONT': 'N.America', 'IND_DAY': '1867-07-01'},
    'AUS': {'COUNTRY': 'Australia', 'POP': 25.47, 'AREA': 7_692.02,
            'GDP': 1_408.68, 'CONT': 'Oceania'},
    'KAZ': {'COUNTRY': 'Kazakhstan', 'POP': 18.53, 'AREA': 2_724.90,
            'GDP': 159.41, 'CONT': 'Asia', 'IND_DAY': '1991-12-16'}
}

# Field names, in the order used later as the DataFrame index before transposing.
columns = ('COUNTRY', 'POP', 'AREA', 'GDP', 'CONT', 'IND_DAY')
import pandas as pd
# seaborn must be installed in the active Python environment,
# e.g. on Ubuntu: sudo apt-get install -y python3-seaborn
import seaborn as sns
import matplotlib.pyplot as plt

# Load seaborn's "tips" example dataset.
tips = sns.load_dataset("tips")
mylist = ["youtube", "linkedin", "1littlecoder"]

# Scatter plot of tip against total bill, coloured by the `day` column.
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="day")
plt.show()

# Load seaborn's "fmri" example dataset; the R code below reads it as py$fmri.
fmri = sns.load_dataset("fmri")
# Keep only the rows of the Python `fmri` data frame whose region is "parietal".
f1 <- subset(py$fmri, region == "parietal")
import matplotlib as mpl
# Import pyplot explicitly so show() does not depend on an earlier chunk
# having loaded the matplotlib.pyplot submodule.
import matplotlib.pyplot as plt

# Linear-model plot of the R data frame `f1`, reached from Python as r.f1.
# x and y are passed by keyword: seaborn >= 0.12 makes them keyword-only and
# raises TypeError when they are given positionally.
sns.lmplot(data=r.f1, x="timepoint", y="signal")
plt.show()

# Same kind of plot for R's mtcars data frame.
sns.lmplot(data=r.mtcars, x="mpg", y="cyl")
plt.show()
import pandas as pd

# Build the table with the field names as the index, then transpose so that
# each country code becomes a row and each field a column.
df = pd.DataFrame(data=datactrs, index=columns).T
df

# Write to the same path that is read back below; the original path repeated
# the "datasets/" segment, so the later read could not find the file.
df.to_csv('datasets/other/data_countries.csv')

We can read the dataset back in from Python:

# Load the countries table, using the first CSV column as the index.
df1 = pd.read_csv("datasets/other/data_countries.csv", index_col=0)

Use R to read and write data from a package

# Attach nycflights13, which provides the `flights` data set, and export that
# table to CSV so the Python code below can read it.
library(nycflights13)
write.csv(x = flights, file = "datasets/other/flights.csv")

Use Python to read the dataset and process the data

import pandas

# Read the exported flights table, keep the rows whose destination is "ORD",
# restrict to the carrier and delay columns, and drop rows with missing values.
flights = (
    pandas.read_csv("datasets/other/flights.csv")
    .loc[lambda frame: frame["dest"] == "ORD"]
    [["carrier", "dep_delay", "arr_delay"]]
    .dropna()
)
print(flights.head())

Use Python for plotting

import matplotlib.pyplot as plt
import numpy as np

# Sample points from 0.0 up to (not including) 2.0 in steps of 0.01.
t = np.arange(0.0, 2.0, 0.01)
# Sine wave with one cycle per unit of t, shifted up by 1.
s = np.sin(2 * np.pi * t) + 1

plt.plot(t, s)
plt.xlabel("time (s)")
plt.ylabel("voltage (mV)")
plt.grid(True)

# Save the figure to disk before displaying it.
plt.savefig("test.png")
plt.show()

Use R for plotting Python objects:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

4.2 Further Information