Florents (Flo) Tselai
# Find all employees older than 30
result = []
for emp in employees:
if emp.age > 30:
result.append(emp)
Q: Where’s the data, where the execution happens ?
Where’s Pandas Today ?
Those who avoid using SQL are bound to reinvent it…. Poorly!
Standard Query Language
– You want standard?
– Yes!
– I’ll give you standard
– I want custom too!
– !@#$%&&
import
in SQL ?pdf
type for PostgreSQL pdf
-------------------------
PostgreSQL Introduction
Digoal.Zhou
7/20/2011Catalog
PostgreSQL Origin
pdf
type for PostgreSQLCombining extensions.
CREATE EXTENSION spat;
SELECT SPSET('key', 'value');
SELECT SPGET('key'); -- value
SELECT LPUSH('list1', 'elem1');
SELECT LPUSH('list1', 'elem2');
SELECT LPOP('list1'); -- elem2
SELECT SADD('set1', 'elem1', 'elem2');
SELECT SISMEMBER('set1', 'elem1'); -- t
SELECT HSET('h1', 'f1', 'Hello');
SELECT HGET('h1', 'f1'); -- Hello
Makefile
-based.py_string_join
in PL/PythonCREATE OR REPLACE FUNCTION moving_avg(
vals float8[], win int DEFAULT 3) RETURNS float8[]
AS $$
result = []
n = len(vals)
for i in range(n):
start = max(0, i - win + 1)
window_vals = vals[start:i+1]
avg = sum(window_vals) / len(window_vals)
result.append(avg)
return result
$$ LANGUAGE plpython3u IMMUTABLE COST 100 PARALLEL UNSAFE;
Both are serverless, in-process, file-based
import sqlite3
py_string_join
in SQLiteimport sqlite3
conn = sqlite3.connect(":memory:")
# or conn = sqlite3.connect("/path/to/db")
def py_string_join(elements, separator=", "):
if isinstance(elements, str):
elements = elements.split(",")
return separator.join(elements)
conn.create_function("py_string_join", 2, py_string_join)
cur = conn.cursor()
cur.execute("SELECT py_string_join('apple,banana,cherry', ' | ')")
print(cur.fetchone()[0]) # apple | banana | cherry
py_string_join
in DuckDBimport duckdb
import numpy as np # DuckDB UDFs rely on NumPy types internally
con = duckdb.connect()
def py_string_join(elements, separator=", "):
if isinstance(elements, str):
elements = elements.split(",")
return separator.join(elements)
con.create_function(
"py_string_join",
py_string_join,
return_type=duckdb.typing.VARCHAR
)
print(con.execute("SELECT py_string_join('apple,banana,cherry', ' | ')").fetchone()[0])
# → apple | banana | cherry
SELECT corr(x, y)
ξ
(xi) correlation coefficient) go beyond linearityThat’s written in C.
import sqlite3
import numpy as np
from scipy.stats import chatterjeexi
class ChatterjeeXiAgg:
def __init__(self):
self.x, self.y = [], []
def step(self, x, y):
if x is not None and y is not None:
self.x.append(x)
self.y.append(y)
def finalize(self):
if not self.x:
return None
return float(chatterjeexi(np.array(self.x),
np.array(self.y), method="asymptotic"))
con = sqlite3.connect(":memory:")
con.create_aggregate("chatterjee_xi", 2, ChatterjeeXiAgg)
cur = con.cursor()
cur.execute("CREATE TABLE t(x REAL, y REAL)")
cur.executemany("INSERT INTO t VALUES (?, ?)", [
(1, 2),
(2, 5),
(3, 7),
(4, 8),
(5, 10),
])
print(cur.execute("SELECT chatterjee_xi(x, y) FROM t").fetchone()[0])
Florents (Flo) Tselai