Skip to content

Commit b6573c1

Browse files
committed
fix
1 parent d36770e commit b6573c1

File tree

3 files changed

+9222
-10
lines changed

3 files changed

+9222
-10
lines changed

poia/most_used_private_functions.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# /// script
2+
# requires-python = ">=3.12"
3+
# dependencies = [
4+
# "marimo",
5+
# "matplotlib==3.10.1",
6+
# "pandas==2.2.3",
7+
# "plotly==6.0.1",
8+
# ]
9+
# ///
10+
11+
import marimo
12+
13+
__generated_with = "0.12.8"
14+
app = marimo.App(width="full")
15+
16+
17+
@app.cell
def _(mo):
    # Introductory note rendered as markdown. The original sentence stopped
    # mid-clause after "but"; it is completed here.
    mo.md(
        r"""Aims to list all functions of nilearn that are not in its user facing public API, but are nevertheless used in the analyzed repositories."""
    )
    return
21+
22+
23+
@app.cell
def _():
    # Standard library helpers used to import and inspect nilearn subpackages.
    import importlib
    import inspect

    # Third-party packages shared with the other cells of this notebook.
    import marimo as mo
    import nilearn
    import pandas as pd

    return importlib, inspect, mo, nilearn, pd
31+
32+
33+
@app.cell
def _(nilearn):
    # Cell output: the version string of the imported nilearn package.
    nilearn.__version__
    return
37+
38+
39+
@app.cell
def _(importlib, inspect, mo, nilearn):
    # Collect every name nilearn advertises through ``__all__``: the top-level
    # package, each entry of ``nilearn.__all__``, the names exported by each
    # subpackage, and the exports of any module a subpackage re-exports.
    public_api = ["nilearn"]

    # These names are part of the cell's return value; bind them up front so
    # the ``return`` below cannot hit an unbound local when a loop body never
    # executes (e.g. no subpackage exports a module).
    mod = submod = subpackage = x = None

    for subpackage in nilearn.__all__:
        public_api.append(subpackage)
        if subpackage.startswith("_"):
            # Underscore-prefixed entries are recorded but not imported.
            continue
        mod = importlib.import_module(f"nilearn.{subpackage}")
        # Guard ``__all__`` the same way it is guarded for submodules below.
        _exported = getattr(mod, "__all__", ())
        public_api.extend(_exported)
        for x in _exported:
            # ``getattr`` (instead of ``mod.__dict__[x]``) also resolves names
            # that are not stored directly in the module namespace, and
            # yields ``None`` rather than raising when a name is missing.
            _candidate = getattr(mod, x, None)
            if inspect.ismodule(_candidate):
                submod = importlib.import_module(f"nilearn.{subpackage}.{x}")
                if hasattr(submod, "__all__"):
                    public_api.extend(submod.__all__)
    mo.md("List all modules, classes, functions that are part of nilearn API.")
    return mod, public_api, submod, subpackage, x
55+
56+
57+
@app.cell
def _(mo, pd):
    # Build the CSV location relative to this notebook, then load it.
    _csv_path = mo.notebook_location() / "public" / "nilearn" / "functions_used.csv"
    df = pd.read_csv(_csv_path)
    return (df,)
61+
62+
63+
@app.cell
def _(df, public_api):
    # Flag rows whose "object" value is absent from the collected public API.
    _in_public_api = df["object"].isin(public_api)
    mask = ~_in_public_api
    return (mask,)
67+
68+
69+
@app.cell
def _(df, mask):
    # Cell output: only the rows selected by the boolean mask.
    df.loc[mask]
    return
73+
74+
75+
@app.cell
def _():
    # Make ``plot_usage`` from the ``poia`` module available to other cells.
    from poia import plot_usage

    return (plot_usage,)
79+
80+
81+
@app.cell
def _(plot_usage):
    # ``run()`` yields a pair; only its second item is kept, as ``defs``.
    defs = plot_usage.run()[1]
    return (defs,)
85+
86+
87+
@app.cell
def _(df, mask):
    # Number of masked rows per object, as a two-column table
    # ("object", "count").
    df_counts = (
        df.loc[mask, "object"]
        .value_counts()
        .rename_axis("object")
        .reset_index(name="count")
    )
    df_counts
    return (df_counts,)
93+
94+
95+
@app.cell
def _(df, mask):
    # Sum of column "n" per object over the masked rows, largest first,
    # with the summed column exposed as "weighted_count".
    df_weighted = (
        df.loc[mask]
        .groupby("object", as_index=False)["n"]
        .sum()
        .sort_values("n", ascending=False)
        .rename(columns={"n": "weighted_count"})
    )
    df_weighted
    return (df_weighted,)
101+
102+
103+
@app.cell
def _(defs, df, mask):
    # Fetch the plotting callable stored under "plot_usage" in ``defs`` and
    # draw the masked rows, colored by the "extracted_version" column.
    _plot = defs["plot_usage"]
    fig = _plot(df[mask], color="extracted_version")
    fig.show()
    return (fig,)
108+
109+
110+
if __name__ == "__main__":
111+
app.run()

poia/poia.py

Lines changed: 79 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,22 @@ def _(data_poi, extract_object_count):
432432
return (import_df,)
433433

434434

435+
@app.cell
def _(import_df):
    # Number of rows per object, as a two-column table ("object", "count").
    import_df_counts = (
        import_df["object"]
        .value_counts()
        .rename_axis("object")
        .reset_index(name="count")
    )
    import_df_counts
    return (import_df_counts,)
441+
442+
443+
@app.cell
def _(import_df):
    # Sum of column "n" per object, largest first, with the summed column
    # exposed as "weighted_count".
    import_df_weighted = (
        import_df.groupby("object", as_index=False)["n"]
        .sum()
        .sort_values("n", ascending=False)
        .rename(columns={"n": "weighted_count"})
    )
    import_df_weighted
    return (import_df_weighted,)
449+
450+
435451
@app.cell(hide_code=True)
436452
def _(
437453
config,
@@ -441,8 +457,10 @@ def _(
441457
plot_usage,
442458
radio_color,
443459
radio_color_element,
460+
switch,
461+
switch_weigthed_element,
444462
):
445-
subpackage_fig = plot_usage(import_df, color=radio_color.value)
463+
subpackage_fig = plot_usage(import_df, color=radio_color.value, weighted=switch.value)
446464
subpackage_fig.show()
447465

448466
mo.vstack(
@@ -455,6 +473,8 @@ def _(
455473
mo.hstack(
456474
[
457475
radio_color_element,
476+
switch_weigthed_element,
477+
mo.vstack([])
458478
],
459479
align="center",
460480
),
@@ -470,16 +490,41 @@ def _(mo):
470490

471491

472492
@app.cell
def _(config, data_poi, extract_object_count):
    # Keep only the rows flagged by "use_imports" before extracting counts
    # from the "function_counts" column.
    _rows_using_imports = data_poi[data_poi["use_imports"]]
    function_df = extract_object_count(_rows_using_imports, col="function_counts")

    # Save the table under <output dir>/<package of interest>/.
    _csv_target = config["OUTPUT"]["DIR"] / config["PACKAGE_OF_INTEREST"] / "functions_used.csv"
    function_df.to_csv(_csv_target, index=False)
    function_df
    return (function_df,)
478498

479499

500+
@app.cell
def _(function_df):
    # Number of rows per object, as a two-column table ("object", "count").
    function_df_counts = (
        function_df["object"]
        .value_counts()
        .rename_axis("object")
        .reset_index(name="count")
    )
    function_df_counts
    return (function_df_counts,)
506+
507+
508+
@app.cell
def _(function_df):
    # Sum of column "n" per object, largest first, with the summed column
    # exposed as "weighted_count".
    function_df_weighted = (
        function_df.groupby("object", as_index=False)["n"]
        .sum()
        .sort_values("n", ascending=False)
        .rename(columns={"n": "weighted_count"})
    )
    function_df_weighted
    return (function_df_weighted,)
514+
515+
480516
@app.cell(hide_code=True)
481-
def _(config, function_df, mo, plot_usage, radio_color, radio_color_element):
482-
function_fig = plot_usage(function_df, color=radio_color.value)
517+
def _(
518+
config,
519+
function_df,
520+
mo,
521+
plot_usage,
522+
radio_color,
523+
radio_color_element,
524+
switch,
525+
switch_weigthed_element,
526+
):
527+
function_fig = plot_usage(function_df, color=radio_color.value, weighted=switch.value)
483528
function_fig.show()
484529
mo.vstack(
485530
[
@@ -490,6 +535,8 @@ def _(config, function_df, mo, plot_usage, radio_color, radio_color_element):
490535
mo.hstack(
491536
[
492537
radio_color_element,
538+
switch_weigthed_element,
539+
mo.vstack([])
493540
],
494541
align="center",
495542
),
@@ -536,18 +583,26 @@ def _(mo):
536583
return
537584

538585

539-
@app.cell(hide_code=True)
540-
def _(Version, mcolors, plt, px):
541-
def plot_usage(df, color=None):
586+
@app.cell
587+
def plot_usage(Version, mcolors, plt, px):
588+
def plot_usage(df, color=None, weighted=False):
542589
"""Plot how frequently subpackage, classes, functions are used."""
543590
col = "object"
591+
y = None
592+
593+
if weighted:
594+
group_by = ["name", col, color] if color is not None else ["name", col]
595+
df = (
596+
df.groupby(group_by, as_index=False)['n'].sum()
597+
)
598+
df = df.sort_values('n', ascending=False)
599+
y = "n"
544600

545601
color_map = None
546602
if color:
547603
df = df.dropna(subset=[color])
548604

549605
# Sort version labels naturally
550-
category_orders = {color: sorted(df[color].unique())}
551606
if color == "extracted_version":
552607
ordered_versions = sorted(df[color].unique(), key=Version)
553608

@@ -557,8 +612,9 @@ def plot_usage(df, color=None):
557612

558613
# Aggregate and sort modules by total count
559614
order = df.groupby(col)["n"].count().sort_values(ascending=False).index.tolist()
615+
if weighted:
616+
order = df.groupby(col)["n"].sum().sort_values(ascending=False).index.tolist()
560617
category_orders = {col: order}
561-
562618
if color:
563619
category_orders[color] = (
564620
ordered_versions if color == "extracted_version" else sorted(df[color].unique())
@@ -567,6 +623,7 @@ def plot_usage(df, color=None):
567623
fig = px.histogram(
568624
df,
569625
x=col,
626+
y=y,
570627
title=f"Analysis of {len(df['name'].unique())} repositories",
571628
category_orders=category_orders,
572629
color_discrete_sequence=color_map,
@@ -1068,6 +1125,18 @@ def _(mo, radio_include):
10681125
return (radio_include_element,)
10691126

10701127

1128+
@app.cell
def _(mo):
    # UI toggle; the plotting cells read its state via ``switch.value``.
    switch = mo.ui.switch()
    return (switch,)
1132+
1133+
1134+
@app.cell
def _(mo, switch):
    # Stack a "Weighted" caption on top of the toggle so both can be placed
    # as a single element in a layout.
    _caption = mo.md("Weighted")
    switch_weigthed_element = mo.vstack([_caption, switch])
    return (switch_weigthed_element,)
1138+
1139+
10711140
@app.cell
10721141
def _(radio_color):
10731142
explanation_version_0 = ""

0 commit comments

Comments
 (0)