@@ -432,6 +432,22 @@ def _(data_poi, extract_object_count):
432
432
return (import_df ,)
433
433
434
434
435
@app.cell
def _(import_df):
    # Tally how many rows of import_df carry each distinct 'object' value
    # (value_counts lists the most frequent first) and expose the result as a
    # two-column table: 'object' and 'count'.
    import_df_counts = (
        import_df['object']
        .value_counts()
        .reset_index()
        .set_axis(['object', 'count'], axis='columns')
    )
    # Left as a bare expression so the table is the cell's displayed output.
    import_df_counts
    return (import_df_counts,)
441
+
442
+
443
@app.cell
def _(import_df):
    # Sum the 'n' column per distinct 'object' value, order the totals from
    # largest to smallest, and label the summed column 'weighted_count'.
    import_df_weighted = (
        import_df.groupby('object', as_index=False)['n']
        .sum()
        .sort_values('n', ascending=False)
        .set_axis(['object', 'weighted_count'], axis='columns')
    )
    # Left as a bare expression so the table is the cell's displayed output.
    import_df_weighted
    return (import_df_weighted,)
449
+
450
+
435
451
@app .cell (hide_code = True )
436
452
def _ (
437
453
config ,
@@ -441,8 +457,10 @@ def _(
441
457
plot_usage ,
442
458
radio_color ,
443
459
radio_color_element ,
460
+ switch ,
461
+ switch_weigthed_element ,
444
462
):
445
- subpackage_fig = plot_usage (import_df , color = radio_color .value )
463
+ subpackage_fig = plot_usage (import_df , color = radio_color .value , weighted = switch . value )
446
464
subpackage_fig .show ()
447
465
448
466
mo .vstack (
@@ -455,6 +473,8 @@ def _(
455
473
mo .hstack (
456
474
[
457
475
radio_color_element ,
476
+ switch_weigthed_element ,
477
+ mo .vstack ([])
458
478
],
459
479
align = "center" ,
460
480
),
@@ -470,16 +490,41 @@ def _(mo):
470
490
471
491
472
492
@app.cell
def _(config, data_poi, extract_object_count):
    # Keep only the rows of data_poi whose "use_imports" entry is truthy and
    # hand them to extract_object_count, reading the "function_counts" column.
    # Underscore-prefixed temporaries stay private to this cell.
    _rows_using_imports = data_poi[data_poi["use_imports"]]
    function_df = extract_object_count(_rows_using_imports, col="function_counts")

    # Persist the table under <OUTPUT.DIR>/<PACKAGE_OF_INTEREST>/.
    # NOTE(review): the "/" joins assume config["OUTPUT"]["DIR"] is a
    # pathlib.Path and that the target directory already exists -- confirm.
    _csv_path = config["OUTPUT"]["DIR"] / config["PACKAGE_OF_INTEREST"] / "functions_used.csv"
    function_df.to_csv(_csv_path, index=False)

    # Left as a bare expression so the table is the cell's displayed output.
    function_df
    return (function_df,)
478
498
479
499
500
@app.cell
def _(function_df):
    # Tally how many rows of function_df carry each distinct 'object' value
    # (value_counts lists the most frequent first) and expose the result as a
    # two-column table: 'object' and 'count'.
    function_df_counts = (
        function_df['object']
        .value_counts()
        .reset_index()
        .set_axis(['object', 'count'], axis='columns')
    )
    # Left as a bare expression so the table is the cell's displayed output.
    function_df_counts
    return (function_df_counts,)
506
+
507
+
508
@app.cell
def _(function_df):
    # Sum the 'n' column per distinct 'object' value, order the totals from
    # largest to smallest, and label the summed column 'weighted_count'.
    function_df_weighted = (
        function_df.groupby('object', as_index=False)['n']
        .sum()
        .sort_values('n', ascending=False)
        .set_axis(['object', 'weighted_count'], axis='columns')
    )
    # Left as a bare expression so the table is the cell's displayed output.
    function_df_weighted
    return (function_df_weighted,)
514
+
515
+
480
516
@app .cell (hide_code = True )
481
- def _ (config , function_df , mo , plot_usage , radio_color , radio_color_element ):
482
- function_fig = plot_usage (function_df , color = radio_color .value )
517
+ def _ (
518
+ config ,
519
+ function_df ,
520
+ mo ,
521
+ plot_usage ,
522
+ radio_color ,
523
+ radio_color_element ,
524
+ switch ,
525
+ switch_weigthed_element ,
526
+ ):
527
+ function_fig = plot_usage (function_df , color = radio_color .value , weighted = switch .value )
483
528
function_fig .show ()
484
529
mo .vstack (
485
530
[
@@ -490,6 +535,8 @@ def _(config, function_df, mo, plot_usage, radio_color, radio_color_element):
490
535
mo .hstack (
491
536
[
492
537
radio_color_element ,
538
+ switch_weigthed_element ,
539
+ mo .vstack ([])
493
540
],
494
541
align = "center" ,
495
542
),
@@ -536,18 +583,26 @@ def _(mo):
536
583
return
537
584
538
585
539
- @app .cell ( hide_code = True )
540
- def _ (Version , mcolors , plt , px ):
541
- def plot_usage (df , color = None ):
586
+ @app .cell
587
+ def plot_usage (Version , mcolors , plt , px ):
588
+ def plot_usage (df , color = None , weighted = False ):
542
589
"""Plot how frequently subpackage, classes, functions are used."""
543
590
col = "object"
591
+ y = None
592
+
593
+ if weighted :
594
+ group_by = ["name" , col , color ] if color is not None else ["name" , col ]
595
+ df = (
596
+ df .groupby (group_by , as_index = False )['n' ].sum ()
597
+ )
598
+ df = df .sort_values ('n' , ascending = False )
599
+ y = "n"
544
600
545
601
color_map = None
546
602
if color :
547
603
df = df .dropna (subset = [color ])
548
604
549
605
# Sort version labels naturally
550
- category_orders = {color : sorted (df [color ].unique ())}
551
606
if color == "extracted_version" :
552
607
ordered_versions = sorted (df [color ].unique (), key = Version )
553
608
@@ -557,8 +612,9 @@ def plot_usage(df, color=None):
557
612
558
613
# Aggregate and sort modules by total count
559
614
order = df .groupby (col )["n" ].count ().sort_values (ascending = False ).index .tolist ()
615
+ if weighted :
616
+ order = df .groupby (col )["n" ].sum ().sort_values (ascending = False ).index .tolist ()
560
617
category_orders = {col : order }
561
-
562
618
if color :
563
619
category_orders [color ] = (
564
620
ordered_versions if color == "extracted_version" else sorted (df [color ].unique ())
@@ -567,6 +623,7 @@ def plot_usage(df, color=None):
567
623
fig = px .histogram (
568
624
df ,
569
625
x = col ,
626
+ y = y ,
570
627
title = f"Analysis of { len (df ['name' ].unique ())} repositories" ,
571
628
category_orders = category_orders ,
572
629
color_discrete_sequence = color_map ,
@@ -1068,6 +1125,18 @@ def _(mo, radio_include):
1068
1125
return (radio_include_element ,)
1069
1126
1070
1127
1128
@app.cell
def _(mo):
    # Boolean toggle whose .value is passed as `weighted=` to plot_usage by
    # the plotting cells; it starts in the off position.
    switch = mo.ui.switch(value=False)
    return (switch,)
1132
+
1133
+
1134
@app.cell
def _(mo, switch):
    # Stack a "Weighted" caption above the toggle so both can be placed as a
    # single element in a layout. The caption is an underscore-prefixed
    # temporary, which keeps it private to this cell.
    _caption = mo.md("Weighted")
    switch_weigthed_element = mo.vstack(
        [
            _caption,
            switch,
        ]
    )
    return (switch_weigthed_element,)
1138
+
1139
+
1071
1140
@app .cell
1072
1141
def _ (radio_color ):
1073
1142
explanation_version_0 = ""
0 commit comments