Skip to content

Commit fb29e56

Browse files
authored
chore: add script to compute code samples coverage (#428)
* chore: add script to compute code samples coverage * include Index, Session and ml.ensemble
1 parent cba21ba commit fb29e56

File tree

1 file changed

+147
-0
lines changed

1 file changed

+147
-0
lines changed

scripts/get_code_sample_coverage.py

+147
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import argparse
16+
import importlib
17+
import inspect
18+
import sys
19+
from typing import Dict, List
20+
21+
import bigframes
22+
import bigframes.pandas as bpd
23+
24+
# Bucket names used inside each per-class summary: APIs whose docstring has a
# code sample versus APIs whose docstring does not.
PRESENT = "present"
NOT_PRESENT = "not_present"

# Classes whose members are inspected for code samples. This list is extended
# further down with the classes found in the `bigframes.ml` submodules.
CLASSES = [
    bpd.DataFrame,
    bpd.Series,
    bpd.Index,
    bigframes.session.Session,
    bigframes.operations.strings.StringMethods,
    bigframes.operations.datetimes.DatetimeMethods,
    bigframes.operations.structs.StructAccessor,
]

# Names of the `bigframes.ml` submodules to import and scan for classes.
# The order here determines the order of the classes appended to CLASSES.
ML_MODULE_NAMES = [
    "cluster",
    "compose",
    "decomposition",
    "ensemble",
    "linear_model",
    "metrics",
    "model_selection",
    "pipeline",
    "preprocessing",
    "llm",
    "forecasting",
    "imported",
    "remote",
]
52+
53+
# Import each listed `bigframes.ml` submodule and register every member of it
# that is a class, so that these classes are inspected for code samples too.
for module_name in ML_MODULE_NAMES:
    module = importlib.import_module("bigframes.ml." + module_name)
    classes_ = [
        member for _member_name, member in inspect.getmembers(module, inspect.isclass)
    ]
    CLASSES += classes_
59+
60+
61+
def get_code_samples_summary() -> Dict[str, Dict[str, List[str]]]:
    """Summarize which BigFrames APIs have a code sample in their docstring.

    Every class in ``CLASSES`` is inspected. Its methods, functions and
    properties (skipping single-underscore private names) are sorted into two
    buckets depending on whether the docstring contains the ``**Examples:**``
    marker.

    Returns:
        A dictionary keyed by ``"<module>.<class name>"`` whose values have
        the shape::

            {
                "present": [api_1, api_2, ...],
                "not_present": [api_3, api_4, ...],
            }
    """

    def is_api(member_name: str, member) -> bool:
        # Bound methods cover class methods like `from_dict`, `from_records`;
        # plain functions cover instance methods like `dropna`, `join`.
        if inspect.ismethod(member) or inspect.isfunction(member):
            return True
        # Data descriptors cover properties like `shape`, `values`; generic
        # dunder descriptors like `__weakref__` are deliberately left out.
        return inspect.isdatadescriptor(member) and not member_name.startswith("__")

    report: Dict[str, Dict[str, List[str]]] = {}

    for cls in CLASSES:
        with_samples: List[str] = []
        without_samples: List[str] = []
        report[f"{cls.__module__}.{cls.__name__}"] = {
            PRESENT: with_samples,
            NOT_PRESENT: without_samples,
        }

        for member_name, member in inspect.getmembers(cls):
            # Single-underscore names are private; dunder names are kept.
            is_private = member_name.startswith("_") and not member_name.startswith(
                "__"
            )
            if is_private or not is_api(member_name, member):
                continue

            # From here on the member is a property or a non-private method.
            doc = inspect.getdoc(getattr(cls, member_name))
            if doc and "**Examples:**" in doc:
                with_samples.append(member_name)
            else:
                without_samples.append(member_name)

    return report
112+
113+
114+
if __name__ == "__main__":
    # Command-line entry point: print the code sample coverage per class and
    # overall, optionally listing the individual APIs in each bucket.
    parser = argparse.ArgumentParser(
        description="Get a summary of code samples coverage in BigFrames APIs."
    )
    # BooleanOptionalAction provides both `--details` and `--no-details` and
    # never consumes a value, so no `type` is given. Passing `type` to this
    # action is deprecated since Python 3.12 and rejected from Python 3.14.
    parser.add_argument(
        "-d",
        "--details",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="Whether to print APIs with and without code samples.",
    )

    args = parser.parse_args(sys.argv[1:])

    summary = get_code_samples_summary()

    total_with_code_samples = 0
    total = 0
    for class_, class_summary in summary.items():
        apis_with_code_samples = len(class_summary[PRESENT])
        total_with_code_samples += apis_with_code_samples

        apis_total = len(class_summary[PRESENT]) + len(class_summary[NOT_PRESENT])
        total += apis_total

        # A class may contribute no APIs at all; report 0% for it instead of
        # failing with ZeroDivisionError.
        coverage = 100 * apis_with_code_samples / apis_total if apis_total else 0.0
        print(f"{class_}: {coverage:.1f}% ({apis_with_code_samples}/{apis_total})")
        if args.details:
            print(f"===> APIs WITH code samples: {class_summary[PRESENT]}")
            print(f"===> APIs WITHOUT code samples: {class_summary[NOT_PRESENT]}")

    # Same guard for the overall figure in case nothing was collected.
    coverage = 100 * total_with_code_samples / total if total else 0.0
    print(f"Total: {coverage:.1f}% ({total_with_code_samples}/{total})")

0 commit comments

Comments
 (0)