|
| 1 | +# Copyright 2023 Google LLC |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +import argparse |
| 16 | +import importlib |
| 17 | +import inspect |
| 18 | +import sys |
| 19 | +from typing import Dict, List |
| 20 | + |
| 21 | +import bigframes |
| 22 | +import bigframes.pandas as bpd |
| 23 | + |
# Bucket names used in the per-class summary: APIs whose docstring contains a
# code sample versus APIs whose docstring does not.
PRESENT = "present"
NOT_PRESENT = "not_present"

# Classes whose members are inspected for code samples. The classes found in
# the `bigframes.ml` submodules are appended to this list further down.
CLASSES = [
    bpd.DataFrame,
    bpd.Series,
    bpd.Index,
    bigframes.session.Session,
    bigframes.operations.strings.StringMethods,
    bigframes.operations.datetimes.DatetimeMethods,
    bigframes.operations.structs.StructAccessor,
]

# Submodules of `bigframes.ml` that are imported and scanned for classes.
ML_MODULE_NAMES = [
    "cluster",
    "compose",
    "decomposition",
    "ensemble",
    "linear_model",
    "metrics",
    "model_selection",
    "pipeline",
    "preprocessing",
    "llm",
    "forecasting",
    "imported",
    "remote",
]

# Import each ML submodule and register every class-valued attribute that
# `inspect.getmembers` reports in its namespace.
for module_name in ML_MODULE_NAMES:
    module = importlib.import_module(f"bigframes.ml.{module_name}")
    classes_ = [member for _, member in inspect.getmembers(module, inspect.isclass)]
    CLASSES += classes_
| 59 | + |
| 60 | + |
def get_code_samples_summary() -> Dict[str, Dict[str, List[str]]]:
    """Report which BigFrames APIs have a code sample in their docstring.

    Each class in ``CLASSES`` is inspected member by member. A member counts
    as an API when it is a method, a function, or a non-dunder data
    descriptor (e.g. a property); names with a single leading underscore are
    skipped. An API is considered to have a code sample when its docstring
    contains the marker ``**Examples:**``.

    Returns:
        Summary: A dictionary keyed by ``"<module>.<class name>"`` of the
        format
            {
                class_1: {
                    "present": [method1, method2, ...],
                    "not_present": [method3, method4, ...]
                },
                class_2: {
                    ...
                }
            }
    """

    def is_api(member_name, member):
        # Class methods such as `from_dict`, `from_records`.
        if inspect.ismethod(member):
            return True
        # Instance methods such as `dropna`, `join`.
        if inspect.isfunction(member):
            return True
        # Properties such as `shape`, `values`, but not generic descriptors
        # such as `__weakref__`.
        return inspect.isdatadescriptor(member) and not member_name.startswith("__")

    report: Dict[str, Dict[str, List[str]]] = {}

    for cls in CLASSES:
        buckets = {PRESENT: [], NOT_PRESENT: []}
        report[f"{cls.__module__}.{cls.__name__}"] = buckets

        for member_name, member in inspect.getmembers(cls):
            # Skip private names (single leading underscore); dunder names
            # are still considered.
            is_dunder = member_name.startswith("__")
            if member_name.startswith("_") and not is_dunder:
                continue

            if not is_api(member_name, member):
                continue

            # Here the member is a property or a non-private method; look it
            # up on the class again and read its docstring.
            doc = inspect.getdoc(getattr(cls, member_name))
            if doc and "**Examples:**" in doc:
                buckets[PRESENT].append(member_name)
            else:
                buckets[NOT_PRESENT].append(member_name)

    return report
| 112 | + |
| 113 | + |
if __name__ == "__main__":
    # Command-line entry point: print the code-sample coverage per class and
    # in total, optionally listing the individual APIs in each bucket.
    parser = argparse.ArgumentParser(
        description="Get a summary of code samples coverage in BigFrames APIs."
    )
    # No `type=` here: BooleanOptionalAction consumes no value from the
    # command line, and passing `type` to it is deprecated in Python 3.12 and
    # rejected from 3.14 on.
    parser.add_argument(
        "-d",
        "--details",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="Whether to print APIs with and without code samples.",
    )

    args = parser.parse_args(sys.argv[1:])

    summary = get_code_samples_summary()

    total_with_code_samples = 0
    total = 0
    for class_, class_summary in summary.items():
        apis_with_code_samples = len(class_summary[PRESENT])
        total_with_code_samples += apis_with_code_samples

        apis_total = apis_with_code_samples + len(class_summary[NOT_PRESENT])
        total += apis_total

        # A class may expose no qualifying APIs at all; report 0.0% instead
        # of failing with ZeroDivisionError.
        coverage = 100 * apis_with_code_samples / apis_total if apis_total else 0.0
        print(f"{class_}: {coverage:.1f}% ({apis_with_code_samples}/{apis_total})")
        if args.details:
            print(f"===> APIs WITH code samples: {class_summary[PRESENT]}")
            print(f"===> APIs WITHOUT code samples: {class_summary[NOT_PRESENT]}")

    # Same guard for the grand total (e.g. when nothing was scanned).
    coverage = 100 * total_with_code_samples / total if total else 0.0
    print(f"Total: {coverage:.1f}% ({total_with_code_samples}/{total})")
0 commit comments