From a7a282ae3c67a2548e5a13d982ab8daa667a193d Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 9 Jul 2025 22:08:14 +1000 Subject: [PATCH 01/27] feat: add macaron database extractor module Signed-off-by: Trong Nhan Mai --- src/macaron/build_spec_generator/__init__.py | 2 + .../macaron_db_extractor.py | 723 ++++++++++++++++++ src/macaron/errors.py | 16 + tests/build_spec_generator/__init__.py | 2 + .../test_macaron_db_extractor.py | 232 ++++++ 5 files changed, 975 insertions(+) create mode 100644 src/macaron/build_spec_generator/__init__.py create mode 100644 src/macaron/build_spec_generator/macaron_db_extractor.py create mode 100644 tests/build_spec_generator/__init__.py create mode 100644 tests/build_spec_generator/test_macaron_db_extractor.py diff --git a/src/macaron/build_spec_generator/__init__.py b/src/macaron/build_spec_generator/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/src/macaron/build_spec_generator/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. diff --git a/src/macaron/build_spec_generator/macaron_db_extractor.py b/src/macaron/build_spec_generator/macaron_db_extractor.py new file mode 100644 index 000000000..05a485b7d --- /dev/null +++ b/src/macaron/build_spec_generator/macaron_db_extractor.py @@ -0,0 +1,723 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains the logic to extract build relation information for a PURL from the Macaron database.""" + +import json +import logging +from collections.abc import Sequence +from dataclasses import dataclass +from typing import TypeVar + +from packageurl import PackageURL +from sqlalchemy import Select, and_, select +from sqlalchemy.dialects import sqlite +from sqlalchemy.exc import MultipleResultsFound, SQLAlchemyError +from sqlalchemy.orm import Session, aliased + +from macaron.database.table_definitions import Analysis, CheckFacts, Component, MappedCheckResult, Repository +from macaron.errors import QueryMacaronDatabaseError +from macaron.slsa_analyzer.checks.build_as_code_check import BuildAsCodeFacts +from macaron.slsa_analyzer.checks.build_script_check import BuildScriptFacts +from macaron.slsa_analyzer.checks.build_service_check import BuildServiceFacts +from macaron.slsa_analyzer.checks.build_tool_check import BuildToolFacts + +logger: logging.Logger = logging.getLogger(__name__) + + +@dataclass +class GenericBuildCommandInfo: + """Contains the build command information extracted from build related check facts.""" + + command: list[str] + language: str + language_versions: list[str] + build_tool_name: str + + +T = TypeVar("T") + + +def lookup_multiple( + select_statement: Select[tuple[T]], + session: Session, +) -> Sequence[T]: + """Perform an SELECT statement and return all scalar results. + + Parameters + ---------- + select_statement : Select[tuple[T]] + The SQLAlchemy SELECT statement to execute. + session : Session + The SQLAlchemy session to the database we are querying from. + + Returns + ------- + Sequence[T] + The result of executing the SELECT statement as scalar values. + + Raises + ------ + QueryMacaronDatabaseError + If the SELECT statement isn't executed successfully. + For example, if the schema of the target database doesn't match the statement. 
+ """ + try: + sql_results = session.execute(select_statement) + except SQLAlchemyError as generic_exec_error: + raise QueryMacaronDatabaseError( + f"Critical: unexpected error when execute query {compile_sqlite_select_statement(select_statement)}." + ) from generic_exec_error + + return sql_results.scalars().all() + + +def lookup_one_or_none( + select_statement: Select[tuple[T]], + session: Session, +) -> T | None: + """Perform an SELECT statement and return at most one scalar result. + + Parameters + ---------- + select_statement : Select[tuple[T]] + The SQLAlchemy SELECT statement to execute + session : Session + The SQLAlchemy session to the database we are querying from. + + Returns + ------- + T | None + The result of executing the SELECT statement as one scalar value or None + if there isn't any available. + + Raises + ------ + QueryMacaronDatabaseError + If the SELECT statement isn't executed successfully. + For example, if the schema of the target database doesn't match the statement. + Of if there are more than one result obtained from the SELECT statement. + """ + compiled_select_statement = compile_sqlite_select_statement(select_statement) + try: + query_scalar_results = session.execute(select_statement).scalars() + except SQLAlchemyError as generic_exec_error: + raise QueryMacaronDatabaseError( + f"Critical: unexpected error when execute query {compiled_select_statement}." + ) from generic_exec_error + + try: + result = query_scalar_results.one_or_none() + except MultipleResultsFound as error: + raise QueryMacaronDatabaseError( + f"Expect at most one result, found multiple results for query {compiled_select_statement}." + ) from error + + return result + + +def compile_sqlite_select_statement(select_statment: Select) -> str: + """Return the equivalent SQLite SELECT statement from an SQLAlchemy SELECT statement. + + This function also introduces additional cosmetic details so that it can be easily + read from the log. 
+ + Parameters + ---------- + select_statement : Select + The SQLAlchemy Select statement. + + Returns + ------- + str + The equivalent SQLite SELECT statement as a string. + """ + compiled_sqlite = select_statment.compile( + dialect=sqlite.dialect(), # type: ignore + compile_kwargs={"literal_binds": True}, + ) + return f"\n----- Begin SQLite query \n{str(compiled_sqlite)}\n----- End SQLite query\n" + + +def get_sql_stmt_latest_component_for_purl(purl: PackageURL) -> Select[tuple[Component]]: + """Return an SQLAlchemy SELECT statement to query the latest Component. + + Parameters + ---------- + purl : PackageURL + The PackageURL object to find the Component instance. + + Returns + ------- + Select[tuple[Component]] + The SQLAlchemy SELECT statement to query the latest analyzed Component instance + corresponding to the PackageURL. + """ + return ( + select( + Component, + ) + .select_from(Component) + .join( + Analysis, + onclause=Component.analysis_id == Analysis.id, + ) + .where(Component.purl == purl.to_string()) + .order_by( + Analysis.analysis_time.desc(), + Analysis.id.desc(), + ) + ) + + +def get_sql_stmt_build_tools(component_id: int) -> Select[tuple[BuildToolFacts]]: + """Return an SQLAlchemy SELECT statement to query the BuildToolFacts for a given component id. + + Parameters + ---------- + component_id : int + The id of the Component to find the BuildToolFacts for. + + Returns + ------- + Select[tuple[BuildToolFacts]] + The SQLAlchemy SELECT statement. + """ + # Because BuildToolFacts inherit from CheckFacts, SQLAlchemy had to perform implicit alias + # when performing a join between them. This pattern is not recommended, hence a warning is raised + # https://docs.sqlalchemy.org/en/20/errors.html#an-alias-is-being-generated-automatically-due-to-overlapping-tables. + # To resolve this, we need to create an SQLAlchemy alias and use it in the SELECT statement. 
+ build_tool_facts_alias = aliased(BuildToolFacts, flat=True) + + return ( + select(build_tool_facts_alias) + .select_from(Component) + .join( + MappedCheckResult, + onclause=Component.id == MappedCheckResult.component_id, + ) + .join( + CheckFacts, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, + ) + .join( + build_tool_facts_alias, + onclause=CheckFacts.id == build_tool_facts_alias.id, + ) + .where(Component.id == component_id) + .order_by( + build_tool_facts_alias.confidence.desc(), + build_tool_facts_alias.id.asc(), + ) + ) + + +def get_sql_stmt_build_as_code_check(component_id: int) -> Select[tuple[BuildAsCodeFacts]]: + """Return an SQLAlchemy SELECT statement to query the BuildAsCodeFacts for a given component id. + + Parameters + ---------- + component_id : int + The id of the Component to find the BuildAsCodeFacts for. + + Returns + ------- + Select[tuple[BuildAsCodeFacts]] + The SQLAlchemy SELECT statement. + """ + # Because BuildAsCodeFacts inherit from CheckFacts, SQLAlchemy had to perform implicit alias + # when performing a join between them. This pattern is not recommended, hence a warning is raised + # https://docs.sqlalchemy.org/en/20/errors.html#an-alias-is-being-generated-automatically-due-to-overlapping-tables. + # To resolve this, we need to create an SQLAlchemy alias and use it in the SELECT statement. 
+ build_as_code_facts_alias = aliased(BuildAsCodeFacts, flat=True) + + return ( + select(build_as_code_facts_alias) + .select_from(Component) + .join( + MappedCheckResult, + onclause=Component.id == MappedCheckResult.component_id, + ) + .join( + CheckFacts, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, + ) + .join( + build_as_code_facts_alias, + onclause=CheckFacts.id == build_as_code_facts_alias.id, + ) + .where( + and_( + Component.id == component_id, + build_as_code_facts_alias.deploy_command.is_not(None), + ) + ) + .order_by( + build_as_code_facts_alias.confidence.desc(), + build_as_code_facts_alias.id.asc(), + ) + ) + + +def get_sql_stmt_build_service_check(component_id: int) -> Select[tuple[BuildServiceFacts]]: + """Return an SQLAlchemy SELECT statement to query the BuildServiceFacts for a given component id. + + Parameters + ---------- + component_id : int + The id of the Component to find the BuildServiceFacts for. + + Returns + ------- + Select[tuple[BuildServiceFacts]] + The SQLAlchemy SELECT statement. + """ + # Because BuildServiceFacts inherit from CheckFacts, SQLAlchemy had to perform implicit alias + # when performing a join between them. This pattern is not recommended, hence a warning is raised + # https://docs.sqlalchemy.org/en/20/errors.html#an-alias-is-being-generated-automatically-due-to-overlapping-tables. + # To resolve this, we need to create an SQLAlchemy alias and use it in the SELECT statement. 
+ build_service_facts_alias = aliased(BuildServiceFacts, flat=True) + + return ( + select(build_service_facts_alias) + .select_from(Component) + .join( + MappedCheckResult, + onclause=MappedCheckResult.component_id == Component.id, + ) + .join( + CheckFacts, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, + ) + .join( + build_service_facts_alias, + onclause=CheckFacts.id == build_service_facts_alias.id, + ) + .where( + and_( + Component.id == component_id, + build_service_facts_alias.build_command.is_not(None), + ) + ) + .order_by( + build_service_facts_alias.confidence.desc(), + build_service_facts_alias.id.asc(), + ) + ) + + +def get_sql_stmt_build_script_check(component_id: int) -> Select[tuple[BuildScriptFacts]]: + """Return an SQLAlchemy SELECT statement to query the BuildScriptFacts for a given component id. + + Parameters + ---------- + component_id : int + The id of the Component to find the BuildScriptFacts for. + + Returns + ------- + Select[tuple[BuildScriptFacts]] + The SQLAlchemy SELECT statement. + """ + # Because BuildScriptFacts inherit from CheckFacts, SQLAlchemy had to perform implicit alias + # when performing a join between them. This pattern is not recommended, hence a warning is raised + # https://docs.sqlalchemy.org/en/20/errors.html#an-alias-is-being-generated-automatically-due-to-overlapping-tables. + # To resolve this, we need to create an SQLAlchemy alias and use it in the SELECT statement. 
+ build_script_facts_alias = aliased(BuildScriptFacts, flat=True) + + return ( + select(build_script_facts_alias) + .select_from(Component) + .join( + MappedCheckResult, + onclause=Component.id == MappedCheckResult.component_id, + ) + .join( + CheckFacts, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, + ) + .join( + build_script_facts_alias, + onclause=CheckFacts.id == build_script_facts_alias.id, + ) + .where( + and_( + Component.id == component_id, + build_script_facts_alias.build_tool_command.is_not(None), + ) + ) + .order_by( + build_script_facts_alias.confidence.desc(), + build_script_facts_alias.id.asc(), + ) + ) + + +def get_sql_stmt_repository(component_id: int) -> Select[tuple[Repository]]: + """Return an SQLAlchemy SELECT statement to query the Repository for a given component id. + + Parameters + ---------- + component_id : int + The id of the Component to find the Repository for. + + Returns + ------- + Select[tuple[Repository]] + The SQLAlchemy SELECT statement. + """ + return ( + select(Repository) + .select_from(Component) + .join( + Repository, + onclause=Component.id == Repository.component_id, + ) + .where(Component.id == component_id) + ) + + +def lookup_latest_component_id(purl: PackageURL, session: Session) -> int | None: + """Return the component id of the latest analysis that matches a given PackageURL string. + + Parameters + ---------- + purl : PackageURL + The PackageURL object to look for the latest component id. + session : Session + The SQLAlchemy Session that connects to the Macaron database. + + Returns + ------- + int | None + The latest component id or None if there isn't one available in the database. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query. 
+ """ + latest_component_id_stmt = get_sql_stmt_latest_component_for_purl(purl) + logger.debug("Latest Analysis and Component query \n %s", compile_sqlite_select_statement(latest_component_id_stmt)) + + try: + component_results = session.execute(latest_component_id_stmt) + except SQLAlchemyError as generic_exec_error: + raise QueryMacaronDatabaseError( + f"Critical: unexpected error when execute query {compile_sqlite_select_statement(latest_component_id_stmt)}." + ) from generic_exec_error + + latest_component = component_results.scalars().first() + if not latest_component: + return None + + return latest_component.id + + +def lookup_build_tools_check(component_id: int, session: Session) -> Sequence[BuildToolFacts]: + """Return the sequence of BuildToolFacts instances for a given component id. + + Parameters + ---------- + component_id : int + The component id to look for the BuildToolFacts. + session : Session + The SQLAlchemy Session that connects to the Macaron database. + + Returns + ------- + Sequence[BuildToolFacts] + The sequence of BuildToolFacts instances obtained from querying the database. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query. + """ + build_tools_statement = get_sql_stmt_build_tools(component_id) + logger.debug( + "Build Tools Check Facts for component %d \n %s", + component_id, + compile_sqlite_select_statement(build_tools_statement), + ) + + build_tool_facts = lookup_multiple( + select_statement=build_tools_statement, + session=session, + ) + + return build_tool_facts + + +def lookup_build_as_code_check(component_id: int, session: Session) -> Sequence[BuildAsCodeFacts]: + """Return the sequence of BuildAsCodeFacts instances for a given component id. + + Parameters + ---------- + component_id : int + The component id to look for the BuildAsCodeFacts. + session : Session + The SQLAlchemy Session that connects to the Macaron database. 
+ + Returns + ------- + Sequence[BuildAsCodeFacts] + The sequence of BuildAsCodeFacts instances obtained from querying the database. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query. + """ + build_as_code_select_statement = get_sql_stmt_build_as_code_check(component_id) + logger.debug( + "Build As Code Check Fact for component %d \n %s", + component_id, + compile_sqlite_select_statement(build_as_code_select_statement), + ) + + build_as_code_check_facts = lookup_multiple( + select_statement=build_as_code_select_statement, + session=session, + ) + + return build_as_code_check_facts + + +def lookup_build_service_check(component_id: int, session: Session) -> Sequence[BuildServiceFacts]: + """Return the sequence of BuildServiceFacts instances for a given component id. + + Parameters + ---------- + component_id : int + The component id to look for the BuildServiceFacts. + session : Session + The SQLAlchemy Session that connects to the Macaron database. + + Returns + ------- + Sequence[BuildServiceFacts] + The sequence of BuildServiceFacts instances obtained from querying the database. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query. + """ + build_service_select_statement = get_sql_stmt_build_service_check(component_id) + logger.debug( + "Build Service Check Fact for component %d \n %s", + component_id, + compile_sqlite_select_statement(build_service_select_statement), + ) + + build_service_check_facts = lookup_multiple( + select_statement=build_service_select_statement, + session=session, + ) + + return build_service_check_facts + + +def lookup_build_script_check(component_id: int, session: Session) -> Sequence[BuildScriptFacts]: + """Return the sequence of BuildScriptFacts instances for a given component id. + + Parameters + ---------- + component_id : int + The component id to look for the BuildScriptFacts. 
+ session : Session + The SQLAlcemy Session that connects to the Macaron database. + + Returns + ------- + Sequence[BuildScriptFacts] + The sequence of BuildScriptFacts instances obtained from querying the database. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query. + """ + build_script_select_statment = get_sql_stmt_build_script_check(component_id) + logger.debug( + "Build Script Check Fact for component %d \n %s", + component_id, + compile_sqlite_select_statement(build_script_select_statment), + ) + + build_script_check_facts = lookup_multiple( + select_statement=build_script_select_statment, + session=session, + ) + + return build_script_check_facts + + +def extract_generic_build_command_info( + check_facts: Sequence[BuildAsCodeFacts] | Sequence[BuildServiceFacts] | Sequence[BuildScriptFacts], +) -> list[GenericBuildCommandInfo]: + """Return the list of GenericBuildCommandInfo instances from a list of Build related Check Facts. + + The following information are captured for each Check Facts + + - ``command``: the build command, but this information is located in different attribute depending on the + type of Build Check Fact (e.g. in `BuildAsCodeFacts` it is stored in `deploy_command`). It's stored + in the database as a serialized JSON object so we need to use json.loads to turn it into a list of strings. + + - ``language`` and ``build_tool_name`` are attributes of all Build Check Fact instances + + - ``language_versions`` is an attribute of all Build Check Fact instances. It's stored + in the database as a serialized JSON object so we need to use json.loads to turn it into a list of strings. + + Parameters + ---------- + check_facts : Sequence[BuildAsCodeFacts] | Sequence[BuildServiceFacts] | Sequence[BuildScriptFacts] + The sequence of check facts obtained from the database. 
+ + Returns + ------- + list[GenericBuildCommandInfo] + The list of GenericBuildCommandInfo instances that store build command information + representing by the Build Check Facts. + + Raises + ------ + json.decoder.JSONDecodeError + If we failed to decode the JSON-serialized values stored in the Build*Facts instances. + """ + result = [] + for fact in check_facts: + match fact: + case BuildAsCodeFacts(): + result.append( + GenericBuildCommandInfo( + command=json.loads(fact.deploy_command), + language=fact.language, + language_versions=json.loads(fact.language_versions) if fact.language_versions else [], + build_tool_name=fact.build_tool_name, + ) + ) + case BuildServiceFacts(): + result.append( + GenericBuildCommandInfo( + command=json.loads(fact.build_command), + language=fact.language, + language_versions=json.loads(fact.language_versions) if fact.language_versions else [], + build_tool_name=fact.build_tool_name, + ) + ) + case BuildScriptFacts(): + result.append( + GenericBuildCommandInfo( + command=json.loads(fact.build_tool_command), + language=fact.language, + language_versions=json.loads(fact.language_versions) if fact.language_versions else [], + build_tool_name=fact.build_tool_name, + ) + ) + + return result + + +def lookup_any_build_command(component_id: int, session: Session) -> list[GenericBuildCommandInfo]: + """Return a list of ``GenericBuildCommandInfo`` instances from looking up any available build command. + + We will look for available build command from build-related check facts. + + Parameters + ---------- + component_id: int + The component id to lookup the build command. + session: Session + The SQLAlchemy session to the database for the lookup. + + Returns + ------- + list[GenericBuildCommandInfo] + This list will be empty if there is no available build command for this component. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query for looking up the build commands. 
+ Raised by "lookup_*_check" functions + """ + build_as_code_check_facts = lookup_build_as_code_check( + component_id=component_id, + session=session, + ) + if build_as_code_check_facts: + try: + return extract_generic_build_command_info(build_as_code_check_facts) + except json.decoder.JSONDecodeError as error: + logger.debug( + "Failed to extract generic build command info for build as code check facts for component id %s. " + + "Error %s. Continue", + component_id, + error, + ) + + build_service_check_facts = lookup_build_service_check( + component_id=component_id, + session=session, + ) + if build_service_check_facts: + try: + return extract_generic_build_command_info(build_service_check_facts) + except json.decoder.JSONDecodeError as error: + logger.debug( + "Failed to extract generic build command info for build service check facts for component id %s. " + + "Error %s. Continue", + component_id, + error, + ) + + build_script_check_facts = lookup_build_script_check( + component_id=component_id, + session=session, + ) + try: + return extract_generic_build_command_info(build_script_check_facts) + except json.decoder.JSONDecodeError as error: + logger.debug( + "Failed to extract generic build command info for build script check facts for component id %s. " + + "Error %s. Continue", + component_id, + error, + ) + return [] + + +def lookup_repository(component_id: int, session: Session) -> Repository | None: + """Return the Repository instance for a given component id. + + Parameters + ---------- + component_id : int + The component id to look for the Repository. + session : Session + The SQLAlchemy Session that connects to the Macaron database. + + Returns + ------- + Repository | None + The Repository instance obtained from querying the database, or None if there isn't one. + + Raises + ------ + QueryMacaronDatabaseError + If the query result from the database contains more than one Repository instance, + or there is an unexpected error when executing the SQLAlchemy query. 
+ """ + repository_select_statement = get_sql_stmt_repository(component_id) + logger.debug( + "Repository for component %d \n %s.", component_id, compile_sqlite_select_statement(repository_select_statement) + ) + + repository_result = lookup_one_or_none( + select_statement=repository_select_statement, + session=session, + ) + + return repository_result diff --git a/src/macaron/errors.py b/src/macaron/errors.py index d5983a0bc..91ce63990 100644 --- a/src/macaron/errors.py +++ b/src/macaron/errors.py @@ -113,3 +113,19 @@ class LocalArtifactFinderError(MacaronError): class SourceCodeError(MacaronError): """Error for operations on package source code.""" + + +class CommandLineParseError(Exception): + """Raised if an error is encountered while parsing a CLI Command.""" + + +class PatchBuildCommandError(Exception): + """Raised if an error is encountered while patching a Maven CLI Command.""" + + +class QueryMacaronDatabaseError(Exception): + """Happens when there is an unexpected error while querying the database using SQLAlchemy.""" + + +class GenerateBuildSpecError(Exception): + """Happens when there is an unexpected error while generate the build spec file.""" diff --git a/tests/build_spec_generator/__init__.py b/tests/build_spec_generator/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/tests/build_spec_generator/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. diff --git a/tests/build_spec_generator/test_macaron_db_extractor.py b/tests/build_spec_generator/test_macaron_db_extractor.py new file mode 100644 index 000000000..1c7f3c4bd --- /dev/null +++ b/tests/build_spec_generator/test_macaron_db_extractor.py @@ -0,0 +1,232 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains tests for the macaron_db_extractor module.""" + +from collections.abc import Generator +from datetime import datetime, timezone +from typing import Any + +import pytest +from packageurl import PackageURL +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker + +from macaron import __version__ +from macaron.build_spec_generator.macaron_db_extractor import ( + QueryMacaronDatabaseError, + Repository, + lookup_any_build_command, + lookup_build_tools_check, + lookup_latest_component_id, + lookup_repository, +) +from macaron.database.table_definitions import Analysis, CommitFinderInfo, Component, ORMBase, RepoFinderMetadata +from macaron.repo_finder.repo_finder import RepoFinderInfo + +# pylint: disable=redefined-outer-name + + +@pytest.fixture() +def macaron_db_session() -> Generator[Session, Any, None]: + """Return a session to a memory stored SQLite database with Macaron's database schema. + + The database is empty. This fixture's scope is function to prevent polluting between tests. + It also handles closing the session after the test function finishes. + """ + engine = create_engine("sqlite:///:memory:") + ORMBase.metadata.create_all(engine) + + session_maker = sessionmaker(engine) + session = session_maker() + + yield session + + session.close() + + +@pytest.fixture() +def invalid_db_session() -> Generator[Session, Any, None]: + """Return a session to a memory stored SQLite database. + + This databaes doesn't have Macaron database schema, hence it considered invalid. 
+ """ + engine = create_engine("sqlite:///:memory:") + + session_maker = sessionmaker(engine) + session = session_maker() + + yield session + + session.close() + + +@pytest.mark.parametrize( + ("input_data", "query_purl_string", "expect_result"), + [ + pytest.param( + [], + "pkg:maven/oracle/macaron@0.16.0", + None, + id="The database is empty.", + ), + pytest.param( + [ + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/boo/foo@0.2.0", + ), + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/boo/boohoo@1.0", + ), + ], + "pkg:maven/oracle/macaron@0.16.0", + None, + id="The database is not empty, but no component matches the query PackageURL string.", + ), + pytest.param( + [ + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/oracle/macaron@0.16.0", + ), + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/boo/foo@0.1.0", + ), + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/oracle/macaron@0.16.0", + ), + ], + "pkg:maven/oracle/macaron@0.16.0", + 3, + id="When two analyses of the same PURL has the same timestamp, the component id of the latest analysis is returned.", + ), + ], +) +def test_lookup_latest_component_id( + macaron_db_session: Session, + input_data: list[tuple[datetime, str]], + query_purl_string: str, + expect_result: int | None, +) -> None: + """Test the lookup_latest_component_id function.""" + for utc_timestamp, purl_string in input_data: + analysis = Analysis( + analysis_time=utc_timestamp, + macaron_version=__version__, + ) + + repo_finder_metadata = RepoFinderMetadata( + repo_finder_outcome=RepoFinderInfo.NOT_USED, + commit_finder_outcome=CommitFinderInfo.NOT_USED, + found_url="", + found_commit="", + ) + + _ = Component( + purl=purl_string, + analysis=analysis, + 
repository=None, + repo_finder_metadata=repo_finder_metadata, + ) + + macaron_db_session.add(analysis) + + macaron_db_session.commit() + assert lookup_latest_component_id(PackageURL.from_string(query_purl_string), macaron_db_session) == expect_result + + +def test_lookup_repository_empty_db(macaron_db_session: Session) -> None: + """Test the lookup_repository function.""" + assert not lookup_repository(1, macaron_db_session) + + +def test_lookup_repository(macaron_db_session: Session) -> None: + """Test the lookup_repository function.""" + analysis = Analysis( + analysis_time=datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + macaron_version=__version__, + ) + + repository = Repository( + full_name="oracle/macaron", + complete_name="github.com/oracle/macaron", + remote_path="https://github.com/oracle/macaron", + branch_name="main", + commit_sha="d2b95262091d6572cc12dcda57d89f9cd44ac88b", + commit_date="2023-02-10T15:11:14+08:00", + fs_path="/boo/foo/macaron", + files=["boo.txt", "foo.xml"], + ) + + repo_finder_metadata_1 = RepoFinderMetadata( + repo_finder_outcome=RepoFinderInfo.NOT_USED, + commit_finder_outcome=CommitFinderInfo.NOT_USED, + found_url="", + found_commit="", + ) + + repo_finder_metadata_2 = RepoFinderMetadata( + repo_finder_outcome=RepoFinderInfo.NOT_USED, + commit_finder_outcome=CommitFinderInfo.NOT_USED, + found_url="", + found_commit="", + ) + + component_without_repo = Component( + purl="pkg:maven/boo/foo@0.1.0", + analysis=analysis, + repository=None, + repo_finder_metadata=repo_finder_metadata_1, + ) + + component_with_repo = Component( + purl="pkg:maven/oracle/macaron@0.16.0", + analysis=analysis, + repository=repository, + repo_finder_metadata=repo_finder_metadata_2, + ) + + macaron_db_session.add(analysis) + macaron_db_session.commit() + + assert not lookup_repository(component_without_repo.id, macaron_db_session) + lookup_repo = lookup_repository(component_with_repo.id, macaron_db_session) + assert 
lookup_repo + assert lookup_repo.remote_path == "https://github.com/oracle/macaron" + assert lookup_repo.commit_sha == "d2b95262091d6572cc12dcda57d89f9cd44ac88b" + + +def test_lookup_any_build_command_empty_db(macaron_db_session: Session) -> None: + """Test the lookup_any_build_command function with an empty database.""" + assert not lookup_any_build_command(component_id=1, session=macaron_db_session) + + +def test_invalid_input_databse(invalid_db_session: Session) -> None: + """Test handling invalid input database.""" + with pytest.raises(QueryMacaronDatabaseError): + lookup_any_build_command( + component_id=1, + session=invalid_db_session, + ) + + with pytest.raises(QueryMacaronDatabaseError): + lookup_build_tools_check( + component_id=1, + session=invalid_db_session, + ) + + with pytest.raises(QueryMacaronDatabaseError): + lookup_repository( + component_id=1, + session=invalid_db_session, + ) + + with pytest.raises(QueryMacaronDatabaseError): + lookup_latest_component_id( + purl=PackageURL.from_string("pkg:maven/oracle/macaron@0.16.0"), + session=invalid_db_session, + ) From ed57cc4287be43e3cf3bb8cb91be8ecffcd377b8 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 9 Jul 2025 22:12:35 +1000 Subject: [PATCH 02/27] feat: add maven and gradle cli parsers Signed-off-by: Trong Nhan Mai --- .../cli_command_parser/__init__.py | 165 +++++ .../cli_command_parser/gradle_cli_command.py | 388 ++++++++++ .../cli_command_parser/gradle_cli_parser.py | 701 ++++++++++++++++++ .../cli_command_parser/maven_cli_command.py | 324 ++++++++ .../cli_command_parser/maven_cli_parser.py | 594 +++++++++++++++ .../unparsed_cli_command.py | 20 + .../test_base_cli_options.py | 129 ++++ .../test_gradle_cli_command.py | 156 ++++ .../test_gradle_cli_parser.py | 165 +++++ .../test_maven_cli_command.py | 142 ++++ .../test_maven_cli_parser.py | 197 +++++ tests/conftest.py | 14 + 12 files changed, 2995 insertions(+) create mode 100644 
src/macaron/build_spec_generator/cli_command_parser/__init__.py create mode 100644 src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py create mode 100644 src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py create mode 100644 src/macaron/build_spec_generator/cli_command_parser/maven_cli_command.py create mode 100644 src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py create mode 100644 src/macaron/build_spec_generator/cli_command_parser/unparsed_cli_command.py create mode 100644 tests/build_spec_generator/cli_command_parser/test_base_cli_options.py create mode 100644 tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py create mode 100644 tests/build_spec_generator/cli_command_parser/test_gradle_cli_parser.py create mode 100644 tests/build_spec_generator/cli_command_parser/test_maven_cli_command.py create mode 100644 tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py diff --git a/src/macaron/build_spec_generator/cli_command_parser/__init__.py b/src/macaron/build_spec_generator/cli_command_parser/__init__.py new file mode 100644 index 000000000..8801ea55a --- /dev/null +++ b/src/macaron/build_spec_generator/cli_command_parser/__init__.py @@ -0,0 +1,165 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains the base classes related to CLI command parsers.""" + +import argparse +from abc import abstractmethod +from collections.abc import Mapping +from dataclasses import dataclass +from enum import Enum +from typing import Any, Generic, Protocol, TypeGuard, TypeVar + + +def is_list_of_strs(value: Any) -> TypeGuard[list[str]]: + """Type guard for a list of strings.""" + return isinstance(value, list) and all(isinstance(ele, str) for ele in value) + + +def is_dict_of_str_to_str_or_none(value: Any) -> TypeGuard[dict[str, str | None]]: + """Type guard for a dictionary whose keys are strings and whose values are strings or None.""" + if not isinstance(value, dict): + return False + + for key, val in value.items(): + if not isinstance(key, str): + return False + + if not (val is None or isinstance(val, str)): + return False + + return True + + +def patch_mapping( + original: Mapping[str, str], + patch: Mapping[str, str | None], +) -> dict[str, str]: + """Patch a mapping. + + A key whose value is set to None in the patch is removed from the resulting mapping. + + Parameters + ---------- + original: Mapping[str, str] + The original mapping. + patch: Mapping[str, str | None] + The patch. + + Returns + ------- + dict[str, str]: + The new dictionary after applying the patch. + """ + patch_result = dict(original) + + for name, value in patch.items(): + if value is None: + patch_result.pop(name, None) + else: + patch_result[name] = value + + return patch_result + + +P = TypeVar("P") + + +@dataclass +class OptionDef(Generic[P]): + """This class represents a definition of a CLI option for argparse.ArgumentParser. + + This class also contains the information for validating a patch value. + The generic type P is the expected type of the patch value (if it's not None). + """ + + # e.g. `--long-option-name` + # We always require the long name as we use it as the unique identifier in the parser.
+ long_name: str + + @abstractmethod + def is_valid_patch_option(self, patch: Any) -> TypeGuard[P]: + """Return True if the provide patch value is compatible with the internal type of this option.""" + raise NotImplementedError() + + @abstractmethod + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparser.ArgumentParser representing this option.""" + raise NotImplementedError() + + @abstractmethod + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + raise NotImplementedError() + + +class PatchCommandBuildTool(str, Enum): + """Build tool supported for CLICommand patching.""" + + MAVEN = "maven" + GRADLE = "gradle" + + +class CLIOptions(Protocol): + """Interface of the options part of a CLICommand.""" + + def to_option_cmds(self) -> list[str]: + """Return the options as a list of strings.""" + + +class CLICommand(Protocol): + """Interface of a CLI Command.""" + + def to_cmds(self) -> list[str]: + """Return the CLI Command as a list of strings.""" + + +T = TypeVar("T", bound="CLICommand") +Y_contra = TypeVar("Y_contra", contravariant=True) + + +class CLICommandParser(Protocol[T, Y_contra]): + """Interface of a CLI Command Parser.""" + + @property + def build_tool(self) -> PatchCommandBuildTool: + """Return the ``BuildTool`` enum corresponding to this CLICommand.""" + + def parse(self, cmd_list: list[str]) -> CLICommand: + """Parse the CLI Command. + + Parameters + ---------- + cmd_list: list[str] + The CLI Command as list of strings. + + Returns + ------- + CLICommand + The CLICommand instance. + + Raises + ------ + CommandLineParseError + If an error happens when parsing the CLI Command. + """ + + def is_build_tool(self, executable_path: str) -> bool: + """Return True if ``executable_path`` ends the accepted executable for this build tool. + + Parameters + ---------- + executable_path: str + The executable component of a CLI command. 
+ + Returns + ------- + bool + """ + + def apply_patch( + self, + cli_command: T, + options_patch: Mapping[str, Y_contra | None], + ) -> T: + """Return the a new CLICommand object with its option patched, while persisting the executable path.""" diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py new file mode 100644 index 000000000..48d0000fc --- /dev/null +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py @@ -0,0 +1,388 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the classes that represent components of a Gradle CLI Command.""" + +import argparse +from dataclasses import dataclass + + +@dataclass +class GradleCLIOptions: + """The class that stores the values of options parsed from a Gradle CLI Command.""" + + # Optional flags with a different attribute name. + continue_: bool | None + help_: bool | None + + # Optional flags. + no_rebuild: bool | None + debug: bool | None + export_keys: bool | None + foreground: bool | None + info: bool | None + offline: bool | None + profile: bool | None + quiet: bool | None + refresh_dependencies: bool | None + refresh_keys: bool | None + rerun_tasks: bool | None + full_stacktrace: bool | None + stacktrace: bool | None + status: bool | None + stop: bool | None + continuous: bool | None + version: bool | None + warn: bool | None + write_locks: bool | None + build_cache: bool | None + configuration_cache: bool | None + configure_on_demand: bool | None + daemon: bool | None + parallel: bool | None + scan: bool | None + watch_fs: bool | None + + # Single value options. 
+ build_file: str | None + settings_file: str | None + configuration_cache_problems: str | None + gradle_user_home: str | None + init_script: str | None + include_build: str | None + write_verification_metadata: str | None + max_workers: str | None + project_dir: str | None + priority: str | None + project_cache_dir: str | None + update_locks: str | None + warning_mode: str | None + + # Appended list option. + exclude_task: list[str] | None + + # Property definition options. + system_prop: dict[str, str] | None + project_prop: dict[str, str] | None + + # Gradle tasks. + tasks: list[str] | None + + @classmethod + def from_parsed_arg( + cls, + parsed_arg: argparse.Namespace, + ) -> "GradleCLIOptions": + """Initialize the instance from an argparse.Namespace object. + + Parameters + ---------- + parsed_arg : argparse.Namespace + The argparse.Namespace object obtained from parsing the CLI Command. + + Returns + ------- + GradleCLIOptions + The intialized GradleCLIOptions object instance. + """ + return cls( + help_=parsed_arg.help_, + no_rebuild=parsed_arg.no_rebuild, + continue_=parsed_arg.continue_, + debug=parsed_arg.debug, + export_keys=parsed_arg.export_keys, + foreground=parsed_arg.foreground, + info=parsed_arg.info, + offline=parsed_arg.offline, + profile=parsed_arg.profile, + quiet=parsed_arg.quiet, + refresh_dependencies=parsed_arg.refresh_dependencies, + refresh_keys=parsed_arg.refresh_keys, + rerun_tasks=parsed_arg.rerun_tasks, + full_stacktrace=parsed_arg.full_stacktrace, + stacktrace=parsed_arg.stacktrace, + status=parsed_arg.status, + stop=parsed_arg.stop, + continuous=parsed_arg.continuous, + version=parsed_arg.version, + warn=parsed_arg.warn, + write_locks=parsed_arg.write_locks, + build_cache=parsed_arg.build_cache, + configuration_cache=parsed_arg.configuration_cache, + configure_on_demand=parsed_arg.configure_on_demand, + daemon=parsed_arg.daemon, + parallel=parsed_arg.parallel, + scan=parsed_arg.scan, + watch_fs=parsed_arg.watch_fs, + 
build_file=parsed_arg.build_file, + settings_file=parsed_arg.settings_file, + configuration_cache_problems=parsed_arg.configuration_cache_problems, + gradle_user_home=parsed_arg.gradle_user_home, + init_script=parsed_arg.init_script, + include_build=parsed_arg.include_build, + write_verification_metadata=parsed_arg.write_verification_metadata, + max_workers=parsed_arg.max_workers, + project_dir=parsed_arg.project_dir, + priority=parsed_arg.priority, + project_cache_dir=parsed_arg.project_cache_dir, + update_locks=parsed_arg.update_locks, + warning_mode=parsed_arg.warning_mode, + exclude_task=parsed_arg.exclude_task, + system_prop=GradleCLIOptions.parse_properties(parsed_arg.system_prop) if parsed_arg.system_prop else None, + project_prop=( + GradleCLIOptions.parse_properties(parsed_arg.project_prop) if parsed_arg.project_prop else None + ), + tasks=parsed_arg.tasks, + ) + + @staticmethod + def parse_properties(props: list[str]) -> dict[str, str]: + """Return a dictionary that maps between a property and its value. + + Each property definition value in `props` can have either of these formats: + - `property=value` (e.g. `property=value` from `-Dproperty=value`): this will + be parsed into a dictionary mapping of `"property": "value"`. + Both the key and value of this mapping are of type string. + - `property` (e.g. `property` from `-Dproperty`): this will be parsed into a + dictionary mapping of `"property": ""` (the value is an empty string). + + Parameters + ---------- + props: list[str] + The list of property definitions provided in the cli command. + This is the list parsed by argparse. + + Returns + ------- + dict[str, str]: + The properties dictionary.
+ + Examples + -------- + >>> GradleCLIOptions.parse_properties(["boo=true", "foo=1", "bar"]) + {'boo': 'true', 'foo': '1', 'bar': ''} + """ + system_props = {} + for ele in props: + prop_name, _, prop_val = ele.partition("=") + + if not prop_val: + system_props[prop_name] = "" + else: + system_props[prop_name] = prop_val + + return system_props + + def to_option_cmds(self) -> list[str]: + """Return the options as a list of strings.""" + result = self.to_cmd_no_tasks() + if self.tasks: + for task in self.tasks: + result.append(task) + + return result + + def to_cmd_no_tasks(self) -> list[str]: + """Return the options only as a list of string. + + Only enabled options are returned. + + Returns + ------- + list[str] + The enabled options. + """ + result = [] + + if self.help_: + result.append("-h") + + if self.no_rebuild: + result.append("-a") + + if self.continue_: + result.append("--continue") + + if self.debug: + result.append("-d") + + if self.export_keys: + result.append("--export-keys") + + if self.foreground: + result.append("--foreground") + + if self.info: + result.append("-i") + + if self.offline: + result.append("--offline") + + if self.profile: + result.append("--profile") + + if self.quiet: + result.append("-q") + + if self.refresh_dependencies: + result.append("--refresh-dependencies") + + if self.refresh_keys: + result.append("--refresh-keys") + + if self.rerun_tasks: + result.append("--rerun-tasks") + + if self.full_stacktrace: + result.append("-S") + + if self.stacktrace: + result.append("-s") + + if self.status: + result.append("--status") + + if self.stop: + result.append("--stop") + + if self.continuous: + result.append("-t") + + if self.version: + result.append("-v") + + if self.warn: + result.append("-w") + + if self.write_locks: + result.append("--write-locks") + + if self.build_cache is not None: + if self.build_cache is True: + result.append("--build-cache") + else: + result.append("--no-build-cache") + + if self.configuration_cache is not 
None: + if self.configuration_cache is True: + result.append("--configuration-cache") + else: + result.append("--no-configuration-cache") + + if self.configure_on_demand is not None: + if self.configure_on_demand is True: + result.append("--configure-on-demand") + else: + result.append("--no-configure-on-demand") + + if self.daemon is not None: + if self.daemon is True: + result.append("--daemon") + else: + result.append("--no-daemon") + + if self.parallel is not None: + if self.parallel is True: + result.append("--parallel") + else: + result.append("--no-parallel") + + if self.scan is not None: + if self.scan is True: + result.append("--scan") + else: + result.append("--no-scan") + + if self.watch_fs is not None: + if self.watch_fs is True: + result.append("--watch-fs") + else: + result.append("--no-watch-fs") + + if self.build_file: + result.append("-b") + result.append(self.build_file) + + if self.settings_file: + result.append("-c") + result.append(self.settings_file) + + if self.configuration_cache_problems: + result.append("--configuration-cache-problems") + result.append(self.configuration_cache_problems) + + if self.gradle_user_home: + result.append("-g") + result.append(self.gradle_user_home) + + if self.init_script: + result.append("-I") + result.append(self.init_script) + + if self.include_build: + result.append("--include-build") + result.append(self.include_build) + + if self.write_verification_metadata: + result.append("-M") + result.append(self.write_verification_metadata) + + if self.max_workers: + result.append("--max-workers") + result.append(self.max_workers) + + if self.project_dir: + result.append("-p") + result.append(self.project_dir) + + if self.priority: + result.append("--priority") + result.append(self.priority) + + if self.project_cache_dir: + result.append("--project-cache-dir") + result.append(self.project_cache_dir) + + if self.update_locks: + result.append("--update-locks") + result.append(self.update_locks) + + if self.warning_mode: 
+ result.append("--warning-mode") + result.append(self.warning_mode) + + if self.exclude_task: + for task in self.exclude_task: + result.append("-x") + result.append(task) + + if self.system_prop: + for key, value in self.system_prop.items(): + if value: + result.append(f"-D{key}={value}") + else: + result.append(f"-D{key}") + + if self.project_prop: + for key, value in self.project_prop.items(): + if value: + result.append(f"-P{key}={value}") + else: + result.append(f"-P{key}") + + return result + + +@dataclass +class GradleCLICommand: + """The class that stores the values of a Gradle CLI Command.""" + + executable: str + options: GradleCLIOptions + + def to_cmds(self) -> list[str]: + """Return the CLI Command as a list of strings.""" + result = [] + result.append(self.executable) + result.extend(self.options.to_option_cmds()) + return result diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py new file mode 100644 index 000000000..c66c6c4e5 --- /dev/null +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py @@ -0,0 +1,701 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains the Gradle CLI Command parser.""" + +import argparse +import logging +import os +from collections.abc import Mapping +from copy import deepcopy +from dataclasses import dataclass, field +from typing import Any, TypeGuard + +from macaron.build_spec_generator.cli_command_parser import ( + OptionDef, + PatchCommandBuildTool, + is_dict_of_str_to_str_or_none, + is_list_of_strs, + patch_mapping, +) +from macaron.build_spec_generator.cli_command_parser.gradle_cli_command import GradleCLICommand, GradleCLIOptions +from macaron.errors import CommandLineParseError, PatchBuildCommandError + +logger: logging.Logger = logging.getLogger(__name__) + + +GradleOptionPatchValueType = str | list[str] | bool | dict[str, str | None] + + +@dataclass +class GradleOptionalFlag(OptionDef[bool]): + """This option represents an optional flag in Gradle CLI command. + + For example: + - Has one short name -d/--debug + - Has no short name --continue + - Has multiple short names -?/-h/--help + + This option can have multiple values, and it's not required. + """ + + short_names: list[str] | None + + # Right now this is used for --continue and --help where the default attribute name for it + # in the returned argparse.Namespace is "continue" which conflicts with a Python keyword and + # "help" which conflicts with the built-in function help(). 
+ dest: str | None = field(default=None) + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]: + """Return True if the provided patch value is compatible with the internal type of this option.""" + return isinstance(patch, bool) + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparse.ArgumentParser representing this option.""" + kwargs: dict[str, Any] = {} + + kwargs["action"] = "store_true" + if self.dest: + kwargs["dest"] = self.dest + + if self.short_names: + arg_parse.add_argument( + *(self.short_names + [self.long_name]), + **kwargs, + ) + else: + arg_parse.add_argument( + self.long_name, + **kwargs, + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "bool" + + +@dataclass +class GradleOptionalNegateableFlag(OptionDef[bool]): + """This option represents an optional negateable flag in a Gradle CLI command. + + For example: --build-cache/--no-build-cache + """ + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]: + """Return True if the provided patch value is compatible with the internal type of this option.""" + return isinstance(patch, bool) + + @staticmethod + def get_negated_long_name(long_name: str) -> str: + """Return the negated version of a long option name.""" + return f"--no-{long_name.removeprefix('--')}" + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparse.ArgumentParser representing this option.""" + # We allow providing both the normal and negated form. + negated_long_name = self.get_negated_long_name(self.long_name) + dest = self.long_name.removeprefix("--").replace("-", "_") + + # We set the default to None so that we don't print out these options + # if they are not provided in the original build command in to_cmd_no_tasks().
+ arg_parse.add_argument( + self.long_name, + action="store_true", + default=None, + dest=dest, + ) + + arg_parse.add_argument( + negated_long_name, + action="store_false", + default=None, + dest=dest, + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "bool" + + +@dataclass +class GradleSingleValue(OptionDef[str]): + """This option represents an option that takes a value in a Gradle CLI command.""" + + short_name: str | None + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[str]: + """Return True if the provided patch value is compatible with the internal type of this option.""" + return isinstance(patch, str) + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparse.ArgumentParser representing this option.""" + if self.short_name: + arg_parse.add_argument( + *(self.short_name, self.long_name), + ) + else: + arg_parse.add_argument( + self.long_name, + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "str" + + +@dataclass +class GradlePropeties(OptionDef[dict[str, str | None]]): + """This option represents an option used to define property values of a Gradle CLI command. + + This option can be defined multiple times and the values are appended into a list of strings in argparse. + However, it's stored internally as a dictionary mapping from the property name to its value.
+ + In Gradle there are 2 options of this type: + - -D/--system-prop + - -P/--project-prop + """ + + short_name: str + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[dict[str, str | None]]: + """Return True if the provided patch value is compatible with the internal type of this option.""" + return is_dict_of_str_to_str_or_none(patch) + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparse.ArgumentParser representing this option.""" + arg_parse.add_argument( + *(self.short_name, self.long_name), + action="append", + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "dict[str, str | None]" + + +@dataclass +class GradleTask(OptionDef[list[str]]): + """This option represents the positional task option in a Gradle CLI command. + + argparse.Namespace stores this as a list of strings. This is stored internally as a list of strings. + """ + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: + """Return True if the provided patch value is compatible with the internal type of this option.""" + return is_list_of_strs(patch) + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparse.ArgumentParser representing this option.""" + # Tasks are optional (nargs="*") to allow cases like "gradle --help". + arg_parse.add_argument( + self.long_name, + nargs="*", + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "list[str]" + + +@dataclass +class GradleAppendedList(OptionDef[list[str]]): + """This option represents an option that can be specified multiple times, with all values appended to a list.
+ + For example, one can exclude multiple tasks with + gradle --exclude-task taskA --exclude-task taskB + """ + + short_name: str + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: + """Return True if the provide patch value is compatible with the internal type of this option.""" + return is_list_of_strs(patch) + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparser.ArgumentParser representing this option.""" + arg_parse.add_argument( + *(self.short_name, self.long_name), + action="append", + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "list[str]" + + +# TODO: some value option only allows you to provide certain values +# For example: --console allows "plain", "auto", "rich" or "verbose". +# They are right now not enforced. We need to think whether we want to enforce them. +GRADLE_OPTION_DEF: list[OptionDef] = [ + GradleOptionalFlag( + short_names=["-?", "-h"], + long_name="--help", + dest="help_", + ), + GradleOptionalFlag( + short_names=["-a"], + long_name="--no-rebuild", + ), + GradleOptionalFlag( + short_names=None, + long_name="--continue", + dest="continue_", + ), + GradleOptionalFlag( + short_names=["-d"], + long_name="--debug", + ), + GradleOptionalFlag( + short_names=None, + long_name="--export-keys", + ), + GradleOptionalFlag( + short_names=None, + long_name="--foreground", + ), + GradleOptionalFlag( + short_names=["-i"], + long_name="--info", + ), + GradleOptionalFlag( + short_names=None, + long_name="--offline", + ), + GradleOptionalFlag( + short_names=None, + long_name="--profile", + ), + GradleOptionalFlag( + short_names=["-q"], + long_name="--quiet", + ), + GradleOptionalFlag( + short_names=None, + long_name="--refresh-dependencies", + ), + GradleOptionalFlag( + short_names=None, + long_name="--refresh-keys", + ), + GradleOptionalFlag( + short_names=None, + long_name="--rerun-tasks", + ), + 
GradleOptionalFlag( + short_names=["-S"], + long_name="--full-stacktrace", + ), + GradleOptionalFlag( + short_names=["-s"], + long_name="--stacktrace", + ), + GradleOptionalFlag( + short_names=None, + long_name="--status", + ), + GradleOptionalFlag( + short_names=None, + long_name="--stop", + ), + GradleOptionalFlag( + short_names=["-t"], + long_name="--continuous", + ), + GradleOptionalFlag( + short_names=["-v"], + long_name="--version", + ), + GradleOptionalFlag( + short_names=["-w"], + long_name="--warn", + ), + GradleOptionalFlag( + short_names=None, + long_name="--write-locks", + ), + GradleOptionalNegateableFlag( + long_name="--build-cache", + ), + GradleOptionalNegateableFlag( + long_name="--configuration-cache", + ), + GradleOptionalNegateableFlag( + long_name="--configure-on-demand", + ), + GradleOptionalNegateableFlag( + long_name="--daemon", + ), + GradleOptionalNegateableFlag( + long_name="--parallel", + ), + GradleOptionalNegateableFlag( + long_name="--scan", + ), + GradleOptionalNegateableFlag( + long_name="--watch-fs", + ), + # This has been validated by setting up a minimal gradle project. Gradle version 8.14.2 + # gradle init --type java-library + # And use default values for any prompted configuration. 
+ # Then append this block of code into src/build.gradle + # + # task boo { + # doLast { + # println "Running task: boo" + # } + # } + # task foo { + # doLast { + # println "Running task: foo" + # } + # } + # task bar { + # doLast { + # println "Running task: bar" + # } + # } + # task everything(dependsOn: ['boo', 'foo']) { + # doLast { + # println "Running task: everything" + # } + # } + # And then run ./gradlew everything -x boo -x foo + # > Task :lib:bar + # Running task: gamma + # > Task :lib:everything + # Running task: everything + GradleAppendedList( + short_name="-x", + long_name="--exclude-task", + ), + # TODO: determine which of these options can be provided multiple times + GradleSingleValue( + short_name="-b", + long_name="--build-file", + ), + GradleSingleValue( + short_name="-c", + long_name="--settings-file", + ), + GradleSingleValue( + short_name=None, + long_name="--configuration-cache-problems", + ), + GradleSingleValue( + short_name=None, + long_name="--console", + ), + GradleSingleValue( + short_name="-F", + long_name="--dependency-verification", + ), + GradleSingleValue( + short_name="-g", + long_name="--gradle-user-home", + ), + GradleSingleValue( + short_name="-I", + long_name="--init-script", + ), + GradleSingleValue( + short_name=None, + long_name="--include-build", + ), + GradleSingleValue( + short_name="-M", + long_name="--write-verification-metadata", + ), + GradleSingleValue( + short_name=None, + long_name="--max-workers", + ), + GradleSingleValue( + short_name="-p", + long_name="--project-dir", + ), + GradleSingleValue( + short_name=None, + long_name="--priority", + ), + GradleSingleValue( + short_name=None, + long_name="--project-cache-dir", + ), + GradleSingleValue( + short_name=None, + long_name="--update-locks", + ), + GradleSingleValue( + short_name=None, + long_name="--warning-mode", + ), + GradlePropeties( + short_name="-D", + long_name="--system-prop", + ), + GradlePropeties( + short_name="-P", + long_name="--project-prop", + 
), + GradleTask( + long_name="tasks", + ), +] + + +class GradleCLICommandParser: + """A Gradle CLI Command Parser.""" + + ACCEPTABLE_EXECUTABLE = ["gradle", "gradlew"] + + def __init__(self) -> None: + """Initialize the instance.""" + self.arg_parser = argparse.ArgumentParser( + description="Parse Gradle CLI command", + prog="mvn", + add_help=False, + # https://docs.python.org/3/library/argparse.html#exit-on-error + # Best effort of parsing the build command. Therefore, we don't want to exit on error. + exit_on_error=False, + ) + + # A mapping between the long name to its option definition. + self.option_defs: dict[str, OptionDef] = {} + + for opt_def in GRADLE_OPTION_DEF: + opt_def.add_itself_to_arg_parser(self.arg_parser) + + self.option_defs[opt_def.long_name] = opt_def + + self.build_tool = PatchCommandBuildTool.GRADLE + + def is_build_tool(self, executable_path: str) -> bool: + """Return True if ``executable_path`` ends the accepted executable for this build tool. + + Parameters + ---------- + executable_path: str + The executable component of a CLI command. + + Returns + ------- + bool + """ + return os.path.basename(executable_path) in GradleCLICommandParser.ACCEPTABLE_EXECUTABLE + + def validate_patch(self, patch: Mapping[str, GradleOptionPatchValueType | None]) -> bool: + """Return True if the patch conforms to the expected format.""" + for patch_name, patch_value in patch.items(): + opt_def = self.option_defs.get(patch_name) + if not opt_def: + logger.error("Cannot find any option that matches %s", patch_name) + return False + + if patch_value is None: + continue + + if not opt_def.is_valid_patch_option(patch_value): + logger.error( + "The patch value %s of %s is not in the correct type. Expect %s.", + patch_value, + patch_name, + opt_def.get_patch_type_str(), + ) + return False + + return True + + def parse(self, cmd_list: list[str]) -> GradleCLICommand: + """Parse the Gradle CLI Command. 
+ + Parameters + ---------- + cmd_list: list[str] + The Gradle CLI Command as a list of strings. + + Returns + ------- + GradleCLICommand + The GradleCLICommand instance. + + Raises + ------ + CommandLineParseError + If an error happens when parsing the Gradle CLI Command. + """ + if not cmd_list: + raise CommandLineParseError("The provided cmd list is empty.") + + exe_path = cmd_list[0] + options = cmd_list[1:] + + if os.path.basename(exe_path) not in GradleCLICommandParser.ACCEPTABLE_EXECUTABLE: + raise CommandLineParseError(f"{exe_path} is not an acceptable Gradle executable path.") + + # TODO: because our parser is not complete for all cases, should we be more relaxed and use + # parse_known_args? + try: + parsed_opts = self.arg_parser.parse_args(options) + except argparse.ArgumentError as error: + raise CommandLineParseError(f"Failed to parse {' '.join(options)}.") from error + # Even though we have set `exit_on_error=False`, argparse still exits unexpectedly in some + # cases. This has been confirmed to be a bug in the argparse library implementation. + # https://github.com/python/cpython/issues/121018. + # This is fixed in Python3.12, but not Python3.11 + except SystemExit as sys_exit_err: + raise CommandLineParseError( + f"Failed to parse the Gradle CLI Options {' '.join(options)}."
    def _patch_properties_mapping(
        self,
        original_props: dict[str, str],
        option_long_name: str,
        patch_value: GradleOptionPatchValueType,
    ) -> dict[str, str]:
        """Return ``original_props`` patched with ``patch_value`` for a property type option.

        Parameters
        ----------
        original_props : dict[str, str]
            The current property mapping of the option.
        option_long_name : str
            The long name of the option being patched. It is looked up in ``self.option_defs``.
        patch_value : GradleOptionPatchValueType
            The patch value for the option.

        Returns
        -------
        dict[str, str]
            The result of ``patch_mapping`` applied to ``original_props`` and ``patch_value``.

        Raises
        ------
        PatchBuildCommandError
            If ``option_long_name`` does not resolve to a ``GradlePropeties`` option definition, or if
            ``patch_value`` is rejected by that definition's ``is_valid_patch_option``.
        """
        prop_opt_def = self.option_defs.get(option_long_name)
        if not prop_opt_def or not isinstance(prop_opt_def, GradlePropeties):
            raise PatchBuildCommandError(f"{option_long_name} from the patch is not a property type option.")

        # The type guard narrows ``patch_value`` to the mapping type accepted by ``patch_mapping``.
        if not prop_opt_def.is_valid_patch_option(patch_value):
            raise PatchBuildCommandError(
                f"Incorrect runtime type for patch option {option_long_name}, value: {patch_value}."
            )

        return patch_mapping(
            original=original_props,
            patch=patch_value,
        )

    def apply_patch(
        self,
        cli_command: GradleCLICommand,
        options_patch: Mapping[str, GradleOptionPatchValueType | None],
    ) -> GradleCLICommand:
        """Patch the options of a Gradle CLI command, while persisting the executable path.

        `options_patch` is a mapping with:

        - **Key**: the long name of a Gradle CLI option as string. For example: ``--continue``, ``--build-cache``.
          For patching tasks, use the key ``tasks``.

        - **Value**: The value to patch for an option referred to by the key. The type of this value
          depends on the type of option you want to patch. Please see the details below.

        The types of patch values:

        - For optional flag (e.g ``-d/--debug``) that doesn't take in a value, it is boolean. True if you want to
          set it, and False if you want to unset it.

        - For ``-D/--system-prop`` and ``-P/--project-prop`` ONLY, it is a mapping between the property name
          and its value. A value of type None can be provided to "unset" the property.

        - For ``-x/--exclude-task`` option, a list of string is required.

        - For options that have a negated form (e.g. ``--build-cache/--no-build-cache``), the key must be the normal
          long name (``--build-cache``) and the value is of type boolean. True if you want to set ``--build-cache``
          and False if you want to set ``--no-build-cache``.

        - For other options that expect a value (e.g ``-c/--settings-file``), a string is
          expected.

        None can be provided to ANY type of option to forcefully remove it from the original build command.

        Parameters
        ----------
        cli_command : GradleCLICommand
            The original Gradle command, as a ``GradleCLICommand`` object from ``GradleCLICommandParser.parse(...)``
        options_patch : Mapping[str, GradleOptionPatchValueType | None]
            The patch values.

        Returns
        -------
        GradleCLICommand
            The patched command as a new ``GradleCLICommand`` object.

        Raises
        ------
        PatchBuildCommandError
            If an error happens during the patching process.
        """
        return GradleCLICommand(
            executable=cli_command.executable,
            options=self.apply_option_patch(
                cli_command.options,
                patch=options_patch,
            ),
        )

    def apply_option_patch(
        self,
        gradle_cli_options: GradleCLIOptions,
        patch: Mapping[str, GradleOptionPatchValueType | None],
    ) -> GradleCLIOptions:
        """Patch the Gradle CLI Options and return a new copy.

        Parameters
        ----------
        gradle_cli_options: GradleCLIOptions
            The Gradle CLI Options to patch. This object is not modified.
        patch: Mapping[str, GradleOptionPatchValueType | None]
            A mapping between the long name of an option (e.g. ``--continue``) and its patch value.
            The attribute of GradleCLIOptions to update is derived from the long name.

        Returns
        -------
        GradleCLIOptions
            The new patched gradle cli options.

        Raises
        ------
        PatchBuildCommandError
            If the patch is rejected by ``validate_patch`` or an error happens during the patching process.
        """
        if not self.validate_patch(patch):
            raise PatchBuildCommandError("The patch is invalid.")

        # Copy the Gradle CLI Options for patching, so that the object passed in is left untouched.
        new_gradle_cli_options = deepcopy(gradle_cli_options)

        for option_long_name, patch_value in patch.items():
            # "--help" and "--continue" are stored under attribute names with a trailing underscore.
            if option_long_name == "--help":
                attr_name = "help_"
            elif option_long_name == "--continue":
                attr_name = "continue_"
            else:
                # Get the attribute name of GradleCLIOption object.
                # They all follow the same rule of removing the prefix --
                # from option long name and replace all "-" with "_"
                attr_name = option_long_name.removeprefix("--").replace("-", "_")

            # Ensure that setting any option to None in the patch
            # will remove it from the build command.
            # This must run before the property handling below, so that None removes
            # the whole property option instead of being merged as a mapping.
            if patch_value is None:
                setattr(new_gradle_cli_options, attr_name, patch_value)
                continue

            # Property options are merged into the existing mapping instead of being replaced.
            if option_long_name == "--project-prop":
                new_gradle_cli_options.project_prop = self._patch_properties_mapping(
                    original_props=new_gradle_cli_options.project_prop or {},
                    option_long_name=option_long_name,
                    patch_value=patch_value,
                )
                continue

            if option_long_name == "--system-prop":
                new_gradle_cli_options.system_prop = self._patch_properties_mapping(
                    original_props=new_gradle_cli_options.system_prop or {},
                    option_long_name=option_long_name,
                    patch_value=patch_value,
                )
                continue

            # Every other option is overwritten with the patch value.
            setattr(new_gradle_cli_options, attr_name, patch_value)

        return new_gradle_cli_options
+ +"""This module contains the classes that represent components of a Maven CLI Command.""" + +import argparse +from dataclasses import dataclass + + +@dataclass +class MavenCLIOptions: + """The class that stores the values of options parsed from a Maven CLI Command.""" + + # Optional flag. + also_make: bool | None + also_make_dependents: bool | None + batch_mode: bool | None + strict_checksums: bool | None + lax_checksums: bool | None + errors: bool | None + fail_at_end: bool | None + fail_fast: bool | None + fail_never: bool | None + help_: bool | None + non_recursive: bool | None + no_snapshot_updates: bool | None + no_transfer_progress: bool | None + quiet: bool | None + version: bool | None + show_version: bool | None + debug: bool | None + offline: bool | None + update_snapshots: bool | None + + # Single Value Option. + builder: str | None + encrypt_master_password: str | None + encrypt_password: str | None + file: str | None + global_settings: str | None + global_toolchains: str | None + log_file: str | None + resume_from: str | None + settings: str | None + toolchains: str | None + threads: str | None + + # Comma-delim list option. + activate_profiles: list[str] | None + projects: list[str] | None + + # System properties definition. + define: dict[str, str] | None + + # Maven goals and plugin phases. + goals: list[str] | None + + @classmethod + def from_parsed_arg( + cls, + parsed_arg: argparse.Namespace, + ) -> "MavenCLIOptions": + """Initialize the instance from the the argparse.Namespace object. + + Parameters + ---------- + parsed_arg : argparse.Namespace + The argparse.Namespace object obtained from parsing the CLI Command. + + Returns + ------- + MavenCLIOptions + The MavenCLIOptions object. 
+ """ + return cls( + also_make=parsed_arg.also_make, + also_make_dependents=parsed_arg.also_make_dependents, + batch_mode=parsed_arg.batch_mode, + builder=parsed_arg.builder, + strict_checksums=parsed_arg.strict_checksums, + lax_checksums=parsed_arg.lax_checksums, + define=MavenCLIOptions.parse_system_properties(parsed_arg.define) if parsed_arg.define else None, + errors=parsed_arg.errors, + encrypt_master_password=parsed_arg.encrypt_master_password, + encrypt_password=parsed_arg.encrypt_password, + file=parsed_arg.file, + fail_at_end=parsed_arg.fail_at_end, + fail_fast=parsed_arg.fail_fast, + fail_never=parsed_arg.fail_never, + global_settings=parsed_arg.global_settings, + global_toolchains=parsed_arg.global_toolchains, + help_=parsed_arg.help_, + log_file=parsed_arg.log_file, + non_recursive=parsed_arg.non_recursive, + no_snapshot_updates=parsed_arg.no_snapshot_updates, + no_transfer_progress=parsed_arg.no_transfer_progress, + offline=parsed_arg.offline, + activate_profiles=( + MavenCLIOptions.parse_comma_sep_list(parsed_arg.activate_profiles) + if parsed_arg.activate_profiles + else None + ), + projects=MavenCLIOptions.parse_comma_sep_list(parsed_arg.projects) if parsed_arg.projects else None, + quiet=parsed_arg.quiet, + resume_from=parsed_arg.resume_from, + settings=parsed_arg.settings, + toolchains=parsed_arg.toolchains, + threads=parsed_arg.threads, + update_snapshots=parsed_arg.update_snapshots, + version=parsed_arg.version, + show_version=parsed_arg.show_version, + debug=parsed_arg.debug, + goals=parsed_arg.goals, + ) + + @staticmethod + def parse_system_properties(props: list[str]) -> dict[str, str]: + """Return a dictionary that maps between a system propertie and its value. + + Each property definition value in `props` can have either of these format: + - `property=value` (e.g. `-Dproperty=value`): this will be parsed into a + dictionary mapping of `"property": "value"`. Both the key and value + of this mapping is of type string. + - `property` (e.g. 
`-Dproperty`): this will be parsed into a dictionary mapping of `"property": "true"`. + + Parameters + ---------- + props: list[str] + The list of values provided to -D/--define in the cli command. + This is the list parsed by argparse. + + Returns + ------- + dict[str, str]: + The system properties dictionary. + + Examples + -------- + >>> MavenCLIOptions.parse_system_properties(["boo=true", "foo=1", "bar"]) + {'boo': 'true', 'foo': '1', 'bar': 'true'} + """ + system_props = {} + for ele in props: + prop_name, _, prop_val = ele.partition("=") + # Allow the subsequent definition override the previous one. + # This follows the way Maven is resolving system property. + # For example: + # mvn help:evaluate -Da=foo -Da=bar -Dexpression=a -q -DforceStdout + # => result for `a` is bar + # If ele doesn't have "=", for example `-Dmaven.skip.test`, we store + # the value using the value "true" string. + # + # For example: + # Maven evaluates the system property maven.skip.test to be "true" in these two commands + # mvn clean package -Dmaven.skip.test=true + # mvn clean package -Dmaven.skip.test + # To check how Maven evaluate the expression, run these commands on any project that uses maven. + # mvn help:evaluate -Dmaven.skip.test -Dexpression=maven.skip.test -q -DforceStdout + # mvn help:evaluate -Dmaven.skip.test=true -Dexpression=maven.skip.test -q -DforceStdout + if not prop_val: + system_props[prop_name] = "true" + else: + system_props[prop_name] = prop_val + + return system_props + + @staticmethod + def parse_comma_sep_list(input_val: str) -> list[str]: + """Split a comma delimited string and return a list of string elements. + + Parameters + ---------- + input_val: str + The comma delimited string. + + Returns + ------- + list[str] + The list of string elements. 
+ + Examples + -------- + >>> MavenCLIOptions.parse_comma_sep_list("examples,release") + ['examples', 'release'] + """ + return input_val.split(",") + + def to_option_cmds(self) -> list[str]: + """Return the options as a list of strings.""" + result = self.to_cmd_no_goals() + if self.goals: + for goal in self.goals: + result.append(goal) + + return result + + def to_cmd_no_goals(self) -> list[str]: + """Return the options only as a list of string. + + Only enabled options are returned. + + Returns + ------- + list[str] + The enabled options. + """ + result = [] + + if self.also_make: + result.append("-am") + + if self.also_make_dependents: + result.append("-amd") + + if self.batch_mode: + result.append("-B") + + if self.builder: + result.extend(f"-b {self.builder}".split()) + + if self.strict_checksums: + result.append("-C") + + if self.lax_checksums: + result.append("-c") + + if self.define: + for key, value in self.define.items(): + result.append(f"-D{key}={value}") + + if self.errors: + result.append("-e") + + if self.encrypt_master_password: + result.extend(f"-emp {self.encrypt_master_password}".split()) + + if self.encrypt_password: + result.extend(f"-ep {self.encrypt_password}".split()) + + if self.file: + result.extend(f"-f {self.file}".split()) + + if self.fail_at_end: + result.append("-fae") + + if self.fail_fast: + result.append("-ff") + + if self.fail_never: + result.append("-fn") + + if self.global_settings: + result.extend(f"-gs {self.global_settings}".split()) + + if self.global_toolchains: + result.extend(f"-gt {self.global_toolchains}".split()) + + if self.help_: + result.append("-h") + + if self.log_file: + result.extend(f"-l {self.log_file}".split()) + + if self.non_recursive: + result.append("-N") + + if self.no_snapshot_updates: + result.append("-U") + + if self.no_transfer_progress: + result.append("-ntp") + + if self.offline: + result.append("-o") + + if self.activate_profiles: + result.extend(f"-P {','.join(self.activate_profiles)}".split()) 
+ + if self.projects: + result.extend(f"-pl {','.join(self.projects)}".split()) + + if self.quiet: + result.append("-q") + + if self.resume_from: + result.extend(f"-rf {self.resume_from}".split()) + + if self.settings: + result.extend(f"-s {self.settings}".split()) + + if self.toolchains: + result.extend(f"-t {self.toolchains}".split()) + + if self.threads: + result.extend(f"-T {self.threads}".split()) + + if self.update_snapshots: + result.append("-U") + + if self.version: + result.append("-v") + + if self.show_version: + result.append("-V") + + if self.debug: + result.append("-X") + + return result + + +@dataclass +class MavenCLICommand: + """The class that stores the values of a Maven CLI Command.""" + + executable: str + options: MavenCLIOptions + + def to_cmds(self) -> list[str]: + """Return the CLI Command as a list of strings.""" + result = [] + result.append(self.executable) + result.extend(self.options.to_option_cmds()) + return result diff --git a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py new file mode 100644 index 000000000..454f84cb0 --- /dev/null +++ b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py @@ -0,0 +1,594 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the Maven CLI Command parser."""

import argparse
import logging
import os
from collections.abc import Mapping
from copy import deepcopy
from dataclasses import dataclass, field
from typing import Any, TypeGuard

from macaron.build_spec_generator.cli_command_parser import (
    OptionDef,
    PatchCommandBuildTool,
    is_dict_of_str_to_str_or_none,
    is_list_of_strs,
    patch_mapping,
)
from macaron.build_spec_generator.cli_command_parser.maven_cli_command import MavenCLICommand, MavenCLIOptions
from macaron.errors import CommandLineParseError, PatchBuildCommandError

logger: logging.Logger = logging.getLogger(__name__)


MavenOptionPatchValueType = str | list[str] | bool | dict[str, str | None]


@dataclass
class MavenOptionalFlag(OptionDef[bool]):
    """This option represents an optional flag in Maven CLI command.

    For example: --debug/-X

    A short form for the option is required.
    """

    short_name: str

    # Right now this is used for --help, where the default attribute name for it
    # in the returned argparse.Namespace is "help", which conflicts with the built-in function help().
    dest: str | None = field(default=None)

    def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]:
        """Return True if the provided patch value is compatible with the internal type of this option."""
        return isinstance(patch, bool)

    def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None:
        """Add a new argument to argparse.ArgumentParser representing this option."""
        if self.dest:
            arg_parse.add_argument(
                *(self.short_name, self.long_name),
                action="store_true",
                dest=self.dest,
            )
        else:
            arg_parse.add_argument(
                *(self.short_name, self.long_name),
                action="store_true",
            )

    def get_patch_type_str(self) -> str:
        """Return the expected type for the patch value as string."""
        return "bool"


@dataclass
class MavenSingleValue(OptionDef[str]):
    """This option represents an option that takes a value in Maven CLI command.

    For example: "--settings ./path/to/pom.xml"

    A short form for the option is required.
    """

    short_name: str

    def is_valid_patch_option(self, patch: Any) -> TypeGuard[str]:
        """Return True if the provided patch value is compatible with the internal type of this option."""
        return isinstance(patch, str)

    def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None:
        """Add a new argument to argparse.ArgumentParser representing this option."""
        arg_parse.add_argument(
            *(self.short_name, self.long_name),
        )

    def get_patch_type_str(self) -> str:
        """Return the expected type for the patch value as string."""
        return "str"


@dataclass
class MavenCommaDelimList(OptionDef[list[str]]):
    """This option represents an option that takes a comma delimited value in Maven CLI command.

    This option can be defined one time only and the value is stored as a string in argparse.
    However, it's stored internally as list of strings obtained by splitting its original value in argparse
    using comma as the delimiter.

    For example: "-P profile1,profile2,profile3"
    will be stored as ["profile1", "profile2", "profile3"]

    A short form for the option is required.
    """

    short_name: str

    def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]:
        """Return True if the provided patch value is compatible with the internal type of this option."""
        return is_list_of_strs(patch)

    def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None:
        """Add a new argument to argparse.ArgumentParser representing this option."""
        arg_parse.add_argument(
            *(self.short_name, self.long_name),
        )

    def get_patch_type_str(self) -> str:
        """Return the expected type for the patch value as string."""
        return "list"


@dataclass
class MavenSystemPropeties(OptionDef[dict[str, str | None]]):
    """This option represents the -D/--define option of a Maven CLI command.

    This option can be defined multiple times and the values are appended into a list of string in argparse.
    However, it's stored internally as a dictionary mapping between the system property name to its value.

    For example: ``-Dmaven.skip.test=true -Drat.skip=true``
    will be stored as ``{"maven.skip.test": "true", "rat.skip": "true"}``

    A short form for the option is required.
    """

    short_name: str

    def is_valid_patch_option(self, patch: Any) -> TypeGuard[dict[str, str | None]]:
        """Return True if the provided patch value is compatible with the internal type of this option."""
        return is_dict_of_str_to_str_or_none(patch)

    def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None:
        """Add a new argument to argparse.ArgumentParser representing this option."""
        arg_parse.add_argument(
            *(self.short_name, self.long_name),
            action="append",
        )

    def get_patch_type_str(self) -> str:
        """Return the expected type for the patch value as string."""
        return "dict[str, str | None]"


@dataclass
class MavenGoalPhase(OptionDef[list[str]]):
    """This option represents the positional goal/plugin-phase option in Maven CLI command.

    argparse.Namespace stores this as a list of string. This is stored internally as a list of string.
    """

    def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]:
        """Return True if the provided patch value is compatible with the internal type of this option."""
        return is_list_of_strs(patch)

    def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None:
        """Add a new argument to argparse.ArgumentParser representing this option."""
        # The positional is not required (nargs="*") to allow cases like "mvn --help".
        arg_parse.add_argument(
            self.long_name,
            nargs="*",
        )

    def get_patch_type_str(self) -> str:
        """Return the expected type for the patch value as string."""
        return "list[str]"


# We intend to support Maven version 3.6.3 - 3.9
MAVEN_OPTION_DEF: list[OptionDef] = [
    MavenOptionalFlag(short_name="-am", long_name="--also-make"),
    MavenOptionalFlag(short_name="-amd", long_name="--also-make-dependents"),
    MavenOptionalFlag(short_name="-B", long_name="--batch-mode"),
    MavenOptionalFlag(short_name="-C", long_name="--strict-checksums"),
    MavenOptionalFlag(short_name="-c", long_name="--lax-checksums"),
    MavenOptionalFlag(short_name="-cpu", long_name="--check-plugin-updates"),
    MavenOptionalFlag(short_name="-e", long_name="--errors"),
    MavenOptionalFlag(short_name="-fae", long_name="--fail-at-end"),
    MavenOptionalFlag(short_name="-ff", long_name="--fail-fast"),
    MavenOptionalFlag(short_name="-fn", long_name="--fail-never"),
    MavenOptionalFlag(short_name="-h", long_name="--help", dest="help_"),
    MavenOptionalFlag(short_name="-llr", long_name="--legacy-local-repository"),
    MavenOptionalFlag(short_name="-N", long_name="--non-recursive"),
    MavenOptionalFlag(short_name="-nsu", long_name="--no-snapshot-updates"),
    MavenOptionalFlag(short_name="-ntp", long_name="--no-transfer-progress"),
    MavenOptionalFlag(short_name="-npu", long_name="--no-plugin-updates"),
    MavenOptionalFlag(short_name="-npr", long_name="--no-plugin-registry"),
    MavenOptionalFlag(short_name="-o", long_name="--offline"),
    MavenOptionalFlag(short_name="-q", long_name="--quiet"),
    MavenOptionalFlag(short_name="-U", long_name="--update-snapshots"),
    MavenOptionalFlag(short_name="-up", long_name="--update-plugins"),
    MavenOptionalFlag(short_name="-v", long_name="--version"),
    MavenOptionalFlag(short_name="-V", long_name="--show-version"),
    MavenOptionalFlag(short_name="-X", long_name="--debug"),
    MavenGoalPhase(long_name="goals"),
    # TODO: we need to confirm whether one can provide
    # -P or -pl multiple times and the values will be aggregated into a list of string.
    # The current implementation only considers one instance of -P or -pl.
    # Where to begin:
    # https://github.com/apache/maven/blob/maven-3.9.x/maven-embedder/src/main/java/org/apache/maven/cli/CLIManager.java
    # https://github.com/apache/commons-cli/blob/master/src/main/java/org/apache/commons/cli/Parser.java
    MavenSingleValue(short_name="-b", long_name="--builder"),
    MavenSystemPropeties(short_name="-D", long_name="--define"),
    MavenSingleValue(short_name="-emp", long_name="--encrypt-master-password"),
    MavenSingleValue(short_name="-ep", long_name="--encrypt-password"),
    MavenSingleValue(short_name="-f", long_name="--file"),
    MavenSingleValue(short_name="-gs", long_name="--global-settings"),
    MavenSingleValue(short_name="-gt", long_name="--global-toolchains"),
    MavenSingleValue(short_name="-l", long_name="--log-file"),
    MavenCommaDelimList(short_name="-P", long_name="--activate-profiles"),
    MavenCommaDelimList(short_name="-pl", long_name="--projects"),
    MavenSingleValue(short_name="-rf", long_name="--resume-from"),
    MavenSingleValue(short_name="-s", long_name="--settings"),
    MavenSingleValue(short_name="-t", long_name="--toolchains"),
    MavenSingleValue(short_name="-T", long_name="--threads"),
]


class MavenCLICommandParser:
    """A Maven CLI Command Parser."""

    ACCEPTABLE_EXECUTABLE = ["mvn", "mvnw"]

    def __init__(self) -> None:
        """Initialize the instance."""
        self.arg_parser = argparse.ArgumentParser(
            description="Parse Maven CLI command",
            prog="mvn",
            add_help=False,
            # https://docs.python.org/3/library/argparse.html#exit-on-error
            # Best effort of parsing the build command. Therefore, we don't want to exit on error.
            exit_on_error=False,
        )

        # A mapping between the long name to its option definition.
        self.option_defs: dict[str, OptionDef] = {}

        for opt_def in MAVEN_OPTION_DEF:
            opt_def.add_itself_to_arg_parser(self.arg_parser)

            self.option_defs[opt_def.long_name] = opt_def

        self.build_tool = PatchCommandBuildTool.MAVEN

    def is_build_tool(self, executable_path: str) -> bool:
        """Return True if ``executable_path`` ends with an accepted executable for this build tool.

        Parameters
        ----------
        executable_path: str
            The executable component of a CLI command.

        Returns
        -------
        bool
        """
        return os.path.basename(executable_path) in MavenCLICommandParser.ACCEPTABLE_EXECUTABLE

    def validate_patch(self, patch: Mapping[str, MavenOptionPatchValueType | None]) -> bool:
        """Return True if the patch conforms to the expected format.

        Every key must be the long name of a known option. Every value must either be None
        or be accepted by the ``is_valid_patch_option`` of the matching option definition.
        """
        for patch_name, patch_value in patch.items():
            opt_def = self.option_defs.get(patch_name)
            if not opt_def:
                logger.error("Cannot find any option that matches %s", patch_name)
                return False

            # None is valid for any option: it removes the option from the command.
            if patch_value is None:
                continue

            if not opt_def.is_valid_patch_option(patch_value):
                logger.error(
                    "The patch value %s of %s is not in the correct type. Expect %s.",
                    patch_value,
                    patch_name,
                    opt_def.get_patch_type_str(),
                )
                return False

        return True

    def parse(self, cmd_list: list[str]) -> "MavenCLICommand":
        """Parse the Maven CLI Command.

        Parameters
        ----------
        cmd_list: list[str]
            The Maven CLI Command as list of strings.

        Returns
        -------
        MavenCLICommand
            The MavenCLICommand instance.

        Raises
        ------
        CommandLineParseError
            If an error happens when parsing the Maven CLI Command.
        """
        if not cmd_list:
            raise CommandLineParseError("The provided cmd list is empty.")

        exe_path = cmd_list[0]
        options = cmd_list[1:]

        if os.path.basename(exe_path) not in MavenCLICommandParser.ACCEPTABLE_EXECUTABLE:
            raise CommandLineParseError(f"{exe_path} is not an acceptable mvn executable path.")

        # TODO: because our parser is not completed for all cases, should we be more relaxed and use
        # parse_unknown_options?
        try:
            parsed_opts = self.arg_parser.parse_args(options)
        except argparse.ArgumentError as error:
            raise CommandLineParseError(f"Failed to parse command {' '.join(options)}.") from error
        # Even though we have set `exit_on_error`, argparse still exits unexpectedly in some
        # cases. This has been confirmed to be a bug in the argparse library implementation.
        # https://github.com/python/cpython/issues/121018.
        # This is fixed in Python3.12, but not Python3.11
        except SystemExit as sys_exit_err:
            raise CommandLineParseError(f"Failed to parse the Maven CLI Options {' '.join(options)}.") from sys_exit_err

        # Handle cases where goal or plugin phase is not provided.
        if not parsed_opts.goals:
            # Allow cases such as:
            #   mvn --help
            #   mvn --version
            # Note that we don't allow mvn -V or mvn --show-version as this command will
            # fail for mvn.
            if not parsed_opts.help_ and not parsed_opts.version:
                raise CommandLineParseError(f"No goal detected for {' '.join(options)}.")

        maven_cli_options = MavenCLIOptions.from_parsed_arg(parsed_opts)

        return MavenCLICommand(
            executable=exe_path,
            options=maven_cli_options,
        )

    def _patch_properties_mapping(
        self,
        original_props: dict[str, str],
        option_long_name: str,
        patch_value: MavenOptionPatchValueType,
    ) -> dict[str, str]:
        """Return ``original_props`` patched with ``patch_value`` for the --define option.

        Raises
        ------
        PatchBuildCommandError
            If ``option_long_name`` does not resolve to a ``MavenSystemPropeties`` option definition,
            or if ``patch_value`` is rejected by that definition's ``is_valid_patch_option``.
        """
        define_opt_def = self.option_defs.get(option_long_name)
        if not define_opt_def or not isinstance(define_opt_def, MavenSystemPropeties):
            raise PatchBuildCommandError(f"{option_long_name} from the patch is not a --define option.")

        # The type guard narrows ``patch_value`` to the mapping type accepted by ``patch_mapping``.
        if not define_opt_def.is_valid_patch_option(patch_value):
            raise PatchBuildCommandError(f"Critical, incorrect runtime type for patch --define, value: {patch_value}.")

        return patch_mapping(
            original=original_props,
            patch=patch_value,
        )

    def apply_patch(
        self,
        cli_command: MavenCLICommand,
        options_patch: Mapping[str, MavenOptionPatchValueType | None],
    ) -> MavenCLICommand:
        """Patch the options of a Maven CLI command, while persisting the executable path.

        `options_patch` is a mapping with:

        - **Key**: the long name of a Maven CLI option as a string. For example: ``--define``, ``--settings``.
          For patching goals or plugin phases, use the key `goals` with value being a list of string.

        - **Value**: The value to patch. The type of this value depends on the type of option you want to
          patch.

        The types of patch values:

        - For optional flag (e.g ``-X/--debug``) it is boolean. True to set it and False to unset it.

        - For ``-D/--define`` ONLY, it will be a mapping between the system property name and its value.

        - For options that expect a comma delimited list of string (e.g. ``-P/--activate-profiles``
          and ``-pl/--projects``), a list of string is expected.

        - For other value option (e.g ``-s/--settings``), a string is expected.

        None can be provided to any type of option to remove it from the original build command.

        Parameters
        ----------
        cli_command : MavenCLICommand
            The original Maven command, as a ``MavenCLICommand`` object from ``MavenCLICommandParser.parse(...)``
        options_patch : Mapping[str, MavenOptionPatchValueType | None]
            The patch values.

        Returns
        -------
        MavenCLICommand
            The patched command as a new ``MavenCLICommand`` object.

        Raises
        ------
        PatchBuildCommandError
            If an error happens during the patching process.
        """
        return MavenCLICommand(
            executable=cli_command.executable,
            options=self.apply_option_patch(
                cli_command.options,
                patch=options_patch,
            ),
        )

    def apply_option_patch(
        self,
        maven_cli_options: MavenCLIOptions,
        patch: Mapping[str, MavenOptionPatchValueType | None],
    ) -> MavenCLIOptions:
        """Patch the Maven CLI Options and return a new copy.

        Parameters
        ----------
        maven_cli_options: MavenCLIOptions
            The Maven CLI Options to patch. This object is not modified.
        patch: Mapping[str, MavenOptionPatchValueType | None]
            A mapping between the long name of an option (e.g. ``--define``) and its patch value.
            The attribute of MavenCLIOptions to update is derived from the long name.
            The value can be None to disable an option.

        Returns
        -------
        MavenCLIOptions
            The new patched maven cli options.

        Raises
        ------
        PatchBuildCommandError
            If the patch is rejected by ``validate_patch`` or an error happens during the patching process.
        """
        if not self.validate_patch(patch):
            raise PatchBuildCommandError("The patch is invalid.")

        # Copy the Maven CLI Options for patching, so that the object passed in is left untouched.
        new_maven_cli_options = deepcopy(maven_cli_options)

        for option_long_name, patch_value in patch.items():
            if option_long_name == "--help":
                # The attribute is named ``help_`` (see ``dest`` of the --help option definition
                # and the ``MavenCLIOptions.help_`` field), not ``_help``.
                attr_name = "help_"
            else:
                # Get the attribute name of MavenCLIOption object.
                # They all follow the same rule of removing the prefix --
                # from option long name and replace all "-" with "_"
                attr_name = option_long_name.removeprefix("--").replace("-", "_")

            # Ensure that setting any option to None in the patch
            # will remove it from the build command.
            # This must run before the --define handling below, so that None removes
            # the whole option instead of being merged as a mapping.
            if patch_value is None:
                setattr(new_maven_cli_options, attr_name, patch_value)
                continue

            # Only for "-D/--define" we patch it differently: the patch is merged into
            # the existing system properties instead of replacing them.
            if option_long_name == "--define":
                new_maven_cli_options.define = self._patch_properties_mapping(
                    original_props=new_maven_cli_options.define or {},
                    option_long_name=option_long_name,
                    patch_value=patch_value,
                )
                continue

            setattr(new_maven_cli_options, attr_name, patch_value)

        return new_maven_cli_options
"""This module contains the tests for the helpers exported by the cli_command_parser package."""

from collections.abc import Mapping
from typing import Any

import pytest

from macaron.build_spec_generator.cli_command_parser import (
    is_dict_of_str_to_str_or_none,
    is_list_of_strs,
    patch_mapping,
)


@pytest.mark.parametrize(
    ("value", "expected"),
    [
        # Accepted: dictionaries whose keys are strings and whose values are strings or None.
        pytest.param(
            {"A": "B"},
            True,
        ),
        pytest.param(
            {"A": None, "B": "C"},
            True,
        ),
        pytest.param(
            {"A": "B", "C": "D"},
            True,
        ),
        # Rejected: values that are not dictionaries.
        pytest.param(
            True,
            False,
        ),
        pytest.param(
            ["A", "B"],
            False,
        ),
        # Rejected: a dictionary with a value that is neither a string nor None.
        pytest.param(
            {"A": "B", "C": 1, "D": {}},
            False,
        ),
        # Rejected: a dictionary with a key that is not a string.
        pytest.param(
            {1: "B"},
            False,
        ),
    ],
)
def test_is_dict_of_str_to_str_or_none(value: Any, expected: bool) -> None:
    """Test the is_dict_of_str_to_str_or_none type guard."""
    assert is_dict_of_str_to_str_or_none(value) == expected


@pytest.mark.parametrize(
    ("value", "expected"),
    [
        pytest.param(
            ["str1", "str2"],
            True,
        ),
        # An empty list is accepted as a list of strings.
        pytest.param(
            [],
            True,
        ),
        # Rejected: values that are not lists. A plain string is rejected as well.
        pytest.param(
            {"A": "B"},
            False,
        ),
        pytest.param(
            "str",
            False,
        ),
        pytest.param(
            True,
            False,
        ),
    ],
)
def test_is_list_of_strs(value: Any, expected: bool) -> None:
    """Test the is_list_of_strs function."""
    assert is_list_of_strs(value) == expected


@pytest.mark.parametrize(
    ("original", "patch", "expected"),
    [
        # Empty original and empty patch.
        pytest.param(
            {},
            {},
            {},
        ),
        # An empty patch leaves the original mapping as is.
        pytest.param(
            {"boo": "foo", "bar": "far"},
            {},
            {"boo": "foo", "bar": "far"},
        ),
        # Patching an empty mapping adds all entries of the patch.
        pytest.param(
            {},
            {"boo": "foo", "bar": "far"},
            {"boo": "foo", "bar": "far"},
        ),
        # A key present in both takes the value from the patch.
        pytest.param(
            {"boo": "foo", "bar": "far"},
            {"boo": "another_foo"},
            {"boo": "another_foo", "bar": "far"},
        ),
        pytest.param(
            {"boo": "foo", "bar": "far"},
            {"boo": "another_foo", "bar": None},
            {"boo": "another_foo"},
            id="Use None to remove a system property",
        ),
    ],
)
def test_patch_mapping(
    original: Mapping[str, str],
    patch: Mapping[str, str | None],
    expected: Mapping[str, str],
) -> None:
    """Test the patch mapping function."""
    assert (
        patch_mapping(
            original=original,
            patch=patch,
        )
        == expected
    )
@pytest.mark.parametrize(
    ("this", "that"),
    [
        pytest.param("gradle", "gradle", id="test_equal_only_executable"),
        pytest.param(
            "gradlew -S clean build -x test",
            "gradlew clean build -S -x test",
            id="test_different_order_of_options",
        ),
        pytest.param(
            "gradlew clean build -Pgnupg.skip -Pskip.signing",
            "gradlew clean build -Pskip.signing -Pgnupg.skip ",
            id="test_properties_equal_checking",
        ),
        pytest.param(
            "gradlew clean build -Dorg.gradle.caching=true -PmyProperty=boo",
            "gradlew clean build -Dorg.gradle.caching=true -PmyProperty=boo",
            id="test_properties_with_values_equal_checking",
        ),
        pytest.param(
            "gradlew clean build -x test -x boo",
            "gradlew clean build -x test -x boo",
            id="test_excluded_tasks",
        ),
    ],
)
def test_comparing_gradle_cli_command_equal(
    gradle_cli_parser: GradleCLICommandParser,
    this: str,
    that: str,
) -> None:
    """Check that both command lines parse into GradleCLICommand objects that compare equal."""
    # Parse the left-hand command first, then the right-hand one, as whitespace-split argv lists.
    parsed_this, parsed_that = (gradle_cli_parser.parse(raw.split()) for raw in (this, that))
    assert parsed_this == parsed_that
@pytest.mark.parametrize(
    ("command", "that"),
    [
        (
            "gradle clean build -x test --debug --stacktrace -Dorg.gradle.caching=true",
            other,
        )
        # The same command is compared against a bool, a list and a set.
        for other in (True, ["boo", "foo"], {"boo", "foo"})
    ],
)
def test_comparing_gradle_cli_command_unequal_types(
    gradle_cli_parser: GradleCLICommandParser,
    command: str,
    that: Any,
) -> None:
    """Check that a parsed GradleCLICommand does not compare equal to an object of another type."""
    parsed_command = gradle_cli_parser.parse(command.split())
    # ``not ... ==`` is kept on purpose so that the equality operator itself is exercised.
    assert not parsed_command == that
@pytest.mark.parametrize(
    ("properties", "expected"),
    [
        pytest.param(
            ["org.gradle.caching.debug=false", "boo=foo"],
            {"org.gradle.caching.debug": "false", "boo": "foo"},
        ),
        pytest.param(
            ["org.gradle.caching.debug=false", "org.gradle.caching.debug=true"],
            {"org.gradle.caching.debug": "true"},
            id="test_overriding_behavior_from_input",
        ),
        pytest.param(
            ["org.gradle.caching.debug=false", "boo"],
            {"org.gradle.caching.debug": "false", "boo": ""},
            id="test_property_default_value",
        ),
    ],
)
def test_gradle_cli_option_parse_properties(
    properties: list[str],
    expected: dict[str, str],
) -> None:
    """Check the mapping that ``GradleCLIOptions.parse_properties`` builds from raw property strings.

    The cases cover plain ``key=value`` items, a key given twice (the later
    value is expected) and a key without a value (an empty string is expected).
    """
    parsed_properties = GradleCLIOptions.parse_properties(properties)
    assert parsed_properties == expected
+ pytest.param( + "gradle", + {"tasks": []}, + id="can_run_gradle_without_any_option", + ), + pytest.param( + "gradle -?", + {"tasks": [], "help_": True}, + id="gradle_print_help_-?", + ), + pytest.param( + "gradle --help", + {"tasks": [], "help_": True}, + id="gradle_print_help_--help", + ), + pytest.param( + "gradle -h", + {"tasks": [], "help_": True}, + id="gradle_print_help_-h", + ), + pytest.param( + "gradle --version", + {"tasks": [], "version": True}, + id="gradle_print_version_long", + ), + pytest.param( + "gradle -v", + {"tasks": [], "version": True}, + id="gradle_print_version_short", + ), + pytest.param( + "gradle clean build", + {"tasks": ["clean", "build"]}, + id="gradle_tasks", + ), + pytest.param( + "gradlew clean build", + {"tasks": ["clean", "build"]}, + id="gradle_wrapper_tasks", + ), + pytest.param( + "gradle clean build --continue", + {"tasks": ["clean", "build"], "continue_": True}, + id="test_continue_flag_with_exception_in_attribute_name", + ), + # TODO: validate if the order of the options decide the final value of + # the negateable option. + # For example: `--build-cache --no-build-cache` is different from `--no-build-cache --build-cache` + pytest.param( + "gradle clean build --build-cache --no-build-cache", + {"tasks": ["clean", "build"], "build_cache": False}, + id="both_normal_and_negated_form_can_be_provided_final_false", + ), + pytest.param( + "gradle clean build --no-build-cache --build-cache", + {"tasks": ["clean", "build"], "build_cache": True}, + id="both_normal_and_negated_form_can_be_provided_final_true", + ), + # This doesn't well represent a real gradle CLI command. + # It's just for the purpose of unit testing. 
+ pytest.param( + "gradle clean build --continue --debug --rerun-tasks -s --console plain --build-cache", + { + "tasks": ["clean", "build"], + "continue_": True, + "debug": True, + "rerun_tasks": True, + "stacktrace": True, + "console": "plain", + "build_cache": True, + }, + id="combination_of_option_types", + ), + ], +) +def test_gradle_cli_command_parser_valid_input( + gradle_cli_parser: GradleCLICommandParser, + command: str, + expected: dict[str, str | None | bool | list[str]], +) -> None: + """Test the gradle cli parser on valid input.""" + parsed_res = gradle_cli_parser.parse(command.split()) + + all_attrs = vars(parsed_res.options).keys() + + for attribute in all_attrs: + if attribute in expected: + assert getattr(parsed_res.options, attribute) == expected[attribute] + else: + # Making sure that we are not enabling flags that are not part of the + # build command. + # We don't compare it to None because some options if not set, argparse + # will assign a different Falsy value depending on the option type. 
@pytest.mark.parametrize(
    ("build_command", "expected"),
    [
        pytest.param("gradle clean build --debug --stacktrace", "gradle"),
        pytest.param("./gradlew clean build --debug --stacktrace", "./gradlew"),
        pytest.param("./boo/gradlew clean build --debug --stacktrace", "./boo/gradlew"),
    ],
)
def test_gradle_cli_command_parser_executable(
    gradle_cli_parser: GradleCLICommandParser,
    build_command: str,
    expected: str,
) -> None:
    """Check that the parsed command keeps the executable string exactly as it was given."""
    argv = build_command.split()
    parsed_command = gradle_cli_parser.parse(argv)
    assert parsed_command.executable == expected
@pytest.mark.parametrize(
    ("this", "that"),
    [
        pytest.param("mvn clean package", "mvn clean package", id="totally_equal"),
        pytest.param(
            "mvn -X clean package -P project1,project2",
            "mvn clean package -X -P project1,project2",
            id="test_different_order_of_options",
        ),
        pytest.param(
            "mvn clean package -Dmaven.skip.test=true",
            "mvn clean package -Dmaven.skip.test",
            id="test_default_value_for_system_property",
        ),
    ],
)
def test_comparing_maven_cli_command_equal(
    maven_cli_parser: MavenCLICommandParser,
    this: str,
    that: str,
) -> None:
    """Check that both command lines parse into MavenCLICommand objects that compare equal."""
    # Parse the left-hand command first, then the right-hand one, as whitespace-split argv lists.
    parsed_this, parsed_that = (maven_cli_parser.parse(raw.split()) for raw in (this, that))
    assert parsed_this == parsed_that
@pytest.mark.parametrize(
    "command",
    [
        "mvn clean package",
        "mvn clean package -P profile1,profile2 -T 2C -ntp -Dmaven.skip.test=true -Dboo=foo",
        (
            "mvn -f fit/core-reference/pom.xml verify -Dit.test=RESTITCase -Dinvoker.streamLogs=true"
            " -Dmodernizer.skip=true -Drat.skip=true -Dcheckstyle.skip=true -Djacoco.skip=true"
        ),
        "mvn -s ../.github/maven-settings.xml install -Pexamples,noRun",
        "mvn clean package -Dmaven.test.skip",
    ],
)
def test_to_cmd_goals(maven_cli_parser: MavenCLICommandParser, command: str) -> None:
    """Check that a parsed Maven command survives a round trip through ``to_option_cmds``.

    The command is parsed, rebuilt from its executable and the strings returned by
    ``options.to_option_cmds()``, parsed again and compared with the first result.
    """
    first_parse = maven_cli_parser.parse(command.split())

    rebuilt_argv = [first_parse.executable, *first_parse.options.to_option_cmds()]

    second_parse = maven_cli_parser.parse(rebuilt_argv)
    assert first_parse == second_parse
+ ["maven.skip.true=true", "boo"], + {"maven.skip.true": "true", "boo": "true"}, + id="test_system_property_default_value", + ), + ], +) +def test_maven_cli_option_parse_system_properties( + properties: list[str], + expected: dict[str, str], +) -> None: + """Test the MavenCLIOptions.parse_system_properties method.""" + assert MavenCLIOptions.parse_system_properties(properties) == expected diff --git a/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py b/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py new file mode 100644 index 000000000..d219e1af2 --- /dev/null +++ b/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py @@ -0,0 +1,197 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the tests for maven cli parser.""" + + +import pytest + +from macaron.build_spec_generator.cli_command_parser.maven_cli_parser import ( + CommandLineParseError, + MavenCLICommandParser, +) + + +@pytest.mark.parametrize( + ("command", "expected"), + [ + pytest.param( + "mvn clean package", + {"goals": ["clean", "package"]}, + id="goal_only_no_option", + ), + # https://maven.apache.org/guides/introduction/introduction-to-the-lifecycle.html#Build_Lifecycle_Basics + pytest.param( + "mvn clean dependency:copy-dependencies package", + {"goals": ["clean", "dependency:copy-dependencies", "package"]}, + id="goal_and_phase_mix", + ), + pytest.param( + "mvn clean package -P profile1,profile2 -T 2C -ntp -Dmaven.skip.test=true -Dboo=foo", + { + "goals": ["clean", "package"], + # "-P" + "activate_profiles": ["profile1", "profile2"], + # "-T" + "threads": "2C", + # "-ntp" + "no_transfer_progress": True, + # "-D=" + "define": {"maven.skip.test": "true", "boo": "foo"}, + }, + id="test_combination_options", + ), + pytest.param( + "mvn clean package -Dmaven.skip.test=true 
-Dmaven.skip.test=false", + { + "goals": ["clean", "package"], + "define": {"maven.skip.test": "false"}, + }, + id="multiple_definition_of_the_same_property_override_each_other", + ), + pytest.param( + "mvn clean package -Dmaven.skip.test", + { + "goals": ["clean", "package"], + "define": {"maven.skip.test": "true"}, + }, + id="test_default_value_if_no_value_is_provided_for_a_property", + ), + # A modified version of + # https://github.com/apache/syncope/blob/9437c6c978ca8c03b5e5cccc40a5a352be1ecc52/.github/workflows/crosschecks.yml#L70 + pytest.param( + "mvn -f fit/core-reference/pom.xml verify -Dit.test=RESTITCase -Dinvoker.streamLogs=true " + "-Dmodernizer.skip=true -Drat.skip=true -Dcheckstyle.skip=true -Djacoco.skip=true", + { + "file": "fit/core-reference/pom.xml", + "goals": ["verify"], + "define": { + "it.test": "RESTITCase", + "invoker.streamLogs": "true", + "modernizer.skip": "true", + "rat.skip": "true", + "checkstyle.skip": "true", + "jacoco.skip": "true", + }, + }, + id="pkg:maven/org.apache.syncope.common.keymaster.self/syncope-common-keymaster-client-self@3.0.0", + ), + # https://github.com/apache/activemq-artemis/blob/2.27.1/.github/workflows/build.yml + pytest.param( + "mvn -s ../.github/maven-settings.xml install -Pexamples,noRun", + { + "settings": "../.github/maven-settings.xml", + "goals": ["install"], + "activate_profiles": ["examples", "noRun"], + }, + id="pkg:maven/org.apache.activemq/artemis-log-annotation-processor@2.27.1", + ), + pytest.param( + "mvn --help", + { + "goals": [], + "help_": True, + }, + id="allow_no_goal_for_help", + ), + pytest.param( + "mvn --version", + { + "goals": [], + "help_": False, + "version": True, + }, + id="allow_no_goal_for_version", + ), + pytest.param( + "mvn --help --version", + { + "goals": [], + "help_": True, + "version": True, + }, + id="allow_no_goal_for_version_and_help", + ), + ], +) +def test_maven_cli_command_parser_valid_input( + maven_cli_parser: MavenCLICommandParser, + command: str, + expected: 
@pytest.mark.parametrize(
    ("build_command", "expected"),
    [
        pytest.param("mvn clean package -X -ntp", "mvn"),
        pytest.param("mvnw clean package -X -ntp", "mvnw"),
        pytest.param("./boo/mvnw clean package -X -ntp", "./boo/mvnw"),
    ],
)
def test_maven_cli_command_parser_executable(
    maven_cli_parser: MavenCLICommandParser,
    build_command: str,
    expected: str,
) -> None:
    """Check that the parsed command keeps the executable string exactly as it was given."""
    argv = build_command.split()
    parsed_command = maven_cli_parser.parse(argv)
    assert parsed_command.executable == expected
@pytest.fixture(scope="module")
def maven_cli_parser() -> MavenCLICommandParser:
    """Provide one MavenCLICommandParser instance shared by all tests of a module."""
    parser = MavenCLICommandParser()
    return parser


@pytest.fixture(scope="module")
def gradle_cli_parser() -> GradleCLICommandParser:
    """Provide one GradleCLICommandParser instance shared by all tests of a module."""
    parser = GradleCLICommandParser()
    return parser
def download_file(url: str, dest: str) -> None:
    """Stream a file into a local destination.

    The HTTP response is closed on every path out of this function. When
    streaming fails with a ``requests`` error, the partially written ``dest``
    is removed before the error is reported.

    Parameters
    ----------
    url: str
        The URL of the file to stream from.
    dest: str
        The path to the destination file in the local file system. This path
        includes the file name.

    Raises
    ------
    InvalidHTTPResponseError
        If the server does not answer with HTTP 200, or if an error happens
        while streaming the file.
    OSError
        If the parent directory of ``dest`` doesn't exist.
    """
    # With ``stream=True`` the connection stays checked out until the body is consumed
    # or the response is closed, so use the response as a context manager to release it
    # on the error paths as well.
    with requests.get(url=url, stream=True, timeout=40) as response:
        if response.status_code != 200:
            raise InvalidHTTPResponseError(f"Cannot download java artifact file from {url}")

        try:
            with open(dest, "wb") as fd:
                # 8 KiB chunks avoid the thousands of tiny writes a 128-byte chunk size causes.
                for chunk in response.iter_content(chunk_size=8192, decode_unicode=False):
                    fd.write(chunk)
        except requests.RequestException as error:
            # Do not leave a truncated artifact behind: a later reader could mistake it
            # for a complete download.
            try:
                os.remove(dest)
            except OSError as remove_error:
                logger.debug("Cannot remove partially downloaded file %s: %s", dest, remove_error)
            raise InvalidHTTPResponseError(f"Error while streaming java artifact file from {url}") from error
path=new_path_component, + params="", + query="", + fragment="", + ) + ) + + +def get_jdk_version_from_jar(artifact_path: str) -> str | None: + """Return the JDK version obtained from a Java artifact. + + Parameters + ---------- + artifact_path: str + The path to the artifact to extract the jdk version. + + Returns + ------- + str | None + The version string extract from the artifact (as is) or None + if there is an error, or if we couldn't find any jdk version. + """ + with zipfile.ZipFile(artifact_path, "r") as jar: + manifest_path = "META-INF/MANIFEST.MF" + with jar.open(manifest_path) as manifest_file: + manifest_content = manifest_file.read().decode("utf-8") + for line in manifest_content.splitlines(): + if "Build-Jdk" in line or "Build-Jdk-Spec" in line: + _, _, version = line.rpartition(":") + logger.debug( + "Found JDK version %s from java artifact at %s", + version.strip(), + artifact_path, + ) + return version.strip() + + logger.debug("Cannot find any JDK version from java artifact at %s", artifact_path) + return None + + +def find_jdk_version_from_remote_maven_repo_standalone( + group_id: str, + artifact_id: str, + version: str, + asset_name: str, + remote_maven_repo_url: str, +) -> str | None: + """Return the jdk version string from an artifact matching a given GAV from a remote maven layout repository. + + This function doesn't cache the downloaded artifact, and remove it after the function exits. + We assume that the remote maven layout repository supports downloading a file through a HTTPS URL. + + Parameters + ---------- + group_id: str + The group ID part of the GAV coordinate. + artifact_id: str + The artifact ID part of the GAV coordinate. + version: str + The version part of the GAV coordinate. + asset_name: str + The name of artifact to download and extract the jdk version. + ext: JavaArtifactExt + The extension of the main artifact file. + remote_maven_repo_url: str + The URL to the remote maven layout repository. 
def find_jdk_version_from_remote_maven_repo_cache(
    group_id: str,
    artifact_id: str,
    version: str,
    asset_name: str,
    remote_maven_repo_url: str,
    local_cache_repo: str,
) -> str | None:
    """Return the jdk version string from an artifact matching a given GAV from a remote maven layout repository.

    This function caches the downloaded artifact in a maven layout https://maven.apache.org/repository/layout.html
    under ``local_cache_repo``. The artifact is first downloaded next to its final location and only
    moved into place once the download has succeeded, so a failed download never leaves a truncated
    file that a later call would treat as a cached artifact.
    We assume that the remote maven layout repository supports downloading a file through a HTTPS URL.

    Parameters
    ----------
    group_id: str
        The group ID part of the GAV coordinate.
    artifact_id: str
        The artifact ID part of the GAV coordinate.
    version: str
        The version part of the GAV coordinate.
    asset_name: str
        The name of artifact to download and extract the jdk version.
    remote_maven_repo_url: str
        The URL to the remote maven layout repository.
    local_cache_repo: str
        The path to a local directory for caching the downloaded artifact used in JDK version
        extraction.

    Returns
    -------
    str | None
        The version string extract from the artifact (as is) or None
        if there is an error, or if we couldn't find any jdk version.
    """
    maven_repository_path = construct_maven_repository_path(
        group_id=group_id,
        artifact_id=artifact_id,
        version=version,
        asset_name=asset_name,
    )

    local_artifact_path = os.path.join(
        local_cache_repo,
        maven_repository_path,
    )
    # Cache hit: the artifact is already present from an earlier call.
    if os.path.isfile(local_artifact_path):
        return get_jdk_version_from_jar(local_artifact_path)

    artifact_url = join_remote_maven_repo_url(
        remote_maven_repo_url,
        maven_repository_path,
    )
    logger.debug(
        "Find JDK version from jar at %s, using cache %s",
        artifact_url,
        local_artifact_path,
    )

    # Download under a temporary name; the cache path only ever holds complete artifacts.
    partial_artifact_path = f"{local_artifact_path}.part"
    try:
        # Creating the GAV directory is inside the ``try`` so that a failure here is
        # reported as None like every other error.
        os.makedirs(
            os.path.dirname(local_artifact_path),
            exist_ok=True,
        )
        download_file(
            artifact_url,
            partial_artifact_path,
        )
        os.replace(partial_artifact_path, local_artifact_path)
    except InvalidHTTPResponseError as error:
        logger.error("Failed while trying to download jar file. Error: %s", error)
        return None
    except OSError as os_error:
        logger.critical("Critical %s", os_error)
        return None
    finally:
        # Best-effort cleanup of a leftover partial download. After a successful
        # ``os.replace`` the file no longer exists, which is the expected case.
        try:
            os.remove(partial_artifact_path)
        except OSError:
            pass

    return get_jdk_version_from_jar(local_artifact_path)
+ local_cache_repo: str + The path to a local directory for caching the downloaded artifact used in JDK version + extraction. + + Returns + ------- + str | None + The version string extract from the artifact (as is) or None + ff there is an error, or if we couldn't find any jdk version. + """ + central_repo_url = "https://repo1.maven.org/maven2/" + local_cache_maven_repo = os.path.join( + global_config.output_path, + "jdk_finding_cache_maven_repo", + ) + asset_name = f"{artifact_id}-{version}{JavaArtifactExt.JAR.value}" + + if use_cache: + return find_jdk_version_from_remote_maven_repo_cache( + group_id=group_id, + artifact_id=artifact_id, + version=version, + asset_name=asset_name, + remote_maven_repo_url=central_repo_url, + local_cache_repo=local_cache_maven_repo, + ) + + return find_jdk_version_from_remote_maven_repo_standalone( + group_id=group_id, + artifact_id=artifact_id, + version=version, + asset_name=asset_name, + remote_maven_repo_url=central_repo_url, + ) diff --git a/tests/build_spec_generator/test_jdk_version_finder.py b/tests/build_spec_generator/test_jdk_version_finder.py new file mode 100644 index 000000000..f9df00569 --- /dev/null +++ b/tests/build_spec_generator/test_jdk_version_finder.py @@ -0,0 +1,102 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the tests for the jdk_finder module."""

import zipfile
from pathlib import Path

import pytest

from macaron.build_spec_generator.jdk_finder import get_jdk_version_from_jar, join_remote_maven_repo_url


def _make_jar_with_manifest(directory: Path, manifest_mf_content: str) -> str:
    """Create ``example.jar`` in ``directory`` with the given manifest content and return its path.

    The archive contains a single ``META-INF/MANIFEST.MF`` entry.
    """
    jar_path = directory / "example.jar"
    with zipfile.ZipFile(jar_path, mode="w") as jar:
        jar.writestr("META-INF/MANIFEST.MF", manifest_mf_content)
    return str(jar_path)


@pytest.mark.parametrize(
    ("remote_maven_url", "maven_repo_path", "expected"),
    [
        pytest.param(
            "https://repo1.maven.org/maven2",
            "com/oracle/",
            "https://repo1.maven.org/maven2/com/oracle/",
            id="g_coordinate",
        ),
        pytest.param(
            "https://repo1.maven.org/maven2",
            "com/oracle/macaron/",
            "https://repo1.maven.org/maven2/com/oracle/macaron/",
            id="ga_coordinate",
        ),
        pytest.param(
            "https://repo1.maven.org/maven2",
            "com/oracle/macaron/0.16.0/",
            "https://repo1.maven.org/maven2/com/oracle/macaron/0.16.0/",
            id="gav_coordinate",
        ),
        pytest.param(
            "https://repo1.maven.org/maven2",
            "com/oracle/macaron/0.16.0/macaron-0.16.0.jar",
            "https://repo1.maven.org/maven2/com/oracle/macaron/0.16.0/macaron-0.16.0.jar",
            id="gav_asset_coordinate",
        ),
        pytest.param(
            "https://repo1.maven.org/maven2/",
            "com/oracle/macaron/0.16.0/",
            "https://repo1.maven.org/maven2/com/oracle/macaron/0.16.0/",
            id="handle_trailing_slash_in_remote_maven_url",
        ),
    ],
)
def test_join_remote_maven_repo_url(
    remote_maven_url: str,
    maven_repo_path: str,
    expected: str,
) -> None:
    """Check that a repository URL and a maven layout path are joined into the expected URL."""
    joined_url = join_remote_maven_repo_url(
        remote_maven_url=remote_maven_url,
        maven_repo_path=maven_repo_path,
    )
    assert joined_url == expected


@pytest.mark.parametrize(
    ("manifest_mf_content", "expected"),
    [
        ("Build-Jdk: 1.8", "1.8"),
        ("Build-Jdk-Spec: 8", "8"),
    ],
)
def test_get_jdk_version_from_jar_succeed(
    tmp_path: Path,
    manifest_mf_content: str,
    expected: str,
) -> None:
    """Check that the JDK version is read from a jar whose manifest declares one."""
    jar_file = _make_jar_with_manifest(tmp_path, manifest_mf_content)

    assert get_jdk_version_from_jar(jar_file) == expected


@pytest.mark.parametrize(
    "manifest_mf_content",
    [
        "",
        "Build-Jdk-Spec: ",
    ],
)
def test_get_jdk_version_from_jar_failed(
    tmp_path: Path,
    manifest_mf_content: str,
) -> None:
    """Check that no JDK version is returned when the manifest does not declare a usable one."""
    jar_file = _make_jar_with_manifest(tmp_path, manifest_mf_content)

    assert not get_jdk_version_from_jar(jar_file)
"""This module contains the implementation of the build command patching."""

import logging
from collections.abc import Mapping, Sequence

from macaron.build_spec_generator.cli_command_parser import CLICommand, CLICommandParser, PatchCommandBuildTool
from macaron.build_spec_generator.cli_command_parser.gradle_cli_parser import (
    GradleCLICommandParser,
    GradleOptionPatchValueType,
)
from macaron.build_spec_generator.cli_command_parser.maven_cli_parser import (
    CommandLineParseError,
    MavenCLICommandParser,
    MavenOptionPatchValueType,
    PatchBuildCommandError,
)
from macaron.build_spec_generator.cli_command_parser.unparsed_cli_command import UnparsedCLICommand

logger: logging.Logger = logging.getLogger(__name__)

MVN_CLI_PARSER = MavenCLICommandParser()
GRADLE_CLI_PARSER = GradleCLICommandParser()

PatchValueType = GradleOptionPatchValueType | MavenOptionPatchValueType


def _patch_commands(
    cmds_sequence: Sequence[list[str]],
    cli_parsers: Sequence[CLICommandParser],
    patches: Mapping[
        PatchCommandBuildTool,
        Mapping[str, PatchValueType | None],
    ],
) -> list[CLICommand] | None:
    """Patch the sequence of build commands, using the provided CLICommandParser instances.

    Each command in ``cmds_sequence`` is checked against the ``CLICommandParser`` instances in order until one
    recognises its executable. The command is then parsed, and the patch registered in ``patches`` for that
    parser's build tool (if any) is applied.

    A command that no parser recognises, including an empty command, is wrapped in ``UnparsedCLICommand``, which
    just holds the original command as a list of strings without any changes.

    Parameters
    ----------
    cmds_sequence : Sequence[list[str]]
        The commands to patch, each one as a list of string tokens.
    cli_parsers : Sequence[CLICommandParser]
        The parsers to try, in order, for each command.
    patches : Mapping[PatchCommandBuildTool, Mapping[str, PatchValueType | None]]
        The patch to apply for each build tool.

    Returns
    -------
    list[CLICommand] | None
        The patched commands in their original order, or None if a recognised command cannot be parsed
        or a patch cannot be applied.
    """
    result: list[CLICommand] = []
    for cmds in cmds_sequence:
        # An empty command has no executable to match, so guard it before indexing ``cmds[0]``.
        effective_cli_parser: CLICommandParser | None = None
        if cmds:
            effective_cli_parser = next(
                (cli_parser for cli_parser in cli_parsers if cli_parser.is_build_tool(cmds[0])),
                None,
            )

        if effective_cli_parser is None:
            result.append(UnparsedCLICommand(original_cmds=cmds))
            continue

        try:
            cli_command = effective_cli_parser.parse(cmds)
        except CommandLineParseError as error:
            # The parser may be for any supported build tool, so the message must not name a specific one.
            logger.error(
                "Failed to parse the build command %s. Error %s.",
                " ".join(cmds),
                error,
            )
            return None

        patch = patches.get(effective_cli_parser.build_tool)
        if not patch:
            # No patch (or an empty one) for this build tool: keep the parsed command unchanged.
            result.append(cli_command)
            continue

        try:
            new_cli_command = effective_cli_parser.apply_patch(
                cli_command=cli_command,
                options_patch=patch,
            )
        except PatchBuildCommandError as error:
            logger.error(
                "Failed to patch the build command %s. Error %s.",
                " ".join(cmds),
                error,
            )
            return None

        result.append(new_cli_command)

    return result


def patch_commands(
    cmds_sequence: Sequence[list[str]],
    patches: Mapping[
        PatchCommandBuildTool,
        Mapping[str, PatchValueType | None],
    ],
) -> list[list[str]] | None:
    """Patch a sequence of CLI commands.

    For each command in this command sequence:

    - If the command is not a build command or the build tool is not supported by us, it will be left intact.

    - If the command is a build command supported by us, it will be patched if a patch value is provided in
      ``patches``. If no patch value is provided for a build command, it will be left intact.

    ``patches`` is a mapping with:

    - **Key**: a member of the ``PatchCommandBuildTool`` enum

    - **Value**: the patch value provided to ``CLICommandParser.apply_patch``. For more information on the patch
      value see the concrete implementations of the ``CLICommandParser.apply_patch`` method.
      For example:
      :meth:`macaron.build_spec_generator.cli_command_parser.maven_cli_parser.MavenCLICommandParser.apply_patch`,
      :meth:`macaron.build_spec_generator.cli_command_parser.gradle_cli_parser.GradleCLICommandParser.apply_patch`.

    This means that all commands that match a build tool are patched with the same patch value.

    Parameters
    ----------
    cmds_sequence : Sequence[list[str]]
        The commands to patch, each one as a list of string tokens.
    patches : Mapping[PatchCommandBuildTool, Mapping[str, PatchValueType | None]]
        The patch to apply for each build tool.

    Returns
    -------
    list[list[str]] | None
        The patched command sequence or None if there is an error. An error happens if any command in
        ``cmds_sequence`` that we support is invalid, or if a patch value is invalid.
    """
    patched_cli_commands = _patch_commands(
        cmds_sequence=cmds_sequence,
        cli_parsers=[MVN_CLI_PARSER, GRADLE_CLI_PARSER],
        patches=patches,
    )

    if patched_cli_commands is None:
        return None

    return [cli_command.to_cmds() for cli_command in patched_cli_commands]
+ +"""This module contains the test for the build command patcher.""" + +from collections.abc import Mapping + +import pytest + +from macaron.build_spec_generator.build_command_patcher import ( + CLICommand, + CLICommandParser, + PatchValueType, + _patch_commands, +) +from macaron.build_spec_generator.cli_command_parser import PatchCommandBuildTool +from macaron.build_spec_generator.cli_command_parser.gradle_cli_parser import ( + GradleCLICommandParser, + GradleOptionPatchValueType, +) +from macaron.build_spec_generator.cli_command_parser.maven_cli_parser import ( + MavenCLICommandParser, + MavenOptionPatchValueType, +) +from macaron.build_spec_generator.cli_command_parser.unparsed_cli_command import UnparsedCLICommand + + +@pytest.mark.parametrize( + ("original", "patch_options", "expected"), + [ + pytest.param( + "mvn install -X", + {}, + "mvn install -X", + id="no_patch_value", + ), + pytest.param( + "mvn install -X", + {"goals": ["clean", "package"]}, + "mvn clean package -X", + id="patch_goals_should_persist_order", + ), + pytest.param( + "mvn install", + { + "--no-transfer-progress": True, + }, + "mvn install -ntp", + id="patching_an_optional_flag", + ), + pytest.param( + "mvn install", + { + "--threads": "2C", + }, + "mvn install -T 2C", + id="patching_single_value_option", + ), + pytest.param( + "mvn install", + { + "--activate-profiles": ["profile1", "profile2"], + }, + "mvn install -P profile1,profile2", + id="patching_comma_delimt_list_value_option", + ), + pytest.param( + "mvn install", + { + "--define": { + "maven.skip.test": "true", + "rat.skip": "true", + }, + }, + "mvn install -Dmaven.skip.test=true -Drat.skip=true", + id="patching_system_properties", + ), + # The patch for -D/--define merge with the original the system properties. The patch will always takes precedence. 
+ pytest.param( + "mvn install -Dmaven.skip.test=false -Dboo=foo", + { + "goals": ["clean", "package"], + "--define": { + "maven.skip.test": "true", + "rat.skip": "true", + }, + }, + "mvn clean package -Dmaven.skip.test=true -Drat.skip=true -Dboo=foo", + id="patching_system_properties_merging", + ), + pytest.param( + "mvn install -Dmaven.skip.test=false -Dboo=foo", + { + "goals": ["clean", "package"], + "--define": { + "maven.skip.test": None, + "rat.skip": "true", + }, + }, + "mvn clean package -Drat.skip=true -Dboo=foo", + id="patching_system_properties_disable", + ), + pytest.param( + "mvn install -T 2C -ntp -Dmaven.skip.test=true", + { + "--threads": None, + "--no-transfer-progress": None, + "--define": None, + }, + "mvn install", + id="removing_any_option_using_None", + ), + ], +) +def test_patch_mvn_cli_command( + maven_cli_parser: MavenCLICommandParser, + original: str, + patch_options: Mapping[str, MavenOptionPatchValueType | None], + expected: str, +) -> None: + """Test the patch maven cli command on valid input.""" + patch_cmds = _patch_commands( + cmds_sequence=[original.split()], + cli_parsers=[maven_cli_parser], + patches={PatchCommandBuildTool.MAVEN: patch_options}, + ) + assert patch_cmds + assert len(patch_cmds) == 1 + + patch_mvn_cli_command = maven_cli_parser.parse(patch_cmds.pop().to_cmds()) + expected_mvn_cli_command = maven_cli_parser.parse(expected.split()) + + assert patch_mvn_cli_command == expected_mvn_cli_command + + +@pytest.mark.parametrize( + ("invalid_patch"), + [ + pytest.param( + { + "--this-option-should-never-exist": True, + }, + id="unrecognised_option_name", + ), + pytest.param( + { + "--define": True, + }, + id="incorrect_define_option_type", + ), + pytest.param( + { + "--debug": "some_value", + }, + id="incorrect_debug_option_type", + ), + pytest.param( + { + "--settings": False, + }, + id="incorrect_settings_option_type", + ), + pytest.param( + { + "--activate-profiles": False, + }, + 
id="incorrect_activate_profiles_option_type", + ), + ], +) +def test_patch_mvn_cli_command_error( + maven_cli_parser: MavenCLICommandParser, + invalid_patch: dict[str, MavenOptionPatchValueType | None], +) -> None: + """Test patch mvn cli command patching with invalid patch.""" + cmd_list = "mvn -s ../.github/maven-settings.xml install -Pexamples,noRun".split() + + assert ( + _patch_commands( + cmds_sequence=[cmd_list], + cli_parsers=[maven_cli_parser], + patches={ + PatchCommandBuildTool.MAVEN: invalid_patch, + }, + ) + is None + ) + + +@pytest.mark.parametrize( + ("original", "patch_options", "expected"), + [ + pytest.param( + "gradle --build-cache clean build", + {}, + "gradle --build-cache clean build", + id="no_patch_value", + ), + pytest.param( + "gradle --build-cache clean build", + {"--build-cache": False}, + "gradle --no-build-cache clean build", + id="test_patching_negateable_option", + ), + pytest.param( + "gradle clean", + {"tasks": ["clean", "build"]}, + "gradle clean build", + id="patch_tasks_should_persist_order", + ), + pytest.param( + "gradle clean build", + {"--debug": True}, + "gradle --debug clean build", + id="patching_an_optional_flag", + ), + pytest.param( + "gradle clean build", + { + "--debug": True, + "--continue": True, + }, + "gradle --debug --continue clean build", + id="patching_an_optional_flag", + ), + pytest.param( + "gradle clean build", + {"--console": "plain"}, + "gradle --console plain clean build", + id="patching_a_single_value_option", + ), + pytest.param( + "gradle clean build -Pboo=foo", + { + "--system-prop": { + "org.gradle.caching": "true", + }, + "--project-prop": { + "bar": "", + "boo": "another_foo", + }, + }, + "gradle clean build -Dorg.gradle.caching=true -Pbar -Pboo=another_foo", + id="patching_properties", + ), + pytest.param( + "gradle clean build -Pboo=foo", + { + "--project-prop": { + "boo": None, + } + }, + "gradle clean build", + id="removing_a_property_using_none", + ), + pytest.param( + "gradle clean build", 
+ {"--exclude-task": ["boo", "test"]}, + "gradle clean build -x boo -x test", + id="excluding_tasks", + ), + pytest.param( + "gradle clean build --debug -x test -Dorg.gradle.caching=true -Pboo=foo --console=plain --no-build-cache", + { + "--exclude-task": None, + "--debug": None, + "--system-prop": None, + "--project-prop": None, + "--console": None, + "--build-cache": None, + }, + "gradle clean build", + id="removing_any_option_using_none", + ), + ], +) +def test_patch_gradle_cli_command( + gradle_cli_parser: GradleCLICommandParser, + original: str, + patch_options: dict[str, GradleOptionPatchValueType | None], + expected: str, +) -> None: + """Test the patch gradle cli command on valid input.""" + patch_cmds = _patch_commands( + cmds_sequence=[original.split()], + cli_parsers=[gradle_cli_parser], + patches={PatchCommandBuildTool.GRADLE: patch_options}, + ) + assert patch_cmds + assert len(patch_cmds) == 1 + + patch_gradle_cli_command = gradle_cli_parser.parse(patch_cmds.pop().to_cmds()) + expected_gradle_cli_command = gradle_cli_parser.parse(expected.split()) + + assert patch_gradle_cli_command == expected_gradle_cli_command + + +@pytest.mark.parametrize( + ("invalid_patch"), + [ + pytest.param( + { + "--this-option-should-never-exist": True, + }, + id="unrecognised_option_name", + ), + pytest.param( + { + "--system-prop": True, + }, + id="incorrect_system_prop_option_type", + ), + pytest.param( + { + "--project-prop": True, + }, + id="incorrect_project_prop_option_type", + ), + pytest.param( + { + "--debug": "some_value", + }, + id="incorrect_debug_option_type", + ), + pytest.param( + { + "--init-script": False, + }, + id="incorrect_init_script_option_type", + ), + pytest.param( + { + "--exclude-task": False, + }, + id="incorrect_exclude_task_option_type", + ), + pytest.param( + { + "tasks": False, + }, + id="incorrect_tasks_type", + ), + pytest.param( + { + "--no-build-cache": True, + }, + id="cannot_use_negated_form_option_as_key_in_patch", + ), + ], +) +def 
test_patch_gradle_cli_command_error( + gradle_cli_parser: GradleCLICommandParser, + invalid_patch: dict[str, GradleOptionPatchValueType | None], +) -> None: + """Test patch mvn cli command patching with invalid patch.""" + cmd_list = "gradle clean build --no-build-cache --debug --console plain -Dorg.gradle.parallel=true".split() + assert ( + _patch_commands( + cmds_sequence=[cmd_list], + cli_parsers=[gradle_cli_parser], + patches={ + PatchCommandBuildTool.GRADLE: invalid_patch, + }, + ) + is None + ) + + +@pytest.mark.parametrize( + ("cmds_sequence", "patches", "expected"), + [ + pytest.param( + [ + "mvn clean package".split(), + "gradle clean build".split(), + ], + { + PatchCommandBuildTool.MAVEN: { + "--debug": True, + }, + PatchCommandBuildTool.GRADLE: { + "--debug": True, + }, + }, + [ + "mvn clean package --debug".split(), + "gradle clean build --debug".split(), + ], + id="apply_multiple_types_of_patches", + ), + pytest.param( + [ + "mvn clean package".split(), + "gradle clean build".split(), + ], + { + PatchCommandBuildTool.MAVEN: { + "--debug": True, + }, + }, + [ + "mvn clean package --debug".split(), + "gradle clean build".split(), + ], + id="apply_one_type_of_patch_to_multiple_commands", + ), + pytest.param( + [ + "mvn clean package".split(), + "gradle clean build".split(), + ], + {}, + [ + "mvn clean package".split(), + "gradle clean build".split(), + ], + id="apply_no_patch_to_multiple_build_commands", + ), + pytest.param( + [ + "make setup".split(), + "mvn clean package".split(), + "gradle clean build".split(), + "make clean".split(), + ], + { + PatchCommandBuildTool.MAVEN: { + "--debug": True, + }, + PatchCommandBuildTool.GRADLE: { + "--debug": True, + }, + }, + [ + "make setup".split(), + "mvn clean package --debug".split(), + "gradle clean build --debug".split(), + "make clean".split(), + ], + id="command_that_we_cannot_parse_stay_the_same", + ), + ], +) +def test_patching_multiple_commands( + maven_cli_parser: MavenCLICommandParser, + 
gradle_cli_parser: GradleCLICommandParser, + cmds_sequence: list[list[str]], + patches: Mapping[ + PatchCommandBuildTool, + Mapping[str, PatchValueType | None], + ], + expected: list[list[str]], +) -> None: + """Test patching multiple commands.""" + patch_cli_commands = _patch_commands( + cmds_sequence=cmds_sequence, + cli_parsers=[maven_cli_parser, gradle_cli_parser], + patches=patches, + ) + + assert patch_cli_commands + + expected_cli_commands: list[CLICommand] = [] + cli_parsers: list[CLICommandParser] = [maven_cli_parser, gradle_cli_parser] + for cmd in expected: + effective_cli_parser = None + for cli_parser in cli_parsers: + if cli_parser.is_build_tool(cmd[0]): + effective_cli_parser = cli_parser + break + + if effective_cli_parser: + expected_cli_commands.append(cli_parser.parse(cmd)) + else: + expected_cli_commands.append( + UnparsedCLICommand( + original_cmds=cmd, + ) + ) + + assert patch_cli_commands == expected_cli_commands + + +@pytest.mark.parametrize( + ("cmds_sequence", "patches"), + [ + pytest.param( + [ + "mvn --this-is-not-a-mvn-option".split(), + "gradle clean build".split(), + ], + { + PatchCommandBuildTool.MAVEN: { + "--debug": True, + }, + PatchCommandBuildTool.GRADLE: { + "--debug": True, + }, + }, + id="incorrect_mvn_command", + ), + pytest.param( + [ + "mvn clean package".split(), + "gradle clean build --not-a-gradle-command".split(), + ], + { + PatchCommandBuildTool.MAVEN: { + "--debug": True, + }, + PatchCommandBuildTool.GRADLE: { + "--debug": True, + }, + }, + id="incorrect_gradle_command", + ), + pytest.param( + [ + "mvn clean package".split(), + "gradle clean build".split(), + ], + { + PatchCommandBuildTool.MAVEN: { + "--not-a-valid-option": True, + }, + }, + id="incorrrect_patch_option_long_name", + ), + pytest.param( + [ + "mvn clean package".split(), + "gradle clean build".split(), + ], + { + PatchCommandBuildTool.MAVEN: { + # --debug expects a boolean or a None value. 
+ "--debug": 10, + }, + }, + id="incorrrect_patch_value", + ), + ], +) +def test_patching_multiple_commands_error( + maven_cli_parser: MavenCLICommandParser, + gradle_cli_parser: GradleCLICommandParser, + cmds_sequence: list[list[str]], + patches: Mapping[ + PatchCommandBuildTool, + Mapping[str, PatchValueType | None], + ], +) -> None: + """Test error cases for patching multiple commands.""" + assert ( + _patch_commands( + cmds_sequence=cmds_sequence, + cli_parsers=[maven_cli_parser, gradle_cli_parser], + patches=patches, + ) + is None + ) From 8909a017b0500cc8b92448eff1dd8f7b4328439f Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 9 Jul 2025 22:16:05 +1000 Subject: [PATCH 05/27] feat: add jdk version normalizer Signed-off-by: Trong Nhan Mai --- .../jdk_version_normalizer.py | 81 +++++++++++++++++++ .../test_jdk_version_normalizer.py | 53 ++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 src/macaron/build_spec_generator/jdk_version_normalizer.py create mode 100644 tests/build_spec_generator/test_jdk_version_normalizer.py diff --git a/src/macaron/build_spec_generator/jdk_version_normalizer.py b/src/macaron/build_spec_generator/jdk_version_normalizer.py new file mode 100644 index 000000000..852aab9b2 --- /dev/null +++ b/src/macaron/build_spec_generator/jdk_version_normalizer.py @@ -0,0 +1,81 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the logic to nomarlize a JDK version string to a major version number.""" + +SUPPORTED_JAVA_VERSION = [ + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", +] + + +def normalize_jdk_version(jdk_version_str: str) -> str | None: + """Return the major JDK version number. 
+ + We assume that the jdk version string is already valid (e.g not using a JDK + version that is not available in the real world. + + For 1.x versions, we returns the major version as ``x``. + + Parameters + ---------- + jdk_version_str: str + The jdk version string. + + Returns + ------- + str | None + The major jdk version number as string or None if there is an error. + + Examples + -------- + >>> normalize_jdk_version("19") + '19' + >>> normalize_jdk_version("19-ea") + '19' + >>> normalize_jdk_version("11.0.1") + '11' + >>> normalize_jdk_version("1.8") + '8' + >>> normalize_jdk_version("25.0.1") + """ + first, _, after = jdk_version_str.partition(".") + jdk_major_ver = None + if first == "1": + # Cases like 1.8.0_523 + # Or 1.8 + jdk_major_ver, _, _ = after.partition(".") + else: + # Cases like 11 or 11.0 or 11.0.1 + jdk_major_ver = first + + if jdk_major_ver in SUPPORTED_JAVA_VERSION: + return jdk_major_ver + + # Handle edge cases: + # pkg:maven/org.apache.druid.integration-tests/druid-it-cases@25.0.0 + # - "8 (Azul Systems Inc. 25.282-b08)" + # pkg:maven/io.helidon.reactive.media/helidon-reactive-media-jsonp@4.0.0-ALPHA1 + # - "19-ea" + for support in SUPPORTED_JAVA_VERSION: + # Wouldn't work for cases like 19000 but that's not a big problem + # as long as the result is a valid major version. + if jdk_major_ver.startswith(support): + return support + + return None diff --git a/tests/build_spec_generator/test_jdk_version_normalizer.py b/tests/build_spec_generator/test_jdk_version_normalizer.py new file mode 100644 index 000000000..2236505e8 --- /dev/null +++ b/tests/build_spec_generator/test_jdk_version_normalizer.py @@ -0,0 +1,53 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the tests for the jdk version normalizer module."""

import pytest

from macaron.build_spec_generator.jdk_version_normalizer import normalize_jdk_version


@pytest.mark.parametrize(
    ("version_string", "expected"),
    [
        pytest.param(
            "1.8.0_523",
            "8",
            id="1.x_with_patch_version",
        ),
        pytest.param(
            "1.8",
            "8",
            id="1.x_without_patch_version",
        ),
        pytest.param(
            "11.0.1",
            "11",
            id="major_number_stands_first_with_patch_version",
        ),
        pytest.param(
            "11.0",
            "11",
            id="major_number_stands_first_without_patch_version",
        ),
        pytest.param(
            "11",
            "11",
            id="just_the_major_version",
        ),
        pytest.param(
            "8 (Azul Systems Inc. 25.282-b08)",
            "8",
            id="major_follows_with_text",
        ),
        # This id must differ from the previous case so each test can be selected unambiguously.
        pytest.param(
            "19-ea",
            "19",
            id="major_follows_with_early_access_suffix",
        ),
    ],
)
def test_jdk_version_normalizer(version_string: str, expected: str) -> None:
    """Test the normalize_jdk_version function on version strings that map to a supported major version."""
    assert normalize_jdk_version(version_string) == expected
tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py create mode 100644 tests/build_spec_generator/reproducible_central/test_reproducible_central.py diff --git a/src/macaron/build_spec_generator/build_spec_generator.py b/src/macaron/build_spec_generator/build_spec_generator.py new file mode 100644 index 000000000..0868f19a4 --- /dev/null +++ b/src/macaron/build_spec_generator/build_spec_generator.py @@ -0,0 +1,96 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the functions used for generating build specs from the Macaron database.""" + +import logging +from collections.abc import Mapping +from enum import Enum + +from packageurl import PackageURL +from sqlalchemy import create_engine +from sqlalchemy.orm import Session + +from macaron.build_spec_generator.build_command_patcher import PatchCommandBuildTool, PatchValueType +from macaron.build_spec_generator.reproducible_central.reproducible_central import gen_reproducible_central_build_spec + +logger: logging.Logger = logging.getLogger(__name__) + + +class BuildSpecFormat(str, Enum): + """The build spec format that we supports.""" + + REPRODUCIBLE_CENTRAL = "rc-buildspec" + + +CLI_COMMAND_PATCHES: dict[ + PatchCommandBuildTool, + Mapping[str, PatchValueType | None], +] = { + PatchCommandBuildTool.MAVEN: { + "goals": ["clean", "package"], + "--batch-mode": False, + "--quiet": False, + "--no-transfer-progress": False, + # Example pkg:maven/io.liftwizard/liftwizard-servlet-logging-mdc@1.0.1 + # https://github.com/liftwizard/liftwizard/blob/ + # 4ea841ffc9335b22a28a7a19f9156e8ba5820027/.github/workflows/build-and-test.yml#L23 + "--threads": None, + # For cases such as + # pkg:maven/org.apache.isis.valuetypes/isis-valuetypes-prism-resources@2.0.0-M7 + "--version": False, + "--define": { + # 
pkg:maven/org.owasp/dependency-check-utils@7.3.2 + # To remove "-Dgpg.passphrase=$MACARON_UNKNOWN" + "gpg.passphrase": None, + "skipTests": "true", + "maven.test.skip": "true", + "maven.site.skip": "true", + "rat.skip": "true", + "maven.javadoc.skip": "true", + }, + }, + PatchCommandBuildTool.GRADLE: { + "tasks": ["clean", "assemble"], + "--console": "plain", + "--exclude-task": ["test"], + "--project-prop": { + "skip.signing": "", + "skipSigning": "", + "gnupg.skip": "", + }, + }, +} + + +def gen_build_spec_str( + purl: PackageURL, + database_path: str, + build_spec_format: BuildSpecFormat, +) -> str | None: + """Return the content of a build spec file from a given PURL. + + Parameters + ---------- + purl: PackageURL + The package URL to generate build spec for. + database_path: str + The path to the Macaron database. + build_spec_format: BuildSpecFormat + The format of the final build spec content. + + Returns + ------- + str | None + The build spec content as a string, or None if there is an error. + """ + db_engine = create_engine(f"sqlite+pysqlite:///{database_path}", echo=False) + + with Session(db_engine) as session, session.begin(): + match build_spec_format: + case BuildSpecFormat.REPRODUCIBLE_CENTRAL: + return gen_reproducible_central_build_spec( + purl=purl, + session=session, + patches=CLI_COMMAND_PATCHES, + ) diff --git a/src/macaron/build_spec_generator/reproducible_central/__init__.py b/src/macaron/build_spec_generator/reproducible_central/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/src/macaron/build_spec_generator/reproducible_central/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the representation of information needed for Reproducible Central Buildspec generation."""

import logging
from collections.abc import Sequence
from dataclasses import dataclass

from packageurl import PackageURL
from sqlalchemy.orm import Session

from macaron.build_spec_generator.macaron_db_extractor import (
    GenericBuildCommandInfo,
    lookup_any_build_command,
    lookup_build_tools_check,
    lookup_latest_component_id,
    lookup_repository,
)
from macaron.database.table_definitions import Repository
from macaron.errors import QueryMacaronDatabaseError
from macaron.slsa_analyzer.checks.build_tool_check import BuildToolFacts

logger: logging.Logger = logging.getLogger(__name__)


@dataclass
class RcInternalBuildInfo:
    """The build related information read from the Macaron database for one PURL.

    It is an internal data holder used only when generating the Reproducible Central build spec.
    """

    purl: PackageURL
    repository: Repository
    generic_build_command_facts: Sequence[GenericBuildCommandInfo] | None
    latest_component_id: int
    build_tool_facts: Sequence[BuildToolFacts]


def get_rc_internal_build_info(
    purl: PackageURL,
    session: Session,
) -> RcInternalBuildInfo | None:
    """Collect the build related information of a PackageURL into an ``RcInternalBuildInfo`` instance.

    The latest component ID, the build tool facts, the repository and the build commands are
    looked up in that order. A failed query, a missing component, missing build tool facts or a
    missing repository is logged as an error and ends the collection.

    Parameters
    ----------
    purl: PackageURL
        The PackageURL to extract information about.
    session: Session
        The SQLAlchemy Session for the Macaron database.

    Returns
    -------
    RcInternalBuildInfo | None
        An instance of ``RcInternalBuildInfo`` or None if there was an error.
    """
    # Step 1: the latest analysed component for this PURL.
    try:
        component_id = lookup_latest_component_id(
            purl=purl,
            session=session,
        )
    except QueryMacaronDatabaseError as error:
        logger.error(
            "Unexpected result from querying latest component id for %s. Error: %s",
            purl.to_string(),
            error,
        )
        return None
    if not component_id:
        logger.error(
            "Cannot find an analysis result for PackageURL %s in the database. "
            "Please check if an analysis for it exists in the database.",
            purl.to_string(),
        )
        return None
    logger.debug("Latest component ID: %d", component_id)

    # Step 2: the build tools detected for that component.
    try:
        tool_facts = lookup_build_tools_check(
            component_id=component_id,
            session=session,
        )
    except QueryMacaronDatabaseError as error:
        logger.error(
            "Unexpected result from querying build tools for %s. Error: %s",
            purl.to_string(),
            error,
        )
        return None
    if not tool_facts:
        logger.error(
            "Cannot find any build tool for PackageURL %s in the database.",
            purl.to_string(),
        )
        return None
    logger.debug("Build tools discovered from the %s table: %s", BuildToolFacts.__tablename__, tool_facts)

    # Step 3: the repository of that component.
    try:
        repository = lookup_repository(component_id, session)
    except QueryMacaronDatabaseError as error:
        logger.error(
            "Unexpected result from querying repository information for %s. Error: %s",
            purl.to_string(),
            error,
        )
        return None
    if not repository:
        logger.error(
            "Cannot find any repository information for %s in the database.",
            purl.to_string(),
        )
        return None

    # Step 4: the build commands; an empty result is passed through, only a failed query is an error.
    try:
        build_command_facts = lookup_any_build_command(component_id, session)
    except QueryMacaronDatabaseError as error:
        logger.error(
            "Unexpected result from querying all build command information for %s. Error: %s",
            purl.to_string(),
            error,
        )
        return None

    return RcInternalBuildInfo(
        purl=purl,
        repository=repository,
        latest_component_id=component_id,
        build_tool_facts=tool_facts,
        generic_build_command_facts=build_command_facts,
    )
+ +"""This module contains the logic to generate a build spec in Reproducible Central format.""" + +import logging +import shlex +from collections.abc import Mapping, Sequence +from enum import Enum +from importlib import metadata as importlib_metadata +from pprint import pformat + +import sqlalchemy.orm +from packageurl import PackageURL + +from macaron.build_spec_generator.build_command_patcher import PatchCommandBuildTool, PatchValueType, patch_commands +from macaron.build_spec_generator.jdk_finder import find_jdk_version_from_central_maven_repo +from macaron.build_spec_generator.jdk_version_normalizer import normalize_jdk_version +from macaron.build_spec_generator.reproducible_central.rc_build_info import ( + RcInternalBuildInfo, + get_rc_internal_build_info, +) +from macaron.slsa_analyzer.checks.build_tool_check import BuildToolFacts + +logger: logging.Logger = logging.getLogger(__name__) + +# We use a subset of available config options from +# https://github.com/jvm-repo-rebuild/reproducible-central/blob/master/doc/BUILDSPEC.md +# An example: +# https://github.com/jvm-repo-rebuild/reproducible-central/blob/master/content/com/google/guava/guava-32.0.0-android.buildspec +# About this template +# - Because the Reproducible-Central build spec is a bash script by itself, we can use +# Bash comment syntax. +# - We only work with git repository and its commit hash. Therefore `gitRepo` and `gitTag` are used only. +# Even though it's called gitTag, a commit hash would work. +# https://github.com/jvm-repo-rebuild/reproducible-central/blob/46de9b405cb30ff94effe0ba47c1ebecc5a1c17e/bin/includes/fetchSource.sh#L59C1-L59C72 +STRING_TEMPLATE = """# Copyright (c) 2025, Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+# Generated by Macaron version {macaron_version} + +{extra_comment} + +groupId={group_id} +artifactId={artifact_id} +version={version} + +gitRepo={git_repo} + +gitTag={git_tag} + +tool={tool} +jdk={jdk} + +newline={newline} + +command="{command}" + +buildinfo={buildinfo} +""" + + +class _MacaronBuildToolName(str, Enum): + """Represent the name of a build tool that Macaron stores in the database. + + These doesn't cover all build tools that Macaron support, and ONLY include the ones that we + support generating Reproducible Central Buildspec for. + """ + + MAVEN = "maven" + GRADLE = "gradle" + + +class _ReproducibleCentralBuildToolName(str, Enum): + """Represent the name of the build tool used in the Reproducible Central's Buildspec. + + https://github.com/jvm-repo-rebuild/reproducible-central/blob/master/doc/BUILDSPEC.md + """ + + MAVEN = "mvn" + GRADLE = "gradle" + SBT = "sbt" + + +def remove_shell_quote(cmd: list[str]) -> list[str]: + """Remove shell quotes from a shell command. + + Parameters + ---------- + cmd: list[str] + The shell command as list of string. + + Returns + ------- + list[str] + The shell command with all quote removed. + + Examples + -------- + >>> cmd = "mvn -f fit/core-reference/pom.xml verify '-Dit.test=RESTITCase' '-Dmodernizer.skip=true' '-Drat.skip=true'" + >>> remove_shell_quote(cmd.split()) + ['mvn', '-f', 'fit/core-reference/pom.xml', 'verify', '-Dit.test=RESTITCase', '-Dmodernizer.skip=true', '-Drat.skip=true'] + """ + return shlex.split(" ".join(cmd)) + + +def _get_extra_comments(comments: list[str]) -> str: + """Generate the shell comments for adding additional information into the RC-build spec.""" + shell_comments = [f"# {comment}" for comment in comments] + return "\n".join(shell_comments) + + +def _get_build_command_sequence(cmds_sequence: list[list[str]]) -> str: + """Return a build command sequence as a string. + + The build commands in the sequence will be && together, because RC's build spec + is a shell script. 
+ """ + removed_shell_quote = [" ".join(remove_shell_quote(cmds)) for cmds in cmds_sequence] + result = " && ".join(removed_shell_quote) + return result + + +def _get_default_build_command_sequence( + macaron_build_tool_name: _MacaronBuildToolName, + patches: Mapping[ + PatchCommandBuildTool, + Mapping[str, PatchValueType | None], + ], +) -> list[list[str]] | None: + """Return a default build command sequence for the corresponding build tool name discovered by Macaron.""" + default_build_command = None + if macaron_build_tool_name == _MacaronBuildToolName.MAVEN: + default_build_command = "mvn clean package" + + if macaron_build_tool_name == _MacaronBuildToolName.GRADLE: + default_build_command = "./gradlew clean assemble publishToMavenLocal" + + if not default_build_command: + logger.critical( + "The default build command %s is not supported for getting default build command.", + macaron_build_tool_name, + ) + return None + + patched_build_commands = patch_commands( + cmds_sequence=[default_build_command.split()], + patches=patches, + ) + + if not patched_build_commands: + logger.error( + "Failed to patch default build command %s.", + default_build_command, + ) + return None + + return patched_build_commands + + +def _get_macaron_build_tool_name(build_tool_facts: Sequence[BuildToolFacts]) -> _MacaronBuildToolName | None: + """Return the build tool name reported by Macaron from the database.""" + for fact in build_tool_facts: + if fact.language in {"java"}: + try: + macaron_build_tool_name = _MacaronBuildToolName(fact.build_tool_name) + except ValueError: + continue + + # TODO: What happen if we report multiple build tool in the database. + return macaron_build_tool_name + + return None + + +def _gen_reproducible_central_build_spec( + build_info: RcInternalBuildInfo, + patches: Mapping[ + PatchCommandBuildTool, + Mapping[str, PatchValueType | None], + ], +) -> str | None: + """Return the RC's Buildspec content from a ``RcInternalBuildInfo`` instance. 
+ + This function will perform necessary validation on the data captured within ``build_info`` to make sure + it has enough information for generating the output BuildSpec. + + This function will use the information available in ``build_info`` to populate the file. + For example, the GAV coordinate can be obtained from ``build_info.purl``. + + The ``patches`` mapping will be used for patching the build command in the `command` section of the Buildspec + output. + + The function will return the Buildspec file content as string or None if there is an error. + """ + extra_comments = [] + + purl = build_info.purl + logger.debug( + "Generating build spec for %s with command patches:\n%s", + purl, + pformat(patches), + ) + + group = purl.namespace + artifact = purl.name + version = purl.version + if group is None or version is None: + logger.error("Missing group and/or version for purl %s.", purl.to_string()) + return None + + extra_comments.append(f"Input PURL - {purl}") + + macaron_build_tool_name = _get_macaron_build_tool_name(build_info.build_tool_facts) + if not macaron_build_tool_name: + logger.error( + "The PackageURL %s doesn't have any build tool that we support for generating RC buildspec. It has %s.", + purl.to_string(), + [(fact.build_tool_name, fact.language) for fact in build_info.build_tool_facts], + ) + return None + + rc_build_tool_name = None + if macaron_build_tool_name == _MacaronBuildToolName.MAVEN: + rc_build_tool_name = _ReproducibleCentralBuildToolName.MAVEN + elif macaron_build_tool_name == _MacaronBuildToolName.GRADLE: + rc_build_tool_name = _ReproducibleCentralBuildToolName.GRADLE + if not rc_build_tool_name: + logger.critical("%s is not supported to generate RC's buildspec.", macaron_build_tool_name.value) + return None + + # Set the default build command and jdk version. + # The default build command depends on the build tool, while the default jdk version + # is 8. 
+ final_build_command_seq = _get_default_build_command_sequence( + macaron_build_tool_name=macaron_build_tool_name, + patches=patches, + ) + if not final_build_command_seq: + logger.critical( + "Cannot generate a default build command for %s", + purl, + ) + return None + final_jdk_version = "8" + extra_comments.append( + f"Initial default JDK version {final_jdk_version} and default build command {final_build_command_seq}." + ) + + if build_info.generic_build_command_facts: + # The elements are ordered in decreasing confidence score. We pick the highest one. + build_fact = build_info.generic_build_command_facts[0] + lookup_build_command = build_fact.command + extra_comments.append(f"The lookup build command: {lookup_build_command}") + + patched_build_commands = patch_commands( + cmds_sequence=[lookup_build_command], + patches=patches, + ) + if not patched_build_commands: + logger.error( + "Failed to patch look up command %s.", + lookup_build_command, + ) + return None + + final_build_command_seq = patched_build_commands + + lookup_jdk_vers = build_fact.language_versions + if lookup_jdk_vers: + lookup_jdk_ver = lookup_jdk_vers[-1] + extra_comments.append(f"Jdk version from lookup build command {lookup_jdk_ver}.") + final_jdk_version = lookup_jdk_ver + else: + extra_comments.append("No JDK version found from lookup result.") + jdk_from_jar = find_jdk_version_from_central_maven_repo( + group_id=group, + artifact_id=artifact, + version=version, + ) + if jdk_from_jar: + extra_comments.append(f"Found JDK version from jar {jdk_from_jar}.") + final_jdk_version = jdk_from_jar + else: + extra_comments.append(f"No JDK version found from jar {jdk_from_jar}.") + + major_jdk_version = normalize_jdk_version(final_jdk_version) + if not major_jdk_version: + logger.error("Failed to obtain the major version of %s", final_jdk_version) + return None + + template_format_values: dict[str, str] = { + "macaron_version": importlib_metadata.version("macaron"), + "group_id": group, +
"artifact_id": artifact, + "version": version, + "git_repo": build_info.repository.remote_path, + "git_tag": build_info.repository.commit_sha, + "tool": rc_build_tool_name.value, + "newline": "lf", + "buildinfo": f"target/{artifact}-{version}.buildinfo", + "extra_comment": _get_extra_comments(extra_comments), + "jdk": major_jdk_version, + "command": _get_build_command_sequence(final_build_command_seq), + } + + return STRING_TEMPLATE.format_map(template_format_values) + + +def gen_reproducible_central_build_spec( + purl: PackageURL, + session: sqlalchemy.orm.Session, + patches: Mapping[ + PatchCommandBuildTool, + Mapping[str, PatchValueType | None], + ], +) -> str | None: + """Return the content of a Reproducible Central Buildspec File. + + The Reproducible Central Buildspec File Format can be found here: + https://github.com/jvm-repo-rebuild/reproducible-central/blob/e1708dd8dde3cdbe66b0cec9948812b601e90ba6/doc/BUILDSPEC.md#format + + Parameters + ---------- + purl: PackageURL + The PackageURL to generate build spec for. + session: sqlalchemy.orm.Session + The SQLAlchemy Session opened for the database to extract build information. + patches: Mapping[PatchCommandBuildTool, Mapping[str, PatchValueType | None]] + The patches to apply to the build commands in ``build_info`` before being populated in + the output Buildspec. + + Returns + ------- + str | None + The content of the Buildspec as string or None if there is an error. + The errors that can happen are: 1. The input PURL is invalid, 2. There is no supported build tool + for this PURL, 3. Failed to patch the build commands using the provided ``patches``, 4. The database from + ``session`` doesn't contain enough information.
+ """ + internal_build_info = get_rc_internal_build_info( + purl=purl, + session=session, + ) + + if not internal_build_info: + logger.error( + "Failed to obtain necessary data for purl %s from the database.", + purl, + ) + return None + + return _gen_reproducible_central_build_spec( + build_info=internal_build_info, + patches=patches, + ) diff --git a/tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py b/tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py new file mode 100644 index 000000000..df541d461 --- /dev/null +++ b/tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py @@ -0,0 +1,165 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This script compares 2 Reproducible Central Buildspec files.""" + +import logging +import os +import sys +from collections.abc import Callable + +CompareFn = Callable[[object, object], bool] + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +logging.basicConfig(format="[%(filename)s:%(lineno)s %(tag)s] %(message)s") + + +def log_with_tag(tag: str) -> Callable[[str], None]: + """Generate a log function that prints the name of the file and a tag at the beginning of each line.""" + + def log_fn(msg: str) -> None: + logger.info(msg, extra={"tag": tag}) + + return log_fn + + +log_info = log_with_tag("INFO") +log_err = log_with_tag("ERROR") +log_failed = log_with_tag("FAILED") +log_passed = log_with_tag("PASSED") + + +def log_diff_str(name: str, result: str, expected: str) -> None: + """Pretty-print the diff of two Python strings.""" + output = [ + f"'{name}'", + *("---- Result ---", f"{result}"), + *("---- Expected ---", f"{expected}"), + "-----------------", + ] + log_info("\n".join(output)) + + +def skip_compare(_result: object, _expected: object) -> bool: + """Return ``True`` always. 
+ + This compare function is used when we want to skip comparing a field. + """ + return True + + +def compare_rc_build_spec( + result: dict[str, str], + expected: dict[str, str], + compare_fn_map: dict[str, CompareFn], +) -> bool: + """Compare two dictionaries obtained from 2 Reproducible Central build spec. + + Parameters + ---------- + result : dict[str, str] + The result object. + expected : dict[str, str] + The expected object. + compare_fn_map : dict[str, CompareFn] + A map from field name to corresponding compare function. + + Returns + ------- + bool + ``True`` if the comparison is successful, ``False`` otherwise. + """ + result_keys_only = result.keys() - expected.keys() + expected_keys_only = expected.keys() - result.keys() + + equal = True + + if len(result_keys_only) > 0: + log_err(f"Result has the following extraneous fields: {result_keys_only}") + equal = False + + if len(expected_keys_only) > 0: + log_err(f"Result does not contain these expected fields: {expected_keys_only}") + equal = False + + common_keys = set(result.keys()).intersection(set(expected.keys())) + + for key in common_keys: + if key in compare_fn_map: + equal &= compare_fn_map[key](result[key], expected[key]) + continue + + if result[key] != expected[key]: + log_err(f"Mismatch found in '{key}'") + log_diff_str(key, result[key], expected[key]) + equal = False + + return equal + + +def extract_data_from_build_spec(build_spec_path: str) -> dict[str, str] | None: + """Extract data from build spec.""" + original_build_spec_content = None + try: + with open(build_spec_path, encoding="utf-8") as build_spec_file: + original_build_spec_content = build_spec_file.read() + except OSError as error: + log_err(f"Failed to read the Reproducible Central Buildspec file at {build_spec_path}. Error {error}.") + return None + + build_spec_values: dict[str, str] = {} + + # A Reproducible Central buildspec is a valid bash script. + # We use the following assumption to parse all key value mapping in a Reproducible Central buildspec.
+ # 1. Each variable-value mapping has the form of + # <variable>=<value> + # For example ``tool=mvn`` + # 2. If the first letter of a line is "#" we treat that line as a comment and ignore + # it. + for line in original_build_spec_content.splitlines(): + if not line or line.startswith("#"): + continue + + variable, _, value = line.partition("=") + # We allow defining a variable multiple times, where subsequent definition + # override the previous one. + build_spec_values[variable] = value + + return build_spec_values + + +def main() -> int: + """Compare a Reproducible Central Buildspec file with an expected output.""" + result_path = sys.argv[1] + expect_path = sys.argv[2] + + result_build_spec = extract_data_from_build_spec(result_path) + expect_build_spec = extract_data_from_build_spec(expect_path) + + if not expect_build_spec: + log_err(f"Failed to extract bash variables from expected Buildspec at {expect_path}.") + return os.EX_USAGE + + if not result_build_spec: + log_err(f"Failed to extract bash variables from result Buildspec at {result_path}.") + return os.EX_USAGE + + equal = compare_rc_build_spec( + result=result_build_spec, + expected=expect_build_spec, + compare_fn_map={ + "buildinfo": skip_compare, + }, + ) + + if not equal: + log_failed("The result RC Buildspec does not match the RC Buildspec.") + return os.EX_DATAERR + + log_passed("The result RC Buildspec matches the RC Buildspec.") + return os.EX_OK + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py new file mode 100644 index 000000000..6197d60c7 --- /dev/null +++ b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the tests for Reproducible Central build spec generation""" + +import pytest + +from macaron.build_spec_generator.reproducible_central.reproducible_central import ( + _get_build_command_sequence, + _get_extra_comments, +) + + +@pytest.mark.parametrize( + ("comments", "expected"), + [ + pytest.param( + [ + "Input PURL - pkg:maven/oracle/macaron@v0.16.0", + "Initial default JDK version 8 and default build command boo", + ], + "# Input PURL - pkg:maven/oracle/macaron@v0.16.0\n# Initial default JDK version 8 and default build command boo", + ), + pytest.param( + [ + "Input PURL - pkg:maven/oracle/macaron@v0.16.0", + ], + "# Input PURL - pkg:maven/oracle/macaron@v0.16.0", + ), + pytest.param( + [], + "", + ), + ], +) +def test_get_extra_comments(comments: list[str], expected: str) -> None: + """Test the _get_extra_comments function.""" + assert _get_extra_comments(comments) == expected + + +@pytest.mark.parametrize( + ("cmds_sequence", "expected"), + [ + pytest.param( + [ + "make clean".split(), + "mvn clean package".split(), + ], + "make clean && mvn clean package", + ), + pytest.param( + [ + "mvn clean package".split(), + ], + "mvn clean package", + ), + ], +) +def test_get_build_command_sequence( + cmds_sequence: list[list[str]], + expected: str, +) -> None: + """Test the _get_build_command_sequence function.""" + assert _get_build_command_sequence(cmds_sequence) == expected From 426e3032bbfefab91797b13e0fa790ac49e176a1 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 9 Jul 2025 22:17:53 +1000 Subject: [PATCH 07/27] feat: expose macaron gen-build-spec cli command Signed-off-by: Trong Nhan Mai --- src/macaron/__main__.py | 88 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index afaabdbe5..a51125fc5 100644 --- a/src/macaron/__main__.py +++ 
b/src/macaron/__main__.py @@ -14,6 +14,10 @@ from packageurl import PackageURL import macaron +from macaron.build_spec_generator.build_spec_generator import ( + BuildSpecFormat, + gen_build_spec_str, +) from macaron.config.defaults import create_defaults, load_defaults from macaron.config.global_config import global_config from macaron.errors import ConfigurationError @@ -235,6 +239,63 @@ def verify_policy(verify_policy_args: argparse.Namespace) -> int: return os.EX_USAGE +def gen_build_spec(gen_build_spec_args: argparse.Namespace) -> int: + """Generate a build spec containing the build information discovered by Macaron. + + Returns + ------- + int + Returns os.EX_OK if successful or the corresponding error code on failure. + """ + if not os.path.isfile(gen_build_spec_args.database): + logger.critical("The database file does not exist.") + return os.EX_OSFILE + + output_format = gen_build_spec_args.output_format + + try: + build_spec_format = BuildSpecFormat(output_format) + except ValueError: + logger.error("The output format %s is not supported.", output_format) + return os.EX_USAGE + + try: + purl = PackageURL.from_string(gen_build_spec_args.package_url) + except ValueError as error: + logger.error("Cannot parse purl %s. 
Error %s", gen_build_spec_args.package_url, error) + return os.EX_USAGE + + build_spec_content = gen_build_spec_str( + purl=purl, + database_path=gen_build_spec_args.database, + build_spec_format=build_spec_format, + ) + + if not build_spec_content: + logger.error("Error while generate reproducible central build spec.") + return os.EX_DATAERR + + logger.debug("Build spec content: \n%s", build_spec_content) + build_spec_filepath = os.path.join(global_config.output_path, "macaron.buildspec") + try: + with open(build_spec_filepath, mode="w", encoding="utf-8") as file: + logger.info( + "Generating the %s format build spec to %s.", + build_spec_format.value, + os.path.relpath(build_spec_filepath, os.getcwd()), + ) + file.write(build_spec_content) + except OSError as error: + logger.error( + "Could not generate the Buildspec to %s. Error: %s", + os.path.relpath(build_spec_filepath, os.getcwd()), + error, + ) + return os.EX_DATAERR + + return os.EX_OK + + def find_source(find_args: argparse.Namespace) -> int: """Perform repo and commit finding for a passed PURL, or commit finding for a passed PURL and repo.""" if repo_finder.find_source(find_args.package_url, find_args.repo_path or None): @@ -283,6 +344,9 @@ def perform_action(action_args: argparse.Namespace) -> None: find_source(action_args) + case "gen-build-spec": + sys.exit(gen_build_spec(action_args)) + case _: logger.error("Macaron does not support command option %s.", action_args.action) sys.exit(os.EX_USAGE) @@ -515,6 +579,30 @@ def main(argv: list[str] | None = None) -> None: ), ) + # Generate a build spec containing rebuild information for a software component. 
+ gen_build_spec_parser = sub_parser.add_parser(name="gen-build-spec") + + gen_build_spec_parser.add_argument( + "-purl", + "--package-url", + required=True, + type=str, + help=("The PURL string of the software component to generate build spec for."), + ) + + gen_build_spec_parser.add_argument( + "--database", + help="Path to the database.", + required=True, + ) + + gen_build_spec_parser.add_argument( + "--output-format", + type=str, + help=('The output format. Can be rc-buildspec (Reproducible-central build spec) (default "rc-buildspec")'), + default="rc-buildspec", + ) + args = main_parser.parse_args(argv) if not args.action: From f24d7af7c9a0a70080fced589c3a3bd81906b92a Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 9 Jul 2025 22:19:51 +1000 Subject: [PATCH 08/27] test: modify the integration test script to use the compare rc build spec script Signed-off-by: Trong Nhan Mai --- tests/integration/run.py | 55 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/integration/run.py b/tests/integration/run.py index 2cb77025b..ff4cb474d 100644 --- a/tests/integration/run.py +++ b/tests/integration/run.py @@ -80,6 +80,7 @@ def configure_logging(verbose: bool) -> None: "deps_report": ["tests", "dependency_analyzer", "compare_dependencies.py"], "vsa": ["tests", "vsa", "compare_vsa.py"], "find_source": ["tests", "find_source", "compare_source_reports.py"], + "rc_build_spec": ["tests", "build_spec_generator", "reproducible_central", "compare_rc_build_spec.py"], } VALIDATE_SCHEMA_SCRIPTS: dict[str, Sequence[str]] = { @@ -465,6 +466,52 @@ def cmd(self, macaron_cmd: str) -> list[str]: return args +class GenBuildSpecStepOptions(TypedDict): + """The configuration options of an gen-build-spec step.""" + + main_args: Sequence[str] + command_args: Sequence[str] + database: str + + +class GenBuildSpecStep(Step[GenBuildSpecStepOptions]): + """A step running the ``macaron gen-build-spec`` command.""" + + @staticmethod + def 
options_schema(cwd: str) -> cfgv.Map: # pylint: disable=unused-argument + """Generate the schema of a gen-build-spec step.""" + return cfgv.Map( + "gen-build-spec options", + None, + *[ + cfgv.Optional( + key="main_args", + check_fn=cfgv.check_array(cfgv.check_string), + default=[], + ), + cfgv.Optional( + key="command_args", + check_fn=cfgv.check_array(cfgv.check_string), + default=[], + ), + cfgv.Optional( + key="database", + check_fn=cfgv.check_string, + default="./output/macaron.db", + ), + ], + ) + + def cmd(self, macaron_cmd: str) -> list[str]: + """Generate the command of the step.""" + args = [macaron_cmd] + args.extend(self.options["main_args"]) + args.append("gen-build-spec") + args.extend(["--database", self.options["database"]]) + args.extend(self.options["command_args"]) + return args + + class VerifyStepOptions(TypedDict): """The configuration options of a verify step.""" @@ -599,6 +646,7 @@ def gen_step_schema(cwd: str, check_expected_result_files: bool) -> cfgv.Map: "verify", "validate_schema", "find-source", + "gen-build-spec", ), ), ), @@ -638,6 +686,12 @@ def gen_step_schema(cwd: str, check_expected_result_files: bool) -> cfgv.Map: key="options", schema=VerifyStep.options_schema(cwd=cwd), ), + cfgv.ConditionalRecurse( + condition_key="kind", + condition_value="gen-build-spec", + key="options", + schema=GenBuildSpecStep.options_schema(cwd=cwd), + ), cfgv.ConditionalRecurse( condition_key="kind", condition_value="find-source", @@ -842,6 +896,7 @@ def parse_step_config(step_id: int, step_config: Mapping) -> Step: "compare": CompareStep, "validate_schema": ValidateSchemaStep, "find-source": FindSourceStep, + "gen-build-spec": GenBuildSpecStep, }[kind] return step_cls( # type: ignore # https://github.com/python/mypy/issues/3115 step_id=step_id, From 4c3ef2d056decef1fd62201f1c7fe6c8bf3157dd Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 9 Jul 2025 22:20:38 +1000 Subject: [PATCH 09/27] test: add integration tests for the gen-build-spec error 
Signed-off-by: Trong Nhan Mai --- .../expected_macaron.buildspec | 25 +++++++ .../test.yaml | 33 ++++++++ .../cases/gen_rc_build_spec_error/test.yaml | 75 +++++++++++++++++++ .../expected_macaron.buildspec | 25 +++++++ .../test.yaml | 18 ++++- 5 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec create mode 100644 tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml create mode 100644 tests/integration/cases/gen_rc_build_spec_error/test.yaml create mode 100644 tests/integration/cases/micronaut-projects_micronaut-core/expected_macaron.buildspec diff --git a/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec new file mode 100644 index 000000000..b77755498 --- /dev/null +++ b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec @@ -0,0 +1,25 @@ +# Copyright (c) 2025, Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +# Generated by Macaron version 0.15.0 + +# Input PURL - pkg:github/behnazh-w/example-maven-app@1.0 +# Initial default JDK version 8 and default build command [['mvn', '-DskipTests=true', '-Dmaven.test.skip=true', '-Dmaven.site.skip=true', '-Drat.skip=true', '-Dmaven.javadoc.skip=true', 'clean', 'package']]. +# The lookup build command: ['./mvnw', 'clean', 'package'] +# Jdk version from lookup build command 17. 
+ +groupId=behnazh-w +artifactId=example-maven-app +version=1.0 + +gitRepo=https://github.com/behnazh-w/example-maven-app + +gitTag=2deca75ed5dd365eaf1558a82347b1f11306135f + +tool=mvn +jdk=17 + +newline=lf + +command="./mvnw -DskipTests=true -Dmaven.test.skip=true -Dmaven.site.skip=true -Drat.skip=true -Dmaven.javadoc.skip=true clean package" + +buildinfo=target/example-maven-app-1.0.buildinfo diff --git a/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml new file mode 100644 index 000000000..161beeb08 --- /dev/null +++ b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml @@ -0,0 +1,33 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Test the build spec generation on a Maven project with JDK version obtained + from the github action workflow. + +tags: +- macaron-python-package +- macaron-docker-image +- macaron-gen-build-spec + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -purl + - pkg:github/behnazh-w/example-maven-app@1.0 +- name: Run Reproducible-central build spec generation + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:github/behnazh-w/example-maven-app@1.0 + - --output-format + - rc-buildspec +- name: Compare Buildspec. + kind: compare + options: + kind: rc_build_spec + result: ./output/macaron.buildspec + expected: expected_macaron.buildspec diff --git a/tests/integration/cases/gen_rc_build_spec_error/test.yaml b/tests/integration/cases/gen_rc_build_spec_error/test.yaml new file mode 100644 index 000000000..a04148abb --- /dev/null +++ b/tests/integration/cases/gen_rc_build_spec_error/test.yaml @@ -0,0 +1,75 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Running macaron gen-build-spec with invalid arguments. + +tags: +- macaron-python-package +- macaron-docker-image +- macaron-gen-build-spec + +steps: +- name: Run macaron analyze on the remote repository. + kind: analyze + options: + command_args: + - --package-url + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@2.0?type=jar + - -rp + - https://github.com/behnazh-w/example-maven-app +- name: Using a format that we don't support. + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@2.0?type=jar + - --output-format + - this-format-is-not-supported + expect_fail: true +- name: Generate the RC Buildspec for a PURL that we haven't analyzed. + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:maven/io.micronaut/micronaut-core@4.2.3 + - --output-format + - rc-buildspec + expect_fail: true +- name: Generate the RC Buildspec for a PURL that doesn't have namespace information. + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:maven/io.micronaut@4.2.3 + - --output-format + - rc-buildspec + expect_fail: true +- name: Generate the RC Buildspec for a PURL that doesn't have version information. + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:maven/io.micronaut/micronaut-core + - --output-format + - rc-buildspec + expect_fail: true +- name: Generate the RC Buildspec using a database that doesn't exist. + kind: gen-build-spec + options: + database: output/some_database.db + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@2.0?type=jar + - --output-format + - rc-buildspec + expect_fail: true +- name: Generate the RC Buildspec using an invalid PURL. 
+ kind: gen-build-spec + options: + command_args: + - -purl + - invalid_purl + - --output-format + - rc-buildspec + expect_fail: true diff --git a/tests/integration/cases/micronaut-projects_micronaut-core/expected_macaron.buildspec b/tests/integration/cases/micronaut-projects_micronaut-core/expected_macaron.buildspec new file mode 100644 index 000000000..8caca83d6 --- /dev/null +++ b/tests/integration/cases/micronaut-projects_micronaut-core/expected_macaron.buildspec @@ -0,0 +1,25 @@ +# Copyright (c) 2025, Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +# Generated by Macaron version 0.15.0 + +# Input PURL - pkg:maven/io.micronaut/micronaut-core@4.2.3 +# Initial default JDK version 8 and default build command [['./gradlew', '-x', 'test', '-Pskip.signing', '-PskipSigning', '-Pgnupg.skip', 'clean', 'assemble']]. +# The lookup build command: ['./gradlew', 'publishToSonatype', 'closeAndReleaseSonatypeStagingRepository'] +# Jdk version from lookup build command 17. + +groupId=io.micronaut +artifactId=micronaut-core +version=4.2.3 + +gitRepo=https://github.com/micronaut-projects/micronaut-core + +gitTag=36dcaf0539536dce5fc753677341609ff7f273ca + +tool=gradle +jdk=17 + +newline=lf + +command="./gradlew -x test -Pskip.signing -PskipSigning -Pgnupg.skip clean assemble" + +buildinfo=target/micronaut-core-4.2.3.buildinfo diff --git a/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml b/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml index 467e623bc..7855b1be2 100644 --- a/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml +++ b/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml @@ -1,12 +1,14 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. description: | Analyzing the PURL when automatic dependency resolution is skipped. Run policy CLI with micronaut-core results to test deploy command information. + Also generate a build spec for this PURL and validate the build spec content. tags: - macaron-python-package +- macaron-gen-build-spec steps: - name: Run macaron analyze @@ -30,3 +32,17 @@ steps: kind: policy_report result: output/policy_report.json expected: policy_report.json +- name: Run Reproducible-central build spec generation + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:maven/io.micronaut/micronaut-core@4.2.3 + - --output-format + - rc-buildspec +- name: Compare Buildspec. + kind: compare + options: + kind: rc_build_spec + result: ./output/macaron.buildspec + expected: expected_macaron.buildspec From 779c03b88b0360942274ae2f0de247f86c60f157 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Fri, 18 Jul 2025 16:48:55 +1000 Subject: [PATCH 10/27] fix: add jdk version 22 and 23 into the list of supported jdk major version --- src/macaron/build_spec_generator/jdk_version_normalizer.py | 3 +++ tests/build_spec_generator/test_jdk_version_normalizer.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/src/macaron/build_spec_generator/jdk_version_normalizer.py b/src/macaron/build_spec_generator/jdk_version_normalizer.py index 852aab9b2..7dc9f169c 100644 --- a/src/macaron/build_spec_generator/jdk_version_normalizer.py +++ b/src/macaron/build_spec_generator/jdk_version_normalizer.py @@ -21,6 +21,9 @@ "19", "20", "21", + "22", + "23", + "24", ] diff --git a/tests/build_spec_generator/test_jdk_version_normalizer.py b/tests/build_spec_generator/test_jdk_version_normalizer.py index 2236505e8..61f085c1d 100644 --- a/tests/build_spec_generator/test_jdk_version_normalizer.py +++ b/tests/build_spec_generator/test_jdk_version_normalizer.py @@ -46,6 +46,12 @@ "19", 
id="major_follows_with_text", ), + # https://github.com/jboss-logging/jboss-logging/blob/25ad85c9cecf5a2f79db9a4d077221ed087e4ef5/.github/workflows/ci.yml#L46 + pytest.param( + "22-ea", + "22", + id="pkg_maven_org.jboss.logging_jboss-logging_3.6.1.Final", + ), ], ) def test_jdk_version_normalizer(version_string: str, expected: str) -> None: From d0c10de943c51c6a40afa310c248218679146992 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Fri, 18 Jul 2025 16:50:12 +1000 Subject: [PATCH 11/27] chore: add a small log message at the beginning of build spec generation --- src/macaron/__main__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index a51125fc5..d27e661e9 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -265,6 +265,13 @@ def gen_build_spec(gen_build_spec_args: argparse.Namespace) -> int: logger.error("Cannot parse purl %s. Error %s", gen_build_spec_args.package_url, error) return os.EX_USAGE + logger.info( + "Generating %s buildspec for PURL %s from %s.", + output_format, + purl, + gen_build_spec_args.database, + ) + build_spec_content = gen_build_spec_str( purl=purl, database_path=gen_build_spec_args.database, From 0f9b0c6afb3f7b2de06cad794da455e7aed5f5da Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Mon, 21 Jul 2025 11:23:48 +1000 Subject: [PATCH 12/27] fix: fix the sql statement for obtaining build check facts where the check result was mistakenly joined on the checkfacts.id instead og checkfact.check_result_id --- src/macaron/build_spec_generator/macaron_db_extractor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/macaron/build_spec_generator/macaron_db_extractor.py b/src/macaron/build_spec_generator/macaron_db_extractor.py index 05a485b7d..540422b64 100644 --- a/src/macaron/build_spec_generator/macaron_db_extractor.py +++ b/src/macaron/build_spec_generator/macaron_db_extractor.py @@ -240,7 +240,7 @@ def 
get_sql_stmt_build_as_code_check(component_id: int) -> Select[tuple[BuildAsC ) .join( CheckFacts, - onclause=MappedCheckResult.id == CheckFacts.id, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, ) .join( build_as_code_facts_alias, @@ -287,7 +287,7 @@ def get_sql_stmt_build_service_check(component_id: int) -> Select[tuple[BuildSer ) .join( CheckFacts, - onclause=MappedCheckResult.id == CheckFacts.id, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, ) .join( build_service_facts_alias, @@ -330,11 +330,11 @@ def get_sql_stmt_build_script_check(component_id: int) -> Select[tuple[BuildScri .select_from(Component) .join( MappedCheckResult, - onclause=Component.id == MappedCheckResult.component_id, + onclause=MappedCheckResult.component_id == Component.id, ) .join( CheckFacts, - onclause=MappedCheckResult.id == CheckFacts.id, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, ) .join( build_script_facts_alias, From 15efd52cfe9d429de486f7f5403a94b8610eab87 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Mon, 21 Jul 2025 14:52:38 +1000 Subject: [PATCH 13/27] chore: move the looking up of repository before the build tool lookup because without repository no build tool is found This commit also add some useful debug messages for extracting values from the database for Reproducible Central buildspec generation. 
--- .../reproducible_central/rc_build_info.py | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py b/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py index 52287df0b..105f10e45 100644 --- a/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py +++ b/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py @@ -4,6 +4,7 @@ """This module contains the representation of information needed for Reproducible Central Buildspec generation.""" import logging +import pprint from collections.abc import Sequence from dataclasses import dataclass @@ -38,6 +39,12 @@ class RcInternalBuildInfo: build_tool_facts: Sequence[BuildToolFacts] +def format_build_command_infos(build_command_infos: list[GenericBuildCommandInfo]) -> str: + """Return the prettified str format for a list of `GenericBuildCommandInfo` instances.""" + pretty_formatted_ouput = [pprint.pformat(build_command_info) for build_command_info in build_command_infos] + return "\n".join(pretty_formatted_ouput) + + def get_rc_internal_build_info( purl: PackageURL, session: Session, @@ -78,43 +85,53 @@ def get_rc_internal_build_info( logger.debug("Latest component ID: %d", latest_component_id) try: - build_tool_facts = lookup_build_tools_check( - component_id=latest_component_id, - session=session, - ) - except QueryMacaronDatabaseError as lookup_build_tools_error: + lookup_component_repository = lookup_repository(latest_component_id, session) + except QueryMacaronDatabaseError as lookup_repository_error: logger.error( - "Unexpected result from querying build tools for %s. Error: %s", + "Unexpected result from querying repository information for %s. 
Error: %s", purl.to_string(), - lookup_build_tools_error, + lookup_repository_error, ) return None - if not build_tool_facts: + if not lookup_component_repository: logger.error( - "Cannot find any build tool for PackageURL %s in the database.", + "Cannot find any repository information for %s in the database.", purl.to_string(), ) return None - logger.debug("Build tools discovered from the %s table: %s", BuildToolFacts.__tablename__, build_tool_facts) + logger.info( + "Repository information for purl %s: url %s, commit %s", + purl, + lookup_component_repository.remote_path, + lookup_component_repository.commit_sha, + ) try: - lookup_component_repository = lookup_repository(latest_component_id, session) - except QueryMacaronDatabaseError as lookup_repository_error: + build_tool_facts = lookup_build_tools_check( + component_id=latest_component_id, + session=session, + ) + except QueryMacaronDatabaseError as lookup_build_tools_error: logger.error( - "Unexpected result from querying repository information for %s. Error: %s", + "Unexpected result from querying build tools for %s. Error: %s", purl.to_string(), - lookup_repository_error, + lookup_build_tools_error, ) return None - if not lookup_component_repository: + if not build_tool_facts: logger.error( - "Cannot find any repository information for %s in the database.", + "Cannot find any build tool for PackageURL %s in the database.", purl.to_string(), ) return None + logger.info( + "Build tools discovered from the %s table: %s", + BuildToolFacts.__tablename__, + [fact.build_tool_name for fact in build_tool_facts], + ) try: - lookup_build_facts = lookup_any_build_command(latest_component_id, session) + lookup_build_command_infos = lookup_any_build_command(latest_component_id, session) except QueryMacaronDatabaseError as lookup_build_command_error: logger.error( "Unexpected result from querying all build command information for %s. 
Error: %s", @@ -122,11 +139,15 @@ def get_rc_internal_build_info( lookup_build_command_error, ) return None + logger.debug( + "Build command information discovered\n%s", + format_build_command_infos(lookup_build_command_infos), + ) return RcInternalBuildInfo( purl=purl, repository=lookup_component_repository, latest_component_id=latest_component_id, build_tool_facts=build_tool_facts, - generic_build_command_facts=lookup_build_facts, + generic_build_command_facts=lookup_build_command_infos, ) From 787e9b3bb169b36cd23eff6c8aeac73d3c2eac97 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Mon, 21 Jul 2025 17:07:32 +1000 Subject: [PATCH 14/27] chore: support gen-build-spec for the Docker image --- scripts/release_scripts/run_macaron.sh | 37 +++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/scripts/release_scripts/run_macaron.sh b/scripts/release_scripts/run_macaron.sh index e9bdae191..65dd08954 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -283,7 +283,7 @@ while [[ $# -gt 0 ]]; do entrypoint+=("macaron") ;; # Parsing commands for macaron entrypoint. - analyze|dump-defaults|verify-policy) + analyze|dump-defaults|verify-policy|gen-build-spec) command=$1 shift break @@ -359,6 +359,19 @@ elif [[ $command == "verify-policy" ]]; then esac shift done +elif [[ $command == "gen-build-spec" ]]; then + while [[ $# -gt 0 ]]; do + case $1 in + -d|--database) + gen_build_spec_arg_database="$2" + shift + ;; + *) + rest_command+=("$1") + ;; + esac + shift + done elif [[ $command == "dump-defaults" ]]; then while [[ $# -gt 0 ]]; do case $1 in @@ -512,6 +525,28 @@ if [[ -n "${arg_datalog_policy_file:-}" ]]; then mount_file "-f/--file" "$datalog_policy_file" "$datalog_policy_file_in_container" "ro,Z" fi +# MACARON entrypoint - gen-build-spec command argvs +# This is for macaron gen-build-spec command. +# Determine the database path to be mounted into ${MACARON_WORKSPACE}/database/. 
+if [[ -n "${gen_build_spec_arg_database:-}" ]]; then + gen_build_spec_database_path="${gen_build_spec_arg_database}" + file_name="$(basename "${gen_build_spec_database_path}")" + gen_build_spec_database_path_in_container="${MACARON_WORKSPACE}/database/${file_name}" + + argv_command+=("--database" "$gen_build_spec_database_path_in_container") + mount_file "-d/--database" "$gen_build_spec_database_path" "$gen_build_spec_database_path_in_container" "rw,Z" +fi + +# Determine that ~/.gradle/gradle.properties exists to be mounted into ${MACARON_WORKSPACE}/gradle.properties +if [[ -f "$HOME/.gradle/gradle.properties" ]]; then + mounts+=("-v" "$HOME/.gradle/gradle.properties":"${MACARON_WORKSPACE}/gradle.properties:ro,Z") +fi + +# Determine that ~/.m2/settings.xml exists to be mounted into ${MACARON_WORKSPACE}/settings.xml +if [[ -f "$HOME/.m2/settings.xml" ]]; then + mounts+=("-v" "$HOME/.m2/settings.xml":"${MACARON_WORKSPACE}/settings.xml:ro,Z") +fi + # Set up proxy. # We respect the host machine's proxy environment variables. 
proxy_var_names=( From 7d548bb0e5a9cc59eb0126961466f4e45a6e1237 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Tue, 5 Aug 2025 10:54:37 +1000 Subject: [PATCH 15/27] fix: use the correct analysis_id foreign key to map with Analaysis in the get latest component for purl select statement --- src/macaron/build_spec_generator/macaron_db_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macaron/build_spec_generator/macaron_db_extractor.py b/src/macaron/build_spec_generator/macaron_db_extractor.py index 540422b64..1532265dd 100644 --- a/src/macaron/build_spec_generator/macaron_db_extractor.py +++ b/src/macaron/build_spec_generator/macaron_db_extractor.py @@ -160,7 +160,7 @@ def get_sql_stmt_latest_component_for_purl(purl: PackageURL) -> Select[tuple[Com .select_from(Component) .join( Analysis, - onclause=Component.id == Analysis.id, + onclause=Component.analysis_id == Analysis.id, ) .where(Component.purl == purl.to_string()) .order_by( From 86adc3b243f99f8d1cb21c680286aebf00ea8300 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 6 Aug 2025 11:27:59 +1000 Subject: [PATCH 16/27] chore: simplify the looking up component information by getting the ID and repository from the lookup Component object instead of having their own SELECT query --- .../macaron_db_extractor.py | 78 ++-------- .../reproducible_central/rc_build_info.py | 29 ++-- .../test_macaron_db_extractor.py | 135 +++++++++++++----- 3 files changed, 117 insertions(+), 125 deletions(-) diff --git a/src/macaron/build_spec_generator/macaron_db_extractor.py b/src/macaron/build_spec_generator/macaron_db_extractor.py index 1532265dd..a41a16870 100644 --- a/src/macaron/build_spec_generator/macaron_db_extractor.py +++ b/src/macaron/build_spec_generator/macaron_db_extractor.py @@ -15,7 +15,7 @@ from sqlalchemy.exc import MultipleResultsFound, SQLAlchemyError from sqlalchemy.orm import Session, aliased -from macaron.database.table_definitions import Analysis, CheckFacts, 
Component, MappedCheckResult, Repository +from macaron.database.table_definitions import Analysis, CheckFacts, Component, MappedCheckResult from macaron.errors import QueryMacaronDatabaseError from macaron.slsa_analyzer.checks.build_as_code_check import BuildAsCodeFacts from macaron.slsa_analyzer.checks.build_script_check import BuildScriptFacts @@ -353,32 +353,8 @@ def get_sql_stmt_build_script_check(component_id: int) -> Select[tuple[BuildScri ) -def get_sql_stmt_repository(component_id: int) -> Select[tuple[Repository]]: - """Return an SQLAlchemy SELECT statement to query the Repository for a given PackageURL. - - Parameters - ---------- - purl_string : str - The PackageURL string to find the Repository. - - Returns - ------- - Select[tuple[Repository]] - The SQLAlchemy SELECT statement. - """ - return ( - select(Repository) - .select_from(Component) - .join( - Repository, - onclause=Component.id == Repository.component_id, - ) - .where(Component.id == component_id) - ) - - -def lookup_latest_component_id(purl: PackageURL, session: Session) -> int | None: - """Return the component id of the latest analysis that matches a given PackageURL string. +def lookup_latest_component(purl: PackageURL, session: Session) -> Component | None: + """Return the component of the latest analysis that matches a given PackageURL string. Parameters ---------- @@ -389,29 +365,29 @@ def lookup_latest_component_id(purl: PackageURL, session: Session) -> int | None Returns ------- - int | None - The latest component id or None if there isn't one available in the database. + Component | None + The latest component or None if there isn't one available in the database. Raises ------ QueryMacaronDatabaseError If there is an unexpected error when executing the SQLAlchemy query. 
""" - latest_component_id_stmt = get_sql_stmt_latest_component_for_purl(purl) - logger.debug("Latest Analysis and Component query \n %s", compile_sqlite_select_statement(latest_component_id_stmt)) + latest_component_stmt = get_sql_stmt_latest_component_for_purl(purl) + logger.debug("Latest Analysis and Component query \n %s", compile_sqlite_select_statement(latest_component_stmt)) try: - component_results = session.execute(latest_component_id_stmt) + component_results = session.execute(latest_component_stmt) except SQLAlchemyError as generic_exec_error: raise QueryMacaronDatabaseError( - f"Critical: unexpected error when execute query {compile_sqlite_select_statement(latest_component_id_stmt)}." + f"Critical: unexpected error when execute query {compile_sqlite_select_statement(latest_component_stmt)}." ) from generic_exec_error latest_component = component_results.scalars().first() if not latest_component: return None - return latest_component.id + return latest_component def lookup_build_tools_check(component_id: int, session: Session) -> Sequence[BuildToolFacts]: @@ -687,37 +663,3 @@ def lookup_any_build_command(component_id: int, session: Session) -> list[Generi error, ) return [] - - -def lookup_repository(component_id: int, session: Session) -> Repository | None: - """Return the Repository instance for given PackageURL string. - - Parameters - ---------- - component_id : int - The component id to look for the Repository. - session : Session - The SQLAlcemy Session that connects to the Macaron database. - - Returns - ------- - Repository - The Repository instances obtained from querying the database. - - Raises - ------ - QueryMacaronDatabaseError - If the query result from the database contains more than one Repository instance, - or there is an unexpected error when executing the SQLAlchemy query. 
- """ - repository_select_statement = get_sql_stmt_repository(component_id) - logger.debug( - "Repository for component %d \n %s.", component_id, compile_sqlite_select_statement(repository_select_statement) - ) - - repository_result = lookup_one_or_none( - select_statement=repository_select_statement, - session=session, - ) - - return repository_result diff --git a/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py b/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py index 105f10e45..8e33f0313 100644 --- a/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py +++ b/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py @@ -15,8 +15,7 @@ GenericBuildCommandInfo, lookup_any_build_command, lookup_build_tools_check, - lookup_latest_component_id, - lookup_repository, + lookup_latest_component, ) from macaron.database.table_definitions import Repository from macaron.errors import QueryMacaronDatabaseError @@ -64,36 +63,30 @@ def get_rc_internal_build_info( An instance of ``RcInternalBuildInfo`` or None if there was an error. """ try: - latest_component_id = lookup_latest_component_id( + latest_component = lookup_latest_component( purl=purl, session=session, ) except QueryMacaronDatabaseError as lookup_component_error: logger.error( - "Unexpected result from querying latest component id for %s. Error: %s", + "Unexpected result from querying latest component for %s. Error: %s", purl.to_string(), lookup_component_error, ) return None - if not latest_component_id: + if not latest_component: logger.error( "Cannot find an analysis result for PackageURL %s in the database. 
" + "Please check if an analysis for it exists in the database.", purl.to_string(), ) return None + + latest_component_id = latest_component.id logger.debug("Latest component ID: %d", latest_component_id) - try: - lookup_component_repository = lookup_repository(latest_component_id, session) - except QueryMacaronDatabaseError as lookup_repository_error: - logger.error( - "Unexpected result from querying repository information for %s. Error: %s", - purl.to_string(), - lookup_repository_error, - ) - return None - if not lookup_component_repository: + latest_component_repository = latest_component.repository + if not latest_component_repository: logger.error( "Cannot find any repository information for %s in the database.", purl.to_string(), @@ -102,8 +95,8 @@ def get_rc_internal_build_info( logger.info( "Repository information for purl %s: url %s, commit %s", purl, - lookup_component_repository.remote_path, - lookup_component_repository.commit_sha, + latest_component_repository.remote_path, + latest_component_repository.commit_sha, ) try: @@ -146,7 +139,7 @@ def get_rc_internal_build_info( return RcInternalBuildInfo( purl=purl, - repository=lookup_component_repository, + repository=latest_component_repository, latest_component_id=latest_component_id, build_tool_facts=build_tool_facts, generic_build_command_facts=lookup_build_command_infos, diff --git a/tests/build_spec_generator/test_macaron_db_extractor.py b/tests/build_spec_generator/test_macaron_db_extractor.py index 1c7f3c4bd..9919a0ca7 100644 --- a/tests/build_spec_generator/test_macaron_db_extractor.py +++ b/tests/build_spec_generator/test_macaron_db_extractor.py @@ -15,13 +15,18 @@ from macaron import __version__ from macaron.build_spec_generator.macaron_db_extractor import ( QueryMacaronDatabaseError, - Repository, lookup_any_build_command, lookup_build_tools_check, - lookup_latest_component_id, - lookup_repository, + lookup_latest_component, +) +from macaron.database.table_definitions import ( + Analysis, + 
CommitFinderInfo, + Component, + ORMBase, + RepoFinderMetadata, + Repository, ) -from macaron.database.table_definitions import Analysis, CommitFinderInfo, Component, ORMBase, RepoFinderMetadata from macaron.repo_finder.repo_finder import RepoFinderInfo # pylint: disable=redefined-outer-name @@ -62,57 +67,97 @@ def invalid_db_session() -> Generator[Session, Any, None]: @pytest.mark.parametrize( - ("input_data", "query_purl_string", "expect_result"), + ("input_data", "query_purl_string", "expect_id"), [ - pytest.param( - [], - "pkg:maven/oracle/macaron@0.16.0", - None, - id="The database is empty.", - ), pytest.param( [ ( datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), - "pkg:maven/boo/foo@0.2.0", + "pkg:maven/oracle/macaron@0.16.0", ), ( datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), - "pkg:maven/boo/boohoo@1.0", + "pkg:maven/boo/foo@0.1.0", + ), + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/oracle/macaron@0.16.0", ), ], "pkg:maven/oracle/macaron@0.16.0", - None, - id="The database is not empty, but no component matches the query PackageURL string.", + 3, + id="When two analyses of the same PURL has the same timestamp, the component id of the latest analysis is returned.", + ), + ], +) +def test_lookup_latest_component( + macaron_db_session: Session, + input_data: list[tuple[datetime, str]], + query_purl_string: str, + expect_id: int | None, +) -> None: + """Test the lookup_latest_component function.""" + for utc_timestamp, purl_string in input_data: + analysis = Analysis( + analysis_time=utc_timestamp, + macaron_version=__version__, + ) + + repo_finder_metadata = RepoFinderMetadata( + repo_finder_outcome=RepoFinderInfo.NOT_USED, + commit_finder_outcome=CommitFinderInfo.NOT_USED, + found_url="", + found_commit="", + ) + + _ = Component( + purl=purl_string, + analysis=analysis, + repository=None, + 
repo_finder_metadata=repo_finder_metadata, + ) + + macaron_db_session.add(analysis) + + macaron_db_session.commit() + latest_component = lookup_latest_component( + PackageURL.from_string(query_purl_string), + macaron_db_session, + ) + assert latest_component + assert latest_component.id == expect_id + + +@pytest.mark.parametrize( + ("input_data", "query_purl_string"), + [ + pytest.param( + [], + "pkg:maven/oracle/macaron@0.16.0", + id="The database is empty.", ), pytest.param( [ ( datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), - "pkg:maven/oracle/macaron@0.16.0", - ), - ( - datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), - "pkg:maven/boo/foo@0.1.0", + "pkg:maven/boo/foo@0.2.0", ), ( datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), - "pkg:maven/oracle/macaron@0.16.0", + "pkg:maven/boo/boohoo@1.0", ), ], "pkg:maven/oracle/macaron@0.16.0", - 3, - id="When two analyses of the same PURL has the same timestamp, the component id of the latest analysis is returned.", + id="The database is not empty, but no component matches the query PackageURL string.", ), ], ) -def test_lookup_latest_component_id( +def test_lookup_latest_component_empty_db( macaron_db_session: Session, input_data: list[tuple[datetime, str]], query_purl_string: str, - expect_result: int | None, ) -> None: - """Test the lookup_latest_component_id function.""" + """Test the lookup_latest_component function with empty database.""" for utc_timestamp, purl_string in input_data: analysis = Analysis( analysis_time=utc_timestamp, @@ -136,16 +181,15 @@ def test_lookup_latest_component_id( macaron_db_session.add(analysis) macaron_db_session.commit() - assert lookup_latest_component_id(PackageURL.from_string(query_purl_string), macaron_db_session) == expect_result - - -def test_lookup_repository_empty_db(macaron_db_session: Session) -> None: - """Test the lookup_repository function.""" - assert 
not lookup_repository(1, macaron_db_session) + latest_component = lookup_latest_component( + PackageURL.from_string(query_purl_string), + macaron_db_session, + ) + assert not latest_component -def test_lookup_repository(macaron_db_session: Session) -> None: - """Test the lookup_repository function.""" +def test_repository_information_from_latest_component(macaron_db_session: Session) -> None: + """Test getting the repository information from looking up a latest component.""" analysis = Analysis( analysis_time=datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), macaron_version=__version__, @@ -193,8 +237,21 @@ def test_lookup_repository(macaron_db_session: Session) -> None: macaron_db_session.add(analysis) macaron_db_session.commit() - assert not lookup_repository(component_without_repo.id, macaron_db_session) - lookup_repo = lookup_repository(component_with_repo.id, macaron_db_session) + latest_component_no_repo = lookup_latest_component( + PackageURL.from_string(component_without_repo.purl), + macaron_db_session, + ) + assert latest_component_no_repo + assert latest_component_no_repo.id == component_without_repo.id + assert not latest_component_no_repo.repository + + latest_component_with_repo = lookup_latest_component( + PackageURL.from_string(component_with_repo.purl), + macaron_db_session, + ) + assert latest_component_with_repo + assert latest_component_with_repo.id == component_with_repo.id + lookup_repo = latest_component_with_repo.repository assert lookup_repo assert lookup_repo.remote_path == "https://github.com/oracle/macaron" assert lookup_repo.commit_sha == "d2b95262091d6572cc12dcda57d89f9cd44ac88b" @@ -220,13 +277,13 @@ def test_invalid_input_databse(invalid_db_session: Session) -> None: ) with pytest.raises(QueryMacaronDatabaseError): - lookup_repository( - component_id=1, + lookup_latest_component( + purl=PackageURL.from_string("pkg:maven/oracle/macaron@0.16.0"), session=invalid_db_session, ) with 
pytest.raises(QueryMacaronDatabaseError): - lookup_latest_component_id( + lookup_latest_component( purl=PackageURL.from_string("pkg:maven/oracle/macaron@0.16.0"), session=invalid_db_session, ) From c14d8dcd1c957832e182911b4c41a1e2e7bc325b Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 6 Aug 2025 20:09:14 +1000 Subject: [PATCH 17/27] refactor: refactor the function to get the purl-based directory path to a separated module and add tests for it --- src/macaron/database/table_definitions.py | 16 +++--- src/macaron/path_utils/__init__.py | 2 + src/macaron/path_utils/purl_based_path.py | 44 +++++++++++++++ tests/path_utils/__init__.py | 2 + tests/path_utils/test_purl_based_path.py | 65 +++++++++++++++++++++++ 5 files changed, 119 insertions(+), 10 deletions(-) create mode 100644 src/macaron/path_utils/__init__.py create mode 100644 src/macaron/path_utils/purl_based_path.py create mode 100644 tests/path_utils/__init__.py create mode 100644 tests/path_utils/test_purl_based_path.py diff --git a/src/macaron/database/table_definitions.py b/src/macaron/database/table_definitions.py index 72ef57b87..6414555c2 100644 --- a/src/macaron/database/table_definitions.py +++ b/src/macaron/database/table_definitions.py @@ -11,7 +11,6 @@ For table associated with a check see the check module. """ import logging -import os import string from datetime import datetime from pathlib import Path @@ -36,6 +35,7 @@ from macaron.database.database_manager import ORMBase from macaron.database.db_custom_types import ProvenancePayload, RFC3339DateTime from macaron.errors import InvalidPURLError +from macaron.path_utils.purl_based_path import get_purl_based_dir from macaron.repo_finder.repo_finder_enums import CommitFinderInfo, RepoFinderInfo from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, ProvenanceSubjectPURLMatcher from macaron.slsa_analyzer.slsa_req import ReqName @@ -256,15 +256,11 @@ def report_dir_name(self) -> str: str The report directory name. 
""" - # Sanitize the path and make sure it's a valid file name. - # A purl string is an ASCII URL string that can allow uppercase letters for - # certain parts. So we shouldn't change uppercase letters with lower case - # to avoid merging results for two distinct PURL strings. - allowed_chars = string.ascii_letters + string.digits + "-" - p_type = "".join(c if c in allowed_chars else "_" for c in self.type) - p_namespace = "".join(c if c in allowed_chars else "_" for c in self.namespace) if self.namespace else "" - p_name = "".join(c if c in allowed_chars else "_" for c in self.name) - return os.path.join(p_type, p_namespace, p_name) + return get_purl_based_dir( + purl_name=self.name, + purl_namespace=self.namespace, + purl_type=self.type, + ) class Repository(ORMBase): diff --git a/src/macaron/path_utils/__init__.py b/src/macaron/path_utils/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/src/macaron/path_utils/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. diff --git a/src/macaron/path_utils/purl_based_path.py b/src/macaron/path_utils/purl_based_path.py new file mode 100644 index 000000000..201c8d529 --- /dev/null +++ b/src/macaron/path_utils/purl_based_path.py @@ -0,0 +1,44 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains functions to manage PackageURL-based paths.""" + +import os +import string + + +def get_purl_based_dir( + purl_type: str, + purl_name: str, + purl_namespace: str | None = None, +) -> str: + """Return a directory path according to components of a PackageURL. + + Parameters + ---------- + purl_type: str + The type component of the PackageURL as string. 
+ purl_name:str + The name component of the PackageURL as string. + purl_namespace: str | None = None + The namespace component of the PackageURL as string (optional). + + Returns + ------- + str + The directory path. + + Examples + -------- + >>> get_purl_based_dir(purl_type="maven", purl_name="macaron", purl_namespace="oracle") + 'maven/oracle/macaron' + """ + # Sanitize the path and make sure it's a valid file name. + # A purl string is an ASCII URL string that can allow uppercase letters for + # certain parts. So we shouldn't change uppercase letters with lower case + # to avoid merging results for two distinct PURL strings. + allowed_chars = string.ascii_letters + string.digits + "-" + p_type = "".join(c if c in allowed_chars else "_" for c in purl_type) + p_namespace = "".join(c if c in allowed_chars else "_" for c in purl_namespace) if purl_namespace else "" + p_name = "".join(c if c in allowed_chars else "_" for c in purl_name) + return os.path.join(p_type, p_namespace, p_name) diff --git a/tests/path_utils/__init__.py b/tests/path_utils/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/tests/path_utils/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. diff --git a/tests/path_utils/test_purl_based_path.py b/tests/path_utils/test_purl_based_path.py new file mode 100644 index 000000000..24b8cab7a --- /dev/null +++ b/tests/path_utils/test_purl_based_path.py @@ -0,0 +1,65 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module tests the purl_based_path module.""" + +import pytest + +from macaron.path_utils.purl_based_path import get_purl_based_dir + + +@pytest.mark.parametrize( + ("purl_type", "purl_namespace", "purl_name", "expected"), + [ + pytest.param( + "maven", + "oracle", + "macaron", + "maven/oracle/macaron", + id="simple_case_with_no_special_characters", + ), + pytest.param( + "maven", + None, + "macaron", + "maven/macaron", + id="no_namespace", + ), + pytest.param( + "maven", + "boo#bar", + "macaron@oracle", + "maven/boo_bar/macaron_oracle", + id="handle_non_allow_chars", + ), + pytest.param( + "maven", + "boo123bar", + "macaron123oracle", + "maven/boo123bar/macaron123oracle", + id="digits_are_allowed", + ), + pytest.param( + "maven", + "boo-bar", + "macaron-oracle", + "maven/boo-bar/macaron-oracle", + id="dashes_are_allowed", + ), + ], +) +def test_get_purl_based_dir( + purl_type: str, + purl_namespace: str, + purl_name: str, + expected: str, +) -> None: + """Test the get_purl_based_dir function.""" + assert ( + get_purl_based_dir( + purl_type=purl_type, + purl_name=purl_name, + purl_namespace=purl_namespace, + ) + == expected + ) From 2bfc3b2ebe0995a12e9f146242d69351148a59dc Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 6 Aug 2025 20:30:07 +1000 Subject: [PATCH 18/27] test: improve test_macaron_db_extractor test module --- .../test_macaron_db_extractor.py | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/tests/build_spec_generator/test_macaron_db_extractor.py b/tests/build_spec_generator/test_macaron_db_extractor.py index 9919a0ca7..c2c750083 100644 --- a/tests/build_spec_generator/test_macaron_db_extractor.py +++ b/tests/build_spec_generator/test_macaron_db_extractor.py @@ -86,7 +86,26 @@ def invalid_db_session() -> Generator[Session, Any, None]: ], "pkg:maven/oracle/macaron@0.16.0", 3, - id="When two analyses of the same PURL has the same timestamp, the component id of the latest analysis is returned.", + 
id="two_analysis_on_the_same_purl_with_same_timestamp", + ), + pytest.param( + [ + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/oracle/macaron@0.16.0", + ), + ( + datetime(year=2025, month=12, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/oracle/macaron@0.16.0", + ), + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/boo/foo@0.1.0", + ), + ], + "pkg:maven/oracle/macaron@0.16.0", + 2, + id="two_analysis_on_the_same_purl_with_different_timestamp", ), ], ) @@ -94,7 +113,7 @@ def test_lookup_latest_component( macaron_db_session: Session, input_data: list[tuple[datetime, str]], query_purl_string: str, - expect_id: int | None, + expect_id: int, ) -> None: """Test the lookup_latest_component function.""" for utc_timestamp, purl_string in input_data: @@ -125,6 +144,7 @@ def test_lookup_latest_component( macaron_db_session, ) assert latest_component + assert latest_component.purl == query_purl_string assert latest_component.id == expect_id @@ -134,7 +154,7 @@ def test_lookup_latest_component( pytest.param( [], "pkg:maven/oracle/macaron@0.16.0", - id="The database is empty.", + id="empty_database", ), pytest.param( [ @@ -148,7 +168,7 @@ def test_lookup_latest_component( ), ], "pkg:maven/oracle/macaron@0.16.0", - id="The database is not empty, but no component matches the query PackageURL string.", + id="no_component_matched_the_input_purl", ), ], ) From 543246ef3e042407c9f64967514984d65d8a7f2d Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 6 Aug 2025 22:00:54 +1000 Subject: [PATCH 19/27] feat: generate build spec into a purl-based path in the output directory --- src/macaron/__main__.py | 29 +--------- .../build_spec_generator.py | 56 +++++++++++++++++-- .../test.yaml | 2 +- .../test.yaml | 2 +- 4 files changed, 56 insertions(+), 33 deletions(-) diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index 
d27e661e9..d1180d9bb 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -16,7 +16,7 @@ import macaron from macaron.build_spec_generator.build_spec_generator import ( BuildSpecFormat, - gen_build_spec_str, + gen_build_spec_for_purl, ) from macaron.config.defaults import create_defaults, load_defaults from macaron.config.global_config import global_config @@ -272,36 +272,13 @@ def gen_build_spec(gen_build_spec_args: argparse.Namespace) -> int: gen_build_spec_args.database, ) - build_spec_content = gen_build_spec_str( + return gen_build_spec_for_purl( purl=purl, database_path=gen_build_spec_args.database, build_spec_format=build_spec_format, + output_path=global_config.output_path, ) - if not build_spec_content: - logger.error("Error while generate reproducible central build spec.") - return os.EX_DATAERR - - logger.debug("Build spec content: \n%s", build_spec_content) - build_spec_filepath = os.path.join(global_config.output_path, "macaron.buildspec") - try: - with open(build_spec_filepath, mode="w", encoding="utf-8") as file: - logger.info( - "Generating the %s format build spec to %s.", - build_spec_format.value, - os.path.relpath(build_spec_filepath, os.getcwd()), - ) - file.write(build_spec_content) - except OSError as error: - logger.error( - "Could not generate the Buildspec to %s. 
Error: %s", - os.path.relpath(build_spec_filepath, os.getcwd()), - error, - ) - return os.EX_DATAERR - - return os.EX_OK - def find_source(find_args: argparse.Namespace) -> int: """Perform repo and commit finding for a passed PURL, or commit finding for a passed PURL and repo.""" diff --git a/src/macaron/build_spec_generator/build_spec_generator.py b/src/macaron/build_spec_generator/build_spec_generator.py index 0868f19a4..265871a94 100644 --- a/src/macaron/build_spec_generator/build_spec_generator.py +++ b/src/macaron/build_spec_generator/build_spec_generator.py @@ -4,6 +4,7 @@ """This module contains the functions used for generating build specs from the Macaron database.""" import logging +import os from collections.abc import Mapping from enum import Enum @@ -13,6 +14,7 @@ from macaron.build_spec_generator.build_command_patcher import PatchCommandBuildTool, PatchValueType from macaron.build_spec_generator.reproducible_central.reproducible_central import gen_reproducible_central_build_spec +from macaron.path_utils.purl_based_path import get_purl_based_dir logger: logging.Logger = logging.getLogger(__name__) @@ -63,11 +65,12 @@ class BuildSpecFormat(str, Enum): } -def gen_build_spec_str( +def gen_build_spec_for_purl( purl: PackageURL, database_path: str, build_spec_format: BuildSpecFormat, -) -> str | None: + output_path: str, +) -> int: """Return the content of a build spec file from a given PURL. Parameters @@ -81,16 +84,59 @@ def gen_build_spec_str( Returns ------- - str | None - The build spec content as a string, or None if there is an error. + int + The exit code for this function. ``os.EX_OK`` if everything is fine, ``os.EX_OSERR`` if the + buildspec file cannot be created in the local filesystem, ``os.EX_DATAERR`` if there was an + error in generate the content for the buildspec file. 
""" db_engine = create_engine(f"sqlite+pysqlite:///{database_path}", echo=False) with Session(db_engine) as session, session.begin(): + build_spec_content = None match build_spec_format: case BuildSpecFormat.REPRODUCIBLE_CENTRAL: - return gen_reproducible_central_build_spec( + build_spec_content = gen_reproducible_central_build_spec( purl=purl, session=session, patches=CLI_COMMAND_PATCHES, ) + + if not build_spec_content: + logger.error("Error while generate reproducible central build spec.") + return os.EX_DATAERR + + logger.debug("Build spec content: \n%s", build_spec_content) + + build_spec_filepath = os.path.join( + output_path, + "buildspec", + get_purl_based_dir( + purl_name=purl.name, + purl_namespace=purl.namespace, + purl_type=purl.type, + ), + "macaron.buildspec", + ) + + os.makedirs( + name=os.path.dirname(build_spec_filepath), + exist_ok=True, + ) + + try: + with open(build_spec_filepath, mode="w", encoding="utf-8") as file: + logger.info( + "Generating the %s format build spec to %s.", + build_spec_format.value, + os.path.relpath(build_spec_filepath, os.getcwd()), + ) + file.write(build_spec_content) + except OSError as error: + logger.error( + "Could not generate the Buildspec to %s. 
Error: %s", + os.path.relpath(build_spec_filepath, os.getcwd()), + error, + ) + return os.EX_OSERR + + return os.EX_OK diff --git a/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml index 161beeb08..1843e1f20 100644 --- a/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml +++ b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml @@ -29,5 +29,5 @@ steps: kind: compare options: kind: rc_build_spec - result: ./output/macaron.buildspec + result: ./output/buildspec/github/behnazh-w/example-maven-app/macaron.buildspec expected: expected_macaron.buildspec diff --git a/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml b/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml index 7855b1be2..26361681d 100644 --- a/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml +++ b/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml @@ -44,5 +44,5 @@ steps: kind: compare options: kind: rc_build_spec - result: ./output/macaron.buildspec + result: ./output/buildspec/maven/io_micronaut/micronaut-core/macaron.buildspec expected: expected_macaron.buildspec From 5a896393780f897e84befe9ca28c09161a93378f Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Thu, 7 Aug 2025 00:01:09 +1000 Subject: [PATCH 20/27] feat: always prioritize jdk version obtained from JAR from maven central --- .../reproducible_central.py | 91 +++++++++++-------- .../test_reproducible_central.py | 66 ++++++++++++++ 2 files changed, 120 insertions(+), 37 deletions(-) diff --git a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py index 46995f5a0..5e95d0c75 100644 --- a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py +++ 
b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py @@ -121,7 +121,7 @@ def _get_build_command_sequence(cmds_sequence: list[list[str]]) -> str: def _get_default_build_command_sequence( - macaron_build_tool_name: _MacaronBuildToolName, + rc_build_tool_name: _ReproducibleCentralBuildToolName, patches: Mapping[ PatchCommandBuildTool, Mapping[str, PatchValueType | None], @@ -129,16 +129,19 @@ def _get_default_build_command_sequence( ) -> list[list[str]] | None: """Return a default build command sequence for the corresponding build tool name discovered by Macaron.""" default_build_command = None - if macaron_build_tool_name == _MacaronBuildToolName.MAVEN: - default_build_command = "mvn clean package" - if macaron_build_tool_name == _MacaronBuildToolName.GRADLE: - default_build_command = "./gradlew clean assemble publishToMavenLocal" + match rc_build_tool_name: + case _ReproducibleCentralBuildToolName.MAVEN: + default_build_command = "mvn clean package" + case _ReproducibleCentralBuildToolName.GRADLE: + default_build_command = "./gradlew clean assemble publishToMavenLocal" + case _: + pass if not default_build_command: logger.critical( - "The default build command %s is not supported for getting default build command.", - macaron_build_tool_name, + "There is no default build command available for RC build tool %s.", + rc_build_tool_name, ) return None @@ -166,12 +169,27 @@ def _get_macaron_build_tool_name(build_tool_facts: Sequence[BuildToolFacts]) -> except ValueError: continue - # TODO: What happen if we report multiple build tool in the database. + # TODO: What happen if we report multiple build tools in the database. 
return macaron_build_tool_name return None +def _get_rc_build_tool_name( + build_tool_facts: Sequence[BuildToolFacts], +) -> _ReproducibleCentralBuildToolName | None: + """Return the build tool name to be put into the RC buildspec.""" + macaron_build_tool_name = _get_macaron_build_tool_name(build_tool_facts) + if not macaron_build_tool_name: + return None + + match macaron_build_tool_name: + case _MacaronBuildToolName.MAVEN: + return _ReproducibleCentralBuildToolName.MAVEN + case _MacaronBuildToolName.GRADLE: + return _ReproducibleCentralBuildToolName.GRADLE + + def _gen_reproducible_central_build_spec( build_info: RcInternalBuildInfo, patches: Mapping[ @@ -201,6 +219,7 @@ def _gen_reproducible_central_build_spec( pformat(patches), ) + # Getting groupid, artifactid and version from PURL. group = purl.namespace artifact = purl.name version = purl.version @@ -210,29 +229,20 @@ def _gen_reproducible_central_build_spec( extra_comments.append(f"Input PURL - {purl}") - macaron_build_tool_name = _get_macaron_build_tool_name(build_info.build_tool_facts) - if not macaron_build_tool_name: + # Getting the RC build tool name from the build tool check facts. + rc_build_tool_name = _get_rc_build_tool_name(build_info.build_tool_facts) + if not rc_build_tool_name: logger.error( - "The PackageURL %s doesn't have any build tool that we support for generating RC buildspec. It has %s.", - purl.to_string(), + "The Component doesn't have any build tool that we support for generating RC buildspec. 
It has %s.", [(fact.build_tool_name, fact.language) for fact in build_info.build_tool_facts], ) return None - rc_build_tool_name = None - if macaron_build_tool_name == _MacaronBuildToolName.MAVEN: - rc_build_tool_name = _ReproducibleCentralBuildToolName.MAVEN - elif macaron_build_tool_name == _MacaronBuildToolName.GRADLE: - rc_build_tool_name = _ReproducibleCentralBuildToolName.GRADLE - if not rc_build_tool_name: - logger.critical("%s is not supported to generate RC's buildspec.", macaron_build_tool_name.value) - return None - # Set the default build command and jdk version. # The default build command depends on the build tool, while the default jdk version # is 8. final_build_command_seq = _get_default_build_command_sequence( - macaron_build_tool_name=macaron_build_tool_name, + rc_build_tool_name=rc_build_tool_name, patches=patches, ) if not final_build_command_seq: @@ -246,6 +256,22 @@ def _gen_reproducible_central_build_spec( f"Initial default JDK version {final_jdk_version} and default build command {final_build_command_seq}." ) + # We always attempt to get the JDK version from maven central JAR for this GAV artifact. + jdk_from_jar = find_jdk_version_from_central_maven_repo( + group_id=purl.name, + artifact_id=group, + version=version, + ) + if jdk_from_jar: + extra_comments.append(f"Use JDK version from jar {jdk_from_jar}.") + final_jdk_version = jdk_from_jar + else: + extra_comments.append(f"No JDK version found from jar {jdk_from_jar}.") + + # If there is a build command available from the database, patch and use it in the final + # buildspec. + # If we couldn't find a JDK version from Maven Central JAR, we use the language_version + # of the build command with the highest confidence score (if available). if build_info.generic_build_command_facts: # The elements are ordered in decreasing confidence score. We pick the highest one. 
build_fact = build_info.generic_build_command_facts[0] @@ -265,23 +291,14 @@ def _gen_reproducible_central_build_spec( final_build_command_seq = patched_build_commands - lookup_jdk_vers = build_fact.language_versions - if lookup_jdk_vers: - lookup_jdk_ver = lookup_jdk_vers.pop() - extra_comments.append(f"Jdk version from lookup build command {lookup_jdk_ver}.") + if not jdk_from_jar and build_fact.language_versions: + # We pop the last element without any concrete reason, and we haven't had any issue + # with it so far. + lookup_jdk_ver = build_fact.language_versions.pop() + extra_comments.append(f"Use Jdk version from lookup build command {lookup_jdk_ver}.") final_jdk_version = lookup_jdk_ver else: - extra_comments.append("No JDK version found from lookup result.") - jdk_from_jar = find_jdk_version_from_central_maven_repo( - group_id=purl.name, - artifact_id=group, - version=version, - ) - if jdk_from_jar: - extra_comments.append(f"Found JDK version from jar {jdk_from_jar}.") - final_jdk_version = jdk_from_jar - else: - extra_comments.append(f"No JDK version found from jar {jdk_from_jar}.") + extra_comments.append("No JDK version used from lookup result.") major_jdk_version = normalize_jdk_version(final_jdk_version) if not major_jdk_version: diff --git a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py index 6197d60c7..11c687f42 100644 --- a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py +++ b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py @@ -8,7 +8,10 @@ from macaron.build_spec_generator.reproducible_central.reproducible_central import ( _get_build_command_sequence, _get_extra_comments, + _get_rc_build_tool_name, + _ReproducibleCentralBuildToolName, ) +from macaron.slsa_analyzer.checks.build_tool_check import BuildToolFacts @pytest.mark.parametrize( @@ -62,3 +65,66 @@ def 
test_get_build_command_sequence( ) -> None: """Test the _get_build_command_sequence function.""" assert _get_build_command_sequence(cmds_sequence) == expected + + +@pytest.mark.parametrize( + ("build_tool_facts", "expected"), + [ + pytest.param( + [ + BuildToolFacts( + language="python", + build_tool_name="pip", + ) + ], + None, + id="python_is_not_supported_for_rc", + ), + pytest.param( + [ + BuildToolFacts( + language="java", + build_tool_name="gradle", + ) + ], + _ReproducibleCentralBuildToolName.GRADLE, + id="build_tool_gradle", + ), + pytest.param( + [ + BuildToolFacts( + language="java", + build_tool_name="maven", + ) + ], + _ReproducibleCentralBuildToolName.MAVEN, + id="build_tool_maven", + ), + pytest.param( + [ + BuildToolFacts( + language="not_java", + build_tool_name="maven", + ) + ], + None, + id="java_is_the_only_supported_language", + ), + pytest.param( + [ + BuildToolFacts( + language="java", + build_tool_name="some_java_build_tool", + ) + ], + None, + id="test_unsupported_java_build_tool", + ), + ], +) +def test_get_rc_build_tool_name( + build_tool_facts: list[BuildToolFacts], + expected: _ReproducibleCentralBuildToolName | None, +) -> None: + """Test the _get_rc_build_tool_name function.""" + assert _get_rc_build_tool_name(build_tool_facts) == expected From d339e503f03ecc72b4d5acc58fe83eaf108eb66c Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Thu, 7 Aug 2025 09:31:18 +1000 Subject: [PATCH 21/27] chore: fix typos --- src/macaron/build_spec_generator/build_command_patcher.py | 2 +- .../cli_command_parser/gradle_cli_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/macaron/build_spec_generator/build_command_patcher.py b/src/macaron/build_spec_generator/build_command_patcher.py index f6bac755f..bd9db4db5 100644 --- a/src/macaron/build_spec_generator/build_command_patcher.py +++ b/src/macaron/build_spec_generator/build_command_patcher.py @@ -59,7 +59,7 @@ def _patch_commands( cli_command = 
effective_cli_parser.parse(cmds) except CommandLineParseError as error: logger.error( - "Failed to parse the mvn command %s. Error %s.", + "Failed to parse the cli command %s. Error %s.", " ".join(cmds), error, ) diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py index c66c6c4e5..986452b41 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py @@ -452,7 +452,7 @@ def __init__(self) -> None: """Initialize the instance.""" self.arg_parser = argparse.ArgumentParser( description="Parse Gradle CLI command", - prog="mvn", + prog="gradle", add_help=False, # https://docs.python.org/3/library/argparse.html#exit-on-error # Best effort of parsing the build command. Therefore, we don't want to exit on error. From 95908b4c88649560c10d037c45f5b110a5b3ed9c Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Fri, 8 Aug 2025 16:06:54 +1000 Subject: [PATCH 22/27] chore: remove extra comments feature and some refactoring to simplify the generation of reproducible central build spec --- .../build_spec_generator/jdk_finder.py | 53 ++- .../reproducible_central/rc_build_info.py | 146 ------ .../reproducible_central.py | 435 +++++++++++------- .../test_reproducible_central.py | 87 ++-- 4 files changed, 346 insertions(+), 375 deletions(-) delete mode 100644 src/macaron/build_spec_generator/reproducible_central/rc_build_info.py diff --git a/src/macaron/build_spec_generator/jdk_finder.py b/src/macaron/build_spec_generator/jdk_finder.py index dc9ef8cd4..d883a94d1 100644 --- a/src/macaron/build_spec_generator/jdk_finder.py +++ b/src/macaron/build_spec_generator/jdk_finder.py @@ -25,6 +25,13 @@ class JavaArtifactExt(str, Enum): JAR = ".jar" +class CacheStrategy(Enum): + """The strategy for caching the downloaded artifacts for JDK version finding.""" + + DISABLE = 0 + 
MAVEN_LAYOUT = 1 + + def download_file(url: str, dest: str) -> None: """Stream a file into a local destination. @@ -213,7 +220,7 @@ def find_jdk_version_from_remote_maven_repo_cache( """Return the jdk version string from an artifact matching a given GAV from a remote maven layout repository. This function cache the downloaded artifact in a maven layout https://maven.apache.org/repository/layout.html - undert ``local_cache_repo``. + under ``local_cache_repo``. We assume that the remote maven layout repository supports downloading a file through a HTTPS URL. Parameters @@ -286,7 +293,7 @@ def find_jdk_version_from_central_maven_repo( group_id: str, artifact_id: str, version: str, - use_cache: bool = True, + cache_strat: CacheStrategy = CacheStrategy.MAVEN_LAYOUT, ) -> str | None: """Return the jdk version string from an artifact matching a given GAV from Maven Central repository. @@ -302,11 +309,8 @@ def find_jdk_version_from_central_maven_repo( The artifact ID part of the GAV coordinate. version: str The version part of the GAV coordinate. - remote_maven_repo_url: str - The URL to the remote maven layout repository. - local_cache_repo: str - The path to a local directory for caching the downloaded artifact used in JDK version - extraction. + cache_strat: CacheStrategy + Specify how artifacts from maven central are persisted. 
Returns ------- @@ -321,20 +325,21 @@ def find_jdk_version_from_central_maven_repo( ) asset_name = f"{artifact_id}-{version}{JavaArtifactExt.JAR.value}" - if use_cache: - return find_jdk_version_from_remote_maven_repo_cache( - group_id=group_id, - artifact_id=artifact_id, - version=version, - asset_name=asset_name, - remote_maven_repo_url=central_repo_url, - local_cache_repo=local_cache_maven_repo, - ) - - return find_jdk_version_from_remote_maven_repo_standalone( - group_id=group_id, - artifact_id=artifact_id, - version=version, - asset_name=asset_name, - remote_maven_repo_url=central_repo_url, - ) + match cache_strat: + case CacheStrategy.MAVEN_LAYOUT: + return find_jdk_version_from_remote_maven_repo_cache( + group_id=group_id, + artifact_id=artifact_id, + version=version, + asset_name=asset_name, + remote_maven_repo_url=central_repo_url, + local_cache_repo=local_cache_maven_repo, + ) + case CacheStrategy.DISABLE: + return find_jdk_version_from_remote_maven_repo_standalone( + group_id=group_id, + artifact_id=artifact_id, + version=version, + asset_name=asset_name, + remote_maven_repo_url=central_repo_url, + ) diff --git a/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py b/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py deleted file mode 100644 index 8e33f0313..000000000 --- a/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
- -"""This module contains the representation of information needed for Reproducible Central Buildspec generation.""" - -import logging -import pprint -from collections.abc import Sequence -from dataclasses import dataclass - -from packageurl import PackageURL -from sqlalchemy.orm import Session - -from macaron.build_spec_generator.macaron_db_extractor import ( - GenericBuildCommandInfo, - lookup_any_build_command, - lookup_build_tools_check, - lookup_latest_component, -) -from macaron.database.table_definitions import Repository -from macaron.errors import QueryMacaronDatabaseError -from macaron.slsa_analyzer.checks.build_tool_check import BuildToolFacts - -logger: logging.Logger = logging.getLogger(__name__) - - -@dataclass -class RcInternalBuildInfo: - """An internal representation of the information obtained from the database for a PURL. - - This is only used for generating the Reproducible Central build spec. - """ - - purl: PackageURL - repository: Repository - generic_build_command_facts: Sequence[GenericBuildCommandInfo] | None - latest_component_id: int - build_tool_facts: Sequence[BuildToolFacts] - - -def format_build_command_infos(build_command_infos: list[GenericBuildCommandInfo]) -> str: - """Return the prettified str format for a list of `GenericBuildCommandInfo` instances.""" - pretty_formatted_ouput = [pprint.pformat(build_command_info) for build_command_info in build_command_infos] - return "\n".join(pretty_formatted_ouput) - - -def get_rc_internal_build_info( - purl: PackageURL, - session: Session, -) -> RcInternalBuildInfo | None: - """Return an ``RcInternalBuildInfo`` instance that captures the build related information for a PackageURL. - - Parameters - ---------- - purl: PackageURL - The PackageURL to extract information about. - session: Session - The SQLAlchemy Session for the Macaron database. - - Returns - ------- - RcInternalBuildInfo | None - An instance of ``RcInternalBuildInfo`` or None if there was an error. 
- """ - try: - latest_component = lookup_latest_component( - purl=purl, - session=session, - ) - except QueryMacaronDatabaseError as lookup_component_error: - logger.error( - "Unexpected result from querying latest component for %s. Error: %s", - purl.to_string(), - lookup_component_error, - ) - return None - if not latest_component: - logger.error( - "Cannot find an analysis result for PackageURL %s in the database. " - + "Please check if an analysis for it exists in the database.", - purl.to_string(), - ) - return None - - latest_component_id = latest_component.id - logger.debug("Latest component ID: %d", latest_component_id) - - latest_component_repository = latest_component.repository - if not latest_component_repository: - logger.error( - "Cannot find any repository information for %s in the database.", - purl.to_string(), - ) - return None - logger.info( - "Repository information for purl %s: url %s, commit %s", - purl, - latest_component_repository.remote_path, - latest_component_repository.commit_sha, - ) - - try: - build_tool_facts = lookup_build_tools_check( - component_id=latest_component_id, - session=session, - ) - except QueryMacaronDatabaseError as lookup_build_tools_error: - logger.error( - "Unexpected result from querying build tools for %s. Error: %s", - purl.to_string(), - lookup_build_tools_error, - ) - return None - if not build_tool_facts: - logger.error( - "Cannot find any build tool for PackageURL %s in the database.", - purl.to_string(), - ) - return None - logger.info( - "Build tools discovered from the %s table: %s", - BuildToolFacts.__tablename__, - [fact.build_tool_name for fact in build_tool_facts], - ) - - try: - lookup_build_command_infos = lookup_any_build_command(latest_component_id, session) - except QueryMacaronDatabaseError as lookup_build_command_error: - logger.error( - "Unexpected result from querying all build command information for %s. 
Error: %s", - purl.to_string(), - lookup_build_command_error, - ) - return None - logger.debug( - "Build command information discovered\n%s", - format_build_command_infos(lookup_build_command_infos), - ) - - return RcInternalBuildInfo( - purl=purl, - repository=latest_component_repository, - latest_component_id=latest_component_id, - build_tool_facts=build_tool_facts, - generic_build_command_facts=lookup_build_command_infos, - ) diff --git a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py index 5e95d0c75..5eddd8ad6 100644 --- a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py +++ b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py @@ -4,6 +4,7 @@ """This module contains the logic to generate a build spec in Reproducible Central format.""" import logging +import pprint import shlex from collections.abc import Mapping, Sequence from enum import Enum @@ -16,10 +17,13 @@ from macaron.build_spec_generator.build_command_patcher import PatchCommandBuildTool, PatchValueType, patch_commands from macaron.build_spec_generator.jdk_finder import find_jdk_version_from_central_maven_repo from macaron.build_spec_generator.jdk_version_normalizer import normalize_jdk_version -from macaron.build_spec_generator.reproducible_central.rc_build_info import ( - RcInternalBuildInfo, - get_rc_internal_build_info, +from macaron.build_spec_generator.macaron_db_extractor import ( + GenericBuildCommandInfo, + lookup_any_build_command, + lookup_build_tools_check, + lookup_latest_component, ) +from macaron.errors import QueryMacaronDatabaseError from macaron.slsa_analyzer.checks.build_tool_check import BuildToolFacts logger: logging.Logger = logging.getLogger(__name__) @@ -38,8 +42,6 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
# Generated by Macaron version {macaron_version} -{extra_comment} - groupId={group_id} artifactId={artifact_id} version={version} @@ -70,7 +72,7 @@ class _MacaronBuildToolName(str, Enum): GRADLE = "gradle" -class _ReproducibleCentralBuildToolName(str, Enum): +class ReproducibleCentralBuildTool(str, Enum): """Represent the name of the build tool used in the Reproducible Central's Buildspec. https://github.com/jvm-repo-rebuild/reproducible-central/blob/master/doc/BUILDSPEC.md @@ -81,6 +83,23 @@ class _ReproducibleCentralBuildToolName(str, Enum): SBT = "sbt" +def format_build_command_infos(build_command_infos: list[GenericBuildCommandInfo]) -> str: + """Return the prettified str format for a list of `GenericBuildCommandInfo` instances. + + Parameters + ---------- + build_command_infos: GenericBuildCommandInfo + A list of ``GenericBuildCommandInfo`` instances. + + Returns + ------- + str + The prettified output. + """ + pretty_formatted_ouput = [pprint.pformat(build_command_info) for build_command_info in build_command_infos] + return "\n".join(pretty_formatted_ouput) + + def remove_shell_quote(cmd: list[str]) -> list[str]: """Remove shell quotes from a shell command. @@ -103,38 +122,49 @@ def remove_shell_quote(cmd: list[str]) -> list[str]: return shlex.split(" ".join(cmd)) -def _get_extra_comments(comments: list[str]) -> str: - """Generate the shell comments for adding additional information into the RC-build spec.""" - shell_comments = [f"# {comment}" for comment in comments] - return "\n".join(shell_comments) +def get_rc_build_command(cmds_sequence: list[list[str]]) -> str: + """Return a single command as string to be used in RC buildspec from a sequence of commands. + The build commands in the sequence will be ``&&`` together, because RC's build spec + is a shell script. -def _get_build_command_sequence(cmds_sequence: list[list[str]]) -> str: - """Return a build command sequence as a string. 
+ Parameters + ---------- + cmds_sequence: list[list[str]] + The sequence of build commands. - The build commands in the sequence will be && together, because RC's build spec - is a shell script. + Returns + ------- + str + A bash command to be used in RC's command field. """ removed_shell_quote = [" ".join(remove_shell_quote(cmds)) for cmds in cmds_sequence] result = " && ".join(removed_shell_quote) return result -def _get_default_build_command_sequence( - rc_build_tool_name: _ReproducibleCentralBuildToolName, - patches: Mapping[ - PatchCommandBuildTool, - Mapping[str, PatchValueType | None], - ], -) -> list[list[str]] | None: - """Return a default build command sequence for the corresponding build tool name discovered by Macaron.""" +def get_rc_default_build_command( + rc_build_tool_name: ReproducibleCentralBuildTool, +) -> list[str] | None: + """Return a default build command for a type of Reproducible Central build tool type. + + Parameters + ---------- + rc_build_tool_name: ReproducibleCentralBuildTool + The type of build tool to get the default build command. + + Returns + ------- + list[str] | None + The build command as a list of strings or None if we cannot get one for this tool. 
+ """ default_build_command = None match rc_build_tool_name: - case _ReproducibleCentralBuildToolName.MAVEN: - default_build_command = "mvn clean package" - case _ReproducibleCentralBuildToolName.GRADLE: - default_build_command = "./gradlew clean assemble publishToMavenLocal" + case ReproducibleCentralBuildTool.MAVEN: + default_build_command = "mvn clean package".split() + case ReproducibleCentralBuildTool.GRADLE: + default_build_command = "./gradlew clean assemble publishToMavenLocal".split() case _: pass @@ -145,19 +175,7 @@ def _get_default_build_command_sequence( ) return None - patched_build_commands = patch_commands( - cmds_sequence=[default_build_command.split()], - patches=patches, - ) - - if not patched_build_commands: - logger.error( - "Failed to patch default build command %s.", - default_build_command, - ) - return None - - return patched_build_commands + return default_build_command def _get_macaron_build_tool_name(build_tool_facts: Sequence[BuildToolFacts]) -> _MacaronBuildToolName | None: @@ -175,152 +193,119 @@ def _get_macaron_build_tool_name(build_tool_facts: Sequence[BuildToolFacts]) -> return None -def _get_rc_build_tool_name( +def _get_rc_build_tool_name_from_build_facts( build_tool_facts: Sequence[BuildToolFacts], -) -> _ReproducibleCentralBuildToolName | None: - """Return the build tool name to be put into the RC buildspec.""" +) -> ReproducibleCentralBuildTool | None: + """Return the build tool name to be put into the RC buildspec from a sequence of BuildToolFacts instances.""" macaron_build_tool_name = _get_macaron_build_tool_name(build_tool_facts) if not macaron_build_tool_name: + logger.error( + "No supported build tool are found. 
Expect %s", + [build_tool.value for build_tool in _MacaronBuildToolName], + ) return None match macaron_build_tool_name: case _MacaronBuildToolName.MAVEN: - return _ReproducibleCentralBuildToolName.MAVEN + return ReproducibleCentralBuildTool.MAVEN case _MacaronBuildToolName.GRADLE: - return _ReproducibleCentralBuildToolName.GRADLE + return ReproducibleCentralBuildTool.GRADLE -def _gen_reproducible_central_build_spec( - build_info: RcInternalBuildInfo, - patches: Mapping[ - PatchCommandBuildTool, - Mapping[str, PatchValueType | None], - ], -) -> str | None: - """Return the RC's Buildspec content from a ``RcInternalBuildInfo`` instance. +def get_rc_build_tool_name( + component_id: int, + session: sqlalchemy.orm.Session, +) -> ReproducibleCentralBuildTool | None: + """Return the ``ReproducibleCentralBuildTool`` instance corresponding to the build tool of the component. - This function will perform necessary validation on the data captured within ``build_info`` to make sure - it has enough information for generating the output BuildSpec. + Parameters + ---------- + component_id: int + The id of the component we are finding build command for. + session: sqlalchemy.orm.Session + The SQLAlchemy Session opened for the database to extract build information. - This function will use the information available in ``build_info`` to populate the file. - For example, the GAV coordinate can be obtained from ``build_info.purl``. + Returns + ------- + ReproducibleCentralBuildTool | None + The ``ReproducibleCentralBuildTool`` instance for this component. + """ + try: + build_tool_facts = lookup_build_tools_check( + component_id=component_id, + session=session, + ) + except QueryMacaronDatabaseError as lookup_build_tools_error: + logger.error( + "Unexpected result from querying build tools for component id %s. 
Error: %s", + component_id, + lookup_build_tools_error, + ) + return None + if not build_tool_facts: + logger.error( + "Cannot find any build tool for component id %s in the database.", + component_id, + ) + return None + logger.info( + "Build tools discovered from the %s table: %s", + BuildToolFacts.__tablename__, + [(fact.build_tool_name, fact.language) for fact in build_tool_facts], + ) - The ``patches`` mapping will be used for patching the build command in the `command` section of the Buildspec - output. + return _get_rc_build_tool_name_from_build_facts(build_tool_facts) - The function will return the Buildspec file content as string or None if there is an error. - """ - extra_comments = [] - purl = build_info.purl - logger.debug( - "Generating build spec for %s with command patches:\n%s", - purl, - pformat(patches), - ) +def get_lookup_build_command_info( + component_id: int, + session: sqlalchemy.orm.Session, +) -> GenericBuildCommandInfo | None: + """Return the highest confidence build command information from the database for a component. - # Getting groupid, artifactid and version from PURL. - group = purl.namespace - artifact = purl.name - version = purl.version - if group is None or version is None: - logger.error("Missing group and/or version for purl %s.", purl.to_string()) - return None + The build command is found by looking up CheckFacts for build-related checks. - extra_comments.append(f"Input PURL - {purl}") + Parameters + ---------- + component_id: int + The id of the component we are finding build command for. + session: sqlalchemy.orm.Session + The SQLAlchemy Session opened for the database to extract build information. - # Getting the RC build tool name from the build tool check facts. 
- rc_build_tool_name = _get_rc_build_tool_name(build_info.build_tool_facts) - if not rc_build_tool_name: + Returns + ------- + GenericBuildCommandInfo | None + The GenericBuildCommandInfo object for the highest confidence build command or None if there was + an error, or no build command is found from the database. + """ + try: + lookup_build_command_infos = lookup_any_build_command(component_id, session) + except QueryMacaronDatabaseError as lookup_build_command_error: logger.error( - "The Component doesn't have any build tool that we support for generating RC buildspec. It has %s.", - [(fact.build_tool_name, fact.language) for fact in build_info.build_tool_facts], - ) - return None - - # Set the default build command and jdk version. - # The default build command depends on the build tool, while the default jdk version - # is 8. - final_build_command_seq = _get_default_build_command_sequence( - rc_build_tool_name=rc_build_tool_name, - patches=patches, - ) - if not final_build_command_seq: - logger.critical( - "Cannot generate a default build command for %s", - purl, + "Unexpected result from querying all build command information for component id %s. Error: %s", + component_id, + lookup_build_command_error, ) return None - final_jdk_version = "8" - extra_comments.append( - f"Initial default JDK version {final_jdk_version} and default build command {final_build_command_seq}." + logger.debug( + "Build command information discovered\n%s", + format_build_command_infos(lookup_build_command_infos), ) - # We always attempt to get the JDK version from maven central JAR for this GAV artifact. 
- jdk_from_jar = find_jdk_version_from_central_maven_repo( - group_id=purl.name, - artifact_id=group, - version=version, - ) - if jdk_from_jar: - extra_comments.append(f"Use JDK version from jar {jdk_from_jar}.") - final_jdk_version = jdk_from_jar - else: - extra_comments.append(f"No JDK version found from jar {jdk_from_jar}.") - - # If there is a build command available from the database, patch and use it in the final - # buildspec. - # If we couldn't find a JDK version from Maven Central JAR, we use the language_version - # of the build command with the highest confidence score (if available). - if build_info.generic_build_command_facts: - # The elements are ordered in decreasing confidence score. We pick the highest one. - build_fact = build_info.generic_build_command_facts[0] - lookup_build_command = build_fact.command - extra_comments.append(f"The lookup build command: {lookup_build_command}") - - patched_build_commands = patch_commands( - cmds_sequence=[lookup_build_command], - patches=patches, - ) - if not patched_build_commands: - logger.error( - "Failed to patch look up command %s.", - lookup_build_command, - ) - return None - - final_build_command_seq = patched_build_commands - - if not jdk_from_jar and build_fact.language_versions: - # We pop the last element without any concrete reason, and we haven't had any issue - # with it so far. 
- lookup_jdk_ver = build_fact.language_versions.pop() - extra_comments.append(f"Use Jdk version from lookup build command {lookup_jdk_ver}.") - final_jdk_version = lookup_jdk_ver - else: - extra_comments.append("No JDK version used from lookup result.") - - major_jdk_version = normalize_jdk_version(final_jdk_version) - if not major_jdk_version: - logger.error("Failed to obtain the major version of %s", final_jdk_version) - return None + return lookup_build_command_infos[0] if lookup_build_command_infos else None - template_format_values: dict[str, str] = { - "macaron_version": importlib_metadata.version("macaron"), - "group_id": group, - "artifact_id": artifact, - "version": version, - "git_repo": build_info.repository.remote_path, - "git_tag": build_info.repository.commit_sha, - "tool": rc_build_tool_name.value, - "newline": "lf", - "buildinfo": f"target/{artifact}-{version}.buildinfo", - "extra_comment": _get_extra_comments(extra_comments), - "jdk": final_jdk_version, - "command": _get_build_command_sequence(final_build_command_seq), - } - return STRING_TEMPLATE.format_map(template_format_values) +def get_lookup_build_command_jdk( + build_command_info: GenericBuildCommandInfo, +) -> str | None: + """Return the jdk version from a GenericBuildCommandInfo object.""" + if build_command_info.language_versions: + # There isn't a concrete reason why we select the last element. + # We just use this at this point because we haven't looked into + # a better way to select the jdk version obtained from the database. + return build_command_info.language_versions.pop() + + return None def gen_reproducible_central_build_spec( @@ -354,19 +339,133 @@ def gen_reproducible_central_build_spec( for this PURL, 3. Failed to patch the build commands using the provided ``patches``, 4. The database from ``session`` doesn't contain enough information. 
""" - internal_build_info = get_rc_internal_build_info( - purl=purl, - session=session, + logger.debug( + "Generating build spec for %s with command patches:\n%s", + purl, + pformat(patches), ) - if not internal_build_info: + # Getting groupid, artifactid and version from PURL. + group = purl.namespace + artifact = purl.name + version = purl.version + if group is None or version is None: + logger.error("Missing group and/or version for purl %s.", purl.to_string()) + return None + + try: + latest_component = lookup_latest_component( + purl=purl, + session=session, + ) + except QueryMacaronDatabaseError as lookup_component_error: + logger.error( + "Unexpected result from querying latest component for %s. Error: %s", + purl.to_string(), + lookup_component_error, + ) + return None + if not latest_component: logger.error( - "Failed to obtain necessary data for purl %s from the database.", - purl, + "Cannot find an analysis result for PackageURL %s in the database. " + + "Please check if an analysis for it exists in the database.", + purl.to_string(), ) return None - return _gen_reproducible_central_build_spec( - build_info=internal_build_info, + latest_component_repository = latest_component.repository + if not latest_component_repository: + logger.error( + "Cannot find any repository information for %s in the database.", + purl.to_string(), + ) + return None + logger.info( + "Repository information for purl %s: url %s, commit %s", + purl, + latest_component_repository.remote_path, + latest_component_repository.commit_sha, + ) + + # Getting the RC build tool name from the build tool check facts. + rc_build_tool_name = get_rc_build_tool_name( + component_id=latest_component.id, + session=session, + ) + if not rc_build_tool_name: + return None + + # We always attempt to get the JDK version from maven central JAR for this GAV artifact. 
+ jdk_from_jar = find_jdk_version_from_central_maven_repo( + group_id=group, + artifact_id=artifact, + version=version, + ) + logger.info( + "Attempted to find JDK from Maven Central JAR. Result: %s", + jdk_from_jar or "Cannot find any.", + ) + + # Obtain the highest confidence build command info from the database. + lookup_build_command_info = get_lookup_build_command_info( + component_id=latest_component.id, + session=session, + ) + logger.info( + "Attempted to find build command from the database. Result: %s", + lookup_build_command_info or "Cannot find any.", + ) + lookup_build_command_jdk = ( + get_lookup_build_command_jdk( + lookup_build_command_info, + ) + if lookup_build_command_info + else None + ) + + # Select jdk from jar from different source. + # The default JDK version is 8. + selected_jdk_version = jdk_from_jar or lookup_build_command_jdk or "8" + major_jdk_version = normalize_jdk_version(selected_jdk_version) + if not major_jdk_version: + logger.error("Failed to obtain the major version of %s", selected_jdk_version) + return None + + # Select build commands from lookup or use a default one. 
+ selected_build_command = ( + lookup_build_command_info.command + if lookup_build_command_info + else get_rc_default_build_command( + rc_build_tool_name, + ) + ) + if not selected_build_command: + logger.error("Failed to get a build command for %s.", purl.to_string()) + return None + + patched_build_commands = patch_commands( + cmds_sequence=[selected_build_command], patches=patches, ) + if not patched_build_commands: + logger.error( + "Failed to patch command sequences %s.", + [selected_build_command], + ) + return None + + template_format_values: dict[str, str] = { + "macaron_version": importlib_metadata.version("macaron"), + "group_id": group, + "artifact_id": artifact, + "version": version, + "git_repo": latest_component_repository.remote_path, + "git_tag": latest_component_repository.commit_sha, + "tool": rc_build_tool_name.value, + "newline": "lf", + "buildinfo": f"target/{artifact}-{version}.buildinfo", + "jdk": major_jdk_version, + "command": get_rc_build_command(patched_build_commands), + } + + return STRING_TEMPLATE.format_map(template_format_values) diff --git a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py index 11c687f42..e07878411 100644 --- a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py +++ b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py @@ -5,42 +5,17 @@ import pytest +from macaron.build_spec_generator.macaron_db_extractor import GenericBuildCommandInfo from macaron.build_spec_generator.reproducible_central.reproducible_central import ( - _get_build_command_sequence, - _get_extra_comments, - _get_rc_build_tool_name, - _ReproducibleCentralBuildToolName, + ReproducibleCentralBuildTool, + _get_rc_build_tool_name_from_build_facts, + get_lookup_build_command_jdk, + get_rc_build_command, + get_rc_default_build_command, ) from macaron.slsa_analyzer.checks.build_tool_check import 
BuildToolFacts -@pytest.mark.parametrize( - ("comments", "expected"), - [ - pytest.param( - [ - "Input PURL - pkg:maven/oracle/macaron@v0.16.0", - "Initial default JDK version 8 and default build command boo", - ], - "# Input PURL - pkg:maven/oracle/macaron@v0.16.0\n# Initial default JDK version 8 and default build command boo", - ), - pytest.param( - [ - "Input PURL - pkg:maven/oracle/macaron@v0.16.0", - ], - "# Input PURL - pkg:maven/oracle/macaron@v0.16.0", - ), - pytest.param( - [], - "", - ), - ], -) -def test_get_extra_comments(comments: list[str], expected: str) -> None: - """Test the _get_extra_comments function.""" - assert _get_extra_comments(comments) == expected - - @pytest.mark.parametrize( ("cmds_sequence", "expected"), [ @@ -59,12 +34,12 @@ def test_get_extra_comments(comments: list[str], expected: str) -> None: ), ], ) -def test_get_build_command_sequence( +def test_get_rc_build_command( cmds_sequence: list[list[str]], expected: str, ) -> None: """Test the _get_build_command_sequence function.""" - assert _get_build_command_sequence(cmds_sequence) == expected + assert get_rc_build_command(cmds_sequence) == expected @pytest.mark.parametrize( @@ -87,7 +62,7 @@ def test_get_build_command_sequence( build_tool_name="gradle", ) ], - _ReproducibleCentralBuildToolName.GRADLE, + ReproducibleCentralBuildTool.GRADLE, id="build_tool_gradle", ), pytest.param( @@ -97,7 +72,7 @@ def test_get_build_command_sequence( build_tool_name="maven", ) ], - _ReproducibleCentralBuildToolName.MAVEN, + ReproducibleCentralBuildTool.MAVEN, id="build_tool_maven", ), pytest.param( @@ -124,7 +99,45 @@ def test_get_build_command_sequence( ) def test_get_rc_build_tool_name( build_tool_facts: list[BuildToolFacts], - expected: _ReproducibleCentralBuildToolName | None, + expected: ReproducibleCentralBuildTool | None, ) -> None: """Test the _get_rc_build_tool_name function.""" - assert _get_rc_build_tool_name(build_tool_facts) == expected + assert 
_get_rc_build_tool_name_from_build_facts(build_tool_facts) == expected + + +def test_get_rc_default_build_command_unsupported() -> None: + """Test the get_rc_default_build_command function for an unsupported RC build tool.""" + assert not get_rc_default_build_command(ReproducibleCentralBuildTool.SBT) + + +@pytest.mark.parametrize( + ("build_command_info", "expected"), + [ + pytest.param( + GenericBuildCommandInfo( + command=["mvn", "package"], + language="java", + language_versions=["8"], + build_tool_name="maven", + ), + "8", + id="has_language_version", + ), + pytest.param( + GenericBuildCommandInfo( + command=["mvn", "package"], + language="java", + language_versions=[], + build_tool_name="maven", + ), + None, + id="no_language_version", + ), + ], +) +def test_get_lookup_build_command_jdk( + build_command_info: GenericBuildCommandInfo, + expected: str | None, +) -> None: + """Test the get_lookup_build_command_jdk function.""" + assert get_lookup_build_command_jdk(build_command_info) == expected From 1ee8a19a651b12cffd9f72ebdb5c149230389660 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Sun, 7 Sep 2025 22:47:01 +1000 Subject: [PATCH 23/27] chore: misc fixes Signed-off-by: Ben Selwyn-Smith --- .../build_command_patcher.py | 8 ++--- .../build_spec_generator.py | 8 ++--- .../cli_command_parser/__init__.py | 8 ++--- .../cli_command_parser/gradle_cli_command.py | 6 ++-- .../cli_command_parser/gradle_cli_parser.py | 33 ++++++++++--------- .../cli_command_parser/maven_cli_command.py | 6 ++-- .../cli_command_parser/maven_cli_parser.py | 27 ++++++++------- .../unparsed_cli_command.py | 4 +-- .../build_spec_generator/jdk_finder.py | 24 +++++++------- .../jdk_version_normalizer.py | 12 +++---- .../macaron_db_extractor.py | 8 ++--- .../reproducible_central.py | 16 ++++----- src/macaron/errors.py | 2 +- 13 files changed, 81 insertions(+), 81 deletions(-) diff --git a/src/macaron/build_spec_generator/build_command_patcher.py 
b/src/macaron/build_spec_generator/build_command_patcher.py index bd9db4db5..023a7674b 100644 --- a/src/macaron/build_spec_generator/build_command_patcher.py +++ b/src/macaron/build_spec_generator/build_command_patcher.py @@ -99,10 +99,10 @@ def patch_commands( For each command in this command sequence: - - If the command is not a build command or the build tool is not supported by us, it will be leave intact. + - If the command is not a build command, or it's a tool we do not support, it will be left intact. - - If the command is a build command supported by us, it will be patch if a patch value is provided to ``patches``. - If no patch value is provided for a build command, it will be leave intact. + - If the command is a build command we support, it will be patched, if a patch value is provided in ``patches``. + If no patch value is provided for a build command, it will be left intact. `patches` is a mapping with: @@ -113,7 +113,7 @@ def patch_commands( For example: :class:`macaron.cli_command_parser.maven_cli_parser.MavenCLICommandParser.apply_patch`, :class:`macaron.cli_command_parser.gradle_cli_parser.GradleCLICommandParser.apply_patch`. - This means that all commands that matches a BuildTool will be apply by the same patch value. + This means that all commands that match a BuildTool will be applied by the same patch value. Returns ------- diff --git a/src/macaron/build_spec_generator/build_spec_generator.py b/src/macaron/build_spec_generator/build_spec_generator.py index 265871a94..688543dc0 100644 --- a/src/macaron/build_spec_generator/build_spec_generator.py +++ b/src/macaron/build_spec_generator/build_spec_generator.py @@ -20,7 +20,7 @@ class BuildSpecFormat(str, Enum): - """The build spec format that we supports.""" + """The build spec formats that we support.""" REPRODUCIBLE_CENTRAL = "rc-buildspec" @@ -87,7 +87,7 @@ def gen_build_spec_for_purl( int The exit code for this function. 
``os.EX_OK`` if everything is fine, ``os.EX_OSERR`` if the buildspec file cannot be created in the local filesystem, ``os.EX_DATAERR`` if there was an - error in generate the content for the buildspec file. + error generating the content for the buildspec file. """ db_engine = create_engine(f"sqlite+pysqlite:///{database_path}", echo=False) @@ -102,7 +102,7 @@ def gen_build_spec_for_purl( ) if not build_spec_content: - logger.error("Error while generate reproducible central build spec.") + logger.error("Error while generating reproducible central build spec.") return os.EX_DATAERR logger.debug("Build spec content: \n%s", build_spec_content) @@ -133,7 +133,7 @@ def gen_build_spec_for_purl( file.write(build_spec_content) except OSError as error: logger.error( - "Could not generate the Buildspec to %s. Error: %s", + "Could not create the build spec at %s. Error: %s", os.path.relpath(build_spec_filepath, os.getcwd()), error, ) diff --git a/src/macaron/build_spec_generator/cli_command_parser/__init__.py b/src/macaron/build_spec_generator/cli_command_parser/__init__.py index 8801ea55a..05a0e913e 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/__init__.py +++ b/src/macaron/build_spec_generator/cli_command_parser/__init__.py @@ -17,7 +17,7 @@ def is_list_of_strs(value: Any) -> TypeGuard[list[str]]: def is_dict_of_str_to_str_or_none(value: Any) -> TypeGuard[dict[str, str | None]]: - """Type guard for a dictionary with keys are string and values are strings or None.""" + """Type guard for a dictionary where the keys are string and values are strings or None.""" if not isinstance(value, dict): return False @@ -37,7 +37,7 @@ def patch_mapping( ) -> dict[str, str]: """Patch a mapping. - A key with value in patch set to None will be removed from the original. + A key with a value in the patch set to None will be removed from the original. 
Parameters ---------- @@ -79,7 +79,7 @@ class OptionDef(Generic[P]): @abstractmethod def is_valid_patch_option(self, patch: Any) -> TypeGuard[P]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" raise NotImplementedError() @abstractmethod @@ -162,4 +162,4 @@ def apply_patch( cli_command: T, options_patch: Mapping[str, Y_contra | None], ) -> T: - """Return the a new CLICommand object with its option patched, while persisting the executable path.""" + """Return a new CLICommand object with its option patched, while persisting the executable path.""" diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py index 48d0000fc..5c3e333cd 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py @@ -139,17 +139,17 @@ def from_parsed_arg( def parse_properties(props: list[str]) -> dict[str, str]: """Return a dictionary that maps between a property and its value. - Each property definition value in `props` can have either of these format: + Each property definition value in `props` can have either of these formats: - `property=value` (e.g. `property=value` from `-Dproperty=value`): this will be parsed into a dictionary mapping of `"property": "value"`. - Both the key and value of this mapping is of type string. + Both the key and value of this mapping are of type string. - `property` (e.g. `property` from `-Dproperty`): this will be parsed into a dictionary mapping of `"property": `. Parameters ---------- props: list[str] - The list of properties definition provided in the cli command. + The list of property definitions provided in the cli command. This is the list parsed by argparse. 
Returns diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py index 986452b41..2484f8325 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py @@ -47,7 +47,7 @@ class GradleOptionalFlag(OptionDef[bool]): dest: str | None = field(default=None) def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" return isinstance(patch, bool) def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: @@ -76,7 +76,7 @@ def get_patch_type_str(self) -> str: @dataclass class GradleOptionalNegateableFlag(OptionDef[bool]): - """This option represents an optional negateable flag in Gradle CLI command. + """This option represents an optional negatable flag in Gradle CLI command. 
For example: --build-cache/--no-build-cache """ @@ -119,12 +119,12 @@ def get_patch_type_str(self) -> str: @dataclass class GradleSingleValue(OptionDef[str]): - """This option represents an option that takes a value in Grale CLI command.""" + """This option represents an option that takes a value in Gradle CLI command.""" short_name: str | None def is_valid_patch_option(self, patch: Any) -> TypeGuard[str]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" return isinstance(patch, str) def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: @@ -145,10 +145,10 @@ def get_patch_type_str(self) -> str: @dataclass class GradlePropeties(OptionDef[dict[str, str | None]]): - """This option represents an option used to define properties values of a Gradle CLI command. + """This option represents an option used to define property values of a Gradle CLI command. This option can be defined multiple times and the values are appended into a list of string in argparse. - However, it's stored internally as a dictionary mapping between the system property name to its value. + However, it's stored internally as a dictionary mapping between the system property name and its value. 
In Gradle there are 2 options of this type: - -D/--system-prop @@ -158,7 +158,7 @@ class GradlePropeties(OptionDef[dict[str, str | None]]): short_name: str def is_valid_patch_option(self, patch: Any) -> TypeGuard[dict[str, str | None]]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" return is_dict_of_str_to_str_or_none(patch) def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: @@ -181,7 +181,7 @@ class GradleTask(OptionDef[list[str]]): """ def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" return is_list_of_strs(patch) def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: @@ -199,16 +199,17 @@ def get_patch_type_str(self) -> str: @dataclass class GradleAppendedList(OptionDef[list[str]]): - """This option represents an option that can be specify multiple times and they all appended to a list. + """This option represents an option that can be specified multiple times. - For example, one can exclude multiple tasks with - gradle --exclude-task taskA --exclude-task taskB + Each instance of the option will be appended to a list. 
+ For example, one can exclude multiple tasks with: + gradle --exclude-task taskA --exclude-task taskB """ short_name: str def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" return is_list_of_strs(patch) def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: @@ -223,7 +224,7 @@ def get_patch_type_str(self) -> str: return "list[str]" -# TODO: some value option only allows you to provide certain values +# TODO: some value options only allow you to provide certain values. # For example: --console allows "plain", "auto", "rich" or "verbose". # They are right now not enforced. We need to think whether we want to enforce them. GRADLE_OPTION_DEF: list[OptionDef] = [ @@ -368,7 +369,7 @@ def get_patch_type_str(self) -> str: short_name="-x", long_name="--exclude-task", ), - # TODO: determine which of these options can be provided multiple times + # TODO: determine which of these options can be provided multiple times. GradleSingleValue( short_name="-b", long_name="--build-file", @@ -583,7 +584,7 @@ def apply_patch( `options_patch` is a mapping with: - - **Key**: the long name of an Gradle CLI option as string. For example: ``--continue``, ``--build-cache``. + - **Key**: the long name of a Gradle CLI option as string. For example: ``--continue``, ``--build-cache``. For patching tasks, use the key ``tasks``. - **Value**: The value to patch for an option referred to by the key. The type of this value @@ -660,7 +661,7 @@ def apply_option_patch( if not self.validate_patch(patch): raise PatchBuildCommandError("The patch is invalid.") - # Copy the Maven CLI Options for patching + # Copy the Maven CLI Options for patching. 
new_gradle_cli_options = deepcopy(gradle_cli_options) for option_long_name, patch_value in patch.items(): diff --git a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_command.py b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_command.py index 7368e1f52..7df577e6a 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_command.py +++ b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_command.py @@ -60,7 +60,7 @@ def from_parsed_arg( cls, parsed_arg: argparse.Namespace, ) -> "MavenCLIOptions": - """Initialize the instance from the the argparse.Namespace object. + """Initialize the instance from the argparse.Namespace object. Parameters ---------- @@ -142,8 +142,8 @@ def parse_system_properties(props: list[str]) -> dict[str, str]: system_props = {} for ele in props: prop_name, _, prop_val = ele.partition("=") - # Allow the subsequent definition override the previous one. - # This follows the way Maven is resolving system property. + # Allow subsequent definitions to override previous ones. + # This follows the way Maven resolves system properties. # For example: # mvn help:evaluate -Da=foo -Da=bar -Dexpression=a -q -DforceStdout # => result for `a` is bar diff --git a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py index 454f84cb0..fa87d797b 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py +++ b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py @@ -33,7 +33,7 @@ class MavenOptionalFlag(OptionDef[bool]): For example: --debug/-X - A short form for the option is rquired. + A short form for the option is required. 
""" short_name: str @@ -43,7 +43,7 @@ class MavenOptionalFlag(OptionDef[bool]): dest: str | None = field(default=None) def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" return isinstance(patch, bool) def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: @@ -77,7 +77,7 @@ class MavenSingleValue(OptionDef[str]): short_name: str def is_valid_patch_option(self, patch: Any) -> TypeGuard[str]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" return isinstance(patch, str) def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: @@ -96,11 +96,11 @@ class MavenCommaDelimList(OptionDef[list[str]]): """This option represents an option that takes a comma delimited value in Maven CLI command. This option can be defined one time only and the value is stored as a string in argparse. - However, it's stored internally as list of strings obtained by spliting its original value in argparse + However, it's stored internally as list of strings obtained by splitting its original value in argparse using comma as the delimiter. For example: "-P profile1,profile2,profile3" - will be store as ["profile1", "profile2", "profile3"] + will be stored as ["profile1", "profile2", "profile3"] A short form for the option is required. 
""" @@ -108,7 +108,7 @@ class MavenCommaDelimList(OptionDef[list[str]]): short_name: str def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" return is_list_of_strs(patch) def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: @@ -138,7 +138,7 @@ class MavenSystemPropeties(OptionDef[dict[str, str | None]]): short_name: str def is_valid_patch_option(self, patch: Any) -> TypeGuard[dict[str, str | None]]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" return is_dict_of_str_to_str_or_none(patch) def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: @@ -161,7 +161,7 @@ class MavenGoalPhase(OptionDef[list[str]]): """ def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: - """Return True if the provide patch value is compatible with the internal type of this option.""" + """Return True if the provided patch value is compatible with the internal type of this option.""" return is_list_of_strs(patch) def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: @@ -280,7 +280,7 @@ def get_patch_type_str(self) -> str: long_name="goals", ), # TODO: we need to confirm whether one can provide - # -P or -pl multiple times and the values will be aggregate into a list of string + # -P or -pl multiple times and the values will be aggregate into a list of string. # The current implementation only consider one instance of -P or -pl. 
# Where to begin: # https://github.com/apache/maven/blob/maven-3.9.x/maven-embedder/src/main/java/org/apache/maven/cli/CLIManager.java @@ -360,7 +360,7 @@ def __init__(self) -> None: exit_on_error=False, ) - # A mapping between the long name to its option definition. + # A mapping between the long name and its option definition. self.option_defs: dict[str, OptionDef] = {} for opt_def in MAVEN_OPTION_DEF: @@ -452,7 +452,7 @@ def parse(self, cmd_list: list[str]) -> "MavenCLICommand": # mvn --help # mvn --version # Note that we don't allow mvn -V or mvn --show-version as this command will - # failed for mvn + # fail for mvn. if not parsed_opts.help_ and not parsed_opts.version: raise CommandLineParseError(f"No goal detected for {' '.join(options)}.") @@ -491,10 +491,9 @@ def apply_patch( `options_patch` is a mapping with: - **Key**: the long name of a Maven CLI option as a string. For example: ``--define``, ``--settings``. - For patching goals or plugin phases, use the key `goals` with value being a list of string. + For patching goals or plugin phases, use the key `goals` with the value being a list of strings. - - **Value**: The value to patch. The type of this value depends on the type of option you want to - patch. + - **Value**: The value to patch. The type of this value depends on the type of option to be patched. The types of patch values: diff --git a/src/macaron/build_spec_generator/cli_command_parser/unparsed_cli_command.py b/src/macaron/build_spec_generator/cli_command_parser/unparsed_cli_command.py index 805c5c418..947fc134c 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/unparsed_cli_command.py +++ b/src/macaron/build_spec_generator/cli_command_parser/unparsed_cli_command.py @@ -8,9 +8,9 @@ @dataclass class UnparsedCLICommand: - """This class represents a CLICommand that we don't support parsing. + """This class represents a CLICommand that we don't support parsing. - Therefore, it only stores the original command as is.
+ It is stored in its original form. """ original_cmds: list[str] diff --git a/src/macaron/build_spec_generator/jdk_finder.py b/src/macaron/build_spec_generator/jdk_finder.py index d883a94d1..a12f85602 100644 --- a/src/macaron/build_spec_generator/jdk_finder.py +++ b/src/macaron/build_spec_generator/jdk_finder.py @@ -1,7 +1,7 @@ # Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. -"""This module includes the functions for obtaining JDK version from a Java artifact.""" +"""This module includes the functions for obtaining the JDK version from a Java artifact.""" import logging import os @@ -151,9 +151,9 @@ def find_jdk_version_from_remote_maven_repo_standalone( asset_name: str, remote_maven_repo_url: str, ) -> str | None: - """Return the jdk version string from an artifact matching a given GAV from a remote maven layout repository. + """Return the JDK version string from an artifact matching a given GAV from a remote maven layout repository. - This function doesn't cache the downloaded artifact, and remove it after the function exits. + This function doesn't cache the downloaded artifact, and removes it after the function exits. We assume that the remote maven layout repository supports downloading a file through a HTTPS URL. Parameters @@ -174,8 +174,8 @@ def find_jdk_version_from_remote_maven_repo_standalone( Returns ------- str | None - The version string extract from the artifact (as is) or None - ff there is an error, or if we couldn't find any jdk version. + The version string extracted from the artifact (as is); or None + if there is an error, or if we couldn't find any jdk version. 
""" maven_repository_path = construct_maven_repository_path( group_id=group_id, @@ -217,9 +217,9 @@ def find_jdk_version_from_remote_maven_repo_cache( remote_maven_repo_url: str, local_cache_repo: str, ) -> str | None: - """Return the jdk version string from an artifact matching a given GAV from a remote maven layout repository. + """Return the JDK version string from an artifact matching a given GAV from a remote maven layout repository. - This function cache the downloaded artifact in a maven layout https://maven.apache.org/repository/layout.html + This function caches the downloaded artifact in a maven layout https://maven.apache.org/repository/layout.html under ``local_cache_repo``. We assume that the remote maven layout repository supports downloading a file through a HTTPS URL. @@ -242,8 +242,8 @@ def find_jdk_version_from_remote_maven_repo_cache( Returns ------- str | None - The version string extract from the artifact (as is) or None - ff there is an error, or if we couldn't find any jdk version. + The version string extracted from the artifact (as is); or None + if there is an error, or if we couldn't find any jdk version. """ maven_repository_path = construct_maven_repository_path( group_id=group_id, @@ -295,7 +295,7 @@ def find_jdk_version_from_central_maven_repo( version: str, cache_strat: CacheStrategy = CacheStrategy.MAVEN_LAYOUT, ) -> str | None: - """Return the jdk version string from an artifact matching a given GAV from Maven Central repository. + """Return the JDK version string from an artifact matching a given GAV from Maven Central repository. The artifacts will be downloaded from https://repo1.maven.org/maven2/ for JDK version extraction. @@ -315,8 +315,8 @@ def find_jdk_version_from_central_maven_repo( Returns ------- str | None - The version string extract from the artifact (as is) or None - ff there is an error, or if we couldn't find any jdk version. 
+ The version string extracted from the artifact (as is); or None + if there is an error, or if we couldn't find any jdk version. """ central_repo_url = "https://repo1.maven.org/maven2/" local_cache_maven_repo = os.path.join( diff --git a/src/macaron/build_spec_generator/jdk_version_normalizer.py b/src/macaron/build_spec_generator/jdk_version_normalizer.py index 7dc9f169c..00fb0f220 100644 --- a/src/macaron/build_spec_generator/jdk_version_normalizer.py +++ b/src/macaron/build_spec_generator/jdk_version_normalizer.py @@ -1,7 +1,7 @@ # Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. -"""This module contains the logic to nomarlize a JDK version string to a major version number.""" +"""This module contains the logic to normalize a JDK version string as a major version number.""" SUPPORTED_JAVA_VERSION = [ "5", @@ -30,20 +30,20 @@ def normalize_jdk_version(jdk_version_str: str) -> str | None: """Return the major JDK version number. - We assume that the jdk version string is already valid (e.g not using a JDK - version that is not available in the real world. + We assume that the JDK version string is already valid (e.g., using a JDK + version that is available in the real world). - For 1.x versions, we returns the major version as ``x``. + For 1.x versions, we return the major version as ``x``. Parameters ---------- jdk_version_str: str - The jdk version string. + The JDK version string. Returns ------- str | None - The major jdk version number as string or None if there is an error.
Examples -------- diff --git a/src/macaron/build_spec_generator/macaron_db_extractor.py b/src/macaron/build_spec_generator/macaron_db_extractor.py index a41a16870..742c5443d 100644 --- a/src/macaron/build_spec_generator/macaron_db_extractor.py +++ b/src/macaron/build_spec_generator/macaron_db_extractor.py @@ -42,14 +42,14 @@ def lookup_multiple( select_statement: Select[tuple[T]], session: Session, ) -> Sequence[T]: - """Perform an SELECT statement and return all scalar results. + """Perform a SELECT statement and returns all scalar results. Parameters ---------- select_statement : Select[tuple[T]] The SQLAlchemy SELECT statement to execute. session : Session - The SQLAlchemy session to the database we are querying from. + The SQLAlchemy session of the database we are querying. Returns ------- @@ -76,14 +76,14 @@ def lookup_one_or_none( select_statement: Select[tuple[T]], session: Session, ) -> T | None: - """Perform an SELECT statement and return at most one scalar result. + """Perform a SELECT statement and returns at most one scalar result. Parameters ---------- select_statement : Select[tuple[T]] The SQLAlchemy SELECT statement to execute session : Session - The SQLAlchemy session to the database we are querying from. + The SQLAlchemy session of the database we are querying. Returns ------- diff --git a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py index 5eddd8ad6..3852bab8d 100644 --- a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py +++ b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py @@ -1,7 +1,7 @@ # Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
-"""This module contains the logic to generate a build spec in Reproducible Central format.""" +"""This module contains the logic to generate a build spec in the Reproducible Central format.""" import logging import pprint @@ -62,9 +62,9 @@ class _MacaronBuildToolName(str, Enum): - """Represent the name of a build tool that Macaron stores in the database. + """Represents the name of a build tool that Macaron stores in the database. - These doesn't cover all build tools that Macaron support, and ONLY include the ones that we + This doesn't cover all build tools that Macaron supports, and ONLY includes the ones that we support generating Reproducible Central Buildspec for. """ @@ -123,7 +123,7 @@ def remove_shell_quote(cmd: list[str]) -> list[str]: def get_rc_build_command(cmds_sequence: list[list[str]]) -> str: - """Return a single command as string to be used in RC buildspec from a sequence of commands. + """Return a single command as a string to be used in RC buildspec from a sequence of commands. The build commands in the sequence will be ``&&`` together, because RC's build spec is a shell script. @@ -187,7 +187,7 @@ def _get_macaron_build_tool_name(build_tool_facts: Sequence[BuildToolFacts]) -> except ValueError: continue - # TODO: What happen if we report multiple build tools in the database. + # TODO: What happen if we report multiple build tools in the database? return macaron_build_tool_name return None @@ -268,14 +268,14 @@ def get_lookup_build_command_info( Parameters ---------- component_id: int - The id of the component we are finding build command for. + The id of the component we are finding the build command for. session: sqlalchemy.orm.Session The SQLAlchemy Session opened for the database to extract build information. 
Returns ------- GenericBuildCommandInfo | None - The GenericBuildCommandInfo object for the highest confidence build command or None if there was + The GenericBuildCommandInfo object for the highest confidence build command; or None if there was an error, or no build command is found from the database. """ try: @@ -298,7 +298,7 @@ def get_lookup_build_command_info( def get_lookup_build_command_jdk( build_command_info: GenericBuildCommandInfo, ) -> str | None: - """Return the jdk version from a GenericBuildCommandInfo object.""" + """Return the JDK version from a GenericBuildCommandInfo object.""" if build_command_info.language_versions: # There isn't a concrete reason why we select the last element. # We just use this at this point because we haven't looked into diff --git a/src/macaron/errors.py b/src/macaron/errors.py index 91ce63990..d088914de 100644 --- a/src/macaron/errors.py +++ b/src/macaron/errors.py @@ -128,4 +128,4 @@ class QueryMacaronDatabaseError(Exception): class GenerateBuildSpecError(Exception): - """Happens when there is an unexpected error while generate the build spec file.""" + """Happens when there is an unexpected error while generating the build spec file.""" From 5a7923bd91a04977bbb4d6f8fd0001a5eda11fa1 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Wed, 10 Sep 2025 10:50:19 +1000 Subject: [PATCH 24/27] chore: address PR feedback Signed-off-by: behnazh-w --- .../build_command_patcher.py | 4 +- .../build_spec_generator.py | 83 ++++++++++--------- .../cli_command_parser/__init__.py | 6 +- .../cli_command_parser/gradle_cli_parser.py | 14 ++-- .../cli_command_parser/maven_cli_parser.py | 12 +-- .../reproducible_central.py | 9 ++ 6 files changed, 70 insertions(+), 58 deletions(-) diff --git a/src/macaron/build_spec_generator/build_command_patcher.py b/src/macaron/build_spec_generator/build_command_patcher.py index 023a7674b..63737b399 100644 --- a/src/macaron/build_spec_generator/build_command_patcher.py +++ 
b/src/macaron/build_spec_generator/build_command_patcher.py @@ -59,7 +59,7 @@ def _patch_commands( cli_command = effective_cli_parser.parse(cmds) except CommandLineParseError as error: logger.error( - "Failed to parse the cli command %s. Error %s.", + "Failed to patch the cli command %s. Error %s.", " ".join(cmds), error, ) @@ -77,7 +77,7 @@ def _patch_commands( ) except PatchBuildCommandError as error: logger.error( - "Failed to patch the mvn command %s. Error %s.", + "Failed to patch the build command %s. Error %s.", " ".join(cmds), error, ) diff --git a/src/macaron/build_spec_generator/build_spec_generator.py b/src/macaron/build_spec_generator/build_spec_generator.py index 688543dc0..b6c9ef382 100644 --- a/src/macaron/build_spec_generator/build_spec_generator.py +++ b/src/macaron/build_spec_generator/build_spec_generator.py @@ -78,9 +78,12 @@ def gen_build_spec_for_purl( purl: PackageURL The package URL to generate build spec for. database_path: str - The path to the Macaron database. + The path to the Macaron SQLite database file. This database will be accessed in read-only mode, + ensuring that no modifications can be made during operations. build_spec_format: BuildSpecFormat The format of the final build spec content. + output_path: str + The path to the output directory. Returns ------- @@ -89,10 +92,10 @@ def gen_build_spec_for_purl( buildspec file cannot be created in the local filesystem, ``os.EX_DATAERR`` if there was an error generating the content for the buildspec file. 
""" - db_engine = create_engine(f"sqlite+pysqlite:///{database_path}", echo=False) + db_engine = create_engine(f"sqlite+pysqlite:///file:{database_path}?mode=ro&uri=true", echo=False) + build_spec_content = None with Session(db_engine) as session, session.begin(): - build_spec_content = None match build_spec_format: case BuildSpecFormat.REPRODUCIBLE_CENTRAL: build_spec_content = gen_reproducible_central_build_spec( @@ -101,42 +104,42 @@ def gen_build_spec_for_purl( patches=CLI_COMMAND_PATCHES, ) - if not build_spec_content: - logger.error("Error while generating reproducible central build spec.") - return os.EX_DATAERR - - logger.debug("Build spec content: \n%s", build_spec_content) - - build_spec_filepath = os.path.join( - output_path, - "buildspec", - get_purl_based_dir( - purl_name=purl.name, - purl_namespace=purl.namespace, - purl_type=purl.type, - ), - "macaron.buildspec", + if not build_spec_content: + logger.error("Error while generating the build spec.") + return os.EX_DATAERR + + logger.debug("Build spec content: \n%s", build_spec_content) + + build_spec_filepath = os.path.join( + output_path, + "buildspec", + get_purl_based_dir( + purl_name=purl.name, + purl_namespace=purl.namespace, + purl_type=purl.type, + ), + "macaron.buildspec", + ) + + os.makedirs( + name=os.path.dirname(build_spec_filepath), + exist_ok=True, + ) + + logger.info( + "Generating the %s format build spec to %s.", + build_spec_format.value, + os.path.relpath(build_spec_filepath, os.getcwd()), + ) + try: + with open(build_spec_filepath, mode="w", encoding="utf-8") as file: + file.write(build_spec_content) + except OSError as error: + logger.error( + "Could not create the build spec at %s. 
Error: %s", + os.path.relpath(build_spec_filepath, os.getcwd()), + error, ) + return os.EX_OSERR - os.makedirs( - name=os.path.dirname(build_spec_filepath), - exist_ok=True, - ) - - try: - with open(build_spec_filepath, mode="w", encoding="utf-8") as file: - logger.info( - "Generating the %s format build spec to %s.", - build_spec_format.value, - os.path.relpath(build_spec_filepath, os.getcwd()), - ) - file.write(build_spec_content) - except OSError as error: - logger.error( - "Could not create the build spec at %s. Error: %s", - os.path.relpath(build_spec_filepath, os.getcwd()), - error, - ) - return os.EX_OSERR - - return os.EX_OK + return os.EX_OK diff --git a/src/macaron/build_spec_generator/cli_command_parser/__init__.py b/src/macaron/build_spec_generator/cli_command_parser/__init__.py index 05a0e913e..c44cf1981 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/__init__.py +++ b/src/macaron/build_spec_generator/cli_command_parser/__init__.py @@ -67,10 +67,10 @@ def patch_mapping( @dataclass class OptionDef(Generic[P]): - """This class represent a definition of a CLI option for argparse.ArgumentParser. + """This class represents a definition of a CLI option for argparse.ArgumentParser. This class also contains the information for validating a patch value. - The generic type T is the patch expected type (if it's not None). + The generic type P is the patch expected type (if it's not None). """ # e.g. 
`--long-option-name` @@ -83,7 +83,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[P]: raise NotImplementedError() @abstractmethod - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" raise NotImplementedError() diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py index 2484f8325..f66436076 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py @@ -50,7 +50,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]: """Return True if the provided patch value is compatible with the internal type of this option.""" return isinstance(patch, bool) - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" kwargs: dict[str, Any] = {} @@ -90,7 +90,7 @@ def get_negated_long_name(long_name: str) -> str: """Return the negated version of a long option name.""" return f"--no-{long_name.removeprefix('--')}" - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" # We allow providing both the normal and negated form. 
negated_long_name = self.get_negated_long_name(self.long_name) @@ -127,7 +127,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[str]: """Return True if the provided patch value is compatible with the internal type of this option.""" return isinstance(patch, str) - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" if self.short_name: arg_parse.add_argument( @@ -161,7 +161,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[dict[str, str | None]]: """Return True if the provided patch value is compatible with the internal type of this option.""" return is_dict_of_str_to_str_or_none(patch) - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" arg_parse.add_argument( *(self.short_name, self.long_name), @@ -184,7 +184,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: """Return True if the provided patch value is compatible with the internal type of this option.""" return is_list_of_strs(patch) - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" # Doesn't require to allow cases like "gradle --help". 
arg_parse.add_argument( @@ -212,7 +212,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: """Return True if the provided patch value is compatible with the internal type of this option.""" return is_list_of_strs(patch) - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" arg_parse.add_argument( *(self.short_name, self.long_name), @@ -464,7 +464,7 @@ def __init__(self) -> None: self.option_defs: dict[str, OptionDef] = {} for opt_def in GRADLE_OPTION_DEF: - opt_def.add_itself_to_arg_parser(self.arg_parser) + opt_def.add_to_arg_parser(self.arg_parser) self.option_defs[opt_def.long_name] = opt_def diff --git a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py index fa87d797b..9b56e5adc 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py +++ b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py @@ -46,7 +46,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]: """Return True if the provided patch value is compatible with the internal type of this option.""" return isinstance(patch, bool) - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" if self.dest: arg_parse.add_argument( @@ -80,7 +80,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[str]: """Return True if the provided patch value is compatible with the internal type of this option.""" return isinstance(patch, str) - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) 
-> None: """Add a new argument to argparser.ArgumentParser representing this option.""" arg_parse.add_argument( *(self.short_name, self.long_name), @@ -111,7 +111,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: """Return True if the provided patch value is compatible with the internal type of this option.""" return is_list_of_strs(patch) - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" arg_parse.add_argument( *(self.short_name, self.long_name), @@ -141,7 +141,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[dict[str, str | None]]: """Return True if the provided patch value is compatible with the internal type of this option.""" return is_dict_of_str_to_str_or_none(patch) - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" arg_parse.add_argument( *(self.short_name, self.long_name), @@ -164,7 +164,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: """Return True if the provided patch value is compatible with the internal type of this option.""" return is_list_of_strs(patch) - def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: """Add a new argument to argparser.ArgumentParser representing this option.""" # Doesn't require to allow cases like "mvn --help". 
arg_parse.add_argument( @@ -364,7 +364,7 @@ def __init__(self) -> None: self.option_defs: dict[str, OptionDef] = {} for opt_def in MAVEN_OPTION_DEF: - opt_def.add_itself_to_arg_parser(self.arg_parser) + opt_def.add_to_arg_parser(self.arg_parser) self.option_defs[opt_def.long_name] = opt_def diff --git a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py index 3852bab8d..790c1c535 100644 --- a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py +++ b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py @@ -339,6 +339,15 @@ def gen_reproducible_central_build_spec( for this PURL, 3. Failed to patch the build commands using the provided ``patches``, 4. The database from ``session`` doesn't contain enough information. """ + if purl.type != "maven": + logger.error( + "Reproducible Central build specification only supports PURLs of type 'maven'. " + "Received PURL type: '%s' (%s). 
Please provide a valid Maven package URL.", + purl.type, + purl.to_string(), + ) + return None + logger.debug( "Generating build spec for %s with command patches:\n%s", purl, From 71b9f5a9a9929c60bcf729b75a84f4c37c7425a8 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Fri, 12 Sep 2025 16:32:14 +1000 Subject: [PATCH 25/27] chore: address PR feedback Signed-off-by: behnazh-w --- .../build_command_patcher.py | 2 +- .../cli_command_parser/__init__.py | 11 +++- .../cli_command_parser/gradle_cli_command.py | 5 +- .../cli_command_parser/gradle_cli_parser.py | 65 +++++++++++++------ .../cli_command_parser/maven_cli_command.py | 5 +- .../cli_command_parser/maven_cli_parser.py | 52 ++++++++++----- .../build_spec_generator/jdk_finder.py | 2 - .../macaron_db_extractor.py | 36 +++++----- .../reproducible_central.py | 36 ++++------ .../test_maven_cli_parser.py | 14 ++-- 10 files changed, 132 insertions(+), 96 deletions(-) diff --git a/src/macaron/build_spec_generator/build_command_patcher.py b/src/macaron/build_spec_generator/build_command_patcher.py index 63737b399..489cf8fb9 100644 --- a/src/macaron/build_spec_generator/build_command_patcher.py +++ b/src/macaron/build_spec_generator/build_command_patcher.py @@ -73,7 +73,7 @@ def _patch_commands( try: new_cli_command = effective_cli_parser.apply_patch( cli_command=cli_command, - options_patch=patch, + patch_options=patch, ) except PatchBuildCommandError as error: logger.error( diff --git a/src/macaron/build_spec_generator/cli_command_parser/__init__.py b/src/macaron/build_spec_generator/cli_command_parser/__init__.py index c44cf1981..7ce7d8127 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/__init__.py +++ b/src/macaron/build_spec_generator/cli_command_parser/__init__.py @@ -114,7 +114,16 @@ def to_cmds(self) -> list[str]: """Return the CLI Command as a list of strings.""" +# T is a generic type variable restricted to subclasses of CLICommand. 
+# It ensures that only derived types of CLICommand can be used with +# generic classes or functions parameterized by T. T = TypeVar("T", bound="CLICommand") + +# Y_contra is a contravariant type variable intended for CLI argument +# patch values. Using contravariance allows generic classes or functions +# to accept supertypes of the specified type parameter, making it easier +# to support broader value types when implementing patching for different +# build tools. Y_contra = TypeVar("Y_contra", contravariant=True) @@ -160,6 +169,6 @@ def is_build_tool(self, executable_path: str) -> bool: def apply_patch( self, cli_command: T, - options_patch: Mapping[str, Y_contra | None], + patch_options: Mapping[str, Y_contra | None], ) -> T: """Return a new CLICommand object with its option patched, while persisting the executable path.""" diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py index 5c3e333cd..342811909 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py @@ -382,7 +382,4 @@ class GradleCLICommand: def to_cmds(self) -> list[str]: """Return the CLI Command as a list of strings.""" - result = [] - result.append(self.executable) - result.extend(self.options.to_option_cmds()) - return result + return [self.executable] + self.options.to_option_cmds() diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py index f66436076..ee33ba963 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py @@ -52,9 +52,7 @@ def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]: def add_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: 
"""Add a new argument to argparser.ArgumentParser representing this option.""" - kwargs: dict[str, Any] = {} - - kwargs["action"] = "store_true" + kwargs: dict[str, Any] = {"action": "store_true"} if self.dest: kwargs["dest"] = self.dest @@ -75,7 +73,7 @@ def get_patch_type_str(self) -> str: @dataclass -class GradleOptionalNegateableFlag(OptionDef[bool]): +class GradleOptionalNegatableFlag(OptionDef[bool]): """This option represents an optional negatable flag in Gradle CLI command. For example: --build-cache/--no-build-cache @@ -144,7 +142,7 @@ def get_patch_type_str(self) -> str: @dataclass -class GradlePropeties(OptionDef[dict[str, str | None]]): +class GradleProperties(OptionDef[dict[str, str | None]]): """This option represents an option used to define property values of a Gradle CLI command. This option can be defined multiple times and the values are appended into a list of string in argparse. @@ -175,7 +173,7 @@ def get_patch_type_str(self) -> str: @dataclass class GradleTask(OptionDef[list[str]]): - """This option represents the positional task option in Maven CLI command. + """This option represents the positional task option in Gradle CLI command. argparse.Namespace stores this as a list of string. This is stored internally as a list of string. 
""" @@ -314,25 +312,25 @@ def get_patch_type_str(self) -> str: short_names=None, long_name="--write-locks", ), - GradleOptionalNegateableFlag( + GradleOptionalNegatableFlag( long_name="--build-cache", ), - GradleOptionalNegateableFlag( + GradleOptionalNegatableFlag( long_name="--configuration-cache", ), - GradleOptionalNegateableFlag( + GradleOptionalNegatableFlag( long_name="--configure-on-demand", ), - GradleOptionalNegateableFlag( + GradleOptionalNegatableFlag( long_name="--daemon", ), - GradleOptionalNegateableFlag( + GradleOptionalNegatableFlag( long_name="--parallel", ), - GradleOptionalNegateableFlag( + GradleOptionalNegatableFlag( long_name="--scan", ), - GradleOptionalNegateableFlag( + GradleOptionalNegatableFlag( long_name="--watch-fs", ), # This has been validated by setting up a minimal gradle project. Gradle version 8.14.2 @@ -430,11 +428,11 @@ def get_patch_type_str(self) -> str: short_name=None, long_name="--warning-mode", ), - GradlePropeties( + GradleProperties( short_name="-D", long_name="--system-prop", ), - GradlePropeties( + GradleProperties( short_name="-P", long_name="--project-prop", ), @@ -447,7 +445,7 @@ def get_patch_type_str(self) -> str: class GradleCLICommandParser: """A Gradle CLI Command Parser.""" - ACCEPTABLE_EXECUTABLE = ["gradle", "gradlew"] + ACCEPTABLE_EXECUTABLE = {"gradle", "gradlew"} def __init__(self) -> None: """Initialize the instance.""" @@ -561,8 +559,35 @@ def _patch_properties_mapping( option_long_name: str, patch_value: GradleOptionPatchValueType, ) -> dict[str, str]: + """ + Apply a patch value to an existing properties dictionary for a specified Gradle option. + + This function locates the metadata definition for the given option by its long name, + ensures it is a properties-type option, validates the patch value type, and then + applies the patch using `patch_mapping`. Throws a `PatchBuildCommandError` if the + option is not valid or the patch value's type is incorrect. 
+ + Parameters + ---------- + original_props: dict[str, str] + The original mapping of property names to values. + option_long_name: str + The long name of the Gradle option to patch. + patch_value: GradleOptionPatchValueType + The patch to apply to the properties mapping. + + Returns + ------- + dict[str, str] + The updated properties mapping after applying the patch. + + Raises + ------ + PatchBuildCommandError + If the option is not a valid properties-type option or the patch value does not have a valid type. + """ prop_opt_def = self.option_defs.get(option_long_name) - if not prop_opt_def or not isinstance(prop_opt_def, GradlePropeties): + if not prop_opt_def or not isinstance(prop_opt_def, GradleProperties): raise PatchBuildCommandError(f"{option_long_name} from the patch is not a property type option.") if not prop_opt_def.is_valid_patch_option(patch_value): @@ -578,11 +603,11 @@ def _patch_properties_mapping( def apply_patch( self, cli_command: GradleCLICommand, - options_patch: Mapping[str, GradleOptionPatchValueType | None], + patch_options: Mapping[str, GradleOptionPatchValueType | None], ) -> GradleCLICommand: """Patch the options of a Gradle CLI command, while persisting the executable path. - `options_patch` is a mapping with: + `patch_options` is a mapping with: - **Key**: the long name of a Gradle CLI option as string. For example: ``--continue``, ``--build-cache``. For patching tasks, use the key ``tasks``. 
@@ -630,7 +655,7 @@ def apply_patch( executable=cli_command.executable, options=self.apply_option_patch( cli_command.options, - patch=options_patch, + patch=patch_options, ), ) diff --git a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_command.py b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_command.py index 7df577e6a..c6eaed108 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_command.py +++ b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_command.py @@ -318,7 +318,4 @@ class MavenCLICommand: def to_cmds(self) -> list[str]: """Return the CLI Command as a list of strings.""" - result = [] - result.append(self.executable) - result.extend(self.options.to_option_cmds()) - return result + return [self.executable] + self.options.to_option_cmds() diff --git a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py index 9b56e5adc..bace7eab8 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py +++ b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py @@ -123,7 +123,7 @@ def get_patch_type_str(self) -> str: @dataclass -class MavenSystemPropeties(OptionDef[dict[str, str | None]]): +class MavenSystemProperties(OptionDef[dict[str, str | None]]): """This option represents the -D/--define option of a Maven CLI command. This option can be defined multiple times and the values are appended into a list of string in argparse. 
@@ -289,7 +289,7 @@ def get_patch_type_str(self) -> str: short_name="-b", long_name="--builder", ), - MavenSystemPropeties( + MavenSystemProperties( short_name="-D", long_name="--define", ), @@ -347,7 +347,7 @@ def get_patch_type_str(self) -> str: class MavenCLICommandParser: """A Maven CLI Command Parser.""" - ACCEPTABLE_EXECUTABLE = ["mvn", "mvnw"] + ACCEPTABLE_EXECUTABLE = {"mvn", "mvnw"} def __init__(self) -> None: """Initialize the instance.""" @@ -446,16 +446,6 @@ def parse(self, cmd_list: list[str]) -> "MavenCLICommand": except SystemExit as sys_exit_err: raise CommandLineParseError(f"Failed to parse the Maven CLI Options {' '.join(options)}.") from sys_exit_err - # Handle cases where goal or plugin phase is not provided. - if not parsed_opts.goals: - # Allow cases such as: - # mvn --help - # mvn --version - # Note that we don't allow mvn -V or mvn --show-version as this command will - # fail for mvn. - if not parsed_opts.help_ and not parsed_opts.version: - raise CommandLineParseError(f"No goal detected for {' '.join(options)}.") - maven_cli_options = MavenCLIOptions.from_parsed_arg(parsed_opts) return MavenCLICommand( @@ -469,8 +459,36 @@ def _patch_properties_mapping( option_long_name: str, patch_value: MavenOptionPatchValueType, ) -> dict[str, str]: + """ + Apply a patch to the Maven system properties mapping for a specific option. + + Retrieves the system property option definition for the specified long name, + validates its type and the patch value, and applies the patch update to the + original properties dictionary. Raises an error if the option or patch type + is invalid for Maven `--define` options. + + Parameters + ---------- + original_props : dict[str, str] + The original dictionary of Maven system property names and their values. + option_long_name : str + The long name of the Maven option to patch (usually '--define'). + patch_value : MavenOptionPatchValueType + The value to patch into the original properties dictionary. 
+ + Returns + ------- + dict[str, str] + The updated mapping with the patch applied. + + Raises + ------ + PatchBuildCommandError + If the option is not a Maven system property option or if the patch value + has an invalid type. + """ define_opt_def = self.option_defs.get(option_long_name) - if not define_opt_def or not isinstance(define_opt_def, MavenSystemPropeties): + if not define_opt_def or not isinstance(define_opt_def, MavenSystemProperties): raise PatchBuildCommandError(f"{option_long_name} from the patch is not a --define option.") if not define_opt_def.is_valid_patch_option(patch_value): @@ -484,11 +502,11 @@ def _patch_properties_mapping( def apply_patch( self, cli_command: MavenCLICommand, - options_patch: Mapping[str, MavenOptionPatchValueType | None], + patch_options: Mapping[str, MavenOptionPatchValueType | None], ) -> MavenCLICommand: """Patch the options of a Gradle CLI command, while persisting the executable path. - `options_patch` is a mapping with: + `patch_options` is a mapping with: - **Key**: the long name of a Maven CLI option as a string. For example: ``--define``, ``--settings``. For patching goals or plugin phases, use the key `goals` with the value being a list of strings. @@ -529,7 +547,7 @@ def apply_patch( executable=cli_command.executable, options=self.apply_option_patch( cli_command.options, - patch=options_patch, + patch=patch_options, ), ) diff --git a/src/macaron/build_spec_generator/jdk_finder.py b/src/macaron/build_spec_generator/jdk_finder.py index a12f85602..538a57b28 100644 --- a/src/macaron/build_spec_generator/jdk_finder.py +++ b/src/macaron/build_spec_generator/jdk_finder.py @@ -166,8 +166,6 @@ def find_jdk_version_from_remote_maven_repo_standalone( The version part of the GAV coordinate. asset_name: str The name of artifact to download and extract the jdk version. - ext: JavaArtifactExt - The extension of the main artifact file. remote_maven_repo_url: str The URL to the remote maven layout repository. 
diff --git a/src/macaron/build_spec_generator/macaron_db_extractor.py b/src/macaron/build_spec_generator/macaron_db_extractor.py index 742c5443d..092a0eed8 100644 --- a/src/macaron/build_spec_generator/macaron_db_extractor.py +++ b/src/macaron/build_spec_generator/macaron_db_extractor.py @@ -116,7 +116,7 @@ def lookup_one_or_none( return result -def compile_sqlite_select_statement(select_statment: Select) -> str: +def compile_sqlite_select_statement(select_statement: Select) -> str: """Return the equivalent SQLite SELECT statement from an SQLAlchemy SELECT statement. This function also introduces additional cosmetic details so that it can be easily @@ -132,7 +132,7 @@ def compile_sqlite_select_statement(select_statment: Select) -> str: str The equivalent SQLite SELECT statement as a string. """ - compiled_sqlite = select_statment.compile( + compiled_sqlite = select_statement.compile( dialect=sqlite.dialect(), # type: ignore compile_kwargs={"literal_binds": True}, ) @@ -171,16 +171,16 @@ def get_sql_stmt_latest_component_for_purl(purl: PackageURL) -> Select[tuple[Com def get_sql_stmt_build_tools(component_id: int) -> Select[tuple[BuildToolFacts]]: - """Return an SQLAlchemy SELECT statement to query the BuildToolFacts for a given PackageURL. + """Return an SQLAlchemy SELECT statement to query the BuildToolFacts for a given component. Parameters ---------- - purl_string : str - The PackageURL string to find the BuildToolFacts. + component_id: int + The unique identifier of the component for which BuildToolFacts are to be queried. Returns ------- - Select[tuple[BuildAsCodeFacts]] + Select[tuple[BuildToolFacts]] The SQLAlchemy SELECT statement. 
""" # Because BuildToolFacts inherit from CheckFacts, SQLAlchemy had to perform implicit alias @@ -213,12 +213,12 @@ def get_sql_stmt_build_tools(component_id: int) -> Select[tuple[BuildToolFacts]] def get_sql_stmt_build_as_code_check(component_id: int) -> Select[tuple[BuildAsCodeFacts]]: - """Return an SQLAlchemy SELECT statement to query the BuildAsCodeFacts for a given PackageURL. + """Return an SQLAlchemy SELECT statement to query the BuildAsCodeFacts for a given component. Parameters ---------- - purl_string : str - The PackageURL string to find the BuildToolFacts. + component_id : int + The unique identifier of the component for which BuildAsCodeFacts are to be queried. Returns ------- @@ -260,12 +260,12 @@ def get_sql_stmt_build_as_code_check(component_id: int) -> Select[tuple[BuildAsC def get_sql_stmt_build_service_check(component_id: int) -> Select[tuple[BuildServiceFacts]]: - """Return an SQLAlchemy SELECT statement to query the BuildServiceFacts for a given PackageURL. + """Return an SQLAlchemy SELECT statement to query the BuildServiceFacts for a given component. Parameters ---------- - purl_string : str - The PackageURL string to find the BuildServiceFacts. + component_id: int + The unique identifier of the component for which BuildServiceFacts are to be queried. Returns ------- @@ -307,12 +307,12 @@ def get_sql_stmt_build_service_check(component_id: int) -> Select[tuple[BuildSer def get_sql_stmt_build_script_check(component_id: int) -> Select[tuple[BuildScriptFacts]]: - """Return an SQLAlchemy SELECT statement to query the BuildScriptFacts for a given PackageURL. + """Return an SQLAlchemy SELECT statement to query the BuildScriptFacts for a given component. Parameters ---------- - purl_string : str - The PackageURL string to find the BuildScriptFacts. + component_id: int + The unique identifier of the component for which BuildServiceFacts are to be queried. 
Returns ------- @@ -515,15 +515,15 @@ def lookup_build_script_check(component_id: int, session: Session) -> Sequence[B QueryMacaronDatabaseError If there is an unexpected error when executing the SQLAlchemy query. """ - build_script_select_statment = get_sql_stmt_build_script_check(component_id) + build_script_select_statement = get_sql_stmt_build_script_check(component_id) logger.debug( "Build Script Check Fact for component %d \n %s", component_id, - compile_sqlite_select_statement(build_script_select_statment), + compile_sqlite_select_statement(build_script_select_statement), ) build_script_check_facts = lookup_multiple( - select_statement=build_script_select_statment, + select_statement=build_script_select_statement, session=session, ) diff --git a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py index 790c1c535..326eea794 100644 --- a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py +++ b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py @@ -83,12 +83,12 @@ class ReproducibleCentralBuildTool(str, Enum): SBT = "sbt" -def format_build_command_infos(build_command_infos: list[GenericBuildCommandInfo]) -> str: +def format_build_command_info(build_command_info: list[GenericBuildCommandInfo]) -> str: """Return the prettified str format for a list of `GenericBuildCommandInfo` instances. Parameters ---------- - build_command_infos: GenericBuildCommandInfo + build_command_info: GenericBuildCommandInfo A list of ``GenericBuildCommandInfo`` instances. Returns @@ -96,7 +96,7 @@ def format_build_command_infos(build_command_infos: list[GenericBuildCommandInfo str The prettified output. 
""" - pretty_formatted_ouput = [pprint.pformat(build_command_info) for build_command_info in build_command_infos] + pretty_formatted_ouput = [pprint.pformat(build_command_info) for build_command_info in build_command_info] return "\n".join(pretty_formatted_ouput) @@ -279,7 +279,7 @@ def get_lookup_build_command_info( an error, or no build command is found from the database. """ try: - lookup_build_command_infos = lookup_any_build_command(component_id, session) + lookup_build_command_info = lookup_any_build_command(component_id, session) except QueryMacaronDatabaseError as lookup_build_command_error: logger.error( "Unexpected result from querying all build command information for component id %s. Error: %s", @@ -289,10 +289,10 @@ def get_lookup_build_command_info( return None logger.debug( "Build command information discovered\n%s", - format_build_command_infos(lookup_build_command_infos), + format_build_command_info(lookup_build_command_info), ) - return lookup_build_command_infos[0] if lookup_build_command_infos else None + return lookup_build_command_info[0] if lookup_build_command_info else None def get_lookup_build_command_jdk( @@ -339,15 +339,6 @@ def gen_reproducible_central_build_spec( for this PURL, 3. Failed to patch the build commands using the provided ``patches``, 4. The database from ``session`` doesn't contain enough information. """ - if purl.type != "maven": - logger.error( - "Reproducible Central build specification only supports PURLs of type 'maven'. " - "Received PURL type: '%s' (%s). Please provide a valid Maven package URL.", - purl.type, - purl.to_string(), - ) - return None - logger.debug( "Generating build spec for %s with command patches:\n%s", purl, @@ -424,17 +415,14 @@ def gen_reproducible_central_build_spec( "Attempted to find build command from the database. 
Result: %s", lookup_build_command_info or "Cannot find any.", ) - lookup_build_command_jdk = ( - get_lookup_build_command_jdk( - lookup_build_command_info, - ) - if lookup_build_command_info - else None + + # Select JDK from jar or another source, with a default of version 8. + selected_jdk_version = ( + jdk_from_jar + or (get_lookup_build_command_jdk(lookup_build_command_info) if lookup_build_command_info else None) + or "8" ) - # Select jdk from jar from different source. - # The default JDK version is 8. - selected_jdk_version = jdk_from_jar or lookup_build_command_jdk or "8" major_jdk_version = normalize_jdk_version(selected_jdk_version) if not major_jdk_version: logger.error("Failed to obtain the major version of %s", selected_jdk_version) diff --git a/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py b/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py index d219e1af2..6b4611bd8 100644 --- a/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py +++ b/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py @@ -112,6 +112,15 @@ }, id="allow_no_goal_for_version_and_help", ), + pytest.param( + "mvn", + { + "goals": [], + "help_": False, + "version": False, + }, + id="No goal or phase", + ), ], ) def test_maven_cli_command_parser_valid_input( @@ -169,7 +178,6 @@ def test_maven_cli_command_parser_executable( ("build_command"), [ pytest.param("", id="An empty command"), - pytest.param("mvn", id="No goal or phase"), pytest.param( "mvn --this-argument-should-never-exist-in-mvn", id="unrecognized_optional_argument", @@ -182,10 +190,6 @@ def test_maven_cli_command_parser_executable( "mmmvvvnnn clean package", id="unrecognized_executable_path", ), - pytest.param( - "mvn --show-version", - id="show_version_with_no_goal", - ), ], ) def test_maven_cli_command_parser_invalid_input( From a9be2e376787ce64d2f7c8351347f262761a624f Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 15 Sep 2025 14:29:21 
+1000 Subject: [PATCH 26/27] chore: address PR feedback Signed-off-by: behnazh-w --- .../test_gradle_cli_command.py | 29 ------------------- .../test_maven_cli_command.py | 29 ------------------- .../compare_rc_build_spec.py | 2 +- .../test_build_command_patcher.py | 2 +- .../test_macaron_db_extractor.py | 2 +- tests/vsa/compare_vsa.py | 4 +-- 6 files changed, 5 insertions(+), 63 deletions(-) diff --git a/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py b/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py index 80cd3a643..6143dc2af 100644 --- a/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py +++ b/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py @@ -4,8 +4,6 @@ """This module contains tests for the gradle_cli_command module.""" -from typing import Any - import pytest from macaron.build_spec_generator.cli_command_parser.gradle_cli_command import GradleCLIOptions @@ -80,33 +78,6 @@ def test_comparing_gradle_cli_command_unequal( assert not this_command == that_command -@pytest.mark.parametrize( - ("command", "that"), - [ - ( - "gradle clean build -x test --debug --stacktrace -Dorg.gradle.caching=true", - True, - ), - ( - "gradle clean build -x test --debug --stacktrace -Dorg.gradle.caching=true", - ["boo", "foo"], - ), - ( - "gradle clean build -x test --debug --stacktrace -Dorg.gradle.caching=true", - {"boo", "foo"}, - ), - ], -) -def test_comparing_gradle_cli_command_unequal_types( - gradle_cli_parser: GradleCLICommandParser, - command: str, - that: Any, -) -> None: - """Test comparing MavenCLICommand with another incompatible type oject.""" - this_command = gradle_cli_parser.parse(command.split()) - assert not this_command == that - - @pytest.mark.parametrize( ("command"), [ diff --git a/tests/build_spec_generator/cli_command_parser/test_maven_cli_command.py b/tests/build_spec_generator/cli_command_parser/test_maven_cli_command.py index d0e681e57..36e9c286e 100644 --- 
a/tests/build_spec_generator/cli_command_parser/test_maven_cli_command.py +++ b/tests/build_spec_generator/cli_command_parser/test_maven_cli_command.py @@ -3,8 +3,6 @@ """This module contains tests for the maven_cli_command module.""" -from typing import Any - import pytest from macaron.build_spec_generator.cli_command_parser.maven_cli_command import MavenCLIOptions @@ -65,33 +63,6 @@ def test_comparing_maven_cli_command_unequal( assert not this_command == that_command -@pytest.mark.parametrize( - ("command", "that"), - [ - ( - "mvn clean package -P profile1,profile2 -T 2C -ntp -Dmaven.skip.test=true -Dboo=foo", - True, - ), - ( - "mvn clean package -P profile1,profile2 -T 2C -ntp -Dmaven.skip.test=true -Dboo=foo", - ["boo", "foo"], - ), - ( - "mvn clean package -P profile1,profile2 -T 2C -ntp -Dmaven.skip.test=true -Dboo=foo", - {"boo", "foo"}, - ), - ], -) -def test_comparing_maven_cli_command_unequal_types( - maven_cli_parser: MavenCLICommandParser, - command: str, - that: Any, -) -> None: - """Test comparing MavenCLICommand with another incompatible type oject.""" - this_command = maven_cli_parser.parse(command.split()) - assert not this_command == that - - @pytest.mark.parametrize( ("command"), [ diff --git a/tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py b/tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py index df541d461..d12b34b41 100644 --- a/tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py +++ b/tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py @@ -62,7 +62,7 @@ def compare_rc_build_spec( The result object. expected : dict[str, str] The expected object. - compare_fn_map : str + compare_fn_map : dict[str, CompareFn] A map from field name to corresponding compare function. 
Returns diff --git a/tests/build_spec_generator/test_build_command_patcher.py b/tests/build_spec_generator/test_build_command_patcher.py index b96de56d4..b83359698 100644 --- a/tests/build_spec_generator/test_build_command_patcher.py +++ b/tests/build_spec_generator/test_build_command_patcher.py @@ -470,7 +470,7 @@ def test_patching_multiple_commands( break if effective_cli_parser: - expected_cli_commands.append(cli_parser.parse(cmd)) + expected_cli_commands.append(effective_cli_parser.parse(cmd)) else: expected_cli_commands.append( UnparsedCLICommand( diff --git a/tests/build_spec_generator/test_macaron_db_extractor.py b/tests/build_spec_generator/test_macaron_db_extractor.py index c2c750083..8d63a4168 100644 --- a/tests/build_spec_generator/test_macaron_db_extractor.py +++ b/tests/build_spec_generator/test_macaron_db_extractor.py @@ -282,7 +282,7 @@ def test_lookup_any_build_command_empty_db(macaron_db_session: Session) -> None: assert not lookup_any_build_command(component_id=1, session=macaron_db_session) -def test_invalid_input_databse(invalid_db_session: Session) -> None: +def test_invalid_input_database(invalid_db_session: Session) -> None: """Test handling invalid input database.""" with pytest.raises(QueryMacaronDatabaseError): lookup_any_build_command( diff --git a/tests/vsa/compare_vsa.py b/tests/vsa/compare_vsa.py index 943caf138..fcd4120f8 100644 --- a/tests/vsa/compare_vsa.py +++ b/tests/vsa/compare_vsa.py @@ -126,7 +126,7 @@ def compare_list( The result array. expected : list The expected array. - compare_fn_map : str + compare_fn_map : dict[str, CompareFn] A map from field name to corresponding compare function. name : str The name of the field whose value is being compared in this function. @@ -169,7 +169,7 @@ def compare_dict( The result object. expected : dict The expected object. - compare_fn_map : str + compare_fn_map : dict[str, CompareFn] A map from field name to corresponding compare function. 
name : str The name of the field whose value is being compared in this function. From 211c23e806da7624d25456234d50524638298ab6 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Tue, 16 Sep 2025 11:40:15 +1000 Subject: [PATCH 27/27] docs: fix the docstrings and comments Signed-off-by: behnazh-w --- src/macaron/build_spec_generator/build_spec_generator.py | 2 +- .../cli_command_parser/gradle_cli_parser.py | 2 +- .../build_spec_generator/cli_command_parser/maven_cli_parser.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/macaron/build_spec_generator/build_spec_generator.py b/src/macaron/build_spec_generator/build_spec_generator.py index b6c9ef382..4262f7e6a 100644 --- a/src/macaron/build_spec_generator/build_spec_generator.py +++ b/src/macaron/build_spec_generator/build_spec_generator.py @@ -71,7 +71,7 @@ def gen_build_spec_for_purl( build_spec_format: BuildSpecFormat, output_path: str, ) -> int: - """Return the content of a build spec file from a given PURL. + """Generate the build spec file for the given PURL in the specified output directory. Parameters ---------- diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py index ee33ba963..f381c505c 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py @@ -537,7 +537,7 @@ def parse(self, cmd_list: list[str]) -> GradleCLICommand: parsed_opts = self.arg_parser.parse_args(options) except argparse.ArgumentError as error: raise CommandLineParseError(f"Failed to parse {' '.join(options)}.") from error - # Even though we have set `exit_on_error`, argparse still exists unexpectedly in some + # Even though we have set `exit_on_error`, argparse still exits unexpectedly in some # cases. This has been confirmed to be a bug in the argparse library implementation. 
# https://github.com/python/cpython/issues/121018. # This is fixed in Python3.12, but not Python3.11 diff --git a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py index bace7eab8..c4877cbdd 100644 --- a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py +++ b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py @@ -439,7 +439,7 @@ def parse(self, cmd_list: list[str]) -> "MavenCLICommand": parsed_opts = self.arg_parser.parse_args(options) except argparse.ArgumentError as error: raise CommandLineParseError(f"Failed to parse command {' '.join(options)}.") from error - # Even though we have set `exit_on_error`, argparse still exists unexpectedly in some + # Even though we have set `exit_on_error`, argparse still exits unexpectedly in some # cases. This has been confirmed to be a bug in the argparse library implementation. # https://github.com/python/cpython/issues/121018. # This is fixed in Python3.12, but not Python3.11