diff options
author | Matt Jolly <kangie@gentoo.org> | 2024-09-27 10:46:24 +1000 |
---|---|---|
committer | Matt Jolly <kangie@gentoo.org> | 2024-09-27 10:52:11 +1000 |
commit | e0adc1721f392c89c8262c4f864f6b1edf796edc (patch) | |
tree | d1dd7e9a760622d789488440825f50c8fcc23414 | |
parent | flake8: Add config and do some trivial style changes (diff) | |
download | chromium-tools-master.tar.gz chromium-tools-master.tar.bz2 chromium-tools-master.zip |
- Rework the logic to get a better result when remediating unknown versions.
- Also store the version mapping in a dataclass (why not).
- Use packaging.version.Version to make sorting versions trivial.
- Accept positional arguments for the min and max versions.
Signed-off-by: Matt Jolly <kangie@gentoo.org>
-rwxr-xr-x | get-opera-version-mapping.py | 118 |
1 file changed, 86 insertions, 32 deletions
diff --git a/get-opera-version-mapping.py b/get-opera-version-mapping.py index 6d6f3de..015fd21 100755 --- a/get-opera-version-mapping.py +++ b/get-opera-version-mapping.py @@ -1,6 +1,32 @@ #!/usr/bin/env python + +# SPDX-License-Identifier: GPL-2.0-or-later +# This script is used to extract Opera and Chromium versions from the Opera changelog (blog) +# This is incomplete data, so we need to fill in the gaps with the Chromium version from the previous known version +# The intent here is to have _some_ sort of datasource to identify a potentially-fixed version of Opera based on +# the Chromium version it includes. +# High level logic: +# We can fetch the opera blog posts that relate to a major version of Opera as long as they don't change their URIs. +# We iterate over H4 elements to get the Opera version (and date, though we throw that away) +# We then iterate over child elements until we find an "Update Chromium" entry, which we can use to get the +# Chromium version (in which case we bail early) Or we exhaust the children and give up. +# Lather, rinse, repeat. + +import argparse, dataclasses + import requests from bs4 import BeautifulSoup +from packaging.version import Version + + +@dataclasses.dataclass +class OperaChromiumVersion: + opera_version: Version + chromium_version: Version + + def __str__(self): + chromium_version_str = 'unknown' if self.chromium_version == Version('0.0.0.0') else str(self.chromium_version) + return f"Opera Version: {self.opera_version}, Chromium Version: {chromium_version_str}" def get_opera_chromium_versions(base_url, start_version, end_version): @@ -15,16 +41,11 @@ def get_opera_chromium_versions(base_url, start_version, end_version): end_version: The ending version to extract information for (inclusive). Returns: - A dictionary mapping Opera version to Chromium version. - If no update is mentioned, the previous Chromium version is used. - For missing data or errors, "unknown" is used. 
+ A list of OperaChromiumVersion objects containing the extracted version information. """ - versions = {} - chromium_version = None + versions: list[OperaChromiumVersion] = [] for version in range(start_version, end_version + 1): - # Fix formatting issue: - # OR url = base_url.format(version) url = base_url.format(version) print(f"Processing version {version}") @@ -38,8 +59,8 @@ def get_opera_chromium_versions(base_url, start_version, end_version): # Iterate through each section starting with an H4 element for section in content.find_all('h4'): + chromium_version = None version_str, date_str = section.text.strip().split(' – ') - versions[version_str] = chromium_version # Process all content elements (including nested ones) until the next H4 next_sibling = section.find_next_sibling( @@ -63,7 +84,12 @@ def get_opera_chromium_versions(base_url, start_version, end_version): # Handle missing Chromium version if not chromium_version: - chromium_version = "unknown" + chromium_version = '0.0.0.0' + + versions.append(OperaChromiumVersion( + Version(version_str), + Version(chromium_version) + )) except requests.exceptions.RequestException as e: if e.args and e.args[0] == 404: @@ -76,41 +102,69 @@ def get_opera_chromium_versions(base_url, start_version, end_version): print(f"Unexpected error: {e}") chromium_version = None # Reset chromium_version for next iteration - return versions + # We're broadly sorted by major version, but within each major version we get newer entries first + # Sort by Opera version to get the correct order + sorted_versions = sorted(versions, key=lambda x: x.opera_version) + return sorted_versions def remediate_unknown_versions(versions): """ - Remediates entries with "unknown" values in the versions dictionary by + Remediates entries with '0.0.0.0' values in the versions dictionary by assuming no change from the previous known version. Args: - versions: A dictionary mapping Opera version to Chromium version. 
+ versions: A list of OperaChromiumVersion objects containing the extracted version information. Returns: - The modified versions dictionary with "unknown" values replaced based on previous entries. + A list of OperaChromiumVersion objects with '0.0.0.0' values replaced + by the previous known version if available. """ - previous_version = None - for version, chromium_version in versions.items(): - if chromium_version == "unknown": - if previous_version is not None: - # Update with previous version - versions[version] = previous_version + previous_version: Version = Version('0.0.0.0') + fixed_versions: list[OperaChromiumVersion] = [] + + for mapping in versions: + if mapping.chromium_version == Version('0.0.0.0') and previous_version is not Version('0.0.0.0'): + # Update with previous version + fixed_versions.append(OperaChromiumVersion(mapping.opera_version, previous_version)) else: - previous_version = chromium_version # Update known version for future references - return versions + # This should be fine, we're always parsing from oldest to newest + if previous_version < mapping.chromium_version: + previous_version = mapping.chromium_version + fixed_versions.append(mapping) + + return fixed_versions + + +def parse_arguments(): + """ + Parses the command line arguments and returns the parsed values. + + Returns: + The parsed command line arguments. 
+ """ + parser = argparse.ArgumentParser(description='Get Opera and Chromium versions.') + parser.add_argument('start_ver', type=int, help='starting version', default=110) + parser.add_argument('end_ver', type=int, help='ending version', default=115) + return parser.parse_args() + + +def main(): + args = parse_arguments() + + # Base URL with version placeholder + base_url = "https://blogs.opera.com/desktop/changelog-for-{}/" + opera_chromium_versions = get_opera_chromium_versions(base_url, args.start_ver, args.end_ver) + fixed_versions = remediate_unknown_versions(opera_chromium_versions) -# Example usage -# Base URL with version placeholder -base_url = "https://blogs.opera.com/desktop/changelog-for-{}/" -opera_chromium_versions = get_opera_chromium_versions(base_url, 110, 115) + # Print the versions + if fixed_versions: + for mapping in fixed_versions: + print(mapping) + else: + print("Failed to extract any versions.") -opera_chromium_versions = remediate_unknown_versions(opera_chromium_versions) -if opera_chromium_versions: - for opera_version, chromium_version in opera_chromium_versions.items(): - print( - f"Opera Version: {opera_version}, Chromium Version: {chromium_version}") -else: - print("Failed to extract any versions.") +if __name__ == "__main__": + main() |