diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5ba3c95b..8238e7c1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -11,42 +11,36 @@ # at Lawrence Livermore National Laboratory (LLNL). # This entire pipeline is LLNL-specific # -# Important note: This file is a template provided by llnl/radiuss-shared-ci. -# Remains to set variable values, change the reference to the radiuss-shared-ci -# repo, opt-in and out optional features. The project can then extend it with -# additional stages. +# This file uses GitLab CI Components from radiuss-shared-ci (requires GitLab 17.0+). +# Components provide better versioning, type-safe inputs, and cleaner syntax. # -# In addition, each project should copy over and complete: -# - .gitlab/custom-jobs-and-variables.yml -# - .gitlab/subscribed-pipelines.yml -# -# The jobs should be specified in a file local to the project, -# - .gitlab/jobs/${CI_MACHINE}.yml -# or generated (see LLNL/Umpire for an example). +# In addition, each project should have: +# - .gitlab/custom-jobs.yml - Job templates for child pipelines +# - .gitlab/custom-variables.yml - Machine-specific variables +# - .gitlab/jobs/${CI_MACHINE}.yml - Machine-specific job definitions +############################################################################### + +############################################################################### +# VARIABLES ############################################################################### -# We define the following GitLab pipeline variables: variables: -##### LC GITLAB CONFIGURATION -# Use the umdev LLNL service user to run CI. This prevents from running -# pipelines as an actual user. + +# LC GITLAB CONFIGURATION +# Use an LLNL service user to run CI. This prevents pipelines from running as +# an actual user. LLNL_SERVICE_USER: "" # Use the service user workspace. Solves permission issues, stores everything # at the same location whoever triggers a pipeline. 
# CUSTOM_CI_BUILDS_DIR: "" -# Submodules: We don't need to fetch dependencies handled by Spack. +# Optimize submodules usage: only fetch the ones we need. +# Note: We don't need to fetch dependencies handled by Spack. GIT_SUBMODULE_STRATEGY: normal GIT_SUBMODULE_DEPTH: 1 GIT_SUBMODULE_UPDATE_FLAGS: --jobs 2 GIT_SUBMODULE_PATHS: scripts/radiuss-spack-configs scripts/uberenv -##### PROJECT VARIABLES -# We build the projects in the CI clone directory. -# Used in script/gitlab/build_and_test.sh script. -# TODO: add a clean-up mechanism. - BUILD_ROOT: ${CI_PROJECT_DIR} - -##### SHARED_CI CONFIGURATION +# SHARED_CI CONFIGURATION # Required information about GitHub repository GITHUB_PROJECT_NAME: "CARE" GITHUB_PROJECT_ORG: "LLNL" @@ -56,41 +50,164 @@ variables: JOB_CMD: value: "./scripts/gitlab/build_and_test.sh" expand: false -# Override the pattern describing branches that will skip the "draft PR filter -# test". Add protected branches here. See default value in -# preliminary-ignore-draft-pr.yml. -# ALWAYS_RUN_PATTERN: "" -# We organize the build-and-test stage with sub-pipelines. Each sub-pipeline -# corresponds to a test batch on a given machine. +############################################################################### +# MAIN PIPELINE STAGES +############################################################################### +# IMPORTANT: You must define stages yourself to allow customization. 
+# The following stages are REQUIRED by RADIUSS Shared CI components: -# High level stages stages: - - prerequisites - - build-and-test + - prerequisites # Required: machine availability checks + - build-and-test # Required: build and test jobs -# Template for jobs triggering a build-and-test sub-pipeline: -.build-and-test: - stage: build-and-test - trigger: - include: - - local: '.gitlab/custom-jobs-and-variables.yml' - - project: 'radiuss/radiuss-shared-ci' - ref: 'v2025.06.0' - file: 'pipelines/${CI_MACHINE}.yml' - - artifact: '${CI_MACHINE}-jobs.yml' - job: 'generate-job-lists' - strategy: depend - forward: - pipeline_variables: true +############################################################################### +# INCLUDES +############################################################################### include: # Sets ID tokens for every job using `default:` - project: 'lc-templates/id_tokens' file: 'id_tokens.yml' - # [Optional] checks preliminary to running the actual CI test - #- project: 'radiuss/radiuss-shared-ci' - # ref: 'v2025.06.0' - # file: 'utilities/preliminary-ignore-draft-pr.yml' - # pipelines subscribed by the project - - local: '.gitlab/subscribed-pipelines.yml' + + # Base pipeline templates and utilities + - component: $CI_SERVER_FQDN/radiuss/radiuss-shared-ci/base-pipeline@v2025.12.0 + inputs: + github_project_name: $GITHUB_PROJECT_NAME + github_project_org: $GITHUB_PROJECT_ORG + github_token: $GITHUB_TOKEN + + # Local custom variables (used for component inputs and forwarded to child pipelines) + - local: '.gitlab/custom-variables.yml' + +############################################################################### +# MACHINE PIPELINES +############################################################################### +# We organize the build-and-test stage with sub-pipelines. Each sub-pipeline +# corresponds to a test batch on a given machine. 
+# +# Note: .machine-check template is provided by the base-pipeline component +# and includes better error handling, validation, and GitHub status reporting. +# Trigger a build-and-test pipeline for each machine. +# Comment out the machine blocks you don't need. + +# One job to generate the job list for all the subpipelines +generate-job-lists: + stage: prerequisites + tags: [shell, oslic] + variables: + RADIUSS_JOBS_PATH: "scripts/radiuss-spack-configs/gitlab/radiuss-jobs" + LOCAL_JOBS_PATH: ".gitlab/jobs" + script: + - cat ${RADIUSS_JOBS_PATH}/dane.yml ${LOCAL_JOBS_PATH}/dane.yml > dane-jobs.yml + - cat ${RADIUSS_JOBS_PATH}/matrix.yml ${LOCAL_JOBS_PATH}/matrix.yml > matrix-jobs.yml + - cat ${RADIUSS_JOBS_PATH}/corona.yml ${LOCAL_JOBS_PATH}/corona.yml > corona-jobs.yml + - cat ${RADIUSS_JOBS_PATH}/tioga.yml ${LOCAL_JOBS_PATH}/tioga.yml > tioga-jobs.yml + - cat ${RADIUSS_JOBS_PATH}/tuolumne.yml ${LOCAL_JOBS_PATH}/tuolumne.yml > tuolumne-jobs.yml + artifacts: + paths: + - dane-jobs.yml + - matrix-jobs.yml + - corona-jobs.yml + - tioga-jobs.yml + - tuolumne-jobs.yml + +# DANE +dane-up-check: + extends: [.dane, .machine-check] + +dane-build-and-test: + extends: [.dane, .build-and-test] + needs: [dane-up-check, generate-job-lists] + trigger: + include: + - component: $CI_SERVER_FQDN/radiuss/radiuss-shared-ci/dane-pipeline@v2025.12.0 + inputs: + job_cmd: $JOB_CMD + shared_alloc: $DANE_SHARED_ALLOC + job_alloc: $DANE_JOB_ALLOC + github_project_name: $GITHUB_PROJECT_NAME + github_project_org: $GITHUB_PROJECT_ORG + - local: '.gitlab/custom-jobs.yml' + - artifact: 'dane-jobs.yml' + job: 'generate-job-lists' + +# MATRIX +matrix-up-check: + extends: [.matrix, .machine-check] + +matrix-build-and-test: + extends: [.matrix, .build-and-test] + needs: [matrix-up-check, generate-job-lists] + trigger: + include: + - component: $CI_SERVER_FQDN/radiuss/radiuss-shared-ci/matrix-pipeline@v2025.12.0 + inputs: + job_cmd: $JOB_CMD + shared_alloc: $MATRIX_SHARED_ALLOC + job_alloc: 
$MATRIX_JOB_ALLOC + github_project_name: $GITHUB_PROJECT_NAME + github_project_org: $GITHUB_PROJECT_ORG + - local: '.gitlab/custom-jobs.yml' + - artifact: 'matrix-jobs.yml' + job: 'generate-job-lists' + +# CORONA +corona-up-check: + extends: [.corona, .machine-check] + +corona-build-and-test: + extends: [.corona, .build-and-test] + needs: [corona-up-check, generate-job-lists] + trigger: + include: + - component: $CI_SERVER_FQDN/radiuss/radiuss-shared-ci/corona-pipeline@v2025.12.0 + inputs: + job_cmd: $JOB_CMD + shared_alloc: $CORONA_SHARED_ALLOC + job_alloc: $CORONA_JOB_ALLOC + github_project_name: $GITHUB_PROJECT_NAME + github_project_org: $GITHUB_PROJECT_ORG + - local: '.gitlab/custom-jobs.yml' + - artifact: 'corona-jobs.yml' + job: 'generate-job-lists' + +# TIOGA +tioga-up-check: + extends: [.tioga, .machine-check] + +tioga-build-and-test: + extends: [.tioga, .build-and-test] + needs: [tioga-up-check, generate-job-lists] + trigger: + include: + - component: $CI_SERVER_FQDN/radiuss/radiuss-shared-ci/tioga-pipeline@v2025.12.0 + inputs: + job_cmd: $JOB_CMD + shared_alloc: $TIOGA_SHARED_ALLOC + job_alloc: $TIOGA_JOB_ALLOC + github_project_name: $GITHUB_PROJECT_NAME + github_project_org: $GITHUB_PROJECT_ORG + - local: '.gitlab/custom-jobs.yml' + - artifact: 'tioga-jobs.yml' + job: 'generate-job-lists' + +# TUOLUMNE +tuolumne-up-check: + extends: [.tuolumne, .machine-check] + +tuolumne-build-and-test: + extends: [.tuolumne, .build-and-test] + needs: [tuolumne-up-check, generate-job-lists] + trigger: + include: + - component: $CI_SERVER_FQDN/radiuss/radiuss-shared-ci/tuolumne-pipeline@v2025.12.0 + inputs: + job_cmd: $JOB_CMD + shared_alloc: $TUOLUMNE_SHARED_ALLOC + job_alloc: $TUOLUMNE_JOB_ALLOC + github_project_name: $GITHUB_PROJECT_NAME + github_project_org: $GITHUB_PROJECT_ORG + - local: '.gitlab/custom-jobs.yml' + - artifact: 'tuolumne-jobs.yml' + job: 'generate-job-lists' diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml 
deleted file mode 100644 index 82ddca24..00000000 --- a/.gitlab/custom-jobs-and-variables.yml +++ /dev/null @@ -1,86 +0,0 @@ -############################################################################### -# Copyright (c) 2020-25, Lawrence Livermore National Security, LLC and CARE -# project contributors. See the CARE LICENSE file for details. -# -# SPDX-License-Identifier: BSD-3-Clause -############################################################################### - -# We define the following GitLab pipeline variables: -variables: -# On LLNL's machines, this pipeline creates only one allocation shared among jobs -# in order to save time and resources. This allocation has to be uniquely named -# so that we are sure to retrieve it and avoid collisions. - ALLOC_NAME: ${CI_PROJECT_NAME}_ci_${CI_PIPELINE_ID} - -# Dane -# Arguments for top level allocation - DANE_SHARED_ALLOC: "--exclusive --reservation=ci --time=10 --nodes=1" -# Arguments for job level allocation - DANE_JOB_ALLOC: "--overlap --reservation=ci --nodes=1" -# Project specific variants for dane - PROJECT_DANE_VARIANTS: "" -# Project specific deps for dane - PROJECT_DANE_DEPS: "^umpire~c~shared ^raja~examples~exercises~tests ^chai~shared~examples " - -# Corona -# Arguments for top level allocation - CORONA_SHARED_ALLOC: "--exclusive --time-limit=20m --nodes=1" -# Arguments for job level allocation - CORONA_JOB_ALLOC: "--nodes=1 --begin-time=+5s" -# Project specific variants for corona - PROJECT_CORONA_VARIANTS: "" -# Project specific deps for corona - PROJECT_CORONA_DEPS: "^umpire~c~shared ^raja~examples~exercises~tests ^chai~shared~examples " - -# Tioga -# Arguments for top level allocation - TIOGA_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=20m --nodes=1" -# Arguments for job level allocation - TIOGA_JOB_ALLOC: "--nodes=1 --begin-time=+5s" -# Project specific variants for tioga - PROJECT_TIOGA_VARIANTS: "" -# Project specific deps for tioga - PROJECT_TIOGA_DEPS: "^umpire~c~shared 
^raja~examples~exercises~tests ^chai~shared~examples " - -# Tuolumne -# Arguments for top level allocation - TUOLUMNE_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=20m --nodes=1" -# Arguments for job level allocation - TUOLUMNE_JOB_ALLOC: "--nodes=1 --begin-time=+5s" -# Project specific variants for tuolumne - PROJECT_TUOLUMNE_VARIANTS: "" -# Project specific deps for tuolumne - PROJECT_TUOLUMNE_DEPS: "^umpire~c~shared ^raja~examples~exercises~tests ^chai~shared~examples " - -# Lassen and Butte use a different job scheduler (spectrum lsf) that does not -# allow pre-allocation the same way slurm does. -# Arguments for job level allocation - LASSEN_JOB_ALLOC: "1 -W 11 -q pci" -# Project specific variants for lassen - PROJECT_LASSEN_VARIANTS: " cuda_arch=70" -# Project specific deps for lassen - PROJECT_LASSEN_DEPS: "^umpire~c~shared ^raja~examples~exercises~tests ^chai~shared~examples " - -# Configuration shared by build and test jobs specific to this project. -# Not all configuration can be shared. Here projects can fine tune the -# CI behavior. -# See Umpire for an example (export junit test reports). -.custom_job: - artifacts: - reports: - junit: junit.xml - name: "${CI_PROJECT_NAME}-${CI_MACHINE}-${CI_JOB_NAME}-${CI_PIPELINE_ID}" - paths: - - ./*.cmake - -.reproducer_vars: - script: - - | - echo -e " - # Required variables \n - export MODULE_LIST=\"${MODULE_LIST}\" \n - export SPEC=\"${SPEC//\"/\\\"}\" \n - # Allow to set job script for debugging (only this differs from CI) \n - export DEBUG_MODE=true \n - # Using the CI build cache is optional and requires a token. 
Set it like so: \n - # export REGISTRY_TOKEN=\"\" \n" diff --git a/.gitlab/custom-jobs.yml b/.gitlab/custom-jobs.yml new file mode 100644 index 00000000..a2c04252 --- /dev/null +++ b/.gitlab/custom-jobs.yml @@ -0,0 +1,37 @@ +############################################################################### +# Copyright (c) 2020-25, Lawrence Livermore National Security, LLC and CARE +# project contributors. See the CARE LICENSE file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################### + +# This file defines JOB TEMPLATES ONLY. +# It is included in CHILD pipelines (via trigger: include:) to provide +# templates that your jobs can extend. + +############################################################################### +# JOB CUSTOMIZATION TEMPLATES +############################################################################### + +# Configuration shared by build and test jobs specific to this project. +# Not all configuration can be shared. Here projects can fine tune the +# CI behavior. +.custom_job: + artifacts: + reports: + junit: junit.xml + name: "${CI_PROJECT_NAME}-${CI_MACHINE}-${CI_JOB_NAME}-${CI_PIPELINE_ID}" + paths: + - ./*.cmake + +.reproducer_vars: + script: + - | + echo -e " + # Required variables \n + export MODULE_LIST=\"${MODULE_LIST}\" \n + export SPEC=\"${SPEC//\"/\\\"}\" \n + # Allow to set job script for debugging (only this differs from CI) \n + export DEBUG_MODE=true \n + # Using the CI build cache is optional and requires a token. Set it like so: \n + # export REGISTRY_TOKEN=\"\" \n" diff --git a/.gitlab/custom-variables.yml b/.gitlab/custom-variables.yml new file mode 100644 index 00000000..903a1520 --- /dev/null +++ b/.gitlab/custom-variables.yml @@ -0,0 +1,60 @@ +############################################################################### +# Copyright (c) 2020-25, Lawrence Livermore National Security, LLC and CARE +# project contributors. 
See the CARE LICENSE file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################### + +# This file defines project-specific VARIABLES ONLY. +# It is included in the PARENT pipeline (.gitlab-ci.yml) to provide +# variables that are passed as component inputs and forwarded to child pipelines. + +############################################################################### +# MACHINE-SPECIFIC ALLOCATION VARIABLES +############################################################################### + +variables: + # Allocation name (used for shared allocations) + ALLOC_NAME: ${CI_PROJECT_NAME}_ci_${CI_PIPELINE_ID} + + # Dane (SLURM) allocation settings + DANE_SHARED_ALLOC: "--exclusive --reservation=ci --time=10 --nodes=1" + # Note: we repeat the reservation, helpful when jobs are manually re-triggered. + DANE_JOB_ALLOC: "--overlap --reservation=ci --nodes=1" + # Project specific variants for dane + PROJECT_DANE_VARIANTS: "" + # Project specific deps for dane + PROJECT_DANE_DEPS: "^umpire~c~shared ^raja~examples~exercises~tests ^chai~shared~examples " + + # Matrix (SLURM) allocation settings + MATRIX_SHARED_ALLOC: "--exclusive --partition=pdebug --time=10 --nodes=1" + # Note: we repeat the partition, helpful when jobs are manually re-triggered. 
+ MATRIX_JOB_ALLOC: "--partition=pdebug --overlap --nodes=1" + # Project specific variants for matrix + PROJECT_MATRIX_VARIANTS: "+cuda cuda_arch=75" + # Project specific deps for matrix + PROJECT_MATRIX_DEPS: "^umpire~c~shared ^raja~examples~exercises~tests ^chai~shared~examples " + + # Corona (flux) allocation settings + CORONA_SHARED_ALLOC: "--exclusive --time-limit=20m --nodes=1" + CORONA_JOB_ALLOC: "--nodes=1 --begin-time=+5s" + # Project specific variants for corona + PROJECT_CORONA_VARIANTS: "" + # Project specific deps for corona + PROJECT_CORONA_DEPS: "^umpire~c~shared ^raja~examples~exercises~tests ^chai~shared~examples " + + # Tioga (flux) allocation settings + TIOGA_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=20m --nodes=1" + TIOGA_JOB_ALLOC: "--nodes=1 --begin-time=+5s" + # Project specific variants for tioga + PROJECT_TIOGA_VARIANTS: "" + # Project specific deps for tioga + PROJECT_TIOGA_DEPS: "^umpire~c~shared ^raja~examples~exercises~tests ^chai~shared~examples " + + # Tuolumne (flux) allocation settings + TUOLUMNE_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=20m --nodes=1" + TUOLUMNE_JOB_ALLOC: "--nodes=1 --begin-time=+5s" + # Project specific variants for tuolumne + PROJECT_TUOLUMNE_VARIANTS: "" + # Project specific deps for tuolumne + PROJECT_TUOLUMNE_DEPS: "^umpire~c~shared ^raja~examples~exercises~tests ^chai~shared~examples " diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/matrix.yml similarity index 94% rename from .gitlab/jobs/lassen.yml rename to .gitlab/jobs/matrix.yml index 867f0e3d..480d97c6 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/matrix.yml @@ -1,12 +1,12 @@ -############################################################################## +############################################################################### # Copyright (c) 2020-25, Lawrence Livermore National Security, LLC and CARE # project contributors. See the CARE LICENSE file for details. 
# # SPDX-License-Identifier: BSD-3-Clause -############################################################################## +############################################################################### # Override reproducer section to define project specific variables. -.lassen_reproducer_vars: +.matrix_reproducer_vars: script: - !reference [.reproducer_vars, script] diff --git a/.gitlab/subscribed-pipelines.yml b/.gitlab/subscribed-pipelines.yml deleted file mode 100644 index e8ab3ae4..00000000 --- a/.gitlab/subscribed-pipelines.yml +++ /dev/null @@ -1,113 +0,0 @@ -############################################################################### -# Copyright (c) 2020-25, Lawrence Livermore National Security, LLC and CARE -# project contributors. See the CARE LICENSE file for details. -# -# SPDX-License-Identifier: BSD-3-Clause -############################################################################### - -# The template job to test whether a machine is up. -# Expects CI_MACHINE defined to machine name. -.machine-check: - stage: prerequisites - tags: [shell, oslic] - variables: - GIT_STRATEGY: none - script: - - | - if [[ $(jq '.[env.CI_MACHINE].total_nodes_up' /usr/global/tools/lorenz/data/loginnodeStatus) == 0 ]] - then - echo -e "\e[31mNo node available on ${CI_MACHINE}\e[0m" - curl --url "https://api.github.com/repos/${GITHUB_PROJECT_ORG}/${GITHUB_PROJECT_NAME}/statuses/${CI_COMMIT_SHA}" \ - --header 'Content-Type: application/json' \ - --header "authorization: Bearer ${GITHUB_TOKEN}" \ - --data "{ \"state\": \"failure\", \"target_url\": \"${CI_PIPELINE_URL}\", \"description\": \"GitLab ${CI_MACHINE} down\", \"context\": \"ci/gitlab/${CI_MACHINE}\" }" - exit 1 - fi - -### -# Trigger a build-and-test pipeline for a machine. -# Comment the jobs for machines you don’t need. 
-### - -# One job to generate the job list for all the subpipelines -generate-job-lists: - stage: prerequisites - tags: [shell, oslic] - variables: - RADIUSS_JOBS_PATH: "scripts/radiuss-spack-configs/gitlab/radiuss-jobs" - LOCAL_JOBS_PATH: ".gitlab/jobs" - script: - - cat ${RADIUSS_JOBS_PATH}/dane.yml ${LOCAL_JOBS_PATH}/dane.yml > dane-jobs.yml - - cat ${RADIUSS_JOBS_PATH}/lassen.yml ${LOCAL_JOBS_PATH}/lassen.yml > lassen-jobs.yml - - cat ${RADIUSS_JOBS_PATH}/corona.yml ${LOCAL_JOBS_PATH}/corona.yml > corona-jobs.yml - - cat ${RADIUSS_JOBS_PATH}/tioga.yml ${LOCAL_JOBS_PATH}/tioga.yml > tioga-jobs.yml - - cat ${RADIUSS_JOBS_PATH}/tuolumne.yml ${LOCAL_JOBS_PATH}/tuolumne.yml > tuolumne-jobs.yml - artifacts: - paths: - - dane-jobs.yml - - lassen-jobs.yml - - corona-jobs.yml - - tioga-jobs.yml - - tuolumne-jobs.yml - -# DANE -dane-up-check: - variables: - CI_MACHINE: "dane" - extends: [.machine-check] - -dane-build-and-test: - variables: - CI_MACHINE: "dane" - needs: [dane-up-check, generate-job-lists] - extends: [.build-and-test] - -# CORONA -corona-up-check: - variables: - CI_MACHINE: "corona" - extends: [.machine-check] - -corona-build-and-test: - variables: - CI_MACHINE: "corona" - needs: [corona-up-check, generate-job-lists] - extends: [.build-and-test] - -# TIOGA -tioga-up-check: - variables: - CI_MACHINE: "tioga" - extends: [.machine-check] - -tioga-build-and-test: - variables: - CI_MACHINE: "tioga" - needs: [tioga-up-check, generate-job-lists] - extends: [.build-and-test] - -# TUOLUMNE -tuolumne-up-check: - variables: - CI_MACHINE: "tuolumne" - extends: [.machine-check] - -tuolumne-build-and-test: - variables: - CI_MACHINE: "tuolumne" - needs: [tuolumne-up-check, generate-job-lists] - extends: [.build-and-test] - -# LASSEN -lassen-up-check: - variables: - CI_MACHINE: "lassen" - extends: [.machine-check] - -lassen-build-and-test: - variables: - CI_MACHINE: "lassen" - needs: [lassen-up-check, generate-job-lists] - extends: [.build-and-test] - - diff --git 
a/.uberenv_config.json b/.uberenv_config.json index 4e214b0d..ef48d326 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -4,8 +4,8 @@ "package_final_phase": "initconfig", "package_source_dir": "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_commit": "280017a9ba3f6a969743deca0eebc96e7c0e5cfd", +"spack_branch": "v1.0.2", "spack_configs_path": "scripts/radiuss-spack-configs", -"spack_packages_path": ["scripts/radiuss-spack-configs/packages", "scripts/spack_packages/packages"], +"spack_packages_path": ["scripts/radiuss-spack-configs/spack_repo/llnl_radiuss/packages", "scripts/spack_packages/spack_repo/llnl_care/packages"], "spack_setup_clingo": false } diff --git a/CMakeLists.txt b/CMakeLists.txt index 60391eaa..940f8eaa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ cmake_policy(SET CMP0096 OLD) # Required when building Umpire submodule project(CARE LANGUAGES C CXX - VERSION 0.15.2) + VERSION 0.15.3) include(${PROJECT_SOURCE_DIR}/cmake/Setup.cmake) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index e8428095..eab0b441 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -12,6 +12,13 @@ in this file. The format of this file is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). +## [Version 0.15.3] - Release date 2025-12-22 + +### Fixed +- Removed template arguments when declaring special member functions of a template class. They are unnecessary and even disallowed by some compilers. +- Fixed several chunked loop macros. +- Fixed comparison function passed to cub/hipcub. + ## [Version 0.15.2] - Release date 2025-09-15 ### Added diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py index b8adf7d8..af9955d8 100644 --- a/docs/sphinx/conf.py +++ b/docs/sphinx/conf.py @@ -57,7 +57,7 @@ # The short X.Y version. version = '0.15' # The full version, including alpha/beta/rc tags. -release = '0.15.2' +release = '0.15.3' # The language for content autogenerated by Sphinx. 
Refer to documentation # for a list of supported languages. diff --git a/host-configs/lc/blueos_3_ppc64le_ib_p9/nvcc_clang.cmake b/host-configs/lc/blueos_3_ppc64le_ib_p9/nvcc_clang.cmake deleted file mode 100644 index 7c6bf76f..00000000 --- a/host-configs/lc/blueos_3_ppc64le_ib_p9/nvcc_clang.cmake +++ /dev/null @@ -1,23 +0,0 @@ -############################################################################## -# Copyright (c) 2020-25, Lawrence Livermore National Security, LLC and CARE -# project contributors. See the CARE LICENSE file for details. -# -# SPDX-License-Identifier: BSD-3-Clause -############################################################################## - -set(COMPILER_BASE "/usr/tce/packages/clang/clang-ibm-14.0.5" CACHE PATH "") -set(CMAKE_C_COMPILER "${COMPILER_BASE}/bin/clang" CACHE PATH "") -set(CMAKE_CXX_COMPILER "${COMPILER_BASE}/bin/clang++" CACHE PATH "") - -set(GCC_HOME "/usr/tce/packages/gcc/gcc-8.3.1" CACHE PATH "") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${GCC_HOME}" CACHE STRING "") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${GCC_HOME}" CACHE STRING "") - -set(BLT_CMAKE_IMPLICIT_LINK_DIRECTORIES_EXCLUDE "/usr/tce/packages/gcc/gcc-4.9.3/lib64/gcc/powerpc64le-unknown-linux-gnu/4.9.3;/usr/tce/packages/gcc/gcc-4.9.3/lib64" CACHE STRING "") - -set(ENABLE_CUDA ON CACHE BOOL "Enable CUDA") -set(CUDA_TOOLKIT_ROOT_DIR "/usr/tce/packages/cuda/cuda-11.8.0" CACHE PATH "Path to CUDA") -set(CMAKE_CUDA_COMPILER "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc" CACHE PATH "") -set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE PATH "") -set(CMAKE_CUDA_ARCHITECTURES "70" CACHE STRING "") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=--gcc-toolchain=${GCC_HOME}" CACHE STRING "") diff --git a/host-configs/lc/toss_4_x86_64_ib/nvcc_clang.cmake b/host-configs/lc/toss_4_x86_64_ib/nvcc_clang.cmake index 4a6bdbc5..6c018c2b 100644 --- a/host-configs/lc/toss_4_x86_64_ib/nvcc_clang.cmake +++ 
b/host-configs/lc/toss_4_x86_64_ib/nvcc_clang.cmake @@ -26,7 +26,7 @@ set(CUDA_TOOLKIT_ROOT_DIR "/usr/tce/packages/cuda/cuda-${CUDA_VER}" CACHE PATH " set(CMAKE_CUDA_COMPILER "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc" CACHE PATH "") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=--gcc-toolchain=${GCC_DIR} -Wno-deprecated-gpu-targets -Wno-unused-command-line-argument" CACHE STRING "") set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE PATH "") -set(CMAKE_CUDA_ARCHITECTURES "70" CACHE STRING "") +set(CMAKE_CUDA_ARCHITECTURES "90" CACHE STRING "") # Prevent incorrect implicit libraries from being linked in set(BLT_CMAKE_IMPLICIT_LINK_DIRECTORIES_EXCLUDE "/usr/tce/packages/gcc/gcc-10.3.1/lib/gcc/x86_64-redhat-linux/10;/usr/tce/packages/gcc/gcc-10.3.1/lib64;/lib64;/usr/lib64;/lib;/usr/lib" CACHE STRING "") diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index e3242208..16de843b 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -218,7 +218,7 @@ then timed_message "Cleaning working directory" # Map CPU core allocations - declare -A core_counts=(["lassen"]=40 ["poodle"]=28 ["dane"]=28 ["corona"]=32 ["rzansel"]=48 ["tioga"]=32 ["tuolumne"]=48) + declare -A core_counts=(["lassen"]=40 ["poodle"]=28 ["dane"]=28 ["corona"]=32 ["rzansel"]=48 ["tioga"]=32 ["tuolumne"]=48 ["matrix"]=48) # If building, then delete everything first # NOTE: 'cmake --build . -j core_counts' attempts to reduce individual build resources. 
diff --git a/scripts/make_release_tarball.sh b/scripts/make_release_tarball.sh index 0f38733c..e54720e7 100755 --- a/scripts/make_release_tarball.sh +++ b/scripts/make_release_tarball.sh @@ -7,8 +7,8 @@ # SPDX-License-Identifier: BSD-3-Clause ############################################################################## -TAR_CMD=gtar -VERSION=0.15.2 +TAR_CMD=`which tar` +VERSION=`git describe --tags` git archive --prefix=care-${VERSION}/ -o care-${VERSION}.tar HEAD 2> /dev/null diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index dcf80758..fddc4f16 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit dcf807584584cb1ca4dca1d5cbf8b320bf73567e +Subproject commit fddc4f16ee987abc9c1c61879eaf8a2d6a8253d9 diff --git a/scripts/spack_packages/repo.yaml b/scripts/spack_packages/repo.yaml deleted file mode 100644 index b97ec70d..00000000 --- a/scripts/spack_packages/repo.yaml +++ /dev/null @@ -1,2 +0,0 @@ -repo: - namespace: 'llnl.care' diff --git a/scripts/spack_packages/packages/README.md b/scripts/spack_packages/spack_repo/llnl_care/packages/README.md similarity index 100% rename from scripts/spack_packages/packages/README.md rename to scripts/spack_packages/spack_repo/llnl_care/packages/README.md diff --git a/scripts/spack_packages/spack_repo/llnl_care/repo.yaml b/scripts/spack_packages/spack_repo/llnl_care/repo.yaml new file mode 100644 index 00000000..b6d0227e --- /dev/null +++ b/scripts/spack_packages/spack_repo/llnl_care/repo.yaml @@ -0,0 +1,3 @@ +repo: + namespace: 'llnl_care' + api: v2.0 diff --git a/scripts/uberenv b/scripts/uberenv index 6ba67dcb..bec05e20 160000 --- a/scripts/uberenv +++ b/scripts/uberenv @@ -1 +1 @@ -Subproject commit 6ba67dcbd7ccbb9c03920b89de19ac959e2c3bdd +Subproject commit bec05e20bf2a1634d97ead358a9072c36f1fdcac diff --git a/src/care/CMakeLists.txt b/src/care/CMakeLists.txt index e41d91e5..59a8de1d 100644 --- a/src/care/CMakeLists.txt +++ 
b/src/care/CMakeLists.txt @@ -24,6 +24,7 @@ set(care_headers ArrayView.h atomic.h care_inst.h + compress_algorithm_impl.h CHAICallback.h CHAIDataGetter.h GPUWatchpoint.h diff --git a/src/care/DefaultMacros.h b/src/care/DefaultMacros.h index 361f948b..af00f33b 100644 --- a/src/care/DefaultMacros.h +++ b/src/care/DefaultMacros.h @@ -101,15 +101,15 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_begin_ndx; INDEX < _care_o //////////////////////////////////////////////////////////////////////////////// #define CARE_CHECKED_CHUNKED_OPENMP_FOR_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) {\ CARE_NEST_BEGIN(CHECK) \ - auto const _care_openmp_for_loop_end_ndx = END_INDEX; \ - decltype(_care_openmp_for_loop_end_ndx) _care_openmp_for_loop_ndx = START_INDEX; \ + auto _care_openmp_for_loop_end_ndx = END_INDEX; \ + decltype(_care_openmp_for_loop_end_ndx) _care_openmp_for_loop_current_ndx = START_INDEX; \ decltype(_care_openmp_for_loop_end_ndx) _care_open_chunked_for_loop_chunk_size = CHUNK_SIZE > 0 ? CHUNK_SIZE : END_INDEX - START_INDEX ; \ - while (_care_openmp_for_loop_begin_ndx < _care_openmp_for_loop_end_ndx) { \ - decltype(_care_openmp_for_loop_end_ndx) _care_openmp_for_loop_chunk_begin_ndx = _care_openmp_for_loop_ndx ; \ - decltype(_care_openmp_for_loop_end_ndx) _care_openmp_for_loop_chunk_end_ndx = (_care_openmp_for_loop_ndx + _care_open_chunked_for_loop_chunk_size) ? _care_openmp_for_loop_ndx + _care_open_chunked_for_loop_chunk_size : _care_openmp_for_loop_end_ndx ; \ + while (_care_openmp_for_loop_current_ndx < _care_openmp_for_loop_end_ndx) { \ + decltype(_care_openmp_for_loop_end_ndx) _care_openmp_for_loop_chunk_begin_ndx = _care_openmp_for_loop_current_ndx ; \ + decltype(_care_openmp_for_loop_end_ndx) _care_openmp_for_loop_chunk_end_ndx = (_care_openmp_for_loop_current_ndx + _care_open_chunked_for_loop_chunk_size < _care_openmp_for_loop_end_ndx) ? 
_care_openmp_for_loop_current_ndx + _care_open_chunked_for_loop_chunk_size : _care_openmp_for_loop_end_ndx ; \ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _care_openmp_for_loop_chunk_end_ndx; ++INDEX) {\ -#define CARE_CHECKED_CHUNKED_OPENMP_FOR_LOOP_END(CHECK) } OMP_FOR_END } CARE_NEST_END(CHECK) } +#define CARE_CHECKED_CHUNKED_OPENMP_FOR_LOOP_END(CHECK) } OMP_FOR_END _care_openmp_for_loop_current_ndx = _care_openmp_for_loop_chunk_end_ndx; } CARE_NEST_END(CHECK) } @@ -417,7 +417,7 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _ /// @arg[in] CHECK The variable to check that the start and end macros match /// //////////////////////////////////////////////////////////////////////////////// -#define CARE_CHECKED_CHUNKED_MANAGED_PTR_LOOP_START(INDEX, START_INDEX, END_INDEX, CHECK) CARE_CHECKED_CHUNKED_OPENMP_FOR_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) +#define CARE_CHECKED_CHUNKED_MANAGED_PTR_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) CARE_CHECKED_CHUNKED_OPENMP_FOR_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) #define CARE_CHECKED_CHUNKED_MANAGED_PTR_LOOP_END(CHECK) CARE_CHECKED_CHUNKED_OPENMP_FOR_LOOP_END(CHECK) diff --git a/src/care/KeyValueSorter_decl.h b/src/care/KeyValueSorter_decl.h index b4e1bf9c..9595ced8 100644 --- a/src/care/KeyValueSorter_decl.h +++ b/src/care/KeyValueSorter_decl.h @@ -14,6 +14,10 @@ #include "care/algorithm_decl.h" #include "care/CHAIDataGetter.h" +#include "care/scan.h" + +#include // For std::move + namespace care { /////////////////////////////////////////////////////////////////////////// @@ -183,6 +187,22 @@ class CARE_DLL_API KeyValueSorter { { setKeyValueArraysFromManagedArray(m_keys, m_values, len, arr); } + /////////////////////////////////////////////////////////////////////////// + /// @author Peter Robinson + /// @brief Constructor + /// Takes ownership of the provided keys and values arrays + /// 
@param[in] len - The number of elements in the arrays + /// @param[in] keys - The keys array to take ownership of + /// @param[in] values - The values array to take ownership of + /// @return a KeyValueSorter instance + /////////////////////////////////////////////////////////////////////////// + KeyValueSorter(const size_t len, host_device_ptr && keys, host_device_ptr && values) + : m_len(len) + , m_ownsPointers(true) + , m_keys(std::move(keys)) + , m_values(std::move(values)) + { + } /////////////////////////////////////////////////////////////////////////// /// @author Alan Dayton @@ -421,6 +441,77 @@ class CARE_DLL_API KeyValueSorter { sortKeyValueArrays(m_keys, m_values, 0, m_len, true); } + /////////////////////////////////////////////////////////////////////////// + /// @author Peter Robinson + /// @brief Sorts "len" elements starting at "start" by key, then by value + /// @param[in] start - The index to start at + /// @param[in] len - The number of elements to sort + /// @return void + /// TODO: add bounds checking + /////////////////////////////////////////////////////////////////////////// + void sortByKeyThenValue(const size_t start, const size_t len) { + if (len <= 1) return; + + // First sort by key + sortKeyValueArrays(m_keys, m_values, start, len, false); + + // Phase 1: Identify ranges of identical keys + host_device_ptr rangeStarts(len+1); + host_device_ptr rangeEnds(len+1); + + int count = 0; + + auto keys = m_keys; + + // Use SCAN_LOOP over all of [start, start+len) so the last element can open its own range + SCAN_LOOP(i, start, start+len, idx, count, + (i == start) || (keys[i] != keys[i-1])) { + rangeStarts[idx] = i; + } SCAN_LOOP_END(len, idx, count) + + // Set the last range end (one-past-the-end sentinel) + rangeStarts.set(count , start+len); + + auto values = m_values; + + // Phase 2: Sort each range by value using insertion sort in parallel + CARE_STREAM_LOOP(i, 0, count) { + int rangeStart = rangeStarts[i]; + int rangeEnd = rangeStarts[i+1]; + int rangeLen = rangeEnd - rangeStart; + + // Only sort if range 
has more than one element + if (rangeLen > 1) { + // remember that keys are identical over this range, so no need to modify m_keys + InsertionSort(values.slice(rangeStart), rangeEnd-rangeStart); + } + } CARE_STREAM_LOOP_END + + // Free temporary arrays + rangeStarts.free(); + rangeEnds.free(); + } + + + /////////////////////////////////////////////////////////////////////////// + /// @author Peter Robinson + /// @brief Sorts the first "len" elements by key, then by value + /// @param[in] len - The number of elements to sort + /// @return void + /////////////////////////////////////////////////////////////////////////// + void sortByKeyThenValue(const size_t len) { + sortByKeyThenValue(0, len); + } + + /////////////////////////////////////////////////////////////////////////// + /// @author Peter Robinson + /// @brief Sorts all the elements by key, then by value + /// @return void + /////////////////////////////////////////////////////////////////////////// + void sortByKeyThenValue() { + sortByKeyThenValue(0, m_len); + } + /////////////////////////////////////////////////////////////////////////// /// @author Alan Dayton /// @brief Does a stable sort on "len" elements starting at "start" by value @@ -489,11 +580,79 @@ class CARE_DLL_API KeyValueSorter { sortByKey(); } } + + /////////////////////////////////////////////////////////////////////////// + /// @author Peter Robinson + /// @brief Eliminates duplicate key-value pairs + /// First does a sort by key and then by value, which groups identical pairs. + /// Then duplicates are removed. 
+ /// @return void + /////////////////////////////////////////////////////////////////////////// + void eliminateDuplicatePairs() { + if (m_len > 1) { + // First sort by key and then by value to group identical pairs + sortByKeyThenValue(); + + // Allocate storage for tracking unique elements + host_device_ptr isUnique(m_len+1); + + // Mark unique elements (first element is always unique) + auto len = m_len; + auto keys = m_keys; + auto values = m_values; + CARE_STREAM_LOOP(i, 0, len+1) { + if (i == 0) { + isUnique[i] = 1; + } + else if (i == len) { + isUnique[i] = 0; + } + else { + // Element is unique if it differs from the previous element + // in either key or value + isUnique[i] = (keys[i] != keys[i-1] || + values[i] != values[i-1]) ? 1 : 0; + } + } CARE_STREAM_LOOP_END + + // Exclusive scan over all m_len+1 flags so positions[m_len] holds the unique count + host_device_ptr positions(m_len+1); + care::exclusive_scan(RAJADeviceExec{}, isUnique, positions, m_len+1, 0, false); + + // Get the total number of unique elements + int newSize = positions.pick(m_len); + + // Allocate new arrays for the unique elements + host_device_ptr newKeys(newSize, "newKeys"); + host_device_ptr newValues(newSize, "newValues"); + + // Copy unique elements to their new positions + CARE_STREAM_LOOP(i, 0, m_len) { + if (isUnique[i]) { + int pos = positions[i]; + newKeys[pos] = keys[i]; + newValues[pos] = values[i]; + } + } CARE_STREAM_LOOP_END + + // Free temporary arrays + isUnique.free(); + positions.free(); + + // Free the original key value pairs + free(); + + // Set to new key value pairs + m_keys = newKeys; + m_values = newValues; + m_len = newSize; + } + } /////////////////////////////////////////////////////////////////////////// /// @author Benjamin Liu /// @brief no-op - /// GPU version does not require separate allocation for kesy array. + /// GPU version does not require separate allocation for keys array. 
/// @return void /////////////////////////////////////////////////////////////////////////// void initializeKeys() const { @@ -604,6 +763,22 @@ inline bool cmpKeys(KeyValueType const & left, KeyValueType const & right) return left.key < right.key; } +/////////////////////////////////////////////////////////////////////////// +/// @author Peter Robinson +/// @brief Less than comparison operator for keys, then values +/// Used as a comparator in the STL +/// @param left - left _kv to compare +/// @param right - right _kv to compare +/// @return true if left's key is less than right's key, or if keys are equal +/// and left's value is less than right's value +/////////////////////////////////////////////////////////////////////////// +template +inline bool cmpKeysThenValues(KeyValueType const & left, KeyValueType const & right) +{ + return (left.key < right.key) || + ((left.key == right.key) && (left.value < right.value)); +} + /////////////////////////////////////////////////////////////////////////// /// @author Benjamin Liu after Alan Dayton /// @brief Initializes keys and values by copying elements from the array @@ -743,6 +918,33 @@ class CARE_DLL_API KeyValueSorter { setKeyValueArraysFromManagedArray(m_keyValues, len, arr); } + /////////////////////////////////////////////////////////////////////////// + /// @author Peter Robinson + /// @brief Constructor + /// Takes ownership of the provided keys and values arrays + /// @param[in] len - The number of elements in the arrays + /// @param[in] keys - The keys array to take ownership of + /// @param[in] values - The values array to take ownership of + /// @return a KeyValueSorter instance + /////////////////////////////////////////////////////////////////////////// + KeyValueSorter(const size_t len, host_device_ptr && keys, host_device_ptr && values) + : m_len(len) + , m_ownsPointers(true) + , m_keys(std::move(keys)) + , m_values(std::move(values)) + , m_keyValues(len, "m_keyValues") + { + auto mkeyValues = 
m_keyValues; + auto mkeys = m_keys; + auto mvalues = m_values; + + // Initialize m_keyValues from the provided keys and values + CARE_SEQUENTIAL_LOOP(i, 0, m_len) { + mkeyValues[i].key = mkeys[i]; + mkeyValues[i].value = mvalues[i]; + } CARE_SEQUENTIAL_LOOP_END + } + /////////////////////////////////////////////////////////////////////////// /// @author Alan Dayton /// @brief (Shallow) Copy constructor @@ -1013,6 +1215,48 @@ class CARE_DLL_API KeyValueSorter { void sortByKey() const { sortByKey(m_len); } + + /////////////////////////////////////////////////////////////////////////// + /// @author Peter Robinson + /// @brief Sorts "len" elements starting at "start" by key, then by value + /// @param[in] start - The index to start at + /// @param[in] len - The number of elements to sort + /// @return void + /// TODO: add bounds checking + /////////////////////////////////////////////////////////////////////////// + void sortByKeyThenValue(const size_t start, const size_t len) const { + CHAIDataGetter<_kv, RAJA::seq_exec> getter {}; + _kv * rawData = getter.getRawArrayData(m_keyValues) + start; + std::stable_sort(rawData, rawData + len, cmpKeysThenValues<_kv>); + + // Free stale arrays + if (m_keys) { + m_keys.free(); + } + + if (m_values) { + m_values.free(); + } + } + + /////////////////////////////////////////////////////////////////////////// + /// @author Peter Robinson + /// @brief Sorts the first "len" elements by key, then by value + /// @param[in] len - The number of elements to sort + /// @return void + /////////////////////////////////////////////////////////////////////////// + void sortByKeyThenValue(const size_t len) const { + sortByKeyThenValue(0, len); + } + + /////////////////////////////////////////////////////////////////////////// + /// @author Peter Robinson + /// @brief Sorts all the elements by key, then by value + /// @return void + /////////////////////////////////////////////////////////////////////////// + void sortByKeyThenValue() const { + 
sortByKeyThenValue(m_len); + } /////////////////////////////////////////////////////////////////////////// /// @author Alan Dayton @@ -1068,6 +1312,53 @@ class CARE_DLL_API KeyValueSorter { } } } + + /////////////////////////////////////////////////////////////////////////// + /// @author Peter Robinson + /// @brief Eliminates duplicate key-value pairs + /// First does a sort by key and then by value, which groups identical pairs. + /// Then duplicates are removed. + /// @return void + /////////////////////////////////////////////////////////////////////////// + void eliminateDuplicatePairs() { + if (m_len > 1) { + // First sort by key and then by value to group identical pairs + sortByKeyThenValue(); + // Create a new array to hold the unique pairs + host_device_ptr<_kv> uniquePairs(m_len, "uniquePairs"); + // Copy the first element + uniquePairs.set(0, m_keyValues.pick(0)); + // Copy only non-duplicate elements + size_t newSize = 1; + auto keyValues = m_keyValues; + CARE_SEQUENTIAL_REF_LOOP(i, 1, m_len, newSize) { + if (keyValues[i].key != keyValues[i-1].key || + keyValues[i].value != keyValues[i-1].value) { + uniquePairs[newSize] = keyValues[i]; + ++newSize; + } + } CARE_SEQUENTIAL_REF_LOOP_END + + // Free the original key value pairs + m_keyValues.free(); + + // Set to new key value pairs + m_keyValues = uniquePairs; + m_len = newSize; + + // Reallocate to the correct size + m_keyValues.realloc(newSize); + + // Free stale arrays + if (m_keys) { + m_keys.free(); + } + + if (m_values) { + m_values.free(); + } + } + } /////////////////////////////////////////////////////////////////////////// /// @author Alan Dayton @@ -1213,4 +1504,3 @@ void IntersectKeyValueSorters(RAJA::seq_exec exec, } // namespace care #endif // !defined(_CARE_KEY_VALUE_SORTER_DECL_H_) - diff --git a/src/care/KeyValueSorter_impl.h b/src/care/KeyValueSorter_impl.h index 97a63354..bd2b1a43 100644 --- a/src/care/KeyValueSorter_impl.h +++ b/src/care/KeyValueSorter_impl.h @@ -238,11 +238,13 @@ 
sortKeyValueArrays(host_device_ptr & keys, auto * rawKeyResult = keyGetter.getRawArrayData(keyResult); auto * rawValueResult = valueGetter.getRawArrayData(valueResult); - auto custom_comparator = [] CARE_HOST_DEVICE (decltype(*rawKeyData) lhs, decltype(*rawKeyData) rhs) { + using RawKeyType = std::remove_reference_t; + + auto custom_comparator = [] CARE_HOST_DEVICE (const RawKeyType& lhs, + const RawKeyType& rhs) { return lhs < rhs; }; - // Get the temp storage length char * d_temp_storage = nullptr; size_t temp_storage_bytes = 0; diff --git a/src/care/LoopFuser.h b/src/care/LoopFuser.h index 25087de0..e23bd562 100644 --- a/src/care/LoopFuser.h +++ b/src/care/LoopFuser.h @@ -642,21 +642,21 @@ class LoopFuser : public FusedActions { /// in the event that a user wants to maintain multiple independent /// LoopFuser objects. /////////////////////////////////////////////////////////////////////////// - CARE_DLL_API LoopFuser(allocator); + CARE_DLL_API LoopFuser(allocator); /////////////////////////////////////////////////////////////////////////// /// @author Peter Robinson /// @brief The destructor. /////////////////////////////////////////////////////////////////////////// - CARE_DLL_API ~LoopFuser(); + CARE_DLL_API ~LoopFuser(); /////////////////////////////////////////////////////////////////////////// /// @author Peter Robinson /// @brief gets a static singleton instance of a LoopFuser. /// @return The default instance. 
/////////////////////////////////////////////////////////////////////////// - CARE_DLL_API static LoopFuser * getInstance(); + CARE_DLL_API static LoopFuser* getInstance(); /////////////////////////////////////////////////////////////////////////// /// @author Peter Robinson diff --git a/src/care/algorithm_decl.h b/src/care/algorithm_decl.h index cbc7a44d..3b84815c 100644 --- a/src/care/algorithm_decl.h +++ b/src/care/algorithm_decl.h @@ -115,8 +115,8 @@ T ArrayMaxLoc(care::host_device_ptr arr, int n, T initVal, int & loc); template int ArrayFind(care::host_device_ptr arr, const int len, const T val, const int start = 0) ; -template -T PickAndPerformSum(care::host_device_ptr arr, care::host_device_ptr mask, care::host_device_ptr subset, int n); +template +ReturnType PickAndPerformSum(care::host_device_ptr arr, care::host_device_ptr mask, care::host_device_ptr subset, int n); template int FindIndexMinAboveThresholds(care::host_device_ptr arr, int n, @@ -166,20 +166,20 @@ int PickAndPerformFindMaxIndex(care::host_device_ptr arr, template int ArrayCount(care::host_device_ptr arr, int length, T val); -template -T ArraySum(care::host_device_ptr arr, int n, T initVal); +template +ReturnType ArraySum(care::host_device_ptr arr, int n, T initVal); -template -T ArraySumSubset(care::host_device_ptr arr, care::host_device_ptr subset, int n, T initVal); +template +ReturnType ArraySumSubset(care::host_device_ptr arr, care::host_device_ptr subset, int n, T initVal); -template -T SumArrayOrArraySubset(care::host_device_ptr arr, care::host_device_ptr subset, int n); +template +ReturnType SumArrayOrArraySubset(care::host_device_ptr arr, care::host_device_ptr subset, int n); -template -T ArrayMaskedSumSubset(care::host_device_ptr arr, care::host_device_ptr mask, care::host_device_ptr subset, int n, T initVal); +template +ReturnType ArrayMaskedSumSubset(care::host_device_ptr arr, care::host_device_ptr mask, care::host_device_ptr subset, int n, T initVal); -template -T 
ArrayMaskedSum(care::host_device_ptr arr, care::host_device_ptr mask, int n, T initVal); +template +ReturnType ArrayMaskedSum(care::host_device_ptr arr, care::host_device_ptr mask, int n, T initVal); template int FindIndexGT(care::host_device_ptr arr, int n, T limit); @@ -364,19 +364,19 @@ int uniqArray(RAJADeviceExec exec, care::host_device_ptr & Array, size_t len, template void sort_uniq(Exec e, care::host_device_ptr * array, int * len, bool noCopy = false); -enum class compress_array { removed_list, mapping_list }; +enum class compress_array { removed_list, mapping_list, remove_flag_list, keep_flag_list }; template -void CompressArray(RAJA::seq_exec, care::host_device_ptr & arr, const int arrLen, - care::host_device_ptr list, const int listLen, const care::compress_array listType, bool realloc=false); +int CompressArray(RAJA::seq_exec, care::host_device_ptr & arr, const int arrLen, + care::host_device_ptr list, const int listLen, const care::compress_array listType, bool realloc=false); #ifdef CARE_PARALLEL_DEVICE template -void CompressArray(RAJADeviceExec exec, care::host_device_ptr & arr, const int arrLen, - care::host_device_ptr list, const int listLen, const care::compress_array listType, bool realloc=false); +int CompressArray(RAJADeviceExec exec, care::host_device_ptr & arr, const int arrLen, + care::host_device_ptr list, const int listLen, const care::compress_array listType, bool realloc=false); #endif // defined(CARE_PARALLEL_DEVICE) template -void CompressArray(care::host_device_ptr & arr, const int arrLen, - care::host_device_ptr list, const int listLen, const care::compress_array listType, bool realloc=false); +int CompressArray(care::host_device_ptr & arr, const int arrLen, + care::host_device_ptr list, const int listLen, const care::compress_array listType, bool realloc=false); template CARE_HOST_DEVICE void InsertionSort(care::local_ptr array, int len); @@ -496,4 +496,3 @@ void ExpandArrayInPlace(RAJADeviceExec, care::host_device_ptr array, 
care::ho } // end namespace care #endif // !defined(CARE_ALGORITHM_DECL_H) - diff --git a/src/care/algorithm_impl.h b/src/care/algorithm_impl.h index 8d635202..a686d23a 100644 --- a/src/care/algorithm_impl.h +++ b/src/care/algorithm_impl.h @@ -21,6 +21,7 @@ #include "care/CHAIDataGetter.h" #include "care/DefaultMacros.h" #include "care/scan.h" +#include "care/compress_algorithm_impl.h" // Other library headers #if defined(__CUDACC__) @@ -862,162 +863,6 @@ CARE_INLINE void sort_uniq(Exec e, care::host_device_ptr * array, int * len, *len = uniqArray(e, *array, *len, noCopy); } -/************************************************************************ -* Function : CompressArray -* Author(s) : Peter Robinson, Benjamin Liu -* Purpose : Compress an array based on list of array indices. -* Based on listType, the list is either -* removed_list: a list of indices to remove -* or -* mapping_list: a mapping from compressed indices to original indices. -* All entries in list must be > 0 and < arrLen. -* If the realloc parameter is true, arr will be resized/reallocated to -* the compressed size. -* Thread safe version of CompressArray. -* Note that thread safe version only requires list to be sorted, -* and only if listType == removed_list is true. 
-**************************************************************************/ -#ifdef CARE_PARALLEL_DEVICE -template -CARE_INLINE void CompressArray(RAJADeviceExec exec, care::host_device_ptr & arr, const int arrLen, - care::host_device_ptr list, const int listLen, - const care::compress_array listType, bool realloc) -{ - //GPU VERSION - if (listType == care::compress_array::removed_list) { - care::host_device_ptr tmp(arrLen-listLen, "CompressArray_tmp"); - int numKept = 0; - SCAN_LOOP(i, 0, arrLen, pos, numKept, - -1 == BinarySearch(list, 0, listLen, i)) { - tmp[pos] = arr[i]; - } SCAN_LOOP_END(arrLen, pos, numKept) - -#ifdef CARE_DEBUG - int numRemoved = arrLen - numKept; - if (listLen != numRemoved) { - printf("Warning in CompressArray: did not remove expected number of members!\n"); - } -#endif - if (realloc) { - arr.free(); - arr = tmp; - } - else { - ArrayCopy(exec, arr, reinterpret_cast &>(tmp), numKept); - tmp.free(); - } - } - else { - care::host_device_ptr tmp(arrLen, "CompressArray tmp"); - ArrayCopy(tmp, arr, arrLen); - if (realloc) { - arr.realloc(listLen) ; - } - CARE_STREAM_LOOP(newIndex, 0, listLen) { - int oldIndex = list[newIndex] ; - arr[newIndex] = tmp[oldIndex] ; - } CARE_STREAM_LOOP_END - tmp.free(); - } -} - -#endif // defined(CARE_PARALLEL_DEVICE) - -/************************************************************************ -* Function : CompressArray -* Author(s) : Peter Robinson, Benjamin Liu -* Purpose : Compress an array based on list of array indices. -* Based on listType, the list is either -* removed_list: a list of indices to remove -* or -* mapping_list: a mapping from compressed indices to original indices. -* All entries in list must be > 0 and < arrLen. -* If the realloc parameter is true, arr will be resized/reallocated to -* the compressed size. -* Sequential Version of CompressArray -* Requires both arr and list to be sorted. 
-**************************************************************************/ -template -CARE_INLINE void CompressArray(RAJA::seq_exec, care::host_device_ptr & arr, const int arrLen, - care::host_device_ptr list, const int listLen, - const care::compress_array listType, bool realloc) -{ - // CPU VERSION - if (listType == care::compress_array::removed_list) { - int readLoc; - int writeLoc = 0, numRemoved = 0; - care::host_ptr listHost = list ; - care::host_ptr arrHost = arr ; -#ifdef CARE_DEBUG - if (listHost[listLen-1] > arrLen-1) { - printf("Warning in CompressArray seq_exec: asking to remove entries not in array!\n"); - } -#endif - for (readLoc = 0; readLoc < arrLen; ++readLoc) { - if ((numRemoved == listLen) || (readLoc < listHost[numRemoved])) { - arrHost[writeLoc++] = arrHost[readLoc]; - } - else if (readLoc == listHost[numRemoved]) { - ++numRemoved; - } -#ifdef CARE_DEBUG - else { - printf("Warning in CompressArray seq_exec: list of removed members not sorted!\n"); - } -#endif - } -#ifdef CARE_DEBUG - if ((listLen != numRemoved) || (writeLoc != arrLen - listLen)) { - printf("CompressArray seq_exec: did not remove expected number of members!\n"); - } -#endif - if (realloc) { - arr.realloc(arrLen - listLen) ; - } - } - else { - CARE_SEQUENTIAL_LOOP(newIndex, 0, listLen) { - int oldIndex = list[newIndex] ; -#ifdef CARE_DEBUG - if (oldIndex > arrLen-1 || oldIndex < 0) { - printf("Warning in CompressArray seq_exec: asking to remove entries not in array!\n"); - } -#endif - arr[newIndex] = arr[oldIndex] ; - } CARE_SEQUENTIAL_LOOP_END - if (realloc) { - arr.realloc(listLen) ; - } - } -} - -/************************************************************************ -* Function : CompressArray -* Author(s) : Peter Robinson, Benjamin Liu -* Purpose : Compress an array based on list of array indices. -* Based on listType, the list is either -* removed_list: a list of indices to remove -* or -* mapping_list: a mapping from compressed indices to original indices. 
-* All entries in list must be > 0 and < arrLen. -* If the realloc parameter is true, arr will be resized/reallocated to -* the compressed size. -* Both arr and list should be sorted to support the sequential -* implementation. -**************************************************************************/ -template -CARE_INLINE void CompressArray(care::host_device_ptr & arr, const int arrLen, - care::host_device_ptr list, const int listLen, - const care::compress_array listType, bool realloc) -{ -#ifdef CARE_DEBUG - checkSorted(arr, arrLen, "CompressArray", "arr") ; - checkSorted(list, listLen, "CompressArray", "list") ; -#endif - return CompressArray(RAJAExec(), arr, arrLen, list, listLen, listType, realloc); -} - - /************************************************************************ * Function : uniqLocal * Author(s) : Benjamin Liu @@ -1438,15 +1283,15 @@ CARE_INLINE int ArrayCount(care::host_device_ptr arr, int length, T val * Author(s) : Peter Robinson * Purpose : Returns the sum of all values in a ManagedArray * ************************************************************************/ -template -CARE_INLINE T ArraySum(care::host_device_ptr arr, int n, T initVal) +template +CARE_INLINE ReturnType ArraySum(care::host_device_ptr arr, int n, T initVal) { ReduceType iVal = initVal; RAJAReduceSum sum { iVal }; CARE_REDUCE_LOOP(k, 0, n) { sum += arr[k]; } CARE_REDUCE_LOOP_END - return (T) (ReduceType) sum; + return (ReturnType) (ReduceType) sum; } /************************************************************************ @@ -1455,8 +1300,8 @@ CARE_INLINE T ArraySum(care::host_device_ptr arr, int n, T initVal) * Purpose : Returns the sum of values in arr at indices in subset. 
* Note : length n refers to length of subset, not array * ************************************************************************/ -template -CARE_INLINE T ArraySumSubset(care::host_device_ptr arr, +template +CARE_INLINE ReturnType ArraySumSubset(care::host_device_ptr arr, care::host_device_ptr subset, int n, T initVal) { ReduceType iVal = initVal; @@ -1464,7 +1309,7 @@ CARE_INLINE T ArraySumSubset(care::host_device_ptr arr, CARE_REDUCE_LOOP(k, 0, n) { sum += arr[subset[k]]; } CARE_REDUCE_LOOP_END - return (T) (ReduceType) sum; + return (ReturnType) (ReduceType) sum; } /************************************************************************ @@ -1472,8 +1317,8 @@ CARE_INLINE T ArraySumSubset(care::host_device_ptr arr, * Author(s) : Peter Robinson * Purpose : Returns the sum of values in arr at indices in subset. * ************************************************************************/ -template -CARE_INLINE T ArrayMaskedSumSubset(care::host_device_ptr arr, +template +CARE_INLINE ReturnType ArrayMaskedSumSubset(care::host_device_ptr arr, care::host_device_ptr mask, care::host_device_ptr subset, int n, T initVal) @@ -1486,7 +1331,7 @@ CARE_INLINE T ArrayMaskedSumSubset(care::host_device_ptr arr, sum += arr[ndx]; } } CARE_REDUCE_LOOP_END - return (T) (ReduceType) sum; + return (ReturnType) (ReduceType) sum; } /************************************************************************ @@ -1494,8 +1339,8 @@ CARE_INLINE T ArrayMaskedSumSubset(care::host_device_ptr arr, * Author(s) : Peter Robinson * Purpose : Returns the sum of values in arr at indices where mask is 0. 
* ************************************************************************/ -template -CARE_INLINE T ArrayMaskedSum(care::host_device_ptr arr, +template +CARE_INLINE ReturnType ArrayMaskedSum(care::host_device_ptr arr, care::host_device_ptr mask, int n, T initVal) { @@ -1506,7 +1351,7 @@ CARE_INLINE T ArrayMaskedSum(care::host_device_ptr arr, sum += arr[i] * T(mask[i] == 0); } CARE_STREAM_LOOP_END - return (T) (ReduceType) sum ; + return (ReturnType) (ReduceType) sum ; } /************************************************************************ @@ -1690,15 +1535,15 @@ CARE_INLINE care::host_device_ptr ArrayDup(RAJA::seq_exec, const T* from, int // SumIntArray. // @author Peter Robinson // -template -CARE_INLINE T SumArrayOrArraySubset(care::host_device_ptr arr, +template +CARE_INLINE ReturnType SumArrayOrArraySubset(care::host_device_ptr arr, care::host_device_ptr subset, int n) { if (subset) { - return ArraySumSubset(arr, subset, n, T(0)); + return ArraySumSubset(arr, subset, n, T(0)); } else { - return ArraySum(arr, n, T(0)); + return ArraySum(arr, n, T(0)); } } @@ -1710,21 +1555,21 @@ CARE_INLINE T SumArrayOrArraySubset(care::host_device_ptr arr, // @param mask Array of same length as arr // @param subset Array of length n. 
// -template -CARE_INLINE T PickAndPerformSum(care::host_device_ptr arr, +template +CARE_INLINE ReturnType PickAndPerformSum(care::host_device_ptr arr, care::host_device_ptr mask, care::host_device_ptr subset, int n) { if (mask) { if (subset) { - return ArrayMaskedSumSubset(arr, mask, subset, n, T(0)); + return ArrayMaskedSumSubset(arr, mask, subset, n, T(0)); } else { - return ArrayMaskedSum(arr, mask, n, T(0)); + return ArrayMaskedSum(arr, mask, n, T(0)); } } else { - return SumArrayOrArraySubset(arr, subset, n); + return SumArrayOrArraySubset(arr, subset, n); } } diff --git a/src/care/care_inst.h b/src/care/care_inst.h index 93a6492e..b1356ce4 100644 --- a/src/care/care_inst.h +++ b/src/care/care_inst.h @@ -50,6 +50,10 @@ #define CARE_TEMPLATE_ARRAY_TYPE globalID #include "care/KeyValueSorter_inst.h" +#define CARE_TEMPLATE_KEY_TYPE globalID +#define CARE_TEMPLATE_ARRAY_TYPE globalID +#include "care/KeyValueSorter_inst.h" + #if GLOBALID_IS_64BIT #define CARE_TEMPLATE_KEY_TYPE GIDTYPE #define CARE_TEMPLATE_ARRAY_TYPE int @@ -341,44 +345,44 @@ void sort_uniq(RAJA::seq_exec, care::host_device_ptr *, int *, bool) ; #ifdef CARE_PARALLEL_DEVICE CARE_EXTERN template CARE_DLL_API -void CompressArray(RAJADeviceExec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(RAJADeviceExec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; CARE_EXTERN template CARE_DLL_API -void CompressArray(RAJADeviceExec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(RAJADeviceExec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; CARE_EXTERN template CARE_DLL_API -void CompressArray(RAJADeviceExec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int 
CompressArray(RAJADeviceExec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; CARE_EXTERN template CARE_DLL_API -void CompressArray(RAJADeviceExec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(RAJADeviceExec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; #if CARE_HAVE_LLNL_GLOBALID CARE_EXTERN template CARE_DLL_API -void CompressArray(RAJADeviceExec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(RAJADeviceExec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; #endif #endif // defined(CARE_PARALLEL_DEVICE) CARE_EXTERN template CARE_DLL_API -void CompressArray(RAJA::seq_exec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(RAJA::seq_exec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; CARE_EXTERN template CARE_DLL_API -void CompressArray(RAJA::seq_exec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(RAJA::seq_exec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; CARE_EXTERN template CARE_DLL_API -void CompressArray(RAJA::seq_exec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(RAJA::seq_exec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; CARE_EXTERN template CARE_DLL_API -void CompressArray(RAJA::seq_exec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int 
CompressArray(RAJA::seq_exec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; #if CARE_HAVE_LLNL_GLOBALID CARE_EXTERN template CARE_DLL_API -void CompressArray(RAJA::seq_exec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(RAJA::seq_exec, care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; #endif CARE_EXTERN template CARE_DLL_API -void CompressArray(care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; CARE_EXTERN template CARE_DLL_API -void CompressArray(care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; CARE_EXTERN template CARE_DLL_API -void CompressArray(care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; CARE_EXTERN template CARE_DLL_API -void CompressArray(care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; #if CARE_HAVE_LLNL_GLOBALID CARE_EXTERN template CARE_DLL_API -void CompressArray(care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; +int CompressArray(care::host_device_ptr &, const int, care::host_device_ptr, const int, const care::compress_array, bool) ; #endif 
/////////////////////////////////////////////////////////////////////////////// @@ -870,6 +874,8 @@ int ArrayCount(care::host_device_ptr, #ifdef CARE_PARALLEL_DEVICE +CARE_EXTERN template CARE_DLL_API +int ArraySum(care::host_device_ptr, int, bool) ; CARE_EXTERN template CARE_DLL_API int ArraySum(care::host_device_ptr, int, int) ; CARE_EXTERN template CARE_DLL_API @@ -880,6 +886,8 @@ double ArraySum(care::host_device_ptr(care::host_device_ptr, int, bool) ; CARE_EXTERN template CARE_DLL_API int ArraySum(care::host_device_ptr, int, int) ; CARE_EXTERN template CARE_DLL_API @@ -892,6 +900,8 @@ double ArraySum(care::host_device_ptr(care::host_device_ptr, care::host_device_ptr, int, bool) ; CARE_EXTERN template CARE_DLL_API int ArraySumSubset(care::host_device_ptr, care::host_device_ptr, int, int) ; CARE_EXTERN template CARE_DLL_API @@ -902,6 +912,8 @@ double ArraySumSubset(care::host_device_ptr(care::host_device_ptr, care::host_device_ptr, int, bool) ; CARE_EXTERN template CARE_DLL_API int ArraySumSubset(care::host_device_ptr, care::host_device_ptr, int, int) ; CARE_EXTERN template CARE_DLL_API @@ -914,6 +926,8 @@ double ArraySumSubset(care::host_device_ptr(care::host_device_ptr, care::host_device_ptr, care::host_device_ptr, int, bool) ; CARE_EXTERN template CARE_DLL_API int ArrayMaskedSumSubset(care::host_device_ptr, care::host_device_ptr, care::host_device_ptr, int, int) ; CARE_EXTERN template CARE_DLL_API @@ -924,6 +938,8 @@ double ArrayMaskedSumSubset(care::host_device_pt #endif // defined(CARE_PARALLEL_DEVICE) +CARE_EXTERN template CARE_DLL_API +int ArrayMaskedSumSubset(care::host_device_ptr, care::host_device_ptr, care::host_device_ptr, int, bool) ; CARE_EXTERN template CARE_DLL_API int ArrayMaskedSumSubset(care::host_device_ptr, care::host_device_ptr, care::host_device_ptr, int, int) ; CARE_EXTERN template CARE_DLL_API @@ -936,6 +952,8 @@ double ArrayMaskedSumSubset(care::host_device_pt #ifdef CARE_PARALLEL_DEVICE +CARE_EXTERN template CARE_DLL_API +int 
ArrayMaskedSum(care::host_device_ptr, care::host_device_ptr, int, bool) ; CARE_EXTERN template CARE_DLL_API int ArrayMaskedSum(care::host_device_ptr, care::host_device_ptr, int, int) ; CARE_EXTERN template CARE_DLL_API @@ -946,6 +964,8 @@ double ArrayMaskedSum(care::host_device_ptr(care::host_device_ptr, care::host_device_ptr, int, bool) ; CARE_EXTERN template CARE_DLL_API int ArrayMaskedSum(care::host_device_ptr, care::host_device_ptr, int, int) ; CARE_EXTERN template CARE_DLL_API @@ -958,6 +978,8 @@ double ArrayMaskedSum(care::host_device_ptr(care::host_device_ptr, int, bool) ; CARE_EXTERN template CARE_DLL_API int FindIndexGT(care::host_device_ptr, int, int) ; CARE_EXTERN template CARE_DLL_API @@ -1129,21 +1151,25 @@ care::host_device_ptr ArrayDup(RAJA::seq_exe #ifdef CARE_PARALLEL_DEVICE CARE_EXTERN template CARE_DLL_API -int SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; +int SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; CARE_EXTERN template CARE_DLL_API -float SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; +int SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; CARE_EXTERN template CARE_DLL_API -double SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; +float SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; +CARE_EXTERN template CARE_DLL_API +double SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; // TODO GID not implemented #endif // defined(CARE_PARALLEL_DEVICE) CARE_EXTERN template CARE_DLL_API -int SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; +int SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; +CARE_EXTERN template CARE_DLL_API +int SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; CARE_EXTERN template CARE_DLL_API -float SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, 
int) ; +float SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; CARE_EXTERN template CARE_DLL_API -double SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; +double SumArrayOrArraySubset(care::host_device_ptr, care::host_device_ptr, int) ; // TODO GID not implemented /////////////////////////////////////////////////////////////////////////////// @@ -1156,6 +1182,8 @@ CARE_EXTERN template CARE_DLL_API float PickAndPerformSum(care::host_device_ptr, care::host_device_ptr, care::host_device_ptr, int) ; CARE_EXTERN template CARE_DLL_API double PickAndPerformSum(care::host_device_ptr, care::host_device_ptr, care::host_device_ptr, int) ; +CARE_EXTERN template CARE_DLL_API +int PickAndPerformSum(care::host_device_ptr, care::host_device_ptr, care::host_device_ptr, int) ; // TODO GID not implemented #endif // defined(CARE_PARALLEL_DEVICE) @@ -1166,6 +1194,8 @@ CARE_EXTERN template CARE_DLL_API float PickAndPerformSum(care::host_device_ptr, care::host_device_ptr, care::host_device_ptr, int) ; CARE_EXTERN template CARE_DLL_API double PickAndPerformSum(care::host_device_ptr, care::host_device_ptr, care::host_device_ptr, int) ; +CARE_EXTERN template CARE_DLL_API +int PickAndPerformSum(care::host_device_ptr, care::host_device_ptr, care::host_device_ptr, int) ; // TODO GID not implemented /////////////////////////////////////////////////////////////////////////////// diff --git a/src/care/compress_algorithm_impl.h b/src/care/compress_algorithm_impl.h new file mode 100644 index 00000000..9c9ea8de --- /dev/null +++ b/src/care/compress_algorithm_impl.h @@ -0,0 +1,308 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2020-25, Lawrence Livermore National Security, LLC and CARE +// project contributors. See the CARE LICENSE file for details. 
+// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// + +#ifndef CARE_COMPRESS_ALGORITHM_IMPL_H +#define CARE_COMPRESS_ALGORITHM_IMPL_H + +// This header includes the implementations of the CARE compress algorithms. + +#include "care/algorithm_decl.h" +#include "care/CHAIDataGetter.h" +#include "care/DefaultMacros.h" +#include "care/scan.h" + +namespace care { + +/************************************************************************ +* Function : CompressArray +* Author(s) : Peter Robinson, Benjamin Liu, Extended by AI Assistant +* Purpose : Compress an array based on list of array indices or flags. +* Based on listType, the list is either: +* removed_list: a list of indices to remove +* or +* mapping_list: a mapping from compressed indices to original indices +* or +* remove_flag_list: a list of 1s and 0s where 1 means remove the element +* or +* keep_flag_list: a list of 1s and 0s where 1 means keep the element +* All entries in list must be > 0 and < arrLen for index lists. +* Flag lists must be the same length as the array. +* If the realloc parameter is true, arr will be resized/reallocated to +* the compressed size. +* Thread safe version of CompressArray. 
+**************************************************************************/ +#ifdef CARE_PARALLEL_DEVICE +template +CARE_INLINE int CompressArray(RAJADeviceExec exec, care::host_device_ptr & arr, const int arrLen, + care::host_device_ptr list, const int listLen, + const care::compress_array listType, bool realloc) +{ + //GPU VERSION + if (listType == care::compress_array::removed_list) { + care::host_device_ptr tmp(arrLen-listLen, "CompressArray_tmp"); + int numKept = 0; + SCAN_LOOP(i, 0, arrLen, pos, numKept, + -1 == BinarySearch(list, 0, listLen, i)) { + tmp[pos] = arr[i]; + } SCAN_LOOP_END(arrLen, pos, numKept) + +#ifdef CARE_DEBUG + int numRemoved = arrLen - numKept; + if (listLen != numRemoved) { + printf("Warning in CompressArray: did not remove expected number of members!\n"); + } +#endif + if (realloc) { + arr.free(); + arr = tmp; + } + else { + ArrayCopy(exec, arr, reinterpret_cast &>(tmp), numKept); + tmp.free(); + } + return numKept; + } + else if (listType == care::compress_array::mapping_list) { + care::host_device_ptr tmp(arrLen, "CompressArray tmp"); + ArrayCopy(tmp, arr, arrLen); + if (realloc) { + arr.realloc(listLen) ; + } + CARE_STREAM_LOOP(newIndex, 0, listLen) { + int oldIndex = list[newIndex] ; + arr[newIndex] = tmp[oldIndex] ; + } CARE_STREAM_LOOP_END + tmp.free(); + return listLen; + } + else if (listType == care::compress_array::remove_flag_list) { + // For remove_flag_list, 1 means remove the element, 0 means keep it + care::host_device_ptr keepIndices(arrLen, "CompressArray keepIndices"); + int numKept = 0; + + // First, identify which elements to keep + SCAN_LOOP(i, 0, arrLen, pos, numKept, list[i] == 0) { + keepIndices[pos] = i; + } SCAN_LOOP_END(arrLen, pos, numKept) + + // Create a temporary array to hold the kept elements + care::host_device_ptr tmp(numKept, "CompressArray_tmp"); + + // Copy the kept elements to the temporary array + CARE_STREAM_LOOP(i, 0, numKept) { + tmp[i] = arr[keepIndices[i]]; + } CARE_STREAM_LOOP_END + + if 
(realloc) { + arr.free(); + arr = tmp; + } + else { + ArrayCopy(exec, arr, reinterpret_cast &>(tmp), numKept); + tmp.free(); + } + + keepIndices.free(); + return numKept; + } + else if (listType == care::compress_array::keep_flag_list) { + // For keep_flag_list, 1 means keep the element, 0 means remove it + care::host_device_ptr keepIndices(arrLen, "CompressArray keepIndices"); + int numKept = 0; + + // First, identify which elements to keep + SCAN_LOOP(i, 0, arrLen, pos, numKept, list[i] == 1) { + keepIndices[pos] = i; + } SCAN_LOOP_END(arrLen, pos, numKept) + + // Create a temporary array to hold the kept elements + care::host_device_ptr tmp(numKept, "CompressArray_tmp"); + + // Copy the kept elements to the temporary array + CARE_STREAM_LOOP(i, 0, numKept) { + tmp[i] = arr[keepIndices[i]]; + } CARE_STREAM_LOOP_END + + if (realloc) { + arr.free(); + arr = tmp; + } + else { + ArrayCopy(exec, arr, reinterpret_cast &>(tmp), numKept); + tmp.free(); + } + + keepIndices.free(); + + return numKept; + + } + else { +#ifdef CARE_DEBUG + printf("Warning in CompressArray: unsupported compressArray mode!\n"); +#endif + return -1; + } +} + +#endif // defined(CARE_PARALLEL_DEVICE) + +/************************************************************************ +* Function : CompressArray +* Author(s) : Peter Robinson, Benjamin Liu, Extended by AI Assistant +* Purpose : Compress an array based on list of array indices or flags. +* Based on listType, the list is either: +* removed_list: a list of indices to remove +* or +* mapping_list: a mapping from compressed indices to original indices +* or +* remove_flag_list: a list of 1s and 0s where 1 means remove the element +* or +* keep_flag_list: a list of 1s and 0s where 1 means keep the element +* All entries in list must be > 0 and < arrLen for index lists. +* Flag lists must be the same length as the array. +* If the realloc parameter is true, arr will be resized/reallocated to +* the compressed size. 
+* Sequential Version of CompressArray +* Requires both arr and list to be sorted for removed_list. +**************************************************************************/ +template +CARE_INLINE int CompressArray(RAJA::seq_exec, care::host_device_ptr & arr, const int arrLen, + care::host_device_ptr list, const int listLen, + const care::compress_array listType, bool realloc) +{ + // CPU VERSION + if (listType == care::compress_array::removed_list) { + int readLoc; + int writeLoc = 0, numRemoved = 0; + care::host_ptr listHost = list ; + care::host_ptr arrHost = arr ; +#ifdef CARE_DEBUG + if (listHost[listLen-1] > arrLen-1) { + printf("Warning in CompressArray seq_exec: asking to remove entries not in array!\n"); + } +#endif + for (readLoc = 0; readLoc < arrLen; ++readLoc) { + if ((numRemoved == listLen) || (readLoc < listHost[numRemoved])) { + arrHost[writeLoc++] = arrHost[readLoc]; + } + else if (readLoc == listHost[numRemoved]) { + ++numRemoved; + } +#ifdef CARE_DEBUG + else { + printf("Warning in CompressArray seq_exec: list of removed members not sorted!\n"); + } +#endif + } +#ifdef CARE_DEBUG + if ((listLen != numRemoved) || (writeLoc != arrLen - listLen)) { + printf("CompressArray seq_exec: did not remove expected number of members!\n"); + } +#endif + if (realloc) { + arr.realloc(arrLen - listLen) ; + } + return arrLen - listLen; + } + else if (listType == care::compress_array::mapping_list) { + CARE_SEQUENTIAL_LOOP(newIndex, 0, listLen) { + int oldIndex = list[newIndex] ; +#ifdef CARE_DEBUG + if (oldIndex > arrLen-1 || oldIndex < 0) { + printf("Warning in CompressArray seq_exec: asking to remove entries not in array!\n"); + } +#endif + arr[newIndex] = arr[oldIndex] ; + } CARE_SEQUENTIAL_LOOP_END + if (realloc) { + arr.realloc(listLen) ; + } + return listLen; + } + else if (listType == care::compress_array::remove_flag_list) { + // For remove_flag_list, 1 means remove the element, 0 means keep it + care::host_ptr listHost = list; + care::host_ptr arrHost 
= arr; + + int writeLoc = 0; + for (int readLoc = 0; readLoc < arrLen; ++readLoc) { + if (listHost[readLoc] == 0) { // Keep this element + arrHost[writeLoc++] = arrHost[readLoc]; + } + } + + int numKept = writeLoc; + + if (realloc) { + arr.realloc(numKept); + } + return numKept; + } + else if (listType == care::compress_array::keep_flag_list) { + // For keep_flag_list, 1 means keep the element, 0 means remove it + care::host_ptr listHost = list; + care::host_ptr arrHost = arr; + + int writeLoc = 0; + for (int readLoc = 0; readLoc < arrLen; ++readLoc) { + if (listHost[readLoc] == 1) { // Keep this element + arrHost[writeLoc++] = arrHost[readLoc]; + } + } + + int numKept = writeLoc; + + if (realloc) { + arr.realloc(numKept); + } + return numKept; + } + else { +#ifdef CARE_DEBUG + printf("Warning in CompressArray: unsupported compressArray mode!\n"); +#endif + return -1; + } +} + +/************************************************************************ +* Function : CompressArray +* Author(s) : Peter Robinson, Benjamin Liu, Extended by AI Assistant +* Purpose : Compress an array based on list of array indices or flags. +* Based on listType, the list is either: +* removed_list: a list of indices to remove +* or +* mapping_list: a mapping from compressed indices to original indices +* or +* remove_flag_list: a list of 1s and 0s where 1 means remove the element +* or +* keep_flag_list: a list of 1s and 0s where 1 means keep the element +* All entries in list must be > 0 and < arrLen for index lists. +* Flag lists must be the same length as the array. +* If the realloc parameter is true, arr will be resized/reallocated to +* the compressed size. +* Both arr and list should be sorted to support the sequential +* implementation for removed_list. 
+**************************************************************************/ +template +CARE_INLINE int CompressArray(care::host_device_ptr & arr, const int arrLen, + care::host_device_ptr list, const int listLen, + const care::compress_array listType, bool realloc) +{ +#ifdef CARE_DEBUG + if (listType == care::compress_array::removed_list) { + checkSorted(arr, arrLen, "CompressArray", "arr") ; + checkSorted(list, listLen, "CompressArray", "list") ; + } +#endif + return CompressArray(RAJAExec(), arr, arrLen, list, listLen, listType, realloc); +} + +} // namespace care + +#endif // CARE_COMPRESS_ALGORITHM_IMPL_H \ No newline at end of file diff --git a/test/TestAlgorithm.cpp b/test/TestAlgorithm.cpp index 57146094..e5bb8ca6 100644 --- a/test/TestAlgorithm.cpp +++ b/test/TestAlgorithm.cpp @@ -274,8 +274,9 @@ TEST(algorithm, compressarray) removed[i] = i ; } CARE_SEQUENTIAL_LOOP_END - care::CompressArray(RAJA::seq_exec(), a, size, removed, removedLen, care::compress_array::removed_list, true) ; + int newSize = care::CompressArray(RAJA::seq_exec(), a, size, removed, removedLen, care::compress_array::removed_list, true); + EXPECT_EQ(newSize, size-removedLen); CARE_SEQUENTIAL_LOOP(i, 0, size-removedLen) { EXPECT_EQ(a[i], 100 + (i + removedLen)); } CARE_SEQUENTIAL_LOOP_END @@ -298,14 +299,55 @@ TEST(algorithm, compressarray) mapList[i] = i + 3 ; } CARE_SEQUENTIAL_LOOP_END - care::CompressArray(RAJA::seq_exec(), b, size, mapList, newLen, care::compress_array::mapping_list, true) ; + newSize = care::CompressArray(RAJA::seq_exec(), b, size, mapList, newLen, care::compress_array::mapping_list, true); + EXPECT_EQ(newSize, newLen); CARE_SEQUENTIAL_LOOP(i, 0, newLen) { EXPECT_EQ(b[i], 100 + (i+3)); } CARE_SEQUENTIAL_LOOP_END b.free(); mapList.free(); + + // Test CompressArray with remove_flag_list mode + care::host_device_ptr c(size, "c"); + care::host_device_ptr removeFlags(size, "removeFlags"); + + CARE_SEQUENTIAL_LOOP(i, 0, size) { + c[i] = 100 + i; + // Remove even indices + 
removeFlags[i] = (i % 2 == 0) ? 1 : 0; + } CARE_SEQUENTIAL_LOOP_END + + newSize = care::CompressArray(RAJA::seq_exec(), c, size, removeFlags, size, care::compress_array::remove_flag_list, true); + + EXPECT_EQ(newSize, size/2); + CARE_SEQUENTIAL_LOOP(i, 0, size/2) { + EXPECT_EQ(c[i], 100 + (i*2 + 1)); + } CARE_SEQUENTIAL_LOOP_END + + c.free(); + removeFlags.free(); + + // Test CompressArray with keep_flag_list mode + care::host_device_ptr d(size, "d"); + care::host_device_ptr keepFlags(size, "keepFlags"); + + CARE_SEQUENTIAL_LOOP(i, 0, size) { + d[i] = 100 + i; + // Keep odd indices + keepFlags[i] = (i % 2 == 1) ? 1 : 0; + } CARE_SEQUENTIAL_LOOP_END + + newSize = care::CompressArray(RAJA::seq_exec(), d, size, keepFlags, size, care::compress_array::keep_flag_list, true); + + EXPECT_EQ(newSize, size/2); + CARE_SEQUENTIAL_LOOP(i, 0, size/2) { + EXPECT_EQ(d[i], 100 + (i*2 + 1)); + } CARE_SEQUENTIAL_LOOP_END + + d.free(); + keepFlags.free(); } #if defined(CARE_GPUCC) @@ -2054,8 +2096,9 @@ GPU_TEST(algorithm, compressarray) removed[i] = i ; } CARE_STREAM_LOOP_END - care::CompressArray(RAJAExec(), a, size, removed, removedLen, care::compress_array::removed_list, true) ; + int newSize = care::CompressArray(RAJADeviceExec(), a, size, removed, removedLen, care::compress_array::removed_list, true); + EXPECT_EQ(newSize, size-removedLen); CARE_SEQUENTIAL_LOOP(i, 0, size-removedLen) { EXPECT_EQ(a[i], 100 + (i + removedLen)); } CARE_SEQUENTIAL_LOOP_END @@ -2078,16 +2121,154 @@ GPU_TEST(algorithm, compressarray) mapList[i] = i + 3 ; } CARE_STREAM_LOOP_END - care::CompressArray(RAJAExec(), b, size, mapList, newLen, care::compress_array::mapping_list, true) ; + newSize = care::CompressArray(RAJADeviceExec(), b, size, mapList, newLen, care::compress_array::mapping_list, true); + EXPECT_EQ(newSize, newLen); CARE_SEQUENTIAL_LOOP(i, 0, newLen) { EXPECT_EQ(b[i], 100 + (i+3)); } CARE_SEQUENTIAL_LOOP_END b.free(); mapList.free(); + + // Test CompressArray with remove_flag_list mode + 
care::host_device_ptr c(size, "c"); + care::host_device_ptr removeFlags(size, "removeFlags"); + + CARE_STREAM_LOOP(i, 0, size) { + c[i] = 100 + i; + // Remove even indices + removeFlags[i] = (i % 2 == 0) ? 1 : 0; + } CARE_STREAM_LOOP_END + + newSize = care::CompressArray(RAJADeviceExec(), c, size, removeFlags, size, care::compress_array::remove_flag_list, true); + + EXPECT_EQ(newSize, size/2); + CARE_SEQUENTIAL_LOOP(i, 0, size/2) { + EXPECT_EQ(c[i], 100 + (i*2 + 1)); + } CARE_SEQUENTIAL_LOOP_END + + c.free(); + removeFlags.free(); + + // Test CompressArray with keep_flag_list mode + care::host_device_ptr d(size, "d"); + care::host_device_ptr keepFlags(size, "keepFlags"); + + CARE_STREAM_LOOP(i, 0, size) { + d[i] = 100 + i; + // Keep odd indices + keepFlags[i] = (i % 2 == 1) ? 1 : 0; + } CARE_STREAM_LOOP_END + + newSize = care::CompressArray(RAJADeviceExec(), d, size, keepFlags, size, care::compress_array::keep_flag_list, true); + + EXPECT_EQ(newSize, size/2); + CARE_SEQUENTIAL_LOOP(i, 0, size/2) { + EXPECT_EQ(d[i], 100 + (i*2 + 1)); + } CARE_SEQUENTIAL_LOOP_END + + d.free(); + keepFlags.free(); } +#endif // CARE_GPUCC +#if defined(CARE_GPUCC) +GPU_TEST(algorithm, array_return_type) +{ + // Test ArraySum with different return type + int size = 5; + care::host_device_ptr boolArray(size, "boolArray"); + + CARE_STREAM_LOOP(i, 0, size) { + boolArray[i] = (i % 2 == 0); // true for even indices, false for odd + } CARE_STREAM_LOOP_END + + // Sum bool array but return int + int sum = care::ArraySum(boolArray, size, false); + EXPECT_EQ(sum, 3); // Should count 3 true values (indices 0, 2, 4) + + // Test ArraySumSubset with different return type + care::host_device_ptr subset(3, "subset"); + + CARE_STREAM_LOOP(i, 0, 3) { + subset[i] = i * 2; // Select indices 0, 2, 4 + } CARE_STREAM_LOOP_END + + // Sum subset of bool array but return int + sum = care::ArraySumSubset(boolArray, subset, 3, false); + EXPECT_EQ(sum, 3); // Should count 3 true values (indices 0, 2, 4) + + // Test 
SumArrayOrArraySubset with different return type + sum = care::SumArrayOrArraySubset(boolArray, nullptr, size); + EXPECT_EQ(sum, 3); // Should count 3 true values (indices 0, 2, 4) + + sum = care::SumArrayOrArraySubset(boolArray, subset, 3); + EXPECT_EQ(sum, 3); // Should count 3 true values (indices 0, 2, 4) + + // Test PickAndPerformSum with different return type + care::host_device_ptr mask(size, "mask"); + + CARE_STREAM_LOOP(i, 0, size) { + mask[i] = (i < 3) ? 0 : 1; // Mask out indices 3 and 4 + } CARE_STREAM_LOOP_END + + sum = care::PickAndPerformSum(boolArray, mask, nullptr, size); + EXPECT_EQ(sum, 2); // Should count 2 true values (indices 0, 2) + + boolArray.free(); + subset.free(); + mask.free(); +} #endif // CARE_GPUCC +// Test for the new ReturnType template parameter in array functions +TEST(algorithm, array_return_type) +{ + // Test ArraySum with different return type + int size = 5; + care::host_device_ptr boolArray(size, "boolArray"); + + CARE_SEQUENTIAL_LOOP(i, 0, size) { + boolArray[i] = (i % 2 == 0); // true for even indices, false for odd + } CARE_SEQUENTIAL_LOOP_END + + // Sum bool array but return int + int sum = care::ArraySum(boolArray, size, false); + EXPECT_EQ(sum, 3); // Should count 3 true values (indices 0, 2, 4) + + // Test ArraySumSubset with different return type + care::host_device_ptr subset(3, "subset"); + + CARE_SEQUENTIAL_LOOP(i, 0, 3) { + subset[i] = i * 2; // Select indices 0, 2, 4 + } CARE_SEQUENTIAL_LOOP_END + + // Sum subset of bool array but return int + sum = care::ArraySumSubset(boolArray, subset, 3, false); + EXPECT_EQ(sum, 3); // Should count 3 true values (indices 0, 2, 4) + + // Test SumArrayOrArraySubset with different return type + sum = care::SumArrayOrArraySubset(boolArray, nullptr, size); + EXPECT_EQ(sum, 3); // Should count 3 true values (indices 0, 2, 4) + + sum = care::SumArrayOrArraySubset(boolArray, subset, 3); + EXPECT_EQ(sum, 3); // Should count 3 true values (indices 0, 2, 4) + + // Test 
PickAndPerformSum with different return type + care::host_device_ptr mask(size, "mask"); + + CARE_SEQUENTIAL_LOOP(i, 0, size) { + mask[i] = (i < 3) ? 0 : 1; // Mask out indices 3 and 4 + } CARE_SEQUENTIAL_LOOP_END + + sum = care::PickAndPerformSum(boolArray, mask, nullptr, size); + EXPECT_EQ(sum, 2); // Should count 2 true values (indices 0, 2) + + boolArray.free(); + subset.free(); + mask.free(); +} + + diff --git a/test/TestKeyValueSorter.cpp b/test/TestKeyValueSorter.cpp index 736b2a5c..a9703c7a 100644 --- a/test/TestKeyValueSorter.cpp +++ b/test/TestKeyValueSorter.cpp @@ -256,6 +256,366 @@ GPU_TEST(KeyValueSorter, host_device_ptr_Constructor) EXPECT_EQ(sorter.value(4), 4); } CARE_HOST_KERNEL_END } - #endif // CARE_GPUCC +///////////////////////////////////////////////////////////////////////// +/// +/// @brief Test case that checks the new constructor that takes ownership +/// of keys and values arrays. +/// +///////////////////////////////////////////////////////////////////////// +TEST(KeyValueSorter, OwnershipConstructor) +{ + int length = 5; + care::host_device_ptr keys(length, "keys"); + care::host_device_ptr values(length, "values"); + + CARE_HOST_KERNEL { + keys[0] = 0; + keys[1] = 1; + keys[2] = 2; + keys[3] = 3; + keys[4] = 4; + + values[0] = 4; + values[1] = 1; + values[2] = 2; + values[3] = 0; + values[4] = 3; + } CARE_HOST_KERNEL_END + + care::KeyValueSorter sorter(length, std::move(keys), std::move(values)); + + CARE_SEQUENTIAL_LOOP(i, 0, length) { + EXPECT_EQ(sorter.key(i), i); + EXPECT_EQ(sorter.value(i), values[i]); + } CARE_SEQUENTIAL_LOOP_END + + sorter.sort(); + + CARE_HOST_KERNEL { + EXPECT_EQ(sorter.key(0), 3); + EXPECT_EQ(sorter.key(1), 1); + EXPECT_EQ(sorter.key(2), 2); + EXPECT_EQ(sorter.key(3), 4); + EXPECT_EQ(sorter.key(4), 0); + + EXPECT_EQ(sorter.value(0), 0); + EXPECT_EQ(sorter.value(1), 1); + EXPECT_EQ(sorter.value(2), 2); + EXPECT_EQ(sorter.value(3), 3); + EXPECT_EQ(sorter.value(4), 4); + } CARE_HOST_KERNEL_END +} + 
+///////////////////////////////////////////////////////////////////////// +/// +/// @brief Test case that checks the sortByKeyThenValue method. +/// +///////////////////////////////////////////////////////////////////////// +TEST(KeyValueSorter, SortByKeyThenValue) +{ + int length = 8; + care::host_device_ptr keys(length, "keys"); + care::host_device_ptr values(length, "values"); + + CARE_HOST_KERNEL { + // Create data with duplicate keys + keys[0] = 3; + keys[1] = 1; + keys[2] = 3; + keys[3] = 2; + keys[4] = 1; + keys[5] = 2; + keys[6] = 3; + keys[7] = 1; + + // Values are in reverse order within each key group + values[0] = 7; + values[1] = 5; + values[2] = 6; + values[3] = 3; + values[4] = 4; + values[5] = 2; + values[6] = 8; + values[7] = 1; + } CARE_HOST_KERNEL_END + + care::KeyValueSorter sorter(length, std::move(keys), std::move(values)); + + // Sort by key then by value + sorter.sortByKeyThenValue(); + + // Check that keys are sorted + CARE_HOST_KERNEL { + // Keys should be in ascending order + EXPECT_EQ(sorter.key(0), 1); + EXPECT_EQ(sorter.key(1), 1); + EXPECT_EQ(sorter.key(2), 1); + EXPECT_EQ(sorter.key(3), 2); + EXPECT_EQ(sorter.key(4), 2); + EXPECT_EQ(sorter.key(5), 3); + EXPECT_EQ(sorter.key(6), 3); + EXPECT_EQ(sorter.key(7), 3); + + // Values should be in ascending order within each key group + EXPECT_EQ(sorter.value(0), 1); + EXPECT_EQ(sorter.value(1), 4); + EXPECT_EQ(sorter.value(2), 5); + EXPECT_EQ(sorter.value(3), 2); + EXPECT_EQ(sorter.value(4), 3); + EXPECT_EQ(sorter.value(5), 6); + EXPECT_EQ(sorter.value(6), 7); + EXPECT_EQ(sorter.value(7), 8); + } CARE_HOST_KERNEL_END +} + +///////////////////////////////////////////////////////////////////////// +/// +/// @brief Test case that checks the eliminateDuplicatePairs method. 
+/// +///////////////////////////////////////////////////////////////////////// +TEST(KeyValueSorter, EliminateDuplicatePairs) +{ + int length = 10; + care::host_device_ptr keys(length, "keys"); + care::host_device_ptr values(length, "values"); + + CARE_HOST_KERNEL { + // Create data with duplicate key-value pairs + keys[0] = 1; + keys[1] = 2; + keys[2] = 3; + keys[3] = 1; // Duplicate of pair at index 0 + keys[4] = 2; // Duplicate of pair at index 1 + keys[5] = 4; + keys[6] = 5; + keys[7] = 3; // Duplicate of pair at index 2 + keys[8] = 4; // Duplicate of pair at index 5 + keys[9] = 6; + + values[0] = 10; + values[1] = 20; + values[2] = 30; + values[3] = 10; // Duplicate of pair at index 0 + values[4] = 20; // Duplicate of pair at index 1 + values[5] = 40; + values[6] = 50; + values[7] = 30; // Duplicate of pair at index 2 + values[8] = 40; // Duplicate of pair at index 5 + values[9] = 60; + } CARE_HOST_KERNEL_END + + care::KeyValueSorter sorter(length, std::move(keys), std::move(values)); + + // Eliminate duplicate pairs + sorter.eliminateDuplicatePairs(); + + // Check that duplicates were removed + CARE_HOST_KERNEL { + // Should have 6 unique pairs + EXPECT_EQ(sorter.len(), 6); + + // Check the unique pairs + EXPECT_EQ(sorter.key(0), 1); + EXPECT_EQ(sorter.value(0), 10); + + EXPECT_EQ(sorter.key(1), 2); + EXPECT_EQ(sorter.value(1), 20); + + EXPECT_EQ(sorter.key(2), 3); + EXPECT_EQ(sorter.value(2), 30); + + EXPECT_EQ(sorter.key(3), 4); + EXPECT_EQ(sorter.value(3), 40); + + EXPECT_EQ(sorter.key(4), 5); + EXPECT_EQ(sorter.value(4), 50); + + EXPECT_EQ(sorter.key(5), 6); + EXPECT_EQ(sorter.value(5), 60); + } CARE_HOST_KERNEL_END +} + +#if defined(CARE_GPUCC) + +///////////////////////////////////////////////////////////////////////// +/// +/// @brief GPU test case that checks the new constructor that takes ownership +/// of keys and values arrays. 
+/// +///////////////////////////////////////////////////////////////////////// +GPU_TEST(KeyValueSorter, OwnershipConstructor) +{ + int length = 5; + care::host_device_ptr keys(length, "keys"); + care::host_device_ptr values(length, "values"); + + CARE_GPU_KERNEL { + keys[0] = 0; + keys[1] = 1; + keys[2] = 2; + keys[3] = 3; + keys[4] = 4; + + values[0] = 4; + values[1] = 1; + values[2] = 2; + values[3] = 0; + values[4] = 3; + } CARE_GPU_KERNEL_END + + care::KeyValueSorter sorter(length, std::move(keys), std::move(values)); + + CARE_SEQUENTIAL_LOOP(i, 0, length) { + EXPECT_EQ(sorter.key(i), i); + EXPECT_EQ(sorter.value(i), values[i]); + } CARE_SEQUENTIAL_LOOP_END + + sorter.sort(); + + CARE_HOST_KERNEL { + EXPECT_EQ(sorter.key(0), 3); + EXPECT_EQ(sorter.key(1), 1); + EXPECT_EQ(sorter.key(2), 2); + EXPECT_EQ(sorter.key(3), 4); + EXPECT_EQ(sorter.key(4), 0); + + EXPECT_EQ(sorter.value(0), 0); + EXPECT_EQ(sorter.value(1), 1); + EXPECT_EQ(sorter.value(2), 2); + EXPECT_EQ(sorter.value(3), 3); + EXPECT_EQ(sorter.value(4), 4); + } CARE_HOST_KERNEL_END +} + +///////////////////////////////////////////////////////////////////////// +/// +/// @brief GPU test case that checks the sortByKeyThenValue method. 
+/// +///////////////////////////////////////////////////////////////////////// +GPU_TEST(KeyValueSorter, SortByKeyThenValue) +{ + int length = 8; + care::host_device_ptr keys(length, "keys"); + care::host_device_ptr values(length, "values"); + + CARE_GPU_KERNEL { + // Create data with duplicate keys + keys[0] = 3; + keys[1] = 1; + keys[2] = 3; + keys[3] = 2; + keys[4] = 1; + keys[5] = 2; + keys[6] = 3; + keys[7] = 1; + + // Values are in reverse order within each key group + values[0] = 7; + values[1] = 5; + values[2] = 6; + values[3] = 3; + values[4] = 4; + values[5] = 2; + values[6] = 8; + values[7] = 1; + } CARE_GPU_KERNEL_END + + care::KeyValueSorter sorter(length, std::move(keys), std::move(values)); + + // Sort by key then by value + sorter.sortByKeyThenValue(); + + // Check that keys are sorted + CARE_HOST_KERNEL { + // Keys should be in ascending order + EXPECT_EQ(sorter.key(0), 1); + EXPECT_EQ(sorter.key(1), 1); + EXPECT_EQ(sorter.key(2), 1); + EXPECT_EQ(sorter.key(3), 2); + EXPECT_EQ(sorter.key(4), 2); + EXPECT_EQ(sorter.key(5), 3); + EXPECT_EQ(sorter.key(6), 3); + EXPECT_EQ(sorter.key(7), 3); + + // Values should be in ascending order within each key group + EXPECT_EQ(sorter.value(0), 1); + EXPECT_EQ(sorter.value(1), 4); + EXPECT_EQ(sorter.value(2), 5); + EXPECT_EQ(sorter.value(3), 2); + EXPECT_EQ(sorter.value(4), 3); + EXPECT_EQ(sorter.value(5), 6); + EXPECT_EQ(sorter.value(6), 7); + EXPECT_EQ(sorter.value(7), 8); + } CARE_HOST_KERNEL_END +} + +///////////////////////////////////////////////////////////////////////// +/// +/// @brief GPU test case that checks the eliminateDuplicatePairs method. 
+/// +///////////////////////////////////////////////////////////////////////// +GPU_TEST(KeyValueSorter, EliminateDuplicatePairs) +{ + int length = 10; + care::host_device_ptr keys(length, "keys"); + care::host_device_ptr values(length, "values"); + + CARE_GPU_KERNEL { + // Create data with duplicate key-value pairs + keys[0] = 1; + keys[1] = 2; + keys[2] = 3; + keys[3] = 1; // Duplicate of pair at index 0 + keys[4] = 2; // Duplicate of pair at index 1 + keys[5] = 4; + keys[6] = 5; + keys[7] = 3; // Duplicate of pair at index 2 + keys[8] = 4; // Duplicate of pair at index 5 + keys[9] = 6; + + values[0] = 10; + values[1] = 20; + values[2] = 30; + values[3] = 10; // Duplicate of pair at index 0 + values[4] = 20; // Duplicate of pair at index 1 + values[5] = 40; + values[6] = 50; + values[7] = 30; // Duplicate of pair at index 2 + values[8] = 40; // Duplicate of pair at index 5 + values[9] = 60; + } CARE_GPU_KERNEL_END + + care::KeyValueSorter sorter(length, std::move(keys), std::move(values)); + + // Eliminate duplicate pairs + sorter.eliminateDuplicatePairs(); + + // Check that duplicates were removed + CARE_HOST_KERNEL { + // Should have 6 unique pairs + EXPECT_EQ(sorter.len(), 6); + + // Check the unique pairs + EXPECT_EQ(sorter.key(0), 1); + EXPECT_EQ(sorter.value(0), 10); + + EXPECT_EQ(sorter.key(1), 2); + EXPECT_EQ(sorter.value(1), 20); + + EXPECT_EQ(sorter.key(2), 3); + EXPECT_EQ(sorter.value(2), 30); + + EXPECT_EQ(sorter.key(3), 4); + EXPECT_EQ(sorter.value(3), 40); + + EXPECT_EQ(sorter.key(4), 5); + EXPECT_EQ(sorter.value(4), 50); + + EXPECT_EQ(sorter.key(5), 6); + EXPECT_EQ(sorter.value(5), 60); + } CARE_HOST_KERNEL_END +} + +#endif diff --git a/tpl/chai b/tpl/chai index 51e66b8a..26d56467 160000 --- a/tpl/chai +++ b/tpl/chai @@ -1 +1 @@ -Subproject commit 51e66b8a5a6caac41aa329c8643180b8e2b33e5f +Subproject commit 26d5646707e1848b0524379b12a7716e4a830a27 diff --git a/tpl/raja b/tpl/raja index ca756788..e827035c 160000 --- a/tpl/raja +++ b/tpl/raja @@ -1 
+1 @@ -Subproject commit ca756788dbdd43fec2a3840389126ae94a905d5f +Subproject commit e827035c630e71a9358e2f21c2f3cf6fd5fb6605 diff --git a/tpl/umpire b/tpl/umpire index 6b0ea9ed..0372fbd6 160000 --- a/tpl/umpire +++ b/tpl/umpire @@ -1 +1 @@ -Subproject commit 6b0ea9edbbbc741c8a429768d946549cd3bd7d33 +Subproject commit 0372fbd6e1f17d7e6dd72693f8b857f3ec7559e9 diff --git a/tpl/versions.txt b/tpl/versions.txt index 72005fd9..f890b077 100644 --- a/tpl/versions.txt +++ b/tpl/versions.txt @@ -6,6 +6,6 @@ ############################################################################## BLT: 0.7.1 -CHAI: v2025.09.1 -RAJA: v2025.09.0 -Umpire: v2025.09.0 +CHAI: v2025.12.0 +RAJA: v2025.12.0 +Umpire: v2025.12.0