/****************************************************************************** * * Project: High Performance Image Reprojector * Purpose: Implementation of the GDALWarpKernel class. Implements the actual * image warping for a "chunk" of input and output imagery already * loaded into memory. * Author: Frank Warmerdam, warmerdam@pobox.com * ****************************************************************************** * Copyright (c) 2003, Frank Warmerdam * Copyright (c) 2008-2013, Even Rouault * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. ****************************************************************************/ #include "cpl_port.h" #include "gdalwarper.h" #include #include #include #include #include #include #include #include #include #include #include "cpl_atomic_ops.h" #include "cpl_conv.h" #include "cpl_error.h" #include "cpl_multiproc.h" #include "cpl_progress.h" #include "cpl_string.h" #include "cpl_vsi.h" #include "cpl_worker_thread_pool.h" #include "gdal.h" #include "gdal_alg.h" #include "gdal_alg_priv.h" #include "gdal_thread_pool.h" #include "gdalwarpkernel_opencl.h" // We restrict to 64bit processors because they are guaranteed to have SSE2. // Could possibly be used too on 32bit, but we would need to check at runtime. #if defined(__x86_64) || defined(_M_X64) #include "gdalsse_priv.h" #if __SSE4_1__ #include #endif #if __SSE3__ #include #endif #endif CPL_CVSID("$Id: gdalwarpkernel.cpp 831355f692b610e0562802d39c65c015e68467d5 2021-10-28 11:30:49 +0200 Even Rouault $") constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001; constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f; // #define INSTANTIATE_FLOAT64_SSE2_IMPL static const int anGWKFilterRadius[] = { 0, // Nearest neighbour 1, // Bilinear 2, // Cubic Convolution (Catmull-Rom) 2, // Cubic B-Spline 3, // Lanczos windowed sinc 0, // Average 0, // Mode 0, // Reserved GRA_Gauss=7 0, // Max 0, // Min 0, // Med 0, // Q1 0, // Q3 0, // Sum 0, // RMS }; static double GWKBilinear(double dfX); static double GWKCubic(double dfX); static double GWKBSpline(double dfX); static double GWKLanczosSinc(double dfX); static const FilterFuncType apfGWKFilter[] = { nullptr, // Nearest neighbour GWKBilinear, // Bilinear GWKCubic, // Cubic Convolution (Catmull-Rom) GWKBSpline, // Cubic B-Spline GWKLanczosSinc, // Lanczos windowed sinc nullptr, // Average nullptr, // Mode nullptr, // Reserved GRA_Gauss=7 nullptr, // Max nullptr, // Min nullptr, // Med nullptr, // Q1 nullptr, // Q3 nullptr, // Sum nullptr, // RMS }; // TODO(schwehr): Can we make these functions have a const * const arg? static double GWKBilinear4Values(double* padfVals); static double GWKCubic4Values(double* padfVals); static double GWKBSpline4Values(double* padfVals); static double GWKLanczosSinc4Values(double* padfVals); static const FilterFunc4ValuesType apfGWKFilter4Values[] = { nullptr, // Nearest neighbour GWKBilinear4Values, // Bilinear GWKCubic4Values, // Cubic Convolution (Catmull-Rom) GWKBSpline4Values, // Cubic B-Spline GWKLanczosSinc4Values, // Lanczos windowed sinc nullptr, // Average nullptr, // Mode nullptr, // Reserved GRA_Gauss=7 nullptr, // Max nullptr, // Min nullptr, // Med nullptr, // Q1 nullptr, // Q3 nullptr, // Sum nullptr, // RMS }; int GWKGetFilterRadius(GDALResampleAlg eResampleAlg) { static_assert( CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1, "Bad size of anGWKFilterRadius" ); return anGWKFilterRadius[eResampleAlg]; } FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg) { static_assert( CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1, "Bad size of apfGWKFilter" ); return apfGWKFilter[eResampleAlg]; } FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg) { static_assert( CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1, "Bad size of apfGWKFilter4Values" ); return apfGWKFilter4Values[eResampleAlg]; } #ifdef HAVE_OPENCL static CPLErr GWKOpenCLCase( GDALWarpKernel * ); #endif static CPLErr GWKGeneralCase( GDALWarpKernel * ); static CPLErr GWKRealCase( GDALWarpKernel *poWK ); static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ); static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ); static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ); static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ); #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK ); #endif static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ); static CPLErr GWKNearestByte( GDALWarpKernel *poWK ); static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ); static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ); static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ); #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK ); #endif static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ); static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ); static CPLErr GWKNearestShort( GDALWarpKernel *poWK ); static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ); static CPLErr GWKNearestFloat( GDALWarpKernel *poWK ); static CPLErr GWKAverageOrMode( GDALWarpKernel * ); static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort( GDALWarpKernel * ); static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort( GDALWarpKernel * ); static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort( GDALWarpKernel * ); /************************************************************************/ /* GWKJobStruct */ /************************************************************************/ struct GWKJobStruct { std::mutex& mutex; std::condition_variable& cv; int& counter; bool& stopFlag; GDALWarpKernel *poWK; int iYMin; int iYMax; int (*pfnProgress)(GWKJobStruct* psJob); void *pTransformerArg; void (*pfnFunc)(void*); // used by GWKRun() to assign the proper pTransformerArg GWKJobStruct(std::mutex& mutex_, std::condition_variable& cv_, int& counter_, bool& stopFlag_) : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_), poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr), pTransformerArg(nullptr), pfnFunc(nullptr) {} } ; struct GWKThreadData { std::unique_ptr poJobQueue {}; std::unique_ptr> threadJobs {}; int nThreads {0}; int counter {0}; bool stopFlag {false}; std::mutex mutex {}; std::condition_variable cv {}; bool bTransformerArgInputAssignedToThread {false}; void * pTransformerArgInput {nullptr}; // owned by calling layer. Not to be destroyed std::map mapThreadToTransformerArg {}; }; /************************************************************************/ /* GWKProgressThread() */ /************************************************************************/ // Return TRUE if the computation must be interrupted. static int GWKProgressThread( GWKJobStruct* psJob ) { bool stop = false; { std::lock_guard lock(psJob->mutex); psJob->counter++; stop = psJob->stopFlag; } psJob->cv.notify_one(); return stop; } /************************************************************************/ /* GWKProgressMonoThread() */ /************************************************************************/ // Return TRUE if the computation must be interrupted. static int GWKProgressMonoThread( GWKJobStruct* psJob ) { GDALWarpKernel *poWK = psJob->poWK; if( !poWK->pfnProgress( poWK->dfProgressBase + poWK->dfProgressScale * (++psJob->counter / static_cast(psJob->iYMax)), "", poWK->pProgress ) ) { CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" ); psJob->stopFlag = true; return TRUE; } return FALSE; } /************************************************************************/ /* GWKGenericMonoThread() */ /************************************************************************/ static CPLErr GWKGenericMonoThread( GDALWarpKernel *poWK, void (*pfnFunc) (void *pUserData) ) { GWKThreadData td; // NOTE: the mutex is not used. GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag); job.poWK = poWK; job.iYMin = 0; job.iYMax = poWK->nDstYSize; job.pfnProgress = GWKProgressMonoThread; job.pTransformerArg = poWK->pTransformerArg; pfnFunc(&job); return td.stopFlag ? CE_Failure : CE_None; } /************************************************************************/ /* GWKThreadsCreate() */ /************************************************************************/ void* GWKThreadsCreate( char** papszWarpOptions, GDALTransformerFunc /* pfnTransformer */, void* pTransformerArg ) { const char* pszWarpThreads = CSLFetchNameValue(papszWarpOptions, "NUM_THREADS"); if( pszWarpThreads == nullptr ) pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1"); int nThreads = 0; if( EQUAL(pszWarpThreads, "ALL_CPUS") ) nThreads = CPLGetNumCPUs(); else nThreads = atoi(pszWarpThreads); if( nThreads <= 1 ) nThreads = 0; if( nThreads > 128 ) nThreads = 128; GWKThreadData* psThreadData = new GWKThreadData(); auto poThreadPool = nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr; if( nThreads && poThreadPool ) { psThreadData->nThreads = nThreads; psThreadData->threadJobs.reset(new std::vector(nThreads, GWKJobStruct(psThreadData->mutex, psThreadData->cv, psThreadData->counter, psThreadData->stopFlag))); psThreadData->poJobQueue = poThreadPool->CreateJobQueue(); psThreadData->pTransformerArgInput = pTransformerArg; } return psThreadData; } /************************************************************************/ /* GWKThreadsEnd() */ /************************************************************************/ void GWKThreadsEnd( void* psThreadDataIn ) { if( psThreadDataIn == nullptr ) return; GWKThreadData* psThreadData = static_cast(psThreadDataIn); if( psThreadData->poJobQueue ) { for( auto& pair: psThreadData->mapThreadToTransformerArg ) { if( pair.second != psThreadData->pTransformerArgInput ) GDALDestroyTransformer(pair.second); } psThreadData->poJobQueue.reset(); } delete psThreadData; } /************************************************************************/ /* ThreadFuncAdapter() */ /************************************************************************/ static void ThreadFuncAdapter(void* pData) { GWKJobStruct* psJob = static_cast(pData); GWKThreadData* psThreadData = static_cast(psJob->poWK->psThreadData); // Look if we have already a per-thread transformer void* pTransformerArg = nullptr; const GIntBig nThreadId = CPLGetPID(); { std::lock_guard lock(psThreadData->mutex); auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId); if (oIter != psThreadData->mapThreadToTransformerArg.end()) { pTransformerArg = oIter->second; } else if( !psThreadData->bTransformerArgInputAssignedToThread ) { // Borrow the original transformer, as it has not already been done psThreadData->bTransformerArgInputAssignedToThread = true; pTransformerArg = psThreadData->pTransformerArgInput; psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg; } } // If no transformer assigned to current thread, instantiate one if( pTransformerArg == nullptr ) { // This somehow assumes that GDALCloneTransformer() is thread-safe // which should normally be the case. pTransformerArg = GDALCloneTransformer(psThreadData->pTransformerArgInput); // Lock for the stop flag and the transformer map. std::lock_guard lock(psThreadData->mutex); if( !pTransformerArg ) { psJob->stopFlag = true; return; } psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg; } psJob->pTransformerArg = pTransformerArg; psJob->pfnFunc(pData); } /************************************************************************/ /* GWKRun() */ /************************************************************************/ static CPLErr GWKRun( GDALWarpKernel *poWK, const char* pszFuncName, void (*pfnFunc) (void *pUserData) ) { const int nDstYSize = poWK->nDstYSize; CPLDebug("GDAL", "GDALWarpKernel()::%s() " "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d", pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize, poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize, poWK->nDstYSize); if( !poWK->pfnProgress( poWK->dfProgressBase, "", poWK->pProgress ) ) { CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" ); return CE_Failure; } GWKThreadData* psThreadData = static_cast(poWK->psThreadData); if( psThreadData == nullptr || psThreadData->poJobQueue == nullptr ) { return GWKGenericMonoThread(poWK, pfnFunc); } int nThreads = std::min(psThreadData->nThreads, nDstYSize / 2); // Config option mostly useful for tests to be able to test multithreading // with small rasters const int nWarpChunkSize = atoi( CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536")); if( nWarpChunkSize > 0 ) { GIntBig nChunks = static_cast(nDstYSize) * poWK->nDstXSize / nWarpChunkSize; if( nThreads > nChunks ) nThreads = static_cast(nChunks); } if( nThreads <= 0 ) nThreads = 1; CPLDebug("WARP", "Using %d threads", nThreads); auto& jobs = *psThreadData->threadJobs; CPLAssert( static_cast(jobs.size()) >= nThreads ); // Fill-in job structures. for (int i = 0; i < nThreads; ++i) { auto& job = jobs[i]; job.poWK = poWK; job.iYMin = static_cast(static_cast(i) * nDstYSize / nThreads); job.iYMax = static_cast(static_cast(i + 1) * nDstYSize / nThreads); if( poWK->pfnProgress != GDALDummyProgress ) job.pfnProgress = GWKProgressThread; job.pfnFunc = pfnFunc; } { std::unique_lock lock(psThreadData->mutex); // Start jobs. for (int i = 0; i < nThreads; ++i) { auto& job = jobs[i]; psThreadData->poJobQueue->SubmitJob( ThreadFuncAdapter, static_cast(&job) ); } /* -------------------------------------------------------------------- */ /* Report progress. */ /* -------------------------------------------------------------------- */ if( poWK->pfnProgress != GDALDummyProgress ) { int& counter = psThreadData->counter; while (counter < nDstYSize) { psThreadData->cv.wait(lock); if( !poWK->pfnProgress( poWK->dfProgressBase + poWK->dfProgressScale * (counter / static_cast(nDstYSize)), "", poWK->pProgress ) ) { CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" ); psThreadData->stopFlag = true; break; } } } } /* -------------------------------------------------------------------- */ /* Wait for all jobs to complete. */ /* -------------------------------------------------------------------- */ psThreadData->poJobQueue->WaitCompletion(); return psThreadData->stopFlag ? CE_Failure : CE_None; } /************************************************************************/ /* ==================================================================== */ /* GDALWarpKernel */ /* ==================================================================== */ /************************************************************************/ /** * \class GDALWarpKernel "gdalwarper.h" * * Low level image warping class. * * This class is responsible for low level image warping for one * "chunk" of imagery. The class is essentially a structure with all * data members public - primarily so that new special-case functions * can be added without changing the class declaration. * * Applications are normally intended to interactive with warping facilities * through the GDALWarpOperation class, though the GDALWarpKernel can in * theory be used directly if great care is taken in setting up the * control data. * *

Design Issues

* * The intention is that PerformWarp() would analyze the setup in terms * of the datatype, resampling type, and validity/density mask usage and * pick one of many specific implementations of the warping algorithm over * a continuum of optimization vs. generality. At one end there will be a * reference general purpose implementation of the algorithm that supports * any data type (working internally in double precision complex), all three * resampling types, and any or all of the validity/density masks. At the * other end would be highly optimized algorithms for common cases like * nearest neighbour resampling on GDT_Byte data with no masks. * * The full set of optimized versions have not been decided but we should * expect to have at least: * - One for each resampling algorithm for 8bit data with no masks. * - One for each resampling algorithm for float data with no masks. * - One for each resampling algorithm for float data with any/all masks * (essentially the generic case for just float data). * - One for each resampling algorithm for 8bit data with support for * input validity masks (per band or per pixel). This handles the common * case of nodata masking. * - One for each resampling algorithm for float data with support for * input validity masks (per band or per pixel). This handles the common * case of nodata masking. * * Some of the specializations would operate on all bands in one pass * (especially the ones without masking would do this), while others might * process each band individually to reduce code complexity. * *

Masking Semantics

* * A detailed explanation of the semantics of the validity and density masks, * and their effects on resampling kernels is needed here. */ /************************************************************************/ /* GDALWarpKernel Data Members */ /************************************************************************/ /** * \var GDALResampleAlg GDALWarpKernel::eResample; * * Resampling algorithm. * * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear, * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS, * GRA_Mode or GRA_Sum. * * This field is required. GDT_NearestNeighbour may be used as a default * value. */ /** * \var GDALDataType GDALWarpKernel::eWorkingDataType; * * Working pixel data type. * * The datatype of pixels in the source image (papabySrcimage) and * destination image (papabyDstImage) buffers. Note that operations on * some data types (such as GDT_Byte) may be much better optimized than other * less common cases. * * This field is required. It may not be GDT_Unknown. */ /** * \var int GDALWarpKernel::nBands; * * Number of bands. * * The number of bands (layers) of imagery being warped. Determines the * number of entries in the papabySrcImage, papanBandSrcValid, * and papabyDstImage arrays. * * This field is required. */ /** * \var int GDALWarpKernel::nSrcXSize; * * Source image width in pixels. * * This field is required. */ /** * \var int GDALWarpKernel::nSrcYSize; * * Source image height in pixels. * * This field is required. */ /** * \var double GDALWarpKernel::dfSrcXExtraSize; * * Number of pixels included in nSrcXSize that are present on the edges of * the area of interest to take into account the width of the kernel. * * This field is required. */ /** * \var double GDALWarpKernel::dfSrcYExtraSize; * * Number of pixels included in nSrcYExtraSize that are present on the edges of * the area of interest to take into account the height of the kernel. * * This field is required. */ /** * \var int GDALWarpKernel::papabySrcImage; * * Array of source image band data. * * This is an array of pointers (of size GDALWarpKernel::nBands) pointers * to image data. Each individual band of image data is organized as a single * block of image data in left to right, then bottom to top order. The actual * type of the image data is determined by GDALWarpKernel::eWorkingDataType. * * To access the pixel value for the (x=3, y=4) pixel (zero based) of * the second band with eWorkingDataType set to GDT_Float32 use code like * this: * * \code * float dfPixelValue; * int nBand = 2-1; // Band indexes are zero based. * int nPixel = 3; // Zero based. * int nLine = 4; // Zero based. * * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize ); * assert( nLine >= 0 && nLine < poKern->nSrcYSize ); * assert( nBand >= 0 && nBand < poKern->nBands ); * dfPixelValue = ((float *) poKern->papabySrcImage[nBand]) * [nPixel + nLine * poKern->nSrcXSize]; * \endcode * * This field is required. */ /** * \var GUInt32 **GDALWarpKernel::papanBandSrcValid; * * Per band validity mask for source pixels. * * Array of pixel validity mask layers for each source band. Each of * the mask layers is the same size (in pixels) as the source image with * one bit per pixel. Note that it is legal (and common) for this to be * NULL indicating that none of the pixels are invalidated, or for some * band validity masks to be NULL in which case all pixels of the band are * valid. The following code can be used to test the validity of a particular * pixel. * * \code * int bIsValid = TRUE; * int nBand = 2-1; // Band indexes are zero based. * int nPixel = 3; // Zero based. * int nLine = 4; // Zero based. * * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize ); * assert( nLine >= 0 && nLine < poKern->nSrcYSize ); * assert( nBand >= 0 && nBand < poKern->nBands ); * * if( poKern->papanBandSrcValid != NULL * && poKern->papanBandSrcValid[nBand] != NULL ) * { * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand]; * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize; * * bIsValid = panBandMask[iPixelOffset>>5] * & (0x01 << (iPixelOffset & 0x1f)); * } * \endcode */ /** * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid; * * Per pixel validity mask for source pixels. * * A single validity mask layer that applies to the pixels of all source * bands. It is accessed similarly to papanBandSrcValid, but without the * extra level of band indirection. * * This pointer may be NULL indicating that all pixels are valid. * * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available, * the pixel isn't considered to be valid unless both arrays indicate it is * valid. */ /** * \var float *GDALWarpKernel::pafUnifiedSrcDensity; * * Per pixel density mask for source pixels. * * A single density mask layer that applies to the pixels of all source * bands. It contains values between 0.0 and 1.0 indicating the degree to * which this pixel should be allowed to contribute to the output result. * * This pointer may be NULL indicating that all pixels have a density of 1.0. * * The density for a pixel may be accessed like this: * * \code * float fDensity = 1.0; * int nPixel = 3; // Zero based. * int nLine = 4; // Zero based. * * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize ); * assert( nLine >= 0 && nLine < poKern->nSrcYSize ); * if( poKern->pafUnifiedSrcDensity != NULL ) * fDensity = poKern->pafUnifiedSrcDensity * [nPixel + nLine * poKern->nSrcXSize]; * \endcode */ /** * \var int GDALWarpKernel::nDstXSize; * * Width of destination image in pixels. * * This field is required. */ /** * \var int GDALWarpKernel::nDstYSize; * * Height of destination image in pixels. * * This field is required. */ /** * \var GByte **GDALWarpKernel::papabyDstImage; * * Array of destination image band data. * * This is an array of pointers (of size GDALWarpKernel::nBands) pointers * to image data. Each individual band of image data is organized as a single * block of image data in left to right, then bottom to top order. The actual * type of the image data is determined by GDALWarpKernel::eWorkingDataType. * * To access the pixel value for the (x=3, y=4) pixel (zero based) of * the second band with eWorkingDataType set to GDT_Float32 use code like * this: * * \code * float dfPixelValue; * int nBand = 2-1; // Band indexes are zero based. * int nPixel = 3; // Zero based. * int nLine = 4; // Zero based. * * assert( nPixel >= 0 && nPixel < poKern->nDstXSize ); * assert( nLine >= 0 && nLine < poKern->nDstYSize ); * assert( nBand >= 0 && nBand < poKern->nBands ); * dfPixelValue = ((float *) poKern->papabyDstImage[nBand]) * [nPixel + nLine * poKern->nSrcYSize]; * \endcode * * This field is required. */ /** * \var GUInt32 *GDALWarpKernel::panDstValid; * * Per pixel validity mask for destination pixels. * * A single validity mask layer that applies to the pixels of all destination * bands. It is accessed similarly to papanUnitifiedSrcValid, but based * on the size of the destination image. * * This pointer may be NULL indicating that all pixels are valid. */ /** * \var float *GDALWarpKernel::pafDstDensity; * * Per pixel density mask for destination pixels. * * A single density mask layer that applies to the pixels of all destination * bands. It contains values between 0.0 and 1.0. * * This pointer may be NULL indicating that all pixels have a density of 1.0. * * The density for a pixel may be accessed like this: * * \code * float fDensity = 1.0; * int nPixel = 3; // Zero based. * int nLine = 4; // Zero based. * * assert( nPixel >= 0 && nPixel < poKern->nDstXSize ); * assert( nLine >= 0 && nLine < poKern->nDstYSize ); * if( poKern->pafDstDensity != NULL ) * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize]; * \endcode */ /** * \var int GDALWarpKernel::nSrcXOff; * * X offset to source pixel coordinates for transformation. * * See pfnTransformer. * * This field is required. */ /** * \var int GDALWarpKernel::nSrcYOff; * * Y offset to source pixel coordinates for transformation. * * See pfnTransformer. * * This field is required. */ /** * \var int GDALWarpKernel::nDstXOff; * * X offset to destination pixel coordinates for transformation. * * See pfnTransformer. * * This field is required. */ /** * \var int GDALWarpKernel::nDstYOff; * * Y offset to destination pixel coordinates for transformation. * * See pfnTransformer. * * This field is required. */ /** * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer; * * Source/destination location transformer. * * The function to call to transform coordinates between source image * pixel/line coordinates and destination image pixel/line coordinates. * See GDALTransformerFunc() for details of the semantics of this function. * * The GDALWarpKern algorithm will only ever use this transformer in * "destination to source" mode (bDstToSrc=TRUE), and will always pass * partial or complete scanlines of points in the destination image as * input. This means, among other things, that it is safe to the the * approximating transform GDALApproxTransform() as the transformation * function. * * Source and destination images may be subsets of a larger overall image. * The transformation algorithms will expect and return pixel/line coordinates * in terms of this larger image, so coordinates need to be offset by * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before * passing to pfnTransformer, and after return from it. * * The GDALWarpKernel::pfnTransformerArg value will be passed as the callback * data to this function when it is called. * * This field is required. */ /** * \var void *GDALWarpKernel::pTransformerArg; * * Callback data for pfnTransformer. * * This field may be NULL if not required for the pfnTransformer being used. */ /** * \var GDALProgressFunc GDALWarpKernel::pfnProgress; * * The function to call to report progress of the algorithm, and to check * for a requested termination of the operation. It operates according to * GDALProgressFunc() semantics. * * Generally speaking the progress function will be invoked for each * scanline of the destination buffer that has been processed. * * This field may be NULL (internally set to GDALDummyProgress()). */ /** * \var void *GDALWarpKernel::pProgress; * * Callback data for pfnProgress. * * This field may be NULL if not required for the pfnProgress being used. */ /************************************************************************/ /* GDALWarpKernel() */ /************************************************************************/ GDALWarpKernel::GDALWarpKernel() : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour), eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0), dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr), papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr), pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0), papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr), dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0), nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0), nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr), pTransformerArg(nullptr), pfnProgress(GDALDummyProgress), pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0), padfDstNoDataReal(nullptr), psThreadData(nullptr) {} /************************************************************************/ /* ~GDALWarpKernel() */ /************************************************************************/ GDALWarpKernel::~GDALWarpKernel() {} /************************************************************************/ /* PerformWarp() */ /************************************************************************/ /** * \fn CPLErr GDALWarpKernel::PerformWarp(); * * This method performs the warp described in the GDALWarpKernel. * * @return CE_None on success or CE_Failure if an error occurs. */ CPLErr GDALWarpKernel::PerformWarp() { const CPLErr eErr = Validate(); if( eErr != CE_None ) return eErr; // See #2445 and #3079. if( nSrcXSize <= 0 || nSrcYSize <= 0 ) { if( !pfnProgress( dfProgressBase + dfProgressScale, "", pProgress ) ) { CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" ); return CE_Failure; } return CE_None; } /* -------------------------------------------------------------------- */ /* Pre-calculate resampling scales and window sizes for filtering. */ /* -------------------------------------------------------------------- */ dfXScale = static_cast(nDstXSize) / (nSrcXSize - dfSrcXExtraSize); dfYScale = static_cast(nDstYSize) / (nSrcYSize - dfSrcYExtraSize); if( nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize ) dfXScale = 1.0; if( nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize ) dfYScale = 1.0; if( dfXScale < 1.0 ) { double dfXReciprocalScale = 1.0 / dfXScale; const int nXReciprocalScale = static_cast(dfXReciprocalScale + 0.5); if( fabs(dfXReciprocalScale-nXReciprocalScale) < 0.05 ) dfXScale = 1.0 / nXReciprocalScale; } if( dfYScale < 1.0 ) { double dfYReciprocalScale = 1.0 / dfYScale; const int nYReciprocalScale = static_cast(dfYReciprocalScale + 0.5); if( fabs(dfYReciprocalScale-nYReciprocalScale) < 0.05 ) dfYScale = 1.0 / nYReciprocalScale; } // XSCALE and YSCALE undocumented for now. Can help in some cases. // Best would probably be a per-pixel scale computation. const char* pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE"); if( pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING") ) dfXScale = CPLAtof(pszXScale); const char* pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE"); if( pszYScale != nullptr ) dfYScale = CPLAtof(pszYScale); // If the xscale is significantly lower than the yscale, this is highly // suspicious of a situation of wrapping a very large virtual file in // geographic coordinates with left and right parts being close to the // antimeridian. In that situation, the xscale computed by the above method // is completely wrong. Prefer doing an average of a few sample points // instead if( (dfYScale / dfXScale > 100 || (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))) ) { // Sample points along a grid const int nPointsX = std::min(10, nDstXSize); const int nPointsY = std::min(10, nDstYSize); const int nPoints = 3 * nPointsX * nPointsY; std::vector padfX; std::vector padfY; std::vector padfZ(nPoints); std::vector pabSuccess(nPoints); for(int iY = 0; iY < nPointsY; iY++) { for(int iX = 0; iX < nPointsX; iX++) { const double dfX = nPointsX == 1 ? 0.0 : static_cast(iX) * nDstXSize / (nPointsX - 1); const double dfY = nPointsY == 1 ? 0.0 : static_cast(iY) * nDstYSize / (nPointsY - 1); // Reproject each destination sample point and its neighbours // at (x+1,y) and (x,y+1), so as to get the local scale. padfX.push_back( dfX ); padfY.push_back( dfY ); padfX.push_back( (iX == nPointsX - 1) ? dfX - 1 : dfX + 1 ); padfY.push_back( dfY); padfX.push_back( dfX ); padfY.push_back( (iY == nPointsY - 1) ? dfY - 1 : dfY + 1 ); } } pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0], &padfZ[0], &pabSuccess[0]); // Compute the xscale at each sampling point std::vector adfXScales; for( int i = 0; i < nPoints; i+=3 ) { if( pabSuccess[i] && pabSuccess[i+1] && pabSuccess[i+2] ) { const double dfPointXScale = 1.0 / std::max( std::abs(padfX[i+1] - padfX[i]), std::abs(padfX[i+2] - padfX[i]) ); adfXScales.push_back(dfPointXScale); } } // Sort by increasing xcale std::sort(adfXScales.begin(), adfXScales.end()); if( !adfXScales.empty() ) { // Compute the average of scales, but eliminate outliers small // scales, if some samples are just along the discontinuity. const double dfMaxPointXScale = adfXScales.back(); double dfSumPointXScale = 0; int nCountPointScale = 0; for( double dfPointXScale : adfXScales ) { if( dfPointXScale > dfMaxPointXScale / 10 ) { dfSumPointXScale += dfPointXScale; nCountPointScale ++; } } if( nCountPointScale > 0 ) // should always be true { const double dfXScaleFromSampling = dfSumPointXScale / nCountPointScale; #if DEBUG_VERBOSE CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale, dfXScaleFromSampling); #endif dfXScale = dfXScaleFromSampling; } } } #if DEBUG_VERBOSE CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale); #endif const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95; // Safety check for callers that would use GDALWarpKernel without using // GDALWarpOperation. if( (eResample == GRA_CubicSpline || eResample == GRA_Lanczos || ((eResample == GRA_Cubic || eResample == GRA_Bilinear) && !bUse4SamplesFormula)) && atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0") ) != WARP_EXTRA_ELTS ) { CPLError( CE_Failure, CPLE_AppDefined, "Source arrays must have WARP_EXTRA_ELTS extra elements at " "their end. " "See GDALWarpKernel class definition. If this condition is " "fulfilled, define a EXTRA_ELTS=%d warp options", WARP_EXTRA_ELTS); return CE_Failure; } dfXFilter = anGWKFilterRadius[eResample]; dfYFilter = anGWKFilterRadius[eResample]; nXRadius = dfXScale < 1.0 ? static_cast(ceil(dfXFilter / dfXScale)) : static_cast(dfXFilter); nYRadius = dfYScale < 1.0 ? static_cast(ceil(dfYFilter / dfYScale)) : static_cast(dfYFilter); // Filter window offset depends on the parity of the kernel radius. nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius; nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius; bApplyVerticalShift = CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false); dfMultFactorVerticalShit = CPLAtof( CSLFetchNameValueDef(papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0") ); /* -------------------------------------------------------------------- */ /* Set up resampling functions. */ /* -------------------------------------------------------------------- */ if( CPLFetchBool( papszWarpOptions, "USE_GENERAL_CASE", false ) ) return GWKGeneralCase( this ); #if defined(HAVE_OPENCL) if( (eWorkingDataType == GDT_Byte || eWorkingDataType == GDT_CInt16 || eWorkingDataType == GDT_UInt16 || eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_CFloat32 || eWorkingDataType == GDT_Float32) && (eResample == GRA_Bilinear || eResample == GRA_Cubic || eResample == GRA_CubicSpline || eResample == GRA_Lanczos) && !bApplyVerticalShift && CPLFetchBool( papszWarpOptions, "USE_OPENCL", true ) ) { const CPLErr eResult = GWKOpenCLCase( this ); // CE_Warning tells us a suitable OpenCL environment was not available // so we fall through to other CPU based methods. if( eResult != CE_Warning ) return eResult; } #endif // defined HAVE_OPENCL const bool bNoMasksOrDstDensityOnly = papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr && pafUnifiedSrcDensity == nullptr && panDstValid == nullptr; if( eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly ) return GWKNearestNoMasksOrDstDensityOnlyByte( this ); if( eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear && bNoMasksOrDstDensityOnly ) return GWKBilinearNoMasksOrDstDensityOnlyByte( this ); if( eWorkingDataType == GDT_Byte && eResample == GRA_Cubic && bNoMasksOrDstDensityOnly ) return GWKCubicNoMasksOrDstDensityOnlyByte( this ); if( eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline && bNoMasksOrDstDensityOnly ) return GWKCubicSplineNoMasksOrDstDensityOnlyByte( this ); if( eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour ) return GWKNearestByte( this ); if( (eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) && eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly ) return GWKNearestNoMasksOrDstDensityOnlyShort( this ); if( (eWorkingDataType == GDT_Int16 ) && eResample == GRA_Cubic && bNoMasksOrDstDensityOnly ) return GWKCubicNoMasksOrDstDensityOnlyShort( this ); if( (eWorkingDataType == GDT_Int16 ) && eResample == GRA_CubicSpline && bNoMasksOrDstDensityOnly ) return GWKCubicSplineNoMasksOrDstDensityOnlyShort( this ); if( (eWorkingDataType == GDT_Int16 ) && eResample == GRA_Bilinear && bNoMasksOrDstDensityOnly ) return GWKBilinearNoMasksOrDstDensityOnlyShort( this ); if( (eWorkingDataType == GDT_UInt16 ) && eResample == GRA_Cubic && bNoMasksOrDstDensityOnly ) return GWKCubicNoMasksOrDstDensityOnlyUShort( this ); if( (eWorkingDataType == GDT_UInt16 ) && eResample == GRA_CubicSpline && bNoMasksOrDstDensityOnly ) return GWKCubicSplineNoMasksOrDstDensityOnlyUShort( this ); if( (eWorkingDataType == GDT_UInt16 ) && eResample == GRA_Bilinear && bNoMasksOrDstDensityOnly ) return GWKBilinearNoMasksOrDstDensityOnlyUShort( this ); if( (eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) && eResample == GRA_NearestNeighbour ) return GWKNearestShort( this ); if( eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly ) return GWKNearestNoMasksOrDstDensityOnlyFloat( this ); if( eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour ) return GWKNearestFloat( this ); if( eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear && bNoMasksOrDstDensityOnly ) return GWKBilinearNoMasksOrDstDensityOnlyFloat( this ); if( eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic && bNoMasksOrDstDensityOnly ) return GWKCubicNoMasksOrDstDensityOnlyFloat( this ); #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL if( eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear && bNoMasksOrDstDensityOnly ) return GWKBilinearNoMasksOrDstDensityOnlyDouble( this ); if( eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic && bNoMasksOrDstDensityOnly ) return GWKCubicNoMasksOrDstDensityOnlyDouble( this ); #endif if( eResample == GRA_Average ) return GWKAverageOrMode( this ); if( eResample == GRA_RMS ) return GWKAverageOrMode( this ); if( eResample == GRA_Mode ) return GWKAverageOrMode( this ); if( eResample == GRA_Max ) return GWKAverageOrMode( this ); if( eResample == GRA_Min ) return GWKAverageOrMode( this ); if( eResample == GRA_Med ) return GWKAverageOrMode( this ); if( eResample == GRA_Q1 ) return GWKAverageOrMode( this ); if( eResample == GRA_Q3 ) return GWKAverageOrMode( this ); if( eResample == GRA_Sum ) return GWKAverageOrMode( this ); if (!GDALDataTypeIsComplex(eWorkingDataType)) { return GWKRealCase( this ); } return GWKGeneralCase( this ); } /************************************************************************/ /* Validate() */ /************************************************************************/ /** * \fn CPLErr GDALWarpKernel::Validate() * * Check the settings in the GDALWarpKernel, and issue a CPLError() * (and return CE_Failure) if the configuration is considered to be * invalid for some reason. * * This method will also do some standard defaulting such as setting * pfnProgress to GDALDummyProgress() if it is NULL. * * @return CE_None on success or CE_Failure if an error is detected. */ CPLErr GDALWarpKernel::Validate() { if( static_cast(eResample) >= (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])) ) { CPLError( CE_Failure, CPLE_AppDefined, "Unsupported resampling method %d.", static_cast(eResample) ); return CE_Failure; } return CE_None; } /************************************************************************/ /* GWKOverlayDensity() */ /* */ /* Compute the final density for the destination pixel. This */ /* is a function of the overlay density (passed in) and the */ /* original density. */ /************************************************************************/ static void GWKOverlayDensity( const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset, double dfDensity ) { if( dfDensity < 0.0001 || poWK->pafDstDensity == nullptr ) return; poWK->pafDstDensity[iDstOffset] = static_cast ( 1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]) ); } /************************************************************************/ /* GWKRoundValueT() */ /************************************************************************/ template struct sGWKRoundValueT { static T eval(double); }; template struct sGWKRoundValueT /* signed */ { static T eval(double dfValue) { return static_cast(floor(dfValue + 0.5)); } }; template struct sGWKRoundValueT /* unsigned */ { static T eval(double dfValue) { return static_cast(dfValue + 0.5); } }; template static T GWKRoundValueT(double dfValue) { return sGWKRoundValueT::is_signed>::eval(dfValue); } template<> float GWKRoundValueT(double dfValue) { return static_cast(dfValue); } #ifdef notused template<> double GWKRoundValueT(double dfValue) { return dfValue; } #endif /************************************************************************/ /* GWKClampValueT() */ /************************************************************************/ template static CPL_INLINE T GWKClampValueT(double dfValue) { if( dfValue < std::numeric_limits::min() ) return std::numeric_limits::min(); else if( dfValue > std::numeric_limits::max() ) return std::numeric_limits::max(); else return GWKRoundValueT(dfValue); } template<> float GWKClampValueT(double dfValue) { return static_cast(dfValue); } #ifdef notused template<> double GWKClampValueT(double dfValue) { return dfValue; } #endif /************************************************************************/ /* GWKSetPixelValueRealT() */ /************************************************************************/ template static bool GWKSetPixelValueRealT( const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iDstOffset, double dfDensity, T value) { T *pDst = reinterpret_cast(poWK->papabyDstImage[iBand]); /* -------------------------------------------------------------------- */ /* If the source density is less than 100% we need to fetch the */ /* existing destination value, and mix it with the source to */ /* get the new "to apply" value. Also compute composite */ /* density. */ /* */ /* We avoid mixing if density is very near one or risk mixing */ /* in very extreme nodata values and causing odd results (#1610) */ /* -------------------------------------------------------------------- */ if( dfDensity < 0.9999 ) { if( dfDensity < 0.0001 ) return true; double dfDstDensity = 1.0; if( poWK->pafDstDensity != nullptr ) dfDstDensity = poWK->pafDstDensity[iDstOffset]; else if( poWK->panDstValid != nullptr && !((poWK->panDstValid[iDstOffset>>5] & (0x01 << (iDstOffset & 0x1f))) ) ) dfDstDensity = 0.0; // It seems like we also ought to be testing panDstValid[] here! const double dfDstReal = pDst[iDstOffset]; // The destination density is really only relative to the portion // not occluded by the overlay. const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity; const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) / (dfDensity + dfDstInfluence); /* -------------------------------------------------------------------- */ /* Actually apply the destination value. */ /* */ /* Avoid using the destination nodata value for integer datatypes */ /* if by chance it is equal to the computed pixel value. */ /* -------------------------------------------------------------------- */ pDst[iDstOffset] = GWKClampValueT(dfReal); } else { pDst[iDstOffset] = value; } if( poWK->padfDstNoDataReal != nullptr && poWK->padfDstNoDataReal[iBand] == static_cast(pDst[iDstOffset]) ) { if( pDst[iDstOffset] == std::numeric_limits::min() ) pDst[iDstOffset] = std::numeric_limits::min() + 1; else pDst[iDstOffset]--; } return true; } /************************************************************************/ /* GWKSetPixelValue() */ /************************************************************************/ static bool GWKSetPixelValue( const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iDstOffset, double dfDensity, double dfReal, double dfImag ) { GByte *pabyDst = poWK->papabyDstImage[iBand]; /* -------------------------------------------------------------------- */ /* If the source density is less than 100% we need to fetch the */ /* existing destination value, and mix it with the source to */ /* get the new "to apply" value. Also compute composite */ /* density. */ /* */ /* We avoid mixing if density is very near one or risk mixing */ /* in very extreme nodata values and causing odd results (#1610) */ /* -------------------------------------------------------------------- */ if( dfDensity < 0.9999 ) { if( dfDensity < 0.0001 ) return true; double dfDstDensity = 1.0; if( poWK->pafDstDensity != nullptr ) dfDstDensity = poWK->pafDstDensity[iDstOffset]; else if( poWK->panDstValid != nullptr && !((poWK->panDstValid[iDstOffset>>5] & (0x01 << (iDstOffset & 0x1f))) ) ) dfDstDensity = 0.0; double dfDstReal = 0.0; double dfDstImag = 0.0; // It seems like we also ought to be testing panDstValid[] here! // TODO(schwehr): Factor out this repreated type of set. switch( poWK->eWorkingDataType ) { case GDT_Byte: dfDstReal = pabyDst[iDstOffset]; dfDstImag = 0.0; break; case GDT_Int16: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_UInt16: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_Int32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_UInt32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_Float32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_Float64: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_CInt16: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset*2]; dfDstImag = reinterpret_cast(pabyDst)[iDstOffset*2+1]; break; case GDT_CInt32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset*2]; dfDstImag = reinterpret_cast(pabyDst)[iDstOffset*2+1]; break; case GDT_CFloat32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset*2]; dfDstImag = reinterpret_cast(pabyDst)[iDstOffset*2+1]; break; case GDT_CFloat64: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset*2]; dfDstImag = reinterpret_cast(pabyDst)[iDstOffset*2+1]; break; default: CPLAssert( false ); return false; } // The destination density is really only relative to the portion // not occluded by the overlay. const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity; dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) / (dfDensity + dfDstInfluence); dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) / (dfDensity + dfDstInfluence); } /* -------------------------------------------------------------------- */ /* Actually apply the destination value. */ /* */ /* Avoid using the destination nodata value for integer datatypes */ /* if by chance it is equal to the computed pixel value. */ /* -------------------------------------------------------------------- */ // TODO(schwehr): Can we make this a template? #define CLAMP(type) \ do { \ type* _pDst = reinterpret_cast(pabyDst); \ if( dfReal < static_cast(std::numeric_limits::min()) ) \ _pDst[iDstOffset] = static_cast(std::numeric_limits::min()); \ else if( dfReal > static_cast(std::numeric_limits::max()) ) \ _pDst[iDstOffset] = static_cast(std::numeric_limits::max()); \ else \ _pDst[iDstOffset] = \ (std::numeric_limits::is_signed) ? \ static_cast(floor(dfReal + 0.5)) : \ static_cast(dfReal + 0.5); \ if( poWK->padfDstNoDataReal != nullptr && \ poWK->padfDstNoDataReal[iBand] == static_cast(_pDst[iDstOffset]) ) \ { \ if( _pDst[iDstOffset] == static_cast(std::numeric_limits::min()) ) \ _pDst[iDstOffset] = static_cast(std::numeric_limits::min() + 1); \ else \ _pDst[iDstOffset]--; \ } } while( false ) switch( poWK->eWorkingDataType ) { case GDT_Byte: CLAMP(GByte); break; case GDT_Int16: CLAMP(GInt16); break; case GDT_UInt16: CLAMP(GUInt16); break; case GDT_UInt32: CLAMP(GUInt32); break; case GDT_Int32: CLAMP(GInt32); break; case GDT_Float32: reinterpret_cast(pabyDst)[iDstOffset] = static_cast(dfReal); break; case GDT_Float64: reinterpret_cast(pabyDst)[iDstOffset] = dfReal; break; case GDT_CInt16: { typedef GInt16 T; if( dfReal < static_cast(std::numeric_limits::min()) ) reinterpret_cast(pabyDst)[iDstOffset*2] = std::numeric_limits::min(); else if( dfReal > static_cast(std::numeric_limits::max()) ) reinterpret_cast(pabyDst)[iDstOffset*2] = std::numeric_limits::max(); else reinterpret_cast(pabyDst)[iDstOffset*2] = static_cast(floor(dfReal+0.5)); if( dfImag < static_cast(std::numeric_limits::min()) ) reinterpret_cast(pabyDst)[iDstOffset*2+1] = std::numeric_limits::min(); else if( dfImag > static_cast(std::numeric_limits::max()) ) reinterpret_cast(pabyDst)[iDstOffset*2+1] = std::numeric_limits::max(); else reinterpret_cast(pabyDst)[iDstOffset*2+1] = static_cast(floor(dfImag+0.5)); break; } case GDT_CInt32: { typedef GInt32 T; if( dfReal < static_cast(std::numeric_limits::min()) ) reinterpret_cast(pabyDst)[iDstOffset*2] = std::numeric_limits::min(); else if( dfReal > static_cast(std::numeric_limits::max()) ) reinterpret_cast(pabyDst)[iDstOffset*2] = std::numeric_limits::max(); else reinterpret_cast(pabyDst)[iDstOffset*2] = static_cast(floor(dfReal+0.5)); if( dfImag < static_cast(std::numeric_limits::min()) ) reinterpret_cast(pabyDst)[iDstOffset*2+1] = std::numeric_limits::min(); else if( dfImag > static_cast(std::numeric_limits::max()) ) reinterpret_cast(pabyDst)[iDstOffset*2+1] = std::numeric_limits::max(); else reinterpret_cast(pabyDst)[iDstOffset*2+1] = static_cast(floor(dfImag+0.5)); break; } case GDT_CFloat32: reinterpret_cast(pabyDst)[iDstOffset*2] = static_cast(dfReal); reinterpret_cast(pabyDst)[iDstOffset*2+1] = static_cast(dfImag); break; case GDT_CFloat64: reinterpret_cast(pabyDst)[iDstOffset*2] = dfReal; reinterpret_cast(pabyDst)[iDstOffset*2+1] = dfImag; break; default: return false; } return true; } /************************************************************************/ /* GWKSetPixelValueReal() */ /************************************************************************/ static bool GWKSetPixelValueReal( const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iDstOffset, double dfDensity, double dfReal ) { GByte *pabyDst = poWK->papabyDstImage[iBand]; /* -------------------------------------------------------------------- */ /* If the source density is less than 100% we need to fetch the */ /* existing destination value, and mix it with the source to */ /* get the new "to apply" value. Also compute composite */ /* density. */ /* */ /* We avoid mixing if density is very near one or risk mixing */ /* in very extreme nodata values and causing odd results (#1610) */ /* -------------------------------------------------------------------- */ if( dfDensity < 0.9999 ) { if( dfDensity < 0.0001 ) return true; double dfDstReal = 0.0; double dfDstDensity = 1.0; if( poWK->pafDstDensity != nullptr ) dfDstDensity = poWK->pafDstDensity[iDstOffset]; else if( poWK->panDstValid != nullptr && !((poWK->panDstValid[iDstOffset>>5] & (0x01 << (iDstOffset & 0x1f))) ) ) dfDstDensity = 0.0; // It seems like we also ought to be testing panDstValid[] here! switch( poWK->eWorkingDataType ) { case GDT_Byte: dfDstReal = pabyDst[iDstOffset]; break; case GDT_Int16: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_UInt16: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_Int32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_UInt32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_Float32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_Float64: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; default: CPLAssert( false ); return false; } // The destination density is really only relative to the portion // not occluded by the overlay. const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity; dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) / (dfDensity + dfDstInfluence); } /* -------------------------------------------------------------------- */ /* Actually apply the destination value. */ /* */ /* Avoid using the destination nodata value for integer datatypes */ /* if by chance it is equal to the computed pixel value. */ /* -------------------------------------------------------------------- */ switch( poWK->eWorkingDataType ) { case GDT_Byte: CLAMP(GByte); break; case GDT_Int16: CLAMP(GInt16); break; case GDT_UInt16: CLAMP(GUInt16); break; case GDT_UInt32: CLAMP(GUInt32); break; case GDT_Int32: CLAMP(GInt32); break; case GDT_Float32: reinterpret_cast(pabyDst)[iDstOffset] = static_cast(dfReal); break; case GDT_Float64: reinterpret_cast(pabyDst)[iDstOffset] = dfReal; break; default: return false; } return true; } /************************************************************************/ /* GWKGetPixelValue() */ /************************************************************************/ /* It is assumed that panUnifiedSrcValid has been checked before */ static bool GWKGetPixelValue( const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iSrcOffset, double *pdfDensity, double *pdfReal, double *pdfImag ) { GByte *pabySrc = poWK->papabySrcImage[iBand]; if( poWK->papanBandSrcValid != nullptr && poWK->papanBandSrcValid[iBand] != nullptr && !((poWK->papanBandSrcValid[iBand][iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f)))) ) { *pdfDensity = 0.0; return false; } // TODO(schwehr): Fix casting. switch( poWK->eWorkingDataType ) { case GDT_Byte: *pdfReal = pabySrc[iSrcOffset]; *pdfImag = 0.0; break; case GDT_Int16: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_UInt16: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_Int32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_UInt32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_Float32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_Float64: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_CInt16: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset*2]; *pdfImag = reinterpret_cast(pabySrc)[iSrcOffset*2+1]; break; case GDT_CInt32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset*2]; *pdfImag = reinterpret_cast(pabySrc)[iSrcOffset*2+1]; break; case GDT_CFloat32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset*2]; *pdfImag = reinterpret_cast(pabySrc)[iSrcOffset*2+1]; break; case GDT_CFloat64: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset*2]; *pdfImag = reinterpret_cast(pabySrc)[iSrcOffset*2+1]; break; default: *pdfDensity = 0.0; return false; } if( poWK->pafUnifiedSrcDensity != nullptr ) *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; else *pdfDensity = 1.0; return *pdfDensity != 0.0; } /************************************************************************/ /* GWKGetPixelValueReal() */ /************************************************************************/ static bool GWKGetPixelValueReal( const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iSrcOffset, double *pdfDensity, double *pdfReal ) { GByte *pabySrc = poWK->papabySrcImage[iBand]; if( poWK->papanBandSrcValid != nullptr && poWK->papanBandSrcValid[iBand] != nullptr && !((poWK->papanBandSrcValid[iBand][iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f)))) ) { *pdfDensity = 0.0; return false; } switch( poWK->eWorkingDataType ) { case GDT_Byte: *pdfReal = pabySrc[iSrcOffset]; break; case GDT_Int16: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_UInt16: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_Int32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_UInt32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_Float32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_Float64: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; default: CPLAssert(false); return false; } if( poWK->pafUnifiedSrcDensity != nullptr ) *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; else *pdfDensity = 1.0; return *pdfDensity != 0.0; } /************************************************************************/ /* GWKGetPixelRow() */ /************************************************************************/ /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */ /* data-types. */ static bool GWKGetPixelRow( const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iSrcOffset, int nHalfSrcLen, double* padfDensity, double adfReal[], double* padfImag ) { // We know that nSrcLen is even, so we can *always* unroll loops 2x. const int nSrcLen = nHalfSrcLen * 2; bool bHasValid = false; if( padfDensity != nullptr ) { // Init the density. for( int i = 0; i < nSrcLen; i += 2 ) { padfDensity[i] = 1.0; padfDensity[i+1] = 1.0; } if( poWK->panUnifiedSrcValid != nullptr ) { for( int i = 0; i < nSrcLen; i += 2 ) { if( poWK->panUnifiedSrcValid[(iSrcOffset+i)>>5] & (0x01 << ((iSrcOffset+i) & 0x1f)) ) bHasValid = true; else padfDensity[i] = 0.0; if( poWK->panUnifiedSrcValid[(iSrcOffset+i+1)>>5] & (0x01 << ((iSrcOffset+i+1) & 0x1f)) ) bHasValid = true; else padfDensity[i+1] = 0.0; } // Reset or fail as needed. if( bHasValid ) bHasValid = false; else return false; } if( poWK->papanBandSrcValid != nullptr && poWK->papanBandSrcValid[iBand] != nullptr ) { for( int i = 0; i < nSrcLen; i += 2 ) { if( poWK->papanBandSrcValid[iBand][(iSrcOffset+i)>>5] & (0x01 << ((iSrcOffset+i) & 0x1f)) ) bHasValid = true; else padfDensity[i] = 0.0; if( poWK->papanBandSrcValid[iBand][(iSrcOffset+i+1)>>5] & (0x01 << ((iSrcOffset+i+1) & 0x1f)) ) bHasValid = true; else padfDensity[i+1] = 0.0; } // Reset or fail as needed. if( bHasValid ) bHasValid = false; else return false; } } // TODO(schwehr): Fix casting. // Fetch data. switch( poWK->eWorkingDataType ) { case GDT_Byte: { GByte* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[i]; adfReal[i+1] = pSrc[i+1]; } break; } case GDT_Int16: { GInt16* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[i]; adfReal[i+1] = pSrc[i+1]; } break; } case GDT_UInt16: { GUInt16* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[i]; adfReal[i+1] = pSrc[i+1]; } break; } case GDT_Int32: { GInt32* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[i]; adfReal[i+1] = pSrc[i+1]; } break; } case GDT_UInt32: { GUInt32* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[i]; adfReal[i+1] = pSrc[i+1]; } break; } case GDT_Float32: { float* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[i]; adfReal[i+1] = pSrc[i+1]; } break; } case GDT_Float64: { double* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[i]; adfReal[i+1] = pSrc[i+1]; } break; } case GDT_CInt16: { GInt16* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += 2 * iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[2*i]; padfImag[i] = pSrc[2*i+1]; adfReal[i+1] = pSrc[2*i+2]; padfImag[i+1] = pSrc[2*i+3]; } break; } case GDT_CInt32: { GInt32* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += 2 * iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[2*i]; padfImag[i] = pSrc[2*i+1]; adfReal[i+1] = pSrc[2*i+2]; padfImag[i+1] = pSrc[2*i+3]; } break; } case GDT_CFloat32: { float* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += 2 * iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[2*i]; padfImag[i] = pSrc[2*i+1]; adfReal[i+1] = pSrc[2*i+2]; padfImag[i+1] = pSrc[2*i+3]; } break; } case GDT_CFloat64: { double* pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += 2 * iSrcOffset; for( int i = 0; i < nSrcLen; i += 2 ) { adfReal[i] = pSrc[2*i]; padfImag[i] = pSrc[2*i+1]; adfReal[i+1] = pSrc[2*i+2]; padfImag[i+1] = pSrc[2*i+3]; } break; } default: CPLAssert(false); if( padfDensity ) memset( padfDensity, 0, nSrcLen * sizeof(double) ); return false; } if( padfDensity == nullptr ) return true; if( poWK->pafUnifiedSrcDensity == nullptr ) { for( int i = 0; i < nSrcLen; i += 2 ) { // Take into account earlier calcs. if( padfDensity[i] > SRC_DENSITY_THRESHOLD ) { padfDensity[i] = 1.0; bHasValid = true; } if( padfDensity[i+1] > SRC_DENSITY_THRESHOLD ) { padfDensity[i+1] = 1.0; bHasValid = true; } } } else { for( int i = 0; i < nSrcLen; i += 2 ) { if( padfDensity[i] > SRC_DENSITY_THRESHOLD ) padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset+i]; if( padfDensity[i] > SRC_DENSITY_THRESHOLD ) bHasValid = true; if( padfDensity[i+1] > SRC_DENSITY_THRESHOLD ) padfDensity[i+1] = poWK->pafUnifiedSrcDensity[iSrcOffset+i+1]; if( padfDensity[i+1] > SRC_DENSITY_THRESHOLD ) bHasValid = true; } } return bHasValid; } /************************************************************************/ /* GWKGetPixelT() */ /************************************************************************/ template static bool GWKGetPixelT( const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue ) { T *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); if( ( poWK->panUnifiedSrcValid != nullptr && !((poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) ) || ( poWK->papanBandSrcValid != nullptr && poWK->papanBandSrcValid[iBand] != nullptr && !((poWK->papanBandSrcValid[iBand][iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f)))) ) ) { *pdfDensity = 0.0; return false; } *pValue = pSrc[iSrcOffset]; if( poWK->pafUnifiedSrcDensity == nullptr ) *pdfDensity = 1.0; else *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; return *pdfDensity != 0.0; } /************************************************************************/ /* GWKBilinearResample() */ /* Set of bilinear interpolators */ /************************************************************************/ static bool GWKBilinearResample4Sample( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag ) { // Save as local variables to avoid following pointers. const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; int iSrcX = static_cast(floor(dfSrcX - 0.5)); int iSrcY = static_cast(floor(dfSrcY - 0.5)); double dfRatioX = 1.5 - (dfSrcX - iSrcX); double dfRatioY = 1.5 - (dfSrcY - iSrcY); bool bShifted = false; if( iSrcX == -1 ) { iSrcX = 0; dfRatioX = 1; } if( iSrcY == -1 ) { iSrcY = 0; dfRatioY = 1; } GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; // Shift so we don't overrun the array. if( static_cast(nSrcXSize) * nSrcYSize == iSrcOffset + 1 || static_cast(nSrcXSize) * nSrcYSize == iSrcOffset + nSrcXSize + 1 ) { bShifted = true; --iSrcOffset; } double adfDensity[2] = { 0.0, 0.0 }; double adfReal[2] = { 0.0, 0.0 }; double adfImag[2] = { 0.0, 0.0 }; double dfAccumulatorReal = 0.0; double dfAccumulatorImag = 0.0; double dfAccumulatorDensity = 0.0; double dfAccumulatorDivisor = 0.0; const GPtrDiff_t nSrcPixels = static_cast(nSrcXSize) * nSrcYSize; // Get pixel row. if( iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 && iSrcOffset < nSrcPixels && GWKGetPixelRow( poWK, iBand, iSrcOffset, 1, adfDensity, adfReal, adfImag ) ) { double dfMult1 = dfRatioX * dfRatioY; double dfMult2 = (1.0-dfRatioX) * dfRatioY; // Shifting corrected. if( bShifted ) { adfReal[0] = adfReal[1]; adfImag[0] = adfImag[1]; adfDensity[0] = adfDensity[1]; } // Upper Left Pixel. if( iSrcX >= 0 && iSrcX < nSrcXSize && adfDensity[0] > SRC_DENSITY_THRESHOLD ) { dfAccumulatorDivisor += dfMult1; dfAccumulatorReal += adfReal[0] * dfMult1; dfAccumulatorImag += adfImag[0] * dfMult1; dfAccumulatorDensity += adfDensity[0] * dfMult1; } // Upper Right Pixel. if( iSrcX+1 >= 0 && iSrcX+1 < nSrcXSize && adfDensity[1] > SRC_DENSITY_THRESHOLD ) { dfAccumulatorDivisor += dfMult2; dfAccumulatorReal += adfReal[1] * dfMult2; dfAccumulatorImag += adfImag[1] * dfMult2; dfAccumulatorDensity += adfDensity[1] * dfMult2; } } // Get pixel row. if( iSrcY+1 >= 0 && iSrcY+1 < nSrcYSize && iSrcOffset+nSrcXSize >= 0 && iSrcOffset+nSrcXSize < nSrcPixels && GWKGetPixelRow( poWK, iBand, iSrcOffset+nSrcXSize, 1, adfDensity, adfReal, adfImag ) ) { double dfMult1 = dfRatioX * (1.0-dfRatioY); double dfMult2 = (1.0-dfRatioX) * (1.0-dfRatioY); // Shifting corrected if ( bShifted ) { adfReal[0] = adfReal[1]; adfImag[0] = adfImag[1]; adfDensity[0] = adfDensity[1]; } // Lower Left Pixel if ( iSrcX >= 0 && iSrcX < nSrcXSize && adfDensity[0] > SRC_DENSITY_THRESHOLD ) { dfAccumulatorDivisor += dfMult1; dfAccumulatorReal += adfReal[0] * dfMult1; dfAccumulatorImag += adfImag[0] * dfMult1; dfAccumulatorDensity += adfDensity[0] * dfMult1; } // Lower Right Pixel. if( iSrcX+1 >= 0 && iSrcX+1 < nSrcXSize && adfDensity[1] > SRC_DENSITY_THRESHOLD ) { dfAccumulatorDivisor += dfMult2; dfAccumulatorReal += adfReal[1] * dfMult2; dfAccumulatorImag += adfImag[1] * dfMult2; dfAccumulatorDensity += adfDensity[1] * dfMult2; } } /* -------------------------------------------------------------------- */ /* Return result. */ /* -------------------------------------------------------------------- */ if( dfAccumulatorDivisor == 1.0 ) { *pdfReal = dfAccumulatorReal; *pdfImag = dfAccumulatorImag; *pdfDensity = dfAccumulatorDensity; return false; } else if( dfAccumulatorDivisor < 0.00001 ) { *pdfReal = 0.0; *pdfImag = 0.0; *pdfDensity = 0.0; return false; } else { *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor; *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor; *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor; return true; } } template static bool GWKBilinearResampleNoMasks4SampleT( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, T *pValue ) { const int iSrcX = static_cast(floor(dfSrcX - 0.5)); const int iSrcY = static_cast(floor(dfSrcY - 0.5)); GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * poWK->nSrcXSize; const double dfRatioX = 1.5 - (dfSrcX - iSrcX); const double dfRatioY = 1.5 - (dfSrcY - iSrcY); const T* const pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); if( iSrcX >= 0 && iSrcX+1 < poWK->nSrcXSize && iSrcY >= 0 && iSrcY+1 < poWK->nSrcYSize ) { const double dfAccumulator = (pSrc[iSrcOffset] * dfRatioX + pSrc[iSrcOffset+1] * (1.0 - dfRatioX)) * dfRatioY + (pSrc[iSrcOffset+poWK->nSrcXSize] * dfRatioX + pSrc[iSrcOffset+1+poWK->nSrcXSize] * (1.0 - dfRatioX)) * (1.0-dfRatioY); *pValue = GWKRoundValueT(dfAccumulator); return true; } double dfAccumulatorDivisor = 0.0; double dfAccumulator = 0.0; // Upper Left Pixel. if( iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 && iSrcY < poWK->nSrcYSize ) { const double dfMult = dfRatioX * dfRatioY; dfAccumulatorDivisor += dfMult; dfAccumulator += pSrc[iSrcOffset] * dfMult; } // Upper Right Pixel. if( iSrcX+1 >= 0 && iSrcX+1 < poWK->nSrcXSize && iSrcY >= 0 && iSrcY < poWK->nSrcYSize ) { const double dfMult = (1.0 - dfRatioX) * dfRatioY; dfAccumulatorDivisor += dfMult; dfAccumulator += pSrc[iSrcOffset+1] * dfMult; } // Lower Right Pixel. if( iSrcX+1 >= 0 && iSrcX+1 < poWK->nSrcXSize && iSrcY+1 >= 0 && iSrcY+1 < poWK->nSrcYSize ) { const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY); dfAccumulatorDivisor += dfMult; dfAccumulator += pSrc[iSrcOffset+1+poWK->nSrcXSize] * dfMult; } // Lower Left Pixel. if( iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY+1 >= 0 && iSrcY+1 < poWK->nSrcYSize ) { const double dfMult = dfRatioX * (1.0 - dfRatioY); dfAccumulatorDivisor += dfMult; dfAccumulator += pSrc[iSrcOffset+poWK->nSrcXSize] * dfMult; } /* -------------------------------------------------------------------- */ /* Return result. */ /* -------------------------------------------------------------------- */ double dfValue = 0.0; if( dfAccumulatorDivisor < 0.00001 ) { *pValue = 0; return false; } else if( dfAccumulatorDivisor == 1.0 ) { dfValue = dfAccumulator; } else { dfValue = dfAccumulator / dfAccumulatorDivisor; } *pValue = GWKRoundValueT(dfValue); return true; } /************************************************************************/ /* GWKCubicResample() */ /* Set of bicubic interpolators using cubic convolution. */ /************************************************************************/ // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18 // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2) // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation // TODO(schwehr): Use an inline function. #define CubicConvolution(distance1,distance2,distance3,f0,f1,f2,f3) \ ( f1 \ + 0.5 * (distance1*(f2 - f0) \ + distance2*(2.0*f0 - 5.0*f1 + 4.0*f2 - f3) \ + distance3*(3.0*(f1 - f2) + f3 - f0))) /************************************************************************/ /* GWKCubicComputeWeights() */ /************************************************************************/ // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]); // TODO(schwehr): Use an inline function. #define GWKCubicComputeWeights(dfX_, adfCoeffs) \ { \ const double dfX = dfX_; \ const double dfHalfX = 0.5 * dfX; \ const double dfThreeX = 3.0 * dfX; \ const double dfHalfX2 = dfHalfX * dfX; \ \ adfCoeffs[0] = dfHalfX * (-1 + dfX * (2 - dfX)); \ adfCoeffs[1] = 1 + dfHalfX2 * (-5 + dfThreeX); \ adfCoeffs[2] = dfHalfX * (1 + dfX * (4 - dfThreeX)); \ adfCoeffs[3] = dfHalfX2 * (-1 + dfX); \ } // TODO(schwehr): Use an inline function. #define CONVOL4(v1, v2) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + \ (v1)[2] * (v2)[2] + (v1)[3] * (v2)[3]) #if 0 // Optimal (in theory...) for max 2 convolutions: 14 multiplications // instead of 17. // TODO(schwehr): Use an inline function. #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \ { \ const double dfX = dfX_; \ dfHalfX = 0.5 * dfX; \ const double dfThreeX = 3.0 * dfX; \ const double dfXMinus1 = dfX - 1; \ \ adfCoeffs[0] = -1 + dfX * (2 - dfX); \ adfCoeffs[1] = dfX * (-5 + dfThreeX); \ /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \ adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \ /*adfCoeffs[3] = dfX * (-1 + dfX); */ \ adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \ } // TODO(schwehr): Use an inline function. #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \ ((v)[1] + (dfHalfX) * (\ (adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \ (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3])) #endif static bool GWKCubicResample4Sample( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag ) { const int iSrcX = static_cast(dfSrcX - 0.5); const int iSrcY = static_cast(dfSrcY - 0.5); GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * poWK->nSrcXSize; const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; double adfDensity[4] = {}; double adfReal[4] = {}; double adfImag[4] = {}; // Get the bilinear interpolation at the image borders. if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize ) return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, pdfImag ); double adfValueDens[4] = {}; double adfValueReal[4] = {}; double adfValueImag[4] = {}; double adfCoeffsX[4] = {}; GWKCubicComputeWeights(dfDeltaX, adfCoeffsX); for( GPtrDiff_t i = -1; i < 3; i++ ) { if( !GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1, 2, adfDensity, adfReal, adfImag) || adfDensity[0] < SRC_DENSITY_THRESHOLD || adfDensity[1] < SRC_DENSITY_THRESHOLD || adfDensity[2] < SRC_DENSITY_THRESHOLD || adfDensity[3] < SRC_DENSITY_THRESHOLD ) { return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, pdfImag ); } adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity); adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal); adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag); } /* -------------------------------------------------------------------- */ /* For now, if we have any pixels missing in the kernel area, */ /* we fallback on using bilinear interpolation. Ideally we */ /* should do "weight adjustment" of our results similarly to */ /* what is done for the cubic spline and lanc. interpolators. */ /* -------------------------------------------------------------------- */ double adfCoeffsY[4] = {}; GWKCubicComputeWeights(dfDeltaY, adfCoeffsY); *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens); *pdfReal = CONVOL4(adfCoeffsY, adfValueReal); *pdfImag = CONVOL4(adfCoeffsY, adfValueImag); return true; } // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero // perf benefit. #if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) /************************************************************************/ /* XMMLoad4Values() */ /* */ /* Load 4 packed byte or uint16, cast them to float and put them in a */ /* m128 register. */ /************************************************************************/ static CPL_INLINE __m128 XMMLoad4Values(const GByte* ptr) { #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS unsigned int i; memcpy(&i, ptr, 4); __m128i xmm_i = _mm_cvtsi32_si128(s); #else __m128i xmm_i = _mm_cvtsi32_si128(*(unsigned int*)(ptr)); #endif // Zero extend 4 packed unsigned 8-bit integers in a to packed // 32-bit integers. #if __SSE4_1__ xmm_i = _mm_cvtepu8_epi32(xmm_i); #else xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128()); xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128()); #endif return _mm_cvtepi32_ps(xmm_i); } static CPL_INLINE __m128 XMMLoad4Values(const GUInt16* ptr) { #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS GUInt64 i; memcpy(&i, ptr, 8); __m128i xmm_i = _mm_cvtsi64_si128(s); #else __m128i xmm_i = _mm_cvtsi64_si128(*(GUInt64*)(ptr)); #endif // Zero extend 4 packed unsigned 16-bit integers in a to packed // 32-bit integers. #if __SSE4_1__ xmm_i = _mm_cvtepu16_epi32(xmm_i); #else xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128()); #endif return _mm_cvtepi32_ps(xmm_i); } /************************************************************************/ /* XMMHorizontalAdd() */ /* */ /* Return the sum of the 4 floating points of the register. */ /************************************************************************/ #if __SSE3__ static CPL_INLINE float XMMHorizontalAdd(__m128 v) { __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1) __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0) shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2) sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2) return _mm_cvtss_f32(sums); } #else static CPL_INLINE float XMMHorizontalAdd(__m128 v) { __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2) __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0) shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1) sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1) return _mm_cvtss_f32(sums); } #endif #endif // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) /************************************************************************/ /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */ /************************************************************************/ // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16, // because there are a few assumptions above those types. template static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal ) { const int iSrcX = static_cast(dfSrcX - 0.5); const int iSrcY = static_cast(dfSrcY - 0.5); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * poWK->nSrcXSize; // Get the bilinear interpolation at the image borders. if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize ) { double adfImagIgnored[4] = {}; return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, adfImagIgnored); } #if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) const float fDeltaX = static_cast(dfSrcX) - 0.5f - iSrcX; const float fDeltaY = static_cast(dfSrcY) - 0.5f - iSrcY; // TODO(schwehr): Explain the magic numbers. float afTemp[4 + 4 + 4 + 1]; float* pafAligned = reinterpret_cast(afTemp + ((size_t)afTemp & 0xf)); float* pafCoeffs = pafAligned; float* pafDensity = pafAligned + 4; float* pafValue = pafAligned + 8; const float fHalfDeltaX = 0.5f * fDeltaX; const float fThreeDeltaX = 3.0f * fDeltaX; const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX; pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX)); pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX); pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX)); pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX); __m128 xmmCoeffs = _mm_load_ps(pafCoeffs); const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD); __m128 xmmMaskLowDensity = _mm_setzero_ps(); for( GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3; i++, iOffset += poWK->nSrcXSize ) { const __m128 xmmDensity = _mm_loadu_ps( poWK->pafUnifiedSrcDensity + iOffset); xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity, _mm_cmplt_ps(xmmDensity, xmmThreshold)); pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity)); const __m128 xmmValues = XMMLoad4Values( ((T*) poWK->papabySrcImage[iBand]) + iOffset); pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues)); } if( _mm_movemask_ps(xmmMaskLowDensity) ) { double adfImagIgnored[4] = {}; return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, adfImagIgnored ); } const float fHalfDeltaY = 0.5f * fDeltaY; const float fThreeDeltaY = 3.0f * fDeltaY; const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY; pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY)); pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY); pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY)); pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY); xmmCoeffs = _mm_load_ps(pafCoeffs); const __m128 xmmDensity = _mm_load_ps(pafDensity); const __m128 xmmValue = _mm_load_ps(pafValue); *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity)); *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue)); // We did all above computations on float32 whereas the general case is // float64. Not sure if one is fundamentally more correct than the other // one, but we want our optimization to give the same result as the // general case as much as possible, so if the resulting value is // close to some_int_value + 0.5, redo the computation with the general // case. // Note: If other types than Byte or UInt16, will need changes. if( fabs(*pdfReal - static_cast(*pdfReal) - 0.5) > .007 ) return true; #endif // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; double adfValueDens[4] = {}; double adfValueReal[4] = {}; double adfCoeffsX[4] = {}; GWKCubicComputeWeights(dfDeltaX, adfCoeffsX); double adfCoeffsY[4] = {}; GWKCubicComputeWeights(dfDeltaY, adfCoeffsY); for( GPtrDiff_t i = -1; i < 3; i++ ) { const GPtrDiff_t iOffset = iSrcOffset+i*poWK->nSrcXSize - 1; #if !(defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))) if( poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD || poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD || poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD || poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD ) { double adfImagIgnored[4] = {}; return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, adfImagIgnored ); } #endif adfValueDens[i + 1] = CONVOL4( adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset); adfValueReal[i + 1] = CONVOL4( adfCoeffsX, reinterpret_cast(poWK->papabySrcImage[iBand]) + iOffset); } *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens); *pdfReal = CONVOL4(adfCoeffsY, adfValueReal); return true; } /************************************************************************/ /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */ /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */ /************************************************************************/ static bool GWKCubicResampleSrcMaskIsDensity4SampleReal( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal ) { const int iSrcX = static_cast(dfSrcX - 0.5); const int iSrcY = static_cast(dfSrcY - 0.5); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * poWK->nSrcXSize; const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; // Get the bilinear interpolation at the image borders. if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize ) { double adfImagIgnored[4] = {}; return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, adfImagIgnored); } double adfCoeffsX[4] = {}; GWKCubicComputeWeights(dfDeltaX, adfCoeffsX); double adfCoeffsY[4] = {}; GWKCubicComputeWeights(dfDeltaY, adfCoeffsY); double adfValueDens[4] = {}; double adfValueReal[4] = {}; double adfDensity[4] = {}; double adfReal[4] = {}; double adfImagIgnored[4] = {}; for( GPtrDiff_t i = -1; i < 3; i++ ) { if( !GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1, 2, adfDensity, adfReal, adfImagIgnored) || adfDensity[0] < SRC_DENSITY_THRESHOLD || adfDensity[1] < SRC_DENSITY_THRESHOLD || adfDensity[2] < SRC_DENSITY_THRESHOLD || adfDensity[3] < SRC_DENSITY_THRESHOLD ) { return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, adfImagIgnored ); } adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity); adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal); } *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens); *pdfReal = CONVOL4(adfCoeffsY, adfValueReal); return true; } template static bool GWKCubicResampleNoMasks4SampleT( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, T *pValue ) { const int iSrcX = static_cast(dfSrcX - 0.5); const int iSrcY = static_cast(dfSrcY - 0.5); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * poWK->nSrcXSize; const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; const double dfDeltaY2 = dfDeltaY * dfDeltaY; const double dfDeltaY3 = dfDeltaY2 * dfDeltaY; // Get the bilinear interpolation at the image borders. if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize ) return GWKBilinearResampleNoMasks4SampleT ( poWK, iBand, dfSrcX, dfSrcY, pValue ); double adfCoeffs[4] = {}; GWKCubicComputeWeights(dfDeltaX, adfCoeffs); double adfValue[4] = {}; for( GPtrDiff_t i = -1; i < 3; i++ ) { const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1; adfValue[i + 1] = CONVOL4( adfCoeffs, reinterpret_cast(poWK->papabySrcImage[iBand]) + iOffset); } const double dfValue = CubicConvolution( dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0], adfValue[1], adfValue[2], adfValue[3]); *pValue = GWKClampValueT(dfValue); return true; } /************************************************************************/ /* GWKLanczosSinc() */ /************************************************************************/ /* * Lanczos windowed sinc interpolation kernel with radius r. * / * | sinc(x) * sinc(x/r), if |x| < r * L(x) = | 1, if x = 0 , * | 0, otherwise * \ * * where sinc(x) = sin(PI * x) / (PI * x). */ static double GWKLanczosSinc( double dfX ) { if( dfX == 0.0 ) return 1.0; const double dfPIX = M_PI * dfX; const double dfPIXoverR = dfPIX / 3; const double dfPIX2overR = dfPIX * dfPIXoverR; return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR; } static double GWKLanczosSinc4Values( double* padfValues ) { for( int i = 0; i < 4; i++ ) { if( padfValues[i] == 0.0 ) { padfValues[i] = 1.0; } else { const double dfPIX = M_PI * padfValues[i]; const double dfPIXoverR = dfPIX / 3; const double dfPIX2overR = dfPIX * dfPIXoverR; padfValues[i] = sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR; } } return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3]; } /************************************************************************/ /* GWKBilinear() */ /************************************************************************/ static double GWKBilinear( double dfX ) { double dfAbsX = fabs(dfX); if( dfAbsX <= 1.0 ) return 1 - dfAbsX; else return 0.0; } static double GWKBilinear4Values( double* padfValues ) { double dfAbsX0 = fabs(padfValues[0]); double dfAbsX1 = fabs(padfValues[1]); double dfAbsX2 = fabs(padfValues[2]); double dfAbsX3 = fabs(padfValues[3]); if( dfAbsX0 <= 1.0 ) padfValues[0] = 1 - dfAbsX0; else padfValues[0] = 0.0; if( dfAbsX1 <= 1.0 ) padfValues[1] = 1 - dfAbsX1; else padfValues[1] = 0.0; if( dfAbsX2 <= 1.0 ) padfValues[2] = 1 - dfAbsX2; else padfValues[2] = 0.0; if( dfAbsX3 <= 1.0 ) padfValues[3] = 1 - dfAbsX3; else padfValues[3] = 0.0; return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3]; } /************************************************************************/ /* GWKCubic() */ /************************************************************************/ static double GWKCubic( double dfX ) { // http://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm // W(x) formula with a = -0.5 (cubic hermite spline ) // or https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf // k(x) (formula 8) with (B,C)=(0,0.5) the Catmull-Rom spline double dfAbsX = fabs(dfX); if( dfAbsX <= 1.0 ) { double dfX2 = dfX * dfX; return dfX2 * (1.5 * dfAbsX - 2.5) + 1; } else if( dfAbsX <= 2.0 ) { double dfX2 = dfX * dfX; return dfX2 * (-0.5 * dfAbsX + 2.5) - 4 * dfAbsX + 2; } else return 0.0; } static double GWKCubic4Values( double* padfValues ) { const double dfAbsX_0 = fabs(padfValues[0]); const double dfAbsX_1 = fabs(padfValues[1]); const double dfAbsX_2 = fabs(padfValues[2]); const double dfAbsX_3 = fabs(padfValues[3]); const double dfX2_0 = padfValues[0] * padfValues[0]; const double dfX2_1 = padfValues[1] * padfValues[1]; const double dfX2_2 = padfValues[2] * padfValues[2]; const double dfX2_3 = padfValues[3] * padfValues[3]; double dfVal0 = 0.0; if( dfAbsX_0 <= 1.0 ) dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0; else if( dfAbsX_0 <= 2.0 ) dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0; double dfVal1 = 0.0; if( dfAbsX_1 <= 1.0 ) dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0; else if( dfAbsX_1 <= 2.0 ) dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0; double dfVal2 = 0.0; if( dfAbsX_2 <= 1.0 ) dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0; else if( dfAbsX_2 <= 2.0 ) dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0; double dfVal3 = 0.0; if( dfAbsX_3 <= 1.0 ) dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0; else if( dfAbsX_3 <= 2.0 ) dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0; padfValues[0] = dfVal0; padfValues[1] = dfVal1; padfValues[2] = dfVal2; padfValues[3] = dfVal3; return dfVal0 + dfVal1 + dfVal2 + dfVal3; } /************************************************************************/ /* GWKBSpline() */ /************************************************************************/ // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf // Equation 8 with (B,C)=(1,0) // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1 // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2 static double GWKBSpline( double x ) { const double xp2 = x + 2.0; const double xp1 = x + 1.0; const double xm1 = x - 1.0; // This will most likely be used, so we'll compute it ahead of time to // avoid stalling the processor. const double xp2c = xp2 * xp2 * xp2; // Note that the test is computed only if it is needed. // TODO(schwehr): Make this easier to follow. return xp2 > 0.0 ? ((xp1 > 0.0)?((x > 0.0)?((xm1 > 0.0)? -4.0 * xm1*xm1*xm1:0.0) + 6.0 * x*x*x:0.0) + -4.0 * xp1*xp1*xp1:0.0) + xp2c : 0.0; // * 0.166666666666666666666 } static double GWKBSpline4Values( double* padfValues ) { for( int i = 0; i < 4; i++ ) { const double x = padfValues[i]; const double xp2 = x + 2.0; const double xp1 = x + 1.0; const double xm1 = x - 1.0; // This will most likely be used, so we'll compute it ahead of time to // avoid stalling the processor. const double xp2c = xp2 * xp2 * xp2; // Note that the test is computed only if it is needed. // TODO(schwehr): Make this easier to follow. padfValues[i] = (xp2 > 0.0)?((xp1 > 0.0)?((x > 0.0)?((xm1 > 0.0)? -4.0 * xm1*xm1*xm1:0.0) + 6.0 * x*x*x:0.0) + -4.0 * xp1*xp1*xp1:0.0) + xp2c:0.0; // * 0.166666666666666666666 } return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3]; } /************************************************************************/ /* GWKResampleWrkStruct */ /************************************************************************/ typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct; typedef bool (*pfnGWKResampleType) ( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag, GWKResampleWrkStruct* psWrkStruct ); struct _GWKResampleWrkStruct { pfnGWKResampleType pfnGWKResample; // Space for saved X weights. double *padfWeightsX; bool *pabCalcX; double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos. int iLastSrcX; // Only used by GWKResampleOptimizedLanczos. int iLastSrcY; // Only used by GWKResampleOptimizedLanczos. double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos. double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos. // Space for saving a row of pixels. double *padfRowDensity; double *padfRowReal; double *padfRowImag; }; /************************************************************************/ /* GWKResampleCreateWrkStruct() */ /************************************************************************/ static bool GWKResample( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag, GWKResampleWrkStruct* psWrkStruct ); static bool GWKResampleOptimizedLanczos( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag, GWKResampleWrkStruct* psWrkStruct ); static GWKResampleWrkStruct* GWKResampleCreateWrkStruct(GDALWarpKernel *poWK) { const int nXDist = ( poWK->nXRadius + 1 ) * 2; const int nYDist = ( poWK->nYRadius + 1 ) * 2; GWKResampleWrkStruct* psWrkStruct = static_cast( CPLMalloc(sizeof(GWKResampleWrkStruct))); // Alloc space for saved X weights. psWrkStruct->padfWeightsX = static_cast(CPLCalloc( nXDist, sizeof(double))); psWrkStruct->pabCalcX = static_cast(CPLMalloc(nXDist * sizeof(bool))); psWrkStruct->padfWeightsY = static_cast(CPLCalloc(nYDist, sizeof(double))); psWrkStruct->iLastSrcX = -10; psWrkStruct->iLastSrcY = -10; psWrkStruct->dfLastDeltaX = -10; psWrkStruct->dfLastDeltaY = -10; // Alloc space for saving a row of pixels. if( poWK->pafUnifiedSrcDensity == nullptr && poWK->panUnifiedSrcValid == nullptr && poWK->papanBandSrcValid == nullptr ) { psWrkStruct->padfRowDensity = nullptr; } else { psWrkStruct->padfRowDensity = static_cast(CPLCalloc(nXDist, sizeof(double))); } psWrkStruct->padfRowReal = static_cast(CPLCalloc(nXDist, sizeof(double))); psWrkStruct->padfRowImag = static_cast(CPLCalloc(nXDist, sizeof(double))); if( poWK->eResample == GRA_Lanczos ) { psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos; const double dfXScale = poWK->dfXScale; if( dfXScale < 1.0 ) { int iMin = poWK->nFiltInitX; int iMax = poWK->nXRadius; while( iMin * dfXScale < -3.0 ) iMin++; while( iMax * dfXScale > 3.0 ) iMax--; for( int i = iMin; i <= iMax; ++i ) { psWrkStruct->padfWeightsX[i-poWK->nFiltInitX] = GWKLanczosSinc(i * dfXScale); } } const double dfYScale = poWK->dfYScale; if( dfYScale < 1.0 ) { int jMin = poWK->nFiltInitY; int jMax = poWK->nYRadius; while( jMin * dfYScale < -3.0 ) jMin++; while( jMax * dfYScale > 3.0 ) jMax--; for( int j = jMin; j <= jMax; ++j ) { psWrkStruct->padfWeightsY[j-poWK->nFiltInitY] = GWKLanczosSinc(j * dfYScale); } } } else psWrkStruct->pfnGWKResample = GWKResample; return psWrkStruct; } /************************************************************************/ /* GWKResampleDeleteWrkStruct() */ /************************************************************************/ static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct* psWrkStruct) { CPLFree( psWrkStruct->padfWeightsX ); CPLFree( psWrkStruct->padfWeightsY ); CPLFree( psWrkStruct->pabCalcX ); CPLFree( psWrkStruct->padfRowDensity ); CPLFree( psWrkStruct->padfRowReal ); CPLFree( psWrkStruct->padfRowImag ); CPLFree( psWrkStruct ); } /************************************************************************/ /* GWKResample() */ /************************************************************************/ static bool GWKResample( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag, GWKResampleWrkStruct* psWrkStruct ) { // Save as local variables to avoid following pointers in loops. const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; double dfAccumulatorReal = 0.0; double dfAccumulatorImag = 0.0; double dfAccumulatorDensity = 0.0; double dfAccumulatorWeight = 0.0; const int iSrcX = static_cast(floor(dfSrcX - 0.5)); const int iSrcY = static_cast(floor(dfSrcY - 0.5)); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; const double dfXScale = poWK->dfXScale; const double dfYScale = poWK->dfYScale; const int nXDist = ( poWK->nXRadius + 1 ) * 2; // Space for saved X weights. double *padfWeightsX = psWrkStruct->padfWeightsX; bool *pabCalcX = psWrkStruct->pabCalcX; // Space for saving a row of pixels. double *padfRowDensity = psWrkStruct->padfRowDensity; double *padfRowReal = psWrkStruct->padfRowReal; double *padfRowImag = psWrkStruct->padfRowImag; // Mark as needing calculation (don't calculate the weights yet, // because a mask may render it unnecessary). memset( pabCalcX, false, nXDist * sizeof(bool) ); FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample]; CPLAssert(pfnGetWeight); // Skip sampling over edge of image. int j = poWK->nFiltInitY; int jMax= poWK->nYRadius; if( iSrcY + j < 0 ) j = -iSrcY; if( iSrcY + jMax >= nSrcYSize ) jMax = nSrcYSize - iSrcY - 1; int iMin = poWK->nFiltInitX; int iMax = poWK->nXRadius; if( iSrcX + iMin < 0 ) iMin = -iSrcX; if( iSrcX + iMax >= nSrcXSize ) iMax = nSrcXSize - iSrcX - 1; const int bXScaleBelow1 = ( dfXScale < 1.0 ); const int bYScaleBelow1 = ( dfYScale < 1.0 ); GPtrDiff_t iRowOffset = iSrcOffset + static_cast(j - 1) * nSrcXSize + iMin; // Loop over pixel rows in the kernel. for( ; j <= jMax; ++j ) { iRowOffset += nSrcXSize; // Get pixel values. // We can potentially read extra elements after the "normal" end of the // source arrays, but the contract of papabySrcImage[iBand], // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity // is to have WARP_EXTRA_ELTS reserved at their end. if( !GWKGetPixelRow( poWK, iBand, iRowOffset, (iMax-iMin+2)/2, padfRowDensity, padfRowReal, padfRowImag ) ) continue; // Calculate the Y weight. double dfWeight1 = ( bYScaleBelow1 ) ? pfnGetWeight((j - dfDeltaY) * dfYScale): pfnGetWeight(j - dfDeltaY); // Iterate over pixels in row. double dfAccumulatorRealLocal = 0.0; double dfAccumulatorImagLocal = 0.0; double dfAccumulatorDensityLocal = 0.0; double dfAccumulatorWeightLocal = 0.0; for( int i = iMin; i <= iMax; ++i ) { // Skip sampling if pixel has zero density. if( padfRowDensity != nullptr && padfRowDensity[i-iMin] < SRC_DENSITY_THRESHOLD ) continue; double dfWeight2 = 0.0; // Make or use a cached set of weights for this row. if( pabCalcX[i-iMin] ) { // Use saved weight value instead of recomputing it. dfWeight2 = padfWeightsX[i-iMin]; } else { // Calculate & save the X weight. padfWeightsX[i-iMin] = dfWeight2 = ( bXScaleBelow1 ) ? pfnGetWeight((i - dfDeltaX) * dfXScale): pfnGetWeight(i - dfDeltaX); pabCalcX[i-iMin] = true; } // Accumulate! dfAccumulatorRealLocal += padfRowReal[i-iMin] * dfWeight2; dfAccumulatorImagLocal += padfRowImag[i-iMin] * dfWeight2; if( padfRowDensity != nullptr ) dfAccumulatorDensityLocal += padfRowDensity[i-iMin] * dfWeight2; dfAccumulatorWeightLocal += dfWeight2; } dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1; dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1; dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1; dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1; } if( dfAccumulatorWeight < 0.000001 || (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001) ) { *pdfDensity = 0.0; return false; } // Calculate the output taking into account weighting. if( dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001 ) { *pdfReal = dfAccumulatorReal / dfAccumulatorWeight; *pdfImag = dfAccumulatorImag / dfAccumulatorWeight; if( padfRowDensity != nullptr ) *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight; else *pdfDensity = 1.0; } else { *pdfReal = dfAccumulatorReal; *pdfImag = dfAccumulatorImag; if( padfRowDensity != nullptr ) *pdfDensity = dfAccumulatorDensity; else *pdfDensity = 1.0; } return true; } /************************************************************************/ /* GWKResampleOptimizedLanczos() */ /************************************************************************/ static bool GWKResampleOptimizedLanczos( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag, GWKResampleWrkStruct* psWrkStruct ) { // Save as local variables to avoid following pointers in loops. const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; double dfAccumulatorReal = 0.0; double dfAccumulatorImag = 0.0; double dfAccumulatorDensity = 0.0; double dfAccumulatorWeight = 0.0; const int iSrcX = static_cast(floor( dfSrcX - 0.5 )); const int iSrcY = static_cast(floor( dfSrcY - 0.5 )); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; const double dfXScale = poWK->dfXScale; const double dfYScale = poWK->dfYScale; // Space for saved X weights. double *padfWeightsX = psWrkStruct->padfWeightsX; double *padfWeightsY = psWrkStruct->padfWeightsY; // Space for saving a row of pixels. double *padfRowDensity = psWrkStruct->padfRowDensity; double *padfRowReal = psWrkStruct->padfRowReal; double *padfRowImag = psWrkStruct->padfRowImag; // Skip sampling over edge of image. int jMin = poWK->nFiltInitY; int jMax = poWK->nYRadius; if( iSrcY + jMin < 0 ) jMin = -iSrcY; if( iSrcY + jMax >= nSrcYSize ) jMax = nSrcYSize - iSrcY - 1; int iMin = poWK->nFiltInitX; int iMax = poWK->nXRadius; if( iSrcX + iMin < 0 ) iMin = -iSrcX; if( iSrcX + iMax >= nSrcXSize ) iMax = nSrcXSize - iSrcX - 1; if( dfXScale < 1.0 ) { while( iMin * dfXScale < -3.0 ) iMin++; while( iMax * dfXScale > 3.0 ) iMax--; // padfWeightsX computed in GWKResampleCreateWrkStruct. } else { while( iMin - dfDeltaX < -3.0 ) iMin++; while( iMax - dfDeltaX > 3.0 ) iMax--; if( iSrcX != psWrkStruct->iLastSrcX || dfDeltaX != psWrkStruct->dfLastDeltaX ) { // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the // following trigonometric formulas. // TODO(schwehr): Move this somewhere where it can be rendered at LaTeX. // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) + cos(M_PI * dfBase) * sin(M_PI * k) // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k) // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1) // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) + cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k) // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k) const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX); const double dfSin2PIDeltaXOver3 = dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3; // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2. const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3); const double dfSinPIDeltaX = (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3; const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI); const double dfInvPI2Over3xSinPIDeltaX = dfInvPI2Over3 * dfSinPIDeltaX; const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 = -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3; const double dfSinPIOver3 = 0.8660254037844386; const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 = dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3; const double padfCst[] = { dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3, dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 - dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3, dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 + dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 }; for( int i = iMin; i <= iMax; ++i ) { const double dfX = i - dfDeltaX; if( dfX == 0.0 ) padfWeightsX[i-poWK->nFiltInitX] = 1.0; else padfWeightsX[i-poWK->nFiltInitX] = padfCst[(i + 3) % 3] / (dfX * dfX); #if DEBUG_VERBOSE // TODO(schwehr): AlmostEqual. //CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] - // GWKLanczosSinc(dfX, 3.0)) < 1e-10); #endif } psWrkStruct->iLastSrcX = iSrcX; psWrkStruct->dfLastDeltaX = dfDeltaX; } } if( dfYScale < 1.0 ) { while( jMin * dfYScale < -3.0 ) jMin++; while( jMax * dfYScale > 3.0 ) jMax--; // padfWeightsY computed in GWKResampleCreateWrkStruct. } else { while( jMin - dfDeltaY < -3.0 ) jMin++; while( jMax - dfDeltaY > 3.0 ) jMax--; if( iSrcY != psWrkStruct->iLastSrcY || dfDeltaY != psWrkStruct->dfLastDeltaY ) { const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY); const double dfSin2PIDeltaYOver3 = dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3; // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2. const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3); const double dfSinPIDeltaY = (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3; const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI); const double dfInvPI2Over3xSinPIDeltaY = dfInvPI2Over3 * dfSinPIDeltaY; const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 = -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3; const double dfSinPIOver3 = 0.8660254037844386; const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 = dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3; const double padfCst[] = { dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3, dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 - dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3, dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 + dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 }; for( int j = jMin; j <= jMax; ++j ) { const double dfY = j - dfDeltaY; if( dfY == 0.0 ) padfWeightsY[j-poWK->nFiltInitY] = 1.0; else padfWeightsY[j-poWK->nFiltInitY] = padfCst[(j + 3) % 3] / (dfY * dfY); #if DEBUG_VERBOSE // TODO(schwehr): AlmostEqual. //CPLAssert(fabs(padfWeightsY[j-poWK->nFiltInitY] - // GWKLanczosSinc(dfY, 3.0)) < 1e-10); #endif } psWrkStruct->iLastSrcY = iSrcY; psWrkStruct->dfLastDeltaY = dfDeltaY; } } GPtrDiff_t iRowOffset = iSrcOffset + static_cast(jMin - 1) * nSrcXSize + iMin; // If we have no density information, we can simply compute the // accumulated weight. if( padfRowDensity == nullptr ) { double dfRowAccWeight = 0.0; for( int i = iMin; i <= iMax; ++i ) { dfRowAccWeight += padfWeightsX[i-poWK->nFiltInitX]; } double dfColAccWeight = 0.0; for( int j = jMin; j <= jMax; ++j ) { dfColAccWeight += padfWeightsY[j-poWK->nFiltInitY]; } dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight; } const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType); // Loop over pixel rows in the kernel. int nCountValid = 0; for( int j = jMin; j <= jMax; ++j ) { iRowOffset += nSrcXSize; // Get pixel values. // We can potentially read extra elements after the "normal" end of the // source arrays, but the contract of papabySrcImage[iBand], // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity // is to have WARP_EXTRA_ELTS reserved at their end. if( !GWKGetPixelRow( poWK, iBand, iRowOffset, (iMax-iMin+2)/2, padfRowDensity, padfRowReal, padfRowImag ) ) continue; const double dfWeight1 = padfWeightsY[j-poWK->nFiltInitY]; // Iterate over pixels in row. if( padfRowDensity != nullptr ) { for( int i = iMin; i <= iMax; ++i ) { // Skip sampling if pixel has zero density. if( padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD ) continue; nCountValid ++; // Use a cached set of weights for this row. const double dfWeight2 = dfWeight1 * padfWeightsX[i- poWK->nFiltInitX]; // Accumulate! dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2; dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2; dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2; dfAccumulatorWeight += dfWeight2; } } else if( bIsNonComplex ) { double dfRowAccReal = 0.0; for( int i = iMin; i <= iMax; ++i ) { const double dfWeight2 = padfWeightsX[i- poWK->nFiltInitX]; // Accumulate! dfRowAccReal += padfRowReal[i - iMin] * dfWeight2; } dfAccumulatorReal += dfRowAccReal * dfWeight1; } else { double dfRowAccReal = 0.0; double dfRowAccImag = 0.0; for( int i = iMin; i <= iMax; ++i ) { const double dfWeight2 = padfWeightsX[i- poWK->nFiltInitX]; // Accumulate! dfRowAccReal += padfRowReal[i - iMin] * dfWeight2; dfRowAccImag += padfRowImag[i - iMin] * dfWeight2; } dfAccumulatorReal += dfRowAccReal * dfWeight1; dfAccumulatorImag += dfRowAccImag * dfWeight1; } } if( dfAccumulatorWeight < 0.000001 || (padfRowDensity != nullptr && ( dfAccumulatorDensity < 0.000001 || nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1)/2)) ) { *pdfDensity = 0.0; return false; } // Calculate the output taking into account weighting. if( dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001 ) { const double dfInvAcc = 1.0 / dfAccumulatorWeight; *pdfReal = dfAccumulatorReal * dfInvAcc; *pdfImag = dfAccumulatorImag * dfInvAcc; if( padfRowDensity != nullptr ) *pdfDensity = dfAccumulatorDensity * dfInvAcc; else *pdfDensity = 1.0; } else { *pdfReal = dfAccumulatorReal; *pdfImag = dfAccumulatorImag; if( padfRowDensity != nullptr ) *pdfDensity = dfAccumulatorDensity; else *pdfDensity = 1.0; } return true; } /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template static bool GWKResampleNoMasksT( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, T *pValue, double *padfWeight ) { // Commonly used; save locally. const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; const int iSrcX = static_cast(floor(dfSrcX - 0.5)); const int iSrcY = static_cast(floor(dfSrcY - 0.5)); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; const int nXRadius = poWK->nXRadius; const int nYRadius = poWK->nYRadius; // Politely refuse to process invalid coordinates or obscenely small image. if( iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize || nYRadius > nSrcYSize ) return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY, pValue); T* pSrcBand = reinterpret_cast(poWK->papabySrcImage[iBand]); const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; const FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample]; CPLAssert(pfnGetWeight); const FilterFunc4ValuesType pfnGetWeight4Values = apfGWKFilter4Values[poWK->eResample]; CPLAssert(pfnGetWeight4Values); const double dfXScale = std::min(poWK->dfXScale, 1.0); const double dfYScale = std::min(poWK->dfYScale, 1.0); // Loop over all rows in the kernel. double dfAccumulatorWeightHorizontal = 0.0; double dfAccumulatorWeightVertical = 0.0; int iMin = 1 - nXRadius; if( iSrcX + iMin < 0 ) iMin = -iSrcX; int iMax = nXRadius; if( iSrcX + iMax >= nSrcXSize-1 ) iMax = nSrcXSize-1 - iSrcX; int i = iMin; // Used after for. int iC = 0; // Used after for. for( ; i+2 < iMax; i += 4, iC += 4 ) { padfWeight[iC] = (i - dfDeltaX) * dfXScale; padfWeight[iC+1] = padfWeight[iC] + dfXScale; padfWeight[iC+2] = padfWeight[iC+1] + dfXScale; padfWeight[iC+3] = padfWeight[iC+2] + dfXScale; dfAccumulatorWeightHorizontal += pfnGetWeight4Values(padfWeight+iC); } for( ; i <= iMax; ++i, ++iC ) { const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale); padfWeight[iC] = dfWeight; dfAccumulatorWeightHorizontal += dfWeight; } int j = 1 - nYRadius; if( iSrcY + j < 0 ) j = -iSrcY; int jMax = nYRadius; if( iSrcY + jMax >= nSrcYSize-1 ) jMax = nSrcYSize-1 - iSrcY; double dfAccumulator = 0.0; for( ; j <= jMax; ++j ) { const GPtrDiff_t iSampJ = iSrcOffset + static_cast(j) * nSrcXSize; // Loop over all pixels in the row. double dfAccumulatorLocal = 0.0; double dfAccumulatorLocal2 = 0.0; iC = 0; i = iMin; // Process by chunk of 4 cols. for( ; i+2 < iMax; i += 4, iC += 4 ) { // Retrieve the pixel & accumulate. dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC]; dfAccumulatorLocal += pSrcBand[i+1+iSampJ] * padfWeight[iC+1]; dfAccumulatorLocal2 += pSrcBand[i+2+iSampJ] * padfWeight[iC+2]; dfAccumulatorLocal2 += pSrcBand[i+3+iSampJ] * padfWeight[iC+3]; } dfAccumulatorLocal += dfAccumulatorLocal2; if( i < iMax ) { dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC]; dfAccumulatorLocal += pSrcBand[i+1+iSampJ] * padfWeight[iC+1]; i += 2; iC += 2; } if( i == iMax ) { dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC]; } // Calculate the Y weight. const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale); dfAccumulator += dfWeight * dfAccumulatorLocal; dfAccumulatorWeightVertical += dfWeight; } const double dfAccumulatorWeight = dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical; *pValue = GWKClampValueT(dfAccumulator / dfAccumulatorWeight); return true; } /* We restrict to 64bit processors because they are guaranteed to have SSE2 */ /* Could possibly be used too on 32bit, but we would need to check at runtime */ #if defined(__x86_64) || defined(_M_X64) /************************************************************************/ /* GWKResampleNoMasks_SSE2_T() */ /************************************************************************/ template static bool GWKResampleNoMasks_SSE2_T( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, T *pValue, double *padfWeight ) { // Commonly used; save locally. const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; const int iSrcX = static_cast(floor(dfSrcX - 0.5)); const int iSrcY = static_cast(floor(dfSrcY - 0.5)); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; const int nXRadius = poWK->nXRadius; const int nYRadius = poWK->nYRadius; // Politely refuse to process invalid coordinates or obscenely small image. if( iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize || nYRadius > nSrcYSize ) return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY, pValue); const T* pSrcBand = reinterpret_cast(poWK->papabySrcImage[iBand]); const FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample]; CPLAssert(pfnGetWeight); const FilterFunc4ValuesType pfnGetWeight4Values = apfGWKFilter4Values[poWK->eResample]; CPLAssert(pfnGetWeight4Values); const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; const double dfXScale = std::min(poWK->dfXScale, 1.0); const double dfYScale = std::min(poWK->dfYScale, 1.0); // Loop over all rows in the kernel. double dfAccumulatorWeightHorizontal = 0.0; double dfAccumulatorWeightVertical = 0.0; double dfAccumulator = 0.0; int iMin = 1 - nXRadius; if( iSrcX + iMin < 0 ) iMin = -iSrcX; int iMax = nXRadius; if( iSrcX + iMax >= nSrcXSize-1 ) iMax = nSrcXSize-1 - iSrcX; int i, iC; for( iC = 0, i = iMin; i+2 < iMax; i+=4, iC+=4 ) { padfWeight[iC] = (i - dfDeltaX) * dfXScale; padfWeight[iC+1] = padfWeight[iC] + dfXScale; padfWeight[iC+2] = padfWeight[iC+1] + dfXScale; padfWeight[iC+3] = padfWeight[iC+2] + dfXScale; dfAccumulatorWeightHorizontal += pfnGetWeight4Values(padfWeight+iC); } for( ; i <= iMax; ++i, ++iC ) { double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale); padfWeight[iC] = dfWeight; dfAccumulatorWeightHorizontal += dfWeight; } int j = 1 - nYRadius; if( iSrcY + j < 0 ) j = -iSrcY; int jMax = nYRadius; if( iSrcY + jMax >= nSrcYSize-1 ) jMax = nSrcYSize-1 - iSrcY; // Process by chunk of 4 rows. for( ; j+2 < jMax; j+=4 ) { const GPtrDiff_t iSampJ = iSrcOffset + static_cast(j) * nSrcXSize; // Loop over all pixels in the row. iC = 0; i = iMin; // Process by chunk of 4 cols. XMMReg4Double v_acc_1 = XMMReg4Double::Zero(); XMMReg4Double v_acc_2 = XMMReg4Double::Zero(); XMMReg4Double v_acc_3 = XMMReg4Double::Zero(); XMMReg4Double v_acc_4 = XMMReg4Double::Zero(); for( ; i+2 < iMax; i+=4, iC+=4 ) { // Retrieve the pixel & accumulate. XMMReg4Double v_pixels_1 = XMMReg4Double::Load4Val(pSrcBand+i+iSampJ); XMMReg4Double v_pixels_2 = XMMReg4Double::Load4Val(pSrcBand+i+iSampJ+nSrcXSize); XMMReg4Double v_pixels_3 = XMMReg4Double::Load4Val(pSrcBand+i+iSampJ+2*nSrcXSize); XMMReg4Double v_pixels_4 = XMMReg4Double::Load4Val(pSrcBand+i+iSampJ+3*nSrcXSize); XMMReg4Double v_padfWeight = XMMReg4Double::Load4Val(padfWeight + iC); v_acc_1 += v_pixels_1 * v_padfWeight; v_acc_2 += v_pixels_2 * v_padfWeight; v_acc_3 += v_pixels_3 * v_padfWeight; v_acc_4 += v_pixels_4 * v_padfWeight; } if( i < iMax ) { XMMReg2Double v_pixels_1 = XMMReg2Double::Load2Val(pSrcBand+i+iSampJ); XMMReg2Double v_pixels_2 = XMMReg2Double::Load2Val(pSrcBand+i+iSampJ+nSrcXSize); XMMReg2Double v_pixels_3 = XMMReg2Double::Load2Val(pSrcBand+i+iSampJ+2*nSrcXSize); XMMReg2Double v_pixels_4 = XMMReg2Double::Load2Val(pSrcBand+i+iSampJ+3*nSrcXSize); XMMReg2Double v_padfWeight = XMMReg2Double::Load2Val(padfWeight + iC); v_acc_1.AddToLow( v_pixels_1 * v_padfWeight ); v_acc_2.AddToLow( v_pixels_2 * v_padfWeight ); v_acc_3.AddToLow( v_pixels_3 * v_padfWeight ); v_acc_4.AddToLow( v_pixels_4 * v_padfWeight ); i+=2; iC+=2; } double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum(); double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum(); double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum(); double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum(); if( i == iMax ) { dfAccumulatorLocal_1 += static_cast(pSrcBand[i+iSampJ]) * padfWeight[iC]; dfAccumulatorLocal_2 += static_cast(pSrcBand[i+iSampJ + nSrcXSize]) * padfWeight[iC]; dfAccumulatorLocal_3 += static_cast(pSrcBand[i+iSampJ + 2 * nSrcXSize]) * padfWeight[iC]; dfAccumulatorLocal_4 += static_cast(pSrcBand[i+iSampJ + 3 * nSrcXSize]) * padfWeight[iC]; } // Calculate the Y weight. const double dfWeight0 = (j - dfDeltaY) * dfYScale; const double dfWeight1 = dfWeight0 + dfYScale; const double dfWeight2 = dfWeight1 + dfYScale; const double dfWeight3 = dfWeight2 + dfYScale; double adfWeight[4] = { dfWeight0, dfWeight1, dfWeight2, dfWeight3 }; dfAccumulatorWeightVertical += pfnGetWeight4Values(adfWeight); dfAccumulator += adfWeight[0] * dfAccumulatorLocal_1; dfAccumulator += adfWeight[1] * dfAccumulatorLocal_2; dfAccumulator += adfWeight[2] * dfAccumulatorLocal_3; dfAccumulator += adfWeight[3] * dfAccumulatorLocal_4; } for( ; j <= jMax; ++j ) { const GPtrDiff_t iSampJ = iSrcOffset + static_cast(j) * nSrcXSize; // Loop over all pixels in the row. iC = 0; i = iMin; // Process by chunk of 4 cols. XMMReg4Double v_acc = XMMReg4Double::Zero(); for( ; i+2 < iMax; i+=4, iC+=4 ) { // Retrieve the pixel & accumulate. XMMReg4Double v_pixels= XMMReg4Double::Load4Val(pSrcBand+i+iSampJ); XMMReg4Double v_padfWeight = XMMReg4Double::Load4Val(padfWeight + iC); v_acc += v_pixels * v_padfWeight; } double dfAccumulatorLocal = v_acc.GetHorizSum(); if( i < iMax ) { dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC]; dfAccumulatorLocal += pSrcBand[i+1+iSampJ] * padfWeight[iC+1]; i+=2; iC+=2; } if( i == iMax ) { dfAccumulatorLocal += static_cast(pSrcBand[i+iSampJ]) * padfWeight[iC]; } // Calculate the Y weight. double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale); dfAccumulator += dfWeight * dfAccumulatorLocal; dfAccumulatorWeightVertical += dfWeight; } const double dfAccumulatorWeight = dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical; *pValue = GWKClampValueT(dfAccumulator / dfAccumulatorWeight); return true; } /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template<> bool GWKResampleNoMasksT( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, GByte *pValue, double *padfWeight ) { return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue, padfWeight); } /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template<> bool GWKResampleNoMasksT( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, GInt16 *pValue, double *padfWeight ) { return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue, padfWeight); } /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template<> bool GWKResampleNoMasksT( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, GUInt16 *pValue, double *padfWeight ) { return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue, padfWeight); } /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template<> bool GWKResampleNoMasksT( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, float *pValue, double *padfWeight ) { return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue, padfWeight); } #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template<> bool GWKResampleNoMasksT( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pValue, double *padfWeight ) { return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue, padfWeight); } #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */ #endif /* defined(__x86_64) || defined(_M_X64) */ /************************************************************************/ /* GWKRoundSourceCoordinates() */ /************************************************************************/ static void GWKRoundSourceCoordinates( int nDstXSize, double* padfX, double* padfY, double* padfZ, int* pabSuccess, double dfSrcCoordPrecision, double dfErrorThreshold, GDALTransformerFunc pfnTransformer, void* pTransformerArg, double dfDstXOff, double dfDstY ) { double dfPct = 0.8; if( dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0 ) { dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold); } const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision; for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) { const double dfXBefore = padfX[iDstX]; const double dfYBefore = padfY[iDstX]; padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) * dfSrcCoordPrecision; padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) * dfSrcCoordPrecision; // If we are in an uncertainty zone, go to non-approximated // transformation. // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must // be at least 10 times greater than the approximation error. if( fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold || fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold ) { padfX[iDstX] = iDstX + dfDstXOff; padfY[iDstX] = dfDstY; padfZ[iDstX] = 0.0; pfnTransformer( pTransformerArg, TRUE, 1, padfX + iDstX, padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX ); padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) * dfSrcCoordPrecision; padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) * dfSrcCoordPrecision; } } } /************************************************************************/ /* GWKOpenCLCase() */ /* */ /* This is identical to GWKGeneralCase(), but functions via */ /* OpenCL. This means we have vector optimization (SSE) and/or */ /* GPU optimization depending on our prefs. The code itself is */ /* general and not optimized, but by defining constants we can */ /* make some pretty darn good code on the fly. */ /************************************************************************/ #if defined(HAVE_OPENCL) static CPLErr GWKOpenCLCase( GDALWarpKernel *poWK ) { const int nDstXSize = poWK->nDstXSize; const int nDstYSize = poWK->nDstYSize; const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; const int nDstXOff = poWK->nDstXOff; const int nDstYOff = poWK->nDstYOff; const int nSrcXOff = poWK->nSrcXOff; const int nSrcYOff = poWK->nSrcYOff; bool bUseImag = false; cl_channel_type imageFormat; switch( poWK->eWorkingDataType ) { case GDT_Byte: imageFormat = CL_UNORM_INT8; break; case GDT_UInt16: imageFormat = CL_UNORM_INT16; break; case GDT_CInt16: bUseImag = true; CPL_FALLTHROUGH case GDT_Int16: imageFormat = CL_SNORM_INT16; break; case GDT_CFloat32: bUseImag = true; CPL_FALLTHROUGH case GDT_Float32: imageFormat = CL_FLOAT; break; default: // No support for higher precision formats. CPLDebug("OpenCL", "Unsupported resampling OpenCL data type %d.", static_cast(poWK->eWorkingDataType)); return CE_Warning; } OCLResampAlg resampAlg; switch( poWK->eResample ) { case GRA_Bilinear: resampAlg = OCL_Bilinear; break; case GRA_Cubic: resampAlg = OCL_Cubic; break; case GRA_CubicSpline: resampAlg = OCL_CubicSpline; break; case GRA_Lanczos: resampAlg = OCL_Lanczos; break; default: // No support for higher precision formats. CPLDebug("OpenCL", "Unsupported resampling OpenCL resampling alg %d.", static_cast(poWK->eResample)); return CE_Warning; } struct oclWarper *warper = nullptr; cl_int err; CPLErr eErr = CE_None; // TODO(schwehr): Fix indenting. try { // Using a factor of 2 or 4 seems to have much less rounding error // than 3 on the GPU. // Then the rounding error can cause strange artifacts under the // right conditions. warper = GDALWarpKernelOpenCL_createEnv(nSrcXSize, nSrcYSize, nDstXSize, nDstYSize, imageFormat, poWK->nBands, 4, bUseImag, poWK->papanBandSrcValid != nullptr, poWK->pafDstDensity, poWK->padfDstNoDataReal, resampAlg, &err); if( err != CL_SUCCESS || warper == nullptr ) { eErr = CE_Warning; if( warper != nullptr ) throw eErr; return eErr; } CPLDebug("GDAL", "GDALWarpKernel()::GWKOpenCLCase() " "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d", nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize, nDstXOff, nDstYOff, nDstXSize, nDstYSize); if( !poWK->pfnProgress( poWK->dfProgressBase, "", poWK->pProgress ) ) { CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" ); eErr = CE_Failure; throw eErr; } /* ==================================================================== */ /* Loop over bands. */ /* ==================================================================== */ for( int iBand = 0; iBand < poWK->nBands; iBand++ ) { if( poWK->papanBandSrcValid != nullptr && poWK->papanBandSrcValid[iBand] != nullptr) { GDALWarpKernelOpenCL_setSrcValid( warper, reinterpret_cast(poWK->papanBandSrcValid[iBand]), iBand); if( err != CL_SUCCESS ) { CPLError( CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__ ); eErr = CE_Failure; throw eErr; } } err = GDALWarpKernelOpenCL_setSrcImg(warper, poWK->papabySrcImage[iBand], iBand); if( err != CL_SUCCESS ) { CPLError(CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); eErr = CE_Failure; throw eErr; } err = GDALWarpKernelOpenCL_setDstImg(warper, poWK->papabyDstImage[iBand], iBand); if( err != CL_SUCCESS ) { CPLError(CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); eErr = CE_Failure; throw eErr; } } /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... one */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ // For x, 2 *, because we cache the precomputed values at the end. double *padfX = static_cast(CPLMalloc(2 * sizeof(double) * nDstXSize)); double * padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const double dfSrcCoordPrecision = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); // Precompute values. for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for( int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY ) { /* ---------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* ---------------------------------------------------------------- */ memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize ); const double dfYConst = iDstY + 0.5 + poWK->nDstYOff; for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) padfY[iDstX] = dfYConst; memset( padfZ, 0, sizeof(double) * nDstXSize ); /* ---------------------------------------------------------------- */ /* Transform the points from destination pixel/line coordinates*/ /* to source pixel/line coordinates. */ /* ---------------------------------------------------------------- */ poWK->pfnTransformer( poWK->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess ); if( dfSrcCoordPrecision > 0.0 ) { GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, poWK->pTransformerArg, 0.5 + nDstXOff, iDstY + 0.5 + nDstYOff); } err = GDALWarpKernelOpenCL_setCoordRow(warper, padfX, padfY, nSrcXOff, nSrcYOff, pabSuccess, iDstY); if( err != CL_SUCCESS ) { CPLError(CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); eErr = CE_Failure; break; } // Update the valid & density masks because we don't do so in the // kernel. for( int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++ ) { const double dfX = padfX[iDstX]; const double dfY = padfY[iDstX]; const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; // See GWKGeneralCase() for appropriate commenting. if( !pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff ) continue; int iSrcX = static_cast(dfX) - nSrcXOff; int iSrcY = static_cast(dfY) - nSrcYOff; if( iSrcX < 0 || iSrcX >= nSrcXSize || iSrcY < 0 || iSrcY >= nSrcYSize ) continue; GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; double dfDensity = 1.0; if( poWK->pafUnifiedSrcDensity != nullptr && iSrcX >= 0 && iSrcY >= 0 && iSrcX < nSrcXSize && iSrcY < nSrcYSize ) dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; GWKOverlayDensity( poWK, iDstOffset, dfDensity ); // Because this is on the bit-wise level, it can't be done well in // OpenCL. if( poWK->panDstValid != nullptr ) poWK->panDstValid[iDstOffset>>5] |= 0x01 << (iDstOffset & 0x1f); } } CPLFree( padfX ); CPLFree( padfY ); CPLFree( padfZ ); CPLFree( pabSuccess ); if( eErr != CE_None ) throw eErr; err = GDALWarpKernelOpenCL_runResamp(warper, poWK->pafUnifiedSrcDensity, poWK->panUnifiedSrcValid, poWK->pafDstDensity, poWK->panDstValid, poWK->dfXScale, poWK->dfYScale, poWK->dfXFilter, poWK->dfYFilter, poWK->nXRadius, poWK->nYRadius, poWK->nFiltInitX, poWK->nFiltInitY); if( err != CL_SUCCESS ) { CPLError(CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); eErr = CE_Failure; throw eErr; } /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for( int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++ ) { for( int iBand = 0; iBand < poWK->nBands; iBand++ ) { void *rowReal = nullptr; void *rowImag = nullptr; GByte *pabyDst = poWK->papabyDstImage[iBand]; err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag, iDstY, iBand); if( err != CL_SUCCESS ) { CPLError( CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__ ); eErr = CE_Failure; throw eErr; } // Copy the data from the warper to GDAL's memory. switch( poWK->eWorkingDataType ) { case GDT_Byte: memcpy(&(pabyDst[iDstY*nDstXSize]), rowReal, sizeof(GByte)*nDstXSize); break; case GDT_Int16: memcpy(&(reinterpret_cast(pabyDst)[iDstY*nDstXSize]), rowReal, sizeof(GInt16)*nDstXSize); break; case GDT_UInt16: memcpy(&(reinterpret_cast(pabyDst)[iDstY*nDstXSize]), rowReal, sizeof(GUInt16)*nDstXSize); break; case GDT_Float32: memcpy(&(reinterpret_cast(pabyDst)[iDstY*nDstXSize]), rowReal, sizeof(float)*nDstXSize); break; case GDT_CInt16: { GInt16 *pabyDstI16 = &(reinterpret_cast(pabyDst)[iDstY*nDstXSize]); for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) { pabyDstI16[iDstX*2 ] = static_cast(rowReal)[iDstX]; pabyDstI16[iDstX*2+1] = static_cast(rowImag)[iDstX]; } } break; case GDT_CFloat32: { float *pabyDstF32 = &(reinterpret_cast(pabyDst)[iDstY*nDstXSize]); for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) { pabyDstF32[iDstX*2 ] = static_cast(rowReal)[iDstX]; pabyDstF32[iDstX*2+1] = static_cast(rowImag)[iDstX]; } } break; default: // No support for higher precision formats. CPLError(CE_Failure, CPLE_AppDefined, "Unsupported resampling OpenCL data type %d.", static_cast(poWK->eWorkingDataType) ); eErr = CE_Failure; throw eErr; } } } } catch( const CPLErr& ) { } if( (err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS ) { CPLError(CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__ ); return CE_Failure; } return eErr; } #endif /* defined(HAVE_OPENCL) */ /************************************************************************/ /* GWKCheckAndComputeSrcOffsets() */ /************************************************************************/ static CPL_INLINE bool GWKCheckAndComputeSrcOffsets( const int* _pabSuccess, int _iDstX, const double* _padfX, const double* _padfY, const GDALWarpKernel* _poWK, int _nSrcXSize, int _nSrcYSize, GPtrDiff_t& iSrcOffset) { if( !_pabSuccess[_iDstX] ) return false; // If this happens this is likely the symptom of a bug somewhere. if( CPLIsNan(_padfX[_iDstX]) || CPLIsNan(_padfY[_iDstX]) ) { static bool bNanCoordFound = false; if( !bNanCoordFound ) { CPLDebug("WARP", "GWKCheckAndComputeSrcOffsets(): " "NaN coordinate found on point %d.", _iDstX); bNanCoordFound = true; } return false; } /* -------------------------------------------------------------------- */ /* Figure out what pixel we want in our source raster, and skip */ /* further processing if it is well off the source image. */ /* -------------------------------------------------------------------- */ /* We test against the value before casting to avoid the */ /* problem of asymmetric truncation effects around zero. That is */ /* -0.5 will be 0 when cast to an int. */ if( _padfX[_iDstX] < _poWK->nSrcXOff || _padfY[_iDstX] < _poWK->nSrcYOff ) return false; // Check for potential overflow when casting from float to int, (if // operating outside natural projection area, padfX/Y can be a very huge // positive number before doing the actual conversion), as such cast is // undefined behavior that can trigger exception with some compilers // (see #6753) if( _padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff || _padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff ) { return false; } int iSrcX = static_cast(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff; int iSrcY = static_cast(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff; if( iSrcX == _nSrcXSize ) iSrcX --; if( iSrcY == _nSrcYSize ) iSrcY --; // Those checks should normally be OK given the previous ones. CPLAssert( iSrcX >= 0 ); CPLAssert( iSrcY >= 0 ); CPLAssert( iSrcX < _nSrcXSize ); CPLAssert( iSrcY < _nSrcYSize ); iSrcOffset = iSrcX + static_cast(iSrcY) * _nSrcXSize; return true; } /************************************************************************/ /* GWKGeneralCase() */ /* */ /* This is the most general case. It attempts to handle all */ /* possible features with relatively little concern for */ /* efficiency. */ /************************************************************************/ static void GWKGeneralCaseThread( void* pData) { GWKJobStruct* psJob = reinterpret_cast(pData); GDALWarpKernel *poWK = psJob->poWK; const int iYMin = psJob->iYMin; const int iYMax = psJob->iYMax; int nDstXSize = poWK->nDstXSize; int nSrcXSize = poWK->nSrcXSize; int nSrcYSize = poWK->nSrcYSize; /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... one */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ // For x, 2 *, because we cache the precomputed values at the end. double *padfX = static_cast(CPLMalloc(2 * sizeof(double) * nDstXSize)); double *padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const bool bUse4SamplesFormula = poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95; GWKResampleWrkStruct* psWrkStruct = nullptr; if( poWK->eResample != GRA_NearestNeighbour ) { psWrkStruct = GWKResampleCreateWrkStruct(poWK); } const double dfSrcCoordPrecision = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); // Precompute values. for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for( int iDstY = iYMin; iDstY < iYMax; iDstY++ ) { /* -------------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* -------------------------------------------------------------------- */ memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize ); const double dfY = iDstY + 0.5 + poWK->nDstYOff; for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) padfY[iDstX] = dfY; memset( padfZ, 0, sizeof(double) * nDstXSize ); /* -------------------------------------------------------------------- */ /* Transform the points from destination pixel/line coordinates */ /* to source pixel/line coordinates. */ /* -------------------------------------------------------------------- */ poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess ); if( dfSrcCoordPrecision > 0.0 ) { GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg, 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff); } /* ==================================================================== */ /* Loop over pixels in output scanline. */ /* ==================================================================== */ for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) { GPtrDiff_t iSrcOffset = 0; if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY, poWK, nSrcXSize, nSrcYSize, iSrcOffset) ) continue; /* -------------------------------------------------------------------- */ /* Do not try to apply transparent/invalid source pixels to the */ /* destination. This currently ignores the multi-pixel input */ /* of bilinear and cubic resamples. */ /* -------------------------------------------------------------------- */ double dfDensity = 1.0; if( poWK->pafUnifiedSrcDensity != nullptr ) { dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; if( dfDensity < SRC_DENSITY_THRESHOLD ) continue; } if( poWK->panUnifiedSrcValid != nullptr && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) continue; /* ==================================================================== */ /* Loop processing each band. */ /* ==================================================================== */ bool bHasFoundDensity = false; const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; for( int iBand = 0; iBand < poWK->nBands; iBand++ ) { double dfBandDensity = 0.0; double dfValueReal = 0.0; double dfValueImag = 0.0; /* -------------------------------------------------------------------- */ /* Collect the source value. */ /* -------------------------------------------------------------------- */ if( poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 || nSrcYSize == 1 ) { // FALSE is returned if dfBandDensity == 0, which is // checked below. CPL_IGNORE_RET_VAL( GWKGetPixelValue( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal, &dfValueImag )); } else if( poWK->eResample == GRA_Bilinear && bUse4SamplesFormula ) { GWKBilinearResample4Sample( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImag ); } else if( poWK->eResample == GRA_Cubic && bUse4SamplesFormula ) { GWKCubicResample4Sample( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImag ); } else #ifdef DEBUG // Only useful for clang static analyzer. if( psWrkStruct != nullptr ) #endif { psWrkStruct->pfnGWKResample( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImag, psWrkStruct ); } // If we didn't find any valid inputs skip to next band. if( dfBandDensity < BAND_DENSITY_THRESHOLD ) continue; if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShit - padfZ[iDstX]; } bHasFoundDensity = true; /* -------------------------------------------------------------------- */ /* We have a computed value from the source. Now apply it to */ /* the destination pixel. */ /* -------------------------------------------------------------------- */ GWKSetPixelValue( poWK, iBand, iDstOffset, dfBandDensity, dfValueReal, dfValueImag ); } if( !bHasFoundDensity ) continue; /* -------------------------------------------------------------------- */ /* Update destination density/validity masks. */ /* -------------------------------------------------------------------- */ GWKOverlayDensity( poWK, iDstOffset, dfDensity ); if( poWK->panDstValid != nullptr ) { poWK->panDstValid[iDstOffset>>5] |= 0x01 << (iDstOffset & 0x1f); } } /* Next iDstX */ /* -------------------------------------------------------------------- */ /* Report progress to the user, and optionally cancel out. */ /* -------------------------------------------------------------------- */ if( psJob->pfnProgress && psJob->pfnProgress(psJob) ) break; } /* -------------------------------------------------------------------- */ /* Cleanup and return. */ /* -------------------------------------------------------------------- */ CPLFree( padfX ); CPLFree( padfY ); CPLFree( padfZ ); CPLFree( pabSuccess ); if( psWrkStruct ) GWKResampleDeleteWrkStruct(psWrkStruct); } static CPLErr GWKGeneralCase( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKGeneralCase", GWKGeneralCaseThread ); } /************************************************************************/ /* GWKRealCase() */ /* */ /* General case for non-complex data types. */ /************************************************************************/ static void GWKRealCaseThread( void* pData) { GWKJobStruct* psJob = static_cast(pData); GDALWarpKernel *poWK = psJob->poWK; const int iYMin = psJob->iYMin; const int iYMax = psJob->iYMax; const int nDstXSize = poWK->nDstXSize; const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... one */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ // For x, 2 *, because we cache the precomputed values at the end. double *padfX = static_cast(CPLMalloc(2 * sizeof(double) * nDstXSize)); double *padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const bool bUse4SamplesFormula = poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95; GWKResampleWrkStruct* psWrkStruct = nullptr; if( poWK->eResample != GRA_NearestNeighbour ) { psWrkStruct = GWKResampleCreateWrkStruct(poWK); } const double dfSrcCoordPrecision = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr && poWK->papanBandSrcValid == nullptr && poWK->pafUnifiedSrcDensity != nullptr; // Precompute values. for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for( int iDstY = iYMin; iDstY < iYMax; iDstY++ ) { /* -------------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* -------------------------------------------------------------------- */ memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize ); const double dfY = iDstY + 0.5 + poWK->nDstYOff; for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) padfY[iDstX] = dfY; memset( padfZ, 0, sizeof(double) * nDstXSize ); /* -------------------------------------------------------------------- */ /* Transform the points from destination pixel/line coordinates */ /* to source pixel/line coordinates. */ /* -------------------------------------------------------------------- */ poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess ); if( dfSrcCoordPrecision > 0.0 ) { GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg, 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff); } /* ==================================================================== */ /* Loop over pixels in output scanline. */ /* ==================================================================== */ for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) { GPtrDiff_t iSrcOffset = 0; if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY, poWK, nSrcXSize, nSrcYSize, iSrcOffset) ) continue; /* -------------------------------------------------------------------- */ /* Do not try to apply transparent/invalid source pixels to the */ /* destination. This currently ignores the multi-pixel input */ /* of bilinear and cubic resamples. */ /* -------------------------------------------------------------------- */ double dfDensity = 1.0; if( poWK->pafUnifiedSrcDensity != nullptr ) { dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; if( dfDensity < SRC_DENSITY_THRESHOLD ) continue; } if( poWK->panUnifiedSrcValid != nullptr && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) continue; /* ==================================================================== */ /* Loop processing each band. */ /* ==================================================================== */ bool bHasFoundDensity = false; const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; for( int iBand = 0; iBand < poWK->nBands; iBand++ ) { double dfBandDensity = 0.0; double dfValueReal = 0.0; /* -------------------------------------------------------------------- */ /* Collect the source value. */ /* -------------------------------------------------------------------- */ if( poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 || nSrcYSize == 1 ) { // FALSE is returned if dfBandDensity == 0, which is // checked below. CPL_IGNORE_RET_VAL( GWKGetPixelValueReal( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal )); } else if( poWK->eResample == GRA_Bilinear && bUse4SamplesFormula ) { double dfValueImagIgnored = 0.0; GWKBilinearResample4Sample( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImagIgnored ); } else if( poWK->eResample == GRA_Cubic && bUse4SamplesFormula ) { if( bSrcMaskIsDensity ) { if( poWK->eWorkingDataType == GDT_Byte ) { GWKCubicResampleSrcMaskIsDensity4SampleRealT( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &dfBandDensity, &dfValueReal ); } else if( poWK->eWorkingDataType == GDT_UInt16 ) { GWKCubicResampleSrcMaskIsDensity4SampleRealT( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &dfBandDensity, &dfValueReal ); } else { GWKCubicResampleSrcMaskIsDensity4SampleReal( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &dfBandDensity, &dfValueReal ); } } else { double dfValueImagIgnored = 0.0; GWKCubicResample4Sample( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImagIgnored); } } else #ifdef DEBUG // Only useful for clang static analyzer. if( psWrkStruct != nullptr ) #endif { double dfValueImagIgnored = 0.0; psWrkStruct->pfnGWKResample( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImagIgnored, psWrkStruct); } // If we didn't find any valid inputs skip to next band. if( dfBandDensity < BAND_DENSITY_THRESHOLD ) continue; if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShit - padfZ[iDstX]; } bHasFoundDensity = true; /* -------------------------------------------------------------------- */ /* We have a computed value from the source. Now apply it to */ /* the destination pixel. */ /* -------------------------------------------------------------------- */ GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity, dfValueReal); } if( !bHasFoundDensity ) continue; /* -------------------------------------------------------------------- */ /* Update destination density/validity masks. */ /* -------------------------------------------------------------------- */ GWKOverlayDensity( poWK, iDstOffset, dfDensity ); if( poWK->panDstValid != nullptr ) { poWK->panDstValid[iDstOffset>>5] |= 0x01 << (iDstOffset & 0x1f); } } // Next iDstX. /* -------------------------------------------------------------------- */ /* Report progress to the user, and optionally cancel out. */ /* -------------------------------------------------------------------- */ if( psJob->pfnProgress && psJob->pfnProgress(psJob) ) break; } /* -------------------------------------------------------------------- */ /* Cleanup and return. */ /* -------------------------------------------------------------------- */ CPLFree( padfX ); CPLFree( padfY ); CPLFree( padfZ ); CPLFree( pabSuccess ); if( psWrkStruct ) GWKResampleDeleteWrkStruct(psWrkStruct); } static CPLErr GWKRealCase( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKRealCase", GWKRealCaseThread ); } /************************************************************************/ /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */ /************************************************************************/ template static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal( void* pData ) { GWKJobStruct* psJob = static_cast(pData); GDALWarpKernel *poWK = psJob->poWK; const int iYMin = psJob->iYMin; const int iYMax = psJob->iYMax; const int nDstXSize = poWK->nDstXSize; const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... one */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ // For x, 2 *, because we cache the precomputed values at the end. double *padfX = static_cast(CPLMalloc(2 * sizeof(double) * nDstXSize)); double *padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const int nXRadius = poWK->nXRadius; double *padfWeight = static_cast(CPLCalloc(1 + nXRadius * 2, sizeof(double))); const double dfSrcCoordPrecision = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); // Precompute values. for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for( int iDstY = iYMin; iDstY < iYMax; iDstY++ ) { /* -------------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* -------------------------------------------------------------------- */ memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize ); const double dfY = iDstY + 0.5 + poWK->nDstYOff; for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) padfY[iDstX] = dfY; memset( padfZ, 0, sizeof(double) * nDstXSize ); /* -------------------------------------------------------------------- */ /* Transform the points from destination pixel/line coordinates */ /* to source pixel/line coordinates. */ /* -------------------------------------------------------------------- */ poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess ); if( dfSrcCoordPrecision > 0.0 ) { GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg, 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff); } /* ==================================================================== */ /* Loop over pixels in output scanline. */ /* ==================================================================== */ for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) { GPtrDiff_t iSrcOffset = 0; if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY, poWK, nSrcXSize, nSrcYSize, iSrcOffset) ) continue; /* ==================================================================== */ /* Loop processing each band. */ /* ==================================================================== */ const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; for( int iBand = 0; iBand < poWK->nBands; iBand++ ) { T value = 0; if( eResample == GRA_NearestNeighbour ) { value = reinterpret_cast( poWK->papabySrcImage[iBand])[iSrcOffset]; } else if( bUse4SamplesFormula ) { if( eResample == GRA_Bilinear ) GWKBilinearResampleNoMasks4SampleT( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &value ); else GWKCubicResampleNoMasks4SampleT( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &value ); } else { GWKResampleNoMasksT( poWK, iBand, padfX[iDstX]-poWK->nSrcXOff, padfY[iDstX]-poWK->nSrcYOff, &value, padfWeight); } if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source value = GWKClampValueT(value * poWK->dfMultFactorVerticalShit - padfZ[iDstX]); } if( poWK->pafDstDensity ) poWK->pafDstDensity[iDstOffset] = 1.0f; reinterpret_cast(poWK->papabyDstImage[iBand])[iDstOffset] = value; } } /* -------------------------------------------------------------------- */ /* Report progress to the user, and optionally cancel out. */ /* -------------------------------------------------------------------- */ if( psJob->pfnProgress && psJob->pfnProgress(psJob) ) break; } /* -------------------------------------------------------------------- */ /* Cleanup and return. */ /* -------------------------------------------------------------------- */ CPLFree( padfX ); CPLFree( padfY ); CPLFree( padfZ ); CPLFree( pabSuccess ); CPLFree( padfWeight ); } template static void GWKResampleNoMasksOrDstDensityOnlyThread( void* pData ) { GWKResampleNoMasksOrDstDensityOnlyThreadInternal(pData); } template static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread( void* pData ) { GWKJobStruct* psJob = static_cast(pData); GDALWarpKernel *poWK = psJob->poWK; CPLAssert(eResample == GRA_Bilinear || eResample == GRA_Cubic); const bool bUse4SamplesFormula = poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95; if( bUse4SamplesFormula ) GWKResampleNoMasksOrDstDensityOnlyThreadInternal(pData); else GWKResampleNoMasksOrDstDensityOnlyThreadInternal(pData); } static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKNearestNoMasksOrDstDensityOnlyByte", GWKResampleNoMasksOrDstDensityOnlyThread); } static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKCubicNoMasksOrDstDensityOnlyByte", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } #endif static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte", GWKResampleNoMasksOrDstDensityOnlyThread); } /************************************************************************/ /* GWKNearestByte() */ /* */ /* Case for 8bit input data with nearest neighbour resampling */ /* using valid flags. Should be as fast as possible for this */ /* particular transformation type. */ /************************************************************************/ template static void GWKNearestThread( void* pData ) { GWKJobStruct* psJob = static_cast(pData); GDALWarpKernel *poWK = psJob->poWK; const int iYMin = psJob->iYMin; const int iYMax = psJob->iYMax; const int nDstXSize = poWK->nDstXSize; const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... one */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ // For x, 2 *, because we cache the precomputed values at the end. double *padfX = static_cast(CPLMalloc(2 * sizeof(double) * nDstXSize)); double *padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const double dfSrcCoordPrecision = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); // Precompute values. for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for( int iDstY = iYMin; iDstY < iYMax; iDstY++ ) { /* -------------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* -------------------------------------------------------------------- */ memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize ); const double dfY = iDstY + 0.5 + poWK->nDstYOff; for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) padfY[iDstX] = dfY; memset( padfZ, 0, sizeof(double) * nDstXSize ); /* -------------------------------------------------------------------- */ /* Transform the points from destination pixel/line coordinates */ /* to source pixel/line coordinates. */ /* -------------------------------------------------------------------- */ poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess ); if( dfSrcCoordPrecision > 0.0 ) { GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg, 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff); } /* ==================================================================== */ /* Loop over pixels in output scanline. */ /* ==================================================================== */ for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) { GPtrDiff_t iSrcOffset = 0; if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY, poWK, nSrcXSize, nSrcYSize, iSrcOffset) ) continue; /* -------------------------------------------------------------------- */ /* Do not try to apply invalid source pixels to the dest. */ /* -------------------------------------------------------------------- */ if( poWK->panUnifiedSrcValid != nullptr && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) continue; /* -------------------------------------------------------------------- */ /* Do not try to apply transparent source pixels to the destination.*/ /* -------------------------------------------------------------------- */ double dfDensity = 1.0; if( poWK->pafUnifiedSrcDensity != nullptr ) { dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; if( dfDensity < SRC_DENSITY_THRESHOLD ) continue; } /* ==================================================================== */ /* Loop processing each band. */ /* ==================================================================== */ const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; for( int iBand = 0; iBand < poWK->nBands; iBand++ ) { T value = 0; double dfBandDensity = 0.0; /* -------------------------------------------------------------------- */ /* Collect the source value. */ /* -------------------------------------------------------------------- */ if( GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity, &value) ) { if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source value = GWKClampValueT(value * poWK->dfMultFactorVerticalShit - padfZ[iDstX]); } if( dfBandDensity < 1.0 ) { if( dfBandDensity == 0.0 ) { // Do nothing. } else { // Let the general code take care of mixing. GWKSetPixelValueRealT( poWK, iBand, iDstOffset, dfBandDensity, value ); } } else { reinterpret_cast( poWK->papabyDstImage[iBand])[iDstOffset] = value; } } } /* -------------------------------------------------------------------- */ /* Mark this pixel valid/opaque in the output. */ /* -------------------------------------------------------------------- */ GWKOverlayDensity( poWK, iDstOffset, dfDensity ); if( poWK->panDstValid != nullptr ) { poWK->panDstValid[iDstOffset>>5] |= 0x01 << (iDstOffset & 0x1f); } } /* Next iDstX */ /* -------------------------------------------------------------------- */ /* Report progress to the user, and optionally cancel out. */ /* -------------------------------------------------------------------- */ if( psJob->pfnProgress && psJob->pfnProgress(psJob) ) break; } /* -------------------------------------------------------------------- */ /* Cleanup and return. */ /* -------------------------------------------------------------------- */ CPLFree( padfX ); CPLFree( padfY ); CPLFree( padfZ ); CPLFree( pabSuccess ); } static CPLErr GWKNearestByte( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKNearestByte", GWKNearestThread ); } static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKNearestNoMasksOrDstDensityOnlyShort", GWKResampleNoMasksOrDstDensityOnlyThread); } static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } #endif static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKCubicNoMasksOrDstDensityOnlyShort", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort", GWKResampleNoMasksOrDstDensityOnlyThread); } static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort", GWKResampleNoMasksOrDstDensityOnlyThread); } static CPLErr GWKNearestShort( GDALWarpKernel *poWK ) { return GWKRun(poWK, "GWKNearestShort", GWKNearestThread); } static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat", GWKResampleNoMasksOrDstDensityOnlyThread); } static CPLErr GWKNearestFloat( GDALWarpKernel *poWK ) { return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread); } /************************************************************************/ /* GWKAverageOrMode() */ /* */ /************************************************************************/ static void GWKAverageOrModeThread(void* pData); static CPLErr GWKAverageOrMode( GDALWarpKernel *poWK ) { return GWKRun( poWK, "GWKAverageOrMode", GWKAverageOrModeThread ); } // Overall logic based on GWKGeneralCaseThread(). static void GWKAverageOrModeThread( void* pData) { GWKJobStruct* psJob = static_cast(pData); GDALWarpKernel *poWK = psJob->poWK; const int iYMin = psJob->iYMin; const int iYMax = psJob->iYMax; const int nDstXSize = poWK->nDstXSize; const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; /* -------------------------------------------------------------------- */ /* Find out which algorithm to use (small optim.) */ /* -------------------------------------------------------------------- */ int nAlgo = 0; // These vars only used with nAlgo == 3. int *panVals = nullptr; int nBins = 0; int nBinsOffset = 0; // Only used with nAlgo = 2. float* pafRealVals = nullptr; float* pafImagVals = nullptr; int* panRealSums = nullptr; int* panImagSums = nullptr; // Only used with nAlgo = 6. float quant = 0.5; //To control array allocation only when data type is complex const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType)!=0; if( poWK->eResample == GRA_Average ) { nAlgo = GWKAOM_Average; } else if( poWK->eResample == GRA_RMS ) { nAlgo = GWKAOM_RMS; } else if( poWK->eResample == GRA_Mode ) { // TODO check color table count > 256. if( poWK->eWorkingDataType == GDT_Byte || poWK->eWorkingDataType == GDT_UInt16 || poWK->eWorkingDataType == GDT_Int16 ) { nAlgo = GWKAOM_Imode; // In the case of a paletted or non-paletted byte band, // Input values are between 0 and 255. if( poWK->eWorkingDataType == GDT_Byte ) { nBins = 256; } // In the case of Int16, input values are between -32768 and 32767. else if( poWK->eWorkingDataType == GDT_Int16 ) { nBins = 65536; nBinsOffset = 32768; } // In the case of UInt16, input values are between 0 and 65537. else if( poWK->eWorkingDataType == GDT_UInt16 ) { nBins = 65536; } panVals = static_cast(VSI_MALLOC_VERBOSE(nBins * sizeof(int))); if( panVals == nullptr ) return; } else { nAlgo = GWKAOM_Fmode; if( nSrcXSize > 0 && nSrcYSize > 0 ) { pafRealVals = static_cast( VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float))); panRealSums = static_cast( VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(int))); if( pafRealVals == nullptr || panRealSums == nullptr ) { VSIFree(pafRealVals); VSIFree(panRealSums); return; } } } } else if( poWK->eResample == GRA_Max ) { nAlgo = GWKAOM_Max; } else if( poWK->eResample == GRA_Min ) { nAlgo = GWKAOM_Min; } else if( poWK->eResample == GRA_Med ) { nAlgo = GWKAOM_Quant; quant = 0.5; } else if( poWK->eResample == GRA_Q1 ) { nAlgo = GWKAOM_Quant; quant = 0.25; } else if( poWK->eResample == GRA_Q3 ) { nAlgo = GWKAOM_Quant; quant = 0.75; } else if( poWK->eResample == GRA_Sum ) { nAlgo = GWKAOM_Sum; } else { // Other resample algorithms not permitted here. CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, " "illegal resample" ); return; } CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d", nAlgo); /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... two */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ double *padfX = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfX2 = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfY2 = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ2 = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); int *pabSuccess2 = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const double dfSrcCoordPrecision = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for( int iDstY = iYMin; iDstY < iYMax; iDstY++ ) { /* -------------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* -------------------------------------------------------------------- */ for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) { padfX[iDstX] = iDstX + poWK->nDstXOff; padfY[iDstX] = iDstY + poWK->nDstYOff; padfZ[iDstX] = 0.0; padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff; padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff; padfZ2[iDstX] = 0.0; } /* -------------------------------------------------------------------- */ /* Transform the points from destination pixel/line coordinates */ /* to source pixel/line coordinates. */ /* -------------------------------------------------------------------- */ poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess ); poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2 ); if( dfSrcCoordPrecision > 0.0 ) { GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg, poWK->nDstXOff, iDstY + poWK->nDstYOff); GWKRoundSourceCoordinates(nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg, 1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff); } /* ==================================================================== */ /* Loop over pixels in output scanline. */ /* ==================================================================== */ for( int iDstX = 0; iDstX < nDstXSize; iDstX++ ) { GPtrDiff_t iSrcOffset = 0; double dfDensity = 1.0; bool bHasFoundDensity = false; if( !pabSuccess[iDstX] || !pabSuccess2[iDstX] ) continue; if( padfX[iDstX] < poWK->nSrcXOff - 1 || padfX2[iDstX] < poWK->nSrcXOff - 1 || padfY[iDstX] < poWK->nSrcYOff - 1 || padfY2[iDstX] < poWK->nSrcYOff -1 || padfX[iDstX] > nSrcXSize + poWK->nSrcXOff + 1 || padfX2[iDstX] > nSrcXSize + poWK->nSrcXOff + 1 || padfY[iDstX] > nSrcYSize + poWK->nSrcYOff + 1 || padfY2[iDstX] > nSrcYSize + poWK->nSrcYOff + 1 ) { continue; } const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; // Compute corners in source crs. // The transformation might not have preserved ordering of // coordinates so do the necessary swapping (#5433). // NOTE: this is really an approximative fix. To do something // more precise we would for example need to compute the // transformation of coordinates in the // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source // coordinates, and take the bounding box of the got source // coordinates. const double dfXMin = std::min(padfX[iDstX],padfX2[iDstX]) - poWK->nSrcXOff; int iSrcXMin = std::max(static_cast(floor(dfXMin + 1e-10)), 0); const double dfXMax = std::max(padfX[iDstX],padfX2[iDstX]) - poWK->nSrcXOff; int iSrcXMax = std::min(static_cast(ceil(dfXMax - 1e-10)), nSrcXSize); const double dfYMin = std::min(padfY[iDstX],padfY2[iDstX]) - poWK->nSrcYOff; int iSrcYMin = std::max(static_cast(floor(dfYMin + 1e-10)), 0); const double dfYMax = std::max(padfY[iDstX],padfY2[iDstX]) - poWK->nSrcYOff; int iSrcYMax = std::min(static_cast(ceil(dfYMax - 1e-10)), nSrcYSize); if( iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize ) iSrcXMax++; if( iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize ) iSrcYMax++; /* ==================================================================== */ /* Loop processing each band. */ /* ==================================================================== */ for( int iBand = 0; iBand < poWK->nBands; iBand++ ) { double dfBandDensity = 0.0; double dfValueReal = 0.0; double dfValueImag = 0.0; double dfValueRealTmp = 0.0; double dfValueImagTmp = 0.0; /* -------------------------------------------------------------------- */ /* Collect the source value. */ /* -------------------------------------------------------------------- */ // Loop over source lines and pixels - 3 possible algorithms. #define COMPUTE_WEIGHT_Y(iSrcY) \ ((iSrcY == iSrcYMin) ? \ ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin)): \ (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax): \ 1.0) #define COMPUTE_WEIGHT(iSrcX, dfWeightY) \ ((iSrcX == iSrcXMin) ? \ ((iSrcXMin + 1 == iSrcXMax) ? dfWeightY : dfWeightY * (1 - (dfXMin - iSrcXMin))): \ (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax)): \ dfWeightY) // poWK->eResample == GRA_Average. if( nAlgo == GWKAOM_Average ) { double dfTotalReal = 0.0; double dfTotalImag = 0.0; double dfTotalWeight = 0.0; // This code adapted from GDALDownsampleChunk32R_AverageT() // in gcore/overview.cpp. for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) { const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY); iSrcOffset = iSrcXMin + static_cast(iSrcY) * nSrcXSize; for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++, iSrcOffset++ ) { if( poWK->panUnifiedSrcValid != nullptr && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) { continue; } if( GWKGetPixelValue( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueRealTmp, &dfValueImagTmp ) && dfBandDensity > BAND_DENSITY_THRESHOLD ) { const double dfWeight = COMPUTE_WEIGHT(iSrcX, dfWeightY); dfTotalWeight += dfWeight; dfTotalReal += dfValueRealTmp * dfWeight; if (bIsComplex) { dfTotalImag += dfValueImagTmp * dfWeight; } } } } if( dfTotalWeight > 0 ) { dfValueReal = dfTotalReal / dfTotalWeight; if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShit - padfZ[iDstX]; } if (bIsComplex) { dfValueImag = dfTotalImag / dfTotalWeight; } dfBandDensity = 1; bHasFoundDensity = true; } } // GRA_Average. // poWK->eResample == GRA_RMS. if( nAlgo == GWKAOM_RMS ) { double dfTotalReal = 0.0; double dfTotalImag = 0.0; double dfTotalWeight = 0.0; // This code adapted from GDALDownsampleChunk32R_AverageT() // in gcore/overview.cpp. for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) { const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY); iSrcOffset = iSrcXMin + static_cast(iSrcY) * nSrcXSize; for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++, iSrcOffset++ ) { if( poWK->panUnifiedSrcValid != nullptr && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) { continue; } if( GWKGetPixelValue( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueRealTmp, &dfValueImagTmp ) && dfBandDensity > BAND_DENSITY_THRESHOLD ) { const double dfWeight = COMPUTE_WEIGHT(iSrcX, dfWeightY); dfTotalWeight += dfWeight; dfTotalReal += dfValueRealTmp * dfValueRealTmp * dfWeight; if (bIsComplex) dfTotalImag += dfValueImagTmp * dfValueImagTmp * dfWeight; } } } if( dfTotalWeight > 0 ) { dfValueReal = sqrt( dfTotalReal / dfTotalWeight ); if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShit - padfZ[iDstX]; } if (bIsComplex) dfValueImag = sqrt( dfTotalImag / dfTotalWeight ); dfBandDensity = 1; bHasFoundDensity = true; } } // GRA_RMS. else if( nAlgo == GWKAOM_Sum ) // poWK->eResample == GRA_Sum { double dfTotalReal = 0.0; double dfTotalImag = 0.0; bool bFoundValid = false; for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) { const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY); iSrcOffset = iSrcXMin + static_cast(iSrcY) * nSrcXSize; for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++, iSrcOffset++ ) { if( poWK->panUnifiedSrcValid != nullptr && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) { continue; } if( GWKGetPixelValue( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueRealTmp, &dfValueImagTmp ) && dfBandDensity > BAND_DENSITY_THRESHOLD ) { const double dfWeight = COMPUTE_WEIGHT(iSrcX, dfWeightY); bFoundValid = true; dfTotalReal += dfValueRealTmp * dfWeight; if (bIsComplex) { dfTotalImag += dfValueImagTmp * dfWeight; } } } } if( bFoundValid ) { dfValueReal = dfTotalReal; if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShit - padfZ[iDstX]; } if (bIsComplex) { dfValueImag = dfTotalImag; } dfBandDensity = 1; bHasFoundDensity = true; } } // GRA_Sum. else if( nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode ) // poWK->eResample == GRA_Mode { // This code adapted from GDALDownsampleChunk32R_Mode() in // gcore/overview.cpp. if( nAlgo == GWKAOM_Fmode ) // int32 or float. { // Does it make sense it makes to run a // majority filter on floating point data? But, here it // is for the sake of compatibility. It won't look // right on RGB images by the nature of the filter. int iMaxInd = 0; int iMaxVal = -1; int i = 0; for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) { for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ ) { iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; if( poWK->panUnifiedSrcValid != nullptr && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) continue; if( GWKGetPixelValue( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueRealTmp, &dfValueImagTmp ) && dfBandDensity > BAND_DENSITY_THRESHOLD ) { const float fVal = static_cast(dfValueRealTmp); // Check array for existing entry. for( i = 0; i < iMaxInd; ++i ) if( pafRealVals[i] == fVal && ++panRealSums[i] > panRealSums[iMaxVal] ) { iMaxVal = i; break; } // Add to arr if entry not already there. if( i == iMaxInd ) { pafRealVals[iMaxInd] = fVal; panRealSums[iMaxInd] = 1; if( iMaxVal < 0 ) iMaxVal = iMaxInd; ++iMaxInd; } } } } if( iMaxVal != -1 ) { dfValueReal = pafRealVals[iMaxVal]; if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShit - padfZ[iDstX]; } dfBandDensity = 1; bHasFoundDensity = true; } } else // byte or int16. { int nMaxVal = 0; int iMaxInd = -1; memset(panVals, 0, nBins*sizeof(int)); for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) { for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ ) { iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; if( poWK->panUnifiedSrcValid != nullptr && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) continue; if( GWKGetPixelValue( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueRealTmp, &dfValueImagTmp ) && dfBandDensity > BAND_DENSITY_THRESHOLD ) { const int nVal = static_cast(dfValueRealTmp); if( ++panVals[nVal+nBinsOffset] > nMaxVal ) { // Sum the density. // Is it the most common value so far? iMaxInd = nVal; nMaxVal = panVals[nVal+nBinsOffset]; } } } } if( iMaxInd != -1 ) { dfValueReal = iMaxInd; if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShit - padfZ[iDstX]; } dfBandDensity = 1; bHasFoundDensity = true; } } } // GRA_Mode. else if( nAlgo == GWKAOM_Max ) // poWK->eResample == GRA_Max. { bool bFoundValid = false; double dfTotalReal = std::numeric_limits::lowest(); // This code adapted from nAlgo 1 method, GRA_Average. for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) { for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ ) { iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; if( poWK->panUnifiedSrcValid != nullptr && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) { continue; } // Returns pixel value if it is not no data. if( GWKGetPixelValue( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueRealTmp, &dfValueImagTmp ) && dfBandDensity > BAND_DENSITY_THRESHOLD ) { bFoundValid = true; if( dfTotalReal < dfValueRealTmp ) { dfTotalReal = dfValueRealTmp; } } } } if( bFoundValid ) { dfValueReal = dfTotalReal; if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShit - padfZ[iDstX]; } dfBandDensity = 1; bHasFoundDensity = true; } } // GRA_Max. else if( nAlgo == GWKAOM_Min ) // poWK->eResample == GRA_Min. { bool bFoundValid = false; double dfTotalReal = std::numeric_limits::max(); // This code adapted from nAlgo 1 method, GRA_Average. for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ ) { for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ ) { iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; if( poWK->panUnifiedSrcValid != nullptr && !(poWK->panUnifiedSrcValid[iSrcOffset>>5] & (0x01 << (iSrcOffset & 0x1f))) ) { continue; } // Returns pixel value if it is not no data. if( GWKGetPixelValue( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueRealTmp, &dfValueImagTmp ) && dfBandDensity > BAND_DENSITY_THRESHOLD ) { bFoundValid = true; if( dfTotalReal > dfValueRealTmp ) { dfTotalReal = dfValueRealTmp; } } } } if( bFoundValid ) { dfValueReal = dfTotalReal; if( poWK->bApplyVerticalShift ) { if( !std::isfinite(padfZ[iDstX]) ) continue; // Subtract padfZ[] since the coordinate transformation is from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShit - padfZ[iDstX]; } dfBandDensity = 1; bHasFoundDensity = true; } } // GRA_Min. else if( nAlgo == GWKAOM_Quant ) // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3. { bool bFoundValid = false; std::vector