NFFT 3.5.3alpha
fastsum_benchomp_detail.c
1/*
2 * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <stdio.h>
20#include <math.h>
21#include <string.h>
22#include <stdlib.h>
23#include <complex.h>
24
25#include "nfft3.h"
26#include "infft.h"
27#include "fastsum.h"
28#include "kernels.h"
29#ifdef _OPENMP
30#include <omp.h>
31#endif
32
33int bench_openmp(FILE *infile, int n, int m, int p,
34 C (*kernel)(R, int, const R *), R c, R eps_I, R eps_B)
35{
36 fastsum_plan my_fastsum_plan;
37 int d, L, M;
38 int t, j;
39 R re, im;
40 R r_max = K(0.25) - my_fastsum_plan.eps_B / K(2.0);
41 ticks t0, t1;
42 R tt_total;
43
44 fscanf(infile, "%d %d %d", &d, &L, &M);
45
46#ifdef _OPENMP
47 FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_threads.plan");
48#else
49 FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_single.plan");
50#endif
51
52 fastsum_init_guru(&my_fastsum_plan, d, L, M, kernel, &c, NEARFIELD_BOXES, n,
53 m, p, eps_I, eps_B);
54
55#ifdef _OPENMP
56 FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_threads.plan");
57#else
58 FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_single.plan");
59#endif
60
61 for (j = 0; j < L; j++)
62 {
63 for (t = 0; t < d; t++)
64 {
65 R v;
66 fscanf(infile, __FR__, &v);
67 my_fastsum_plan.x[d * j + t] = v * r_max;
68 }
69 }
70
71 for (j = 0; j < L; j++)
72 {
73 fscanf(infile, __FR__ " " __FR__, &re, &im);
74 my_fastsum_plan.alpha[j] = re + II * im;
75 }
76
77 for (j = 0; j < M; j++)
78 {
79 for (t = 0; t < d; t++)
80 {
81 R v;
82 fscanf(infile, __FR__, &v);
83 my_fastsum_plan.y[d * j + t] = v * r_max;
84 }
85 }
86
88 t0 = getticks();
89 fastsum_precompute(&my_fastsum_plan);
90
92 fastsum_trafo(&my_fastsum_plan);
93 t1 = getticks();
94 tt_total = NFFT(elapsed_seconds)(t1, t0);
95
96#ifndef MEASURE_TIME
97 my_fastsum_plan.MEASURE_TIME_t[0] = K(0.0);
98 my_fastsum_plan.MEASURE_TIME_t[1] = K(0.0);
99 my_fastsum_plan.MEASURE_TIME_t[2] = K(0.0);
100 my_fastsum_plan.MEASURE_TIME_t[3] = K(0.0);
101 my_fastsum_plan.MEASURE_TIME_t[4] = K(0.0);
102 my_fastsum_plan.MEASURE_TIME_t[5] = K(0.0);
103 my_fastsum_plan.MEASURE_TIME_t[6] = K(0.0);
104 my_fastsum_plan.MEASURE_TIME_t[7] = K(0.0);
105 my_fastsum_plan.mv1.MEASURE_TIME_t[0] = K(0.0);
106 my_fastsum_plan.mv1.MEASURE_TIME_t[2] = K(0.0);
107 my_fastsum_plan.mv2.MEASURE_TIME_t[0] = K(0.0);
108 my_fastsum_plan.mv2.MEASURE_TIME_t[2] = K(0.0);
109#endif
110#ifndef MEASURE_TIME_FFTW
111 my_fastsum_plan.mv1.MEASURE_TIME_t[1] = K(0.0);
112 my_fastsum_plan.mv2.MEASURE_TIME_t[1] = K(0.0);
113#endif
114
115 printf(
116 "%.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ "\n",
117 my_fastsum_plan.MEASURE_TIME_t[0], my_fastsum_plan.MEASURE_TIME_t[1],
118 my_fastsum_plan.MEASURE_TIME_t[2], my_fastsum_plan.MEASURE_TIME_t[3],
119 my_fastsum_plan.MEASURE_TIME_t[4], my_fastsum_plan.MEASURE_TIME_t[5],
120 my_fastsum_plan.MEASURE_TIME_t[6], my_fastsum_plan.MEASURE_TIME_t[7],
121 tt_total - my_fastsum_plan.MEASURE_TIME_t[0]
122 - my_fastsum_plan.MEASURE_TIME_t[1]
123 - my_fastsum_plan.MEASURE_TIME_t[2]
124 - my_fastsum_plan.MEASURE_TIME_t[3]
125 - my_fastsum_plan.MEASURE_TIME_t[4]
126 - my_fastsum_plan.MEASURE_TIME_t[5]
127 - my_fastsum_plan.MEASURE_TIME_t[6]
128 - my_fastsum_plan.MEASURE_TIME_t[7], tt_total,
129 my_fastsum_plan.mv1.MEASURE_TIME_t[0],
130 my_fastsum_plan.mv1.MEASURE_TIME_t[1],
131 my_fastsum_plan.mv1.MEASURE_TIME_t[2],
132 my_fastsum_plan.mv2.MEASURE_TIME_t[0],
133 my_fastsum_plan.mv2.MEASURE_TIME_t[1],
134 my_fastsum_plan.mv2.MEASURE_TIME_t[2]);
135
136 fastsum_finalize(&my_fastsum_plan);
137
138 return 0;
139}
140
141int main(int argc, char **argv)
142{
143 int n;
144 int m;
145 int p;
146 char *s;
147 C (*kernel)(R, int, const R *);
148 R c;
149 R eps_I;
150 R eps_B;
151
152#ifdef _OPENMP
153 int nthreads;
154
155 if (argc != 9)
156 return EXIT_FAILURE;
157
158 nthreads = atoi(argv[8]);
159 FFTW(init_threads)();
160 omp_set_num_threads(nthreads);
161#else
162 if (argc != 8)
163 return EXIT_FAILURE;
164#endif
165
166 n = atoi(argv[1]);
167 m = atoi(argv[2]);
168 p = atoi(argv[3]);
169 s = argv[4];
170 c = atof(argv[5]);
171 eps_I = (R)(atof(argv[6]));
172 eps_B = (R)(atof(argv[7]));
173 if (strcmp(s, "gaussian") == 0)
174 kernel = gaussian;
175 else if (strcmp(s, "multiquadric") == 0)
176 kernel = multiquadric;
177 else if (strcmp(s, "inverse_multiquadric") == 0)
178 kernel = inverse_multiquadric;
179 else if (strcmp(s, "logarithm") == 0)
180 kernel = logarithm;
181 else if (strcmp(s, "thinplate_spline") == 0)
182 kernel = thinplate_spline;
183 else if (strcmp(s, "one_over_square") == 0)
184 kernel = one_over_square;
185 else if (strcmp(s, "one_over_modulus") == 0)
186 kernel = one_over_modulus;
187 else if (strcmp(s, "one_over_x") == 0)
188 kernel = one_over_x;
189 else if (strcmp(s, "inverse_multiquadric3") == 0)
190 kernel = inverse_multiquadric3;
191 else if (strcmp(s, "sinc_kernel") == 0)
192 kernel = sinc_kernel;
193 else if (strcmp(s, "cosc") == 0)
194 kernel = cosc;
195 else if (strcmp(s, "cot") == 0)
196 kernel = kcot;
197 else if (strcmp(s, "one_over_cube") == 0)
198 kernel = one_over_cube;
199 else if (strcmp(s, "log_sin") == 0)
200 kernel = log_sin;
201 else
202 {
203 s = "multiquadric";
204 kernel = multiquadric;
205 }
206
207 bench_openmp(stdin, n, m, p, kernel, c, eps_I, eps_B);
208
209 return EXIT_SUCCESS;
210}
211
Header file for the fast NFFT-based summation algorithm.
void fastsum_precompute(fastsum_plan *ths)
precomputation for fastsum
Definition fastsum.c:1173
C inverse_multiquadric(R x, int der, const R *param)
K(x)=1/sqrt(x^2+c^2).
Definition kernels.c:90
C logarithm(R x, int der, const R *param)
K(x)=log |x|.
Definition kernels.c:116
C multiquadric(R x, int der, const R *param)
K(x)=sqrt(x^2+c^2).
Definition kernels.c:64
void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B)
initialization of fastsum plan
Definition fastsum.c:987
R * x
source knots in d-ball with radius 1/4-eps_b/2
Definition fastsum.h:94
C one_over_cube(R x, int der, const R *param)
K(x) = 1/x^3.
Definition kernels.c:374
C one_over_square(R x, int der, const R *param)
K(x) = 1/x^2.
Definition kernels.c:177
C sinc_kernel(R x, int der, const R *param)
K(x) = sin(cx)/x.
Definition kernels.c:287
C kcot(R x, int der, const R *param)
K(x) = cot(cx).
Definition kernels.c:346
R MEASURE_TIME_t[8]
Measured time for each step if MEASURE_TIME is set.
Definition fastsum.h:134
C one_over_modulus(R x, int der, const R *param)
K(x) = 1/|x|.
Definition kernels.c:205
struct fastsum_plan_ fastsum_plan
plan for fast summation algorithm
C * alpha
source coefficients
Definition fastsum.h:91
R eps_B
outer boundary
Definition fastsum.h:114
C thinplate_spline(R x, int der, const R *param)
K(x) = x^2 log |x|.
Definition kernels.c:149
void fastsum_trafo(fastsum_plan *ths)
fast NFFT-based summation
Definition fastsum.c:1180
void fastsum_finalize(fastsum_plan *ths)
finalization of fastsum plan
Definition fastsum.c:1048
C inverse_multiquadric3(R x, int der, const R *param)
K(x) = 1/sqrt(x^2+c^2)^3.
Definition kernels.c:261
C gaussian(R x, int der, const R *param)
K(x)=exp(-x^2/c^2).
Definition kernels.c:38
C one_over_x(R x, int der, const R *param)
K(x) = 1/x.
Definition kernels.c:233
C cosc(R x, int der, const R *param)
K(x) = cos(cx)/x.
Definition kernels.c:314
R * y
target knots in d-ball with radius 1/4-eps_b/2
Definition fastsum.h:95
C log_sin(R x, int der, const R *param)
K(x) = log(|sin(cx)|).
Definition kernels.c:402
Internal header file for auxiliary definitions and functions.
Header file with predefined kernels for the fast summation algorithm.
Header file for the nfft3 library.