spandsp 0.0.6
fast_convert.h
/*
 * SpanDSP - a series of DSP components for telephony
 *
 * fast_convert.h - Quick ways to convert floating point numbers to integers
 *
 * Written by Steve Underwood <steveu@coppice.org>
 *
 * Copyright (C) 2009 Steve Underwood
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 2.1,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#if !defined(_SPANDSP_FAST_CONVERT_H_)
#define _SPANDSP_FAST_CONVERT_H_

#if defined(__cplusplus)
extern "C"
{
#endif

/* The following code, to handle issues with lrint() and lrintf() on various
 * platforms, is adapted from similar code in libsndfile, which is:
 *
 * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 */
/*
 * On Intel Pentium processors (especially PIII and probably P4), converting
 * from float to int is very slow. To meet the C specs, the code produced by
 * most C compilers targeting Pentium needs to change the FPU rounding mode
 * before the float to int conversion is performed.
 *
 * Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
 * is this flushing of the pipeline which is so slow.
 *
 * Fortunately the ISO C99 specification defines the functions lrint, lrintf,
 * llrint and llrintf which fix this problem as a side effect.
 *
 * On Unix-like systems, the configure process should have detected the
 * presence of these functions. If they weren't found we have to replace them
 * here with a standard C cast.
 */
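
/* Note that lrint() and friends round to the nearest integer (using the current
 * rounding mode, normally round-to-nearest-even), while a plain C cast truncates
 * towards zero, so the two are not interchangeable for every value. A small
 * illustration, using <math.h>'s lrint() (not part of this header):
 *
 *     (long int) 2.7    ->  2    (cast truncates towards zero)
 *     lrint(2.7)        ->  3    (rounds to nearest)
 *     lrint(2.5)        ->  2    (ties round to even by default)
 *     (long int) -2.7   -> -2
 *     lrint(-2.7)       -> -3
 */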

/*
 * The C99 prototypes for these functions are as follows:
 *
 * float rintf(float x);
 * double rint(double x);
 * long int lrintf(float x);
 * long int lrint(double x);
 * long long int llrintf(float x);
 * long long int llrint(double x);
 *
 * The presence of the required functions is detected during the configure
 * process, and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
 * the config file.
 */
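
/* A minimal sketch of how such configure results are typically consumed (the
 * macro names follow the comment above; this header itself selects an
 * implementation per platform below, rather than testing these macros):
 *
 *     #if defined(HAVE_LRINT) && defined(HAVE_LRINTF)
 *         // Use the C99 lrint()/lrintf() from <math.h> directly
 *     #else
 *         // Fall back to a plain cast, or inline replacements like those below
 *     #endif
 */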

#if defined(__CYGWIN__)
#if !defined(__cplusplus) && (__GNUC__ < 4)
    /*
     * CYGWIN versions prior to 1.7.1 have lrint and lrintf functions, but
     * they are slow and buggy:
     *     http://sourceware.org/ml/cygwin/2005-06/msg00153.html
     *     http://sourceware.org/ml/cygwin/2005-09/msg00047.html
     * These replacement functions (pulled from the Public Domain MinGW
     * math.h header) replace the native versions.
     */
    static __inline__ long int lrint(double x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );

        return retval;
    }

    static __inline__ long int lrintf(float x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );
        return retval;
    }
#endif

    /* The fastest way to convert is the equivalent of lrint() */
    static __inline__ long int lfastrint(double x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );

        return retval;
    }

    static __inline__ long int lfastrintf(float x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );
        return retval;
    }
#elif defined(__GNUC__) || (__SUNPRO_C >= 0x0590)

#if defined(__i386__)
    /* These routines are guaranteed fast on an i386 machine. Using the built in
       lrint() and lrintf() should be similar, but they may not always be enabled.
       Sometimes, especially with "-O0", you might get slow library calls instead
       of inline code. */
    static __inline__ long int lfastrint(double x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );

        return retval;
    }

    static __inline__ long int lfastrintf(float x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );
        return retval;
    }
#elif defined(__x86_64__)
    /* On an x86_64 machine the fastest thing seems to be a plain cast from a double
       or float to an int. The SSE2 conversion instructions truncate towards zero, as
       the C standard requires, so no change of rounding mode is needed. */
    static __inline__ long int lfastrint(double x)
    {
        return (long int) (x);
    }

    static __inline__ long int lfastrintf(float x)
    {
        return (long int) (x);
    }
#elif defined(__ppc__) || defined(__powerpc__)
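    /* fctiw converts the value to a 32 bit signed integer, using the current
       rounding mode, and leaves it in the low order word of the FPR; stfd then
       stores the full 8 byte register, so on big-endian PowerPC the integer
       ends up in res[1]. */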
    static __inline__ long int lfastrint(register double x)
    {
        int res[2];

        __asm__ __volatile__
        (
            "fctiw %1, %1\n\t"
            "stfd %1, %0"
            : "=m" (res)    /* Output */
            : "f" (x)       /* Input */
            : "memory"
        );

        return res[1];
    }

    static __inline__ long int lfastrintf(register float x)
    {
        int res[2];

        __asm__ __volatile__
        (
            "fctiw %1, %1\n\t"
            "stfd %1, %0"
            : "=m" (res)    /* Output */
            : "f" (x)       /* Input */
            : "memory"
        );

        return res[1];
    }
#else
    /* Fallback routines, for unrecognised platforms */
    static __inline__ long int lfastrint(double x)
    {
        return (long int) x;
    }

    static __inline__ long int lfastrintf(float x)
    {
        return (long int) x;
    }
#endif

#elif defined(_M_IX86)
    /* Visual Studio i386 */
    /*
     * The Win32 C run-time doesn't seem to provide the lrint() and lrintf()
     * functions, so inline versions of them are implemented here.
     */

    __inline long int lrint(double x)
    {
        long int i;

        _asm
        {
            fld x
            fistp i
        };
        return i;
    }

    __inline long int lrintf(float x)
    {
        long int i;

        _asm
        {
            fld x
            fistp i
        };
        return i;
    }

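    /* rintf() and rint() below round in place on the x87 stack. The rounded
       result is deliberately left in ST(0), which is where the 32 bit x86
       calling convention expects a floating point return value, so no explicit
       return statement is needed. */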
    __inline float rintf(float flt)
    {
        _asm
        {
            fld flt
            frndint
        }
    }

    __inline double rint(double dbl)
    {
        _asm
        {
            fld dbl
            frndint
        }
    }

    __inline long int lfastrint(double x)
    {
        long int i;

        _asm
        {
            fld x
            fistp i
        };
        return i;
    }

    __inline long int lfastrintf(float x)
    {
        long int i;

        _asm
        {
            fld x
            fistp i
        };
        return i;
    }
#elif defined(_M_X64)
    /* Visual Studio x86_64 */
    /* For the fast versions, x86_64 machines will do best with a simple cast.
       lrint() and lrintf() are built from the SSE2 conversion intrinsics. */
#include <intrin.h>
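
    /* _mm_cvtsd_si64x() and _mm_cvt_ss2si() convert using the current MXCSR
       rounding mode (round-to-nearest by default), matching lrint()/lrintf()
       semantics, while the plain casts in lfastrint()/lfastrintf() truncate
       towards zero. */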

    __inline long int lrint(double x)
    {
        return (long int) _mm_cvtsd_si64x(_mm_load_sd(&x));
    }

    __inline long int lrintf(float x)
    {
        return _mm_cvt_ss2si(_mm_load_ss(&x));
    }

    __inline long int lfastrint(double x)
    {
        return (long int) (x);
    }

    __inline long int lfastrintf(float x)
    {
        return (long int) (x);
    }
#elif defined(__MWERKS__) && defined(macintosh)
    /* This MacOS 9 solution was provided by Stephane Letz */

    long int __inline__ lfastrint(register double x)
    {
        long int res[2];

        asm
        {
            fctiw x, x
            stfd x, res
        }
        return res[1];
    }

    long int __inline__ lfastrintf(register float x)
    {
        long int res[2];

        asm
        {
            fctiw x, x
            stfd x, res
        }
        return res[1];
    }
#elif defined(__MACH__) && defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
    /* For Apple Mac OS/X - do recent versions still need this? */

    static __inline__ long int lfastrint(register double x)
    {
        int res[2];

        __asm__ __volatile__
        (
            "fctiw %1, %1\n\t"
            "stfd %1, %0"
            : "=m" (res)    /* Output */
            : "f" (x)       /* Input */
            : "memory"
        );

        return res[1];
    }

    static __inline__ long int lfastrintf(register float x)
    {
        int res[2];

        __asm__ __volatile__
        (
            "fctiw %1, %1\n\t"
            "stfd %1, %0"
            : "=m" (res)    /* Output */
            : "f" (x)       /* Input */
            : "memory"
        );

        return res[1];
    }
#else
    /* There is nothing else we can do here but use a simple cast, instead of a real
       rint() type function. Since we are only using rint() to speed up conversions,
       the accuracy difference from the changed rounding behaviour is of little
       concern to us. */

    #if !defined(__sgi) && !defined(__sunos) && !defined(__solaris) && !defined(__sun)
        #warning "No usable lrint() and lrintf() functions available."
        #warning "Replacing these functions with a simple C cast."
    #endif

    static __inline__ long int lrint(double x)
    {
        return (long int) (x);
    }

    static __inline__ long int lrintf(float x)
    {
        return (long int) (x);
    }

    static __inline__ long int lfastrint(double x)
    {
        return (long int) (x);
    }

    static __inline__ long int lfastrintf(float x)
    {
        return (long int) (x);
    }
#endif
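
/* A typical use of these routines in DSP code is converting a block of floating
 * point samples to 16 bit linear PCM. A minimal sketch (the function name and
 * the scaling policy here are illustrative, not part of this API):
 *
 *     #include <stdint.h>
 *
 *     static void floats_to_int16(const float amp_in[], int16_t amp_out[], int len)
 *     {
 *         int i;
 *
 *         for (i = 0;  i < len;  i++)
 *             amp_out[i] = (int16_t) lfastrintf(amp_in[i]*32767.0f);
 *     }
 *
 * Note that out of range inputs are not saturated here; real code should clamp
 * before or after the conversion.
 */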

#if defined(__cplusplus)
}
#endif

#endif

/*- End of file ------------------------------------------------------------*/