/*
 *  Copyright (C) 2004 Steve Harris
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  $Id: denormal-finder.c $
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#ifdef __SSE__
#include <xmmintrin.h>
#endif

#define SIZE 1000

#define TIME(a) \
    _t = 1e100; \
    for (j=0; j<10; j++) { \
	then = gettime(); \
	for (i=0; i<SIZE; i++) { \
	    a; \
	} \
	now = gettime(); \
	if (now - then < _t) { \
	    _t = now - then; \
	} \
    }

long double basetime = 0.0;

static inline long double gettime();

typedef float tt;

void set_denormal_flags();

int main(int arhc, char *argv[])
{
    tt ones[SIZE];
    tt vars[SIZE];
    tt out[SIZE];
    int i, j, it;
    long double then, now;
    long double norm_time, denorm_time;
    long double mid, upper, lower;
    long double max_zero, min_denorm;
    long double _t;

    set_denormal_flags();

    basetime = gettime() - 1000.0;

    for (i=0; i<SIZE; i++) {
	ones[i] = 1.0;
	vars[i] = 1.0;
    }

    TIME(out[i] = ones[i] * vars[i]);
    norm_time = _t;
    printf("base time = %.1Lfus\n", norm_time * 1000000.0);

    upper = 1.0;
    lower = 0.0;

    for (it=0; it<102400; it++) {
	mid = (upper + lower) * 0.5;
	if ((float)mid == 0.0f) {
	    lower = mid;
	} else {
	    upper = mid;
	}
    }
    max_zero = lower;
    min_denorm = upper;

    /* upper should be denormal */
    
    for (i=0; i<SIZE; i++) {
	vars[i] = upper;
    }

    TIME(out[i] = ones[i] * vars[i]);
    denorm_time = _t;

    if (norm_time * 4.0 > denorm_time) {
	printf("Can't find denormal numbers just obove 0.0\n");
	printf("Possibly denormals are disabled\n");
        printf("   1.0 MUL time = %Lfs\n", norm_time);
        printf("   %Lg MUL time = %Lfs\n", upper, denorm_time);
	return 1;
    }
    printf("denormals take approx. %.1Lf * execution time\n", denorm_time / norm_time);

    lower = upper;
    upper = 1.0;

    for (it=0; it<1024; it++) {
	mid = (upper + lower) * 0.5;
	for (i=0; i<SIZE; i++) {
	    vars[i] = mid;
	}
	TIME(out[i] = ones[i] * vars[i]);
	if (_t > denorm_time * 0.5) {
	    lower = mid;
	} else {
	    upper = mid;
	}
    }
    printf("\n");
    printf("lower bound on normals   = %Lg\n", upper);
    printf("upper bound on denormals = %Lg\n", lower);
    printf("lower bound on denormals = %Lg\n", min_denorm);
    printf("upper bound on zeros     = %Lg\n", max_zero);

    return 0;
}

static inline long double gettime()
{
    struct timeval tv;

    gettimeofday(&tv, NULL);

    return (long double)(tv.tv_usec * 10e-6) + (long double)(tv.tv_sec) -
	    basetime;
}

void set_denormal_flags()
{
    unsigned long a, b, c, d;

#ifdef __SSE__

    asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
    if (d & 1<<25) { /* It has SSE support */
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

	asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0));
	if (b == 0x756e6547) { /* It's an Intel */
	    int stepping, model, family, extfamily;

	    family = (a >> 8) & 0xf;
	    extfamily = (a >> 20) & 0xff;
	    model = (a >> 4) & 0xf;
	    stepping = a & 0xf;
	    if (family == 15 && extfamily == 0 && model == 0 && stepping < 7) {
		return;
	    }
	}
	asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
	if (d & 1<<26) { /* bit 26, SSE2 support */
	    _mm_setcsr(_mm_getcsr() | 0x40);
	}
    } else {
	fprintf(stderr, "This code has been built with SSE support, but your processor does not support\nthe SSE instruction set.\nexiting\n");
	exit(1);
    }
#endif
}

/* vi:set ts=8 sts=4 sw=4: */

