The major malfunction here: this DOES NOT compile without optimization.
Would it be acceptable to #define it away in that case? The lesser bug is
that _delay_us_2() only works with a compile-time constant argument; but who
wants floating-point arithmetic at runtime, anyway?
This delay.h adds _delay_us_2() which is more precise and has a larger max
than _delay_us(). There is also a _delay_loop_2b() added.
_delay_us_2() uses the 4-clock, 16-bit sbiw loop. If a delay shorter than the
loop's minimum is requested, it resolves into the right number of NOPs instead.
In addition, 1, 2, or 3 NOPs are appended to shim out the 4-clock loop exactly.
The other function, _delay_loop_2b(), is a variant of _delay_loop_2() that
insists on a constant argument. The change is that it uses a clobber list to
take R24+R25 for itself and LDIs the constant argument into them directly.
_delay_loop_2() is less predictable because the compiler may insert a MOV
shuffle to set up its counter.
The else {/*NOTREACHED*/} could be filled in with a still larger delay routine
for almost unlimited us count.
The included delaytest.c is the only testing I did on this delay.h; the
included listing was generated with plain -O. In the listing, the interleaved
C source lines appear a few lines after the assembler generated for them.
--- /usr/avr/include/util/delay.h 2006-05-01 06:04:50.000000000 -0400
+++ delay.h 2006-08-20 16:31:03.000000000 -0400
@@ -83,6 +83,7 @@
#if !defined(__DOXYGEN__)
static inline void _delay_loop_1(uint8_t __count) __attribute__((always_inline));
static inline void _delay_loop_2(uint16_t __count) __attribute__((always_inline));
+static inline void _delay_loop_2b(uint16_t __count) __attribute__((always_inline));
static inline void _delay_us(double __us) __attribute__((always_inline));
static inline void _delay_ms(double __ms) __attribute__((always_inline));
#endif
@@ -131,6 +132,33 @@
);
}
+/** \ingroup util_delay
+
+ Just like __delay_loop_2 but "plan b" explicitly uses r24-5 and clobber
+ directive to (hopefully) repel any mov shenanigans that may eat clocks.
+ Delay loop using a 16-bit counter \c __count, so up to 65536
+ iterations are possible. (The value 65536 would have to be
+ passed as 0.) The loop executes four CPU cycles per iteration.
+ not including the overhead the compiler requires to setup the
+ counter register pair.
+
+ Thus, at a CPU speed of 1 MHz, delays of up to about 262.1
+ milliseconds can be achieved.
+ */
+#define _delay_loop_2b( __count)\
+{\
+ __asm__ volatile (\
+ "ldi r24,lo8(%0) \n\t"\
+ "ldi r25,hi8(%0) \n\t"\
+ "1: sbiw r24,1 \n\t"\
+ "brne 1b \n\t"\
+ :\
+ : "n" (__count)\
+ : "r24", "r25"\
+ );\
+}
+
+
#ifndef F_CPU
/* prevent compiler error by supplying a default */
# warning "F_CPU not defined for <util/delay.h>"
@@ -140,25 +168,46 @@
/**
\ingroup util_delay
- Perform a delay of \c __us microseconds, using _delay_loop_1().
-
+ Perform a delay of \c __us microseconds, using _delay_loop_2b().
+ This routine will use nop shims to be as precise as possible, down to just 1 nop.
+ Or, in the case of 0.9 clocks-worth of delay, this will do NOTHING (IOW, it rounds down).
+ It needs gcc optimization (-O) in order to be inlined properly.
+ Optimization will clash with using -g to hand-count a listing, sorry.
The macro F_CPU is supposed to be defined to a
constant defining the CPU clock frequency (in Hertz).
- The maximal possible delay is 768 us / F_CPU in MHz.
+ The maximal possible delay is ~262.14 ms (not us!) / F_CPU in MHz.
*/
void
-_delay_us(double __us)
+_delay_us(const double __us)
{
- uint8_t __ticks;
- double __tmp = ((F_CPU) / 3e6) * __us;
- if (__tmp < 1.0)
- __ticks = 1;
- else if (__tmp > 255)
- __ticks = 0; /* i.e. 256 */
- else
- __ticks = (uint8_t)__tmp;
- _delay_loop_1(__ticks);
+ const double __clocks_per_us=((F_CPU)/1e6);
+ const double __clocks_delay = __clocks_per_us * __us;
+ const char __loop_2b_fixed=7; // the 4 ldi clocks + the 3clk last loop
+ const uint32_t __loop_2b_runs = ((__clocks_delay - __loop_2b_fixed) / 4)+1; // +1, don't forget the 3clk loop!
+ const double __remainder_clocks= __clocks_delay - ((__loop_2b_runs-1) * 4 + __loop_2b_fixed); //the minus 1 un-considers the 3clk last loop, which is counted in __loop_3_fixed
+
+ if(__loop_2b_runs < 65536) {
+ if (__clocks_delay < 1) {/*DONOTHING*/}
+ else if (__clocks_delay < 2) {asm volatile ("nop");}
+ else if (__clocks_delay < 3) {asm volatile ("rjmp +0");} /*rjmp 0(implicit PC+1) = 2 nops*/
+ else if (__clocks_delay < 4) {asm volatile ("rjmp +0\n\t nop");}
+ else if (__clocks_delay < 5) {asm volatile ("rjmp +0\n\t rjmp +0");}
+ else if (__clocks_delay < 6) {asm volatile ("rjmp +0\n\t rjmp +0 \n\t nop");}
+ else if (__clocks_delay < 7) {asm volatile ("rjmp +0\n\t rjmp +0 \n\t rjmp +0");}
+ else if (__remainder_clocks < 1) {
+ _delay_loop_2b((uint16_t)__loop_2b_runs);
+ } else if (__remainder_clocks < 2) {
+ _delay_loop_2b((uint16_t)__loop_2b_runs);
+ asm volatile ("nop");
+ } else if (__remainder_clocks < 3) {
+ _delay_loop_2b((uint16_t)__loop_2b_runs);
+ asm volatile ("rjmp +0");
+ } else if (__remainder_clocks < 4) {
+ _delay_loop_2b((uint16_t)__loop_2b_runs);
+ asm volatile ("rjmp +0\n\t nop");
+ }
+ } else {/*NOTREACHED*/}
}
/* Copyright (c) 2002, Marek Michalkiewicz
Copyright (c) 2004,2005 Joerg Wunsch
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of the copyright holders nor the names of
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE. */
/* $Id: delay.h,v 1.1.2.1 2005/12/12 23:19:49 joerg_wunsch Exp $ */
#ifndef _UTIL_DELAY_H_
#define _UTIL_DELAY_H_ 1
#include <inttypes.h>
/** \defgroup util_delay <util/delay.h>: Busy-wait delay loops
\code
#define F_CPU 1000000UL // 1 MHz
//#define F_CPU 14.7456E6
#include <util/delay.h>
\endcode
\note As an alternative method, it is possible to pass the
F_CPU macro down to the compiler from the Makefile.
Obviously, in that case, no \c \#define statement should be
used.
The functions in this header file implement simple delay loops
that perform a busy-waiting. They are typically used to
facilitate short delays in the program execution. They are
implemented as count-down loops with a well-known CPU cycle
count per loop iteration. As such, no other processing can
occur simultaneously. It should be kept in mind that the
functions described here do not disable interrupts.
In general, for long delays, the use of hardware timers is
much preferable, as they free the CPU, and allow for
concurrent processing of other events while the timer is
running. However, in particular for very short delays, the
overhead of setting up a hardware timer is too much compared
to the overall delay time.
Three inline functions (_delay_loop_1(), _delay_loop_2() and
_delay_loop_2b()) are provided for the actual delay algorithms.
Three wrapper functions (_delay_us(), _delay_us_2() and _delay_ms())
allow the specification of microsecond and millisecond delays
directly, using the application-supplied macro
F_CPU as the CPU clock frequency (in Hertz). These functions
operate on double typed arguments, however when optimization is
turned on, the entire floating-point calculation will be done at
compile-time.
\note When using _delay_us() and _delay_ms(), the expressions
passed as arguments to these functions shall be compile-time
constants, otherwise the floating-point calculations to setup the
loops will be done at run-time, thereby drastically increasing
both the resulting code size, as well as the time required to
setup the loops.
*/
#if !defined(__DOXYGEN__)
static inline void _delay_loop_1(uint8_t __count) __attribute__((always_inline));
static inline void _delay_loop_2(uint16_t __count) __attribute__((always_inline));
static inline void _delay_loop_2b(uint16_t __count) __attribute__((always_inline));
static inline void _delay_us(double __us) __attribute__((always_inline));
static inline void _delay_us_2(double __us) __attribute__((always_inline));
static inline void _delay_ms(double __ms) __attribute__((always_inline));
#endif
/** \ingroup util_delay
Delay loop using an 8-bit counter \c __count, so up to 256
iterations are possible. (The value 256 would have to be passed
as 0.) The loop executes three CPU cycles per iteration, not
including the overhead the compiler needs to setup the counter
register.
Thus, at a CPU speed of 1 MHz, delays of up to 768 microseconds
can be achieved.

\param __count number of loop iterations; 0 means 256, because the
counter is decremented before it is tested.
*/
void
_delay_loop_1(uint8_t __count)
{
/* Decrement the counter and branch back to local label 1 until it
   reaches zero. */
__asm__ volatile (
"1: dec %0" "\n\t"
"brne 1b"
/* "=r": the counter is modified in place, in any general register. */
: "=r" (__count)
/* "0": the input shares the register of output operand 0. */
: "0" (__count)
);
}
/** \ingroup util_delay
Delay loop using a 16-bit counter \c __count, so up to 65536
iterations are possible. (The value 65536 would have to be
passed as 0.) The loop executes four CPU cycles per iteration,
not including the overhead the compiler requires to setup the
counter register pair.
Thus, at a CPU speed of 1 MHz, delays of up to about 262.1
milliseconds can be achieved.

\param __count number of loop iterations; 0 means 65536, because the
counter is decremented before it is tested.
*/
void
_delay_loop_2(uint16_t __count)
{
/* Subtract one from the 16-bit counter and branch back to local
   label 1 until it reaches zero. */
__asm__ volatile (
"1: sbiw %0,1" "\n\t"
"brne 1b"
/* "=w": the counter is modified in place; "w" restricts it to a
   register pair that sbiw accepts. */
: "=w" (__count)
/* "0": the input shares the register pair of output operand 0. */
: "0" (__count)
);
}
/** \ingroup util_delay
Just like _delay_loop_2(), but this "plan b" variant explicitly uses
r24/r25 and declares them as clobbered, to (hopefully) repel any mov
shenanigans by the compiler that may eat clocks.
Delay loop using a 16-bit counter \c __count, so up to 65536
iterations are possible. (The value 65536 would have to be
passed as 0.) The loop executes four CPU cycles per iteration.
Unlike _delay_loop_2(), the counter setup (the two ldi instructions)
is part of the asm statement itself rather than left to the compiler.
Going by the AVR instruction timings (ldi: 1 clock, sbiw: 2 clocks,
brne: 2 clocks when taken, 1 when not), the whole sequence takes
4 * \c __count + 1 clocks.
Thus, at a CPU speed of 1 MHz, delays of up to about 262.1
milliseconds can be achieved.

\param __count number of loop iterations; 0 means 65536.

\note \c __count is passed to the asm with the "n" (immediate integer)
constraint. It therefore has to be a compile-time constant once this
function has been inlined, which requires optimization to be enabled;
without optimization the code does not compile.
*/
void
_delay_loop_2b(uint16_t __count)
{
__asm__ volatile (
/* Load the constant count straight into r25:r24. */
"ldi r24,lo8(%0) \n\t"
"ldi r25,hi8(%0) \n\t"
/* Count down to zero, branching back to local label 1. */
"1: sbiw r24,1 \n\t"
"brne 1b \n\t"
/* no outputs */
:
/* "n": immediate operand, must be known at compile time. */
: "n" (__count)
/* Tell the compiler that r24 and r25 are overwritten. */
: "r24", "r25"
);
}
#ifndef F_CPU
/* prevent compiler error by supplying a default */
# warning "F_CPU not defined for <util/delay.h>"
# define F_CPU 1000000UL
#endif
/**
   \ingroup util_delay
   Busy-wait for \c __us microseconds by running _delay_loop_1().

   F_CPU must be defined as a constant giving the CPU clock
   frequency in Hertz.

   The longest delay that can be produced is 768 us / F_CPU in MHz;
   longer requests are clamped to that maximum, and requests shorter
   than one loop pass still execute a single pass.
 */
void
_delay_us(double __us)
{
    /* _delay_loop_1() is documented as three clocks per pass, hence
       the division by 3e6 to turn microseconds into passes. */
    const double __passes = ((F_CPU) / 3e6) * __us;

    /* Clamp to the 8-bit counter range; a count of 0 means 256 passes. */
    const uint8_t __count = (__passes < 1.0) ? 1
                          : (__passes > 255) ? 0
                          : (uint8_t)__passes;

    _delay_loop_1(__count);
}
/**
   \ingroup util_delay
   Perform a delay of \c __us microseconds, using _delay_loop_2b().

   The requested time is converted to CPU clocks and rounded down to a
   whole clock; nop/rjmp shims make up whatever the 4-clock loop cannot
   express, so the emitted code is meant to be exact to one clock.
   A request worth less than one clock (e.g. 0.9 clocks) emits nothing.

   It must be called with a value known at compile-time, and it needs
   gcc optimization (-O) in order to be inlined and constant-folded
   properly; without optimization it does not compile, because
   _delay_loop_2b() takes its count as an immediate operand.
   Optimization will clash with using -g to hand-count a listing, sorry.

   The macro F_CPU is supposed to be defined to a
   constant defining the CPU clock frequency (in Hertz).

   The maximal possible delay is ~262.14 ms (not us!) / F_CPU in MHz.
   Longer requests are clamped to that maximum (65536 loop iterations),
   in the same way _delay_us() and _delay_ms() clamp theirs.

   Clock accounting (AVR instruction timings: nop 1, ldi 1, sbiw 2,
   rjmp 2, brne 2 when taken and 1 when not):
   _delay_loop_2b(n) costs 2 (two ldi) + 4 * (n - 1) + 3 (last pass)
   = 4 * n + 1 clocks, i.e. a fixed part of 5 clocks for n = 1.
 */
void
_delay_us_2(const double __us)
{
    /* Requested delay expressed in CPU clocks. */
    const double __clocks_delay = ((F_CPU) / 1e6) * __us;

    /*
     * "rjmp .+0" jumps to the instruction that follows it: a one-word,
     * two-clock no-op.  It must be spelled with the leading dot; a bare
     * "rjmp +0" is assembled as a jump to address 0.
     */
    if (__clocks_delay < 1) {
        /* less than one clock: do nothing (rounds down) */
    } else if (__clocks_delay < 2) {
        __asm__ volatile ("nop");
    } else if (__clocks_delay < 3) {
        __asm__ volatile ("rjmp .+0");
    } else if (__clocks_delay < 4) {
        __asm__ volatile ("rjmp .+0" "\n\t" "nop");
    } else if (__clocks_delay < 5) {
        __asm__ volatile ("rjmp .+0" "\n\t" "rjmp .+0");
    } else if (__clocks_delay < 6) {
        __asm__ volatile ("rjmp .+0" "\n\t" "rjmp .+0" "\n\t" "nop");
    } else if (__clocks_delay < 7) {
        __asm__ volatile ("rjmp .+0" "\n\t" "rjmp .+0" "\n\t" "rjmp .+0");
    } else if (__clocks_delay < 4.0 * 65536 + 1) {
        /* 2 ldi clocks + the 3-clock last pass of the loop */
        const double __loop_2b_fixed = 5;
        /* +1, don't forget the 3-clock last pass! */
        const uint32_t __loop_2b_runs =
            (uint32_t)((__clocks_delay - __loop_2b_fixed) / 4) + 1;
        /* The minus 1 un-considers the last pass, which is already
           counted in __loop_2b_fixed.  The result lies in [0, 4). */
        const double __remainder_clocks = __clocks_delay
            - ((__loop_2b_runs - 1) * 4.0 + __loop_2b_fixed);

        _delay_loop_2b((uint16_t)__loop_2b_runs);

        /* Shim out the clocks the 4-clock loop granularity leaves over. */
        if (__remainder_clocks < 1) {
            /* exact already */
        } else if (__remainder_clocks < 2) {
            __asm__ volatile ("nop");
        } else if (__remainder_clocks < 3) {
            __asm__ volatile ("rjmp .+0");
        } else {
            __asm__ volatile ("rjmp .+0" "\n\t" "nop");
        }
    } else {
        /* Beyond the range of the 16-bit counter: clamp to the longest
           delay available instead of silently not delaying at all.
           A count of 0 means 65536 iterations. */
        _delay_loop_2b(0);
    }
}
/**
   \ingroup util_delay
   Busy-wait for \c __ms milliseconds by running _delay_loop_2().

   F_CPU must be defined as a constant giving the CPU clock
   frequency in Hertz.

   The longest delay that can be produced is 262.14 ms / F_CPU in MHz;
   longer requests are clamped to that maximum, and requests shorter
   than one loop pass still execute a single pass.
 */
void
_delay_ms(double __ms)
{
    /* _delay_loop_2() is documented as four clocks per pass, hence
       the division by 4e3 to turn milliseconds into passes. */
    const double __passes = ((F_CPU) / 4e3) * __ms;

    /* Clamp to the 16-bit counter range; a count of 0 means 65536 passes. */
    const uint16_t __count = (__passes < 1.0) ? 1
                           : (__passes > 65535) ? 0
                           : (uint16_t)__passes;

    _delay_loop_2(__count);
}
#endif /* _UTIL_DELAY_H_ */
1 .file "delaytest.c"
2 .arch atmega32
3 __SREG__ = 0x3f
4 __SP_H__ = 0x3e
5 __SP_L__ = 0x3d
6 __tmp_reg__ = 0
7 __zero_reg__ = 1
8 .global __do_copy_data
9 .global __do_clear_bss
12 .text
13 .Ltext0:
69 .global main
71 main:
72 .stabd 46,0,0
1:delaytest.c **** #define F_CPU 10e6
2:delaytest.c **** #define MCU atmega32
3:delaytest.c **** #include <avr/io.h>
4:delaytest.c **** //#include <util/delay.h>
5:delaytest.c **** #include "delay.h"
6:delaytest.c **** int main() {
74 .LM0:
75 /* prologue: frame size=0 */
76 0000 C0E0 ldi r28,lo8(__stack - 0)
77 0002 D0E0 ldi r29,hi8(__stack - 0)
78 0004 DEBF out __SP_H__,r29
79 0006 CDBF out __SP_L__,r28
80 /* prologue end (size=4) */
7:delaytest.c **** char a=PINB;
82 .LM1:
83 0008 86B3 in r24,54-0x20
8:delaytest.c **** DDRC=12;
85 .LM2:
86 000a 8CE0 ldi r24,lo8(12)
87 000c 84BB out 52-0x20,r24
88 .LBB90:
89 .LBB91:
91 .Ltext1:
1:delay.h **** /* Copyright (c) 2002, Marek Michalkiewicz
2:delay.h **** Copyright (c) 2004,2005 Joerg Wunsch
3:delay.h **** All rights reserved.
4:delay.h ****
5:delay.h **** Redistribution and use in source and binary forms, with or without
6:delay.h **** modification, are permitted provided that the following conditions are met:
7:delay.h ****
8:delay.h **** * Redistributions of source code must retain the above copyright
9:delay.h **** notice, this list of conditions and the following disclaimer.
10:delay.h ****
11:delay.h **** * Redistributions in binary form must reproduce the above copyright
12:delay.h **** notice, this list of conditions and the following disclaimer in
13:delay.h **** the documentation and/or other materials provided with the
14:delay.h **** distribution.
15:delay.h ****
16:delay.h **** * Neither the name of the copyright holders nor the names of
17:delay.h **** contributors may be used to endorse or promote products derived
18:delay.h **** from this software without specific prior written permission.
19:delay.h ****
20:delay.h **** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21:delay.h **** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22:delay.h **** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23:delay.h **** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24:delay.h **** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25:delay.h **** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26:delay.h **** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27:delay.h **** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28:delay.h **** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29:delay.h **** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30:delay.h **** POSSIBILITY OF SUCH DAMAGE. */
31:delay.h ****
32:delay.h **** /* $Id: delay.h,v 1.1.2.1 2005/12/12 23:19:49 joerg_wunsch Exp $ */
33:delay.h ****
34:delay.h **** #ifndef _UTIL_DELAY_H_
35:delay.h **** #define _UTIL_DELAY_H_ 1
36:delay.h ****
37:delay.h **** #include <inttypes.h>
38:delay.h ****
39:delay.h **** /** \defgroup util_delay <util/delay.h>: Busy-wait delay loops
40:delay.h **** \code
41:delay.h **** #define F_CPU 1000000UL // 1 MHz
42:delay.h **** //#define F_CPU 14.7456E6
43:delay.h **** #include <util/delay.h>
44:delay.h **** \endcode
45:delay.h ****
46:delay.h **** \note As an alternative method, it is possible to pass the
47:delay.h **** F_CPU macro down to the compiler from the Makefile.
48:delay.h **** Obviously, in that case, no \c \#define statement should be
49:delay.h **** used.
50:delay.h ****
51:delay.h **** The functions in this header file implement simple delay loops
52:delay.h **** that perform a busy-waiting. They are typically used to
53:delay.h **** facilitate short delays in the program execution. They are
54:delay.h **** implemented as count-down loops with a well-known CPU cycle
55:delay.h **** count per loop iteration. As such, no other processing can
56:delay.h **** occur simultaneously. It should be kept in mind that the
57:delay.h **** functions described here do not disable interrupts.
58:delay.h ****
59:delay.h **** In general, for long delays, the use of hardware timers is
60:delay.h **** much preferrable, as they free the CPU, and allow for
61:delay.h **** concurrent processing of other events while the timer is
62:delay.h **** running. However, in particular for very short delays, the
63:delay.h **** overhead of setting up a hardware timer is too much compared
64:delay.h **** to the overall delay time.
65:delay.h ****
66:delay.h **** Two inline functions are provided for the actual delay algorithms.
67:delay.h ****
68:delay.h **** Two wrapper functions allow the specification of microsecond, and
69:delay.h **** millisecond delays directly, using the application-supplied macro
70:delay.h **** F_CPU as the CPU clock frequency (in Hertz). These functions
71:delay.h **** operate on double typed arguments, however when optimization is
72:delay.h **** turned on, the entire floating-point calculation will be done at
73:delay.h **** compile-time.
74:delay.h ****
75:delay.h **** \note When using _delay_us() and _delay_ms(), the expressions
76:delay.h **** passed as arguments to these functions shall be compile-time
77:delay.h **** constants, otherwise the floating-point calculations to setup the
78:delay.h **** loops will be done at run-time, thereby drastically increasing
79:delay.h **** both the resulting code size, as well as the time required to
80:delay.h **** setup the loops.
81:delay.h **** */
82:delay.h ****
83:delay.h **** #if !defined(__DOXYGEN__)
84:delay.h **** static inline void _delay_loop_1(uint8_t __count) __attribute__((always_inline));
85:delay.h **** static inline void _delay_loop_2(uint16_t __count) __attribute__((always_inline));
86:delay.h **** static inline void _delay_loop_2b(uint16_t __count) __attribute__((always_inline));
87:delay.h **** static inline void _delay_us(double __us) __attribute__((always_inline));
88:delay.h **** static inline void _delay_us_2(double __us) __attribute__((always_inline));
89:delay.h **** static inline void _delay_ms(double __ms) __attribute__((always_inline));
90:delay.h **** #endif
91:delay.h ****
92:delay.h **** /** \ingroup util_delay
93:delay.h ****
94:delay.h **** Delay loop using an 8-bit counter \c __count, so up to 256
95:delay.h **** iterations are possible. (The value 256 would have to be passed
96:delay.h **** as 0.) The loop executes three CPU cycles per iteration, not
97:delay.h **** including the overhead the compiler needs to setup the counter
98:delay.h **** register.
99:delay.h ****
100:delay.h **** Thus, at a CPU speed of 1 MHz, delays of up to 768 microseconds
101:delay.h **** can be achieved.
102:delay.h **** */
103:delay.h **** void
104:delay.h **** _delay_loop_1(uint8_t __count)
105:delay.h **** {
106:delay.h **** __asm__ volatile (
107:delay.h **** "1: dec %0" "\n\t"
108:delay.h **** "brne 1b"
109:delay.h **** : "=r" (__count)
110:delay.h **** : "0" (__count)
111:delay.h **** );
112:delay.h **** }
113:delay.h ****
114:delay.h **** /** \ingroup util_delay
115:delay.h ****
116:delay.h **** Delay loop using a 16-bit counter \c __count, so up to 65536
117:delay.h **** iterations are possible. (The value 65536 would have to be
118:delay.h **** passed as 0.) The loop executes four CPU cycles per iteration,
119:delay.h **** not including the overhead the compiler requires to setup the
120:delay.h **** counter register pair.
121:delay.h ****
122:delay.h **** Thus, at a CPU speed of 1 MHz, delays of up to about 262.1
123:delay.h **** milliseconds can be achieved.
124:delay.h **** */
125:delay.h **** void
126:delay.h **** _delay_loop_2(uint16_t __count)
127:delay.h **** {
128:delay.h **** __asm__ volatile (
129:delay.h **** "1: sbiw %0,1" "\n\t"
130:delay.h **** "brne 1b"
131:delay.h **** : "=w" (__count)
132:delay.h **** : "0" (__count)
133:delay.h **** );
134:delay.h **** }
135:delay.h ****
136:delay.h **** /** \ingroup util_delay
137:delay.h ****
138:delay.h **** Just like __delay_loop_2 but "plan b" explicitly uses r24-5 and clobber
139:delay.h **** directive to (hopefully) repel any mov shenanigans that may eat clocks.
140:delay.h **** Delay loop using a 16-bit counter \c __count, so up to 65536
141:delay.h **** iterations are possible. (The value 65536 would have to be
142:delay.h **** passed as 0.) The loop executes four CPU cycles per iteration.
143:delay.h **** not including the overhead the compiler requires to setup the
144:delay.h **** counter register pair.
145:delay.h ****
146:delay.h **** Thus, at a CPU speed of 1 MHz, delays of up to about 262.1
147:delay.h **** milliseconds can be achieved.
148:delay.h **** */
149:delay.h **** void
150:delay.h **** _delay_loop_2b(uint16_t __count)
151:delay.h **** {
152:delay.h **** __asm__ volatile (
153:delay.h **** "ldi r24,lo8(%0) \n\t"
154:delay.h **** "ldi r25,hi8(%0) \n\t"
155:delay.h **** "1: sbiw r24,1 \n\t"
156:delay.h **** "brne 1b \n\t"
157:delay.h **** :
158:delay.h **** : "n" (__count)
159:delay.h **** : "r24", "r25"
160:delay.h **** );
161:delay.h **** }
162:delay.h ****
163:delay.h ****
164:delay.h **** #ifndef F_CPU
165:delay.h **** /* prevent compiler error by supplying a default */
166:delay.h **** # warning "F_CPU not defined for <util/delay.h>"
167:delay.h **** # define F_CPU 1000000UL
168:delay.h **** #endif
169:delay.h ****
170:delay.h **** /**
171:delay.h **** \ingroup util_delay
172:delay.h ****
173:delay.h **** Perform a delay of \c __us microseconds, using _delay_loop_1().
174:delay.h ****
175:delay.h **** The macro F_CPU is supposed to be defined to a
176:delay.h **** constant defining the CPU clock frequency (in Hertz).
177:delay.h ****
178:delay.h **** The maximal possible delay is 768 us / F_CPU in MHz.
179:delay.h **** */
180:delay.h **** void
181:delay.h **** _delay_us(double __us)
182:delay.h **** {
183:delay.h **** uint8_t __ticks;
184:delay.h **** double __tmp = ((F_CPU) / 3e6) * __us;
185:delay.h **** if (__tmp < 1.0)
186:delay.h **** __ticks = 1;
187:delay.h **** else if (__tmp > 255)
188:delay.h **** __ticks = 0; /* i.e. 256 */
189:delay.h **** else
190:delay.h **** __ticks = (uint8_t)__tmp;
191:delay.h **** _delay_loop_1(__ticks);
192:delay.h **** }
193:delay.h ****
194:delay.h **** /**
195:delay.h **** \ingroup util_delay
196:delay.h ****
197:delay.h **** Perform a delay of \c __us microseconds, using _delay_loop_2b().
198:delay.h **** This routine will use nop shims to be as precise as possible, down to just 1 nop.
199:delay.h **** Or, in the case of 0.9 clocks-worth of delay, this will do NOTHING (IOW, it rounds down).
200:delay.h **** It must be called with a value known at compile-time.
201:delay.h **** It needs gcc optimization (-O) in order to be inlined properly.
202:delay.h **** Optimization will clash with using -g to hand-count a listing, sorry.
203:delay.h **** The macro F_CPU is supposed to be defined to a
204:delay.h **** constant defining the CPU clock frequency (in Hertz).
205:delay.h ****
206:delay.h **** The maximal possible delay is ~262.14 ms (not us!) / F_CPU in MHz.
207:delay.h **** */
208:delay.h **** void
209:delay.h **** _delay_us_2(const double __us)
210:delay.h **** {
211:delay.h **** const double __clocks_per_us=((F_CPU)/1e6);
212:delay.h **** const double __clocks_delay = __clocks_per_us * __us;
213:delay.h **** const char __loop_2b_fixed=7; // the 4 ldi clocks + the 3clk last loop
214:delay.h **** const uint32_t __loop_2b_runs = ((__clocks_delay - __loop_2b_fixed) / 4)+1; // +1, don't fo
215:delay.h **** const double __remainder_clocks= __clocks_delay - ((__loop_2b_runs-1) * 4 + __loop_2b_fixed
216:delay.h ****
217:delay.h **** if(__loop_2b_runs < 65536) {
218:delay.h **** if (__clocks_delay < 1) {/*DONOTHING*/}
219:delay.h **** else if (__clocks_delay < 2) {asm volatile ("nop");}
220:delay.h **** else if (__clocks_delay < 3) {asm volatile ("rjmp +0");} /*rjmp 0(implicit PC+1) = 2 nops*/
221:delay.h **** else if (__clocks_delay < 4) {asm volatile ("rjmp +0\n\t nop");}
222:delay.h **** else if (__clocks_delay < 5) {asm volatile ("rjmp +0\n\t rjmp +0");}
93 .LM3:
94 /* #APP */
95 000e F8CF rjmp +0
96 0010 F7CF rjmp +0
97 /* #NOAPP */
98 .LBE91:
99 .LBE90:
101 .Ltext2:
9:delaytest.c **** _delay_us_2(0.4);
10:delaytest.c **** DDRC=13;
103 .LM4:
104 0012 8DE0 ldi r24,lo8(13)
105 0014 84BB out 52-0x20,r24
106 .LBB92:
107 .LBB93:
108 .LBB94:
109 .LBB95:
111 .Ltext3:
113 .LM5:
114 /* #APP */
115 0016 82E0 ldi r24,lo8(2)
116 0018 90E0 ldi r25,hi8(2)
117 001a 0197 1: sbiw r24,1
118 001c F1F7 brne 1b
119
120 /* #NOAPP */
121 .LBE95:
122 .LBE94:
223:delay.h **** else if (__clocks_delay < 6) {asm volatile ("rjmp +0\n\t rjmp +0 \n\t nop");}
224:delay.h **** else if (__clocks_delay < 7) {asm volatile ("rjmp +0\n\t rjmp +0 \n\t rjmp +0");}
225:delay.h **** else if (__remainder_clocks < 1) {
226:delay.h **** _delay_loop_2b((uint16_t)__loop_2b_runs);
227:delay.h **** } else if (__remainder_clocks < 2) {
228:delay.h **** _delay_loop_2b((uint16_t)__loop_2b_runs);
229:delay.h **** asm volatile ("nop");
230:delay.h **** } else if (__remainder_clocks < 3) {
231:delay.h **** _delay_loop_2b((uint16_t)__loop_2b_runs);
232:delay.h **** asm volatile ("rjmp +0");
124 .LM6:
125 /* #APP */
126 001e F0CF rjmp +0
127 /* #NOAPP */
128 .LBE93:
129 .LBE92:
131 .Ltext4:
11:delaytest.c **** _delay_us_2(1.3);
12:delaytest.c **** DDRC=14;
133 .LM7:
134 0020 8EE0 ldi r24,lo8(14)
135 0022 84BB out 52-0x20,r24
136 .LBB96:
137 .LBB97:
138 .LBB98:
139 .LBB99:
141 .Ltext5:
143 .LM8:
144 /* #APP */
145 0024 84E0 ldi r24,lo8(4)
146 0026 90E0 ldi r25,hi8(4)
147 0028 0197 1: sbiw r24,1
148 002a F1F7 brne 1b
149
150 /* #NOAPP */
151 .LBE99:
152 .LBE98:
233:delay.h **** } else if (__remainder_clocks < 4) {
234:delay.h **** _delay_loop_2b((uint16_t)__loop_2b_runs);
235:delay.h **** asm volatile ("rjmp +0\n\t nop");
154 .LM9:
155 /* #APP */
156 002c E9CF rjmp +0
157 002e 0000 nop
158 /* #NOAPP */
159 .LBE97:
160 .LBE96:
162 .Ltext6:
13:delaytest.c **** _delay_us_2(2.2);
14:delaytest.c **** DDRC=15;
164 .LM10:
165 0030 8FE0 ldi r24,lo8(15)
166 0032 84BB out 52-0x20,r24
167 .LBB100:
168 .LBB101:
169 .LBB102:
170 .LBB103:
172 .Ltext7:
174 .LM11:
175 /* #APP */
176 0034 87E0 ldi r24,lo8(7)
177 0036 90E0 ldi r25,hi8(7)
178 0038 0197 1: sbiw r24,1
179 003a F1F7 brne 1b
180
181 /* #NOAPP */
182 .LBE103:
183 .LBE102:
184 .LBE101:
185 .LBE100:
187 .Ltext8:
15:delaytest.c **** _delay_us_2(3.1);
16:delaytest.c **** DDRC=16;
189 .LM12:
190 003c 80E1 ldi r24,lo8(16)
191 003e 84BB out 52-0x20,r24
17:delaytest.c **** //_delay_us_2(a);
18:delaytest.c **** DDRC=4;
193 .LM13:
194 0040 84E0 ldi r24,lo8(4)
195 0042 84BB out 52-0x20,r24
196 .LBB104:
197 .LBB105:
198 .LBB106:
199 .LBB107:
201 .Ltext9:
203 .LM14:
204 /* #APP */
205 0044 86E3 ldi r24,lo8(54)
206 0046 90E0 ldi r25,hi8(54)
207 0048 0197 1: sbiw r24,1
208 004a F1F7 brne 1b
209
210 /* #NOAPP */
211 .LBE107:
212 .LBE106:
214 .LM15:
215 /* #APP */
216 004c 0000 nop
217 /* #NOAPP */
218 .LBE105:
219 .LBE104:
221 .Ltext10:
19:delaytest.c **** _delay_us_2(22);
20:delaytest.c **** DDRC=17;
223 .LM16:
224 004e 81E1 ldi r24,lo8(17)
225 0050 84BB out 52-0x20,r24
226 .LBB108:
227 .LBB109:
228 .LBB110:
229 .LBB111:
231 .Ltext11:
233 .LM17:
234 /* #APP */
235 0052 88E3 ldi r24,lo8(56)
236 0054 90E0 ldi r25,hi8(56)
237 0056 0197 1: sbiw r24,1
238 0058 F1F7 brne 1b
239
240 /* #NOAPP */
241 .LBE111:
242 .LBE110:
244 .LM18:
245 /* #APP */
246 005a D2CF rjmp +0
247 005c 0000 nop
248 /* #NOAPP */
249 .LBE109:
250 .LBE108:
252 .Ltext12:
21:delaytest.c **** _delay_us_2(23);
22:delaytest.c **** DDRC=18;
254 .LM19:
255 005e 82E1 ldi r24,lo8(18)
256 0060 84BB out 52-0x20,r24
257 .LBB112:
258 .LBB113:
259 .LBB114:
260 .LBB115:
262 .Ltext13:
264 .LM20:
265 /* #APP */
266 0062 8EE6 ldi r24,lo8(30574)
267 0064 97E7 ldi r25,hi8(30574)
268 0066 0197 1: sbiw r24,1
269 0068 F1F7 brne 1b
270
271 /* #NOAPP */
272 .LBE115:
273 .LBE114:
275 .LM21:
276 /* #APP */
277 006a 0000 nop
278 /* #NOAPP */
279 .LBE113:
280 .LBE112:
282 .Ltext14:
23:delaytest.c **** _delay_us_2(12230);
24:delaytest.c **** DDRC=19;
284 .LM22:
285 006c 83E1 ldi r24,lo8(19)
286 006e 84BB out 52-0x20,r24
25:delaytest.c **** _delay_us_2(62230);
26:delaytest.c **** DDRC=20;
288 .LM23:
289 0070 84E1 ldi r24,lo8(20)
290 0072 84BB out 52-0x20,r24
291 /* epilogue: frame size=0 */
292 0074 0C94 0000 jmp exit
293 /* epilogue end (size=2) */
294 /* function main size 105 (99) */
296 .Lscope0:
298 .stabd 78,0,0
300 .Letext0:
301 /* File "delaytest.c": code 105 = 0x0069 ( 99), prologues 4, epilogues 2 */
DEFINED SYMBOLS
*ABS*:00000000 delaytest.c
/tmp/ccysCnuE.s:3 *ABS*:0000003f __SREG__
/tmp/ccysCnuE.s:4 *ABS*:0000003e __SP_H__
/tmp/ccysCnuE.s:5 *ABS*:0000003d __SP_L__
/tmp/ccysCnuE.s:6 *ABS*:00000000 __tmp_reg__
/tmp/ccysCnuE.s:7 *ABS*:00000001 __zero_reg__
/tmp/ccysCnuE.s:71 .text:00000000 main
UNDEFINED SYMBOLS
__do_copy_data
__do_clear_bss
__stack
exit
#define F_CPU 10e6
#define MCU atmega32
#include <avr/io.h>
//#include <util/delay.h>
#include "delay.h"
/*
 * Listing fodder for _delay_us_2(): each call is bracketed by writes of
 * distinct values to DDRC so that the code generated for every delay is
 * easy to locate in the assembler listing.  F_CPU is 10 MHz in this
 * file, i.e. 10 clocks per microsecond.
 */
int main() {
/* Run-time value; only the disabled non-constant test below would use it. */
char a=PINB;
DDRC=12;
/* 0.4 us -> 4 clocks */
_delay_us_2(0.4);
DDRC=13;
/* 1.3 us -> 13 clocks */
_delay_us_2(1.3);
DDRC=14;
/* 2.2 us -> 22 clocks */
_delay_us_2(2.2);
DDRC=15;
/* 3.1 us -> 31 clocks */
_delay_us_2(3.1);
DDRC=16;
/* Non-constant argument; left disabled because _delay_us_2() only works
   with a compile-time constant. */
//_delay_us_2(a);
DDRC=4;
/* 22 us -> 220 clocks */
_delay_us_2(22);
DDRC=17;
/* 23 us -> 230 clocks */
_delay_us_2(23);
DDRC=18;
/* 12230 us -> 122300 clocks */
_delay_us_2(12230);
DDRC=19;
/* 62230 us -> 622300 clocks: more than a single 16-bit loop can count. */
_delay_us_2(62230);
DDRC=20;
}
_______________________________________________
AVR-libc-dev mailing list
[email protected]
http://lists.nongnu.org/mailman/listinfo/avr-libc-dev