|
View:
New views
3 Messages
—
Rating Filter:
Alert me
|
|
|
Patch: do_ch_freeverb() speedup with JIT
Hello everyone,
I've come up with a patch for TiMidity++ 2.13.2 which reduces the running time of .wav generation by about 4% on my machine (the patch is attached). The speedup patch is turned on with the "configure" switch "--with-ccg": it requires Ian Piumarta's ccg (http://www.iam.unibe.ch/~denker/Squeak/j3/ccg/) dynamic code generator to work, and only on the x86. I tested it by using it to generate a .wav file from a .mid of Rakhmaninov's G minor prelude (http://www.kunstderfuge.com/rachmaninov.htm -- Op. 23, No. 5); specifically: CFLAGS="-O6 -g" ./configure --with-ccg cd timidity make time ./timidity -Ow -o o.wav \ ~/rachmaninov_57525a_prelude_\(nc\)smythe.mid Anyway, is there any chance this patch might be included in the official tree some way or other? :) I'm curious because I see that there's not been a new release of TiMidity++ for quite some time... Thanks! diff -r -U5 --new-file --exclude=configure --exclude='*m4*' --exclude=Makefile.in TiMidity++-2.13.2/configure.in TiMidity++-2.13.2-ccg/configure.in --- TiMidity++-2.13.2/configure.in 2004-10-03 20:39:52.000000000 +0800 +++ TiMidity++-2.13.2-ccg/configure.in 2007-09-29 12:25:45.000000000 +0800 @@ -752,10 +752,17 @@ eval "au_enable_$DEFAULT_PLAYMODE=yes" else AC_MSG_WARN(--with-default-output=$withval: audio is not enabled) fi]) +AC_ARG_WITH(ccg, + [ --with-ccg use Piumarta's ccg for code speedup (x86 only)], + [ if test x"$withval" = xyes; then + EXTRADEFS="$EXTRADEFS -DCCG" + SYSEXTRAS="$SYSEXTRAS jit_i386.c" + fi]) + dnl compatibility matters. dnl AC_ARG_ENABLE(esd, dnl [ --enable-esd EsounD (Obsoleted. 
Use --enable-audio=esd)], dnl [ au_enable_esd=$enableval ]) dnl AC_ARG_ENABLE(nas, @@ -1923,10 +1930,12 @@ AM_CONDITIONAL(BORLANDC, test "x$BORLANDC" = xyes) AM_CONDITIONAL(WATCOM_C, test "x$WATCOM_C" = xyes) AM_CONDITIONAL(W32READDIR, test "x$W32READDIR" = "xyes") +AM_CONDITIONAL(CCG, test "x$with_ccg" = xyes) + SET_UNIQ_WORDS(LDFLAGS,$LDFLAGS) SET_UNIQ_WORDS(SHLDFLAGS,$SHLDFLAGS) SET_UNIQ_WORDS(CFLAGS,$EXTRACFLAGS $CFLAGS) SET_UNIQ_WORDS(CPPFLAGS,$CPPFLAGS $EXTRADEFS) diff -r -U5 --new-file --exclude=configure --exclude='*m4*' --exclude=Makefile.in TiMidity++-2.13.2/timidity/jit_i386.cg TiMidity++-2.13.2-ccg/timidity/jit_i386.cg --- TiMidity++-2.13.2/timidity/jit_i386.cg 1970-01-01 07:30:00.000000000 +0730 +++ TiMidity++-2.13.2-ccg/timidity/jit_i386.cg 2007-09-29 15:21:10.000000000 +0800 @@ -0,0 +1,336 @@ +#include <limits.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> +#include "timidity.h" +#include "reverb.h" + +#ifdef __GNUC__ +#localpc +register int asm_pass __asm__("%esi"); +register unsigned char *asm_pc __asm__("%edi"); +#endif + +#define __STRICT_ANSI__ +#cpu pentium + +#ifdef iflush +#undef iflush +static void iflush(insn *start, insn *end) +{ + int page_size = getpagesize(); + int page = (long)start & -page_size; + int length = ((long)end - page + page_size - 1) & -page_size; + mprotect((void *)page, length, PROT_READ | PROT_WRITE | PROT_EXEC); +} +#endif + +extern int32 reverb_effect_buffer[]; + +/* + * writes code to multiply %eax by signed 8.24 operand val, store 8.24 result + * in %eax, and trash %edx + */ +static void imuldiv24_jit(int32 val) +{ + #[ # no. 
of bytes: + movl $val,%edx # 5 +!_W(0xeaf7); /* imull %edx # 2 */ + shll $8,%edx # 3 + shrl $24,%eax # 3 + orl %edx,%eax # 2 + ]# +} + +enum { imuldiv24_n_insns = 15 }; + +/* + * writes code to compute + * %(rd) = buf + (((%(rs) + incr) / sizeof(int32)) % size) + * where the value of %(rs) will be a multiple of 32 and smaller than upp + */ +static void circ_ptr_jit(int rd, int rs, int32 *buf, size_t size, +size_t upp, size_t incr) +{ + size_t s = size * sizeof(int32), n = incr * sizeof(int32); + insn *wrap; + if (size == 1) #[ + movl $(long)buf,%(rd) # 5 + ]# else if ((size & (size - 1)) == 0) #[ + # power of 2 + leal n(%(rs)),%(rd) # 7 + andl $(s - 1),%(rd) # 6 + addl $(long)buf,%(rd) # 6 + ]# else if ((upp + incr) / size > 3) #[ + leal n(%(rs)),%(rd) # 7 + xchgl %eax,%(rd) # 1 + pushl %edx # 1 + cltd # 1 + pushl $(s) # 5 + idivl (%esp),%eax # 3 + addl $4,%esp # 3 + movl %edx,%eax # 2 + popl %edx # 1 + xchgl %eax,%(rd) # 1 + addl $(long)buf,%(rd) # 6 + ]# else #[ + leal n(%(rs)),%(rd) # 7 + wrap: subl $(s),%(rd) # 6 + jnb wrap # 2 + addl $((long)buf+s),%(rd) # 6 + ]# +} + +enum { circ_ptr_n_insns = 31 }; + +static void delay_jit(int stream_reg, delay *dl, int32 upp) +{ + #[ + ## int32 output; + ## output = buf[*index]; + ]# + circ_ptr_jit(#(%edx)#, #(%edi)#, dl->buf, dl->size, upp, + dl->index); + #[ + movl (%edx),%eax # 2 + ## + ## buf[*index] = *stream; + ## if (++*index >= size) {*index = 0;} + ## + movl %(stream_reg),(%edx) # 2 + ## + ## *stream = output; + ## + movl %eax,%(stream_reg) # 2 + ]# +} + +enum { delay_n_insns = 6 + circ_ptr_n_insns }; + +static void freeverb_comb_jit(int input_reg, int stream_reg, comb *cb, +int32 upp) +{ + #[ + ## int32 output; + ## output = buf[*index]; + ]# + circ_ptr_jit(#(%ebx)#, #(%edi)#, cb->buf, cb->size, upp, + cb->index); + #[ + movl (%ebx),%eax # 2 + ## + ## *stream += output; + ## + addl %eax,%(stream_reg) # 2 + ## + ## *fs = imuldiv24(output, damp2) + imuldiv24(*fs, damp1); + ]# + imuldiv24_jit(cb->damp2i); + #[ + movl 
%eax,%ecx # 2 + movl (&cb->filterstore),%eax # 5 + ]# + imuldiv24_jit(cb->damp1i); + #[ + addl %ecx,%eax # 2 + movl %eax,(&cb->filterstore) # 5 + ## + ## buf[*index] = input + imuldiv24(*fs, feedback); + ]# + imuldiv24_jit(cb->feedbacki); + #[ + movl %(input_reg),%ecx # 2 + addl %ecx,%eax # 2 + movl %eax,(%ebx) # 2 + ## + ## if (++*index >= size) {*index = 0;} + ## + ## ### unneeded + ]# +} + +enum { freeverb_comb_n_insns = 24 + circ_ptr_n_insns + 3*imuldiv24_n_insns }; + +static void freeverb_allpass_jit(int stream_reg, allpass *ap, int32 upp) +{ + #[ + ## int32 bufout, output; + ## bufout = buf[*index]; + ]# + circ_ptr_jit(#(%ebx)#, #(%edi)#, ap->buf, ap->size, upp, + ap->index); + #[ + movl (%ebx),%ecx # 2 + ## + ## buf[*index] = *stream + imuldiv24(bufout, feedback); + ## + movl %ecx,%eax # 2 + ]# + imuldiv24_jit(ap->feedbacki); + #[ + addl %(stream_reg),%eax # 2 + movl %eax,(%ebx) # 2 + ## + ## output = -*stream + bufout; + ## if (++*index >= size) {*index = 0;} + ## *stream = output; + ## + subl %ecx,%(stream_reg) # 2 + negl %(stream_reg) # 2 + ]# +} + +enum { freeverb_allpass_n_insns = 12 + circ_ptr_n_insns + imuldiv24_n_insns }; + +void do_ch_freeverb_jit(int32 *buf, int32 count, InfoFreeverb *rev) +{ + int32 c2 = count / 2, j, outl; + comb *combL = rev->combL, *combR = rev->combR; + allpass *allpassL = rev->allpassL, *allpassR = rev->allpassR; + insn insn_buf[90 + 4 * imuldiv24_n_insns + + delay_n_insns + + 2 * numcombs * freeverb_comb_n_insns + + 2 * numallpasses * freeverb_allpass_n_insns]; + void (*frob)(void); + insn *loop; + int save_asm_pass = asm_pass; + insn *save_asm_pc = asm_pc; + #[ + ## ### %eax, %ecx, %edx scratch regs. 
+ ## ### %ebp == input + ## ### %esi == outl + ## ### %edi == k * sizeof(int32) + ## + .org insn_buf + frob: pushl %ebx # 1 + pushl %ebp # 1 + pushl %esi # 1 + pushl %edi # 1 + ## + ## for (k = 0; k < count; k++) + ## { + ## + xorl %edi,%edi # 2 + ## + ## input = reverb_effect_buffer[k] + reverb_effect_buffer[k + 1]; + ## + loop: leal ((long)reverb_effect_buffer)(%edi),%eax # 6 + movl (%eax),%ebp # 2 + addl 4(%eax),%ebp # 3 + ## + ## outl = outr = reverb_effect_buffer[k] = + ## reverb_effect_buffer[k + 1] = 0; + ## + xorl %esi,%esi # 2 + movl %esi,(%eax) # 2 + movl %esi,4(%eax) # 3 + ## + ## do_delay(&input, pdelay->buf, pdelay->size, &pdelay->index); + ## + ## ### %edi == (k / 2) * sizeof(int32) + ## + shrl $1,%edi # 2 + ]# + delay_jit(#(%ebp)#, &rev->pdelay, c2); + #[ + ## for (i = 0; i < numcombs; i++) { + ## do_freeverb_comb(input, &outl, combL[i].buf, + ## combL[i].size, &combL[i].index, + ## combL[i].damp1i, combL[i].damp2i, + ## &combL[i].filterstore, combL[i].feedbacki); + ## + ## do_freeverb_comb(input, &outr, combR[i].buf, + ## combR[i].size, &combR[i].index, + ## combR[i].damp1i, combR[i].damp2i, + ## &combR[i].filterstore, combR[i].feedbacki); + ## } + ## for (i = 0; i < numallpasses; i++) { + ## do_freeverb_allpass(&outl, allpassL[i].buf, + ## allpassL[i].size, &allpassL[i].index, + ## allpassL[i].feedbacki); + ## do_freeverb_allpass(&outr, allpassR[i].buf, + ## allpassR[i].size, &allpassR[i].index, + ## allpassR[i].feedbacki); + ## } + ## + ## ### %ebx scratch reg. 
+ ]# + for (j = 0; j < numcombs; ++j) + freeverb_comb_jit(#(%ebp)#, #(%esi)#, &combL[j], c2); + for (j = 0; j < numallpasses; ++j) + freeverb_allpass_jit(#(%esi)#, &allpassL[j], c2); + #[ + movl %esi,(&outl) # 6 + ## + ## ### %esi == outr + ## + xorl %esi,%esi # 2 + ]# + for (j = 0; j < numcombs; ++j) + freeverb_comb_jit(#(%ebp)#, #(%esi)#, &combR[j], c2); + for (j = 0; j < numallpasses; ++j) + freeverb_allpass_jit(#(%esi)#, &allpassR[j], c2); + #[ + ## buf[k] += imuldiv24(outl, rev->wet1i) + + ## imuldiv24(outr, rev->wet2i); + ## + ## ### %ebx == outl + ## ### %ebp == &buf[k] + ## ### %edi == k * sizeof(int32) + ## + shll $1,%edi # 2 + movl (&outl),%ebx # 6 + leal ((long)buf)(%edi),%ebp # 6 + movl %ebx,%eax # 2 + ]# + imuldiv24_jit(rev->wet1i); + #[ + movl %eax,%ecx # 2 + movl %esi,%eax # 2 + ]# + imuldiv24_jit(rev->wet2i); + #[ + addl %eax,%ecx # 2 +!_W(0x4d01);_B(0x00); +! /* addl %ecx,(%ebp) # 3 */ + #[ + ## buf[k + 1] += imuldiv24(outr, rev->wet1i) + + ## imuldiv24(outl, rev->wet2i); + ## + movl %esi,%eax # 2 + ]# + imuldiv24_jit(rev->wet1i); + #[ + movl %eax,%ecx # 2 + movl %ebx,%eax # 2 + ]# + imuldiv24_jit(rev->wet2i); + #[ + addl %eax,%ecx # 2 + addl %ecx,4(%ebp) # 3 + ## + ## ++k; + ## } + ## + addl $8,%edi # 3 + cmpl $(count*sizeof(32)),%edi # 6 + jb loop # 6 + popl %edi # 1 + popl %esi # 1 + popl %ebp # 1 + popl %ebx # 1 + ret # 1 + ]# + iflush(insn_buf, asm_pc); + frob(); + for (j = 0; j < numcombs; ++j) { + combL[j].index = (combL[j].index + c2) % combL[j].size; + combR[j].index = (combR[j].index + c2) % combR[j].size; + } + for (j = 0; j < numallpasses; ++j) { + allpassL[j].index = (allpassL[j].index+c2) % allpassL[j].size; + allpassR[j].index = (allpassR[j].index+c2) % allpassR[j].size; + } + asm_pass = save_asm_pass; + asm_pc = save_asm_pc; +} diff -r -U5 --new-file --exclude=configure --exclude='*m4*' --exclude=Makefile.in TiMidity++-2.13.2/timidity/Makefile.am TiMidity++-2.13.2-ccg/timidity/Makefile.am --- TiMidity++-2.13.2/timidity/Makefile.am 
2004-06-01 08:05:29.000000000 +0800 +++ TiMidity++-2.13.2-ccg/timidity/Makefile.am 2007-09-29 12:54:35.000000000 +0800 @@ -124,10 +124,11 @@ bsd20_a.c \ esd_a.c \ hpux_a.c \ hpux_d_a.c \ jack_a.c \ + jit_i386.cg \ oss_a.c \ mac_a.c \ mac_qt_a.c \ mac_com.h \ mac_dlog.c \ @@ -161,10 +162,22 @@ w32_libOggFLAC_dll_g.h \ w32_libOggFLAC_dll_i.h calcnewt_SOURCES = calcnewt.c +CLEANFILES = + +if CCG + +SUFFIXES = .cg +CLEANFILES += jit_i386.c + +.cg.c: + ccg -q -o $@ $< + +endif + if BORLANDC if ENABLE_W32GUI W32GUI_RES = w32g_res.res else @@ -173,11 +186,11 @@ endif endif else -CLEANFILES = newton_table.c +CLEANFILES += newton_table.c if ENABLE_W32GUI W32GUI_RES = $(top_builddir)/interface/w32g_res.res else if ENABLE_W32G_SYN diff -r -U5 --new-file --exclude=configure --exclude='*m4*' --exclude=Makefile.in TiMidity++-2.13.2/timidity/reverb.c TiMidity++-2.13.2-ccg/timidity/reverb.c --- TiMidity++-2.13.2/timidity/reverb.c 2004-08-28 13:38:19.000000000 +0800 +++ TiMidity++-2.13.2-ccg/timidity/reverb.c 2007-09-28 19:09:44.000000000 +0800 @@ -914,11 +914,11 @@ #define REV_VAL0 5.3 #define REV_VAL1 10.5 #define REV_VAL2 44.12 #define REV_VAL3 21.0 -static int32 reverb_effect_buffer[AUDIO_BUFFER_SIZE * 2]; +int32 reverb_effect_buffer[AUDIO_BUFFER_SIZE * 2]; static int32 reverb_effect_bufsize = sizeof(reverb_effect_buffer); #if OPT_MODE != 0 #if defined(_MSC_VER) || defined(__WATCOMC__) || ( defined(__BORLANDC__) &&(__BORLANDC__ >= 1380) ) void set_ch_reverb(int32 *buf, int32 count, int32 level) @@ -1547,10 +1547,15 @@ } else if(count == MAGIC_FREE_EFFECT_INFO) { free_freeverb_buf(rev); return; } +#ifdef CCG + void do_ch_freeverb_jit(int32 *, int32, InfoFreeverb *); + if (count != 0) + do_ch_freeverb_jit(buf, count, rev); +#else for (k = 0; k < count; k++) { input = reverb_effect_buffer[k] + reverb_effect_buffer[k + 1]; outl = outr = reverb_effect_buffer[k] = reverb_effect_buffer[k + 1] = 0; @@ -1568,10 +1573,11 @@ } buf[k] += imuldiv24(outl, rev->wet1i) + imuldiv24(outr, 
rev->wet2i); buf[k + 1] += imuldiv24(outr, rev->wet1i) + imuldiv24(outl, rev->wet2i); ++k; } +#endif } /* */ /* Reverb: Delay & Panning Delay */ /* */ ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2005. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ Timidity-talk mailing list Timidity-talk@... https://lists.sourceforge.net/lists/listinfo/timidity-talk |
|
|
Re: Patch: do_ch_freeverb() speedup with JIT
On Sun, 30 Sep 2007 03:27:56 +0800
Frank Bi <bi@...> wrote: > > The speedup patch is turned on with the "configure" switch "--with-ccg": it > requires Ian Piumarta's ccg (http://www.iam.unibe.ch/~denker/Squeak/j3/ccg/) > dynamic code generator to work, and only on the x86. > Does it work on the x86_64? AK ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2005. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ Timidity-talk mailing list Timidity-talk@... https://lists.sourceforge.net/lists/listinfo/timidity-talk |
|
|
Re: Patch: do_ch_freeverb() speedup with JIT
Hello,
> > The speedup patch is turned on with the "configure" switch "--with-ccg": > > it requires Ian Piumarta's ccg > > (http://www.iam.unibe.ch/~denker/Squeak/j3/ccg/) dynamic code generator > > to work, and only on the x86. > Does it work on the x86_64? Unfortunately, no (unless, of course, TiMidity++ is compiled in 32-bit x86 mode). ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2005. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ Timidity-talk mailing list Timidity-talk@... https://lists.sourceforge.net/lists/listinfo/timidity-talk |
| Free Forum Powered by Nabble | Forum Help |