Index: exp-bbv/bbv_main.c =================================================================== --- exp-bbv/bbv_main.c (revision 0) +++ exp-bbv/bbv_main.c (revision 0) @@ -0,0 +1,531 @@ +/* bbv - generates Simpoint Basic Block Vectors */ +/* loosely based on the cachegrind plugin */ + +/* This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. 
+*/ + +/* original code by Vince Weaver, < vince _at_ csl.cornell.edu > */ +/* pcfile code by Oriol Prat, < oriol.prat _at _ bsc.es > */ + +#include "pub_tool_basics.h" +#include "pub_tool_tooliface.h" +#include "pub_tool_options.h" /* command line options */ + +#include "pub_tool_vki.h" /* vki_stat */ +#include "pub_tool_libcbase.h" /* VG_(strlen) */ +#include "pub_tool_libcfile.h" /* VG_(write) */ +#include "pub_tool_libcprint.h" /* VG_(printf) */ +#include "pub_tool_libcassert.h" /* VG_(exit) */ +#include "pub_tool_mallocfree.h" /* plain_free */ +#include "pub_tool_machine.h" /* VG_(fnptr_to_fnentry) */ +#include "pub_tool_debuginfo.h" /* VG_(get_fnname) */ + +#include "pub_tool_oset.h" /* ordered set stuff */ + +/* instruction special cases */ +#define REP_INSTRUCTION 0x1 +#define FLDCW_INSTRUCTION 0x2 + +/* interval variables */ +#define DEFAULT_GRAIN_SIZE 100000000 /* 100 million by default */ +static Int interval_size=DEFAULT_GRAIN_SIZE; +static Int interval_num=1; + +/* statistics */ +static ULong dyn_instr=0,total_instr=0,global_rep_count=0, + unique_rep_count=0,fldcw_count=0; + +/* filenames */ +static UChar *clo_bb_out_file="bb.out.%p"; +static UChar *clo_pc_out_file="pc.out.%p"; +static UChar *bb_out_file=NULL; +static UChar *pc_out_file=NULL; + +/* output parameters */ +static Bool instr_count_only=False; +static Bool generate_pc_file=False; + +/* file descriptors */ +static Int bbtrace_fd; + +/* write buffer */ +static UChar buf[1024]; + +/* global next block number */ +static Int block_num=1; + +/* table that holds the basic block info */ +static OSet* instr_info_table; + +#define FUNCTION_NAME_LENGTH 20 + +struct BB_info { + Addr BB_addr; /* used as key, must be first */ + Addr rep_addr; + Int n_instrs; /* instructions in the basic block */ + Int block_num; /* unique block identifier */ + Int bb_inst_counter; /* times entered * num_instructions */ + Int rep_count; + Int fldcw_count; + Bool is_entry; /* is this block a function entry point */ + UChar 
fn_name[FUNCTION_NAME_LENGTH]; /* Function block is in */ +}; + + + /* dump the optional PC file, which contains basic block number to */ + /* instruction address and function name mappings */ +static void dumpPcFile(void) { + + struct BB_info *bb_elem; + Int pctrace_fd; + SysRes sres; + + pc_out_file = + VG_(expand_file_name)("--pc-out-file", clo_pc_out_file); + + sres = VG_(open)(pc_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY, + VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP); + if (sr_isError(sres)) { + VG_UMSG("Error: cannot create pc file %s\n", pc_out_file); + VG_(exit)(1); + } else { + pctrace_fd = sr_Res(sres); + } + + /* Loop through the table, printing the number, address, */ + /* and function name for each basic block */ + VG_(OSetGen_ResetIter)(instr_info_table); + while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) { + VG_(write)(pctrace_fd,"F",1); + VG_(sprintf)( buf,":%d:%x:%s\n", + bb_elem->block_num, + (Int)bb_elem->BB_addr, + bb_elem->fn_name); + VG_(write)(pctrace_fd, (void*)buf, VG_(strlen)(buf)); + } + + VG_(close)(pctrace_fd); +} + +int last_was_a_rep=0; + +Addr last_rep_addr=0; + +static VG_REGPARM(1) void update_BBV(struct BB_info *bbInfo) { + + struct BB_info *bb_elem; + + tl_assert(bbInfo); + +// VG_(printf)("VMW: #%d, A0x%x, RC%d, %d\n",bbInfo->block_num,bbInfo->BB_addr,bbInfo->rep_count,global_rep_count); + + if ((bbInfo->rep_count) && (bbInfo->rep_addr==last_rep_addr)) { + global_rep_count+=bbInfo->rep_count; + last_rep_addr=bbInfo->rep_addr; + return; + } + + if (bbInfo->rep_count) { + last_rep_addr=bbInfo->rep_addr; + global_rep_count+=bbInfo->rep_count; + unique_rep_count+=bbInfo->rep_count; +// VG_(printf)("VMW: unique rep %x count %d\n",last_rep_addr,bbInfo->rep_count); + } + + fldcw_count+=bbInfo->fldcw_count; + + bbInfo->bb_inst_counter+=bbInfo->n_instrs; + + total_instr+=bbInfo->n_instrs; + dyn_instr +=bbInfo->n_instrs; + + if (dyn_instr > interval_size) { + + if (!instr_count_only) { + /* put in an entry to the out.bb 
file */ + + VG_(write)(bbtrace_fd,"T",1); + + VG_(OSetGen_ResetIter)(instr_info_table); + while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) { + if ( bb_elem->bb_inst_counter != 0 ) { + VG_(sprintf)( buf,":%d:%d ", + bb_elem->block_num, + bb_elem->bb_inst_counter); + VG_(write)(bbtrace_fd, (void*)buf, VG_(strlen)(buf)); + bb_elem->bb_inst_counter = 0; + } + } + + VG_(write)(bbtrace_fd,"\n",1); + } + + dyn_instr -= interval_size; + interval_num++; + } +} + + /* Check if the instruction pointed to is one that needs */ + /* special handling. If so, set a bit in the return */ + /* value indicating what type. */ +static Int get_inst_type(Int len,Addr addr) { + + int result=0; + +#if defined(VGA_x86) || defined(VGA_amd64) + + unsigned char *inst_pointer; + unsigned char inst_byte; + int i,possible_rep; + + /* rep prefixed instructions are counted as one instruction on */ + /* x86 processors and must be handled as a special case */ + + /* Also, the rep prefix is re-used as part of the opcode for */ + /* SSE instructions. So we need to specifically check for */ + /* the following: movs, cmps, scas, lods, stos, ins, outs */ + + inst_pointer=(unsigned char *)addr; + i=0; + inst_byte=0; + possible_rep=0; + + while (i= 0xa4) && /* movs,cmps,scas */ + (inst_byte <= 0xaf) ) || /* lods,stos */ + ( (inst_byte >= 0x6c) && + (inst_byte <= 0x6f) ) ) ) { /* ins,outs */ + + result|=REP_INSTRUCTION; + } + + /* fldcw instructions are double-counted by the hardware */ + /* performance counters on pentium 4 processors so it is */ + /* useful to have that count when doing validation work. 
*/ + + inst_pointer=(unsigned char *)addr; + if (len>1) { + /* FLDCW detection */ + /* opcode is 0xd9/5, ie 1101 1001 oo10 1mmm */ + if ((*inst_pointer==0xd9) && + (*(inst_pointer+1)<0xb0) && /* need this case of fldz, etc, count */ + ( (*(inst_pointer+1) & 0x38) == 0x28)) { + result|=FLDCW_INSTRUCTION; + } + } + +#endif + return result; +} + + + /* Our instrumentation function */ + /* sbIn = super block to translate */ + /* layout = guest layout */ + /* gWordTy = size of guest word */ + /* hWordTy = size of host word */ +static IRSB* bbv_instrument ( VgCallbackClosure* closure, + IRSB* sbIn, VexGuestLayout* layout, + VexGuestExtents* vge, + IRType gWordTy, IRType hWordTy ) { + + Int i,out_counter,start_of_instructions,n_instrs=0; + IRSB *sbOut; + IRStmt *st; + struct BB_info *bbInfo; + Addr64 origAddr,ourAddr; + IRDirty *di; + IRExpr **argv, *arg1; + Int regparms,opcode_type; + + /* We don't handle a host/guest word size mismatch */ + if (gWordTy != hWordTy) { + VG_(tool_panic)("host/guest word size mismatch"); + } + + /* Set up SB */ + sbOut = deepCopyIRSBExceptStmts(sbIn); + + /* Copy verbatim any IR preamble preceding the first IMark */ + i = 0; + while ( (i < sbIn->stmts_used) && (sbIn->stmts[i]->tag!=Ist_IMark)) { + addStmtToIRSB( sbOut, sbIn->stmts[i] ); + i++; + } + + /* Get the first statement */ + tl_assert(sbIn->stmts_used > 0); + + while(1) { +vince_is_cool: + + st = sbIn->stmts[i]; + + /* double check we are at a Mark statement */ + tl_assert(Ist_IMark == st->tag); + + origAddr=st->Ist.IMark.addr; + + /* Get the BB_info */ + bbInfo = VG_(OSetGen_Lookup)(instr_info_table, &origAddr); + + /* save this for later */ + start_of_instructions=i; + + if (bbInfo==NULL) { + + /* BB never translated before (at this address, at least; */ + /* could have been unloaded and then reloaded) */ + + /* allocate and initialize a new basic block structure */ + bbInfo = VG_(OSetGen_AllocNode)(instr_info_table, + sizeof(struct BB_info)); + bbInfo->BB_addr = origAddr; + + /* 
assign a unique block number */ + bbInfo->block_num=block_num; + block_num++; + + /* get function name and entry point information */ + VG_(get_fnname)(origAddr,bbInfo->fn_name,FUNCTION_NAME_LENGTH); + bbInfo->is_entry=VG_(get_fnname_if_entry)(origAddr, bbInfo->fn_name, + FUNCTION_NAME_LENGTH); + /* insert structure into table */ + VG_(OSetGen_Insert)( instr_info_table, bbInfo ); + + /* Iterate through the basic block, counting instructions */ + /* This assumes we have forced vex-guest-chase-thresh to 0 */ + /* Otherwise we are counting superblocks not basic blocks */ + + /* Doing things at the basic block level enables faster */ + /* run-time, but loses the ability to do things like */ + /* calculating how many times a rep instruction repeats */ + + /* count instructions and type */ + } + n_instrs=0; + bbInfo->rep_count=0; + bbInfo->fldcw_count=0; + while(i < sbIn->stmts_used) { + st=sbIn->stmts[i]; + + if (st->tag == Ist_IMark) { + ourAddr = st->Ist.IMark.addr; + opcode_type=get_inst_type(st->Ist.IMark.len,ourAddr); +// VG_(printf)("VMW: %d %d %x\n",block_num,n_instrs,ourAddr); + if (opcode_type&REP_INSTRUCTION) { + //if (!bbInfo->rep_addr) + bbInfo->rep_addr=ourAddr; + bbInfo->rep_count++; + } + if (opcode_type&FLDCW_INSTRUCTION) { + bbInfo->fldcw_count++; + } + n_instrs++; + } + + i++; + + if (st->tag == Ist_Exit) { +// VG_(printf)("VMW: Exit!\n"); + break; + } + + } + bbInfo->n_instrs = n_instrs; + + + /* Create call to our instrumentation function to insert. 
*/ + /* It has two arguments, the original address of the basic block */ + /* and an argument spefcifying if it is a *rep* instruction */ + regparms=1; + arg1= mkIRExpr_HWord( (HWord)bbInfo); + argv= mkIRExprVec_1(arg1); + di= unsafeIRDirty_0_N( regparms, "update_BBV", + VG_(fnptr_to_fnentry)( &update_BBV ), argv); + + /* Insert our call */ + addStmtToIRSB( sbOut, IRStmt_Dirty(di)); + + /* restore start of insturctions before writing out */ + out_counter=start_of_instructions; + +// VG_(printf)("VMW: out_counter,i %d, %d\n",out_counter,i); +// while(i < sbIn->stmts_used) { + while(out_counter < i) { + st=sbIn->stmts[out_counter]; + + /* Insert the original instruction */ + addStmtToIRSB( sbOut, st ); + + out_counter++; + } + + + while(i < sbIn->stmts_used) { + st = sbIn->stmts[i]; + + if (st->tag==Ist_IMark) break; + + addStmtToIRSB( sbOut, st ); + + i++; + } + + + + // VG_(printf)("VMW: i, statements_used %d, %d\n",i,sbIn->stmts_used); + if (i < sbIn->stmts_used) goto vince_is_cool; + + break; + } + return sbOut; +} + +/*--------------------------------------------------------------------*/ +/*--- Setup ---*/ +/*--------------------------------------------------------------------*/ + +static void bbv_post_clo_init(void) { + + SysRes sres; + + bb_out_file = + VG_(expand_file_name)("--bb-out-file", clo_bb_out_file); + + sres = VG_(open)(bb_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY, + VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP); + if (sr_isError(sres)) { + VG_UMSG("Error: cannot create bb file %s\n",bb_out_file); + VG_(exit)(1); + } else { + bbtrace_fd=sr_Res(sres); + } +} + + +static void bbv_thread_called ( ThreadId tid, ULong nDisp ) { + + static int old_thread=0,warn_mt=0; + + if (tid!=old_thread) { + old_thread=tid; + if ((tid!=1) && (!warn_mt)) { + VG_(printf)("Warning! 
exp-bbv does not properly handle multi-threaded applications\n"); + warn_mt=1; + } + } +} + + + /* Parse the command line options */ +static Bool bbv_process_cmd_line_option(Char* arg) { + + if VG_INT_CLO (arg, "--interval-size", interval_size) {} + else if VG_STR_CLO (arg, "--bb-out-file", clo_bb_out_file) {} + else if VG_STR_CLO (arg, "--pc-out-file", clo_pc_out_file) { + generate_pc_file = True; + } + else if VG_XACT_CLO (arg, "--instr-count-only", instr_count_only, True) {} + else { + return False; + } + + return True; +} + +static void bbv_print_usage(void) { + VG_(printf) (" --bb-out-file= filename for basic block vector info\n"); + VG_(printf) (" --pc-out-file= filename for basic block addresses and function names\n"); + VG_(printf) (" --interval-size= interval size\n"); + VG_(printf) (" --instr-count-only only print total instruction count\n"); +} + +static void bbv_print_debug_usage(void) { + VG_(printf)(" (none)\n"); +} + +static void bbv_fini(Int exitcode) { + + if (generate_pc_file) { + dumpPcFile(); + } + + VG_(sprintf)(buf,"\n\n# Total intervals: %d (Interval Size %d)\n# Total instructions: %lld\n" + "# Total reps: %lld\n# Unique reps: %lld\n" + "# Total fldcw instructions: %lld\n\n", + (Int)(total_instr/(ULong)interval_size), + interval_size, + total_instr, + global_rep_count,unique_rep_count,fldcw_count); + /* Print results to display */ + VG_UMSG(buf); + /* Also print to results file */ + VG_(write)(bbtrace_fd,(void*)buf,VG_(strlen)(buf)); + VG_(close)(bbtrace_fd); +} + + +static void bbv_pre_clo_init(void) { + + VG_(details_name) ("exp-bbv"); + VG_(details_version) (NULL); + VG_(details_description) ("a SimPoint basic block vector generator"); + VG_(details_copyright_author)( + "Copyright (C) 2006-2009 Vince Weaver"); + VG_(details_bug_reports_to) (VG_BUGS_TO); + + VG_(basic_tool_funcs) (bbv_post_clo_init, + bbv_instrument, + bbv_fini); + + VG_(needs_command_line_options)(bbv_process_cmd_line_option, + bbv_print_usage, + bbv_print_debug_usage); + 
+ VG_(track_start_client_code)( bbv_thread_called ); + + instr_info_table = VG_(OSetGen_Create)(/*keyOff*/0, + NULL, + VG_(malloc), "bbv.1", VG_(free)); + +} + +VG_DETERMINE_INTERFACE_VERSION(bbv_pre_clo_init) + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ Index: exp-bbv/Makefile.am =================================================================== --- exp-bbv/Makefile.am (revision 0) +++ exp-bbv/Makefile.am (revision 0) @@ -0,0 +1,85 @@ +include $(top_srcdir)/Makefile.tool.am + +noinst_PROGRAMS = +if VGCONF_PLATFORMS_INCLUDE_X86_LINUX +noinst_PROGRAMS += exp-bbv-x86-linux +endif +if VGCONF_PLATFORMS_INCLUDE_AMD64_LINUX +noinst_PROGRAMS += exp-bbv-amd64-linux +endif +if VGCONF_PLATFORMS_INCLUDE_PPC32_LINUX +noinst_PROGRAMS += exp-bbv-ppc32-linux +endif +if VGCONF_PLATFORMS_INCLUDE_PPC64_LINUX +noinst_PROGRAMS += exp-bbv-ppc64-linux +endif +if VGCONF_PLATFORMS_INCLUDE_PPC32_AIX5 +noinst_PROGRAMS += exp-bbv-ppc32-aix5 +endif +if VGCONF_PLATFORMS_INCLUDE_PPC64_AIX5 +noinst_PROGRAMS += exp-bbv-ppc64-aix5 +endif +if VGCONF_PLATFORMS_INCLUDE_X86_DARWIN +noinst_PROGRAMS += exp-bbv-x86-darwin +endif +if VGCONF_PLATFORMS_INCLUDE_AMD64_DARWIN +noinst_PROGRAMS += exp-bbv-amd64-darwin +endif + +BBV_SOURCES_COMMON = bbv_main.c + +exp_bbv_x86_linux_SOURCES = $(BBV_SOURCES_COMMON) +exp_bbv_x86_linux_CPPFLAGS = $(AM_CPPFLAGS_X86_LINUX) +exp_bbv_x86_linux_CFLAGS = $(AM_CFLAGS_X86_LINUX) +exp_bbv_x86_linux_DEPENDENCIES = $(COREGRIND_LIBS_X86_LINUX) +exp_bbv_x86_linux_LDADD = $(TOOL_LDADD_X86_LINUX) +exp_bbv_x86_linux_LDFLAGS = $(TOOL_LDFLAGS_X86_LINUX) + +exp_bbv_amd64_linux_SOURCES = $(BBV_SOURCES_COMMON) +exp_bbv_amd64_linux_CPPFLAGS = $(AM_CPPFLAGS_AMD64_LINUX) +exp_bbv_amd64_linux_CFLAGS = $(AM_CFLAGS_AMD64_LINUX) +exp_bbv_amd64_linux_DEPENDENCIES = $(COREGRIND_LIBS_AMD64_LINUX) +exp_bbv_amd64_linux_LDADD = $(TOOL_LDADD_AMD64_LINUX) 
+exp_bbv_amd64_linux_LDFLAGS = $(TOOL_LDFLAGS_AMD64_LINUX) + +exp_bbv_ppc32_linux_SOURCES = $(BBV_SOURCES_COMMON) +exp_bbv_ppc32_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC32_LINUX) +exp_bbv_ppc32_linux_CFLAGS = $(AM_CFLAGS_PPC32_LINUX) +exp_bbv_ppc32_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_LINUX) +exp_bbv_ppc32_linux_LDADD = $(TOOL_LDADD_PPC32_LINUX) +exp_bbv_ppc32_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC32_LINUX) + +exp_bbv_ppc64_linux_SOURCES = $(BBV_SOURCES_COMMON) +exp_bbv_ppc64_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC64_LINUX) +exp_bbv_ppc64_linux_CFLAGS = $(AM_CFLAGS_PPC64_LINUX) +exp_bbv_ppc64_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_LINUX) +exp_bbv_ppc64_linux_LDADD = $(TOOL_LDADD_PPC64_LINUX) +exp_bbv_ppc64_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC64_LINUX) + +exp_bbv_ppc32_aix5_SOURCES = $(BBV_SOURCES_COMMON) +exp_bbv_ppc32_aix5_CPPFLAGS = $(AM_CPPFLAGS_PPC32_AIX5) +exp_bbv_ppc32_aix5_CFLAGS = $(AM_CFLAGS_PPC32_AIX5) +exp_bbv_ppc32_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_AIX5) +exp_bbv_ppc32_aix5_LDADD = $(TOOL_LDADD_PPC32_AIX5) +exp_bbv_ppc32_aix5_LDFLAGS = $(TOOL_LDFLAGS_PPC32_AIX5) + +exp_bbv_ppc64_aix5_SOURCES = $(BBV_SOURCES_COMMON) +exp_bbv_ppc64_aix5_CPPFLAGS = $(AM_CPPFLAGS_PPC64_AIX5) +exp_bbv_ppc64_aix5_CFLAGS = $(AM_CFLAGS_PPC64_AIX5) +exp_bbv_ppc64_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_AIX5) +exp_bbv_ppc64_aix5_LDADD = $(TOOL_LDADD_PPC64_AIX5) +exp_bbv_ppc64_aix5_LDFLAGS = $(TOOL_LDFLAGS_PPC64_AIX5) + +exp_bbv_x86_darwin_SOURCES = $(BBV_SOURCES_COMMON) +exp_bbv_x86_darwin_CPPFLAGS = $(AM_CPPFLAGS_X86_DARWIN) +exp_bbv_x86_darwin_CFLAGS = $(AM_CFLAGS_X86_DARWIN) +exp_bbv_x86_darwin_DEPENDENCIES = $(COREGRIND_LIBS_X86_DARWIN) +exp_bbv_x86_darwin_LDADD = $(TOOL_LDADD_X86_DARWIN) +exp_bbv_x86_darwin_LDFLAGS = $(TOOL_LDFLAGS_X86_DARWIN) + +exp_bbv_amd64_darwin_SOURCES = $(BBV_SOURCES_COMMON) +exp_bbv_amd64_darwin_CPPFLAGS = $(AM_CPPFLAGS_AMD64_DARWIN) +exp_bbv_amd64_darwin_CFLAGS = $(AM_CFLAGS_AMD64_DARWIN) +exp_bbv_amd64_darwin_DEPENDENCIES = 
$(COREGRIND_LIBS_AMD64_DARWIN) +exp_bbv_amd64_darwin_LDADD = $(TOOL_LDADD_AMD64_DARWIN) +exp_bbv_amd64_darwin_LDFLAGS = $(TOOL_LDFLAGS_AMD64_DARWIN) Index: exp-bbv/docs/bbv-manual.xml =================================================================== --- exp-bbv/docs/bbv-manual.xml (revision 0) +++ exp-bbv/docs/bbv-manual.xml (revision 0) @@ -0,0 +1,131 @@ + + + + + +BBV: a Basic Block Vector generation tool + + +Background + + + A Basic Block Vector is a list of all basic blocks entered + during program execution, and a count of how many times each + block was run (a basic block is a section of code + with only one entry point and one exit point). + The entry count is multiplied by the number of instructions that are + in the basic block, in order to weigh the count so that instructions in + small Basic Blocks aren't counted as more important than instructions + in large Basic Blocks. + + + + The exp-bbv plugin is primarily designed for use in conjunction + with the SimPoint (http://www.cse.ucsd.edu/~calder/simpoint/) tool. + SimPoint is a method of speeding up architectural + simulations by exploiting the fact that programs have phase behavior, + enabling you to extrapolate total performance while only simulating small + portions of the program. + + + + The Basic Block Vector count is dumped at fixed intervals. This + is commonly done every 100 million instructions; the + --interval-size option can be + used to change this. + + + + The output file looks like this: + + + + + + Each new interval starts with a T. This is followed by a colon, + then by a unique number identifying the basic block. This is followed + by another colon, then followed by the frequency (which is scaled + by the number of instructions in the basic block). + + + +Implementation + + + Valgrind provides all of the information necessary to create + BBV files. In the current implementation, all instructions + are instrumented. 
This is slower than a method that only + instruments at the basic-block level, but there are some complications + that make that method more difficult. + + + + Valgrind actually provides instrumentation at a super-block level. + A super-block has one entry point but unlike basic-blocks can + have multiple exit points. It might be valid to use super-blocks + as a basis for SimPoint methodology, but since this has not been + validated it is probably best to force Valgrind to use basic-blocks. + This is possible by using the + --vex-guest-chase-thresh=0 + option to Valgrind. + + + + On the x86 and amd64 architectures we special case rep-prefixed + string instructions. This is because actual hardware counts + this as one instruction, while a naive Valgrind implementation + would count it as many (possibly hundreds, thousands or even millions) + of instructions. Therefore we need to have special code to handle + this properly. The fldcw instruction is also counted as a special + case, as on Pentium 4 systems this instruction counts as two + instructions on the hardware performance counters. + + + + While executing, our routine is called once for each instruction + in the original code. We look up our current superblock + in an Ordered Set to find a structure that holds block-specific + statistics (we use the entry point address as the key into + the set). We then increment by one the instruction count for this + superblock (assuming it's not a rep instruction). + We also update the master instruction counter by one at the same time. + If this overflows the interval size (by default 100 million, but this + is configurable with a command line option) then we run the BBV generation + code. This routine prints the current BBV line to the output file, and + resets all the superblock counters to zero. + + + +Validation + + + This plugin has been tested on x86 and compared against real + hardware using hardware performance counters. 
This is described in a paper + from the HiPEAC'08 conference, "Using Dynamic Binary Instrumentation to + Generate Multi-Platform Simpoints: Methodology and Accuracy" by + V.M. Weaver and S.A. McKee. + + + + The plugin has also been used with PowerPC but has not been + in any way validated. + + + + +Performance + + + Using this program slows down execution by roughly a factor of 40 + over native execution. This varies depending on the machine + used and the benchmark being run. + On the SPEC CPU 2000 benchmarks running on a 3.4GHz Pentium D + processor, the slowdown ranges from 24x (mcf) to 340x (vortex.2). + + + + Index: exp-bbv/docs/bbv_introduction.txt =================================================================== --- exp-bbv/docs/bbv_introduction.txt (revision 0) +++ exp-bbv/docs/bbv_introduction.txt (revision 0) @@ -0,0 +1,168 @@ +exp-bbv -- a tool to generate Simpoint BBVs (basic block vectors) + + by Vince Weaver vince _at_ csl.cornell.edu + + +Background +~~~~~~~~~~ + The Simpoint algorithm (see http://www.cse.ucsd.edu/~calder/simpoint/ + for more information) is a method of speeding up architectural + simulations by exploiting the fact that programs have phase behavior, + enabling you to extrapolate total performance while only simulating small + portions of the program. + + Most programs exhibit phase-based behavior. At various times during program + execution a program will encounter intervals of time where the code behaves + very similarly to an interval that happened previously. + If you can detect these intervals and group them together, you can + approximate the total behavior of the program by only simulating the bare + minimum number of intervals, and then scaling these results up to predict + what the actual results would have been if you simulated the entire program. 
+ + This is useful in computer architecture research, as running a + benchmark on a cycle-accurate simulator can cause a slowdown of + 1000x, making it take days, weeks, or even longer to run until + completion. If you utilize Simpoints, you can simulate a small fraction + of the program but still get an estimate of what the behavior would + be for the entire program. + + See Sherwood et al. [1] for more info. + + In order to calculate the phases, you need to collect so-called + Basic Block Vectors (BBVs) from the benchmark of interest. These + contain a list of all Basic Blocks entered (a basic block is a section + of code with only one entry point and one exit point). In addition, + there is a count of how many times each basic block was entered. + This count is multiplied by the number of instructions that are in + the basic block, in order to weigh the count so that instructions in + small Basic Blocks aren't counted as more important than instructions + in large Basic Blocks. + + The Basic Block Vector count is dumped at certain fixed intervals. + A common interval size is 100 million committed instructions, but + different sizes can be specified. There are various tradeoffs + to using different sizes. Using smaller sizes can cause problems + because of warmup; in a simulation it takes a while to "warm-up" + the caches and other CPU structures to be what they would be like + if you had run the whole program. + + The actual on-disk format of a BBV file looks something like this: + +T:45:1024 :189:99343 +T:11:78573 :15:1353 :56:1 +T:18:45 :12:135353 :56:78 :314:4324263 + + Each new interval starts with a T. Then there is a colon + followed by a unique number identifying a basic block. + This is followed by another colon, and then followed by a value + that is the number of times the block was entered multiplied by + the number of instructions in the block. This repeats for + each block that was entered during this interval. 
+ + The Simpoint utility takes a BBV file as described above as an input. + A sample run might look like this, assuming you used gzip to zip + the vector for disk-space reasons: + + ./SimPoint.3.2/bin/simpoint -inputVectorsGzipped \ + -loadFVFile results.valgrind.bb.gz \ + -k 5 -saveSimpoints results.simpts \ + -saveSimpointWeights results.weights + + The Simpoint program does random linear projection using 15 dimensions, + then does k-means clustering to calculate which intervals are the ones + of interest. + + In this case we specified we want 5 intervals with the -k 5 option. + + So as output we will have the "results.simpts" file which will simply + list the 5 intervals of interest (which when we multiply by 100 million + [or whatever interval size we chose] will give us the offset in number + of instructions from the beginning of the program to start simulating). + + If we then simulate 100 million instructions from the start of + each of those intervals, gather our statistics, then weight them + according to the weights printed in the "results.weights" file, + we then should have an approximation of how the entire program + would have behaved, in a small fraction of the time. + + +Valgrind Implementation +~~~~~~~~~~~~~~~~~~~~~~~ + + Valgrind provides all of the information necessary to create + BBV files. In the current implementation, all instructions + are instrumented. This is slower than a method that only + instruments at the basic-block level, but there are some complications + that make that method more difficult. + + Valgrind actually provides instrumentation at a super-block level. + A super-block has one entry point but unlike basic-blocks can + have multiple exit points. It might be valid to use super-blocks + as a basis for SimPoint methodology, but since this has not been + validated it is probably best to force Valgrind to use basic-blocks. + This is possible by using the --vex-guest-chase-thresh=0 + option to Valgrind. 
+ + On the x86 and amd64 architectures we special case rep-prefixed + string instructions. This is because actual hardware counts + this as one instruction, while a naive Valgrind implementation + would count it as many (possibly hundreds, thousands or even millions) + of instructions. Therefore we need to have special code to handle + this properly. The fldcw instruction is also counted as a special + case, as on Pentium 4 systems this instruction counts as two + instructions on the hardware performance counters. + + While executing, our routine is called once for each instruction + in the original code. We look up our current superblock + in an Ordered Set to find a structure that holds block-specific + statistics (we use the entry point address as the key into + the set). We then increment by one the + instruction count for this superblock (assuming it's not a rep instruction). + We also update the master instruction counter by one at the same time. + If this overflows the interval size (by default 100 million, but this + is configurable with a command line option) then we run the BBV generation + code. This routine prints the current BBV line to the output file, and + resets all the superblock counters to zero. + +Validation +~~~~~~~~~~ + + The code has been tested on x86 and compared against real + hardware. This is described in a paper [2]. + + The code has been compiled and run on PowerPC but has not been + in any way validated. + + +Using +~~~~~ + + You run the tool something like this: + + valgrind --tool=exp-bbv --vex-guest-chase-thresh=0 \ + --interval-size=100000000 --bb-out-file=out.bbv /bin/ls + + The --vex-guest-chase-thresh=0 option forces valgrind to use basic blocks + (as opposed to super blocks). + The "interval-size" parameter is optional; it defaults to 100 million. + The "bb-out-file" option specifies the output file name. It defaults to bb.out.%p. + /bin/ls is just a placeholder for any executable you'd like to run. 
+ +Performance +~~~~~~~~~~~ + Using exp-bbv slows down execution by roughly a factor of 40. + The slowdown depends on the machine being used and on the benchmark. + + On the spec2000 benchmarks running on a 3.4GHz Pentium D processor, + the slowdown ranges from 24x (mcf) to 340x (vortex.2). + +References +~~~~~~~~~~ +[1] T. Sherwood, E. Perelman, G. Hamerly, B. Calder. Automatically + Characterizing Large Scale Program Behavior. ASPLOS X, 2002. + +[2] V.M. Weaver, S.A. McKee, "Using Dynamic Binary Instrumentation to + Generate Multi-Platform Simpoints: Methodology and Accuracy", 3rd EC + International Conference on High Performance Embedded Architectures and + Compilers (HiPEAC'08), Goeteborg, Sweden, January 2008. + Index: exp-bbv/docs/Makefile.am =================================================================== --- exp-bbv/docs/Makefile.am (revision 0) +++ exp-bbv/docs/Makefile.am (revision 0) @@ -0,0 +1,2 @@ +EXTRA_DIST = bbv-manual.xml + Index: exp-bbv/tests/Makefile.am =================================================================== --- exp-bbv/tests/Makefile.am (revision 0) +++ exp-bbv/tests/Makefile.am (revision 0) @@ -0,0 +1,28 @@ + +include $(top_srcdir)/Makefile.tool-tests.am + +SUBDIRS = . + +# Platform-specific tests +if VGCONF_PLATFORMS_INCLUDE_X86_LINUX +SUBDIRS += x86-linux +endif +if VGCONF_PLATFORMS_INCLUDE_AMD64_LINUX +SUBDIRS += amd64-linux +endif +if VGCONF_PLATFORMS_INCLUDE_PPC32_LINUX +SUBDIRS += ppc32-linux +endif + +DIST_SUBDIRS = x86-linux amd64-linux ppc32-linux . 
+ +noinst_SCRIPTS = compare_bbv.sh + +EXTRA_DIST = $(noinst_SCRIPTS) \ + logo.include logo.lzss_new + +check_PROGRAMS = + +AM_CFLAGS += $(AM_FLAG_M3264_PRI) +AM_CXXFLAGS += $(AM_FLAG_M3264_PRI) + Index: exp-bbv/tests/compare_bbv.sh =================================================================== --- exp-bbv/tests/compare_bbv.sh (revision 0) +++ exp-bbv/tests/compare_bbv.sh (revision 0) @@ -0,0 +1,3 @@ +#!/bin/bash + +diff $1.out.bb $1.out.exp Property changes on: exp-bbv/tests/compare_bbv.sh ___________________________________________________________________ Added: svn:executable + * Index: exp-bbv/tests/logo.include =================================================================== --- exp-bbv/tests/logo.include (revision 0) +++ exp-bbv/tests/logo.include (revision 0) @@ -0,0 +1,6 @@ +.equ FREQUENT_CHAR,0 +.equ N,1024 +.equ F,64 +.equ THRESHOLD,2 +.equ P_BITS,10 +.equ POSITION_MASK,3 Index: exp-bbv/tests/logo.lzss_new =================================================================== --- exp-bbv/tests/logo.lzss_new (revision 0) +++ exp-bbv/tests/logo.lzss_new (revision 0) @@ -0,0 +1,21 @@ +logo: + .byte 255,27,91,48,59,49,59,51,55 + .byte 159,59,52,55,109,35,204,247,192,7,51 + .byte 141,48,200,27,27,91,196,7,203,31,28,12,59 + .byte 15,52,48,109,10,192,247,1,96,26,56,44,156 + .byte 31,27,91,51,49,109,204,4,65,172,13,36 + .byte 2,28,16,79,13,32,16,65,147,152,131,52,28,52,204,16 + .byte 16,12,36,111,57,236,167,28,8,51,22,20,137,85,44,96 + .byte 0,43,97,214,113,226,200,203,8,212,9,211,16,43,89,245,209 + .byte 0,128,17,210,24,13,40,28,20,13,44,28,28,240,74,26,91 + .byte 0,13,80,95,101,135,101,43,85,245,205,205,40,205,20,137,65 + .byte 0,29,135,66,75,114,83,28,120,15,98,135,109,85,88,247,193 + .byte 0,232,43,244,151,73,120,61,176,27,95,151,176,18,43,171,202 + .byte 16,223,22,26,245,90,245,217,63,51,27,86,146,91,176,2 + .byte 0,12,29,211,200,172,57,23,102,50,246,110,109,236,68,96,94 + .byte 8,175,10,166,105,20,1,48,51,11,222,31,49,15,211,188 + .byte 
0,175,79,25,86,170,69,82,219,40,82,70,127,8,83,219,35 + .byte 0,169,85,170,53,24,33,18,104,145,42,200,34,178,104,112,45 + .byte 0,198,80,178,121,145,74,112,49,248,81,243,40,221,23,255,23 + .byte 8,2,54,3,36,229,66,10 +logo_end: Index: exp-bbv/tests/filter_stderr =================================================================== --- exp-bbv/tests/filter_stderr (revision 0) +++ exp-bbv/tests/filter_stderr (revision 0) @@ -0,0 +1,13 @@ +#! /bin/sh + +dir=`dirname $0` + +$dir/../../tests/filter_stderr_basic | + +# Remove lines that don't start with # +sed '/^[^#]/d' | + +# Remove all blank lines +sed '/^$/d' + + Property changes on: exp-bbv/tests/filter_stderr ___________________________________________________________________ Added: svn:executable + * Index: exp-bbv/tests/x86-linux/Makefile.am =================================================================== --- exp-bbv/tests/x86-linux/Makefile.am (revision 0) +++ exp-bbv/tests/x86-linux/Makefile.am (revision 0) @@ -0,0 +1,30 @@ +include $(top_srcdir)/Makefile.tool-tests.am + +noinst_SCRIPTS = filter_stderr + +check_PROGRAMS = \ + million rep_prefix ll fldcw_check + +EXTRA_DIST = $(noinst_SCRIPTS) \ + fldcw_check.stderr.exp \ + fldcw_check.vgtest \ + ll.stderr.exp \ + ll.stdout.exp \ + ll.out.exp \ + ll.vgtest \ + million.stderr.exp \ + million.out.exp \ + million.vgtest \ + rep_prefix.stderr.exp \ + rep_prefix.vgtest + +AM_CCASFLAGS += -ffreestanding + +LDFLAGS += @FLAG_M32@ -nostartfiles -nodefaultlibs + +million_SOURCES = million.S +rep_prefix_SOURCES = rep_prefix.S +ll_SOURCES = ll.S +fldcw_check_SOURCES = fldcw_check.S + +AM_CCASFLAGS += @FLAG_M32@ Index: exp-bbv/tests/x86-linux/filter_stderr =================================================================== --- exp-bbv/tests/x86-linux/filter_stderr (revision 0) +++ exp-bbv/tests/x86-linux/filter_stderr (revision 0) @@ -0,0 +1,5 @@ +#! 
/bin/sh + +../filter_stderr + + Property changes on: exp-bbv/tests/x86-linux/filter_stderr ___________________________________________________________________ Added: svn:executable + * Index: exp-bbv/tests/x86-linux/fldcw_check.vgtest =================================================================== --- exp-bbv/tests/x86-linux/fldcw_check.vgtest (revision 0) +++ exp-bbv/tests/x86-linux/fldcw_check.vgtest (revision 0) @@ -0,0 +1,4 @@ +prog: fldcw_check +vgopts: --interval-size=10000 --vex-guest-chase-thresh=0 --bb-out-file=fldcw_check.out.bb +cleanup: rm fldcw_check.out.bb + Index: exp-bbv/tests/x86-linux/fldcw_check.S =================================================================== --- exp-bbv/tests/x86-linux/fldcw_check.S (revision 0) +++ exp-bbv/tests/x86-linux/fldcw_check.S (revision 0) @@ -0,0 +1,120 @@ + +.globl _start + +_start: + # This code tests for the fldcw "load floating point control word" + # instruction. On most x86 processors the retired_instruction + # performance counter counts this as one instruction. However, + # on Pentium 4 systems it counts as two. Therefore this can + # affect BBV results on such a system. 
+ # fldcw is most often used to set the rounding mode when doing + # floating point to integer conversions + + # It is encoded as "d9 /5" which means + # 1101 1001 xx10 1yyy + # Where xx is the "mod" which will be 00, 01, or 10 indicating offset + # and yyy is the register field + + + + # these are instructions with similar encodings to fldcw + # that can cause false positives if the test isn't explicit enough +similar: + fld1 # d9 e8 + fldl2t # d9 e9 + fldl2e # d9 ea + fldpi # d9 eb + fldlg2 # d9 ec + fldln2 # d9 ed + fldz # d9 ee + + # check some varied ways of calling fldcw + + + # offset on stack +stack: + sub $4,%esp # allocate space on stack + fnstcw 2(%esp) + fldcw 2(%esp) + add $4,%esp # restore stack + + # 32-bit register + + fnstcw cw + mov $cw,%eax + fldcw 0(%eax) # eax + mov $cw,%ebx + fldcw 0(%ebx) # ebx + mov $cw,%ecx + fldcw 0(%ecx) # ecx + mov $cw,%edx + fldcw 0(%edx) # edx + + # register + 8-bit offset +eight_bit: + mov $cw,%eax + sub $32,%eax + + fldcw 32(%eax) # eax + 8 bit offset + mov %eax,%ebx + fldcw 32(%ebx) # ebx + 8 bit offset + mov %eax,%ecx + fldcw 32(%ecx) # ecx + 8 bit offset + mov %eax,%edx + fldcw 32(%edx) # edx + 8 bit offset + + # register + 32-bit offset +thirtytwo_bit: + mov $cw,%eax + sub $30000,%eax + + fldcw 30000(%eax) # eax + 32 bit offset + mov %eax,%ebx + fldcw 30000(%ebx) # ebx + 32 bit offset + mov %eax,%ecx + fldcw 30000(%ecx) # ecx + 32 bit offset + mov %eax,%edx + fldcw 30000(%edx) # edx + 32 bit offset + + # check an fp/integer conversion + # in a loop to give a bigger count + + mov $1024,%ecx +big_loop: + + fldl three # load value onto fp stack + fnstcw saved_cw # store control word to mem + movzwl saved_cw, %eax # load cw from mem, zero extending + movb $12, %ah # set cw for "round to zero" + movw %ax, cw # store back to memory + fldcw cw # load new rounding mode + fistpl result # save stack value as integer to mem + fldcw saved_cw # restore old cw + + loop big_loop # loop to make the count more obvious + + movl 
result, %ebx # sanity check to see if the + cmp $3,%ebx # result is the expected one + je exit + +print_error: + mov $4,%eax # write syscall + mov $1,%ebx # stdout + mov $error,%ecx # string + mov $22,%edx # bytes to write: 21 chars + the NUL from .asciz + int $0x80 + +exit: + movl result, %ebx # load converted value + movl $1, %eax # SYSCALL_EXIT + int $0x80 + + + +.data +saved_cw: .long 0 +cw: .long 0 +result: .long 0 +three: .long 0 # a floating point 3.0 + .long 1074266112 +error: .asciz "Error! Wrong result!\n" Index: exp-bbv/tests/x86-linux/fldcw_check.stderr.exp =================================================================== --- exp-bbv/tests/x86-linux/fldcw_check.stderr.exp (revision 0) +++ exp-bbv/tests/x86-linux/fldcw_check.stderr.exp (revision 0) @@ -0,0 +1,5 @@ +# Total intervals: 0 (Interval Size 10000) +# Total instructions: 9261 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 2061 Index: exp-bbv/tests/x86-linux/ll.vgtest =================================================================== --- exp-bbv/tests/x86-linux/ll.vgtest (revision 0) +++ exp-bbv/tests/x86-linux/ll.vgtest (revision 0) @@ -0,0 +1,5 @@ +prog: ll +vgopts: --interval-size=1000 --vex-guest-chase-thresh=0 --bb-out-file=ll.out.bb +post: ../compare_bbv.sh ll +cleanup: rm ll.out.bb + Index: exp-bbv/tests/x86-linux/ll.S =================================================================== --- exp-bbv/tests/x86-linux/ll.S (revision 0) +++ exp-bbv/tests/x86-linux/ll.S (revision 0) @@ -0,0 +1,608 @@ +# +# linux_logo in i386 assembly language +# based on the code from ll_asm-0.36 +# +# By Vince Weaver +# +# Modified to remove non-deterministic system calls +# And to avoid reading from /proc +# + +.include "../logo.include" + +# offsets into the results returned by the uname syscall +.equ U_SYSNAME,0 +.equ U_NODENAME,65 +.equ U_RELEASE,65*2 +.equ U_VERSION,(65*3) +.equ U_MACHINE,(65*4) +.equ U_DOMAINNAME,65*5 + +# offset into the results returned by the sysinfo syscall +.equ S_TOTALRAM,16 + +# Syscalls 
+.equ SYSCALL_EXIT, 1 +.equ SYSCALL_WRITE, 4 + +# +.equ STDIN,0 +.equ STDOUT,1 +.equ STDERR,2 + + .globl _start +_start: + #========================= + # PRINT LOGO + #========================= + +# LZSS decompression algorithm implementation +# by Stephan Walter 2002, based on LZSS.C by Haruhiko Okumura 1989 +# optimized some more by Vince Weaver + + # we used to fill the buffer with FREQUENT_CHAR + # but, that only gains us one byte of space in the lzss image. + # the lzss algorithm does automatic RLE... pretty clever + # so we compress with NUL as FREQUENT_CHAR and it is pre-done for us + + mov $(N-F), %bp # R + + mov $logo, %esi # %esi points to logo (for lodsb) + + mov $out_buffer, %edi # point to out_buffer + push %edi # save this value for later + +decompression_loop: + lodsb # load in a byte + + mov $0xff, %bh # re-load top as a hackish 8-bit counter + mov %al, %bl # move in the flags + +test_flags: + cmp $logo_end, %esi # have we reached the end? + je done_logo # if so, exit + + shr $1, %ebx # shift bottom bit into carry flag + jc discrete_char # if set, we jump to discrete char + +offset_length: + lodsw # get match_length and match_position + mov %eax,%edx # copy to edx + # no need to mask dx, as we do it + # by default in output_loop + + shr $(P_BITS),%eax + add $(THRESHOLD+1),%al + mov %al,%cl # cl = (ax >> P_BITS) + THRESHOLD + 1 + # (=match_length) + +output_loop: + and $POSITION_MASK,%dh # mask it + mov text_buf(%edx), %al # load byte from text_buf[] + inc %edx # advance pointer in text_buf +store_byte: + stosb # store it + + mov %al, text_buf(%ebp) # store also to text_buf[r] + inc %ebp # r++ + and $(N-1), %bp # mask r + + loop output_loop # repeat until k>j + + or %bh,%bh # if 0 we shifted through 8 and must + jnz test_flags # re-load flags + + jmp decompression_loop + +discrete_char: + lodsb # load a byte + inc %ecx # we set ecx to one so byte + # will be output once + # (how do we know ecx is zero?) 
+ + jmp store_byte # and cleverly store it + + +# end of LZSS code + +done_logo: + + pop %ebp # get out_buffer and keep in bp + mov %ebp,%ecx # move out_buffer to ecx + + call write_stdout # print the logo + + # + # Setup + # +setup: + mov $strcat,%edx # use edx as call pointer + + + #========================== + # PRINT VERSION + #========================== + +# push $SYSCALL_UNAME # uname syscall +# pop %eax # in 3 bytes +# mov $uname_info,%ebx # uname struct +# int $0x80 # do syscall + + mov %ebp,%edi # point %edi to out_buffer + + mov $(uname_info+U_SYSNAME),%esi # os-name from uname "Linux" + call *%edx # call strcat + + mov $ver_string,%esi # source is " Version " + call *%edx # call strcat + push %esi # save our .txt pointer + + mov $(uname_info+U_RELEASE),%esi # version from uname "2.4.1" + call *%edx # call strcat + + pop %esi # restore .txt pointer + # source is ", Compiled " + call *%edx # call strcat + push %esi # store for later + + mov $(uname_info+U_VERSION),%esi # compiled date + call *%edx # call strcat + + mov %ebp,%ecx # move out_buffer to ecx + + mov $0xa,%ax # store linefeed on end + stosw # and zero + + call *%edx # call strcat + + call center_and_print # center and print + + #=============================== + # Middle-Line + #=============================== + + #========= + # Load /proc/cpuinfo into buffer + #========= + + push %edx # save call pointer + +# push $SYSCALL_OPEN # load 5 [ open() ] +# pop %eax # in 3 bytes + +# mov $cpuinfo,%ebx # '/proc/cpuinfo' +# xor %ecx,%ecx # 0 = O_RDONLY +# cdq # clear edx in clever way +# int $0x80 # syscall. fd in eax. + # we should check that eax>=0 + +# mov %eax,%ebx # save our fd + +# push $SYSCALL_READ # load 3 = read() +# pop %eax # in 3 bytes + + mov $disk_buffer,%ecx + +# mov $16,%dh # 4096 is maximum size of proc file #) + # we load sneakily by knowing + # 16<<8 = 4096. 
be sure edx clear + + +# int $0x80 + +# push $SYSCALL_CLOSE # close (to be correct) +# pop %eax +# int $0x80 + + #============= + # Number of CPUs + #============= +number_of_cpus: + + xor %ebx,%ebx # chip count + + # $disk_buffer still in ecx +bogo_loop: + mov (%ecx), %eax # load 4 bytes into eax + inc %ecx # increment pointer + + cmp $0,%al # check for end of file + je done_bogo + + cmp $('o'<<24+'g'<<16+'o'<<8+'b'),%eax + # "bogo" in little-endian + + jne bogo_loop # if not equal, keep going + + inc %ebx # otherwise, we have a bogo + inc %ebx # times two for future magic + jmp bogo_loop + +done_bogo: + lea one-6(%ebx,%ebx,2), %esi + # Load into esi + # [one]+(num_cpus*6) + # + # the above multiplies by three + # esi = (ebx+(ebx*2)) + # and we double-incremented ebx + # earlier + + mov %ebp,%edi # move output buffer to edi + + pop %edx # restore call pointer + call *%edx # copy it (call strcat) + + mov $' ',%al # print a space + stosb + + push %ebx # store cpu count + push %edx # store strcat pointer + + #========= + # MHz + #========= +print_mhz: + mov $('z'<<24+'H'<<16+'M'<<8+' '),%ebx + # find ' MHz' and grab up to . + # we are little endian + mov $'.',%ah + + # below is same as "sub $(strcat-find_string),%edx + # gas won't let us force the one-byte constant + .byte 0x83,0xEA,strcat-find_string + + call *%edx # call find string + + mov %ebx,%eax # clever way to get MHz in, sadly + ror $8,%eax # not any smaller than a mov + stosl + + #========= + # Chip Name + #========= +chip_name: + + # because of ugly newer cpuinfos from intel I had to hack this + # now we grab the first two words in the name field and use that + # it works on all recent Intel and AMD chips. 
Older things + # might choke + + mov $('e'<<24+'m'<<16+'a'<<8+'n'),%ebx + # find 'name\t: ' and grab up to \n + # we are little endian + mov $' ',%ah + call *%edx # print first word + stosb # store a space + call skip_spaces # print next word + + pop %edx + pop %ebx # restore chip count + pop %esi + + call *%edx # ' Processor' + cmpb $2,%bl + jne print_s + inc %esi # if singular, skip the s +print_s: + call *%edx # 's, ' + + push %esi # restore the values + push %edx + + #======== + # RAM + #======== + +# push $SYSCALL_SYSINFO # sysinfo() syscall +# pop %eax +# mov $sysinfo_buff,%ebx +# int $0x80 + + mov (sysinfo_buff+S_TOTALRAM),%eax # size in bytes of RAM + shr $20,%eax # divide by 1024*1024 to get M + adc $0, %eax # round + + + call num_to_ascii + + pop %edx # restore strcat pointer + + pop %esi # print 'M RAM, ' + call *%edx # call strcat + + push %esi + + + #======== + # Bogomips + #======== + + mov $('s'<<24+'p'<<16+'i'<<8+'m'),%ebx + # find 'mips\t: ' and grab up to \n + mov $0xa,%ah + call find_string + + pop %esi # bogo total follows RAM + + call *%edx # call strcat + + push %esi + + mov %ebp,%ecx # point ecx to out_buffer + + + call center_and_print # center and print + + #================================= + # Print Host Name + #================================= + + mov %ebp,%edi # point to output_buffer + + mov $(uname_info+U_NODENAME),%esi # host name from uname() + call *%edx # call strcat + + # ecx is unchanged + call center_and_print # center and print + + pop %ecx # (.txt) pointer to default_colors + + call write_stdout + + + #================================ + # Exit + #================================ +exit: + xor %ebx,%ebx + xor %eax,%eax + inc %eax # put exit syscall number (1) in eax + int $0x80 # and exit + + + #================================= + # FIND_STRING + #================================= + # ah is char to end at + # ebx is 4-char ascii string to look for + # edi points at output buffer + +find_string: + + mov $disk_buffer-1,%esi # 
look in cpuinfo buffer +find_loop: + inc %esi + cmpb $0, (%esi) # are we at EOF? + je done # if so, done + + cmp (%esi), %ebx # do the strings match? + jne find_loop # if not, loop + + # ! if we get this far, we matched + +find_colon: + lodsb # repeat till we find colon + cmp $0,%al # this is actually smaller code + je done # than an or ecx/repnz scasb + cmp $':',%al + jne find_colon + + +skip_spaces: + lodsb # skip spaces + cmp $0x20,%al # Loser new intel chips have lots?? + je skip_spaces + +store_loop: + cmp $0,%al + je done + cmp %ah,%al # is it end string? + je almost_done # if so, finish + cmp $'\n',%al # also end if linefeed + je almost_done + stosb # if not store and continue + lodsb # load value + jmp store_loop + +almost_done: + + movb $0, (%edi) # replace last value with NUL +done: + ret + + + #================================ + # strcat + #================================ + +strcat: + lodsb # load a byte from [ds:esi] + stosb # store a byte to [es:edi] + cmp $0,%al # is it zero? 
+ jne strcat # if not loop + dec %edi # point to one less than null + ret # return + + #============================== + # center_and_print + #============================== + # string to center in ecx + +center_and_print: + push %edx + push %ecx # save the string pointer + inc %edi # move to a clear buffer + push %edi # save for later + + mov $('['<<8+27),%ax # we want to output ^[[ + stosw + + cdq # clear dx + +str_loop2: # find end of string + inc %edx + cmpb $0,(%ecx,%edx) # repeat till we find zero + jne str_loop2 + + push $81 # one added to cheat, we don't + # count the trailing '\n' + pop %eax + + cmp %eax,%edx # see if we are >=80 + jl not_too_big # if so, don't center + push $80 + pop %edx + +not_too_big: + sub %edx,%eax # subtract size from 80 + + shr %eax # then divide by 2 + + call num_to_ascii # print number of spaces + mov $'C',%al # tack a 'C' on the end + # ah is zero from num_to_ascii + stosw # store C and a NULL + pop %ecx # pop the pointer to ^[[xC + + call write_stdout # write to the screen + +done_center: + pop %ecx # restore string pointer + # and trickily print the real string + + pop %edx + + #================================ + # WRITE_STDOUT + #================================ + # ecx has string + # eax,ebx,ecx,edx trashed +write_stdout: + push %edx + push $SYSCALL_WRITE # put 4 in eax (write syscall) + pop %eax # in 3 bytes of code + + cdq # clear edx + + xor %ebx,%ebx # put 1 in ebx (stdout) + inc %ebx # in 3 bytes of code + + # another way of doing this: lea 1(%edx), %ebx + +str_loop1: + inc %edx + cmpb $0,(%ecx,%edx) # repeat till zero + jne str_loop1 + + int $0x80 # run the syscall + pop %edx + ret + + ############################## + # num_to_ascii + ############################## + # ax = value to print + # edi points to where we want it + +num_to_ascii: + push $10 + pop %ebx + xor %ecx,%ecx # clear ecx +div_by_10: + cdq # clear edx + div %ebx # divide + push %edx # save for later + inc %ecx # add to length counter + or %eax,%eax # 
was Q zero? + jnz div_by_10 # if not divide again + +write_out: + pop %eax # restore in reverse order + add $0x30, %al # convert to ASCII + stosb # save digit + loop write_out # loop till done + ret + +#=========================================================================== +# section .data +#=========================================================================== +.data + +ver_string: .ascii " Version \0" +compiled_string: .ascii ", Compiled \0" +processor: .ascii " Processor\0" +s_comma: .ascii "s, \0" +ram_comma: .ascii "M RAM, \0" +bogo_total: .ascii " Bogomips Total\n\0" + +default_colors: .ascii "\033[0m\n\n\0" + +cpuinfo: .ascii "/proc/cpuinfo\0" + + +one: .ascii "One\0\0\0" +two: .ascii "Two\0\0\0" +three: .ascii "Three\0" +four: .ascii "Four\0" + +.include "../logo.lzss_new" + +disk_buffer: +.ascii "processor : 0\n" +.ascii "vendor_id : AuthenticAMD\n" +.ascii "cpu family : 6\n" +.ascii "model : 6\n" +.ascii "model name : AMD Athlon(tm) XP 2000+\n" +.ascii "stepping : 2\n" +.ascii "cpu MHz : 1665.267\n" +.ascii "cache size : 256 KB\n" +.ascii "fdiv_bug : no\n" +.ascii "hlt_bug : no\n" +.ascii "f00f_bug : no\n" +.ascii "coma_bug : no\n" +.ascii "fpu : yes\n" +.ascii "fpu_exception : yes\n" +.ascii "cpuid level : 1\n" +.ascii "wp : yes\n" +.ascii "flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx fxsr sse syscall mmxext 3dnowext 3dnow up\n" +.ascii "bogomips : 3330.53\n" +.ascii "clflush size : 32\n" +.ascii "power management: ts\n\0" + +uname_info: +.ascii "Linux\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +.ascii "tobler\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +.ascii "2.6.29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +.ascii "#1 SMP Mon May 4 
09:51:54 EDT 2009\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + + +sysinfo_buff: +.long 0,0,0,0,512*1024*1024,0,0,0,0 +.long 0,0,0,0,0,0,0,0,0 + +#============================================================================ +# section .bss +#============================================================================ +.bss + +.lcomm text_buf, (N+F-1) +.lcomm out_buffer,16384 + + + + + Index: exp-bbv/tests/x86-linux/ll.stdout.exp =================================================================== --- exp-bbv/tests/x86-linux/ll.stdout.exp (revision 0) +++ exp-bbv/tests/x86-linux/ll.stdout.exp (revision 0) @@ -0,0 +1,17 @@ +############################################################################### +############################################################################### +##################################################################O#O########## +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +Linux Version 2.6.29, Compiled #1 SMP Mon May 4 
09:51:54 EDT 2009 +One 1665MHz AMD Athlon(tm) Processor, 512M RAM, 3330.53 Bogomips Total +tobler + Index: exp-bbv/tests/x86-linux/ll.stderr.exp =================================================================== --- exp-bbv/tests/x86-linux/ll.stderr.exp (revision 0) +++ exp-bbv/tests/x86-linux/ll.stderr.exp (revision 0) @@ -0,0 +1,5 @@ +# Total intervals: 39 (Interval Size 1000) +# Total instructions: 39439 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 Index: exp-bbv/tests/x86-linux/ll.out.exp =================================================================== --- exp-bbv/tests/x86-linux/ll.out.exp (revision 0) +++ exp-bbv/tests/x86-linux/ll.out.exp (revision 0) @@ -0,0 +1,47 @@ +T:1:9 :7:10 :5:38 :2:44 :8:65 :9:663 :4:119 :6:2 :3:51 +T:7:5 :5:16 :2:18 :8:52 :9:858 :4:35 :6:1 :3:15 +T:7:5 :5:16 :2:18 :8:52 :9:858 :4:35 :6:1 :3:15 +T:7:5 :5:14 :2:16 :8:91 :9:863 :4:7 :6:1 :3:3 +T:7:5 :5:12 :2:14 :8:78 :9:880 :4:7 :6:1 :3:3 +T:7:5 :5:6 :2:8 :8:52 :9:928 :6:1 +T:7:5 :5:10 :2:11 :8:65 :9:908 :6:1 +T:7:5 :5:14 :2:17 :8:117 :9:846 :6:1 +T:5:8 :2:8 :8:52 :9:932 +T:7:5 :5:8 :2:10 :8:65 :9:911 :6:1 +T:5:8 :2:8 :8:52 :9:932 +T:7:5 :5:6 :2:8 :8:52 :9:928 :6:1 +T:5:6 :2:6 :8:39 :9:949 +T:7:5 :5:6 :2:8 :8:52 :9:928 :6:1 +T:5:4 :2:4 :8:26 :9:966 +T:7:5 :5:12 :2:14 :8:78 :9:880 :4:7 :6:1 :3:3 +T:5:6 :2:6 :8:39 :9:949 +T:7:5 :5:8 :2:10 :8:65 :9:911 :6:1 +T:7:5 :5:14 :2:16 :8:91 :9:863 :4:7 :6:1 :3:3 +T:5:8 :2:8 :8:52 :9:932 +T:7:5 :5:10 :2:12 :8:78 :9:894 :6:1 +T:7:5 :5:10 :2:12 :8:75 :9:897 :6:1 +T:5:12 :2:12 :8:81 :9:895 +T:7:5 :5:8 :2:8 :8:39 :9:389 :4:7 :6:1 :3:3 :10:3 :11:9 :12:528 +T:12:1000 +T:12:1000 +T:12:1000 +T:12:1000 +T:12:1000 +T:12:1000 +T:12:1000 +T:15:4 :18:2 :19:3 :20:2 :21:3 :22:4 :16:283 :17:10 :12:686 :13:1 :14:2 +T:23:1 :32:7 :34:352 :33:177 :16:1 :17:2 :24:10 :25:195 :26:4 :27:3 :30:4 :31:11 :11:9 :12:204 :13:2 :14:4 :28:9 :29:5 +T:34:667 :33:333 +T:34:665 :33:332 :35:3 +T:34:128 :33:64 :36:4 :37:8 :49:6 :38:8 :40:407 :39:274 :41:21 
:42:14 :43:6 :44:10 :45:10 :46:8 :47:12 :48:2 :16:16 :17:2 +T:50:2 :51:4 :52:2 :53:2 :54:6 :56:3 :57:4 :38:4 :40:405 :39:272 :41:18 :42:12 :43:9 :44:30 :45:30 :46:26 :47:39 :48:4 :16:88 :17:6 :28:9 :55:12 :29:13 +T:40:600 :39:400 +T:58:2 :59:3 :40:352 :39:236 :41:18 :42:12 :43:6 :44:16 :45:16 :46:14 :47:21 :48:2 :16:68 :17:2 :24:10 :25:210 :26:4 :27:3 :28:5 + + +# Total intervals: 39 (Interval Size 1000) +# Total instructions: 39439 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 + Index: exp-bbv/tests/x86-linux/million.vgtest =================================================================== --- exp-bbv/tests/x86-linux/million.vgtest (revision 0) +++ exp-bbv/tests/x86-linux/million.vgtest (revision 0) @@ -0,0 +1,5 @@ +prog: million +vgopts: --interval-size=100000 --vex-guest-chase-thresh=0 --bb-out-file=million.out.bb +post: ../compare_bbv.sh million +cleanup: rm million.out.bb + Index: exp-bbv/tests/x86-linux/million.S =================================================================== --- exp-bbv/tests/x86-linux/million.S (revision 0) +++ exp-bbv/tests/x86-linux/million.S (revision 0) @@ -0,0 +1,21 @@ + + # count for 1 million instructions + # total is 1 + 1 + 499997*2 + 4 + + .globl _start +_start: + xor %ecx,%ecx # not needed, pads total to 1M + mov $499997,%ecx # load counter +test_loop: + dec %ecx # repeat count times + jnz test_loop + + #================================ + # Exit + #================================ +exit: + xor %ebx,%ebx # we return 0 + xor %eax,%eax + inc %eax # put exit syscall number (1) in eax + int $0x80 # and exit + Index: exp-bbv/tests/x86-linux/million.stderr.exp =================================================================== --- exp-bbv/tests/x86-linux/million.stderr.exp (revision 0) +++ exp-bbv/tests/x86-linux/million.stderr.exp (revision 0) @@ -0,0 +1,5 @@ +# Total intervals: 10 (Interval Size 100000) +# Total instructions: 1000000 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 Index: 
exp-bbv/tests/x86-linux/million.out.exp =================================================================== --- exp-bbv/tests/x86-linux/million.out.exp (revision 0) +++ exp-bbv/tests/x86-linux/million.out.exp (revision 0) @@ -0,0 +1,17 @@ +T:1:4 :2:99997 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 + + +# Total intervals: 10 (Interval Size 100000) +# Total instructions: 1000000 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 + Index: exp-bbv/tests/x86-linux/rep_prefix.vgtest =================================================================== --- exp-bbv/tests/x86-linux/rep_prefix.vgtest (revision 0) +++ exp-bbv/tests/x86-linux/rep_prefix.vgtest (revision 0) @@ -0,0 +1,4 @@ +prog: rep_prefix +vgopts: --interval-size=100000 --vex-guest-chase-thresh=0 --bb-out-file=rep_prefix.out.bb +cleanup: rm rep_prefix.out.bb + Index: exp-bbv/tests/x86-linux/rep_prefix.S =================================================================== --- exp-bbv/tests/x86-linux/rep_prefix.S (revision 0) +++ exp-bbv/tests/x86-linux/rep_prefix.S (revision 0) @@ -0,0 +1,270 @@ +# +# rep, repe (repz) and repne (repnz) prefixed string instructions +# only count as one instruction, even though they repeat many times +# This test makes sure the bbv plugin counts these instructions properly +# The answer is validated to hw perf counters. +# + + .globl _start +_start: + cld # we want these to happen forward + + #=================================== + # Check varied order of the size prefix + # with the rep prefix. 
Older binutils + # did this one way, newer binutils the other + #=================================== + +size_prefix: + # test 16-bit load + + mov $8192, %ecx + mov $buffer1, %esi # set source + .byte 0x66, 0xf3, 0xad # lodsw + + mov $8192, %ecx + mov $buffer1, %esi # set source + .byte 0xf3, 0x66, 0xad # lodsw + + + + + #=================================== + # Load and Store Instructions + #=================================== +loadstore: + xor %eax, %eax + mov $0xd, %al # set eax to d + + # test 8-bit store + + mov $16384, %ecx + mov $buffer1, %edi # set destination + rep stosb # store d 16384 times, auto-increment + + # test 8-bit load + + mov $16384, %ecx + mov $buffer1, %esi # set source + rep lodsb # load byte 16384 times, auto-increment + + cmp $0xd,%al # if we loaded wrong value + jne print_error # print an error + + # test 16-bit store + + mov $0x020d,%ax # store 0x020d + + mov $8192, %ecx + mov $buffer1, %edi # set destination + rep stosw # store 8192 times, auto-increment + + # test 16-bit load + + mov $8192, %ecx + mov $buffer1, %esi # set source + rep lodsw # load 8192 times, auto-increment + + cmp $0x020d,%ax # if we loaded wrong value + jne print_error # print an error + + # test 32-bit store + + mov $0x0feb1378,%eax # store 0x0feb1378 + + mov $4096, %ecx + mov $buffer1, %edi # set destination + rep stosl # store 4096 times, auto-increment + + # test 32-bit load + + mov $4096, %ecx + mov $buffer1, %esi # set source + rep lodsl # load 4096 times, auto-increment + + cmp $0x0feb1378,%eax # if we loaded wrong value + jne print_error # print an error + + #============================= + # Move instructions + #============================= +moves: + # test 8-bit move + + mov $16384, %ecx + mov $buffer1, %esi + mov $buffer2, %edi + rep movsb + + # test 16-bit move + + mov $8192, %ecx + mov $buffer2, %esi + mov $buffer1, %edi + rep movsw + + # test 32-bit move + + mov $4096, %ecx + mov $buffer1, %esi + mov $buffer2, %edi + rep movsl + + 
#================================== + # Compare equal instructions + #================================== +compare_equal: + # first set up the areas to compare + + mov $0xa5a5a5a5,%eax + mov $buffer1, %edi + mov $4096, %ecx + rep stosl + + mov $0xa5a5a5a5,%eax + mov $buffer2, %edi + mov $4096, %ecx + rep stosl + + # test 8-bit + + mov $buffer1,%esi + mov $buffer2,%edi + mov $16384, %ecx + repe cmpsb + jnz print_error + + # test 16-bit + + mov $buffer1,%esi + mov $buffer2,%edi + mov $8192, %ecx + repe cmpsw + jnz print_error + + # test 32-bit + + mov $buffer1,%esi + mov $buffer2,%edi + mov $4096, %ecx + repe cmpsl + jnz print_error + + #================================== + # Compare not equal instructions + #================================== +compare_noteq: + # change second buffer + + mov $0x5a5a5a5a,%eax + mov $buffer2, %edi + mov $4096, %ecx + rep stosl + + # test 8-bit + + mov $buffer1,%esi + mov $buffer2,%edi + mov $16384, %ecx + repne cmpsb + je print_error + + # test 16-bit + + mov $buffer1,%esi + mov $buffer2,%edi + mov $8192, %ecx + repne cmpsw + je print_error + + # test 32-bit + + mov $buffer1,%esi + mov $buffer2,%edi + mov $4096, %ecx + repne cmpsl + je print_error + + #==================================== + # Check scan equal instruction + #==================================== + + # test 8-bit + + mov $0xa5,%al + mov $buffer1,%edi + mov $16384, %ecx + repe scasb + jnz print_error + + # test 16-bit + + mov $0xa5a5,%ax + mov $buffer1,%edi + mov $8192, %ecx + repe scasw + jnz print_error + + # test 32-bit + + mov $0xa5a5a5a5,%eax + mov $buffer1,%edi + mov $4096, %ecx + repe scasl + jnz print_error + + #==================================== + # Check scan not-equal instruction + #==================================== + + # test 8-bit + + mov $0xa5,%al + mov $buffer2,%edi + mov $16384, %ecx + repne scasb + jz print_error + + # test 16-bit + + mov $0xa5a5,%ax + mov $buffer2,%edi + mov $8192, %ecx + repne scasw + jz print_error + + # test 32-bit + + mov 
$0xa5a5a5a5,%eax + mov $buffer2,%edi + mov $4096, %ecx + repne scasl + jz print_error + + jmp exit # no error, skip to exit + +print_error: + + mov $4, %eax # Write syscall + mov $1, %ebx # print to stdout + mov $error_string, %ecx # string to print + mov $16, %edx # strlen + int $0x80 # call syscall + + #================================ + # Exit + #================================ +exit: + xor %ebx,%ebx # we return 0 + xor %eax,%eax + inc %eax # put exit syscall number (1) in eax + int $0x80 # and exit + + +.data +error_string: .asciz "Error detected!\n" + +.bss + +.lcomm buffer1, 16384 +.lcomm buffer2, 16384 Index: exp-bbv/tests/x86-linux/rep_prefix.stderr.exp =================================================================== --- exp-bbv/tests/x86-linux/rep_prefix.stderr.exp (revision 0) +++ exp-bbv/tests/x86-linux/rep_prefix.stderr.exp (revision 0) @@ -0,0 +1,5 @@ +# Total intervals: 0 (Interval Size 100000) +# Total instructions: 124 +# Total reps: 229402 +# Unique reps: 26 +# Total fldcw instructions: 0 Index: exp-bbv/tests/amd64-linux/Makefile.am =================================================================== --- exp-bbv/tests/amd64-linux/Makefile.am (revision 0) +++ exp-bbv/tests/amd64-linux/Makefile.am (revision 0) @@ -0,0 +1,28 @@ +include $(top_srcdir)/Makefile.tool-tests.am + +noinst_SCRIPTS = filter_stderr + +check_PROGRAMS = \ + million rep_prefix ll fldcw_check + +EXTRA_DIST = $(noinst_SCRIPTS) \ + fldcw_check.stderr.exp \ + fldcw_check.vgtest \ + ll.stderr.exp \ + ll.stdout.exp \ + ll.out.exp \ + ll.vgtest \ + million.stderr.exp \ + million.out.exp \ + million.vgtest \ + rep_prefix.stderr.exp \ + rep_prefix.vgtest + +AM_CCASFLAGS += -ffreestanding + +LDFLAGS += -nostartfiles -nodefaultlibs + +million_SOURCES = million.S +rep_prefix_SOURCES = rep_prefix.S +ll_SOURCES = ll.S +fldcw_check_SOURCES = fldcw_check.S Index: exp-bbv/tests/amd64-linux/filter_stderr =================================================================== --- 
exp-bbv/tests/amd64-linux/filter_stderr (revision 0) +++ exp-bbv/tests/amd64-linux/filter_stderr (revision 0) @@ -0,0 +1,4 @@ +#! /bin/sh + +../filter_stderr + Property changes on: exp-bbv/tests/amd64-linux/filter_stderr ___________________________________________________________________ Added: svn:executable + * Index: exp-bbv/tests/amd64-linux/fldcw_check.vgtest =================================================================== --- exp-bbv/tests/amd64-linux/fldcw_check.vgtest (revision 0) +++ exp-bbv/tests/amd64-linux/fldcw_check.vgtest (revision 0) @@ -0,0 +1,4 @@ +prog: fldcw_check +vgopts: --interval-size=10000 --vex-guest-chase-thresh=0 --bb-out-file=fldcw_check.out.bb +cleanup: rm fldcw_check.out.bb + Index: exp-bbv/tests/amd64-linux/fldcw_check.stderr.exp =================================================================== --- exp-bbv/tests/amd64-linux/fldcw_check.stderr.exp (revision 0) +++ exp-bbv/tests/amd64-linux/fldcw_check.stderr.exp (revision 0) @@ -0,0 +1,5 @@ +# Total intervals: 0 (Interval Size 10000) +# Total instructions: 9270 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 2053 Index: exp-bbv/tests/amd64-linux/fldcw_check.S =================================================================== --- exp-bbv/tests/amd64-linux/fldcw_check.S (revision 0) +++ exp-bbv/tests/amd64-linux/fldcw_check.S (revision 0) @@ -0,0 +1,129 @@ + +.globl _start + +_start: + # This code tests for the fldcw "load floating point control word" + # instruction. On most x86 processors the retired_instruction + # performance counter counts this as one instruction. However, + # on Pentium 4 systems it counts as two. Therefore this can + # affect BBV results on such a system.
+ # fldcw is most often used to set the rounding mode when doing + # floating point to integer conversions + + # It is encoded as "d9 /5" which means + # 1101 1001 xx10 1yyy + # Where xx is the "mod" which will be 00, 01, or 10 indicating offset + # and yyy is the register field + + # these are instructions with similar encodings to fldcw + # that can cause false positives if the test isn't explicit enough +similar: + fld1 # d9 e8 + fldl2t # d9 e9 + fldl2e # d9 ea + fldpi # d9 eb + fldlg2 # d9 ec + fldln2 # d9 ed + fldz # d9 ee + + # check some varied ways of calling fldcw + + # offset on stack +stack: + sub $8,%rsp # allocate space on stack + fnstcw 2(%rsp) + fldcw 2(%rsp) + add $8,%rsp # restore stack + + # 64-bit register +sixtyfour_reg: + fnstcw cw + mov $cw,%rax + fldcw 0(%rax) # rax + mov $cw,%rbx + fldcw 0(%rbx) # rbx + mov $cw,%rcx + fldcw 0(%rcx) # rcx + mov $cw,%rdx + fldcw 0(%rdx) # rdx + + # 32-bit register +thirtytwo_reg: + fnstcw cw + mov $cw,%eax + fldcw 0(%eax) # eax + mov $cw,%ebx + fldcw 0(%ebx) # ebx + mov $cw,%ecx + fldcw 0(%ecx) # ecx + mov $cw,%edx + fldcw 0(%edx) # edx + + # register + 8-bit offset +eight_bit: + mov $cw,%eax + sub $32,%eax + + fldcw 32(%eax) # eax + 8 bit offset + mov %eax,%ebx + fldcw 32(%ebx) # ebx + 8 bit offset + mov %eax,%ecx + fldcw 32(%ecx) # ecx + 8 bit offset + mov %eax,%edx + fldcw 32(%edx) # edx + 8 bit offset + + # register + 32-bit offset +thirtytwo_bit: + mov $cw,%eax + sub $30000,%eax + + fldcw 30000(%eax) # eax + 32 bit offset + mov %eax,%ebx + fldcw 30000(%ebx) # ebx + 32 bit offset + mov %eax,%ecx + fldcw 30000(%ecx) # ecx + 32 bit offset + mov %eax,%edx + fldcw 30000(%edx) # edx + 32 bit offset + + # check an fp/integer conversion + # in a loop to give a bigger count + + mov $1024,%rcx +big_loop: + + fldl three # load value onto fp stack + fnstcw saved_cw # store control word to mem + movzwl saved_cw, %eax # load cw from mem, zero extending + movb $12, %ah # set cw for "round to zero" + movw %ax, cw # store
back to memory + fldcw cw # save new rounding mode + fistpl result # save stack value as integer to mem + fldcw saved_cw # restore old cw + + loop big_loop # loop to make the count more obvious + + movl result, %ebx # sanity check to see if the + cmp $3,%rbx # result is the expected one + je exit + +print_error: + mov $1,%rax # write syscall + mov $1,%rdi # stdout + mov $error,%rsi # string + mov $21,%rdx # length of string, excluding NUL + syscall + +exit: + xor %rdi, %rdi # return 0 + mov $60, %rax # SYSCALL_EXIT + syscall + + + +.data +saved_cw: .long 0 +cw: .long 0 +result: .long 0 +three: .long 0 # a floating point 3.0 + .long 1074266112 +error: .asciz "Error! Wrong result!\n" Index: exp-bbv/tests/amd64-linux/ll.vgtest =================================================================== --- exp-bbv/tests/amd64-linux/ll.vgtest (revision 0) +++ exp-bbv/tests/amd64-linux/ll.vgtest (revision 0) @@ -0,0 +1,5 @@ +prog: ll +vgopts: --interval-size=1000 --vex-guest-chase-thresh=0 --bb-out-file=ll.out.bb +post: ../compare_bbv.sh ll +cleanup: rm ll.out.bb + Index: exp-bbv/tests/amd64-linux/ll.S =================================================================== --- exp-bbv/tests/amd64-linux/ll.S (revision 0) +++ exp-bbv/tests/amd64-linux/ll.S (revision 0) @@ -0,0 +1,631 @@ +# +# linux_logo in x86_64 assembly language +# based on the code from ll_asm-0.36 +# +# By Vince Weaver +# +# Modified to remove non-deterministic system calls +# And to avoid reading from /proc +# + + +.include "../logo.include" + +# offsets into the results returned by the uname syscall +.equ U_SYSNAME,0 +.equ U_NODENAME,65 +.equ U_RELEASE,65*2 +.equ U_VERSION,(65*3) +.equ U_MACHINE,(65*4) +.equ U_DOMAINNAME,65*5 + +# offset into the results returned by the sysinfo syscall +.equ S_TOTALRAM,32 + +# Syscalls +.equ SYSCALL_EXIT, 60 +.equ SYSCALL_READ, 0 +.equ SYSCALL_WRITE, 1 +.equ SYSCALL_OPEN, 2 +.equ SYSCALL_CLOSE, 3 +.equ SYSCALL_SYSINFO, 99 +.equ SYSCALL_UNAME, 63 + +# +.equ STDIN,0 +.equ STDOUT,1 +.equ
STDERR,2 + + .globl _start +_start: + #========================= + # PRINT LOGO + #========================= + +# LZSS decompression algorithm implementation +# by Stephan Walter 2002, based on LZSS.C by Haruhiko Okumura 1989 +# optimized some more by Vince Weaver + + # we used to fill the buffer with FREQUENT_CHAR + # but, that only gains us one byte of space in the lzss image. + # the lzss algorithm does automatic RLE... pretty clever + # so we compress with NUL as FREQUENT_CHAR and it is pre-done for us + + mov $(N-F), %ebp # R + + mov $logo, %esi # %esi points to logo (for lodsb) + + mov $out_buffer, %edi # point to out_buffer + push %rdi # save this value for later + + xor %ecx, %ecx + +decompression_loop: + lodsb # load in a byte + + mov $0xff, %bh # re-load top as a hackish 8-bit counter + mov %al, %bl # move in the flags + +test_flags: + cmp $logo_end, %esi # have we reached the end? + je done_logo # ! if so, exit + + shr $1, %ebx # shift bottom bit into carry flag + jc discrete_char # ! if set, we jump to discrete char + +offset_length: + lodsw # get match_length and match_position + mov %eax,%edx # copy to edx + # no need to mask dx, as we do it + # by default in output_loop + + shr $(P_BITS),%eax + add $(THRESHOLD+1),%al + mov %al,%cl # cl = (ax >> P_BITS) + THRESHOLD + 1 + # (=match_length) + +output_loop: + and $POSITION_MASK,%dh # mask it + mov text_buf(%rdx), %al # load byte from text_buf[] + inc %edx # advance pointer in text_buf +store_byte: + stosb # store it + + mov %al, text_buf(%rbp) # store also to text_buf[r] + inc %ebp # r++ + and $(N-1), %bp # mask r + + loop output_loop # repeat until k>j + + or %bh,%bh # ! if 0 we shifted through 8 and must + jnz test_flags # re-load flags + + jmp decompression_loop + +discrete_char: + lodsb # load a byte + inc %ecx # we set ecx to one so byte + # will be output once + # (how do we know ecx is zero?) 
+ + jmp store_byte # and cleverly store it + + +# end of LZSS code + +done_logo: + + pop %rbp # get out_buffer and keep in bp + mov %ebp,%ecx # move out_buffer to ecx + + call write_stdout # print the logo + + # + # Setup + # +setup: + mov $strcat,%edx # use rdx as call pointer (smaller op) + + + #========================== + # PRINT VERSION + #========================== + +# push $SYSCALL_UNAME # uname syscall +# pop %rax # in 3 bytes + mov $uname_info,%edi # uname struct (0 extend address) +# syscall # do syscall + + mov %ebp,%edi # point %edi to out_buffer + + mov $(uname_info+U_SYSNAME),%esi # os-name from uname "Linux" + call *%rdx # call strcat + + mov $ver_string,%esi # source is " Version " + call *%rdx # call strcat + push %rsi # save our .txt pointer + + mov $(uname_info+U_RELEASE),%esi # version from uname "2.4.1" + call *%rdx # call strcat + + pop %rsi # restore .txt pointer + # source is ", Compiled " + call *%rdx # call strcat + push %rsi # store for later + + mov $(uname_info+U_VERSION),%esi # compiled date + call *%rdx # call strcat + + mov %ebp,%ecx # move out_buffer to ecx + + mov $0xa,%ax # store linefeed on end + stosw # and zero + + call *%rdx # call strcat + + call center_and_print # center and print + + #=============================== + # Middle-Line + #=============================== +middle_line: + #========= + # Load /proc/cpuinfo into buffer + #========= + + push %rdx # save call pointer + +# push $SYSCALL_OPEN # load 5 [ open() ] +# pop %rax # in 3 bytes + +# mov $cpuinfo,%edi # '/proc/cpuinfo' +# xor %esi,%esi # 0 = O_RDONLY +# cdq # clear edx in clever way +# syscall # syscall. fd in eax. + # we should check that eax>=0 + +# mov %eax,%edi # save our fd + +# xor %eax,%eax # SYSCALL_READ make== 0 + + mov $disk_buffer,%esi + +# mov $16,%dh # 4096 is maximum size of proc file #) + # we load sneakily by knowing + # 16<<8 = 4096. 
be sure edx clear + +# syscall + +# push $SYSCALL_CLOSE # close (to be correct) +# pop %rax +# syscall + + #============= + # Number of CPUs + #============= +number_of_cpus: + + xor %ebx,%ebx # chip count + + # $disk_buffer still in %rsi +bogo_loop: + mov (%rsi), %eax # load 4 bytes into eax + inc %esi # increment pointer + + cmp $0,%al # check for end of file + je done_bogo + + cmp $('o'<<24+'g'<<16+'o'<<8+'b'),%eax + # "bogo" in little-endian + + jne bogo_loop # ! if not equal, keep going + add $2,%ebx # otherwise, we have a bogo + # 2 times too for future magic + jmp bogo_loop + +done_bogo: + lea one-6(%rbx,%rbx,2), %esi + # Load into esi + # [one]+(num_cpus*6) + # + # the above multiplies by three + # esi = (ebx+(ebx*2)) + # and we double-incremented ebx + # earlier + + mov %ebp,%edi # move output buffer to edi + + pop %rdx # restore call pointer + call *%rdx # copy it (call strcat) + + mov $' ',%al # print a space + stosb + + push %rbx + push %rdx # store strcat pointer + + #========= + # MHz + #========= +print_mhz: + mov $('z'<<24+'H'<<16+'M'<<8+' '),%ebx + # find ' MHz' and grab up to . + # we are little endian + mov $'.',%ah + + # below is same as "sub $(strcat-find_string),%edx + # gas won't let us force the one-byte constant + .byte 0x83,0xEA,strcat-find_string + + call *%rdx # call find string + + mov %ebx,%eax # clever way to get MHz in, sadly + ror $8,%eax # not any smaller than a mov + stosl + + #========= + # Chip Name + #========= +chip_name: + mov $('e'<<24+'m'<<16+'a'<<8+'n'),%ebx + # find 'name\t: ' and grab up to \n + # we are little endian + mov $' ',%ah + call *%rdx # call find_string + stosb + call skip_spaces + + pop %rdx + pop %rbx # restore chip count + pop %rsi + + call *%rdx # ' Processor' + cmpb $2,%bl + jne print_s + inc %rsi # ! 
if singular, skip the s +print_s: + call *%rdx # 's, ' + + push %rsi # restore the values + push %rdx + + #======== + # RAM + #======== + +# push %rdi +# push $SYSCALL_SYSINFO # sysinfo() syscall +# pop %rax +# mov $sysinfo_buff,%edi +# syscall +# pop %rdi + + # The following has to be a 64 bit load, to support + # Ram > 4GB + mov (sysinfo_buff+S_TOTALRAM),%rax # size in bytes of RAM + shr $20,%rax # divide by 1024*1024 to get M + adc $0, %eax # round + + call num_to_ascii + + pop %rdx # restore strcat pointer + + pop %rsi # print 'M RAM, ' + call *%rdx # call strcat + + push %rsi + + #======== + # Bogomips + #======== + + mov $('s'<<24+'p'<<16+'i'<<8+'m'),%ebx + # find 'mips\t: ' and grab up to \n + mov $0xa,%ah + call find_string + + pop %rsi # bogo total follows RAM + + call *%rdx # call strcat + + push %rsi + + mov %ebp,%ecx # point ecx to out_buffer + + push %rcx + call center_and_print # center and print + + #================================= + # Print Host Name + #================================= +last_line: + mov %ebp,%edi # point to output_buffer + + mov $(uname_info+U_NODENAME),%esi # host name from uname() + call *%rdx # call strcat + + pop %rcx # ecx is unchanged + call center_and_print # center and print + + pop %rcx # (.txt) pointer to default_colors + + call write_stdout + + #================================ + # Exit + #================================ +exit: + push $SYSCALL_EXIT # Put exit syscall in rax + pop %rax + + xor %edi,%edi # Make return value $0 + syscall + + + #================================= + # FIND_STRING + #================================= + # ah is char to end at + # ebx is 4-char ascii string to look for + # edi points at output buffer + +find_string: + + mov $disk_buffer-1,%esi # look in cpuinfo buffer +find_loop: + inc %esi + cmpb $0, (%rsi) # are we at EOF? + je done # ! if so, done + + cmp (%rsi), %ebx # do the strings match? + jne find_loop # ! if not, loop + + # ! 
if we get this far, we matched + +find_colon: + lodsb # repeat till we find colon + cmp $0,%al + je done + cmp $':',%al + jne find_colon + +skip_spaces: + lodsb # skip spaces + cmp $0x20,%al # Loser new intel chips have lots?? + je skip_spaces + +store_loop: + cmp $0,%al + je done + cmp %ah,%al # is it end string? + je almost_done # ! if so, finish + cmp $'\n',%al + je almost_done + stosb # ! if not store and continue + lodsb + + jmp store_loop + +almost_done: + movb $0, (%rdi) # replace last value with NUL +done: + ret + + + #================================ + # strcat + #================================ + +strcat: + lodsb # load a byte from [ds:esi] + stosb # store a byte to [es:edi] + cmp $0,%al # is it zero? + jne strcat # ! if not loop + dec %edi # point to one less than null + ret # return + + #============================== + # center_and_print + #============================== + # string to center in ecx + +center_and_print: + push %rdx # save strcat pointer + push %rcx # save the string pointer + inc %edi # move to a clear buffer + push %rdi # save for later + + mov $('['<<8+27),%ax # we want to output ^[[ + stosw + + cdq # clear dx + +str_loop2: # find end of string + inc %edx + cmpb $0,(%rcx,%rdx) # repeat till we find zero + jne str_loop2 + + push $81 # one added to cheat, we don't + # count the trailing '\n' + pop %rax + + cmp %eax,%edx # see if we are >=80 + jl not_too_big # ! 
if so, don't center + push $80 + pop %rdx + +not_too_big: + sub %edx,%eax # subtract size from 80 + + shr %eax # then divide by 2 + + call num_to_ascii # print number of spaces + mov $'C',%al # tack a 'C' on the end + # ah is zero from num_to_ascii + stosw # store C and a NULL + pop %rcx # pop the pointer to ^[[xC + + call write_stdout # write to the screen + +done_center: + pop %rcx # restore string pointer + # and trickily print the real string + + pop %rdx # restore strcat pointer + + #================================ + # WRITE_STDOUT + #================================ + # ecx has string + # rax,rcx,rdi,rsi,r11 trashed; rdx preserved +write_stdout: + push %rdx + push $SYSCALL_WRITE # put 1 in rax (write syscall) + pop %rax # in 3 bytes of code + + cdq # clear edx + + lea 1(%rdx),%edi # put 1 in edi (stdout) + # in 3 bytes of code + + mov %ecx,%esi + +str_loop1: + inc %edx + cmpb $0,(%rcx,%rdx) # repeat till zero + jne str_loop1 + + syscall # run the syscall + pop %rdx + ret + + ############################## + # num_to_ascii + ############################## + # ax = value to print + # edi points to where we want it + +num_to_ascii: + push $10 + pop %rbx + xor %ecx,%ecx # clear ecx +div_by_10: + cdq # clear edx + div %ebx # divide + push %rdx # save for later + inc %ecx # add to length counter + or %eax,%eax # was Q zero? + jnz div_by_10 # !
if not divide again + +write_out: + pop %rax # restore in reverse order + add $0x30, %al # convert to ASCII + stosb # save digit + loop write_out # loop till done + ret + +#=========================================================================== +# section .data +#=========================================================================== +.data + +ver_string: .ascii " Version \0" +compiled_string: .ascii ", Compiled \0" +processor: .ascii " Processor\0" +s_comma: .ascii "s, \0" +ram_comma: .ascii "M RAM, \0" +bogo_total: .ascii " Bogomips Total\n\0" + +default_colors: .ascii "\033[0m\n\n\0" + +cpuinfo: .ascii "/proc/cpuinfo\0" + + +one: .ascii "One\0\0\0" +two: .ascii "Two\0\0\0" +three: .ascii "Three\0" +four: .ascii "Four\0" + +.include "../logo.lzss_new" + +disk_buffer: +.ascii "processor : 0\n" +.ascii "vendor_id : GenuineIntel\n" +.ascii "cpu family : 15\n" +.ascii "model : 6\n" +.ascii "model name : Intel(R) Xeon(TM) CPU 3.46GHz\n" +.ascii "stepping : 4\n" +.ascii "cpu MHz : 3200.000\n" +.ascii "cache size : 2048 KB\n" +.ascii "physical id : 0\n" +.ascii "siblings : 2\n" +.ascii "core id : 0\n" +.ascii "cpu cores : 2\n" +.ascii "apicid : 0\n" +.ascii "initial apicid : 0\n" +.ascii "fpu : yes\n" +.ascii "fpu_exception : yes\n" +.ascii "cpuid level : 6\n" +.ascii "wp : yes\n" +.ascii "flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx lm constant_tsc pebs bts pni dtes64 monitor ds_cpl vmx est cid cx16 xtpr pdcm lahf_lm tpr_shadow\n" +.ascii "bogomips : 6934.38\n" +.ascii "clflush size : 64\n" +.ascii "cache_alignment : 128\n" +.ascii "address sizes : 36 bits physical, 48 bits virtual\n" +.ascii "power management:\n" +.ascii "\n" +.ascii "processor : 1\n" +.ascii "vendor_id : GenuineIntel\n" +.ascii "cpu family : 15\n" +.ascii "model : 6\n" +.ascii "model name : Intel(R) Xeon(TM) CPU 3.46GHz\n" +.ascii "stepping : 4\n" +.ascii "cpu MHz : 3200.000\n" +.ascii "cache size : 
2048 KB\n" +.ascii "physical id : 1\n" +.ascii "siblings : 2\n" +.ascii "core id : 0\n" +.ascii "cpu cores : 2\n" +.ascii "apicid : 4\n" +.ascii "initial apicid : 4\n" +.ascii "fpu : yes\n" +.ascii "fpu_exception : yes\n" +.ascii "cpuid level : 6\n" +.ascii "wp : yes\n" +.ascii "flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx lm constant_tsc pebs bts pni dtes64 monitor ds_cpl vmx est cid cx16 xtpr pdcm lahf_lm tpr_shadow\n" +.ascii "bogomips : 6934.13\n" +.ascii "clflush size : 64\n" +.ascii "cache_alignment : 128\n" +.ascii "address sizes : 36 bits physical, 48 bits virtual\n" +.ascii "power management:\n\0" + +uname_info: +.ascii "Linux\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +.ascii "domori\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +.ascii "2.6.29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +.ascii "#1 SMP Mon May 4 09:51:54 EDT 2009\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +sysinfo_buff: +.long 0,0,0,0,0,0,0,0,2048*1024*1024,0,0,0,0,0,0,0 + + +#============================================================================ +# section .bss +#============================================================================ +.bss + +.lcomm text_buf, (N+F-1) +.lcomm out_buffer,16384 Index: exp-bbv/tests/amd64-linux/ll.stdout.exp 
=================================================================== --- exp-bbv/tests/amd64-linux/ll.stdout.exp (revision 0) +++ exp-bbv/tests/amd64-linux/ll.stdout.exp (revision 0) @@ -0,0 +1,17 @@ +############################################################################### +############################################################################### +##################################################################O#O########## +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +Linux Version 2.6.29, Compiled #1 SMP Mon May 4 09:51:54 EDT 2009 +Two 3200MHz Intel(R) Xeon(TM) Processors, 2048M RAM, 6934.38 Bogomips Total +domori + Index: exp-bbv/tests/amd64-linux/ll.stderr.exp =================================================================== --- exp-bbv/tests/amd64-linux/ll.stderr.exp (revision 0) +++ exp-bbv/tests/amd64-linux/ll.stderr.exp (revision 0) @@ -0,0 +1,5 @@ +# Total intervals: 45 (Interval Size 1000) +# Total instructions: 45639 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 Index: exp-bbv/tests/amd64-linux/ll.out.exp =================================================================== --- exp-bbv/tests/amd64-linux/ll.out.exp (revision 0) +++ exp-bbv/tests/amd64-linux/ll.out.exp (revision 0) @@ -0,0 +1,53 @@ +T:1:10 :7:10 :5:38 :2:44 :8:65 :9:662 :4:119 :6:2 
:3:51 +T:7:5 :5:16 :2:18 :8:52 :9:858 :4:35 :6:1 :3:15 +T:7:5 :5:16 :2:18 :8:52 :9:858 :4:35 :6:1 :3:15 +T:7:5 :5:14 :2:16 :8:91 :9:863 :4:7 :6:1 :3:3 +T:7:5 :5:12 :2:14 :8:78 :9:880 :4:7 :6:1 :3:3 +T:7:5 :5:6 :2:8 :8:52 :9:928 :6:1 +T:7:5 :5:10 :2:10 :8:65 :9:909 :6:1 +T:7:5 :5:14 :2:18 :8:117 :9:845 :6:1 +T:5:8 :2:8 :8:52 :9:932 +T:7:5 :5:8 :2:10 :8:65 :9:911 :6:1 +T:5:8 :2:8 :8:52 :9:932 +T:7:5 :5:6 :2:8 :8:52 :9:928 :6:1 +T:5:6 :2:6 :8:39 :9:949 +T:7:5 :5:6 :2:8 :8:52 :9:928 :6:1 +T:5:4 :2:4 :8:26 :9:966 +T:7:5 :5:12 :2:14 :8:78 :9:880 :4:7 :6:1 :3:3 +T:5:6 :2:6 :8:39 :9:949 +T:7:5 :5:8 :2:10 :8:65 :9:911 :6:1 +T:7:5 :5:14 :2:16 :8:91 :9:863 :4:7 :6:1 :3:3 +T:5:8 :2:8 :8:52 :9:932 +T:7:5 :5:10 :2:12 :8:78 :9:894 :6:1 +T:7:5 :5:10 :2:12 :8:74 :9:898 :6:1 +T:5:12 :2:12 :8:82 :9:894 +T:7:5 :5:8 :2:8 :8:39 :9:390 :4:7 :6:1 :3:3 :10:3 :11:9 :12:527 +T:12:1000 +T:12:1000 +T:12:1000 +T:12:1000 +T:12:1000 +T:12:1000 +T:12:1000 +T:15:5 :18:2 :19:3 :20:2 :21:3 :22:4 :16:281 :17:10 :12:687 :13:1 :14:2 +T:23:1 :32:7 :34:351 :33:176 :16:3 :17:2 :24:10 :25:195 :26:4 :27:3 :30:4 :31:11 :11:9 :12:204 :13:2 :14:4 :28:9 :29:5 +T:34:666 :33:334 +T:34:667 :33:333 +T:34:665 :33:333 :35:2 +T:34:667 :33:333 +T:34:667 :33:333 +T:34:666 :33:334 +T:34:666 :33:332 :35:2 +T:34:357 :33:178 :36:4 :37:8 :38:4 :40:258 :39:173 :16:16 :17:2 +T:49:6 :50:2 :51:4 :52:2 :53:1 :54:6 :56:3 :38:4 :40:333 :39:225 :41:39 :42:26 :43:15 :44:46 :45:46 :46:40 :47:60 :48:6 :16:88 :17:4 :28:9 :55:18 :29:17 +T:57:4 :38:4 :40:591 :39:395 :16:4 :17:2 +T:40:600 :39:400 +T:58:2 :59:4 :40:453 :39:303 :41:18 :42:12 :43:6 :44:16 :45:16 :46:14 :47:21 :48:2 :16:68 :17:2 :24:10 :25:53 + + +# Total intervals: 45 (Interval Size 1000) +# Total instructions: 45639 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 + Index: exp-bbv/tests/amd64-linux/million.vgtest =================================================================== --- exp-bbv/tests/amd64-linux/million.vgtest (revision 0) +++ 
exp-bbv/tests/amd64-linux/million.vgtest (revision 0) @@ -0,0 +1,5 @@ +prog: million +vgopts: --interval-size=100000 --vex-guest-chase-thresh=0 --bb-out-file=million.out.bb +post: ../compare_bbv.sh million +cleanup: rm million.out.bb + Index: exp-bbv/tests/amd64-linux/million.S =================================================================== --- exp-bbv/tests/amd64-linux/million.S (revision 0) +++ exp-bbv/tests/amd64-linux/million.S (revision 0) @@ -0,0 +1,22 @@ + + # count for 1 million instructions + # total is 2 + 1 + 499997*2 + 3 + + .globl _start +_start: + xor %rcx,%rcx # not needed, pads total to 1M + xor %rax,%rax # not needed, pads total to 1M + + mov $499997,%rcx # load counter +test_loop: + dec %rcx # repeat count times + jnz test_loop + + #================================ + # Exit + #================================ +exit: + xor %rdi,%rdi # we return 0 + mov $60,%rax # put exit syscall number (60) in rax + syscall + Index: exp-bbv/tests/amd64-linux/million.stderr.exp =================================================================== --- exp-bbv/tests/amd64-linux/million.stderr.exp (revision 0) +++ exp-bbv/tests/amd64-linux/million.stderr.exp (revision 0) @@ -0,0 +1,5 @@ +# Total intervals: 10 (Interval Size 100000) +# Total instructions: 1000000 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 Index: exp-bbv/tests/amd64-linux/million.out.exp =================================================================== --- exp-bbv/tests/amd64-linux/million.out.exp (revision 0) +++ exp-bbv/tests/amd64-linux/million.out.exp (revision 0) @@ -0,0 +1,17 @@ +T:1:5 :2:99996 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 + + +# Total intervals: 10 (Interval Size 100000) +# Total instructions: 1000000 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 + Index: exp-bbv/tests/amd64-linux/rep_prefix.vgtest =================================================================== --- 
exp-bbv/tests/amd64-linux/rep_prefix.vgtest (revision 0) +++ exp-bbv/tests/amd64-linux/rep_prefix.vgtest (revision 0) @@ -0,0 +1,4 @@ +prog: rep_prefix +vgopts: --interval-size=100000 --vex-guest-chase-thresh=0 --bb-out-file=rep_prefix.out.bb +cleanup: rm rep_prefix.out.bb + Index: exp-bbv/tests/amd64-linux/rep_prefix.S =================================================================== --- exp-bbv/tests/amd64-linux/rep_prefix.S (revision 0) +++ exp-bbv/tests/amd64-linux/rep_prefix.S (revision 0) @@ -0,0 +1,346 @@ +# +# rep, repe (repz) and repne (repnz) prefixed string instructions +# only count as one instruction, even though they repeat many times +# This test makes sure the bbv plugin counts these instructions properly +# The answer is validated to hw perf counters. +# + + .globl _start +_start: + cld # we want these to happen forward + + + #=============================================== + # Some SSE2 instructions start with 0xf2 or 0xf3 + # Check for them, to make sure our rep detection + # handles things properly. + # We should check this on x86 too, but then we'd + # have to check for SSE2 capability somehow? + #=================================== +false_positives: + + movdqu %xmm1,%xmm2 + movdqu %xmm2,%xmm1 + addsd %xmm1,%xmm2 + + #=================================== + # Check varied order of the size prefix + # with the rep prefix. 
Older binutils + # did this one way, newer binutils the other + #=================================== + +size_prefix: + # test 16-bit load + + mov $8192, %rcx + mov $buffer1, %rsi # set source + .byte 0x66, 0xf3, 0xad # lodsw + + mov $8192, %rcx + mov $buffer1, %rsi # set source + .byte 0xf3, 0x66, 0xad # lodsw + + + + + #=================================== + # Load and Store Instructions + #=================================== +loadstore: + xor %rax, %rax + mov $0xd, %al # set eax to d + + # test 8-bit store + + mov $16384, %rcx + mov $buffer1, %rdi # set destination + rep stosb # store d 16384 times, auto-increment + + # test 8-bit load + + mov $16384, %rcx + mov $buffer1, %rsi # set source + rep lodsb # load byte 16384 times, auto-increment + + cmp $0xd,%al # if we loaded wrong value + jne print_error # print an error + + # test 16-bit store + + mov $0x020d,%ax # store 0x020d + + mov $8192, %rcx + mov $buffer1, %rdi # set destination + rep stosw # store 8192 times, auto-increment + + # test 16-bit load + + mov $8192, %rcx + mov $buffer1, %rsi # set source + rep lodsw # load 8192 times, auto-increment + + cmp $0x020d,%ax # if we loaded wrong value + jne print_error # print an error + + # test 32-bit store + + mov $0x0feb1378,%eax # store 0x0feb1378 + + mov $4096, %rcx + mov $buffer1, %rdi # set destination + rep stosl # store 4096 times, auto-increment + + # test 32-bit load + + mov $4096, %rcx + mov $buffer1, %rsi # set source + rep lodsl # load 4096 times, auto-increment + + cmp $0x0feb1378,%eax # if we loaded wrong value + jne print_error # print an error + + # test 64-bit store + + mov $0xfeb131978a5a5a5a,%rax + + mov $2048, %rcx + mov $buffer1, %rdi # set destination + rep stosq # store 2048 times, auto-increment + + # test 64-bit load + + mov $2048, %rcx + mov $buffer1, %rsi # set source + rep lodsq # load 2048 times, auto-increment + + cmp $0x8a5a5a5a,%eax + # !if we loaded wrong value + jne print_error # print an error + + + #============================= + 
# Move instructions + #============================= +moves: + # test 8-bit move + + mov $16384, %rcx + mov $buffer1, %rsi + mov $buffer2, %rdi + rep movsb + + # test 16-bit move + + mov $8192, %rcx + mov $buffer2, %rsi + mov $buffer1, %rdi + rep movsw + + # test 32-bit move + + mov $4096, %rcx + mov $buffer1, %rsi + mov $buffer2, %rdi + rep movsl + + # test 64-bit move + + mov $2048, %rcx + mov $buffer1, %rsi + mov $buffer2, %rdi + rep movsq + + + #================================== + # Compare equal instructions + #================================== +compare_equal: + # first set up the areas to compare + + mov $0xa5a5a5a5,%eax + mov $buffer1, %rdi + mov $4096, %rcx + rep stosl + + mov $0xa5a5a5a5,%eax + mov $buffer2, %rdi + mov $4096, %rcx + rep stosl + + + # test 8-bit + + mov $buffer1,%rsi + mov $buffer2,%rdi + mov $16384, %rcx + repe cmpsb + jnz print_error + + # test 16-bit + + mov $buffer1,%rsi + mov $buffer2,%rdi + mov $8192, %rcx + repe cmpsw + jnz print_error + + # test 32-bit + + mov $buffer1,%rsi + mov $buffer2,%rdi + mov $4096, %rcx + repe cmpsl + jnz print_error + + # test 64-bit + + mov $buffer1,%rsi + mov $buffer2,%rdi + mov $2048, %rcx + repe cmpsq + jnz print_error + + + + #================================== + # Compare not equal instructions + #================================== +compare_noteq: + # change second buffer + + mov $0x5a5a5a5a,%eax + mov $buffer2, %rdi + mov $4096, %rcx + rep stosl + + # test 8-bit + + mov $buffer1,%rsi + mov $buffer2,%rdi + mov $16384, %rcx +# repne cmpsb FIXME! Not implemented valgrind +# je print_error + + # test 16-bit + + mov $buffer1,%rsi + mov $buffer2,%rdi + mov $8192, %rcx +# repne cmpsw FIXME! Not implemented valgrind +# je print_error + + # test 32-bit + + mov $buffer1,%rsi + mov $buffer2,%rdi + mov $4096, %rcx +# repne cmpsl FIXME! Not implemented valgrind +# je print_error + + # test 64-bit + + mov $buffer1,%rsi + mov $buffer2,%rdi + mov $2048, %rcx +# repne cmpsq FIXME! 
Not implemented valgrind +# je print_error + + #==================================== + # Check scan equal instruction + #==================================== +scan_eq: + # test 8-bit + + mov $0xa5,%al + mov $buffer1,%rdi + mov $16384, %rcx + repe scasb + jnz print_error + + # test 16-bit + + mov $0xa5a5,%ax + mov $buffer1,%rdi + mov $8192, %rcx + repe scasw + jnz print_error + + # test 32-bit + + mov $0xa5a5a5a5,%eax + mov $buffer1,%rdi + mov $4096, %rcx + repe scasl + jnz print_error + + # test 64-bit + + mov $0xa5a5a5a5a5a5a5a5,%rax + mov $buffer1,%rdi + mov $2048, %rcx + repe scasq + jnz print_error + + + #==================================== + # Check scan not-equal instruction + #==================================== + + # test 8-bit +scan_ne: + mov $0xa5,%al + mov $buffer2,%rdi + mov $16384, %rcx + repne scasb + jz print_error + + # test 16-bit + + mov $0xa5a5,%ax + mov $buffer2,%rdi + mov $8192, %rcx + repne scasw + jz print_error + + # test 32-bit + + mov $0xa5a5a5a5,%eax + mov $buffer2,%rdi + mov $4096, %rcx + repne scasl + jz print_error + + # test 64-bit + + mov $0xa5a5a5a5a5a5a5a5,%rax + mov $buffer2,%rdi + mov $2048, %rcx + repne scasq + jz print_error + + jmp exit # no error, skip to exit + +print_error: + + mov $1, %rax # Write syscall + mov $1, %rdi # print to stdout + mov $error_string, %rsi # string to print + mov $16, %edx # strlen + syscall # call syscall + + #================================ + # Exit + #================================ +exit: + mov $60,%rax + xor %rdi,%rdi # we return 0 + syscall # and exit + + +.data +error_string: .asciz "Error detected!\n" + +.bss + +.lcomm buffer1, 16384 +.lcomm buffer2, 16384 Index: exp-bbv/tests/amd64-linux/rep_prefix.stderr.exp =================================================================== --- exp-bbv/tests/amd64-linux/rep_prefix.stderr.exp (revision 0) +++ exp-bbv/tests/amd64-linux/rep_prefix.stderr.exp (revision 0) @@ -0,0 +1,5 @@ +# Total intervals: 0 (Interval Size 100000) +# Total instructions: 
151 +# Total reps: 165917 +# Unique reps: 29 +# Total fldcw instructions: 0 Index: exp-bbv/tests/ppc32-linux/Makefile.am =================================================================== --- exp-bbv/tests/ppc32-linux/Makefile.am (revision 0) +++ exp-bbv/tests/ppc32-linux/Makefile.am (revision 0) @@ -0,0 +1,23 @@ +include $(top_srcdir)/Makefile.tool-tests.am + +noinst_SCRIPTS = filter_stderr + +check_PROGRAMS = \ + million ll + +EXTRA_DIST = $(noinst_SCRIPTS) \ + ll.stderr.exp \ + ll.stdout.exp \ + ll.out.exp \ + ll.vgtest \ + million.stderr.exp \ + million.out.exp \ + million.vgtest + +AM_CCASFLAGS += -ffreestanding + +LDFLAGS += -nostartfiles -nodefaultlibs + +million_SOURCES = million.S +ll_SOURCES = ll.S + Index: exp-bbv/tests/ppc32-linux/filter_stderr =================================================================== --- exp-bbv/tests/ppc32-linux/filter_stderr (revision 0) +++ exp-bbv/tests/ppc32-linux/filter_stderr (revision 0) @@ -0,0 +1,5 @@ +#! /bin/sh + +../filter_stderr + + Property changes on: exp-bbv/tests/ppc32-linux/filter_stderr ___________________________________________________________________ Added: svn:executable + * Index: exp-bbv/tests/ppc32-linux/million.vgtest =================================================================== --- exp-bbv/tests/ppc32-linux/million.vgtest (revision 0) +++ exp-bbv/tests/ppc32-linux/million.vgtest (revision 0) @@ -0,0 +1,5 @@ +prog: million +vgopts: --interval-size=100000 --vex-guest-chase-thresh=0 --bb-out-file=million.out.bb +post: ../compare_bbv.sh million +cleanup: rm million.out.bb + Index: exp-bbv/tests/ppc32-linux/million.S =================================================================== --- exp-bbv/tests/ppc32-linux/million.S (revision 0) +++ exp-bbv/tests/ppc32-linux/million.S (revision 0) @@ -0,0 +1,23 @@ + + # count for 1 million instructions + # total is 3 + 499997*2 + 3 + + .globl _start +_start: + nop # to give us an even million + lis 15,499997@ha # load high 16-bits of counter + addi 
15,15,499997@l # load low 16-bits of counter +test_loop: + addic. 15,15,-1 # decrement counter + bne 0,test_loop # loop until zero + + #================================ + # Exit + #================================ + +exit: + li 3,0 # 0 exit value + li 0,1 # put the exit syscall number (1) in r0 + sc # and exit + + Index: exp-bbv/tests/ppc32-linux/million.stderr.exp =================================================================== --- exp-bbv/tests/ppc32-linux/million.stderr.exp (revision 0) +++ exp-bbv/tests/ppc32-linux/million.stderr.exp (revision 0) @@ -0,0 +1,5 @@ +# Total intervals: 10 (Interval Size 100000) +# Total instructions: 1000000 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 Index: exp-bbv/tests/ppc32-linux/million.out.exp =================================================================== --- exp-bbv/tests/ppc32-linux/million.out.exp (revision 0) +++ exp-bbv/tests/ppc32-linux/million.out.exp (revision 0) @@ -0,0 +1,17 @@ +T:1:5 :2:99996 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 +T:2:100000 + + +# Total intervals: 10 (Interval Size 100000) +# Total instructions: 1000000 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 + Index: exp-bbv/tests/ppc32-linux/ll.vgtest =================================================================== --- exp-bbv/tests/ppc32-linux/ll.vgtest (revision 0) +++ exp-bbv/tests/ppc32-linux/ll.vgtest (revision 0) @@ -0,0 +1,5 @@ +prog: ll +vgopts: --interval-size=1000 --vex-guest-chase-thresh=0 --bb-out-file=ll.out.bb +post: ../compare_bbv.sh ll +cleanup: rm ll.out.bb + Index: exp-bbv/tests/ppc32-linux/ll.S =================================================================== --- exp-bbv/tests/ppc32-linux/ll.S (revision 0) +++ exp-bbv/tests/ppc32-linux/ll.S (revision 0) @@ -0,0 +1,579 @@ +# +# linux_logo in ppc assembly language +# based on the code from ll_asm-0.36 +# +# By Vince Weaver +# +# Modified to remove non-deterministic system calls +# And to 
avoid reading from /proc +# + +# offsets into the results returned by the uname syscall +.equ U_SYSNAME,0 +.equ U_NODENAME,65 +.equ U_RELEASE,65*2 +.equ U_VERSION,(65*3) +.equ U_MACHINE,(65*4) +.equ U_DOMAINNAME,65*5 + +# offset into the SYSCALL_SYSINFO buffer +.equ S_TOTALRAM,16 + +# Syscalls +.equ SYSCALL_EXIT, 1 +#.equ SYSCALL_READ, 3 +.equ SYSCALL_WRITE, 4 +#.equ SYSCALL_OPEN, 5 +#.equ SYSCALL_CLOSE, 6 +#.equ SYSCALL_SYSINFO,116 +#.equ SYSCALL_UNAME, 122 + +# +.equ STDIN, 0 +.equ STDOUT,1 +.equ STDERR,2 + +.equ BSS_BEGIN,25 +.equ DATA_BEGIN,26 + +.include "../logo.include" + + .globl _start +_start: + + #======================== + # Initialization + #======================== + + +# eieio # coolest opcode of all time ;) + # not needed, but I had to put it here + # the hack loading BSS_BEGIN and DATA_BEGIN + # saves one instruction on any future load from memory + # as we can just do an addi rather than an lis;addi + + lis 25,bss_begin@ha + addi 25,25,bss_begin@l + + lis 26,data_begin@ha + addi 26,26,data_begin@l + + addi 14,BSS_BEGIN,(out_buffer-bss_begin) + # the output buffer + + addi 21,BSS_BEGIN,(text_buf-bss_begin) + + + mr 17,14 # store out-buffer for later + + #========================= + # PRINT LOGO + #========================= + +# LZSS decompression algorithm implementation +# by Stephan Walter 2002, based on LZSS.C by Haruhiko Okumura 1989 +# optimized some more by Vince Weaver + + + li 8,(N-F) # grab "R" + + addi 9,DATA_BEGIN,(logo-data_begin)-1 + # logo_pointer + + addi 12,DATA_BEGIN,(logo_end-data_begin)-1 + # end of the logo + + + mr 16,17 + +decompression_loop: + lbzu 10,1(9) # load in a byte + # auto-update + mr 11,10 # copy to 11 + ori 11,11,0xff00 # re-load top as a hackish + # 8-bit counter + +test_flags: + cmpw 0,12,9 # have we reached the end? + ble done_logo # if so exit + + andi. 
13,11,0x1 + srawi 11,11,1 + + bne 0,discrete_char + +offset_length: + lbzu 10,1(9) + lbzu 24,1(9) + slwi 24,24,8 + or 24,24,10 + + mr 10,24 + + srawi 15,10,P_BITS + addi 15,15,THRESHOLD+1 # cl = ax >> (P_BITS)+THRESH+1 + # = match length + +output_loop: + andi. 24,24,(POSITION_MASK<<8+0xff) # mask it + lbzx 10,21,24 + addi 24,24,1 + +store_byte: + stbu 10,1(16) + + stbx 10,21,8 + addi 8,8,1 + andi. 8,8,(N-1) + + addic. 15,15,-1 + bne 0,output_loop + + andi. 13,11,0xff00 + bne test_flags + + b decompression_loop + +discrete_char: + + lbzu 10,1(9) + li 15,1 + + b store_byte + +done_logo: + + addi 4,17,1 # restore (plus one because r17 is decremented) + bl write_stdout # and print the logo + + + #========================== + # First Line + #========================== + + + #========================== + # PRINT VERSION + #========================== + +# li 0,SYSCALL_UNAME # uname syscall +# addi 3,BSS_BEGIN,(uname_info-bss_begin) + # uname struct +# sc # do syscall + + + addi 16,DATA_BEGIN,(uname_info-data_begin)+U_SYSNAME@l-1 + # os-name from uname "Linux" + bl strcat + + addi 16,DATA_BEGIN,(ver_string-data_begin)-1 + # source is " Version " + bl strcat + + addi 16,DATA_BEGIN,(uname_info-data_begin)+U_RELEASE@l-1 + # version from uname "2.4.1" + bl strcat + + addi 16,DATA_BEGIN,(compiled_string-data_begin)-1 + # source is ", Compiled " + bl strcat + + addi 16,DATA_BEGIN,(uname_info-data_begin)+U_VERSION-1 + # compiled date + bl strcat + + bl center_and_print # write it to screen + + + #=============================== + # Middle-Line + #=============================== + + #========= + # Load /proc/cpuinfo into buffer + #========= + +# li 0,SYSCALL_OPEN # open() +# addi 3,DATA_BEGIN,(cpuinfo-data_begin) + # '/proc/cpuinfo' +# li 4,0 # O_RDONLY +# sc # syscall. fd in r0. 
+ # we should check that r0>=0 + +# mr 13,3 # save fd in r13 + +# li 0,SYSCALL_READ # read +# addi 4,BSS_BEGIN,(disk_buffer-bss_begin) +# li 5,4096 # 4096 is maximum size of proc file ;) +# sc + +# mr 3,13 # restore fd +# li 0,6 # close +# sc + + #============= + # Number of CPUs + #============= + + mr 14,17 # point output to out_buf + + # Assume 1 CPU for now + # my iBook's /proc/cpuinfo does not have a "processor" line ??? + + addi 16,DATA_BEGIN,(one-data_begin)-1 + bl strcat + + #========= + # MHz + #========= + + lis 20,('l'<<8)+'o' # find 'lock ' and grab up to M + addi 20,20,('c'<<8)+'k' + li 23,'M' + bl find_string + + addi 16,DATA_BEGIN,(megahertz-data_begin)-1 + # print 'MHz ' + bl strcat + + + #========= + # Chip Name + #========= + + lis 20,('c'<<8)+'p' # find 'cpu\t: ' and grab up to \n + addi 20,20,('u'<<8)+'\t' + li 23,'\n' + bl find_string + + addi 16,DATA_BEGIN,(comma-data_begin)-1 + # print ', ' + bl strcat + + #======== + # RAM + #======== + +# li 0,SYSCALL_SYSINFO # sysinfo() syscall +# addi 3,BSS_BEGIN,(sysinfo_buff-bss_begin) + # sysinfo_buffer + +# sc + + lwz 4,(sysinfo_buff+S_TOTALRAM-data_begin)(DATA_BEGIN) + # load bytes of RAM into r4 + + srawi 4,4,20 # divide by 2^20 to get MB + li 5,0 + + bl num_to_ascii + + addi 16,DATA_BEGIN,(ram_comma-data_begin)-1 + # print 'M RAM, ' + + bl strcat + + #======== + # Bogomips + #======== + + lis 20,('m'<<8)+'i' # find 'mips' and grab up to \n + addi 20,20,('p'<<8)+'s' + li 23,'\n' + bl find_string + + addi 16,DATA_BEGIN,(bogo_total-data_begin)-1 + # print "Bogomips Total" + bl strcat + + bl center_and_print # center it + + + #================================= + # Print Host Name + #================================= + + mr 14,17 # restore out buffer + + addi 16,DATA_BEGIN,((uname_info-data_begin)+U_NODENAME)-1 + # hostname + + bl strcat + + bl center_and_print + + #================================ + # Exit + #================================ +exit: + li 3,0 # 0 exit value + li 0,SYSCALL_EXIT # put the 
exit syscall number in r0 + sc # and exit + + + + + #================================= + # FIND_STRING + #================================= + # r23 is char to end at + # r20 is the 4-char ascii string to look for + # r14 points at output buffer + # r16,r21 + +find_string: + + addi 16,DATA_BEGIN,(disk_buffer-data_begin)-1 + # look in cpuinfo buffer + # -1 so we can use lbzu + +find_loop: + lwzu 13,1(16) # load in 32 bits, incrementing 8bits + cmpwi 13,0 # if null, we are done + beq done + cmpw 13,20 # compare with our 4 char string + bne find_loop # if no match, keep looping + + + # if we get this far, we matched + + li 21,':' +find_colon: + lbzu 13,1(16) # repeat till we find colon + cmpwi 13,0 + beq done + cmpw 13,21 + bne find_colon + + addi 16,16,1 # skip a char [should be space] + +store_loop: + lbzu 13,1(16) + cmpwi 13,0 + beq done + cmpw 13,23 # is it end string? + beq almost_done # if so, finish + stbu 13,1(14) # if not store and continue + b store_loop + +almost_done: + li 13,0 # replace last value with null + stb 13,1(14) + +done: + blr + + #================================ + # strcat + #================================ + # r13 = "temp" + # r16 = "source" + # r14 = "destination" +strcat: + lbzu 13,1(16) # load a byte from [r16] + stbu 13,1(14) # store a byte to [r14] + cmpwi 13,0 # is it zero? 
+ bne strcat # if not loop + subi 14,14,1 # point to one less than null + blr # return + + #============================== + # center_and_print + #============================== + # r14 is end of buffer + # r17 is start of buffer + # r29 = saved link register + # r4-r10, r19-r22, r30 trashed + +center_and_print: + + mflr 29 # back up return address + + subf 5,17,14 # see how long the output + # buffer is + + cmpwi 5,80 # see if we are >80 + bgt done_center # if so, bail + + li 4,80 # 80 column screen + subf 4,5,4 # subtract strlen + srawi 23,4,1 # divide by two + + lis 4,escape@ha + addi 4,4,escape@l + bl write_stdout + + mr 4,23 + li 5,1 # print to stdout + bl num_to_ascii # print number + + lis 4,c@ha + addi 4,4,c@l + bl write_stdout + + +done_center: + + addi 4,17,1 # move string to output+1 + bl write_stdout # call write stdout + + lis 4,linefeed@ha + addi 4,4,linefeed@l + + mtlr 29 # restore link register + # and let write_stdout + # return for us + + + + #================================ + # WRITE_STDOUT + #================================ + # r4 has string + # r0,r3,r4,r5,r6 trashed + +write_stdout: + li 0,SYSCALL_WRITE # write syscall + li 3,STDOUT # stdout + + li 5,0 # string length counter +strlen_loop: + lbzx 6,4,5 # get byte from (r4+r5) + addi 5,5,1 # increment counter + cmpi 0,6,0 # is it zero? + bne strlen_loop # if not keep counting + addi 5,5,-1 + sc # syscall + + blr # return + + + ############################## + # Num to Ascii + ############################## + # num is in r4 + # r5 =0 then strcat, otherwise stdout + # r5-r10,r19,r20,r21,r22,r30 trashed + +num_to_ascii: + + mflr 30 # save the link register + + addi 16,BSS_BEGIN,(num_to_ascii_end-bss_begin) + # the end of a backwards growing + # 10 byte long buffer. + + li 20,10 # we will divide by 10 + mr 19,4 # load in the value passed + +div_by_10: + divw 21,19,20 # divide r19 by r20 put into r21 + + mullw 22,21,20 # find remainder. 
1st q*divisor + subf 22,22,19 # then subtract from original = R + addi 22,22,0x30 # convert remainder to ascii + + stbu 22,-1(16) # Store to backwards buffer + + mr 19,21 # move Quotient as new dividend + cmpwi 19,0 # was quotient zero? + bne div_by_10 # if not keep dividing + +write_out: + cmpwi 5,0 # if r5 is 0 then skip ahead + bne stdout_num + + addi 16,16,-1 # point to the beginning + bl strcat # and strcat it + + mtlr 30 # restore link register + + blr # return + +stdout_num: + mr 4,16 # point to our buffer + mtlr 30 # restore link register + b write_stdout # stdout will return for us + + +#=========================================================================== +.data +#=========================================================================== + + +data_begin: + +.include "../logo.lzss_new" + +ver_string: .ascii " Version \0" +compiled_string: .ascii ", Compiled \0" +megahertz: .ascii "MHz PPC \0" +.equ space, ram_comma+6 +.equ comma, ram_comma+5 +linefeed: .ascii "\n\0" +escape: .ascii "\033[\0" +c: .ascii "C\0" +ram_comma: .ascii "M RAM, \0" + +bogo_total: .ascii " Bogomips Total\0" + +default_colors: .ascii "\033[0m\n\n\0" + +cpuinfo: .ascii "/proc/cpuinfo\0" + +one: .ascii "One \0" + +disk_buffer: +.ascii "processor : 0\n" +.ascii "cpu : 745/755\n" +.ascii "temperature : 22-24 C (uncalibrated)\n" +.ascii "clock : 600.000000MHz\n" +.ascii "revision : 51.17 (pvr 0008 3311)\n" +.ascii "bogomips : 49.79\n" +.ascii "timebase : 24960000\n" +.ascii "platform : PowerMac\n" +.ascii "model : PowerBook4,1\n" +.ascii "machine : PowerBook4,1\n" +.ascii "motherboard : PowerBook4,1 MacRISC2 MacRISC Power Macintosh\n" +.ascii "detected as : 257 (iBook 2)\n" +.ascii "pmac flags : 0000001b\n" +.ascii "L2 cache : 256K unified\n" +.ascii "pmac-generation : NewWorld\n\0" + +uname_info: +.ascii "Linux\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii 
"henparma\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "2.6.29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "#1 Wed May 13 15:51:54 UTC 2009\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" +.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + + +sysinfo_buff: +.long 0,0,0,0,512*1024*1024,0,0,0 + +#============================================================================ +#.bss +#============================================================================ + +.lcomm bss_begin,0 +.lcomm num_to_ascii_buff,10 +.lcomm num_to_ascii_end,1 +.lcomm text_buf, (N+F-1) # These buffers must follow each other +.lcomm out_buffer,16384 + + + + + + + + + + Index: exp-bbv/tests/ppc32-linux/ll.stdout.exp =================================================================== --- exp-bbv/tests/ppc32-linux/ll.stdout.exp (revision 0) +++ exp-bbv/tests/ppc32-linux/ll.stdout.exp (revision 0) @@ -0,0 +1,16 @@ +############################################################################### +############################################################################### +##################################################################O#O########## +############################################################################### +############################################################################### +############################################################################### +############################################################################### 
+############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +Linux Version 2.6.29, Compiled #1 Wed May 13 15:51:54 UTC 2009 +One 600.000000MHz PPC 745/755, 512M RAM, 49.79 Bogomips Total +henparma Index: exp-bbv/tests/ppc32-linux/ll.stderr.exp =================================================================== --- exp-bbv/tests/ppc32-linux/ll.stderr.exp (revision 0) +++ exp-bbv/tests/ppc32-linux/ll.stderr.exp (revision 0) @@ -0,0 +1,5 @@ +# Total intervals: 40 (Interval Size 1000) +# Total instructions: 40330 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 Index: exp-bbv/tests/ppc32-linux/ll.out.exp =================================================================== --- exp-bbv/tests/ppc32-linux/ll.out.exp (revision 0) +++ exp-bbv/tests/ppc32-linux/ll.out.exp (revision 0) @@ -0,0 +1,48 @@ +T:1:16 :8:10 :6:32 :2:56 :9:48 :10:666 :4:90 :5:36 :7:2 :3:45 +T:8:5 :6:20 :2:34 :9:80 :10:775 :4:42 :5:22 :7:1 :3:21 +T:8:5 :6:16 :2:27 :9:64 :10:824 :4:30 :5:18 :7:1 :3:15 +T:8:5 :6:10 :2:18 :9:80 :10:865 :4:6 :5:12 :7:1 :3:3 +T:8:5 :6:10 :2:18 :9:96 :10:858 :5:12 :7:1 +T:8:5 :6:10 :2:18 :9:80 :10:865 :4:6 :5:12 :7:1 :3:3 +T:6:6 :2:9 :9:36 :10:943 :5:6 +T:8:5 :6:8 :2:15 :9:92 :10:869 :5:10 :7:1 +T:6:14 :2:21 :9:112 :10:839 :5:14 +T:8:5 :6:6 :2:12 :9:64 :10:902 :5:10 :7:1 +T:8:5 :6:8 :2:15 :9:80 :10:883 :5:8 :7:1 +T:6:8 :2:12 :9:64 :10:908 :5:8 +T:6:6 :2:9 :9:48 :10:931 :5:6 +T:8:5 :6:4 :2:9 :9:48 :10:927 :5:6 :7:1 +T:6:6 :2:9 :9:48 :10:931 :5:6 +T:8:5 :6:6 :2:12 :9:64 :10:904 :5:8 :7:1 +T:6:2 :2:3 :9:16 :10:977 :5:2 +T:8:5 :6:12 :2:21 :9:96 :10:842 :4:6 :5:14 :7:1 :3:3 +T:6:6 :2:9 :9:48 :10:931 :5:6 +T:6:6 :2:9 :9:48 
:10:931 :5:6 +T:8:5 :6:14 :2:24 :9:112 :10:819 :4:6 :5:16 :7:1 :3:3 +T:8:5 :6:6 :2:12 :9:64 :10:904 :5:8 :7:1 +T:6:6 :2:9 :9:48 :10:931 :5:6 +T:8:5 :6:8 :2:15 :9:80 :10:881 :5:10 :7:1 +T:8:5 :6:10 :2:18 :9:96 :10:858 :5:12 :7:1 +T:6:10 :2:15 :9:80 :10:885 :5:10 +T:8:5 :6:10 :2:15 :9:64 :10:470 :4:6 :5:12 :7:1 :3:3 :11:2 :12:7 :13:405 +T:13:1000 +T:13:1000 +T:13:1000 +T:13:1000 +T:13:1000 +T:13:1000 +T:13:1000 +T:13:1000 +T:13:1000 +T:13:1000 +T:16:2 :19:2 :20:2 :21:2 :22:2 :23:1 :17:268 :18:10 :24:4 :25:6 :26:3 :30:3 :31:2 :12:28 :13:636 :14:8 :15:4 :27:12 :28:2 :29:3 +T:33:3 :34:4 :46:2 :47:4 :48:2 :49:4 :53:2 :54:4 :35:8 :37:246 :36:168 :38:8 :40:27 :39:22 :41:8 :44:51 :42:38 :43:34 :45:6 :17:116 :18:10 :32:10 :13:183 :14:4 :15:2 :27:12 :50:16 :28:2 :51:2 :52:2 +T:55:2 :56:1 :35:4 :37:381 :36:256 :38:4 :40:12 :39:10 :41:4 :44:15 :42:12 :43:10 :45:3 :17:64 :18:2 :24:4 :25:6 :26:3 :30:3 :31:2 :12:28 :13:148 :14:6 :15:3 :27:12 :28:2 :29:3 + + +# Total intervals: 40 (Interval Size 1000) +# Total instructions: 40330 +# Total reps: 0 +# Unique reps: 0 +# Total fldcw instructions: 0 + Index: Makefile.am =================================================================== --- Makefile.am (revision 10339) +++ Makefile.am (working copy) @@ -12,7 +12,8 @@ helgrind \ drd -EXP_TOOLS = exp-ptrcheck +EXP_TOOLS = exp-ptrcheck \ + exp-bbv # DDD: once all tools work on Darwin, TEST_TOOLS and TEST_EXP_TOOLS can be # replaced with TOOLS and EXP_TOOLS. Index: configure.in =================================================================== --- configure.in (revision 10339) +++ configure.in (working copy) @@ -1900,6 +1900,12 @@ drd/docs/Makefile drd/scripts/download-and-build-splash2 drd/tests/Makefile + exp-bbv/Makefile + exp-bbv/docs/Makefile + exp-bbv/tests/Makefile + exp-bbv/tests/x86-linux/Makefile + exp-bbv/tests/amd64-linux/Makefile + exp-bbv/tests/ppc32-linux/Makefile ) cat<