[Bro] File carving

Antonio Nappa jeppojeps at gmail.com
Thu Sep 5 08:38:26 PDT 2013


Hello,
I'm quite new to bro, so please be kind :)

I'm using this script:


#######################################!



module HTTP;

export {
  # Notice raised each time a matching HTTP response body is written to disk.
  redef enum Notice::Type += {
    Exe_File_Capture,
  };

  # Pattern matched against the first chunk of a response body
  # (the bytes "PK\x03\x04" followed by six further fixed header bytes).
  const file_magic_bytes = /^\x50\x4B\x03\x04\x14\x00\x08\x00\x08\x00/ &redef;

  # MIME types that trigger extraction.
  const extract_mime_types = /application\/java-archive/ &redef;
  redef HTTP::extract_mime_types += /application\/x-java-archive/;
  # redef HTTP::extract_mime_types += /application\/x-msdownload/;
  # redef HTTP::extract_mime_types += /application\/exe/;
  # redef HTTP::extract_mime_types += /application\/x-exe/;
  # redef HTTP::extract_mime_types += /application\/dos-exe/;
  # redef HTTP::extract_mime_types += /application\/x-winexe/;
  # redef HTTP::extract_mime_types += /application\/msdos-windows/;
  # redef HTTP::extract_mime_types += /application\/x-msdos-program/;

  # URL suffixes (file extensions) that trigger extraction.
  const extract_file_extensions = /\.[jJ][aA][rR]$/ &redef;
  redef HTTP::extract_file_extensions += /\.[sS][cC][rR]$/;

  # Intended size limits for extracted files.  TODO: nothing in this script
  # enforces them yet.
  const minimum_size = 10240 &redef; # 10K
  const maximum_size = 8388608 &redef; # 8MB

  # Text file that receives one record (server address, URI, status line and
  # saved headers) per extracted body.
  const ufile = open("malware_urls.txt");

  # URLs matching any of these patterns are never extracted.
  const whitelist_url_patterns = /^http:\/\/[^\/]*\.windowsupdate\.com\// &redef;
  redef HTTP::whitelist_url_patterns += /^http:\/\/[^\/]*\.microsoft\.com\//;
  redef HTTP::whitelist_url_patterns += /^http:\/\/[^\/]*\.google\.com\//;

  # Header list saved by the http_all_headers handler and dumped to `ufile`
  # when a capture starts.  This must be a `global`, not a `const`: the
  # handler assigns to it at run time and a constant is not a modifiable
  # value.
  # NOTE(review): this is a single script-wide value shared by every
  # connection; concurrent HTTP sessions overwrite each other's headers.
  global hd: table[count] of mime_header_rec = {};

  # Not referenced by the handlers in this script.
  const chk = F;

  #type mime_header_list: table[count] of mime_header_rec;

  # Per-request state tracked while a body is being extracted.
  redef record Info += {
    # Which rule matched: "magic-match", "mime-match" or "extension-match".
    extraction_prefix: string &optional;
    # Handle of the capture file; present once extraction has started.
    extraction_file:   file &log &optional;
    # Set to T when the first body chunk matched one of the rules above.
    extract_file:      bool &default=F;
    # Running total of body bytes written to the capture file.
    extracted_size:    count &default=0;
  };
}

# Remember the most recent set of response headers so that the
# http_entity_data handler can dump them next to the response status line
# when it starts a capture.
event http_all_headers(c: connection, is_orig: bool, hlist: mime_header_list) &priority=-1
{
  # Only response bodies are extracted, so request headers are irrelevant.
  # Storing them would let a client request overwrite the response headers
  # before they are written out.
  if ( is_orig )
    return;

  hd = hlist;
}
# Inspect every chunk of HTTP response body data.  On the first chunk,
# decide whether the body should be extracted (magic bytes, MIME type or URL
# extension, minus whitelisted URLs); if so, open a capture file, log the
# transfer to `ufile` and raise an Exe_File_Capture notice.  Every chunk,
# including the first, is then appended to the open capture file.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5
{
  # Ignore client communication
  if ( is_orig )
    return;

  # If in first chunk of data
  if ( c$http$first_chunk )
  {
    # Get the URL for whitelisting and extension matching
    local url = build_url_http(c$http);

    # Check for file magic byte matches
    if ( HTTP::file_magic_bytes in data )
    {
      c$http$extraction_prefix = "magic-match";
      c$http$extract_file = T;
    }
    # Check for MIME type matches.  mime_type is an optional field, so test
    # for its presence first: reading an unset field is a runtime error that
    # would abort this handler.
    else if ( c$http?$mime_type &&
              HTTP::extract_mime_types in c$http$mime_type )
    {
      c$http$extraction_prefix = "mime-match";
      c$http$extract_file = T;
    }
    # Check for file extension matches
    else if ( HTTP::extract_file_extensions in url )
    {
      c$http$extraction_prefix = "extension-match";
      c$http$extract_file = T;
    }
    # Content Disposition HTTP Header String - TODO?

    # If a magic byte, MIME, or Ext match...
    if ( c$http$extract_file )
    {
      # Check against whitelist
      if ( HTTP::whitelist_url_patterns in url )
      {
        c$http$extract_file = F;
      }
      else
      {
        # Open file to capture data
        local suffix = fmt("%s_%d.jar", is_orig ? "orig" : "resp",
                           c$http_state$current_response);
        local fname = generate_extraction_filename(c$http$extraction_prefix,
                                                   c, suffix);
        c$http$extraction_file = open(fname);
        enable_raw_output(c$http$extraction_file);
        c$http$extracted_size = 0;

        # The request may not have been seen (e.g. the capture started
        # mid-connection), in which case uri is unset.  Fall back to an
        # empty string rather than aborting here, which would skip the
        # write of this first chunk further down.
        local uri = c$http?$uri ? c$http$uri : "";
        local output = fmt("%s,%s", c$id$resp_h, uri);
        #local reshdr= fmt("%s %s %s", c$http$status_code, c$http$status_msg, c$http$info_msg);

        print ufile,"########################";
        print ufile, output;
        # The status fields are optional as well; print the status line
        # only when both are available.
        if ( c$http?$status_code && c$http?$status_msg )
          print ufile,c$http$status_code,c$http$status_msg;
        for ( j in hd )
        {
          print ufile, hd[j];
        }
        print ufile,"########################";

        local message = fmt("Storing %s to %s", url, c$http$extraction_file);
        local method  = "UNKNOWN";
        if ( c$http?$method )
          method = c$http$method;

        NOTICE([$note=Exe_File_Capture,
                $msg=message,
                $conn=c,
                $method=method,
                $URL=url]);
      }
    }
  } # End first chunk if

  # Do the capture when a capture file is open
  if ( c$http?$extraction_file )
  {
    print c$http$extraction_file, data;
    c$http$extracted_size += length;
  }

} # End HTTP entity data


# Close the capture file once the response entity that was being extracted
# has ended.
event http_end_entity(c: connection, is_orig: bool)
{
  # Only response bodies are ever captured, so the end of a client-side
  # entity must not close a capture file that is still receiving data.
  if ( is_orig )
    return;

  if ( c$http?$extraction_file )
  {
    close(c$http$extraction_file);
  }
}

##############################################################################

to carve JAR files from pcaps. The problem is that if I manually carve a
file with Wireshark, the size and the hash of this file are different from
the ones that I get if I use Bro. What is funny is that in the http.log I
get the right MD5 of the JAR file, but if I then run the md5sum utility on
the file extracted with Bro, they don't match, and in fact the
Content-Length does not match the size of the extracted file. I've already
checked whether there are multiple JAR archives in the pcap that I'm using
as an example, but there's only one. So I'm wondering what could be wrong.

Thank you for your help.

Cheers
Antonio
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mailman.ICSI.Berkeley.EDU/pipermail/bro/attachments/20130905/aa2fc093/attachment.html 


More information about the Bro mailing list