[Bro] Split path into directory and filename

Peter Erickson redlamb19 at gmail.com
Mon Aug 15 08:07:01 PDT 2011


** Seth Hall <seth at icir.org> [2011-08-15 09:20:59 -0400] **
> On Aug 13, 2011, at 9:45 PM, Peter Erickson wrote:
> 
> > The reason I ask is I'm looking to modify the http/file-extract.bro
> > script so that the http responses are saved into a directory structure
> > based on the src and dst ip addresses (e.g. http-items/src_ip/dst_ip).
> 
> Ah, that's interesting.  We need to rework the way that works to put
> more control of the file naming in users' hands; it's a definite
> shortcoming in the current iteration.  I'll refactor it a little bit
> soon so that you can accomplish what you want without having to
> rewrite bits of functionality. :)

No need to spend your time doing it. I got it working over the weekend.
I updated the generate_extraction_filename to include a directory path
as the first argument... and then left everything else the same. After
obtaining the filename to use, I call the mkdirs command to create the
directory structure. I also updated the file-extract.bro script to
extract the client request payload as well. I'll try to attach my
updated scripts to this email, but if they are stripped let me know and
I'll send them to you directly.

One thing I did notice over the weekend was a potential problem in
file-extract (I'm using current as opposed to 1.5) with respect to http
POST requests. The file-extract script watches for first_chunk = T
before it starts capturing data, however with POST requests the
first_chunk is set, and subsequently set to F, within the client
request. Once the response gets processed, the first_chunk is F and the
payload is never saved (Hopefully that makes sense).  I fixed this by
creating the following event which resets the first_chunk and mime_type
in preparation for the response. The -15 priority will make sure that it
executes AFTER logging the message to the logfile.

# Re-arms the per-message extraction state once an HTTP message is complete,
# so the next message on the connection is again treated as starting with its
# first chunk and with no MIME type recorded.  The low priority is intended to
# make this handler run after the handler that writes the log entry.
event http_message_done(c: connection, is_orig: bool,
        stat: http_message_stat) &priority=-15
	{
	# Forget the MIME type that was recorded for the message that just ended.
	delete c$http$mime_type;

	# Treat the next entity data event as the first chunk again.
	c$http$first_chunk = T;
	}

> Since I see that you're using the code from the repository, I'd be happy to
> find out how your experience with it has been if you are interested in
> sharing.

I'm not sure why I started playing with the current version in the repo
as opposed to 1.5, but I like it. The way the scripts are loaded and the
directory structure makes much more sense to me as opposed to having
them all in one directory. I also like the addition of the __load__.bro
scripts. As seen above in my fix for the http POST problems, the new
overloaded delete operator was a nice addition which made solving the
problem almost trivial. So far, I haven't seen any problems with the
current version, but I have been running it on pcap files as opposed to live
traffic.

-- 
Peter Erickson
redlamb19 _at_ gmail _dot_ com
-------------- next part --------------

## Builds the name of the file an extracted item is written to.
##
## dir: Base directory; when non-empty the result is placed under
##      "<dir>/<orig_h>/<resp_h>/".  When empty, no directory part is added.
##
## prefix: Optional text prepended to the connection description with "_".
##
## c: The connection whose endpoints are encoded into the name.
##
## suffix: Optional text appended to the connection description with "_".
##
## Returns: The generated file name, including the directory part if any.
function generate_extraction_filename2(dir: string, prefix: string, c: connection, suffix: string): string
	{
	local id = c$id;

	# Core of the name: "orig_h:orig_p-resp_h:resp_p".
	local name = fmt("%s:%d-%s:%d", id$orig_h, id$orig_p, id$resp_h, id$resp_p);

	if ( prefix != "" )
		name = fmt("%s_%s", prefix, name);

	if ( suffix != "" )
		name = fmt("%s_%s", name, suffix);

	# Without a base directory the bare file name is the result.
	if ( dir == "" )
		return name;

	return fmt("%s/%s/%s/%s", dir, id$orig_h, id$resp_h, name);
	}
	
-------------- next part --------------

## Creates the directory *dir*, creating its parent directories first.
##
## dir: Slash-separated directory path to create.
##
## Returns: F as soon as creating a parent fails; otherwise the result of
##          mkdir() on *dir* itself.
function mkdirs(dir: string): bool {
	# Cut off the last "/component" to obtain the parent path.  The local is
	# deliberately not called "path_split" so it cannot be confused with the
	# path_split() function.
	local parts = split1(dir, /\/[^\/]*$/);
	local parent = parts[1];

	# Recurse only when there is a real parent: an empty parent (e.g. "/a")
	# or a path that did not split at all (e.g. "a") has nothing above it
	# that needs creating.
	if ( parent != "" && length(parts) != 1 && ! mkdirs(parent) )
		return F;

	# NOTE(review): this relies on mkdir() reporting success for a directory
	# that already exists -- confirm against the built-in's documentation.
	return mkdir(dir);
}

## Returns the directory part of *path*, i.e. everything before the last "/".
##
## path: Slash-separated path.
##
## Returns: The text preceding the final "/", or "" when *path* contains no
##          "/" at all (matching what path_split() yields for such a path).
function path_dirname(path: string): string {
	# A bare file name has no directory component.  Without this check the
	# whole name would come back as its own directory.
	if ( /\// !in path )
		return "";

	# Split at the last "/component" and keep what precedes it.
	local parts = split1(path, /\/[^\/]*$/);
	return parts[1];
}

## Returns the last "/"-separated component of *path*.
##
## path: Slash-separated path.
##
## Returns: The final component; a path without any "/" is returned whole.
function path_filename(path: string): string {
	# Components are indexed from 1, so the last one sits at index length().
	local parts = split(path, /\//);
	local last = length(parts);

	return parts[last];
}

## Splits *path* into its directory part and its final component.
##
## path: Slash-separated path.
##
## Returns: A string_array in which index 1 holds the directory part (the
##          leading components re-joined with "/") and index 2 holds the
##          final component.
function path_split(path: string): string_array {
	local parts = split(path, /\//);
	local last = length(parts);
	local result: string_array;

	# Take the final component out first, so that only the leading
	# components remain to be joined into the directory part.
	local base = parts[last];
	delete parts[last];

	result[1] = join_string_array("/", parts);
	result[2] = base;

	return result;
}

-------------- next part --------------
##! Extracts the items from HTTP traffic, one per file.  Entity bodies sent
##! by the client (requests) and by the server (responses) can both be
##! extracted, depending on the options below.

module HTTP;

export {
	## Pattern of file mime types to extract from HTTP entity bodies.
	## It is only consulted for bodies sent by the server.
	const extract_file_types = /NO_DEFAULT/ &redef;

	## The on-disk prefix for files to be extracted from HTTP entity bodies.
	const extraction_prefix = "http-item" &redef;
	## Base directory for extracted files.  When non-empty, files are written
	## to "<extraction_dir>/<orig_h>/<resp_h>/" and that directory is created
	## before the file is opened.  When empty, no directory part is added.
	const extraction_dir = "" &redef;
	## If T, every entity body sent by the client is extracted.
	const extract_requests = F &redef;
	## If T, every entity body sent by the server is extracted, whether or
	## not its mime type matches extract_file_types.
	const extract_responses = F &redef;

	redef record Info += {
		## This field can be set per-connection to determine if the entity body
		## will be extracted.  It must be set to T on or before the first 
		## entity_body_data event.
		extracting_file:  bool &default=F;
	
		## This is the holder for the file handle as the file is being written
		## to disk.
		## NOTE(review): the handlers in this script only use request_file and
		## response_file; nothing here assigns this field.
		extraction_file:  file &log &optional;

		## Handle of the file receiving an extracted client (request) body.
		request_file:	file &log &optional;
		## Handle of the file receiving an extracted server (response) body.
		response_file:	file &log &optional;
	};

	redef record State += {
		## Counter incremented for each entity body selected for extraction;
		## its value becomes part of the generated file name.
		entity_bodies:    count &default=0;
	};
}

# Writes HTTP entity bodies to disk.  On the first chunk of an entity it
# decides whether that entity is to be extracted and, if so, opens the output
# file; every chunk of an entity being extracted is then appended to the file
# for its direction.
#
# c: The connection carrying the HTTP session.
# is_orig: T if the data was sent by the client, F if sent by the server.
# length: Number of bytes in *data*.
# data: The chunk of entity body.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
	{
	# The extraction decision and file setup happen once per entity, on its
	# first chunk.  Later chunks must still reach the write step below, so
	# there is no early return for them.
	# NOTE(review): this assumes first_chunk is cleared elsewhere once the
	# first chunk has been processed -- confirm against the base HTTP scripts.
	if ( c$http$first_chunk )
		{
		# Client bodies are extracted when extract_requests is set; server
		# bodies when extract_responses is set or the recorded mime type
		# matches extract_file_types.
		if ( is_orig && extract_requests ||
		     ! is_orig && (extract_responses || c$http?$mime_type && extract_file_types in c$http$mime_type) )
			{
			c$http$extracting_file = T;
			local suffix = fmt("%s_%d.dat", is_orig ? "orig" : "resp", ++c$http_state$entity_bodies);
			local fname = generate_extraction_filename2(extraction_dir, extraction_prefix, c, suffix);

			# The per-host directories only exist in the name when a base
			# directory is configured.
			if ( extraction_dir != "" )
				mkdirs(path_dirname(fname));

			if ( is_orig )
				{
				c$http$request_file = open(fname);
				enable_raw_output(c$http$request_file);
				}
			else
				{
				c$http$response_file = open(fname);
				enable_raw_output(c$http$response_file);
				}
			}
		}

	if ( ! c$http$extracting_file )
		return;

	# extracting_file is shared by both directions, so also make sure a file
	# was actually opened for this direction before writing to it.
	if ( is_orig )
		{
		if ( c$http?$request_file )
			print c$http$request_file, data;
		}
	else if ( c$http?$response_file )
		print c$http$response_file, data;
	}

# Closes the extraction file belonging to the entity that just ended.
#
# c: The connection carrying the HTTP session.
# is_orig: T if the finished entity was sent by the client, F if by the server.
event http_end_entity(c: connection, is_orig: bool)
	{
	if ( ! c$http$extracting_file )
		return;

	# Pick the file strictly by direction: the end of a client entity must
	# never close the response file, even when no request file was opened.
	if ( is_orig )
		{
		if ( c$http?$request_file )
			close(c$http$request_file);
		}
	else if ( c$http?$response_file )
		close(c$http$response_file);
	}


More information about the Bro mailing list