[Bro] BRO Logger crashing due to large DNS log files

Ron McClellan Ron_McClellan at ao.uscourts.gov
Tue Aug 21 13:22:13 PDT 2018


Thanks Justin, here is the info.

Ron


######################Memory / CPU################################################################

[root@ ron]# free
              total        used        free      shared  buff/cache   available
Mem:      263620592   241469748      861800        4464    21289044    21038584
Swap:       4194300       80508     4113792

[root@ current]# cat /proc/cpuinfo |grep 'model name'|sort|uniq  -c
     72 model name      : Intel(R) Xeon(R) Gold 6140 CPU @ 2.30GHz


######################Perl Script Output################################################################

lag.value 500.000000

######################node.cfg################################################################

cat /usr/local/bro/etc/node.cfg
# Example BroControl node configuration.
#
# This example has a standalone node ready to go except for possibly changing
# the sniffing interface.

# This is a complete standalone configuration.  Most likely you will
# only need to change the interface.
#[bro]
#type=standalone
#host=localhost
#interface=eno1

## Below is an example clustered configuration. If you use this,
## remove the [bro] node above.

[logger]
type=logger
host=localhost
pin_cpus=3,4,5,6,7,8,9,10,11

[manager]
type=manager
host=localhost

[proxy-1]
type=proxy
host=localhost
#
[worker-1]
type=worker
host=localhost
interface=ens1f0
lb_method=pf_ring
lb_procs=48
pin_cpus=12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60


#[logger]
#type=logger
#host=localhost
#
#[manager]
#type=manager
#host=localhost
#
#[proxy-1]
#type=proxy
#host=localhost
#
#[worker-1]
#type=worker
#host=localhost
#interface=eth0
#
#[worker-2]
#type=worker
#host=localhost
#interface=eth0

##############################################################################




[root@ ron]# broctl doctor.bro
#################################################################
# Checking if many recent connections have a SAD or had history #
#################################################################
error: No conn log files in the past day???


################################################
# Checking for recent capture_loss.log entries #
################################################
Capture loss stats:
worker=worker-1-1 loss_count=107 noloss_count=0 min_loss=0.219585 max_loss=2.807555 overall_loss=0.736212145529
worker=worker-1-10 loss_count=107 noloss_count=0 min_loss=0.219585 max_loss=2.804896 overall_loss=0.73353383007
worker=worker-1-11 loss_count=102 noloss_count=0 min_loss=0.236097 max_loss=2.803468 overall_loss=0.747731387284
worker=worker-1-12 loss_count=111 noloss_count=0 min_loss=0.225453 max_loss=2.810418 overall_loss=0.713610666693
worker=worker-1-13 loss_count=106 noloss_count=0 min_loss=0.225238 max_loss=2.805285 overall_loss=0.737752297911
worker=worker-1-14 loss_count=103 noloss_count=0 min_loss=0.235811 max_loss=2.803339 overall_loss=0.75462047398
worker=worker-1-15 loss_count=113 noloss_count=0 min_loss=0.232239 max_loss=2.836904 overall_loss=0.723967320858
worker=worker-1-16 loss_count=107 noloss_count=0 min_loss=0.241669 max_loss=2.843484 overall_loss=0.756960461224
worker=worker-1-17 loss_count=103 noloss_count=0 min_loss=0.239883 max_loss=2.840825 overall_loss=0.763862024976
worker=worker-1-18 loss_count=113 noloss_count=0 min_loss=0.239311 max_loss=2.839503 overall_loss=0.737882275017
worker=worker-1-19 loss_count=107 noloss_count=0 min_loss=0.225381 max_loss=2.840043 overall_loss=0.744923084459
worker=worker-1-2 loss_count=103 noloss_count=0 min_loss=0.219585 max_loss=2.807167 overall_loss=0.749798789791
worker=worker-1-20 loss_count=103 noloss_count=0 min_loss=0.224953 max_loss=2.837683 overall_loss=0.758886002296
worker=worker-1-21 loss_count=113 noloss_count=0 min_loss=0.240954 max_loss=2.844588 overall_loss=0.735745101629
worker=worker-1-22 loss_count=105 noloss_count=0 min_loss=0.235882 max_loss=2.842643 overall_loss=0.749374186886
worker=worker-1-23 loss_count=103 noloss_count=0 min_loss=0.242026 max_loss=2.83619 overall_loss=0.767731838409
worker=worker-1-24 loss_count=110 noloss_count=0 min_loss=0.235739 max_loss=2.838526 overall_loss=0.72491227295
worker=worker-1-25 loss_count=107 noloss_count=0 min_loss=0.240525 max_loss=2.829691 overall_loss=0.749225792647
worker=worker-1-26 loss_count=103 noloss_count=0 min_loss=0.24124 max_loss=2.829044 overall_loss=0.762315842219
worker=worker-1-27 loss_count=114 noloss_count=0 min_loss=0.241884 max_loss=2.818178 overall_loss=0.729974786019
worker=worker-1-28 loss_count=107 noloss_count=0 min_loss=0.225381 max_loss=2.83352 overall_loss=0.74448587852
worker=worker-1-29 loss_count=103 noloss_count=0 min_loss=0.239525 max_loss=2.803074 overall_loss=0.757284890628
worker=worker-1-3 loss_count=114 noloss_count=0 min_loss=0.219728 max_loss=2.80548 overall_loss=0.719031297562
worker=worker-1-30 loss_count=111 noloss_count=0 min_loss=0.235454 max_loss=2.833323 overall_loss=0.728580085593
worker=worker-1-31 loss_count=104 noloss_count=0 min_loss=0.249393 max_loss=2.802944 overall_loss=0.756920450524
worker=worker-1-32 loss_count=102 noloss_count=0 min_loss=0.232239 max_loss=2.823435 overall_loss=0.746727375956
worker=worker-1-33 loss_count=114 noloss_count=0 min_loss=0.245892 max_loss=2.823399 overall_loss=0.736399137185
worker=worker-1-34 loss_count=106 noloss_count=0 min_loss=0.222024 max_loss=2.813374 overall_loss=0.734090574376
worker=worker-1-35 loss_count=103 noloss_count=0 min_loss=0.219738 max_loss=2.830536 overall_loss=0.75132105636
worker=worker-1-36 loss_count=114 noloss_count=0 min_loss=0.242244 max_loss=2.810973 overall_loss=0.732583490263
worker=worker-1-37 loss_count=107 noloss_count=0 min_loss=0.219657 max_loss=2.804831 overall_loss=0.732532430144
worker=worker-1-38 loss_count=103 noloss_count=0 min_loss=0.219585 max_loss=2.808788 overall_loss=0.747543372035
worker=worker-1-39 loss_count=114 noloss_count=0 min_loss=0.219585 max_loss=2.803075 overall_loss=0.716905805628
worker=worker-1-4 loss_count=107 noloss_count=0 min_loss=0.219728 max_loss=2.808918 overall_loss=0.736058042389
worker=worker-1-40 loss_count=107 noloss_count=0 min_loss=0.219585 max_loss=2.8021 overall_loss=0.73236088495
worker=worker-1-41 loss_count=103 noloss_count=0 min_loss=0.219585 max_loss=2.802619 overall_loss=0.746214054901
worker=worker-1-42 loss_count=113 noloss_count=0 min_loss=0.219442 max_loss=2.809179 overall_loss=0.716488984394
worker=worker-1-43 loss_count=107 noloss_count=0 min_loss=0.219442 max_loss=2.799245 overall_loss=0.732172697609
worker=worker-1-44 loss_count=103 noloss_count=0 min_loss=0.219513 max_loss=2.799373 overall_loss=0.744807113628
worker=worker-1-45 loss_count=114 noloss_count=0 min_loss=0.219441 max_loss=2.80574 overall_loss=0.716654868989
worker=worker-1-46 loss_count=107 noloss_count=0 min_loss=0.219585 max_loss=2.806646 overall_loss=0.732673849923
error: worker=worker-1-47 loss_count=42 noloss_count=0 min_loss=0.252768 max_loss=2.803858 overall_loss=1.00735434535
worker=worker-1-48 loss_count=55 noloss_count=0 min_loss=0.252767 max_loss=2.803207 overall_loss=0.874731142836
error: worker=worker-1-49 loss_count=20 noloss_count=0 min_loss=0.827054 max_loss=2.806194 overall_loss=1.3206719637
worker=worker-1-5 loss_count=103 noloss_count=0 min_loss=0.219585 max_loss=2.809826 overall_loss=0.749445632038
error: worker=worker-1-50 loss_count=20 noloss_count=0 min_loss=0.828338 max_loss=2.807944 overall_loss=1.32179609476
worker=worker-1-6 loss_count=114 noloss_count=0 min_loss=0.219585 max_loss=2.810032 overall_loss=0.720953328173
worker=worker-1-7 loss_count=107 noloss_count=0 min_loss=0.219442 max_loss=2.803858 overall_loss=0.733033867296
worker=worker-1-8 loss_count=103 noloss_count=0 min_loss=0.219585 max_loss=2.809439 overall_loss=0.749667466957
worker=worker-1-9 loss_count=114 noloss_count=0 min_loss=0.219585 max_loss=2.804442 overall_loss=0.718187501869


################################################################
# Checking what percentage of recent tcp connections show loss #
################################################################
error: No conn log files in the past day???


###################################################################
# Checking if connections are unevenly distributed across workers #
###################################################################
error: No conn log files in the past day???


###############################################################################################################################
# Checking if anything is in the deprecated local-logger.bro, local-manager.bro, local-proxy.bro, or local-worker.bro scripts #
###############################################################################################################################
Nothing found


######################################################################
# Checking if any recent connections have been logged multiple times #
######################################################################
error: No conn log files in the past day???


############################################################################
# Checking what percentage of recent tcp connections are remote to remote. #
############################################################################
error: No conn log files in the past day???


###############################################################################
# Checking if bro is linked against a custom malloc like tcmalloc or jemalloc #
###############################################################################
error: configured to use a custom malloc=False


##################################
# Checking pf_ring configuration #
##################################
error: bro binary on node worker-1-1 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-2 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-3 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-4 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-5 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-6 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-7 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-8 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-9 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-10 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-11 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-12 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-13 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-14 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-15 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-16 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-17 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-18 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-19 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-20 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-21 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-22 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-23 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-24 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-25 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-26 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-27 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-28 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-29 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-30 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-31 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-32 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-33 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-34 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-35 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-36 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-37 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-38 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-39 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-40 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-41 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-42 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-43 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-44 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-45 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-46 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-47 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: bro binary on node worker-1-48 is neither linked against pf_ring libpcap or using the bro pf_ring plugin
error: configured to use pf_ring=True pcap=False plugin=False


############################################
# Checking for recent reporter.log entries #
############################################
error: Found 48 reporter log files in the past 7 days
Recent reporter.log messages:
Reporter::INFO processing suspended
Reporter::INFO processing suspended
446 duplicate messages suppressed
Reporter::INFO processing continued
Reporter::INFO processing continued
533 duplicate messages suppressed
Reporter::INFO received termination signal
Reporter::INFO received termination signal
15 duplicate messages suppressed




-----Original Message-----
From: Azoff, Justin S <jazoff at illinois.edu> 
Sent: Tuesday, August 21, 2018 4:08 PM
To: Ron McClellan <Ron_McClellan at ao.uscourts.gov>
Cc: bro at bro.org
Subject: Re: [Bro] BRO Logger crashing due to large DNS log files


> On Aug 21, 2018, at 2:53 PM, Ron McClellan <Ron_McClellan at ao.uscourts.gov> wrote:
> 
> Justin,
> 
> 	The first 5 lines are consistent; the last 2 lines were seen for the first time today.  The crash report wasn't very useful (see below), and diag was pretty much the same.  Hopefully the OOM message helps.
> 
> Ron
> 
> 
> Aug 21 09:45:18 aosoc kernel: Out of memory: Kill process 6610 (bro) 
> score 507 or sacrifice child Aug 21 09:45:18 aosoc kernel: Killed 
> process 6610 (bro) total-vm:139995144kB, anon-rss:137467264kB, 
> file-rss:0kB, shmem-rss:0kB Aug 21 11:32:23 aosoc kernel: bro invoked 
> oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0 Aug 21 11:32:23 
> aosoc kernel: bro cpuset=/ mems_allowed=0-1 Aug 21 11:32:23 aosoc 
> kernel: CPU: 57 PID: 21655 Comm: bro Kdump: loaded Not tainted 
> 3.10.0-862.11.6.el7.x86_64 #1 Aug 21 11:32:23 aosoc kernel: Out of 
> memory: Kill process 20158 (bro) score 544 or sacrifice child Aug 21 
> 11:32:23 aosoc kernel: Killed process 20158 (bro) 
> total-vm:150275592kB, anon-rss:147621508kB, file-rss:0kB, 
> shmem-rss:0kB

Ah, this is great... well, not great in that it is crashing, but great in that now we know what is wrong: You're running out of RAM.

So you said you had 256GB, which should normally be more than enough as long as everything is working properly, but I have a feeling some things are not working quite right.

Have you had a chance to run that python program I posted?  If you have a high amount of log lag, something is not keeping up well.

Do you have any graphs of memory usage on your host?

What exactly does this output:

$ cat /proc/cpuinfo |grep 'model name'|sort|uniq  -c
     40 model name      : Intel(R) Xeon(R) CPU E5-2470 v2 @ 2.40GHz


The fact that you are seeing 

34264380 dns_unmatched_msg
16696030 dns_unmatched_reply
 62288 possible_split_routing
 59512 data_before_established

in your weird.log points to something being very very wrong with your traffic.  This can cause bro to work many times harder than it needs to.

How is your load balancing set up in your node.cfg?

Can you try running bro-doctor from bro-pkg: https://packages.bro.org/packages/view/74d45e8c-4fb7-11e8-88be-0a645a3f3086

If you can't run bro-pkg you just need to grab 

https://raw.githubusercontent.com/ncsa/bro-doctor/master/doctor.py

and drop it in

/usr/local/bro/lib/broctl/plugins/doctor.py

and run broctl doctor.bro



—
Justin Azoff




More information about the Bro mailing list