filebeat 收集nginx日志输出到kafka

kafka下载解压

 
 wget -c https://archive.apache.org/dist/kafka/2.2.0/kafka_2.12-2.2.0.tgz
 tar -zxf kafka_2.12-2.2.0.tgz

zk & kafka关键配置

config/zookeeper.properties

 
# the directory where the snapshot is stored.
dataDir=/app/kafka_2.12-2.2.0/zkData
# the port at which the clients will connect
clientPort=2181

config/server.properties

 
#侦听端口
listeners=PLAINTEXT://:9092
#外部连接地址
advertised.listeners=PLAINTEXT://10.10.10.49:9092
#kafka数据存储目录
log.dirs=/app/kafka_2.12-2.2.0/kafka-logs-data

启动zk & kafka

 
#zk
 ./bin/zookeeper-server-start.sh  -daemon config/zookeeper.properties
#kafka
 ./bin/kafka-server-start.sh  -daemon config/server.properties

kafka 生产消费测试

 
#生产
./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic ceshi
>nihao
#消费
./bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic ceshi --from-beginning
nihao

filebeat下载解压

# Download Filebeat 7.5.1 (-c resumes an interrupted download), then unpack it.
wget -c https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-7.5.1-linux-x86_64.tar.gz
tar -zxf filebeat-7.5.1-linux-x86_64.tar.gz

filebeat关键配置

filebeat.yml

 
filebeat.inputs:
# Each "-" entry is one input. Most options can be set per input, so
# different inputs can carry different configurations.
- type: log
  # Set to true to enable this input.
  enabled: true
  # Start reading files at their end instead of their beginning, so only
  # lines written after Filebeat starts are shipped.
  tail_files: true
  # Glob-based paths of the files to harvest.
  paths:
    - /usr/local/nginx/logs/access.log
output.kafka:
  enabled: true
  # Kafka brokers to connect to; this is the address published by
  # advertised.listeners in config/server.properties.
  hosts: ["10.10.10.49:9092"]
  # Kafka topic the events are published to.
  topic: "accesslog"
  # Note: the Kafka output has no "protocol" setting (that option belongs to
  # the Elasticsearch output), so it is intentionally not set here.

filebeat重启脚本

 
#!/bin/bash
# Restart Filebeat with the filebeat.yml in the current directory.
# Run this from the unpacked Filebeat directory.

# Find running processes whose command line mentions filebeat.yml.
# pgrep never reports itself, so no "grep -v grep" filtering is needed.
pids=$(pgrep -f 'filebeat.*filebeat\.yml')

# Only call kill when something is actually running; with an empty PID list
# kill would just print a usage error.
if [ -n "$pids" ]; then
    kill -9 $pids
fi

# Start from a fresh log file; create the directory on the first run.
mkdir -p logs
rm -f logs/filebeat.log

# -e makes Filebeat log to stderr, so stderr must be redirected as well,
# otherwise the log file stays empty when no terminal is attached.
nohup ./filebeat -e -c filebeat.yml > logs/filebeat.log 2>&1 &

	wget -c https://archive.apache.org/dist/kafka/2.2.0/kafka_2.12-2.2.0.tgz
	tar -zxf kafka_2.12-2.2.0.tgz

	# the directory where the snapshot is stored.
	dataDir=/app/kafka_2.12-2.2.0/zkData
	# the port at which the clients will connect
	clientPort=2181

	#侦听端口
	listeners=PLAINTEXT://:9092
	#外部连接地址
	advertised.listeners=PLAINTEXT://10.10.10.49:9092
	#kafka数据存储目录
	log.dirs=/app/kafka_2.12-2.2.0/kafka-logs-data

	#zk
	./bin/zookeeper-server-start.sh -daemon config/zookeeper.properties
	#kafka
	./bin/kafka-server-start.sh -daemon config/server.properties

	#生产
	./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic ceshi
	>nihao
	#消费
	./bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic ceshi --from-beginning
	nihao

	filebeat.inputs:
	# Each "-" entry is one input. Most options can be set per input, so
	# different inputs can carry different configurations.
	- type: log
	  # Set to true to enable this input.
	  enabled: true
	  # Start reading files at their end instead of their beginning, so only
	  # lines written after Filebeat starts are shipped.
	  tail_files: true
	  # Glob-based paths of the files to harvest.
	  paths:
	    - /usr/local/nginx/logs/access.log
	output.kafka:
	  enabled: true
	  # Kafka brokers to connect to.
	  hosts: ["10.10.10.49:9092"]
	  # Kafka topic the events are published to.
	  topic: "accesslog"
	  # Note: the Kafka output has no "protocol" setting (that option belongs to
	  # the Elasticsearch output), so it is intentionally not set here.

	#!/bin/bash
	# Restart Filebeat with the filebeat.yml in the current directory.
	# Run this from the unpacked Filebeat directory.

	# Find running processes whose command line mentions filebeat.yml.
	# pgrep never reports itself, so no "grep -v grep" filtering is needed.
	pids=$(pgrep -f 'filebeat.*filebeat\.yml')

	# Only call kill when something is actually running; with an empty PID list
	# kill would just print a usage error.
	if [ -n "$pids" ]; then
	    kill -9 $pids
	fi

	# Start from a fresh log file; create the directory on the first run.
	mkdir -p logs
	rm -f logs/filebeat.log

	# -e makes Filebeat log to stderr, so stderr must be redirected as well,
	# otherwise the log file stays empty when no terminal is attached.
	nohup ./filebeat -e -c filebeat.yml > logs/filebeat.log 2>&1 &