Generic Netlink used in OvS
Orion Electric Age

The netlink protocol is a socket-based Inter Process Communication (IPC) mechanism, based on RFC 3549, “Linux Netlink as an IP Services Protocol.” It provides a bidirectional communication channel between userspace and the kernel or among some parts of the kernel itself.

Netlink first appeared in the 2.2 Linux kernel as a more flexible alternative to the awkward IOCTL communication method. IOCTL handlers cannot send asynchronous messages from the kernel to userspace, and every IOCTL needs its own number to be defined.

socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC)

setsockopt(int fd, SOL_SOCKET, SO_RCVBUFFORCE, const void *optval, socklen_t optlen)

connect(int fd, struct sockaddr *, int len)

Send CTRL_CMD_GETFAMILY cmd to generic netlink id GENL_ID_CTRL with sendmsg().

1
2
3
4
/* One generic netlink family id per OvS family (datapath, vport, flow,
 * packet). NOTE(review): presumably each is filled with the id returned by
 * the CTRL_CMD_GETFAMILY lookup described above -- confirm against the OvS
 * sources. */
static int ovs_datapath_family;
static int ovs_vport_family;
static int ovs_flow_family;
static int ovs_packet_family;

These correspond to the “generic netlink operations in OvS”.

In short, a netlink socket is operated from userspace just like a Unix or network socket, which makes for a friendly design.

netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)

genl_pernet_init()

(*input)(struct sk_buff *skb)
genl_rcv–>genl_rcv_msg

genlmsg_put()
genlmsg_unicast() /* mostly used */
genlmsg_multicast()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/**
 * struct genl_ops - generic netlink operations
 * @policy: attribute validation policy
 * @doit: standard command callback
 * @dumpit: callback for dumpers
 * @done: completion callback for dumps
 * @cmd: command identifier
 * @internal_flags: flags used by the family
 * @flags: flags
 */
struct genl_ops {
const struct nla_policy *policy;
int (*doit)(struct sk_buff *skb,
struct genl_info *info);
int (*dumpit)(struct sk_buff *skb,
struct netlink_callback *cb);
int (*done)(struct netlink_callback *cb);
u8 cmd;
u8 internal_flags;
u8 flags;
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/**
 * struct nla_policy - attribute validation policy
 * @type: Type of attribute or NLA_UNSPEC
 * @len: Type specific length of payload
 *
 * Policies are defined as arrays of this struct, the array must be
 * accessible by attribute type up to the highest identifier to be expected.
 *
 * Meaning of 'len' field:
 *    NLA_STRING           Maximum length of string
 *    NLA_NUL_STRING       Maximum length of string (excluding NUL)
 *    NLA_FLAG             Unused
 *    NLA_BINARY           Maximum length of attribute payload
 *    NLA_NESTED           Don't use 'len' field -- length verification is
 *                         done by checking len of nested header (or empty)
 *    NLA_NESTED_COMPAT    Minimum length of structure payload
 *    NLA_U8, NLA_U16,
 *    NLA_U32, NLA_U64,
 *    NLA_S8, NLA_S16,
 *    NLA_S32, NLA_S64,
 *    NLA_MSECS            Leaving the length field zero will verify the
 *                         given type fits, using it verifies minimum length
 *                         just like "All other"
 *    All other            Minimum length of attribute payload
 *
 * Example:
 * static const struct nla_policy my_policy[ATTR_MAX+1] = {
 *	[ATTR_FOO] = { .type = NLA_U16 },
 *	[ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
 *	[ATTR_BAZ] = { .len = sizeof(struct mystruct) },
 * };
 */
struct nla_policy {
u16 type;
u16 len;
};
1
2
3
4
5
6
7
8
9
10
11
/* datapath operations: new, del, dump and set */
struct genl_ops dp_datapath_genl_ops;

/* port operations: new, del, dump and set */
struct genl_ops dp_vport_genl_ops;

/* exact flow operations */
struct genl_ops dp_flow_genl_ops;

/* upcall packets handler */
struct genl_ops dp_packet_genl_ops;

One-to-one correspondence with netlink sock’s id.

Layout illustration

1
2
3
4
5
6
7
8
/*
* <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
* +---------------------+- - -+- - - - - - - - - -+- - -+
* | Header | Pad | Payload | Pad |
* | (struct nlattr) | ing | | ing |
* +---------------------+- - -+- - - - - - - - - -+- - -+
* <-------------- nlattr->nla_len -------------->
*/

Summary with flow chart

  • generic netlink flow chart in OvS
    generic netlink flow chart in OvS

Reference

generic netlink howto

1
2
3
4
5
6
7
8
.
├── kernel
│   └── netlink_instance.c
├── Makefile
├── pub
│   └── genl_ins_pub.h
└── user
└── user_netlink.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#include <linux/kernel.h>
#include <linux/module.h>
#include <net/genetlink.h>

#include "../pub/genl_ins_pub.h"

/*
 * Attribute policy: INSTANCE_A_MSG must be a NUL-terminated string.
 * The table is never modified, so it is const; struct genl_ops stores it
 * through a pointer-to-const.
 */
static const struct nla_policy instance_genl_policy[INSTANCE_A_MAX + 1] = {
	[INSTANCE_A_MSG] = { .type = NLA_NUL_STRING },
};

/* Request handler, defined further down. */
int instance_echo(struct sk_buff *skb, struct genl_info *info);

/*
 * Command table: a single command, INSTANCE_C_ECHO, served by
 * instance_echo(). Members that are not named here (flags, dumpit, ...)
 * are zero-initialized.
 */
static struct genl_ops instance_genl_ops[INSTANCE_C_MAX] = {
	[0] = {
		.policy = instance_genl_policy,
		.doit   = instance_echo,
		.cmd    = INSTANCE_C_ECHO,
	},
};

/*
 * Family description handed to genl_register_family(). The id is
 * GENL_ID_GENERATE, and the command table above is attached through
 * .ops/.n_ops. hdrsize is left at its zero default.
 */
static struct genl_family instance_genl_family = {
	.name         = GENL_INSTANCE_NAME,
	.id           = GENL_ID_GENERATE,
	.version      = 1,
	.maxattr      = INSTANCE_A_MAX,
	.ops          = instance_genl_ops,
	.n_ops        = ARRAY_SIZE(instance_genl_ops),
	.netnsok      = true,
	.parallel_ops = true,
	.module       = THIS_MODULE,
};

int instance_echo(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *na;
struct sk_buff *skb_echo = NULL;
int rc;
void *msg_head;
char *mydata;

if (info == NULL) {
goto out;
}

/* For each attribute there is an index in info->attrs
* which points to a nlattr structure in this structure
* the data is given
*/
na = info->attrs[INSTANCE_A_MSG];
if (na) {
mydata = (char *) nla_data(na);
if (mydata == NULL) {
printk(KERN_ERR "error while receiving data\n");
} else {
/* shuld be validate the contend of data before print */
printk(KERN_INFO "received: %s\n", mydata);
}
} else {
printk(KERN_INFO "no info->attrs %i\n", INSTANCE_A_MSG);
}

/* Send a message back */
/* Allocate some memory, since the size is not
* yet known use NLMSG_GOODSIZE
*/
skb_echo = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb_echo == NULL) {
goto out;
}

/* Create the message headers */
/* arguments of genlmsg_put:
* struct sk_buff *,
* int (sending) pid,
* int sequence number,
* struct genl_family *,
* int flags,
* u8 command index (why do we need this?)
*/
msg_head = genlmsg_put(skb_echo, 0, info->snd_seq + 1,
&instance_genl_family, 0, INSTANCE_C_ECHO);
if (msg_head == NULL) {
rc = -ENOMEM;
goto out;
}

/* Add a INSTANCE_A_MSG attribute (actual value to be sent) */
rc = nla_put_string(skb_echo, INSTANCE_A_MSG,
"Hello world from kernel space");
if (rc != 0) {
goto out;
}

/* Finalize the message */
genlmsg_end(skb_echo, msg_head);

/* Send the message back */
rc = genlmsg_unicast(genl_info_net(info), skb_echo, info->snd_portid);
if (rc != 0) {
goto out;
}

return 0;

out:
if (!skb_echo) {
kfree_skb(skb_echo);
}
printk(KERN_ERR "an error occured in instance_echo\n");
return 0;
}

/*
 * Module entry point: registers the generic netlink family and logs the
 * outcome. Returns 0 on success, otherwise the error reported by
 * genl_register_family().
 */
static int __init genl_instance_init(void)
{
	int err = genl_register_family(&instance_genl_family);

	if (err) {
		printk(KERN_ERR "register instance genl family fail, err %d\n", err);
		return err;
	}

	printk(KERN_INFO "register %s genl family success\n", GENL_INSTANCE_NAME);
	return 0;
}

static void genl_instance_uninit(void)
{
genl_unregister_family(&instance_genl_family);

printk(KERN_INFO "unregister %s genl family success\n", GENL_INSTANCE_NAME);
}

module_init(genl_instance_init);
module_exit(genl_instance_uninit);

MODULE_DESCRIPTION("generic netlink instance");
MODULE_LICENSE("GPL");
MODULE_VERSION("0.1");
MODULE_ALIAS_GENL_FAMILY(GENL_INSTANCE_NAME);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <poll.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <signal.h>

#include <linux/genetlink.h>

#include "../pub/genl_ins_pub.h"

/* Generic macros for dealing with netlink sockets. Might be duplicated
 * elsewhere. It is recommended that commercial grade applications use
 * libnl or libnetlink and use the interfaces provided by the library.
 *
 * All three take a pointer to the start of the object:
 *   GENLMSG_DATA(nlh)    - first byte after the generic netlink header
 *   GENLMSG_PAYLOAD(nlh) - number of bytes after the generic netlink header
 *   NLA_DATA(na)         - first payload byte of attribute `na`
 * Offsets are computed on char pointers; arithmetic on void pointers is a
 * compiler extension.
 */
#define GENLMSG_DATA(nlh) ((void *)((char *)NLMSG_DATA(nlh) + GENL_HDRLEN))
#define GENLMSG_PAYLOAD(nlh) (NLMSG_PAYLOAD(nlh, 0) - GENL_HDRLEN)
#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))

#define MESSAGE_TO_KERNEL "Hello World!"

/* State of one netlink socket used by this program. */
struct nl_sock
{
/* Socket descriptor; -1 while no socket is open. */
int fd;
/* Not assigned anywhere in the code shown here. */
uint32_t pid;

/* Netlink protocol the socket was created with (set by create_nl_sock). */
int protocol;
/* Sequence number placed in the family lookup request; starts at 1. */
uint32_t seq_id;

/* Generic netlink family id resolved by lookup_gennl_family_id(). */
int family_id;
/* Family name; the global instance initializes it to GENL_INSTANCE_NAME. */
const char *name;
};

/* Memory for netlink request and response messages
 * - headers are included: the netlink header, then the generic netlink
 *   header, then up to 256 bytes in which the attributes are laid out.
 */
struct gennl_ins_msg {
struct nlmsghdr nh;
struct genlmsghdr gh;
char user[256];
};

/* The single socket this program works with; fd starts at -1 (not open)
 * and the remaining members not named here start at zero. */
static struct nl_sock instance = { .fd = -1,
.name = GENL_INSTANCE_NAME };

int create_nl_sock(struct nl_sock *nl_sock, int protocol);
int lookup_gennl_family_id(struct nl_sock *nl_sock);
int gennl_interaction(struct nl_sock *nl_sock);

/*
 * Program entry point: opens the generic netlink socket, runs one echo
 * exchange with the kernel module, then closes the socket.
 *
 * Returns EXIT_SUCCESS when both steps succeed, EXIT_FAILURE otherwise.
 */
int main(int argc, char **argv)
{
    int status = EXIT_FAILURE;

    (void) argc;
    (void) argv;

    /* Any non-zero return from either step counts as failure. */
    if (create_nl_sock(&instance, NETLINK_GENERIC) == 0 &&
        gennl_interaction(&instance) == 0) {
        status = EXIT_SUCCESS;
    }

    /* 0 is a valid descriptor; only negative values mean "not open". */
    if (instance.fd >= 0) {
        close(instance.fd);
        instance.fd = -1;
    }

    return status;
}

/*
 * Opens a raw netlink socket for `protocol`, connects it to the kernel and
 * resolves the generic netlink family id.
 *
 * On success the descriptor, protocol, initial sequence number and family
 * id are stored in *nl_sock and 0 is returned. On failure the socket is
 * closed, nl_sock->fd is set to -1 and a negative value is returned
 * (-errno for socket/connect failures, -1 when the family lookup fails).
 */
int create_nl_sock(struct nl_sock *nl_sock, int protocol)
{
    int rcvbuf = 1024 * 1024;
    int retval;
    struct sockaddr_nl remote;

    nl_sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol);
    if (nl_sock->fd < 0) {
        /* Capture errno before any library call can overwrite it. */
        retval = -errno;
        fprintf(stderr, "create socket errno: %d\n", -retval);
        return retval;
    }

    nl_sock->protocol = protocol;
    nl_sock->seq_id = 1;

    /* Best effort: failing to enlarge the receive buffer is reported but
     * does not abort the setup. */
    if (setsockopt(nl_sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
                   &rcvbuf, sizeof rcvbuf)) {
        printf("setting %d-bytes socket receive buffer failed, errno %d\n",
               rcvbuf, errno);
    }

    /* Connect to kernel (pid 0) as remote address. */
    memset(&remote, 0, sizeof remote);
    remote.nl_family = AF_NETLINK;
    remote.nl_pid = 0;

    if (connect(nl_sock->fd, (struct sockaddr *) &remote,
                sizeof(remote)) < 0) {
        retval = -errno;
        fprintf(stderr, "connect sock fail, errno %d\n", -retval);
        goto error;
    }

    if (lookup_gennl_family_id(nl_sock)) {
        retval = -1;
        goto error;
    }

    return 0;

error:
    close(nl_sock->fd);
    nl_sock->fd = -1;

    return retval;
}

/*
 * Asks the generic netlink controller (GENL_ID_CTRL) for the numeric id of
 * the family named GENL_INSTANCE_NAME and stores it in nl_sock->family_id.
 *
 * Returns 0 on success, -1 on any send/receive/parse failure.
 */
int lookup_gennl_family_id(struct nl_sock *nl_sock)
{
    struct gennl_ins_msg request, reply;
    struct sockaddr_nl nl_address;
    struct nlattr *nl_na;
    int length;
    int remaining;

    /* Zero both buffers so no uninitialized header fields or padding are
     * sent to the kernel, and so the reply starts from a known state. */
    memset(&request, 0, sizeof(request));
    memset(&reply, 0, sizeof(reply));

    /* Step 1. prepare request msg */
    /* netlink header */
    request.nh.nlmsg_type = GENL_ID_CTRL;
    request.nh.nlmsg_flags = NLM_F_REQUEST;
    request.nh.nlmsg_seq = nl_sock->seq_id;
    request.nh.nlmsg_pid = getpid();
    request.nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);

    /* generic netlink header */
    request.gh.cmd = CTRL_CMD_GETFAMILY;
    request.gh.version = 0x2;

    /* single attribute: the family name, including its NUL terminator */
    nl_na = (struct nlattr *) GENLMSG_DATA(&request);
    nl_na->nla_type = CTRL_ATTR_FAMILY_NAME;
    nl_na->nla_len = sizeof(GENL_INSTANCE_NAME) + NLA_HDRLEN;
    memcpy(NLA_DATA(nl_na), GENL_INSTANCE_NAME, sizeof(GENL_INSTANCE_NAME));

    request.nh.nlmsg_len += NLMSG_ALIGN(nl_na->nla_len);

    /* Step 2. send request msg */
    memset(&nl_address, 0, sizeof(nl_address));
    nl_address.nl_family = AF_NETLINK;

    length = sendto(nl_sock->fd, (char *) &request, request.nh.nlmsg_len,
                    0, (struct sockaddr *) &nl_address, sizeof(nl_address));
    if (length < 0 || (uint32_t) length != request.nh.nlmsg_len) {
        fprintf(stderr, "%s sendto fail, %d\n", __func__, length);
        return -1;
    }

    /* Step 3. receive reply msg */
    length = recv(nl_sock->fd, &reply, sizeof(reply), 0);
    if (length < 0) {
        fprintf(stderr, "%s recv fail, %d\n", __func__, length);
        return -1;
    }

    /* Step 4. validate&parse reply msg */
    if (!NLMSG_OK((&reply.nh), length)) {
        fprintf(stderr, "family ID request: invalid message\n");
        return -1;
    }
    if (reply.nh.nlmsg_type == NLMSG_ERROR) {
        fprintf(stderr, "family ID request: receive error\n");
        return -1;
    }
    if (reply.nh.nlmsg_len < NLMSG_LENGTH(GENL_HDRLEN)) {
        fprintf(stderr, "family ID request: invalid message\n");
        return -1;
    }

    /* Walk the attributes instead of assuming the family id sits at a
     * fixed position; every step is checked against the bytes left in the
     * message so a malformed reply cannot walk past the buffer. */
    remaining = (int) GENLMSG_PAYLOAD(&reply.nh);
    nl_na = (struct nlattr *) GENLMSG_DATA(&reply);
    while (remaining >= NLA_HDRLEN) {
        int attr_len = nl_na->nla_len;

        if (attr_len < NLA_HDRLEN || attr_len > remaining) {
            break;
        }

        if (nl_na->nla_type == CTRL_ATTR_FAMILY_ID) {
            uint16_t id;

            if (attr_len < NLA_HDRLEN + (int) sizeof(id)) {
                break;
            }
            memcpy(&id, NLA_DATA(nl_na), sizeof(id));
            nl_sock->family_id = id;
            printf("%s genric netlink id %d\n",
                   GENL_INSTANCE_NAME, nl_sock->family_id);
            return 0;
        }

        attr_len = NLA_ALIGN(attr_len);
        if (attr_len > remaining) {
            break;
        }
        remaining -= attr_len;
        nl_na = (struct nlattr *) ((char *) nl_na + attr_len);
    }

    fprintf(stderr, "family ID request: attribute %d not found in reply\n",
            CTRL_ATTR_FAMILY_ID);
    return -1;
}

int gennl_interaction(struct nl_sock *nl_sock)
{
struct gennl_ins_msg request, reply;
struct nlattr *nl_na;
struct sockaddr_nl nl_address;
int length;

memset(&request, 0, sizeof(request));
memset(&reply, 0, sizeof(reply));


request.nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
request.nh.nlmsg_type = nl_sock->family_id;
request.nh.nlmsg_flags = NLM_F_REQUEST;
request.nh.nlmsg_seq = 60;
request.nh.nlmsg_pid = getpid();

request.gh.cmd = INSTANCE_C_ECHO;
nl_na = (struct nlattr *) GENLMSG_DATA(&request);
nl_na->nla_type = INSTANCE_A_MSG;
nl_na->nla_len = sizeof(MESSAGE_TO_KERNEL) + NLA_HDRLEN;
memcpy(NLA_DATA(nl_na), MESSAGE_TO_KERNEL, sizeof(MESSAGE_TO_KERNEL));

request.nh.nlmsg_len += NLMSG_ALIGN(nl_na->nla_len);

memset(&nl_address, 0, sizeof(nl_address));
nl_address.nl_family = AF_NETLINK;

length = sendto(nl_sock->fd, (char *) &request, request.nh.nlmsg_len,
0, (struct sockaddr *) &nl_address, sizeof(nl_address));
if (length != request.nh.nlmsg_len) {
fprintf(stderr, "%s sento return %d, expect %d\n", __func__,
length, request.nh.nlmsg_len);
return -1;
}

length = recv(nl_sock->fd, &reply, sizeof(reply), 0);
if (length < 0) {
printf("%s recv error %d\n", __func__, length);
return -1;
}

if (!NLMSG_OK((&reply.nh), length)) {
fprintf(stderr, "%s recv invalid nlmsg\n", __func__);
return -1;
}

length = GENLMSG_PAYLOAD(&reply.nh);
nl_na = (struct nlattr *) GENLMSG_DATA(&reply);
printf("kernel replied: %s\n",(char *)NLA_DATA(nl_na));

return 0;
}

pub/genl_ins_pub.h

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Definitions shared by the kernel module and the userspace client:
 * the family name plus the command and attribute numbering.
 */
#ifndef GENL_INS_PUB_H
#define GENL_INS_PUB_H

/* Name under which the generic netlink family is registered and looked up. */
#define GENL_INSTANCE_NAME "gennl_ins"

/* Element count of a true array (not of a pointer). */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(_x) (sizeof(_x) / sizeof((_x)[0]))
#endif

/* commands */
enum {
    INSTANCE_C_UNSPEC,
    INSTANCE_C_ECHO,
    __INSTANCE_C_MAX,
};

/* Highest valid command number. */
#define INSTANCE_C_MAX (__INSTANCE_C_MAX - 1)

/* attributes */
enum {
    INSTANCE_A_UNSPEC,
    INSTANCE_A_MSG,
    __INSTANCE_A_MAX,
};

/* Highest valid attribute number. */
#define INSTANCE_A_MAX (__INSTANCE_A_MAX - 1)

#endif /* GENL_INS_PUB_H */

Makefile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Example run on CentOS 7.4 with kernel 3.10.0-693.el7.x86_64.
# Recipe lines must start with a TAB. Copy/paste may turn tabs into spaces,
# so check and correct this after copying.

CONFIG_MODULE_SIG=n

# kbuild builds kernel/netlink_instance.ko from kernel/netlink_instance.c.
obj-m += kernel/netlink_instance.o

.PHONY: all clean \
	kernel-build kernel-clean kernel-clean-temporary \
	kernel-module-install kernel-module-uninstall kernel-clean-ring-buffer \
	user-build user-clean

all: kernel-build kernel-clean-temporary user-build
	@tput setaf 3
	@echo " done: all"
	@tput sgr0
clean: kernel-clean user-clean
	@tput setaf 3
	@echo " done: clean"
	@tput sgr0



# $(MAKE) rather than a literal "make" so flags and the jobserver are
# passed down to the kernel build.
kernel-build:
	@tput setaf 1
	@echo " kernel-build"
	@tput sgr0
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
kernel-clean:
	@tput setaf 1
	@echo " kernel-clean"
	@tput sgr0
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
# Removes intermediate build files but keeps the .ko.
kernel-clean-temporary:
	@tput setaf 1
	@echo " kernel-clean-temporary"
	@tput sgr0
	-rm -rf *.o *~ core .depend .*.cmd *.mod.c .tmp_versions
	-rm -rf kernel/*.o kernel/*~ kernel/core kernel/.depend kernel/.*.cmd kernel/*.mod.c kernel/.tmp_versions
	-rm -rf Module.symvers modules.order
# The module file is the one produced by obj-m above.
kernel-module-install:
	@tput setaf 1
	@echo " kernel-module-install"
	@tput sgr0
	-sudo insmod kernel/netlink_instance.ko
kernel-module-uninstall:
	@tput setaf 1
	@echo " kernel-module-uninstall"
	@tput sgr0
	-sudo rmmod netlink_instance
kernel-clean-ring-buffer:
	@tput setaf 1
	@echo " kernel-clean-ring-buffer"
	@tput sgr0
	sudo dmesg -c > /dev/null



user-build:
	@tput setaf 1
	@echo " user-build"
	@tput sgr0
	gcc user/user_netlink.c -o genl_user
user-clean:
	@tput setaf 1
	@echo " user-clean"
	@tput sgr0
	rm -rf *.out genl_user