fc_sort: initial commit

Ordering matters in fc files; the last match wins. In builds where
many BOARD_SEPOLICY_DIRS are set, the order of that list becomes
increasingly important in order to maintain a cohesive built
file_contexts.

To correct this, we sort the device specific file_contexts entries
with the upstream fc_sort tool.

Change-Id: Id79cc6f434c41179d5c0d0d739c4718918b0b1dc
Signed-off-by: William Roberts <william.c.roberts@intel.com>
diff --git a/Android.mk b/Android.mk
index 7fc03a4..569ef9f 100644
--- a/Android.mk
+++ b/Android.mk
@@ -26,10 +26,14 @@
 LOCAL_ADDITIONAL_M4DEFS := $(addprefix -D, $(BOARD_SEPOLICY_M4DEFS))
 endif
 
-# Builds paths for all policy files found in BOARD_SEPOLICY_DIRS.
+# Builds paths for all policy files found in BOARD_SEPOLICY_DIRS and the LOCAL_PATH.
 # $(1): the set of policy name paths to build
 build_policy = $(foreach type, $(1), $(foreach file, $(addsuffix /$(type), $(LOCAL_PATH) $(BOARD_SEPOLICY_DIRS)), $(sort $(wildcard $(file)))))
 
+# Builds paths for all policy files found in BOARD_SEPOLICY_DIRS.
+# $(1): the set of policy name paths to build
+build_device_policy = $(foreach type, $(1), $(foreach file, $(addsuffix /$(type), $(BOARD_SEPOLICY_DIRS)), $(sort $(wildcard $(file)))))
+
 # Add a file containing only a newline in-between each policy configuration
 # 'contexts' file. This will allow OEM policy configuration files without a
 # final newline (0x0A) to be built correctly by the m4(1) macro processor.
@@ -178,30 +182,68 @@
 
 include $(BUILD_SYSTEM)/base_rules.mk
 
-all_fc_files := file_contexts
-ifneq ($(filter address,$(SANITIZE_TARGET)),)
-  all_fc_files := $(all_fc_files) file_contexts_asan
-endif
-all_fc_files := $(call build_policy, $(all_fc_files))
-all_fcfiles_with_nl := $(call add_nl, $(all_fc_files), $(built_nl))
+# The file_contexts.bin is built in the following way:
+# 1. Collect all file_contexts files in THIS repository and process them with
+#    m4 into a tmp file called file_contexts.local.tmp.
+# 2. Collect all device specific file_contexts files and process them with m4
+#    into a tmp file called file_contexts.device.tmp.
+# 3. Run checkfc and fc_sort on file_contexts.device.tmp and output to
+#    file_contexts.device.sorted.tmp.
+# 4. Concatenate file_contexts.local.tmp and file_contexts.device.tmp into
+#    file_contexts.concat.tmp.
+# 5. Run checkfc and sefcontext_compile on file_contexts.concat.tmp to produce
+#    file_contexts.bin.
+#
+#  Note: That a newline file is placed between each file_context file found to
+#        ensure a proper build when an fc file is missing an ending newline.
 
-file_contexts.tmp := $(intermediates)/file_contexts.tmp
-$(file_contexts.tmp): PRIVATE_FC_FILES := $(all_fcfiles_with_nl)
-$(file_contexts.tmp): PRIVATE_ADDITIONAL_M4DEFS := $(LOCAL_ADDITIONAL_M4DEFS)
-$(file_contexts.tmp): $(all_fcfiles_with_nl)
+local_fc_files := $(LOCAL_PATH)/file_contexts
+ifneq ($(filter address,$(SANITIZE_TARGET)),)
+  local_fc_files := $(local_fc_files) $(LOCAL_PATH)/file_contexts_asan
+endif
+local_fcfiles_with_nl := $(call add_nl, $(local_fc_files), $(built_nl))
+
+file_contexts.local.tmp := $(intermediates)/file_contexts.local.tmp
+$(file_contexts.local.tmp): $(local_fcfiles_with_nl)
 	@mkdir -p $(dir $@)
-	$(hide) m4 -s $(PRIVATE_ADDITIONAL_M4DEFS) $(PRIVATE_FC_FILES) > $@
+	$(hide) m4 -s $^ > $@
+
+device_fc_files := $(call build_device_policy, file_contexts)
+device_fcfiles_with_nl := $(call add_nl, $(device_fc_files), $(built_nl))
+
+file_contexts.device.tmp := $(intermediates)/file_contexts.device.tmp
+$(file_contexts.device.tmp): PRIVATE_ADDITIONAL_M4DEFS := $(LOCAL_ADDITIONAL_M4DEFS)
+$(file_contexts.device.tmp): $(device_fcfiles_with_nl)
+	@mkdir -p $(dir $@)
+	$(hide) m4 -s $(PRIVATE_ADDITIONAL_M4DEFS) $^ > $@
+
+file_contexts.device.sorted.tmp := $(intermediates)/file_contexts.device.sorted.tmp
+$(file_contexts.device.sorted.tmp): PRIVATE_SEPOLICY := $(built_sepolicy)
+$(file_contexts.device.sorted.tmp): $(file_contexts.device.tmp) $(built_sepolicy) $(HOST_OUT_EXECUTABLES)/fc_sort $(HOST_OUT_EXECUTABLES)/checkfc
+	@mkdir -p $(dir $@)
+	$(hide) $(HOST_OUT_EXECUTABLES)/checkfc $(PRIVATE_SEPOLICY) $<
+	$(hide) $(HOST_OUT_EXECUTABLES)/fc_sort $< $@
+
+file_contexts.concat.tmp := $(intermediates)/file_contexts.concat.tmp
+$(file_contexts.concat.tmp): $(file_contexts.local.tmp) $(file_contexts.device.sorted.tmp)
+	@mkdir -p $(dir $@)
+	$(hide) m4 -s $^ > $@
 
 $(LOCAL_BUILT_MODULE): PRIVATE_SEPOLICY := $(built_sepolicy)
-$(LOCAL_BUILT_MODULE): $(file_contexts.tmp) $(built_sepolicy) $(HOST_OUT_EXECUTABLES)/sefcontext_compile $(HOST_OUT_EXECUTABLES)/checkfc
+$(LOCAL_BUILT_MODULE): $(file_contexts.concat.tmp) $(built_sepolicy) $(HOST_OUT_EXECUTABLES)/sefcontext_compile $(HOST_OUT_EXECUTABLES)/checkfc
 	@mkdir -p $(dir $@)
 	$(hide) $(HOST_OUT_EXECUTABLES)/checkfc $(PRIVATE_SEPOLICY) $<
 	$(hide) $(HOST_OUT_EXECUTABLES)/sefcontext_compile -o $@ $<
 
 built_fc := $(LOCAL_BUILT_MODULE)
-all_fc_files :=
-all_fcfiles_with_nl :=
-file_contexts.tmp :=
+local_fc_files :=
+local_fcfiles_with_nl :=
+device_fc_files :=
+device_fcfiles_with_nl :=
+file_contexts.concat.tmp :=
+file_contexts.device.sorted.tmp :=
+file_contexts.device.tmp :=
+file_contexts.local.tmp :=
 
 ##################################
 include $(CLEAR_VARS)
@@ -427,6 +469,7 @@
 ##################################
 
 build_policy :=
+build_device_policy :=
 sepolicy_build_files :=
 built_sepolicy :=
 built_sepolicy_recovery :=
diff --git a/tools/fc_sort/Android.mk b/tools/fc_sort/Android.mk
new file mode 100644
index 0000000..f78d550
--- /dev/null
+++ b/tools/fc_sort/Android.mk
@@ -0,0 +1,12 @@
+LOCAL_PATH:= $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := fc_sort
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := fc_sort.c
+LOCAL_CXX_STL := none
+
+include $(BUILD_HOST_EXECUTABLE)
+
+###################################
diff --git a/tools/fc_sort/MODULE_LICENSE_GPL b/tools/fc_sort/MODULE_LICENSE_GPL
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tools/fc_sort/MODULE_LICENSE_GPL
diff --git a/tools/fc_sort/README b/tools/fc_sort/README
new file mode 100644
index 0000000..0210dc7
--- /dev/null
+++ b/tools/fc_sort/README
@@ -0,0 +1,9 @@
+fc_sort is a tool used for sorting the file_contexts entries based on a heuristic that is
+ covered by a Fedora document. That document can be found here:
+ * https://fedoraproject.org/wiki/SELinux/ManagingFileContext
+
+The tool itself originates from:
+ * https://github.com/TresysTechnology/refpolicy
+
+It can be updated to the current tip of master branch with the below command:
+$ wget https://raw.githubusercontent.com/TresysTechnology/refpolicy/master/support/fc_sort.c
diff --git a/tools/fc_sort/fc_sort.c b/tools/fc_sort/fc_sort.c
new file mode 100644
index 0000000..f4d2cd0
--- /dev/null
+++ b/tools/fc_sort/fc_sort.c
@@ -0,0 +1,567 @@
+/* Copyright 2005,2013 Tresys Technology
+ *
+ * Some parts of this came from matchpathcon.c in libselinux
+ */
+
+/* PURPOSE OF THIS PROGRAM
+ * The original setfiles sorting algorithm did not take into
+ * account regular expression specificity. With the current
+ * strict and targeted policies this is not an issue because
+ * the file contexts are partially hand sorted and concatenated
+ * in the right order so that the matches are generally correct.
+ * The way reference policy and loadable policy modules handle
+ * file contexts makes them come out in an unpredictable order
+ * and therefore setfiles (or this standalone tool) need to sort
+ * the regular expressions in a deterministic and stable way.
+ */
+
+#define BUF_SIZE 4096;
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+typedef unsigned char bool_t;
+
+/* file_context_node
+ * A node used in a linked list of file contexts.c
+ * Each node contains the regular expression, the type and
+ *  the context, as well as information about the regular
+ *  expression. The regular expression data (meta, stem_len
+ *  and str_len) can be filled in by using the fc_fill_data
+ *  function after the regular expression has been loaded.
+ * next points to the next node in the linked list.
+ */
+typedef struct file_context_node {
+	char *path;
+	char *file_type;
+	char *context;
+	bool_t meta;
+	int stem_len;
+	int str_len;
+	struct file_context_node *next;
+} file_context_node_t;
+
+void file_context_node_destroy(file_context_node_t *x)
+{
+	free(x->path);
+	free(x->file_type);
+	free(x->context);
+}
+
+
+
+/* file_context_bucket
+ * A node used in a linked list of buckets that contain
+ *  file_context_node's.
+ * Each node contains a pointer to a file_context_node which
+ *  is the header of its linked list. This linked list is the
+ *  content of this bucket.
+ * next points to the next bucket in the linked list.
+ */
+typedef struct file_context_bucket {
+	file_context_node_t *data;
+	struct file_context_bucket *next;
+} file_context_bucket_t;
+
+
+
+/* fc_compare
+ * Compares two file contexts' regular expressions and returns:
+ *    -1 if a is less specific than b
+ *     0 if a and be are equally specific
+ *     1 if a is more specific than b
+ * The comparison is based on the following statements,
+ *  in order from most important to least important, given a and b:
+ *     If a is a regular expression and b is not,
+ *      -> a is less specific than b.
+ *     If a's stem length is shorter than b's stem length,
+ *      -> a is less specific than b.
+ *     If a's string length is shorter than b's string length,
+ *      -> a is less specific than b.
+ *     If a does not have a specified type and b does,
+ *      -> a is less specific than b.
+ */
+int fc_compare(file_context_node_t *a, file_context_node_t *b)
+{
+	/* Check to see if either a or b have meta characters
+	 *  and the other doesn't. */
+	if (a->meta && !b->meta)
+		return -1;
+	if (b->meta && !a->meta)
+		return 1;
+
+	/* Check to see if either a or b have a shorter stem
+	 *  length than the other. */
+	if (a->stem_len < b->stem_len)
+		return -1;
+	if (b->stem_len < a->stem_len)
+		return 1;
+
+	/* Check to see if either a or b have a shorter string
+	 *  length than the other. */
+	if (a->str_len < b->str_len)
+		return -1;
+	if (b->str_len < a->str_len)
+		return 1;
+
+	/* Check to see if either a or b has a specified type
+	 *  and the other doesn't. */
+	if (!a->file_type && b->file_type)
+		return -1;
+	if (!b->file_type && a->file_type)
+		return 1;
+
+	/* If none of the above conditions were satisfied,
+	 * then a and b are equally specific. */
+	return 0;
+}
+
+
+
+/* fc_merge
+ * Merges two sorted file context linked lists into one
+ *  sorted one.
+ * Pass two lists a and b, and after the completion of fc_merge,
+ *  the final list is contained in a, and b is empty.
+ */
+file_context_node_t *fc_merge(file_context_node_t *a,
+				   file_context_node_t *b)
+{
+	file_context_node_t *a_current;
+	file_context_node_t *b_current;
+	file_context_node_t *temp;
+	file_context_node_t *jumpto;
+
+
+
+	/* If a is a empty list, and b is not,
+	 *  set a as b and proceed to the end. */
+	if (!a && b)
+		a = b;
+	/* If b is an empty list, leave a as it is. */
+	else if (!b) {
+	} else {
+		/* Make it so the list a has the lesser
+		 *  first element always. */
+		if (fc_compare(a, b) == 1) {
+			temp = a;
+			a = b;
+			b = temp;
+		}
+		a_current = a;
+		b_current = b;
+
+		/* Merge by inserting b's nodes in between a's nodes. */
+		while (a_current->next && b_current) {
+			jumpto = a_current->next;
+
+			/* Insert b's nodes in between the current a node
+			 *  and the next a node.*/
+			while (b_current && a_current->next &&
+			       fc_compare(a_current->next,
+					  b_current) != -1) {
+
+
+				temp = a_current->next;
+				a_current->next = b_current;
+				b_current = b_current->next;
+				a_current->next->next = temp;
+				a_current = a_current->next;
+			}
+
+			/* Skip all the inserted node from b to the
+			 *  next node in the original a. */
+			a_current = jumpto;
+		}
+
+
+		/* if there is anything left in b to be inserted,
+		   put it on the end */
+		if (b_current) {
+			a_current->next = b_current;
+		}
+	}
+
+	return a;
+}
+
+
+
+/* fc_merge_sort
+ * Sorts file contexts from least specific to more specific.
+ * The bucket linked list is passed and after the completion
+ *  of the fc_merge_sort function, there is only one bucket
+ *  (pointed to by master) that contains a linked list
+ *  of all the file contexts, in sorted order.
+ * Explanation of the algorithm:
+ *  The algorithm implemented in fc_merge_sort is an iterative
+ *   implementation of merge sort.
+ *  At first, each bucket has a linked list of file contexts
+ *   that are 1 element each.
+ *  Each pass, each odd numbered bucket is merged into the bucket
+ *   before it. This halves the number of buckets each pass.
+ *  It will continue passing over the buckets (as described above)
+ *   until there is only  one bucket left, containing the list of
+ *   file contexts, sorted.
+ */
+void fc_merge_sort(file_context_bucket_t *master)
+{
+
+
+	file_context_bucket_t *current;
+	file_context_bucket_t *temp;
+
+	/* Loop until master is the only bucket left
+	 * so that this will stop when master contains
+	 * the sorted list. */
+	while (master->next) {
+		current = master;
+
+		/* This loop merges buckets two-by-two. */
+		while (current) {
+
+			if (current->next) {
+
+				current->data =
+				    fc_merge(current->data,
+					     current->next->data);
+
+
+
+				temp = current->next;
+				current->next = current->next->next;
+
+				free(temp);
+
+			}
+
+
+			current = current->next;
+		}
+	}
+
+
+}
+
+
+
+/* fc_fill_data
+ * This processes a regular expression in a file context
+ *  and sets the data held in file_context_node, namely
+ *  meta, str_len and stem_len.
+ * The following changes are made to fc_node after the
+ *  the completion of the function:
+ *     fc_node->meta =		1 if path has a meta character, 0 if not.
+ *     fc_node->str_len =	The string length of the entire path
+ *     fc_node->stem_len = 	The number of characters up until
+ *				 the first meta character.
+ */
+void fc_fill_data(file_context_node_t *fc_node)
+{
+	int c = 0;
+
+	fc_node->meta = 0;
+	fc_node->stem_len = 0;
+	fc_node->str_len = 0;
+
+	/* Process until the string termination character
+	 *  has been reached.
+	 * Note: this while loop has been adapted from
+	 *  spec_hasMetaChars in matchpathcon.c from
+	 *  libselinux-1.22. */
+	while (fc_node->path[c] != '\0') {
+		switch (fc_node->path[c]) {
+		case '.':
+		case '^':
+		case '$':
+		case '?':
+		case '*':
+		case '+':
+		case '|':
+		case '[':
+		case '(':
+		case '{':
+			/* If a meta character is found,
+			 *  set meta to one */
+			fc_node->meta = 1;
+			break;
+		case '\\':
+			/* If a escape character is found,
+			 *  skip the next character. */
+			c++;
+		default:
+			/* If no meta character has been found yet,
+			 *  add one to the stem length. */
+			if (!fc_node->meta)
+				fc_node->stem_len++;
+			break;
+		}
+
+		fc_node->str_len++;
+		c++;
+	}
+}
+
+/* main
+ * This program takes in two arguments, the input filename and the
+ *  output filename. The input file should be syntactically correct.
+ * Overall what is done in the main is read in the file and store each
+ *  line of code, sort it, then output it to the output file.
+ */
+int main(int argc, char *argv[])
+{
+	int lines;
+	size_t start, finish, regex_len, context_len;
+	size_t line_len, buf_len, i, j;
+	char *input_name, *output_name, *line_buf;
+
+	file_context_node_t *temp;
+	file_context_node_t *head;
+	file_context_node_t *current;
+	file_context_bucket_t *master;
+	file_context_bucket_t *bcurrent;
+
+	FILE *in_file, *out_file;
+
+
+	/* Check for the correct number of command line arguments. */
+	if (argc < 2 || argc > 3) {
+		fprintf(stderr, "Usage: %s <infile> [<outfile>]\n",argv[0]);
+		return 1;
+	}
+
+	input_name = argv[1];
+	output_name = (argc >= 3) ? argv[2] : NULL;
+
+	i = j = lines = 0;
+
+	/* Open the input file. */
+	if (!(in_file = fopen(input_name, "r"))) {
+		fprintf(stderr, "Error: failure opening input file for read.\n");
+		return 1;
+	}
+
+	/* Initialize the head of the linked list. */
+	head = current = (file_context_node_t*)malloc(sizeof(file_context_node_t));
+	head->next = NULL;
+
+	/* Parse the file into a file_context linked list. */
+	line_buf = NULL;
+
+	while ( getline(&line_buf, &buf_len, in_file) != -1 ){
+		line_len = strlen(line_buf);
+		if( line_len == 0 || line_len == 1)
+			continue;
+		/* Get rid of whitespace from the front of the line. */
+		for (i = 0; i < line_len; i++) {
+			if (!isspace(line_buf[i]))
+				break;
+		}
+
+
+		if (i >= line_len)
+			continue;
+		/* Check if the line isn't empty and isn't a comment */
+		if (line_buf[i] == '#')
+			continue;
+
+		/* We have a valid line - allocate a new node. */
+		temp = (file_context_node_t *)malloc(sizeof(file_context_node_t));
+		if (!temp) {
+			fprintf(stderr, "Error: failure allocating memory.\n");
+			return 1;
+		}
+		temp->next = NULL;
+		memset(temp, 0, sizeof(file_context_node_t));
+
+		/* Parse out the regular expression from the line. */
+		start = i;
+
+
+		while (i < line_len && (!isspace(line_buf[i])))
+			i++;
+		finish = i;
+
+
+		regex_len = finish - start;
+
+		if (regex_len == 0) {
+			file_context_node_destroy(temp);
+			free(temp);
+
+
+			continue;
+		}
+
+		temp->path = (char*)strndup(&line_buf[start], regex_len);
+		if (!temp->path) {
+			file_context_node_destroy(temp);
+			free(temp);
+			fprintf(stderr, "Error: failure allocating memory.\n");
+			return 1;
+		}
+
+		/* Get rid of whitespace after the regular expression. */
+		for (; i < line_len; i++) {
+
+			if (!isspace(line_buf[i]))
+				break;
+		}
+
+		if (i == line_len) {
+			file_context_node_destroy(temp);
+			free(temp);
+			continue;
+		}
+
+		/* Parse out the type from the line (if it
+			*  is there). */
+		if (line_buf[i] == '-') {
+			temp->file_type = (char *)malloc(sizeof(char) * 3);
+			if (!(temp->file_type)) {
+				fprintf(stderr, "Error: failure allocating memory.\n");
+				return 1;
+			}
+
+			if( i + 2 >= line_len ) {
+				file_context_node_destroy(temp);
+				free(temp);
+
+				continue;
+			}
+
+			/* Fill the type into the array. */
+			temp->file_type[0] = line_buf[i];
+			temp->file_type[1] = line_buf[i + 1];
+			i += 2;
+			temp->file_type[2] = 0;
+
+			/* Get rid of whitespace after the type. */
+			for (; i < line_len; i++) {
+				if (!isspace(line_buf[i]))
+					break;
+			}
+
+			if (i == line_len) {
+
+				file_context_node_destroy(temp);
+				free(temp);
+				continue;
+			}
+		}
+
+		/* Parse out the context from the line. */
+		start = i;
+		while (i < line_len && (!isspace(line_buf[i])))
+			i++;
+		finish = i;
+
+		context_len = finish - start;
+
+		temp->context = (char*)strndup(&line_buf[start], context_len);
+		if (!temp->context) {
+			file_context_node_destroy(temp);
+			free(temp);
+			fprintf(stderr, "Error: failure allocating memory.\n");
+			return 1;
+		}
+
+		/* Set all the data about the regular
+			*  expression. */
+		fc_fill_data(temp);
+
+		/* Link this line of code at the end of
+			*  the linked list. */
+		current->next = temp;
+		current = current->next;
+		lines++;
+
+
+		free(line_buf);
+		line_buf = NULL;
+	}
+	fclose(in_file);
+
+	/* Create the bucket linked list from the earlier linked list. */
+	current = head->next;
+	bcurrent = master =
+	    (file_context_bucket_t *)
+	    malloc(sizeof(file_context_bucket_t));
+	bcurrent->next = NULL;
+	bcurrent->data = NULL;
+
+	/* Go until all the nodes have been put in individual buckets. */
+	while (current) {
+		/* Copy over the file context line into the bucket. */
+		bcurrent->data = current;
+		current = current->next;
+
+		/* Detach the node in the bucket from the old list. */
+		bcurrent->data->next = NULL;
+
+		/* If there should be another bucket, put one at the end. */
+		if (current) {
+			bcurrent->next =
+			    (file_context_bucket_t *)
+			    malloc(sizeof(file_context_bucket_t));
+			if (!(bcurrent->next)) {
+				printf
+				    ("Error: failure allocating memory.\n");
+				return -1;
+			}
+
+			/* Make sure the new bucket thinks it's the end of the
+			 *  list. */
+			bcurrent->next->next = NULL;
+
+			bcurrent = bcurrent->next;
+		}
+
+	}
+
+	/* Sort the bucket list. */
+	fc_merge_sort(master);
+
+	/* Open the output file. */
+	if (output_name) {
+		if (!(out_file = fopen(output_name, "w"))) {
+			printf("Error: failure opening output file for write.\n");
+			return -1;
+		}
+	} else {
+		out_file = stdout;
+	}
+
+	/* Output the sorted file_context linked list to the output file. */
+	current = master->data;
+	while (current) {
+		/* Output the path. */
+		fprintf(out_file, "%s\t\t", current->path);
+
+		/* Output the type, if there is one. */
+		if (current->file_type) {
+			fprintf(out_file, "%s\t", current->file_type);
+		}
+
+		/* Output the context. */
+		fprintf(out_file, "%s\n", current->context);
+
+		/* Remove the node. */
+		temp = current;
+		current = current->next;
+
+		file_context_node_destroy(temp);
+		free(temp);
+
+	}
+	free(master);
+
+	if (output_name) {
+		fclose(out_file);
+	}
+
+	return 0;
+}