firewire: core: optimize Topology Map creation

The Topology Map of the local node was created in CPU byte order,
then a temporary big endian copy was created to compute the CRC,
and when a read request to the Topology Map arrived it had to be
converted to big endian byte order again.

We now generate it in big endian byte order in the first place.
This also saves 1000 bytes of stack usage in tasklet context.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index fddf2b3..9a5f38c 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -28,9 +28,9 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/string.h>
 
 #include <asm/atomic.h>
+#include <asm/byteorder.h>
 #include <asm/system.h>
 
 #include "core.h"
@@ -510,13 +510,16 @@
 static void update_topology_map(struct fw_card *card,
 				u32 *self_ids, int self_id_count)
 {
-	int node_count;
+	int node_count = (card->root_node->node_id & 0x3f) + 1;
+	__be32 *map = card->topology_map;
 
-	card->topology_map[1]++;
-	node_count = (card->root_node->node_id & 0x3f) + 1;
-	card->topology_map[2] = (node_count << 16) | self_id_count;
-	card->topology_map[0] = (self_id_count + 2) << 16;
-	memcpy(&card->topology_map[3], self_ids, self_id_count * 4);
+	*map++ = cpu_to_be32((self_id_count + 2) << 16);
+	*map++ = cpu_to_be32(be32_to_cpu(card->topology_map[1]) + 1);
+	*map++ = cpu_to_be32((node_count << 16) | self_id_count);
+
+	while (self_id_count--)
+		*map++ = cpu_to_be32p(self_ids++);
+
 	fw_compute_block_crc(card->topology_map);
 }