arch/ia64/sn/kernel/bte.c - LeafOS-Devices/android_kernel_samsung_gta4xl - Gitiles

 /*
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  */

 #include <linux/config.h>
 #include <linux/module.h>
 #include <asm/sn/nodepda.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/sn_cpuid.h>
 #include <asm/sn/pda.h>
 #include <asm/sn/shubio.h>
 #include <asm/nodedata.h>
 #include <asm/delay.h>

 #include <linux/bootmem.h>
 #include <linux/string.h>
 #include <linux/sched.h>

 #include <asm/sn/bte.h>

 /*
  * Mask selecting the offset of an address within an L1 cache line
  * (assumes L1_CACHE_BYTES is a power of two -- TODO confirm).  Only
  * defined here when no header has provided it already.
  */
 #ifndef L1_CACHE_MASK
 #define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
 #endif

 /*
  * two interfaces on two btes
  *
  * MAX_NODES_TO_TRY sizes the nasid_to_try[] candidate list in bte_copy()
  * and bounds its search loop.  MAX_INTERFACES_TO_TRY is not referenced
  * anywhere in the visible part of this file.
  */
 #define MAX_INTERFACES_TO_TRY		4
 #define MAX_NODES_TO_TRY		2

 /*
  * bte_if_on_node(nasid, interface)
  *
  * Return the BTE interface descriptor number 'interface' that lives in
  * the nodepda of the node identified by 'nasid'.
  *
  * Returns NULL when nasid_to_cnodeid() yields -1 for the nasid.
  * 'interface' is used as an index into bte_if[] without a range check.
  */
 static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
 {
 	nodepda_t *node_pda;

 	if (nasid_to_cnodeid(nasid) != -1) {
 		node_pda = NODEPDA(nasid_to_cnodeid(nasid));
 		return &node_pda->bte_if[interface];
 	}

 	return (struct bteinfo_s *)NULL;
 }

 /*
  * bte_start_transfer(bte, len, mode)
  *
  * Start a transfer on an interface whose other registers have already
  * been programmed.
  *
  * When is_shub2() is true a single BTE_CTRL_STORE carries IBLS_BUSY,
  * the length and the mode shifted left by 24 bits.  Otherwise the
  * length goes out through BTE_LNSTAT_STORE first and the mode through
  * BTE_CTRL_STORE second.
  */
 static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode)
 {
 	if (!is_shub2()) {
 		BTE_LNSTAT_STORE(bte, len);
 		BTE_CTRL_STORE(bte, mode);
 		return;
 	}

 	BTE_CTRL_STORE(bte, (IBLS_BUSY | (len | (mode << 24))));
 }

 /************************************************************************
  * Block Transfer Engine copy related functions.
  *
  ***********************************************************************/

 /*
  * bte_copy(src, dest, len, mode, notification)
  *
  * Use the block transfer engine to move kernel memory from src to dest
  * using the assigned mode.
  *
  * Parameters:
  *   src - physical address of the transfer source.
  *   dest - physical address of the transfer destination.
  *   len - number of bytes to transfer from source to dest.
  *   mode - hardware defined.  See reference information
  *          for IBCT0/1 in the SHUB Programmers Reference
  *   notification - kernel virtual address of the notification cache
  *                  line.  If NULL, the default is used and
  *                  the bte_copy is synchronous.
  *
  * Returns:
  *   BTE_SUCCESS      - len was 0, an asynchronous transfer was started
  *                      (notification != NULL), or a synchronous
  *                      transfer completed without IBLS_ERROR set.
  *   BTEFAIL_NOTAVAIL - no idle interface could be locked and mode does
  *                      not contain BTE_WACQUIRE.
  *   anything else    - the notification word with IBLS_ERROR masked off.
  *
  * NOTE:  This function requires src, dest, and len to
  * be cacheline aligned.  Misaligned arguments, or a len that does not
  * fit in BTE_LEN_MASK cache lines, trip BUG_ON().
  */
 bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
 {
 	u64 transfer_size;
 	u64 transfer_stat;
 	u64 notif_phys_addr;
 	struct bteinfo_s *bte;
 	bte_result_t bte_status;
 	unsigned long irq_flags;
 	unsigned long itc_end = 0;
 	int nasid_to_try[MAX_NODES_TO_TRY];
 	int my_nasid = cpuid_to_nasid(raw_smp_processor_id());
 	int bte_if_index, nasid_index;
 	int bte_first, btes_per_node = BTES_PER_NODE;

 	BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n",
 		    src, dest, len, mode, notification));

 	if (len == 0) {
 		return BTE_SUCCESS;
 	}

 	BUG_ON((len & L1_CACHE_MASK) ||
 		 (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK));
 	BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT)));

 	/*
 	 * Start with interface corresponding to cpu number
 	 */
 	bte_first = raw_smp_processor_id() % btes_per_node;

 	/*
 	 * Build the ordered list of nodes whose BTEs may be used.
 	 * BTE_USE_DEST puts the destination node first, otherwise the
 	 * local node is first.  The other node is only a real fallback
 	 * when BTE_USE_ANY is set; without it the second slot holds 0.
 	 * NOTE(review): 0 is passed to bte_if_on_node() like any other
 	 * nasid, so if nasid 0 maps to a node its BTEs are tried as a
 	 * fallback -- confirm that this is intended.
 	 */
 	if (mode & BTE_USE_DEST) {
 		/* try remote then local */
 		nasid_to_try[0] = NASID_GET(dest);
 		nasid_to_try[1] = (mode & BTE_USE_ANY) ? my_nasid : 0;
 	} else {
 		/* try local then remote */
 		nasid_to_try[0] = my_nasid;
 		nasid_to_try[1] = (mode & BTE_USE_ANY) ? NASID_GET(dest) : 0;
 	}

 retry_bteop:
 	do {
 		local_irq_save(irq_flags);

 		bte_if_index = bte_first;
 		nasid_index = 0;

 		/* Attempt to lock one of the BTE interfaces. */
 		while (nasid_index < MAX_NODES_TO_TRY) {
 			bte = bte_if_on_node(nasid_to_try[nasid_index],bte_if_index);

 			if (bte == NULL) {
 				/*
 				 * This nasid has no node behind it, so none
 				 * of its interfaces can be used.  Advance to
 				 * the next candidate node; a bare "continue"
 				 * would retry the same nasid forever with
 				 * interrupts disabled.
 				 */
 				nasid_index++;
 				continue;
 			}

 			if (spin_trylock(&bte->spinlock)) {
 				if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) ||
 				    (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) {
 					/* Got the lock but BTE still busy */
 					spin_unlock(&bte->spinlock);
 				} else {
 					/* we got the lock and it's not busy */
 					break;
 				}
 			}

 			bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */
 			if (bte_if_index == bte_first) {
 				/*
 				 * We've tried all interfaces on this node
 				 */
 				nasid_index++;
 			}

 			bte = NULL;
 		}

 		/* Leave with the interface locked and interrupts still off. */
 		if (bte != NULL) {
 			break;
 		}

 		local_irq_restore(irq_flags);

 		/* Only callers that asked to wait keep polling. */
 		if (!(mode & BTE_WACQUIRE)) {
 			return BTEFAIL_NOTAVAIL;
 		}
 	} while (1);

 	if (notification == NULL) {
 		/* User does not want to be notified. */
 		bte->most_rcnt_na = &bte->notify;
 	} else {
 		bte->most_rcnt_na = notification;
 	}

 	/* Calculate the number of cache lines to transfer. */
 	transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);

 	/* Initialize the notification to a known value. */
 	*bte->most_rcnt_na = BTE_WORD_BUSY;
 	notif_phys_addr = TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na));

 	if (is_shub2()) {
 		src = SH2_TIO_PHYS_TO_DMA(src);
 		dest = SH2_TIO_PHYS_TO_DMA(dest);
 		notif_phys_addr = SH2_TIO_PHYS_TO_DMA(notif_phys_addr);
 	}
 	/* Set the source and destination registers */
 	BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src))));
 	BTE_SRC_STORE(bte, TO_PHYS(src));
 	BTE_PRINTKV(("IBDA = 0x%lx)\n", (TO_PHYS(dest))));
 	BTE_DEST_STORE(bte, TO_PHYS(dest));

 	/* Set the notification register */
 	BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr));
 	BTE_NOTIF_STORE(bte, notif_phys_addr);

 	/* Initiate the transfer */
 	BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
 	bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode));

 	/*
 	 * Deadline for the synchronous wait below, in ITC ticks:
 	 * 40000000 * cyc_per_usec (presumably 40 seconds -- confirm).
 	 */
 	itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);

 	spin_unlock_irqrestore(&bte->spinlock, irq_flags);

 	/* Asynchronous caller: completion is reported via its own word. */
 	if (notification != NULL) {
 		return BTE_SUCCESS;
 	}

 	/* Synchronous caller: poll until the hardware overwrites the word. */
 	while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) {
 		cpu_relax();
 		if (ia64_get_itc() > itc_end) {
 			BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n",
 				NASID_GET(bte->bte_base_addr), bte->bte_num,
 				BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) );
 			/*
 			 * Timed out: record the error, run the node's
 			 * error handler, release the interface and start
 			 * the whole operation over.
 			 */
 			bte->bte_error_count++;
 			bte->bh_error = IBLS_ERROR;
 			bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode));
 			*bte->most_rcnt_na = BTE_WORD_AVAILABLE;
 			goto retry_bteop;
 		}
 	}

 	BTE_PRINTKV((" Delay Done.  IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
 		     BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

 	if (transfer_stat & IBLS_ERROR) {
 		bte_status = transfer_stat & ~IBLS_ERROR;
 	} else {
 		bte_status = BTE_SUCCESS;
 	}
 	/* Mark the interface idle again for the next user. */
 	*bte->most_rcnt_na = BTE_WORD_AVAILABLE;

 	BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
 		    BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

 	return bte_status;
 }

 EXPORT_SYMBOL(bte_copy);

 /*
  * bte_unaligned_copy(src, dest, len, mode)
  *
  * use the block transfer engine to move kernel
  * memory from src to dest using the assigned mode.
  *
  * Parameters:
  *   src - physical address of the transfer source.
  *   dest - physical address of the transfer destination.
  *   len - number of bytes to transfer from source to dest.
  *   mode - hardware defined.  See reference information
  *          for IBCT0/1 in the SGI documentation.
  *
  * Returns:
  *   BTE_SUCCESS on success (including len == 0),
  *   BTEFAIL_NOTAVAIL when the temporary buffer cannot be allocated,
  *   otherwise the result of the bte_copy() call that failed.  On such
  *   a failure part of dest may already have been written.
  *
  * A temporary buffer of len + 3 cache lines is allocated with
  * GFP_KERNEL | GFP_DMA for the duration of the call.
  * NOTE(review): GFP_KERNEL presumably means callers must be able to
  * sleep -- confirm against the call sites.
  *
  * NOTE: If the source, dest, and len are all cache line aligned,
  * then it would be _FAR_ preferable to use bte_copy instead.
  */
 bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 {
 	int destFirstCacheOffset;
 	u64 headBteSource;
 	u64 headBteLen;
 	u64 headBcopySrcOffset;
 	u64 headBcopyDest;
 	u64 headBcopyLen;
 	u64 footBteSource;
 	u64 footBteLen;
 	u64 footBcopyDest;
 	u64 footBcopyLen;
 	bte_result_t rv = BTE_SUCCESS;
 	char *bteBlock, *bteBlock_unaligned;

 	if (len == 0) {
 		return BTE_SUCCESS;
 	}

 	/* temporary buffer used during unaligned transfers */
 	bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES,
 				     GFP_KERNEL | GFP_DMA);
 	if (bteBlock_unaligned == NULL) {
 		return BTEFAIL_NOTAVAIL;
 	}
 	/* bte_copy() needs a cache line aligned target inside the buffer. */
 	bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned);

 	headBcopySrcOffset = src & L1_CACHE_MASK;
 	destFirstCacheOffset = dest & L1_CACHE_MASK;

 	/*
 	 * At this point, the transfer is broken into
 	 * (up to) three sections.  The first section is
 	 * from the start address to the first physical
 	 * cache line, the second is from the first physical
 	 * cache line to the last complete cache line,
 	 * and the third is from the last cache line to the
 	 * end of the buffer.  The first and third sections
 	 * are handled by bte copying into a temporary buffer
 	 * and then bcopy'ing the necessary section into the
 	 * final location.  The middle section is handled with
 	 * a standard bte copy.
 	 *
 	 * One nasty exception to the above rule is when the
 	 * source and destination are not symmetrically
 	 * mis-aligned.  If the source offset from the first
 	 * cache line is different from the destination offset,
 	 * we make the first section be the entire transfer
 	 * and the bcopy the entire block into place.
 	 */
 	if (headBcopySrcOffset == destFirstCacheOffset) {

 		/*
 		 * Both the source and destination are the same
 		 * distance from a cache line boundary so we can
 		 * use the bte to transfer the bulk of the
 		 * data.
 		 */
 		headBteSource = src & ~L1_CACHE_MASK;
 		headBcopyDest = dest;
 		if (headBcopySrcOffset) {
 			/* Bytes up to the next cache line, capped at len. */
 			headBcopyLen = L1_CACHE_BYTES - headBcopySrcOffset;
 			if (headBcopyLen > len) {
 				headBcopyLen = len;
 			}
 			headBteLen = L1_CACHE_BYTES;
 		} else {
 			headBcopyLen = 0;
 			headBteLen = 0;
 		}

 		if (len > headBcopyLen) {
 			footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK;
 			footBteLen = L1_CACHE_BYTES;

 			footBteSource = src + len - footBcopyLen;
 			footBcopyDest = dest + len - footBcopyLen;

 			if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
 				/*
 				 * We have two contiguous bcopy
 				 * blocks.  Merge them.
 				 */
 				headBcopyLen += footBcopyLen;
 				headBteLen += footBteLen;
 			} else if (footBcopyLen > 0) {
 				rv = bte_copy(footBteSource,
 					      ia64_tpa((unsigned long)bteBlock),
 					      footBteLen, mode, NULL);
 				if (rv != BTE_SUCCESS) {
 					goto out;
 				}

 				memcpy(__va(footBcopyDest),
 				       (char *)bteBlock, footBcopyLen);
 			}
 		} else {
 			footBcopyLen = 0;
 			footBteLen = 0;
 		}

 		if (len > (headBcopyLen + footBcopyLen)) {
 			/* now transfer the middle. */
 			rv = bte_copy((src + headBcopyLen),
 				      (dest + headBcopyLen),
 				      (len - headBcopyLen - footBcopyLen),
 				      mode, NULL);
 			if (rv != BTE_SUCCESS) {
 				goto out;
 			}
 		}
 	} else {

 		/*
 		 * The transfer is not symmetric, we will
 		 * allocate a buffer large enough for all the
 		 * data, bte_copy into that buffer and then
 		 * bcopy to the destination.
 		 *
 		 * The BTE length is the leader from the source plus
 		 * len, rounded up to whole cache lines.  Rounding with
 		 * L1_CACHE_ALIGN adds nothing when the sum is already
 		 * aligned, so the engine never reads a cache line
 		 * beyond the one holding the last source byte.
 		 */
 		headBteLen = L1_CACHE_ALIGN(len + headBcopySrcOffset);
 		headBteSource = src & ~L1_CACHE_MASK;
 		headBcopyDest = dest;
 		headBcopyLen = len;
 	}

 	if (headBcopyLen > 0) {
 		rv = bte_copy(headBteSource,
 			      ia64_tpa((unsigned long)bteBlock), headBteLen,
 			      mode, NULL);
 		if (rv != BTE_SUCCESS) {
 			goto out;
 		}

 		memcpy(__va(headBcopyDest), ((char *)bteBlock +
 					     headBcopySrcOffset), headBcopyLen);
 	}

 out:
 	/* Single exit: rv is BTE_SUCCESS unless a bte_copy() failed. */
 	kfree(bteBlock_unaligned);
 	return rv;
 }

 EXPORT_SYMBOL(bte_unaligned_copy);

 /************************************************************************
  * Block Transfer Engine initialization functions.
  *
  ***********************************************************************/

 /*
  * bte_init_node(nodepda, cnode)
  *
  * Prepare the BTE state kept in a node's nodepda: the per-node
  * recovery lock and timer, and for each of the BTES_PER_NODE
  * interfaces its register addresses, notification word, spinlock and
  * bookkeeping fields.
  *
  *   mynodepda - nodepda to initialize.
  *   cnode     - compact node id stored in each interface and used to
  *               derive the hub register base address.
  */
 void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
 {
 	struct bteinfo_s *bte;
 	u64 *hub_regs;
 	int i;

 	/*
 	 * Per-node recovery state: one lock and one timer.  The timer is
 	 * only set up here, with bte_error_handler as its function and
 	 * this nodepda as its argument.
 	 */
 	spin_lock_init(&mynodepda->bte_recovery_lock);
 	init_timer(&mynodepda->bte_recovery_timer);
 	mynodepda->bte_recovery_timer.function = bte_error_handler;
 	mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;

 	for (i = 0; i < BTES_PER_NODE; i++) {
 		bte = &mynodepda->bte_if[i];

 		/* Register block of interface i on this node's hub. */
 		hub_regs = (u64 *)
 		    REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i));
 		bte->bte_base_addr = hub_regs;
 		bte->bte_source_addr = BTE_SOURCE_ADDR(hub_regs);
 		bte->bte_destination_addr = BTE_DEST_ADDR(hub_regs);
 		bte->bte_control_addr = BTE_CTRL_ADDR(hub_regs);
 		bte->bte_notify_addr = BTE_NOTIF_ADDR(hub_regs);

 		/*
 		 * Point the notification at the built-in word and mark
 		 * it available so the first transfer can occur.
 		 */
 		bte->most_rcnt_na = &(bte->notify);
 		bte->notify = BTE_WORD_AVAILABLE;
 		spin_lock_init(&bte->spinlock);

 		/* Identity and error bookkeeping start out clean. */
 		bte->bte_cnode = cnode;
 		bte->bte_error_count = 0;
 		bte->bte_num = i;
 		bte->cleanup_active = 0;
 		bte->bh_error = 0;
 	}
 }
	/*
	* This file is subject to the terms and conditions of the GNU General Public
	* License. See the file "COPYING" in the main directory of this archive
	* for more details.
	*
	* Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
	*/

	#include <linux/config.h>
	#include <linux/module.h>
	#include <asm/sn/nodepda.h>
	#include <asm/sn/addrs.h>
	#include <asm/sn/arch.h>
	#include <asm/sn/sn_cpuid.h>
	#include <asm/sn/pda.h>
	#include <asm/sn/shubio.h>
	#include <asm/nodedata.h>
	#include <asm/delay.h>

	#include <linux/bootmem.h>
	#include <linux/string.h>
	#include <linux/sched.h>

	#include <asm/sn/bte.h>

	/*
	 * Mask selecting the offset of an address within an L1 cache line
	 * (assumes L1_CACHE_BYTES is a power of two -- TODO confirm).  Only
	 * defined here when no header has provided it already.
	 */
	#ifndef L1_CACHE_MASK
	#define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
	#endif

	/*
	 * two interfaces on two btes
	 *
	 * MAX_NODES_TO_TRY sizes the nasid_to_try[] candidate list in bte_copy()
	 * and bounds its search loop.  MAX_INTERFACES_TO_TRY is not referenced
	 * anywhere in the visible part of this file.
	 */
	#define MAX_INTERFACES_TO_TRY 4
	#define MAX_NODES_TO_TRY 2

	/*
	 * bte_if_on_node(nasid, interface)
	 *
	 * Return the BTE interface descriptor number 'interface' that lives in
	 * the nodepda of the node identified by 'nasid'.
	 *
	 * Returns NULL when nasid_to_cnodeid() yields -1 for the nasid.
	 * 'interface' is used as an index into bte_if[] without a range check.
	 */
	static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
	{
		nodepda_t *node_pda;

		if (nasid_to_cnodeid(nasid) != -1) {
			node_pda = NODEPDA(nasid_to_cnodeid(nasid));
			return &node_pda->bte_if[interface];
		}

		return (struct bteinfo_s *)NULL;
	}

	/*
	 * bte_start_transfer(bte, len, mode)
	 *
	 * Start a transfer on an interface whose other registers have already
	 * been programmed.
	 *
	 * When is_shub2() is true a single BTE_CTRL_STORE carries IBLS_BUSY,
	 * the length and the mode shifted left by 24 bits.  Otherwise the
	 * length goes out through BTE_LNSTAT_STORE first and the mode through
	 * BTE_CTRL_STORE second.
	 */
	static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode)
	{
		if (is_shub2()) {
			/* Plain bitwise ORs; a backslash before '|' is not valid C. */
			BTE_CTRL_STORE(bte, (IBLS_BUSY | ((len) | (mode) << 24)));
		} else {
			BTE_LNSTAT_STORE(bte, len);
			BTE_CTRL_STORE(bte, mode);
		}
	}

	/************************************************************************
	* Block Transfer Engine copy related functions.
	*
	***********************************************************************/

	/*
	 * bte_copy(src, dest, len, mode, notification)
	 *
	 * Use the block transfer engine to move kernel memory from src to dest
	 * using the assigned mode.
	 *
	 * Parameters:
	 *   src - physical address of the transfer source.
	 *   dest - physical address of the transfer destination.
	 *   len - number of bytes to transfer from source to dest.
	 *   mode - hardware defined.  See reference information
	 *          for IBCT0/1 in the SHUB Programmers Reference
	 *   notification - kernel virtual address of the notification cache
	 *                  line.  If NULL, the default is used and
	 *                  the bte_copy is synchronous.
	 *
	 * Returns:
	 *   BTE_SUCCESS      - len was 0, an asynchronous transfer was started
	 *                      (notification != NULL), or a synchronous
	 *                      transfer completed without IBLS_ERROR set.
	 *   BTEFAIL_NOTAVAIL - no idle interface could be locked and mode does
	 *                      not contain BTE_WACQUIRE.
	 *   anything else    - the notification word with IBLS_ERROR masked off.
	 *
	 * NOTE:  This function requires src, dest, and len to
	 * be cacheline aligned.  Misaligned arguments, or a len that does not
	 * fit in BTE_LEN_MASK cache lines, trip BUG_ON().
	 */
	bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
	{
		u64 transfer_size;
		u64 transfer_stat;
		u64 notif_phys_addr;
		struct bteinfo_s *bte;
		bte_result_t bte_status;
		unsigned long irq_flags;
		unsigned long itc_end = 0;
		int nasid_to_try[MAX_NODES_TO_TRY];
		int my_nasid = cpuid_to_nasid(raw_smp_processor_id());
		int bte_if_index, nasid_index;
		int bte_first, btes_per_node = BTES_PER_NODE;

		BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n",
			    src, dest, len, mode, notification));

		if (len == 0) {
			return BTE_SUCCESS;
		}

		BUG_ON((len & L1_CACHE_MASK) ||
			 (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK));
		BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT)));

		/*
		 * Start with interface corresponding to cpu number
		 */
		bte_first = raw_smp_processor_id() % btes_per_node;

		/*
		 * Build the ordered list of nodes whose BTEs may be used.
		 * BTE_USE_DEST puts the destination node first, otherwise the
		 * local node is first.  The other node is only a real fallback
		 * when BTE_USE_ANY is set; without it the second slot holds 0.
		 * NOTE(review): 0 is passed to bte_if_on_node() like any other
		 * nasid, so if nasid 0 maps to a node its BTEs are tried as a
		 * fallback -- confirm that this is intended.
		 */
		if (mode & BTE_USE_DEST) {
			/* try remote then local */
			nasid_to_try[0] = NASID_GET(dest);
			nasid_to_try[1] = (mode & BTE_USE_ANY) ? my_nasid : 0;
		} else {
			/* try local then remote */
			nasid_to_try[0] = my_nasid;
			nasid_to_try[1] = (mode & BTE_USE_ANY) ? NASID_GET(dest) : 0;
		}

	retry_bteop:
		do {
			local_irq_save(irq_flags);

			bte_if_index = bte_first;
			nasid_index = 0;

			/* Attempt to lock one of the BTE interfaces. */
			while (nasid_index < MAX_NODES_TO_TRY) {
				bte = bte_if_on_node(nasid_to_try[nasid_index],bte_if_index);

				if (bte == NULL) {
					/*
					 * This nasid has no node behind it, so none
					 * of its interfaces can be used.  Advance to
					 * the next candidate node; a bare "continue"
					 * would retry the same nasid forever with
					 * interrupts disabled.
					 */
					nasid_index++;
					continue;
				}

				if (spin_trylock(&bte->spinlock)) {
					if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) ||
					    (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) {
						/* Got the lock but BTE still busy */
						spin_unlock(&bte->spinlock);
					} else {
						/* we got the lock and it's not busy */
						break;
					}
				}

				bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */
				if (bte_if_index == bte_first) {
					/*
					 * We've tried all interfaces on this node
					 */
					nasid_index++;
				}

				bte = NULL;
			}

			/* Leave with the interface locked and interrupts still off. */
			if (bte != NULL) {
				break;
			}

			local_irq_restore(irq_flags);

			/* Only callers that asked to wait keep polling. */
			if (!(mode & BTE_WACQUIRE)) {
				return BTEFAIL_NOTAVAIL;
			}
		} while (1);

		if (notification == NULL) {
			/* User does not want to be notified. */
			bte->most_rcnt_na = &bte->notify;
		} else {
			bte->most_rcnt_na = notification;
		}

		/* Calculate the number of cache lines to transfer. */
		transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);

		/* Initialize the notification to a known value. */
		*bte->most_rcnt_na = BTE_WORD_BUSY;
		notif_phys_addr = TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na));

		if (is_shub2()) {
			src = SH2_TIO_PHYS_TO_DMA(src);
			dest = SH2_TIO_PHYS_TO_DMA(dest);
			notif_phys_addr = SH2_TIO_PHYS_TO_DMA(notif_phys_addr);
		}
		/* Set the source and destination registers */
		BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src))));
		BTE_SRC_STORE(bte, TO_PHYS(src));
		BTE_PRINTKV(("IBDA = 0x%lx)\n", (TO_PHYS(dest))));
		BTE_DEST_STORE(bte, TO_PHYS(dest));

		/* Set the notification register */
		BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr));
		BTE_NOTIF_STORE(bte, notif_phys_addr);

		/* Initiate the transfer */
		BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
		bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode));

		/*
		 * Deadline for the synchronous wait below, in ITC ticks:
		 * 40000000 * cyc_per_usec (presumably 40 seconds -- confirm).
		 */
		itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);

		spin_unlock_irqrestore(&bte->spinlock, irq_flags);

		/* Asynchronous caller: completion is reported via its own word. */
		if (notification != NULL) {
			return BTE_SUCCESS;
		}

		/* Synchronous caller: poll until the hardware overwrites the word. */
		while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) {
			cpu_relax();
			if (ia64_get_itc() > itc_end) {
				BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n",
					NASID_GET(bte->bte_base_addr), bte->bte_num,
					BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) );
				/*
				 * Timed out: record the error, run the node's
				 * error handler, release the interface and start
				 * the whole operation over.
				 */
				bte->bte_error_count++;
				bte->bh_error = IBLS_ERROR;
				bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode));
				*bte->most_rcnt_na = BTE_WORD_AVAILABLE;
				goto retry_bteop;
			}
		}

		BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
			     BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

		if (transfer_stat & IBLS_ERROR) {
			bte_status = transfer_stat & ~IBLS_ERROR;
		} else {
			bte_status = BTE_SUCCESS;
		}
		/* Mark the interface idle again for the next user. */
		*bte->most_rcnt_na = BTE_WORD_AVAILABLE;

		BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
			    BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

		return bte_status;
	}

	EXPORT_SYMBOL(bte_copy);

	/*
	 * bte_unaligned_copy(src, dest, len, mode)
	 *
	 * use the block transfer engine to move kernel
	 * memory from src to dest using the assigned mode.
	 *
	 * Parameters:
	 *   src - physical address of the transfer source.
	 *   dest - physical address of the transfer destination.
	 *   len - number of bytes to transfer from source to dest.
	 *   mode - hardware defined.  See reference information
	 *          for IBCT0/1 in the SGI documentation.
	 *
	 * Returns:
	 *   BTE_SUCCESS on success (including len == 0),
	 *   BTEFAIL_NOTAVAIL when the temporary buffer cannot be allocated,
	 *   otherwise the result of the bte_copy() call that failed.  On such
	 *   a failure part of dest may already have been written.
	 *
	 * A temporary buffer of len + 3 cache lines is allocated with
	 * GFP_KERNEL | GFP_DMA for the duration of the call.
	 * NOTE(review): GFP_KERNEL presumably means callers must be able to
	 * sleep -- confirm against the call sites.
	 *
	 * NOTE: If the source, dest, and len are all cache line aligned,
	 * then it would be _FAR_ preferable to use bte_copy instead.
	 */
	bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
	{
		int destFirstCacheOffset;
		u64 headBteSource;
		u64 headBteLen;
		u64 headBcopySrcOffset;
		u64 headBcopyDest;
		u64 headBcopyLen;
		u64 footBteSource;
		u64 footBteLen;
		u64 footBcopyDest;
		u64 footBcopyLen;
		bte_result_t rv = BTE_SUCCESS;
		/* Both are pointers: one to the allocation, one aligned into it. */
		char *bteBlock, *bteBlock_unaligned;

		if (len == 0) {
			return BTE_SUCCESS;
		}

		/* temporary buffer used during unaligned transfers */
		bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES,
					     GFP_KERNEL | GFP_DMA);
		if (bteBlock_unaligned == NULL) {
			return BTEFAIL_NOTAVAIL;
		}
		/* bte_copy() needs a cache line aligned target inside the buffer. */
		bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned);

		headBcopySrcOffset = src & L1_CACHE_MASK;
		destFirstCacheOffset = dest & L1_CACHE_MASK;

		/*
		 * At this point, the transfer is broken into
		 * (up to) three sections.  The first section is
		 * from the start address to the first physical
		 * cache line, the second is from the first physical
		 * cache line to the last complete cache line,
		 * and the third is from the last cache line to the
		 * end of the buffer.  The first and third sections
		 * are handled by bte copying into a temporary buffer
		 * and then bcopy'ing the necessary section into the
		 * final location.  The middle section is handled with
		 * a standard bte copy.
		 *
		 * One nasty exception to the above rule is when the
		 * source and destination are not symmetrically
		 * mis-aligned.  If the source offset from the first
		 * cache line is different from the destination offset,
		 * we make the first section be the entire transfer
		 * and the bcopy the entire block into place.
		 */
		if (headBcopySrcOffset == destFirstCacheOffset) {

			/*
			 * Both the source and destination are the same
			 * distance from a cache line boundary so we can
			 * use the bte to transfer the bulk of the
			 * data.
			 */
			headBteSource = src & ~L1_CACHE_MASK;
			headBcopyDest = dest;
			if (headBcopySrcOffset) {
				/* Bytes up to the next cache line, capped at len. */
				headBcopyLen = L1_CACHE_BYTES - headBcopySrcOffset;
				if (headBcopyLen > len) {
					headBcopyLen = len;
				}
				headBteLen = L1_CACHE_BYTES;
			} else {
				headBcopyLen = 0;
				headBteLen = 0;
			}

			if (len > headBcopyLen) {
				footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK;
				footBteLen = L1_CACHE_BYTES;

				footBteSource = src + len - footBcopyLen;
				footBcopyDest = dest + len - footBcopyLen;

				if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
					/*
					 * We have two contiguous bcopy
					 * blocks.  Merge them.
					 */
					headBcopyLen += footBcopyLen;
					headBteLen += footBteLen;
				} else if (footBcopyLen > 0) {
					rv = bte_copy(footBteSource,
						      ia64_tpa((unsigned long)bteBlock),
						      footBteLen, mode, NULL);
					if (rv != BTE_SUCCESS) {
						goto out;
					}

					memcpy(__va(footBcopyDest),
					       (char *)bteBlock, footBcopyLen);
				}
			} else {
				footBcopyLen = 0;
				footBteLen = 0;
			}

			if (len > (headBcopyLen + footBcopyLen)) {
				/* now transfer the middle. */
				rv = bte_copy((src + headBcopyLen),
					      (dest + headBcopyLen),
					      (len - headBcopyLen - footBcopyLen),
					      mode, NULL);
				if (rv != BTE_SUCCESS) {
					goto out;
				}
			}
		} else {

			/*
			 * The transfer is not symmetric, we will
			 * allocate a buffer large enough for all the
			 * data, bte_copy into that buffer and then
			 * bcopy to the destination.
			 *
			 * The BTE length is the leader from the source plus
			 * len, rounded up to whole cache lines.  Rounding with
			 * L1_CACHE_ALIGN adds nothing when the sum is already
			 * aligned, so the engine never reads a cache line
			 * beyond the one holding the last source byte.
			 */
			headBteLen = L1_CACHE_ALIGN(len + headBcopySrcOffset);
			headBteSource = src & ~L1_CACHE_MASK;
			headBcopyDest = dest;
			headBcopyLen = len;
		}

		if (headBcopyLen > 0) {
			rv = bte_copy(headBteSource,
				      ia64_tpa((unsigned long)bteBlock), headBteLen,
				      mode, NULL);
			if (rv != BTE_SUCCESS) {
				goto out;
			}

			memcpy(__va(headBcopyDest), ((char *)bteBlock +
						     headBcopySrcOffset), headBcopyLen);
		}

	out:
		/* Single exit: rv is BTE_SUCCESS unless a bte_copy() failed. */
		kfree(bteBlock_unaligned);
		return rv;
	}

	EXPORT_SYMBOL(bte_unaligned_copy);

	/************************************************************************
	* Block Transfer Engine initialization functions.
	*
	***********************************************************************/

	/*
	 * bte_init_node(nodepda, cnode)
	 *
	 * Prepare the BTE state kept in a node's nodepda: the per-node
	 * recovery lock and timer, and for each of the BTES_PER_NODE
	 * interfaces its register addresses, notification word, spinlock and
	 * bookkeeping fields.
	 *
	 *   mynodepda - nodepda to initialize.
	 *   cnode     - compact node id stored in each interface and used to
	 *               derive the hub register base address.
	 */
	void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
	{
		struct bteinfo_s *bte;
		u64 *hub_regs;
		int i;

		/*
		 * Per-node recovery state: one lock and one timer.  The timer is
		 * only set up here, with bte_error_handler as its function and
		 * this nodepda as its argument.
		 */
		spin_lock_init(&mynodepda->bte_recovery_lock);
		init_timer(&mynodepda->bte_recovery_timer);
		mynodepda->bte_recovery_timer.function = bte_error_handler;
		mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;

		for (i = 0; i < BTES_PER_NODE; i++) {
			bte = &mynodepda->bte_if[i];

			/* Register block of interface i on this node's hub. */
			hub_regs = (u64 *)
			    REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i));
			bte->bte_base_addr = hub_regs;
			bte->bte_source_addr = BTE_SOURCE_ADDR(hub_regs);
			bte->bte_destination_addr = BTE_DEST_ADDR(hub_regs);
			bte->bte_control_addr = BTE_CTRL_ADDR(hub_regs);
			bte->bte_notify_addr = BTE_NOTIF_ADDR(hub_regs);

			/*
			 * Point the notification at the built-in word and mark
			 * it available so the first transfer can occur.
			 */
			bte->most_rcnt_na = &(bte->notify);
			bte->notify = BTE_WORD_AVAILABLE;
			spin_lock_init(&bte->spinlock);

			/* Identity and error bookkeeping start out clean. */
			bte->bte_cnode = cnode;
			bte->bte_error_count = 0;
			bte->bte_num = i;
			bte->cleanup_active = 0;
			bte->bh_error = 0;
		}
	}