On 2008-08-19, Timothy Normand Miller wrote:
> The cache is only available to the PCI target. HQ is behind the
> cache. You can take advantage of the cache in order to reduce the
> amount of PCI read traffic that HQ has to service. If the memory
> space is cachable (reads have no side-effects), then enabling the
> cache is a good idea. If reads have side-effects, then disable it.
> The cache is never used for engine access. Engine registers can be
> volatile, so we don't want to return stale data.
Okay, then we'll need to deal with unaligned PCI reads. I simplified this
by truncating each read request to a single granule. I have attached a
preliminary poll_pci implementation. I assumed everything to be
word-addressed, so that has to be fixed, at least. The compiled code is
115 instruction words.
> >> >> > Another goofy thing: It seems tricky at best to unroll the
> >> >> > transfer-loop for target write. The reason is that we only know the
> >> >> > number of queued commands, but what we need is the number of queued
> >> >> > write-data commands. Any idea?
> >> >>
> >> >> That is tricky, and we may have no good answer for that.
> >> >>
> >> >> I suggest we do nothing about it right now. We should get a working
> >> >> revision out, then we can go back later and see if we can do anything
> >> >> clever with the CPU design.
> >> >
> >> > Okay, we don't unroll, but we can still avoid sending an address for
> >> > each write request.
> >>
> >> You can, although the check to decide whether or not to send an
> >> address would probably take longer than just sending an address. It
> >> depends on what you're doing.
> >>
> >> For instance, if you get an address and some writes, then you don't
> >> get anything, so you go on about some VGA business, then you get
> >> another write without an address, you'll have to reissue the correct
> >> address (where PCI left off), because you may have changed it to
> >> access some memory for what VGA needs to do.
> >
> > My idea was to enter an inner loop once we have a write, and exit to the
> > top level on the first non-write.
>
> If writes occur and then nothing follows, you'll never see a non-write.
> The only solution is to bring some PCI state info into where HQ can get
> at it.
> For instance, we could add a null command to the fifo. Whenever the
> target terminates a transaction, we send this null command down the
> command pipe. The bridge should be designed to ignore it, but HQ can
> use it as a hint that writes are done.
We have already defined an "idle" command type to be zero, though I
guess it's currently unused. Rather than pushing this onto the pipe, I
think it would be more convenient from the code point of view if the
PCI_T_CMD_INFO port returned 0 (idle) when the pipe is empty. That would
save us the two instructions needed to check PCI_T_CMD_COUNT in the inner
write-loop (see CHECKME in the code).
;;; Copyright (c) 2008 Traversal Technology
;;;
;;; Permission is hereby granted, free of charge, to any person obtaining a
;;; copy of this software and associated documentation files (the "Software"),
;;; to deal in the Software without restriction, including without limitation
;;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
;;; and/or sell copies of the Software, and to permit persons to whom the
;;; Software is furnished to do so, subject to the following conditions:
;;;
;;; The above copyright notice and this permission notice shall be included in
;;; all copies or substantial portions of the Software.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
;;; DEALINGS IN THE SOFTWARE.
;;;
;;; Author: Petter Urkedal
include hqio
;; Base index of the poll_pci globals; every slot below is an offset from it.
let G_POLL_BASE = 0 ; FIXME: placeholder value, the final location is undecided
;; Global Parameters
;; Per-target address corrections. poll_pci adds the one matching the target
;; of an address command to the incoming PCI address.
let G_POLL_MEM_TRANS = G_POLL_BASE + 0
let G_POLL_VMEM_TRANS = G_POLL_BASE + 1
let G_POLL_IO_TRANS = G_POLL_BASE + 2
;; Global State
;; Target (flags field) of the most recent address command.
let G_POLL_TARGET = G_POLL_BASE + 3
;; Current corrected address; set by address commands and advanced by writes.
let G_POLL_ADDR = G_POLL_BASE + 4
;;; ------------------------------------------------------------------------
;;; poll_pci(r5: continuation)
;;;
;;; Services the PCI target command queue. When [PCI_T_CMD_COUNT] reads zero,
;;; control passes to the continuation in p0 (r5). Otherwise the command at
;;; the head of the queue is dispatched on its type (address, read count or
;;; write data), handled, and poll_pci is entered again.
;;;
;;; Registers: r0..r2 are scratch (r2 also receives the return address for the
;;; helper calls), q0..q1 (r3..r4) are overwritten on the read path, and
;;; r6..r31 are declared with `protect`.
;;;
;;; The instruction following a jump is its delay slot; several jumps below
;;; place useful work there instead of a noop, so statement order matters.
frame
alias q0..q1 = r3..r4
alias p0 = r5
protect r6..r31
poll_pci:
;; Nothing queued: go to the continuation. The read of [PCI_T_CMD_INFO]
;; below sits in the delay slot of this jump.
move [PCI_T_CMD_COUNT], r0
jzero r0, p0
;; Switch between Command Types
;; ----------------------------
;; r1 holds (type xor candidate); each further xor re-bases it onto the next
;; candidate type, so r1 is zero exactly when the type matches that candidate.
move [PCI_T_CMD_INFO], r0
and r0, PCI_TCINFO_TYPE_MASK, r1
xor r1, PCI_TCINFO_TYPE_ADDR, r1
jzero r1, poll_pci_addr
xor r1, PCI_TCINFO_TYPE_RCOUNT ^ PCI_TCINFO_TYPE_ADDR, r1
jzero r1, poll_pci_rcount
xor r1, PCI_TCINFO_TYPE_WDATA ^ PCI_TCINFO_TYPE_RCOUNT, r1
jzero r1, poll_pci_wdata
noop
;; NOTE(review): a command whose type matches none of the three tests above
;; falls through into poll_pci_addr below -- confirm that this is intended.
;; Address Command
;; ---------------
poll_pci_addr:
;; Have r0 = [PCI_T_CMD_INFO]
;; Will set r1 to the address correction.
;; The flags field of an address command is the target; it is remembered in
;; [G_POLL_TARGET]. The target is then compared against MEM, VMEM and IO in
;; turn by successive subtraction. Each jzero's delay slot loads the matching
;; translation, so r1 is already correct when the jump is taken. A target
;; matching none of the three gets a correction of 0.
and r0, PCI_TCINFO_FLAGS_MASK, r0 ; target
move r0, [G_POLL_TARGET]
sub r0, PCI_TARGET_MEM, r0
jzero r0, poll_pci_save_addr
move [G_POLL_MEM_TRANS], r1
sub r0, PCI_TARGET_VMEM - PCI_TARGET_MEM, r0
jzero r0, poll_pci_save_addr
move [G_POLL_VMEM_TRANS], r1
sub r0, PCI_TARGET_IO - PCI_TARGET_VMEM, r0
jzero r0, poll_pci_save_addr
move [G_POLL_IO_TRANS], r1
move 0, r1
poll_pci_save_addr:
;; Save address and tail call.
move [PCI_T_CMD_DATA], r0 ; dequeue address
add r0, r1, r0 ; apply the per-target correction
move r0, [G_POLL_ADDR]
jump poll_pci
noop
;; Read Command
;; ------------
;;
;; For simplicity, we only fetch up to MEM_GRANULE_SIZE words.
;; NOTE(review): [G_POLL_ADDR] is not advanced on this path -- confirm that
;; every read request is preceded by its own address command.
poll_pci_rcount:
;; Send the granule-aligned read request address.
move [G_POLL_ADDR], r0
and r0, MEM_GRANULE_SIZE - 1, r1 ; words to skip
and r0, ~(MEM_GRANULE_SIZE - 1), r0 ; aligned address
move [G_POLL_TARGET], r2
xor r2, PCI_TARGET_ENG, r2
;; NOTE(review): the MEM send is in the delay slot of the jnzero, so for an
;; engine target the address is written to both MEM_SEND_ADDR_MEM and
;; MEM_SEND_ADDR_ENG -- confirm that the extra MEM send is harmless.
jnzero r2, poll_pci_rcount_not_eng
move r0, [MEM_SEND_ADDR_MEM]
move r0, [MEM_SEND_ADDR_ENG]
poll_pci_rcount_not_eng:
;; Let q0 be the number of words to transfer.
move [PCI_T_CMD_DATA], q0 ; the requested count
jzero q0, poll_pci ; or can we assume this is nonzero?
noop
;; Let q1 be the number of words to skip after the transfer ...
;; i.e. q1 = MEM_GRANULE_SIZE - (initial skip + count).
add q0, r1, q1
sub MEM_GRANULE_SIZE, q1, q1
;; ... if it's negative, truncate request count. The request is done
;; here to utilise the delay slot.
;; A full granule is always requested from memory; the skips before and
;; after the transfer discard the words PCI did not ask for.
move MEM_GRANULE_SIZE, r0
jnneg q1, poll_pci_rcount_no_trunc
move r0, [MEM_SEND_READ_COUNT]
add q0, q1, q0 ; Reduce the transfer count to fit the granule.
move 0, q1 ; No final words to skip.
poll_pci_rcount_no_trunc:
;; Initial skip. The count argument register r1 is already set above.
;; The second operand r2 is the register the helpers return through
;; (presumably the jump stores the return address there).
jump mem_small_skip, r2
noop
;; The transfer to PCI.
jump mem_to_pci_xfer, r2
move q0, r1 ; delay slot: count argument
;; The final skip.
jump mem_small_skip, r2 ; Final skip.
move q1, r1 ; delay slot: count argument
jump poll_pci
noop
;; Write Commands
;; --------------
poll_pci_wdata:
;; Send address to bridge and adjust [G_POLL_ADDR].
;; r2 tracks the running address and is stored back once the burst ends.
;; NOTE(review): the address always goes to MEM_SEND_ADDR_MEM here, whatever
;; [G_POLL_TARGET] holds -- confirm engine writes need no separate port.
move [G_POLL_ADDR], r2
move r2, [MEM_SEND_ADDR_MEM]
;; Prepare for the first transfer. We know the next PCI command is a
;; write, and r0 already contains PCI_T_CMD_INFO.
and r0, PCI_TCINFO_FLAGS_MASK, r1 ; byte enables
poll_pci_wdata_next:
;; Dequeue one data word and write it to the data port selected by the byte
;; enables, as an offset from MEM_SEND_DATA_0000.
move [PCI_T_CMD_DATA], r0
move r0, [add MEM_SEND_DATA_0000, r1]
add r2, 1, r2 ; advance the tracked address by one word
;; Repeat as long as we receive write commands.
;; CHECKME: Do we need to test [PCI_T_CMD_COUNT]?
move [PCI_T_CMD_INFO], r0
and r0, PCI_TCINFO_TYPE_MASK, r1
xor r1, PCI_TCINFO_TYPE_WDATA, r1
jzero r1, poll_pci_wdata_next
and r0, PCI_TCINFO_FLAGS_MASK, r1 ; byte enables (delay slot: ready for the next word)
;; Save the address in case there are consecutive write commands which have
;; just not entered the pipe yet, and recheck PCI queue.
move r2, [G_POLL_ADDR]
jump poll_pci
noop
endframe
;;; ------------------------------------------------------------------------
;;; mem_small_skip(r1: count, r2: cont)
;;;
;;; Drops count words from MEM_READQ_DATA, where 0 ≤ count ≤ 8.
;;;
;;; Entry is at the mem_small_skip label near the end of the frame, not at the
;;; top. Each pass reads [MEM_READQ_AVAIL], discards min(count, available)
;;; words through a computed jump into the run of eight reads, and repeats
;;; until count reaches zero; it then jumps to cont. r0 is scratch and count
;;; (r1) is consumed.
frame
alias p0..p1 = r1..r2
protect r3..r31
mem_small_skip_next: ; Not the entry point!
;; Here r0 = [MEM_READQ_AVAIL], loaded in the delay slot of the jnzero at
;; mem_small_skip.
sub p0, r0, p0 ; decrement counter by available words
jnneg p0, mem_small_skip_no_trunc
noop
;; More words are available than are still wanted: p0 + r0 recovers the
;; remaining count, so drop only that many and leave nothing pending.
add p0, r0, r0
move 0, p0
mem_small_skip_no_trunc:
;; Enter the run below r0 instructions before mem_small_skip, so that exactly
;; r0 reads execute before falling into the loop test (one instruction per
;; word). With count ≤ 8, r0 never exceeds the eight reads provided.
sub mem_small_skip, r0, r0
jump r0
noop
move [MEM_READQ_DATA], r0
move [MEM_READQ_DATA], r0
move [MEM_READQ_DATA], r0
move [MEM_READQ_DATA], r0
move [MEM_READQ_DATA], r0
move [MEM_READQ_DATA], r0
move [MEM_READQ_DATA], r0
move [MEM_READQ_DATA], r0
mem_small_skip:
;; Entry point and loop test. The availability read is in the delay slot, so
;; it is performed whether or not the jump is taken.
jnzero p0, mem_small_skip_next
move [MEM_READQ_AVAIL], r0
jump p1
noop
endframe
;;; ------------------------------------------------------------------------
;;; mem_to_pci_xfer(r1: count, r2: cont)
;;;
;;; Transfer count words from memory to PCI, where the transferred block is
;;; assumed to be aligned on MEM_GRANULE_SIZE and to be confined to a single
;;; block.
;;;
;;; Entry is at the mem_to_pci_xfer label, which follows the unrolled run of
;;; eight transfers. Each pass reads [MEM_READQ_AVAIL], copies
;;; min(count, available) words from MEM_READQ_DATA to PCI_TR_DATA through a
;;; computed jump into the run, and repeats until count reaches zero; it then
;;; jumps to cont. r0 is scratch and count (r1) is consumed.
;;; The run holds eight transfers, so count must not exceed eight (presumably
;;; MEM_GRANULE_SIZE is 8 -- TODO confirm).
frame
alias p0..p1 = r1..r2
protect r3..r31
;; Unrolled run: eight read/write pairs, entered part-way by the computed
;; jump at the end of the frame.
move [MEM_READQ_DATA], r0
move r0, [PCI_TR_DATA]
move [MEM_READQ_DATA], r0
move r0, [PCI_TR_DATA]
move [MEM_READQ_DATA], r0
move r0, [PCI_TR_DATA]
move [MEM_READQ_DATA], r0
move r0, [PCI_TR_DATA]
move [MEM_READQ_DATA], r0
move r0, [PCI_TR_DATA]
move [MEM_READQ_DATA], r0
move r0, [PCI_TR_DATA]
move [MEM_READQ_DATA], r0
move r0, [PCI_TR_DATA]
move [MEM_READQ_DATA], r0
move r0, [PCI_TR_DATA]
mem_to_pci_xfer:
;; Entry point and loop test: return to cont once nothing is left. The
;; availability read is in the delay slot of the jzero.
jzero p0, p1
move [MEM_READQ_AVAIL], r0
sub p0, r0, p0 ; Decrement by the number of words we'll transfer.
;; If the pending count is negative, we readjust to transfer the rest.
jnneg p0, mem_to_pci_xfer_not_last
noop
add p0, r0, r0 ; Set r0 to the remainder of the request.
move 0, p0 ; Clear the next remainder.
mem_to_pci_xfer_not_last:
;; Enter the run 2*r0 instructions before mem_to_pci_xfer so that exactly r0
;; pairs execute before falling back into the loop test.
shift r0, 1, r0 ; Two instruction slots per transfer.
sub mem_to_pci_xfer, r0, r0
jump r0
noop
endframe
_______________________________________________
Open-graphics mailing list
[email protected]
http://lists.duskglow.com/mailman/listinfo/open-graphics
List service provided by Duskglow Consulting, LLC (www.duskglow.com)