On 2008-08-19, Timothy Normand Miller wrote:
> The cache is only available to the PCI target.  HQ is behind the
> cache.  You can take advantage of the cache in order to reduce the
> amount of PCI read traffic that HQ has to service.  If the memory
> space is cacheable (reads have no side-effects), then enabling the
> cache is a good idea.  If reads have side-effects, then disable it.
> The cache is never used for engine access.  Engine registers can be
> volatile, so we don't want to return stale data.

Okay, then we'll need to deal with unaligned PCI reads.  I simplified it
by truncating read requests to a single granule.  I attach a preliminary
poll_pci implementation.  I assumed everything to be word-addressed, so
that has to be fixed, at least.  The compiled code is 115 instruction
words.

> >> >> > Another goofy thing:  It seems tricky at best to unroll the
> >> >> > transfer-loop for target write.  The reason is that we only know the
> >> >> > number of queued commands, but what we need is the number of queued
> >> >> > write-data commands.  Any idea?
> >> >>
> >> >> That is tricky, and we may have no good answer for that.
> >> >>
> >> >> I suggest we do nothing about it right now.  We should get a working
> >> >> revision out, then we can go back later and see if we can do anything
> >> >> clever with the CPU design.
> >> >
> >> > Okay, we don't unroll, but we can still avoid sending an address for
> >> > each write request.
> >>
> >> You can, although the check to decide whether or not to send an
> >> address would probably take longer than just sending an address.  It
> >> depends on what you're doing.
> >>
> >> For instance, if you get an address and some writes, then you don't
> >> get anything, so you go on about some VGA business, then you get
> >> another write without an address, you'll have to reissue the correct
> >> address (where PCI left off), because you may have changed it to
> >> access some memory for what VGA needs to do.
> >
> > My idea was to enter an inner loop once we have a write, and exit to the
> > top level on the first non-write.
> 
> If writes occur then nothing, you'll never see a non-write.  The only
> solution is to bring some PCI state info into where HQ can get at it.
> For instance, we could add a null command to the fifo.  Whenever the
> target terminates a transaction, we send this null command down the
> command pipe.  The bridge should be designed to ignore it, but HQ can
> use it as a hint that writes are done.

We have already defined an "idle" command type to be zero, though I
guess it's currently unused.  Rather than pushing this onto the pipe, I
think it would be more convenient from the code point of view if the
PCI_T_CMD_INFO port returned 0 (idle) whenever the pipe is empty.  That
would save us the two instructions otherwise needed to check
PCI_T_CMD_COUNT in the inner write-loop (see CHECKME in the code).
;;; Copyright (c) 2008 Traversal Technology
;;; 
;;; Permission is hereby granted, free of charge, to any person obtaining a
;;; copy of this software and associated documentation files (the "Software"),
;;; to deal in the Software without restriction, including without limitation
;;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
;;; and/or sell copies of the Software, and to permit persons to whom the
;;; Software is furnished to do so, subject to the following conditions:
;;; 
;;; The above copyright notice and this permission notice shall be included in
;;; all copies or substantial portions of the Software.
;;; 
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
;;; DEALINGS IN THE SOFTWARE.
;;;
;;; Author: Petter Urkedal

include hqio

;; Base of the block of global slots used by poll_pci.  All slots below are
;; accessed with the bracketed form, e.g. [G_POLL_ADDR].
;; NOTE(review): the value 0 is a placeholder (see FIXME); the real base has
;; to be assigned before this code is used.
let G_POLL_BASE         = 0  ; FIXME

;; Global Parameters
;; Address corrections that poll_pci adds to an incoming PCI address,
;; selected by the target of the address command (MEM, VMEM or IO).
;; poll_pci only reads these slots.
let G_POLL_MEM_TRANS    = G_POLL_BASE + 0
let G_POLL_VMEM_TRANS   = G_POLL_BASE + 1
let G_POLL_IO_TRANS     = G_POLL_BASE + 2

;; Global State
;; G_POLL_TARGET holds the flags field of the most recent address command,
;; G_POLL_ADDR the corrected address (advanced by the write handler).
let G_POLL_TARGET       = G_POLL_BASE + 3
let G_POLL_ADDR         = G_POLL_BASE + 4

;;; ------------------------------------------------------------------------
;;; poll_pci(r5: continuation)
;;;
;;; Services the PCI target command queue.  While [PCI_T_CMD_COUNT] is
;;; nonzero, the command at the head of the queue is classified by the type
;;; field of [PCI_T_CMD_INFO] and handled as an address, read-count or
;;; write-data command; every handler ends by jumping back to poll_pci.  When
;;; the count reads zero, control is passed to the continuation in p0 (r5).
;;;
;;; Registers: r0..r2 are scratch, q0..q1 (r3..r4) are overwritten by the
;;; read handler, r6..r31 are declared protected.  Note that r4 is q1 and is
;;; therefore NOT preserved; the continuation must be passed in r5.
;;;
;;; An instruction written directly after a jump occupies its delay slot and
;;; is executed whether or not the jump is taken; most of them are indented
;;; by two extra columns below.

    frame
        alias q0..q1 = r3..r4
        alias p0 = r5
        protect r6..r31
poll_pci:
        ;; Return to the continuation if no command is queued.
        ;; NOTE(review): no explicit delay-slot instruction follows this
        ;; jump, so presumably the [PCI_T_CMD_INFO] read below fills the
        ;; slot; confirm that reading it on an empty queue is harmless.
        move [PCI_T_CMD_COUNT], r0
        jzero r0, p0

        ;; Switch between Command Types
        ;; ----------------------------
        ;; r1 is XORed with successive type codes; each delay slot already
        ;; re-biases r1 for the next comparison.
        ;; NOTE(review): a type matching none of ADDR/RCOUNT/WDATA falls
        ;; through into poll_pci_addr below; confirm this default is intended.
        move [PCI_T_CMD_INFO], r0
        and r0, PCI_TCINFO_TYPE_MASK, r1
        xor r1, PCI_TCINFO_TYPE_ADDR, r1
        jzero r1, poll_pci_addr
          xor r1, PCI_TCINFO_TYPE_RCOUNT ^ PCI_TCINFO_TYPE_ADDR, r1
        jzero r1, poll_pci_rcount
          xor r1, PCI_TCINFO_TYPE_WDATA ^ PCI_TCINFO_TYPE_RCOUNT, r1
        jzero r1, poll_pci_wdata
          noop

        ;; Address Command
        ;; ---------------
        ;; Records the target in [G_POLL_TARGET] and the corrected address in
        ;; [G_POLL_ADDR].
poll_pci_addr:
        ;; Have r0 = [PCI_T_CMD_INFO]
        ;; Will set r1 to the address correction.
        and r0, PCI_TCINFO_FLAGS_MASK, r0       ; target
        move r0, [G_POLL_TARGET]
        ;; Compare the target against MEM, VMEM and IO in turn by successive
        ;; subtraction.  Each delay slot preloads the matching correction
        ;; into r1, so r1 is only meaningful when the jump is taken.
        sub r0, PCI_TARGET_MEM, r0
        jzero r0, poll_pci_save_addr
          move [G_POLL_MEM_TRANS], r1
        sub r0, PCI_TARGET_VMEM - PCI_TARGET_MEM, r0
        jzero r0, poll_pci_save_addr
          move [G_POLL_VMEM_TRANS], r1
        sub r0, PCI_TARGET_IO - PCI_TARGET_VMEM, r0
        jzero r0, poll_pci_save_addr
          move [G_POLL_IO_TRANS], r1
        ;; Any other target: no correction.
        move 0, r1
poll_pci_save_addr:
        ;; Save address and tail call.
        move [PCI_T_CMD_DATA], r0               ; dequeue address
        add r0, r1, r0
        move r0, [G_POLL_ADDR]
        jump poll_pci
        noop                                    ; delay slot

        ;; Read Command
        ;; ------------
        ;;
        ;; For simplicity, we only fetch up to MEM_GRANULE_SIZE words.
        ;; NOTE(review): [G_POLL_ADDR] is not advanced by this handler;
        ;; confirm that every read command is preceded by an address command.
poll_pci_rcount:
        ;; Send the granule-aligned request address to the bridge; r1 keeps
        ;; the offset of the requested address within the granule.
        move [G_POLL_ADDR], r0
        and r0, MEM_GRANULE_SIZE - 1, r1 ; words to skip
        and r0, ~(MEM_GRANULE_SIZE - 1), r0 ; aligned address
        move [G_POLL_TARGET], r2
        xor r2, PCI_TARGET_ENG, r2
        ;; NOTE(review): the MEM address write sits in the delay slot, so for
        ;; the ENG target presumably both MEM_SEND_ADDR_MEM and
        ;; MEM_SEND_ADDR_ENG are written; confirm the bridge tolerates this.
        jnzero r2, poll_pci_rcount_not_eng
          move r0, [MEM_SEND_ADDR_MEM]
        move r0, [MEM_SEND_ADDR_ENG]
poll_pci_rcount_not_eng:
        ;; Let q0 be the numbers to transfer.
        move [PCI_T_CMD_DATA], q0 ; the requested count
        jzero q0, poll_pci ; or can we assume this is nonzero?
          noop
        ;; Let q1 be the number of words to skip after the transfer ...
        ;; i.e. q1 = MEM_GRANULE_SIZE - (skip + count).
        add q0, r1, q1
        sub MEM_GRANULE_SIZE, q1, q1
        ;; ... if it's negative, truncate request count.  The request is done
        ;; here to utilise the delay slot.
        move MEM_GRANULE_SIZE, r0
        jnneg q1, poll_pci_rcount_no_trunc
          move r0, [MEM_SEND_READ_COUNT]
        add q0, q1, q0  ; Reduce the transfer count to fit the granule.
        move 0, q1      ; No final words to skip.
poll_pci_rcount_no_trunc:
        ;; The three jumps below pass r2 as the callee's continuation
        ;; argument (see the callee headers) and r1 as its count.
        ;; Initial skip.  The count argument register r1 is already set above.
        jump mem_small_skip, r2
          noop
        ;; The transfer to PCI.
        jump mem_to_pci_xfer, r2
          move q0, r1
        ;; The final skip.
        jump mem_small_skip, r2 ; Final skip.
          move q1, r1
        jump poll_pci
          noop

        ;; Write Commands
        ;; --------------
        ;; Forwards a run of consecutive write-data commands to the bridge.
poll_pci_wdata:
        ;; Send the current address to the bridge.  r2 tracks the address
        ;; during the run and is stored back to [G_POLL_ADDR] at the end.
        ;; NOTE(review): MEM_SEND_ADDR_MEM is used unconditionally here,
        ;; whereas the read handler checks for PCI_TARGET_ENG; confirm that
        ;; writes to the engine target are not expected on this path.
        move [G_POLL_ADDR], r2
        move r2, [MEM_SEND_ADDR_MEM]
        ;; Prepare for the first transfer.  We know the next PCI command is a
        ;; write, and r0 already contains PCI_T_CMD_INFO.
        and r0, PCI_TCINFO_FLAGS_MASK, r1 ; byte enables
poll_pci_wdata_next:
        ;; Dequeue the data word and forward it through the send port
        ;; selected by the byte enables in r1.
        move [PCI_T_CMD_DATA], r0
        move r0, [add MEM_SEND_DATA_0000, r1]
        add r2, 1, r2                   ; advance the tracked address
        ;; Repeat as long as we receive write commands.
        ;; CHECKME: Do we need to test [PCI_T_CMD_COUNT]?
        ;; (The queue may be empty here; what [PCI_T_CMD_INFO] returns in
        ;; that case is not established by this code.)
        move [PCI_T_CMD_INFO], r0
        and r0, PCI_TCINFO_TYPE_MASK, r1
        xor r1, PCI_TCINFO_TYPE_WDATA, r1
        jzero r1, poll_pci_wdata_next
          and r0, PCI_TCINFO_FLAGS_MASK, r1 ; byte enables
        ;; Save the address in case there are consecutive write commands
        ;; which have just not entered the pipe yet, and recheck PCI queue.
        move r2, [G_POLL_ADDR]
        jump poll_pci
          noop
    endframe


;;; ------------------------------------------------------------------------
;;; mem_small_skip(r1: count, r2: cont)
;;;
;;; Drops count words from MEM_READQ_DATA, where 0 <= count <= 8.
;;;
;;; The entry point is the label mem_small_skip near the END of this frame,
;;; directly after a table of eight unrolled reads.  Each pass reads
;;; [MEM_READQ_AVAIL], limits that to the remaining count, and jumps
;;; backwards into the table so that exactly that many reads execute before
;;; falling into the entry point again.  When the count reaches zero the
;;; routine jumps to cont.  Clobbers r0, r1; r3..r31 are declared protected.
;;;
;;; The table has only eight entries, so a count above 8 would make the
;;; computed jump land before the table.

    frame
        alias p0..p1 = r1..r2
        protect r3..r31
mem_small_skip_next: ; Not the entry point!
        ;; Here r0 = [MEM_READQ_AVAIL], loaded in the entry's delay slot.
        sub p0, r0, p0 ; decrement counter by available words
        jnneg p0, mem_small_skip_no_trunc
          noop
        ;; More words are available than we need: restore the remaining
        ;; count into r0 and mark the request as finished.
        add p0, r0, r0
        move 0, p0
mem_small_skip_no_trunc:
        ;; Jump r0 instructions before the entry point, i.e. into the table
        ;; below so that r0 reads are executed.  With r0 = 0 this jumps
        ;; straight to the entry point, which polls the availability again.
        sub mem_small_skip, r0, r0
        jump r0
          noop
        ;; Unrolled table: one instruction per dropped word.
        move [MEM_READQ_DATA], r0
        move [MEM_READQ_DATA], r0
        move [MEM_READQ_DATA], r0
        move [MEM_READQ_DATA], r0
        move [MEM_READQ_DATA], r0
        move [MEM_READQ_DATA], r0
        move [MEM_READQ_DATA], r0
        move [MEM_READQ_DATA], r0
mem_small_skip:
        ;; Entry point and loop head.  The availability read is in the delay
        ;; slot, so it also executes on the final pass when p0 is zero.
        jnzero p0, mem_small_skip_next
          move [MEM_READQ_AVAIL], r0
        jump p1
          noop
    endframe

;;; ------------------------------------------------------------------------
;;; mem_to_pci_xfer(r1: count, r2: cont)
;;;
;;; Transfer count words from memory to PCI, where the transferred block is
;;; assumed to be aligned on MEM_GRANULE_SIZE and to be confined to a single
;;; block.
;;;
;;; The entry point is the label mem_to_pci_xfer, which FOLLOWS a table of
;;; eight unrolled read/write pairs.  Each pass reads [MEM_READQ_AVAIL],
;;; limits it to the remaining count, and jumps backwards into the table so
;;; that exactly that many pairs execute before falling into the entry point
;;; again.  When the count is zero the routine jumps to cont.  Clobbers r0,
;;; r1; r3..r31 are declared protected.
;;;
;;; The table has only eight pairs, so a count above 8 would make the
;;; computed jump land before the table.

    frame
        alias p0..p1 = r1..r2
        protect r3..r31
        ;; Unrolled table: two instructions per transferred word.
        move [MEM_READQ_DATA], r0
        move r0, [PCI_TR_DATA]
        move [MEM_READQ_DATA], r0
        move r0, [PCI_TR_DATA]
        move [MEM_READQ_DATA], r0
        move r0, [PCI_TR_DATA]
        move [MEM_READQ_DATA], r0
        move r0, [PCI_TR_DATA]
        move [MEM_READQ_DATA], r0
        move r0, [PCI_TR_DATA]
        move [MEM_READQ_DATA], r0
        move r0, [PCI_TR_DATA]
        move [MEM_READQ_DATA], r0
        move r0, [PCI_TR_DATA]
        move [MEM_READQ_DATA], r0
        move r0, [PCI_TR_DATA]
mem_to_pci_xfer:
        ;; Entry point and loop head: done when nothing remains.  The
        ;; availability read is in the delay slot and always executes.
        jzero p0, p1
          move [MEM_READQ_AVAIL], r0
        sub p0, r0, p0  ; Decrement by the number of words we'll transfer.
        ;; If the pending count is negative, we readjust to transfer the rest.
        jnneg p0, mem_to_pci_xfer_not_last
          noop
        add p0, r0, r0  ; Set r0 to the remainder of the request.
        move 0, p0      ; Clear the next remainder.
mem_to_pci_xfer_not_last:
        ;; Convert the word count in r0 to an instruction offset and jump
        ;; that far before the entry point.  With r0 = 0 this jumps straight
        ;; back to the entry point, which polls the availability again.
        shift r0, 1, r0 ; Two instruction slots per transfer.
        sub mem_to_pci_xfer, r0, r0
        jump r0
          noop
    endframe
_______________________________________________
Open-graphics mailing list
[email protected]
http://lists.duskglow.com/mailman/listinfo/open-graphics
List service provided by Duskglow Consulting, LLC (www.duskglow.com)

Reply via email to