On Sat, Oct 13, 2001 at 10:08:50PM +0200, Manuel Teira wrote: Regarding theories on DMA and Mach64 and XFree 4.1.0:
I just finished a module that uses the System DMA unit for video capture transfers, and it works fine under a (highly hacked) XFree 4.1.0 server. System: Dual 550 Celeron, All-in-Wonder (original) PCI, 3D Rage II+. The X server is 4.1.0 with the Gatos devel branch. The module is for the 2.4 kernel. To use this on another card, you would have to enter the proper PCI ID in the code. Oh yes, it uses the secondary register aperture, not the linear mapped aperture, but I don't think it makes a difference. I have not tried GUI DMA, but it is quite similar. R C -- They said it was *daft* to build a space station in a swamp, but I showed them! It sank into the swamp. So I built a second space station. That sank into the swamp. So I built a third space station. That sank into the swamp too. So I built a fourth one. That fell into a time warp and _then_ sank into the swamp. But the fifth one... stayed up! --Monty Python/Babylon 5
#include <linux/module.h> #include <linux/pci.h> #include <linux/init.h> #include <linux/pci_ids.h> #include <linux/compatmac.h> #include <linux/devfs_fs_kernel.h> #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/mm.h> #include <linux/slab.h> #include <asm/processor.h> #include <asm-i386/io.h> /*#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/malloc.h> #include <linux/mm.h> #include <linux/poll.h> #include <linux/pci.h> #include <linux/signal.h> #include <linux/ioport.h> #include <asm/pgtable.h> #include <asm/page.h> #include <linux/sched.h> #include <asm/segment.h> #include <linux/types.h> #include <linux/wrapper.h> #include <linux/kmod.h> #include <linux/vmalloc.h> #include <linux/init.h>*/ /* 1. Find the device. call pci_register_driver() 2. Enable devices: pci_enable_device() Call pci_set_master() to enable bus master. A field, by default, is 640x240x2 (307200). This corresponds to exactly 75 pages when they are 4K sized (AFAIK the minimum page size). The main restriction is that they must be 4K aligned. */ #define SUCCESS 0 #define MAJOR_NUM 240 #define DEVICE_NAME "atidma" //#define DEBUG 1 #define CAP_INT_CNTL 0x908 #define CAP_INT_STATUS 0x90C #define RADEON_CAP0_VBI0_OFFSET 0x093C #define RADEON_CAP0_VBI1_OFFSET 0x0940 #define RADEON_CAP0_VBI_V_WINDOW 0x0944 #define RADEON_CAP0_VBI_H_WINDOW 0x0948 #define RADEON_CAP0_CONFIG 0x0958 #define regw(a,b) *(MMR+a)=b #define regr(a) *(MMR+a) #define MEM_BASE 0x02480000 #define BUS_CNTL 0x28+(256) #define CRTC_INT_CNTL 0x06+(256) #define BM_SYSTEM_TABLE 0x6F #define CAPTURE_BUF0_OFFSET 0x20 #define CAPTURE_BUF1_OFFSET 0x21 #define CAPTURE_DEBUG 0x19 #define SYSTEM_TO_FRAME_BUFFER 0x0 #define FRAME_BUFFER_TO_SYSTEM 0x1 #define LAST_DESCRIPTOR 1 << 31 #define NUM_BUFS 5 /* Should be multiple of 2. 
*/ #define CHAR_BUF_SIZE 128 /* Should be a multiple of 4 */ #define H_SIZE 1560 #define FIELD_SIZE 307200 /*#define BUF_SIZE H_SIZE*2*/ static unsigned int volatile * MMR = NULL, *BUFF0, *BUFF1, IRQ; static unsigned const char volatile * ATIFB = NULL; static unsigned int saved_bus_cntl, saved_crtc_cntl; static volatile unsigned int * full_page = NULL; static volatile int Device_Open = 0; int ati_module_init(); void ati_module_remove(); void ati_bh(unsigned long); static int ati_probe_pci(struct pci_dev *, const struct pci_device_id *); static void ati_remove_pci(struct pci_dev *); static DECLARE_WAIT_QUEUE_HEAD(read_wait); static DECLARE_TASKLET_DISABLED(ati_tasklet, ati_bh, 0); static void __devexit ati_remove_pci(struct pci_dev *pdev) { int i; /* Disable interrupts, then wait for a while * to be sure. */ tasklet_disable(&ati_tasklet); //writel(0, (MMR + CAP_INT_CNTL / 4 )); mdelay(20); pci_release_regions(pdev); /* Cast to shut up the compiler. */ iounmap((void *) ATIFB); iounmap((void *) MMR); devfs_unregister_chrdev(MAJOR_NUM, DEVICE_NAME); pci_disable_device(pdev); free_irq(IRQ, NULL); if (full_page != NULL) free_pages((unsigned long) full_page, 7); printk("Removed ATI card at irq %d\n", pdev->irq); } /* Interrupt handler */ void ati_int (int irq, void * dev_id, struct pt_regs *regs) { unsigned int flags; flags = regr (CRTC_INT_CNTL); if (flags & 0x02000000) { regw(CRTC_INT_CNTL, flags | 0x02000000); tasklet_schedule(&ati_tasklet); } return; } void ati_bh (unsigned long param) { wake_up_interruptible(&read_wait); return; } static int device_open(struct inode *inode, struct file *file) { unsigned int temp; int a; // This is not race proof. if (Device_Open) return -EBUSY; Device_Open++; // Set VBI Window size temp = regr (BUS_CNTL); saved_bus_cntl = temp; temp = (temp | 0x08000000) & ~(1 << 6); // enable mm regs and bus mastering. 
regw (BUS_CNTL, temp); temp = regr( CRTC_INT_CNTL); saved_crtc_cntl = temp; regw (CRTC_INT_CNTL, temp | 0x02000000); // Clear interrupt regw (CRTC_INT_CNTL, temp | 0x01000000); // Enable interrupt return SUCCESS; } static int device_release(struct inode *inode, struct file *file) { /* Restore registers */ regw (BUS_CNTL, saved_bus_cntl); regw (CRTC_INT_CNTL, saved_crtc_cntl); Device_Open --; return 0; } static ssize_t device_read( struct file *file, char *u_buffer, /* The buffer to fill with the data */ size_t length, /* The length of the buffer */ loff_t *offset) /* offset to the file */ { unsigned long itime, ttime; unsigned int temp, a; /* Pick the unused buffer; I think this is the correct order */ regw(CAPTURE_DEBUG, 1 <<24); if (regr(CAPTURE_DEBUG) & (1 << 9)) temp = regr(CAPTURE_BUF1_OFFSET); else temp = regr(CAPTURE_BUF0_OFFSET); /* Write the DMA Table. Write to buffer + 4k for table space. */ for (a = 0; a < 75; a++) { full_page[a*4+0] = temp + a * 4096; full_page[a*4+1] = virt_to_bus(full_page) + 0x1000 + a * 4096; full_page[a*4+2] = 4096; full_page[a*4+3] = 0; } regw(BM_SYSTEM_TABLE, virt_to_bus(full_page) | FRAME_BUFFER_TO_SYSTEM); rdtscl(itime); /* Perform a race-proof blocking call */ wait_event_interruptible(read_wait, length > 0); if (length < 640*480) copy_to_user( u_buffer, full_page + 1024, length); else copy_to_user(u_buffer, full_page + 1024, 640*480); rdtscl(ttime); printk("%ld\n", ttime - itime); return (length < 640*480 ? 
length : 640*480); } static struct file_operations Fops = { release:device_release, open:device_open, read:device_read }; static struct pci_device_id ati_pci_tbl[] __devinitdata = { {PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_215GTB, /* {PCI_VENDOR_ID_ATI, 0x5144,*/ PCI_ANY_ID, PCI_ANY_ID, 0, 0}, {0,} }; MODULE_DEVICE_TABLE(pci, ati_pci_tbl); static struct pci_driver ati_driver = { name:"atidma", id_table:ati_pci_tbl, probe:ati_probe_pci, remove:ati_remove_pci }; static int ati_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) { int err, i; if ((err = pci_enable_device(pdev))) { printk("Cannot enable device!\n"); return -EIO; } pci_set_master(pdev); printk("Found ATI card at 0x%.8lx.\n", pci_resource_start(pdev, 0)); if (pci_request_regions(pdev, "atidma") != 0) { printk("Could not request IO regions!\n"); return -EBUSY; } ATIFB = __ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev,0), 0); MMR = __ioremap(pci_resource_start(pdev, 2), pci_resource_len(pdev,2), 0); BUFF0 = MMR + 0x20; BUFF1 = MMR + 0x21; IRQ = pdev->irq; if (request_irq(IRQ, ati_int, 0, DEVICE_NAME, NULL) != 0) { printk("Cannot reserve interrupt for ATI card!\n"); return -EBUSY; } if ((full_page = (void *) __get_free_pages(GFP_KERNEL, 7)) == NULL) { printk("Could not allocate field buffer.\n"); ati_remove_pci(pdev); return -EFAULT; } else printk("Buffer at %.8lx.\n", virt_to_bus(full_page) ); tasklet_enable(&ati_tasklet); return 0; } int __init ati_module_init() { int retval; retval = devfs_register_chrdev(MAJOR_NUM, DEVICE_NAME, &Fops); if (retval >= 0) return pci_module_init(&ati_driver); else return retval; } void __exit ati_module_remove() { pci_unregister_driver(&ati_driver); return ; } module_init(ati_module_init); module_exit(ati_module_remove);